Use reduced precision for scan times
Some checks failed
checkstyle / checkstyle (push) Has been cancelled
CodeQL / Analyze (push) Has been cancelled
smatch / smatch (push) Has been cancelled
zfs-qemu / Setup (push) Has been cancelled
zloop / zloop (push) Has been cancelled
zfs-qemu / qemu-x86 (push) Has been cancelled
zfs-qemu / Cleanup (push) Has been cancelled

Scan time limits do not need precision beyond 1ms.  Switching
scn_sync_start_time and spa_sync_starttime from gethrtime() to
getlrtime() saves ~3% of CPU time during resilver scan stage.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Alexander Motin <alexander.motin@TrueNAS.com>
Closes #18061
This commit is contained in:
Alexander Motin 2025-12-18 13:22:11 -05:00 committed by GitHub
parent a83bb15fcd
commit 962e68865e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 15 additions and 15 deletions

View file

@ -1669,7 +1669,7 @@ dsl_scan_check_suspend(dsl_scan_t *scn, const zbookmark_phys_t *zb)
* or
* - the scan queue has reached its memory use limit
*/
uint64_t curr_time_ns = gethrtime();
uint64_t curr_time_ns = getlrtime();
uint64_t scan_time_ns = curr_time_ns - scn->scn_sync_start_time;
uint64_t sync_time_ns = curr_time_ns -
scn->scn_dp->dp_spa->spa_sync_starttime;
@ -1731,7 +1731,7 @@ dsl_error_scrub_check_suspend(dsl_scan_t *scn, const zbookmark_phys_t *zb)
* - the spa is shutting down because this pool is being exported
* or the machine is rebooting.
*/
uint64_t curr_time_ns = gethrtime();
uint64_t curr_time_ns = getlrtime();
uint64_t error_scrub_time_ns = curr_time_ns - scn->scn_sync_start_time;
uint64_t sync_time_ns = curr_time_ns -
scn->scn_dp->dp_spa->spa_sync_starttime;
@ -3243,7 +3243,7 @@ static boolean_t
scan_io_queue_check_suspend(dsl_scan_t *scn)
{
/* See comment in dsl_scan_check_suspend() */
uint64_t curr_time_ns = gethrtime();
uint64_t curr_time_ns = getlrtime();
uint64_t scan_time_ns = curr_time_ns - scn->scn_sync_start_time;
uint64_t sync_time_ns = curr_time_ns -
scn->scn_dp->dp_spa->spa_sync_starttime;
@ -3600,7 +3600,7 @@ dsl_scan_async_block_should_pause(dsl_scan_t *scn)
return (B_TRUE);
}
elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time;
elapsed_nanosecs = getlrtime() - scn->scn_sync_start_time;
return (elapsed_nanosecs / (NANOSEC / 2) > zfs_txg_timeout ||
(NSEC2MSEC(elapsed_nanosecs) > scn->scn_async_block_min_time_ms &&
txg_sync_waiting(scn->scn_dp)) ||
@ -3887,7 +3887,7 @@ dsl_process_async_destroys(dsl_pool_t *dp, dmu_tx_t *tx)
"free_bpobj/bptree on %s in txg %llu; err=%u",
(longlong_t)scn->scn_visited_this_txg,
(longlong_t)
NSEC2MSEC(gethrtime() - scn->scn_sync_start_time),
NSEC2MSEC(getlrtime() - scn->scn_sync_start_time),
spa->spa_name, (longlong_t)tx->tx_txg, err);
scn->scn_visited_this_txg = 0;
scn->scn_async_frees_this_txg = 0;
@ -4218,14 +4218,14 @@ dsl_errorscrub_sync(dsl_pool_t *dp, dmu_tx_t *tx)
}
spa->spa_scrub_active = B_TRUE;
scn->scn_sync_start_time = gethrtime();
scn->scn_sync_start_time = getlrtime();
/*
* zfs_scan_suspend_progress can be set to disable scrub progress.
* See more detailed comment in dsl_scan_sync().
*/
if (zfs_scan_suspend_progress) {
uint64_t scan_time_ns = gethrtime() - scn->scn_sync_start_time;
uint64_t scan_time_ns = getlrtime() - scn->scn_sync_start_time;
int mintime = zfs_scrub_min_time_ms;
while (zfs_scan_suspend_progress &&
@ -4233,7 +4233,7 @@ dsl_errorscrub_sync(dsl_pool_t *dp, dmu_tx_t *tx)
!spa_shutting_down(scn->scn_dp->dp_spa) &&
NSEC2MSEC(scan_time_ns) < mintime) {
delay(hz);
scan_time_ns = gethrtime() - scn->scn_sync_start_time;
scan_time_ns = getlrtime() - scn->scn_sync_start_time;
}
return;
}
@ -4443,7 +4443,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
scn->scn_avg_zio_size_this_txg = 0;
scn->scn_zios_this_txg = 0;
scn->scn_suspending = B_FALSE;
scn->scn_sync_start_time = gethrtime();
scn->scn_sync_start_time = getlrtime();
spa->spa_scrub_active = B_TRUE;
/*
@ -4467,7 +4467,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
* useful for testing and debugging.
*/
if (zfs_scan_suspend_progress) {
uint64_t scan_time_ns = gethrtime() - scn->scn_sync_start_time;
uint64_t scan_time_ns = getlrtime() - scn->scn_sync_start_time;
uint_t mintime = (scn->scn_phys.scn_func ==
POOL_SCAN_RESILVER) ? zfs_resilver_min_time_ms :
zfs_scrub_min_time_ms;
@ -4477,7 +4477,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
!spa_shutting_down(scn->scn_dp->dp_spa) &&
NSEC2MSEC(scan_time_ns) < mintime) {
delay(hz);
scan_time_ns = gethrtime() - scn->scn_sync_start_time;
scan_time_ns = getlrtime() - scn->scn_sync_start_time;
}
return;
}
@ -4607,7 +4607,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
"%llu in ddt, %llu > maxtxg)",
(longlong_t)scn->scn_visited_this_txg,
spa->spa_name,
(longlong_t)NSEC2MSEC(gethrtime() -
(longlong_t)NSEC2MSEC(getlrtime() -
scn->scn_sync_start_time),
(longlong_t)scn->scn_objsets_visited_this_txg,
(longlong_t)scn->scn_holes_this_txg,
@ -4648,7 +4648,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
(longlong_t)scn->scn_zios_this_txg,
spa->spa_name,
(longlong_t)scn->scn_segs_this_txg,
(longlong_t)NSEC2MSEC(gethrtime() -
(longlong_t)NSEC2MSEC(getlrtime() -
scn->scn_sync_start_time),
(longlong_t)scn->scn_avg_zio_size_this_txg,
(longlong_t)scn->scn_avg_seg_size_this_txg);

View file

@ -10449,7 +10449,7 @@ spa_sync(spa_t *spa, uint64_t txg)
dsl_pool_t *dp = spa->spa_dsl_pool;
dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg);
spa->spa_sync_starttime = gethrtime();
spa->spa_sync_starttime = getlrtime();
taskq_cancel_id(system_delay_taskq, spa->spa_deadman_tqid, B_TRUE);
spa->spa_deadman_tqid = taskq_dispatch_delay(system_delay_taskq,

View file

@ -720,7 +720,7 @@ spa_deadman(void *arg)
return;
zfs_dbgmsg("slow spa_sync: started %llu seconds ago, calls %llu",
(gethrtime() - spa->spa_sync_starttime) / NANOSEC,
(getlrtime() - spa->spa_sync_starttime) / NANOSEC,
(u_longlong_t)++spa->spa_deadman_calls);
if (zfs_deadman_enabled)
vdev_deadman(spa->spa_root_vdev, FTAG);