Skip to content

Commit 482eeef

Browse files
gamanakis and behlendorf
authored and committed
Teach zpool scrub to scrub only blocks in error log
Added a flag '-e' to zpool scrub to scrub only the blocks in the error log. A user can pause, resume, and cancel the error scrub by passing the additional command-line arguments -p and -s, just as with a regular scrub. This involves adding a new flag, creating new libzfs interfaces, a new ioctl, and the actual iteration and read-issuing logic. Error scrubbing is executed across multiple txgs to make sure pool performance is not affected. Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: Tony Hutter <[email protected]> Co-authored-by: TulsiJain [email protected] Signed-off-by: George Amanakis <[email protected]> Closes #8995 Closes #12355
1 parent e34e15e commit 482eeef

29 files changed

+1602
-71
lines changed

cmd/zpool/zpool_main.c

Lines changed: 101 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -401,7 +401,7 @@ get_usage(zpool_help_t idx)
401401
return (gettext("\tinitialize [-c | -s | -u] [-w] <pool> "
402402
"[<device> ...]\n"));
403403
case HELP_SCRUB:
404-
return (gettext("\tscrub [-s | -p] [-w] <pool> ...\n"));
404+
return (gettext("\tscrub [-s | -p] [-w] [-e] <pool> ...\n"));
405405
case HELP_RESILVER:
406406
return (gettext("\tresilver <pool> ...\n"));
407407
case HELP_TRIM:
@@ -7309,8 +7309,9 @@ wait_callback(zpool_handle_t *zhp, void *data)
73097309
}
73107310

73117311
/*
7312-
* zpool scrub [-s | -p] [-w] <pool> ...
7312+
* zpool scrub [-s | -p] [-w] [-e] <pool> ...
73137313
*
7314+
* -e Only scrub blocks in the error log.
73147315
* -s Stop. Stops any in-progress scrub.
73157316
* -p Pause. Pause in-progress scrub.
73167317
* -w Wait. Blocks until scrub has completed.
@@ -7326,14 +7327,21 @@ zpool_do_scrub(int argc, char **argv)
73267327
cb.cb_type = POOL_SCAN_SCRUB;
73277328
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
73287329

7330+
boolean_t is_error_scrub = B_FALSE;
7331+
boolean_t is_pause = B_FALSE;
7332+
boolean_t is_stop = B_FALSE;
7333+
73297334
/* check options */
7330-
while ((c = getopt(argc, argv, "spw")) != -1) {
7335+
while ((c = getopt(argc, argv, "spwe")) != -1) {
73317336
switch (c) {
7337+
case 'e':
7338+
is_error_scrub = B_TRUE;
7339+
break;
73327340
case 's':
7333-
cb.cb_type = POOL_SCAN_NONE;
7341+
is_stop = B_TRUE;
73347342
break;
73357343
case 'p':
7336-
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
7344+
is_pause = B_TRUE;
73377345
break;
73387346
case 'w':
73397347
wait = B_TRUE;
@@ -7345,11 +7353,21 @@ zpool_do_scrub(int argc, char **argv)
73457353
}
73467354
}
73477355

7348-
if (cb.cb_type == POOL_SCAN_NONE &&
7349-
cb.cb_scrub_cmd == POOL_SCRUB_PAUSE) {
7350-
(void) fprintf(stderr, gettext("invalid option combination: "
7351-
"-s and -p are mutually exclusive\n"));
7356+
if (is_pause && is_stop) {
7357+
(void) fprintf(stderr, gettext("invalid option "
7358+
"combination :-s and -p are mutually exclusive\n"));
73527359
usage(B_FALSE);
7360+
} else {
7361+
if (is_error_scrub)
7362+
cb.cb_type = POOL_SCAN_ERRORSCRUB;
7363+
7364+
if (is_pause) {
7365+
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
7366+
} else if (is_stop) {
7367+
cb.cb_type = POOL_SCAN_NONE;
7368+
} else {
7369+
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
7370+
}
73537371
}
73547372

73557373
if (wait && (cb.cb_type == POOL_SCAN_NONE ||
@@ -7573,6 +7591,70 @@ secs_to_dhms(uint64_t total, char *buf)
75737591
}
75747592
}
75757593

7594+
/*
7595+
* Print out detailed error scrub status.
7596+
*/
7597+
static void
7598+
print_err_scrub_status(pool_scan_stat_t *ps)
7599+
{
7600+
time_t start, end, pause;
7601+
uint64_t total_secs_left;
7602+
uint64_t secs_left, mins_left, hours_left, days_left;
7603+
uint64_t examined, to_be_examined;
7604+
7605+
if (ps == NULL || ps->pss_error_scrub_func != POOL_SCAN_ERRORSCRUB) {
7606+
return;
7607+
}
7608+
7609+
(void) printf(gettext(" scrub: "));
7610+
7611+
start = ps->pss_error_scrub_start;
7612+
end = ps->pss_error_scrub_end;
7613+
pause = ps->pss_pass_error_scrub_pause;
7614+
examined = ps->pss_error_scrub_examined;
7615+
to_be_examined = ps->pss_error_scrub_to_be_examined;
7616+
7617+
assert(ps->pss_error_scrub_func == POOL_SCAN_ERRORSCRUB);
7618+
7619+
if (ps->pss_error_scrub_state == DSS_FINISHED) {
7620+
total_secs_left = end - start;
7621+
days_left = total_secs_left / 60 / 60 / 24;
7622+
hours_left = (total_secs_left / 60 / 60) % 24;
7623+
mins_left = (total_secs_left / 60) % 60;
7624+
secs_left = (total_secs_left % 60);
7625+
7626+
(void) printf(gettext("scrubbed %llu error blocks in %llu days "
7627+
"%02llu:%02llu:%02llu on %s"), (u_longlong_t)examined,
7628+
(u_longlong_t)days_left, (u_longlong_t)hours_left,
7629+
(u_longlong_t)mins_left, (u_longlong_t)secs_left,
7630+
ctime(&end));
7631+
7632+
return;
7633+
} else if (ps->pss_error_scrub_state == DSS_CANCELED) {
7634+
(void) printf(gettext("error scrub canceled on %s"),
7635+
ctime(&end));
7636+
return;
7637+
}
7638+
assert(ps->pss_error_scrub_state == DSS_ERRORSCRUBBING);
7639+
7640+
/* Error scrub is in progress. */
7641+
if (pause == 0) {
7642+
(void) printf(gettext("error scrub in progress since %s"),
7643+
ctime(&start));
7644+
} else {
7645+
(void) printf(gettext("error scrub paused since %s"),
7646+
ctime(&pause));
7647+
(void) printf(gettext("\terror scrub started on %s"),
7648+
ctime(&start));
7649+
}
7650+
7651+
double fraction_done = (double)examined / (to_be_examined + examined);
7652+
(void) printf(gettext("\t%.2f%% done, issued I/O for %llu error"
7653+
" blocks"), 100 * fraction_done, (u_longlong_t)examined);
7654+
7655+
(void) printf("\n");
7656+
}
7657+
75767658
/*
75777659
* Print out detailed scrub status.
75787660
*/
@@ -7909,10 +7991,12 @@ print_scan_status(zpool_handle_t *zhp, nvlist_t *nvroot)
79097991
{
79107992
uint64_t rebuild_end_time = 0, resilver_end_time = 0;
79117993
boolean_t have_resilver = B_FALSE, have_scrub = B_FALSE;
7994+
boolean_t have_errorscrub = B_FALSE;
79127995
boolean_t active_resilver = B_FALSE;
79137996
pool_checkpoint_stat_t *pcs = NULL;
79147997
pool_scan_stat_t *ps = NULL;
79157998
uint_t c;
7999+
time_t scrub_start = 0, errorscrub_start = 0;
79168000

79178001
if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS,
79188002
(uint64_t **)&ps, &c) == 0) {
@@ -7921,16 +8005,23 @@ print_scan_status(zpool_handle_t *zhp, nvlist_t *nvroot)
79218005
active_resilver = (ps->pss_state == DSS_SCANNING);
79228006
}
79238007

8008+
79248009
have_resilver = (ps->pss_func == POOL_SCAN_RESILVER);
79258010
have_scrub = (ps->pss_func == POOL_SCAN_SCRUB);
8011+
scrub_start = ps->pss_start_time;
8012+
have_errorscrub = (ps->pss_error_scrub_func ==
8013+
POOL_SCAN_ERRORSCRUB);
8014+
errorscrub_start = ps->pss_error_scrub_start;
79268015
}
79278016

79288017
boolean_t active_rebuild = check_rebuilding(nvroot, &rebuild_end_time);
79298018
boolean_t have_rebuild = (active_rebuild || (rebuild_end_time > 0));
79308019

79318020
/* Always print the scrub status when available. */
7932-
if (have_scrub)
8021+
if (have_scrub && scrub_start > errorscrub_start)
79338022
print_scan_scrub_resilver_status(ps);
8023+
else if (have_errorscrub && errorscrub_start >= scrub_start)
8024+
print_err_scrub_status(ps);
79348025

79358026
/*
79368027
* When there is an active resilver or rebuild print its status.

include/libzfs.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -125,11 +125,14 @@ typedef enum zfs_error {
125125
EZFS_THREADCREATEFAILED, /* thread create failed */
126126
EZFS_POSTSPLIT_ONLINE, /* onlining a disk after splitting it */
127127
EZFS_SCRUBBING, /* currently scrubbing */
128+
EZFS_ERRORSCRUBBING, /* currently error scrubbing */
129+
EZFS_ERRORSCRUB_PAUSED, /* error scrub currently paused */
128130
EZFS_NO_SCRUB, /* no active scrub */
129131
EZFS_DIFF, /* general failure of zfs diff */
130132
EZFS_DIFFDATA, /* bad zfs diff data */
131133
EZFS_POOLREADONLY, /* pool is in read-only mode */
132134
EZFS_SCRUB_PAUSED, /* scrub currently paused */
135+
EZFS_SCRUB_PAUSED_TO_CANCEL, /* scrub currently paused */
133136
EZFS_ACTIVE_POOL, /* pool is imported on a different system */
134137
EZFS_CRYPTOFAILED, /* failed to setup encryption */
135138
EZFS_NO_PENDING, /* cannot cancel, no operation is pending */

include/libzfs_core.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -155,6 +155,8 @@ _LIBZFS_CORE_H int lzc_get_bootenv(const char *, nvlist_t **);
155155
_LIBZFS_CORE_H int lzc_get_vdev_prop(const char *, nvlist_t *, nvlist_t **);
156156
_LIBZFS_CORE_H int lzc_set_vdev_prop(const char *, nvlist_t *, nvlist_t **);
157157

158+
_LIBZFS_CORE_H int lzc_scrub(zfs_ioc_t, const char *, nvlist_t *, nvlist_t **);
159+
158160
#ifdef __cplusplus
159161
}
160162
#endif

include/sys/dmu.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -378,6 +378,7 @@ typedef struct dmu_buf {
378378
#define DMU_POOL_DDT_STATS "DDT-statistics"
379379
#define DMU_POOL_CREATION_VERSION "creation_version"
380380
#define DMU_POOL_SCAN "scan"
381+
#define DMU_POOL_ERRORSCRUB "error_scrub"
381382
#define DMU_POOL_FREE_BPOBJ "free_bpobj"
382383
#define DMU_POOL_BPTREE_OBJ "bptree_obj"
383384
#define DMU_POOL_EMPTY_BPOBJ "empty_bpobj"

include/sys/dsl_scan.h

Lines changed: 26 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@
2929

3030
#include <sys/zfs_context.h>
3131
#include <sys/zio.h>
32+
#include <sys/zap.h>
3233
#include <sys/ddt.h>
3334
#include <sys/bplist.h>
3435

@@ -78,6 +79,21 @@ typedef enum dsl_scan_flags {
7879

7980
#define DSL_SCAN_FLAGS_MASK (DSF_VISIT_DS_AGAIN)
8081

82+
typedef struct dsl_errorscrub_phys {
83+
uint64_t dep_func; /* pool_scan_func_t */
84+
uint64_t dep_state; /* dsl_scan_state_t */
85+
uint64_t dep_cursor; /* serialized zap cursor for tracing progress */
86+
uint64_t dep_start_time; /* error scrub start time, unix timestamp */
87+
uint64_t dep_end_time; /* error scrub end time, unix timestamp */
88+
uint64_t dep_to_examine; /* total error blocks to be scrubbed */
89+
uint64_t dep_examined; /* blocks scrubbed so far */
90+
uint64_t dep_errors; /* error scrub I/O error count */
91+
uint64_t dep_paused_flags; /* flag for paused */
92+
} dsl_errorscrub_phys_t;
93+
94+
#define ERRORSCRUB_PHYS_NUMINTS (sizeof (dsl_errorscrub_phys_t) \
95+
/ sizeof (uint64_t))
96+
8197
/*
8298
* Every pool will have one dsl_scan_t and this structure will contain
8399
* in-memory information about the scan and a pointer to the on-disk
@@ -151,11 +167,15 @@ typedef struct dsl_scan {
151167
uint64_t scn_avg_zio_size_this_txg;
152168
uint64_t scn_zios_this_txg;
153169

170+
/* zap cursor for tracing error scrub progress */
171+
zap_cursor_t errorscrub_cursor;
154172
/* members needed for syncing scan status to disk */
155173
dsl_scan_phys_t scn_phys; /* on disk representation of scan */
156174
dsl_scan_phys_t scn_phys_cached;
157175
avl_tree_t scn_queue; /* queue of datasets to scan */
158176
uint64_t scn_queues_pending; /* outstanding data to issue */
177+
/* members needed for syncing error scrub status to disk */
178+
dsl_errorscrub_phys_t errorscrub_phys;
159179
} dsl_scan_t;
160180

161181
typedef struct dsl_scan_io_queue dsl_scan_io_queue_t;
@@ -171,8 +191,12 @@ int dsl_scan_cancel(struct dsl_pool *);
171191
int dsl_scan(struct dsl_pool *, pool_scan_func_t);
172192
void dsl_scan_assess_vdev(struct dsl_pool *dp, vdev_t *vd);
173193
boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp);
174-
int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd);
194+
boolean_t dsl_errorscrubbing(const struct dsl_pool *dp);
195+
boolean_t dsl_errorscrub_active(dsl_scan_t *scn);
175196
void dsl_scan_restart_resilver(struct dsl_pool *, uint64_t txg);
197+
int dsl_scrub_set_pause_resume(const struct dsl_pool *dp,
198+
pool_scrub_cmd_t cmd);
199+
void dsl_errorscrub_sync(struct dsl_pool *, dmu_tx_t *);
176200
boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
177201
boolean_t dsl_scan_resilver_scheduled(struct dsl_pool *dp);
178202
boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);
@@ -184,6 +208,7 @@ void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
184208
struct dmu_tx *tx);
185209
boolean_t dsl_scan_active(dsl_scan_t *scn);
186210
boolean_t dsl_scan_is_paused_scrub(const dsl_scan_t *scn);
211+
boolean_t dsl_errorscrub_is_paused(const dsl_scan_t *scn);
187212
void dsl_scan_freed(spa_t *spa, const blkptr_t *bp);
188213
void dsl_scan_io_queue_destroy(dsl_scan_io_queue_t *queue);
189214
void dsl_scan_io_queue_vdev_xfer(vdev_t *svd, vdev_t *tvd);

include/sys/fs/zfs.h

Lines changed: 18 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1036,6 +1036,7 @@ typedef enum pool_scan_func {
10361036
POOL_SCAN_NONE,
10371037
POOL_SCAN_SCRUB,
10381038
POOL_SCAN_RESILVER,
1039+
POOL_SCAN_ERRORSCRUB,
10391040
POOL_SCAN_FUNCS
10401041
} pool_scan_func_t;
10411042

@@ -1099,6 +1100,20 @@ typedef struct pool_scan_stat {
10991100
uint64_t pss_pass_scrub_spent_paused;
11001101
uint64_t pss_pass_issued; /* issued bytes per scan pass */
11011102
uint64_t pss_issued; /* total bytes checked by scanner */
1103+
1104+
/* error scrub values stored on disk */
1105+
uint64_t pss_error_scrub_func; /* pool_scan_func_t */
1106+
uint64_t pss_error_scrub_state; /* dsl_scan_state_t */
1107+
uint64_t pss_error_scrub_start; /* error scrub start time */
1108+
uint64_t pss_error_scrub_end; /* error scrub end time */
1109+
uint64_t pss_error_scrub_examined; /* error blocks issued I/O */
1110+
/* error blocks to be issued I/O */
1111+
uint64_t pss_error_scrub_to_be_examined;
1112+
1113+
/* error scrub values not stored on disk */
1114+
/* error scrub pause time; read as a time_t (seconds) by zpool status */
1115+
uint64_t pss_pass_error_scrub_pause;
1116+
11021117
} pool_scan_stat_t;
11031118

11041119
typedef struct pool_removal_stat {
@@ -1120,6 +1135,7 @@ typedef enum dsl_scan_state {
11201135
DSS_SCANNING,
11211136
DSS_FINISHED,
11221137
DSS_CANCELED,
1138+
DSS_ERRORSCRUBBING,
11231139
DSS_NUM_STATES
11241140
} dsl_scan_state_t;
11251141

@@ -1360,7 +1376,7 @@ typedef enum {
13601376
*/
13611377
typedef enum zfs_ioc {
13621378
/*
1363-
* Core features - 81/128 numbers reserved.
1379+
* Core features - 88/128 numbers reserved.
13641380
*/
13651381
#ifdef __FreeBSD__
13661382
ZFS_IOC_FIRST = 0,
@@ -1455,6 +1471,7 @@ typedef enum zfs_ioc {
14551471
ZFS_IOC_WAIT_FS, /* 0x5a54 */
14561472
ZFS_IOC_VDEV_GET_PROPS, /* 0x5a55 */
14571473
ZFS_IOC_VDEV_SET_PROPS, /* 0x5a56 */
1474+
ZFS_IOC_POOL_SCRUB, /* 0x5a57 */
14581475

14591476
/*
14601477
* Per-platform (Optional) - 8/128 numbers reserved.

include/sys/spa.h

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1155,6 +1155,7 @@ extern void zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate);
11551155
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
11561156
extern uint64_t spa_approx_errlog_size(spa_t *spa);
11571157
extern int spa_get_errlog(spa_t *spa, void *uaddr, uint64_t *count);
1158+
extern uint64_t spa_get_last_errlog_size(spa_t *spa);
11581159
extern void spa_errlog_rotate(spa_t *spa);
11591160
extern void spa_errlog_drain(spa_t *spa);
11601161
extern void spa_errlog_sync(spa_t *spa, uint64_t txg);
@@ -1165,6 +1166,13 @@ extern void spa_swap_errlog(spa_t *spa, uint64_t new_head_ds,
11651166
extern void sync_error_list(spa_t *spa, avl_tree_t *t, uint64_t *obj,
11661167
dmu_tx_t *tx);
11671168
extern void spa_upgrade_errlog(spa_t *spa, dmu_tx_t *tx);
1169+
extern int find_top_affected_fs(spa_t *spa, uint64_t head_ds,
1170+
zbookmark_err_phys_t *zep, uint64_t *top_affected_fs);
1171+
extern int find_birth_txg(struct dsl_dataset *ds, zbookmark_err_phys_t *zep,
1172+
uint64_t *birth_txg);
1173+
extern void zep_to_zb(uint64_t dataset, zbookmark_err_phys_t *zep,
1174+
zbookmark_phys_t *zb);
1175+
extern void name_to_errphys(char *buf, zbookmark_err_phys_t *zep);
11681176

11691177
/* vdev cache */
11701178
extern void vdev_cache_stat_init(void);

include/sys/spa_impl.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -295,6 +295,10 @@ struct spa {
295295
uint64_t spa_scan_pass_exam; /* examined bytes per pass */
296296
uint64_t spa_scan_pass_issued; /* issued bytes per pass */
297297

298+
/* error scrub pause time in milliseconds */
299+
uint64_t spa_scan_pass_errorscrub_pause;
300+
/* total error scrub paused time in milliseconds */
301+
uint64_t spa_scan_pass_errorscrub_spent_paused;
298302
/*
299303
* We are in the middle of a resilver, and another resilver
300304
* is needed once this one completes. This is set iff any

include/sys/sysevent/eventdefs.h

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -123,6 +123,11 @@ extern "C" {
123123
#define ESC_ZFS_TRIM_CANCEL "trim_cancel"
124124
#define ESC_ZFS_TRIM_RESUME "trim_resume"
125125
#define ESC_ZFS_TRIM_SUSPEND "trim_suspend"
126+
#define ESC_ZFS_ERRORSCRUB_START "errorscrub_start"
127+
#define ESC_ZFS_ERRORSCRUB_FINISH "errorscrub_finish"
128+
#define ESC_ZFS_ERRORSCRUB_ABORT "errorscrub_abort"
129+
#define ESC_ZFS_ERRORSCRUB_RESUME "errorscrub_resume"
130+
#define ESC_ZFS_ERRORSCRUB_PAUSED "errorscrub_paused"
126131

127132
/*
128133
* datalink subclass definitions.

lib/libzfs/libzfs.abi

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5717,7 +5717,8 @@
57175717
<enumerator name='POOL_SCAN_NONE' value='0'/>
57185718
<enumerator name='POOL_SCAN_SCRUB' value='1'/>
57195719
<enumerator name='POOL_SCAN_RESILVER' value='2'/>
5720-
<enumerator name='POOL_SCAN_FUNCS' value='3'/>
5720+
<enumerator name='POOL_SCAN_ERRORSCRUB' value='3'/>
5721+
<enumerator name='POOL_SCAN_FUNCS' value='4'/>
57215722
</enum-decl>
57225723
<typedef-decl name='pool_scan_func_t' type-id='1b092565' id='7313fbe2'/>
57235724
<enum-decl name='pool_scrub_cmd' id='a1474cbd'>

0 commit comments

Comments
 (0)