Skip to content

Commit ff7bd26

Browse files
committed
Teach zpool scrub to scrub only blocks in error log
Added a flag '-e' to zpool scrub to scrub only blocks in the error log. A user can pause, resume and cancel the error scrub by passing the additional command line arguments -p and -s, just like a regular scrub. This involves adding a new flag, creating new libzfs interfaces, a new ioctl, and the actual iteration and read-issuing logic. Error scrubbing is executed across multiple txgs to make sure pool performance is not affected. Co-authored-by: TulsiJain [email protected] Signed-off-by: George Amanakis <[email protected]>
1 parent 4eca03f commit ff7bd26

29 files changed

+1601
-71
lines changed

cmd/zpool/zpool_main.c

Lines changed: 101 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,7 @@ get_usage(zpool_help_t idx)
401401
return (gettext("\tinitialize [-c | -s] [-w] <pool> "
402402
"[<device> ...]\n"));
403403
case HELP_SCRUB:
404-
return (gettext("\tscrub [-s | -p] [-w] <pool> ...\n"));
404+
return (gettext("\tscrub [-s | -p] [-w] [-e] <pool> ...\n"));
405405
case HELP_RESILVER:
406406
return (gettext("\tresilver <pool> ...\n"));
407407
case HELP_TRIM:
@@ -7297,8 +7297,9 @@ wait_callback(zpool_handle_t *zhp, void *data)
72977297
}
72987298

72997299
/*
7300-
* zpool scrub [-s | -p] [-w] <pool> ...
7300+
* zpool scrub [-s | -p] [-w] [-e] <pool> ...
73017301
*
7302+
* -e Only scrub blocks in the error log.
73027303
* -s Stop. Stops any in-progress scrub.
73037304
* -p Pause. Pause in-progress scrub.
73047305
* -w Wait. Blocks until scrub has completed.
@@ -7314,14 +7315,21 @@ zpool_do_scrub(int argc, char **argv)
73147315
cb.cb_type = POOL_SCAN_SCRUB;
73157316
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
73167317

7318+
boolean_t is_error_scrub = B_FALSE;
7319+
boolean_t is_pause = B_FALSE;
7320+
boolean_t is_stop = B_FALSE;
7321+
73177322
/* check options */
7318-
while ((c = getopt(argc, argv, "spw")) != -1) {
7323+
while ((c = getopt(argc, argv, "spwe")) != -1) {
73197324
switch (c) {
7325+
case 'e':
7326+
is_error_scrub = B_TRUE;
7327+
break;
73207328
case 's':
7321-
cb.cb_type = POOL_SCAN_NONE;
7329+
is_stop = B_TRUE;
73227330
break;
73237331
case 'p':
7324-
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
7332+
is_pause = B_TRUE;
73257333
break;
73267334
case 'w':
73277335
wait = B_TRUE;
@@ -7333,11 +7341,21 @@ zpool_do_scrub(int argc, char **argv)
73337341
}
73347342
}
73357343

7336-
if (cb.cb_type == POOL_SCAN_NONE &&
7337-
cb.cb_scrub_cmd == POOL_SCRUB_PAUSE) {
7338-
(void) fprintf(stderr, gettext("invalid option combination: "
7339-
"-s and -p are mutually exclusive\n"));
7344+
if (is_pause && is_stop) {
7345+
(void) fprintf(stderr, gettext("invalid option "
7346+
"combination :-s and -p are mutually exclusive\n"));
73407347
usage(B_FALSE);
7348+
} else {
7349+
if (is_error_scrub)
7350+
cb.cb_type = POOL_SCAN_ERRORSCRUB;
7351+
7352+
if (is_pause) {
7353+
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
7354+
} else if (is_stop) {
7355+
cb.cb_type = POOL_SCAN_NONE;
7356+
} else {
7357+
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
7358+
}
73417359
}
73427360

73437361
if (wait && (cb.cb_type == POOL_SCAN_NONE ||
@@ -7561,6 +7579,70 @@ secs_to_dhms(uint64_t total, char *buf)
75617579
}
75627580
}
75637581

7582+
/*
7583+
* Print out detailed error scrub status.
7584+
*/
7585+
static void
7586+
print_err_scrub_status(pool_scan_stat_t *ps)
7587+
{
7588+
time_t start, end, pause;
7589+
uint64_t total_secs_left;
7590+
uint64_t secs_left, mins_left, hours_left, days_left;
7591+
uint64_t examined, to_be_examined;
7592+
7593+
if (ps == NULL || ps->pss_error_scrub_func != POOL_SCAN_ERRORSCRUB) {
7594+
return;
7595+
}
7596+
7597+
(void) printf(gettext(" scrub: "));
7598+
7599+
start = ps->pss_error_scrub_start;
7600+
end = ps->pss_error_scrub_end;
7601+
pause = ps->pss_pass_error_scrub_pause;
7602+
examined = ps->pss_error_scrub_examined;
7603+
to_be_examined = ps->pss_error_scrub_to_be_examined;
7604+
7605+
assert(ps->pss_error_scrub_func == POOL_SCAN_ERRORSCRUB);
7606+
7607+
if (ps->pss_error_scrub_state == DSS_FINISHED) {
7608+
total_secs_left = end - start;
7609+
days_left = total_secs_left / 60 / 60 / 24;
7610+
hours_left = (total_secs_left / 60 / 60) % 24;
7611+
mins_left = (total_secs_left / 60) % 60;
7612+
secs_left = (total_secs_left % 60);
7613+
7614+
(void) printf(gettext("scrubbed %llu error blocks in %llu days "
7615+
"%02llu:%02llu:%02llu on %s"), (u_longlong_t)examined,
7616+
(u_longlong_t)days_left, (u_longlong_t)hours_left,
7617+
(u_longlong_t)mins_left, (u_longlong_t)secs_left,
7618+
ctime(&end));
7619+
7620+
return;
7621+
} else if (ps->pss_error_scrub_state == DSS_CANCELED) {
7622+
(void) printf(gettext("error scrub canceled on %s"),
7623+
ctime(&end));
7624+
return;
7625+
}
7626+
assert(ps->pss_error_scrub_state == DSS_ERRORSCRUBBING);
7627+
7628+
/* Error scrub is in progress. */
7629+
if (pause == 0) {
7630+
(void) printf(gettext("error scrub in progress since %s"),
7631+
ctime(&start));
7632+
} else {
7633+
(void) printf(gettext("error scrub paused since %s"),
7634+
ctime(&pause));
7635+
(void) printf(gettext("\terror scrub started on %s"),
7636+
ctime(&start));
7637+
}
7638+
7639+
double fraction_done = (double)examined / (to_be_examined + examined);
7640+
(void) printf(gettext("\t%.2f%% done, issued I/O for %llu error"
7641+
" blocks"), 100 * fraction_done, (u_longlong_t)examined);
7642+
7643+
(void) printf("\n");
7644+
}
7645+
75647646
/*
75657647
* Print out detailed scrub status.
75667648
*/
@@ -7897,10 +7979,12 @@ print_scan_status(zpool_handle_t *zhp, nvlist_t *nvroot)
78977979
{
78987980
uint64_t rebuild_end_time = 0, resilver_end_time = 0;
78997981
boolean_t have_resilver = B_FALSE, have_scrub = B_FALSE;
7982+
boolean_t have_errorscrub = B_FALSE;
79007983
boolean_t active_resilver = B_FALSE;
79017984
pool_checkpoint_stat_t *pcs = NULL;
79027985
pool_scan_stat_t *ps = NULL;
79037986
uint_t c;
7987+
time_t scrub_start = 0, errorscrub_start = 0;
79047988

79057989
if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS,
79067990
(uint64_t **)&ps, &c) == 0) {
@@ -7909,16 +7993,23 @@ print_scan_status(zpool_handle_t *zhp, nvlist_t *nvroot)
79097993
active_resilver = (ps->pss_state == DSS_SCANNING);
79107994
}
79117995

7996+
79127997
have_resilver = (ps->pss_func == POOL_SCAN_RESILVER);
79137998
have_scrub = (ps->pss_func == POOL_SCAN_SCRUB);
7999+
scrub_start = ps->pss_start_time;
8000+
have_errorscrub = (ps->pss_error_scrub_func ==
8001+
POOL_SCAN_ERRORSCRUB);
8002+
errorscrub_start = ps->pss_error_scrub_start;
79148003
}
79158004

79168005
boolean_t active_rebuild = check_rebuilding(nvroot, &rebuild_end_time);
79178006
boolean_t have_rebuild = (active_rebuild || (rebuild_end_time > 0));
79188007

79198008
/* Print the status of whichever started most recently: scrub or error scrub. */
7920-
if (have_scrub)
8009+
if (have_scrub && scrub_start > errorscrub_start)
79218010
print_scan_scrub_resilver_status(ps);
8011+
else if (have_errorscrub && errorscrub_start >= scrub_start)
8012+
print_err_scrub_status(ps);
79228013

79238014
/*
79248015
* When there is an active resilver or rebuild print its status.

include/libzfs.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,11 +125,14 @@ typedef enum zfs_error {
125125
EZFS_THREADCREATEFAILED, /* thread create failed */
126126
EZFS_POSTSPLIT_ONLINE, /* onlining a disk after splitting it */
127127
EZFS_SCRUBBING, /* currently scrubbing */
128+
EZFS_ERRORSCRUBBING, /* currently error scrubbing */
129+
EZFS_ERRORSCRUB_PAUSED, /* error scrub currently paused */
128130
EZFS_NO_SCRUB, /* no active scrub */
129131
EZFS_DIFF, /* general failure of zfs diff */
130132
EZFS_DIFFDATA, /* bad zfs diff data */
131133
EZFS_POOLREADONLY, /* pool is in read-only mode */
132134
EZFS_SCRUB_PAUSED, /* scrub currently paused */
135+
EZFS_SCRUB_PAUSED_TO_CANCEL, /* scrub currently paused */
133136
EZFS_ACTIVE_POOL, /* pool is imported on a different system */
134137
EZFS_CRYPTOFAILED, /* failed to setup encryption */
135138
EZFS_NO_PENDING, /* cannot cancel, no operation is pending */

include/libzfs_core.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,8 @@ _LIBZFS_CORE_H int lzc_get_bootenv(const char *, nvlist_t **);
155155
_LIBZFS_CORE_H int lzc_get_vdev_prop(const char *, nvlist_t *, nvlist_t **);
156156
_LIBZFS_CORE_H int lzc_set_vdev_prop(const char *, nvlist_t *, nvlist_t **);
157157

158+
_LIBZFS_CORE_H int lzc_scrub(zfs_ioc_t, const char *, nvlist_t *, nvlist_t **);
159+
158160
#ifdef __cplusplus
159161
}
160162
#endif

include/sys/dmu.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,7 @@ typedef struct dmu_buf {
378378
#define DMU_POOL_DDT_STATS "DDT-statistics"
379379
#define DMU_POOL_CREATION_VERSION "creation_version"
380380
#define DMU_POOL_SCAN "scan"
381+
#define DMU_POOL_ERRORSCRUB "error_scrub"
381382
#define DMU_POOL_FREE_BPOBJ "free_bpobj"
382383
#define DMU_POOL_BPTREE_OBJ "bptree_obj"
383384
#define DMU_POOL_EMPTY_BPOBJ "empty_bpobj"

include/sys/dsl_scan.h

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929

3030
#include <sys/zfs_context.h>
3131
#include <sys/zio.h>
32+
#include <sys/zap.h>
3233
#include <sys/ddt.h>
3334
#include <sys/bplist.h>
3435

@@ -78,6 +79,21 @@ typedef enum dsl_scan_flags {
7879

7980
#define DSL_SCAN_FLAGS_MASK (DSF_VISIT_DS_AGAIN)
8081

82+
/*
 * State record for an error scrub.  Every member is a uint64_t, so the
 * structure can be treated as a flat array of uint64_t values (see
 * ERRORSCRUB_PHYS_NUMINTS, which counts them).
 * NOTE(review): presumably this is what gets stored in the pool under
 * DMU_POOL_ERRORSCRUB -- confirm against the sync code.
 */
typedef struct dsl_errorscrub_phys {
83+
uint64_t dep_func; /* pool_scan_func_t */
84+
uint64_t dep_state; /* dsl_scan_state_t */
85+
uint64_t dep_cursor; /* serialized zap cursor for tracing progress */
86+
uint64_t dep_start_time; /* error scrub start time, unix timestamp */
87+
uint64_t dep_end_time; /* error scrub end time, unix timestamp */
88+
uint64_t dep_to_examine; /* total error blocks to be scrubbed */
89+
uint64_t dep_examined; /* blocks scrubbed so far */
90+
uint64_t dep_errors; /* error scrub I/O error count */
91+
uint64_t dep_paused_flags; /* flag for paused */
92+
} dsl_errorscrub_phys_t;
93+
94+
#define ERRORSCRUB_PHYS_NUMINTS (sizeof (dsl_errorscrub_phys_t) \
95+
/ sizeof (uint64_t))
96+
8197
/*
8298
* Every pool will have one dsl_scan_t and this structure will contain
8399
* in-memory information about the scan and a pointer to the on-disk
@@ -151,11 +167,15 @@ typedef struct dsl_scan {
151167
uint64_t scn_avg_zio_size_this_txg;
152168
uint64_t scn_zios_this_txg;
153169

170+
/* zap cursor for tracing error scrub progress */
171+
zap_cursor_t errorscrub_cursor;
154172
/* members needed for syncing scan status to disk */
155173
dsl_scan_phys_t scn_phys; /* on disk representation of scan */
156174
dsl_scan_phys_t scn_phys_cached;
157175
avl_tree_t scn_queue; /* queue of datasets to scan */
158176
uint64_t scn_queues_pending; /* outstanding data to issue */
177+
/* members needed for syncing error scrub status to disk */
178+
dsl_errorscrub_phys_t errorscrub_phys;
159179
} dsl_scan_t;
160180

161181
typedef struct dsl_scan_io_queue dsl_scan_io_queue_t;
@@ -171,8 +191,12 @@ int dsl_scan_cancel(struct dsl_pool *);
171191
int dsl_scan(struct dsl_pool *, pool_scan_func_t);
172192
void dsl_scan_assess_vdev(struct dsl_pool *dp, vdev_t *vd);
173193
boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp);
174-
int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd);
194+
boolean_t dsl_errorscrubbing(const struct dsl_pool *dp);
195+
boolean_t dsl_errorscrub_active(dsl_scan_t *scn);
175196
void dsl_scan_restart_resilver(struct dsl_pool *, uint64_t txg);
197+
int dsl_scrub_set_pause_resume(const struct dsl_pool *dp,
198+
pool_scrub_cmd_t cmd);
199+
void dsl_errorscrub_sync(struct dsl_pool *, dmu_tx_t *);
176200
boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
177201
boolean_t dsl_scan_resilver_scheduled(struct dsl_pool *dp);
178202
boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);
@@ -184,6 +208,7 @@ void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
184208
struct dmu_tx *tx);
185209
boolean_t dsl_scan_active(dsl_scan_t *scn);
186210
boolean_t dsl_scan_is_paused_scrub(const dsl_scan_t *scn);
211+
boolean_t dsl_errorscrub_is_paused(const dsl_scan_t *scn);
187212
void dsl_scan_freed(spa_t *spa, const blkptr_t *bp);
188213
void dsl_scan_io_queue_destroy(dsl_scan_io_queue_t *queue);
189214
void dsl_scan_io_queue_vdev_xfer(vdev_t *svd, vdev_t *tvd);

include/sys/fs/zfs.h

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1036,6 +1036,7 @@ typedef enum pool_scan_func {
10361036
POOL_SCAN_NONE,
10371037
POOL_SCAN_SCRUB,
10381038
POOL_SCAN_RESILVER,
1039+
POOL_SCAN_ERRORSCRUB,
10391040
POOL_SCAN_FUNCS
10401041
} pool_scan_func_t;
10411042

@@ -1099,6 +1100,20 @@ typedef struct pool_scan_stat {
10991100
uint64_t pss_pass_scrub_spent_paused;
11001101
uint64_t pss_pass_issued; /* issued bytes per scan pass */
11011102
uint64_t pss_issued; /* total bytes checked by scanner */
1103+
1104+
/* error scrub values stored on disk */
1105+
uint64_t pss_error_scrub_func; /* pool_scan_func_t */
1106+
uint64_t pss_error_scrub_state; /* dsl_scan_state_t */
1107+
uint64_t pss_error_scrub_start; /* error scrub start time */
1108+
uint64_t pss_error_scrub_end; /* error scrub end time */
1109+
uint64_t pss_error_scrub_examined; /* error blocks issued I/O */
1110+
/* error blocks to be issued I/O */
1111+
uint64_t pss_error_scrub_to_be_examined;
1112+
1113+
/* error scrub values not stored on disk */
1114+
/* error scrub pause time; print_err_scrub_status() reads it as a time_t */
1115+
uint64_t pss_pass_error_scrub_pause;
1116+
11021117
} pool_scan_stat_t;
11031118

11041119
typedef struct pool_removal_stat {
@@ -1120,6 +1135,7 @@ typedef enum dsl_scan_state {
11201135
DSS_SCANNING,
11211136
DSS_FINISHED,
11221137
DSS_CANCELED,
1138+
DSS_ERRORSCRUBBING,
11231139
DSS_NUM_STATES
11241140
} dsl_scan_state_t;
11251141

@@ -1359,7 +1375,7 @@ typedef enum {
13591375
*/
13601376
typedef enum zfs_ioc {
13611377
/*
1362-
* Core features - 81/128 numbers reserved.
1378+
* Core features - 88/128 numbers reserved.
13631379
*/
13641380
#ifdef __FreeBSD__
13651381
ZFS_IOC_FIRST = 0,
@@ -1454,6 +1470,7 @@ typedef enum zfs_ioc {
14541470
ZFS_IOC_WAIT_FS, /* 0x5a54 */
14551471
ZFS_IOC_VDEV_GET_PROPS, /* 0x5a55 */
14561472
ZFS_IOC_VDEV_SET_PROPS, /* 0x5a56 */
1473+
ZFS_IOC_POOL_SCRUB, /* 0x5a57 */
14571474

14581475
/*
14591476
* Per-platform (Optional) - 8/128 numbers reserved.

include/sys/spa.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1154,6 +1154,7 @@ extern void zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate);
11541154
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
11551155
extern uint64_t spa_approx_errlog_size(spa_t *spa);
11561156
extern int spa_get_errlog(spa_t *spa, void *uaddr, uint64_t *count);
1157+
extern uint64_t spa_get_last_errlog_size(spa_t *spa);
11571158
extern void spa_errlog_rotate(spa_t *spa);
11581159
extern void spa_errlog_drain(spa_t *spa);
11591160
extern void spa_errlog_sync(spa_t *spa, uint64_t txg);
@@ -1164,6 +1165,13 @@ extern void spa_swap_errlog(spa_t *spa, uint64_t new_head_ds,
11641165
extern void sync_error_list(spa_t *spa, avl_tree_t *t, uint64_t *obj,
11651166
dmu_tx_t *tx);
11661167
extern void spa_upgrade_errlog(spa_t *spa, dmu_tx_t *tx);
1168+
extern int find_top_affected_fs(spa_t *spa, uint64_t head_ds,
1169+
zbookmark_err_phys_t *zep, uint64_t *top_affected_fs);
1170+
extern int find_birth_txg(struct dsl_dataset *ds, zbookmark_err_phys_t *zep,
1171+
uint64_t *birth_txg);
1172+
extern void zep_to_zb(uint64_t dataset, zbookmark_err_phys_t *zep,
1173+
zbookmark_phys_t *zb);
1174+
extern void name_to_errphys(char *buf, zbookmark_err_phys_t *zep);
11671175

11681176
/* vdev cache */
11691177
extern void vdev_cache_stat_init(void);

include/sys/spa_impl.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,10 @@ struct spa {
295295
uint64_t spa_scan_pass_exam; /* examined bytes per pass */
296296
uint64_t spa_scan_pass_issued; /* issued bytes per pass */
297297

298+
/* error scrub pause time in milliseconds */
299+
uint64_t spa_scan_pass_errorscrub_pause;
300+
/* total error scrub paused time in milliseconds */
301+
uint64_t spa_scan_pass_errorscrub_spent_paused;
298302
/*
299303
* We are in the middle of a resilver, and another resilver
300304
* is needed once this one completes. This is set iff any

include/sys/sysevent/eventdefs.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,11 @@ extern "C" {
123123
#define ESC_ZFS_TRIM_CANCEL "trim_cancel"
124124
#define ESC_ZFS_TRIM_RESUME "trim_resume"
125125
#define ESC_ZFS_TRIM_SUSPEND "trim_suspend"
126+
#define ESC_ZFS_ERRORSCRUB_START "errorscrub_start"
127+
#define ESC_ZFS_ERRORSCRUB_FINISH "errorscrub_finish"
128+
#define ESC_ZFS_ERRORSCRUB_ABORT "errorscrub_abort"
129+
#define ESC_ZFS_ERRORSCRUB_RESUME "errorscrub_resume"
130+
#define ESC_ZFS_ERRORSCRUB_PAUSED "errorscrub_paused"
126131

127132
/*
128133
* datalink subclass definitions.

lib/libzfs/libzfs.abi

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5717,7 +5717,8 @@
57175717
<enumerator name='POOL_SCAN_NONE' value='0'/>
57185718
<enumerator name='POOL_SCAN_SCRUB' value='1'/>
57195719
<enumerator name='POOL_SCAN_RESILVER' value='2'/>
5720-
<enumerator name='POOL_SCAN_FUNCS' value='3'/>
5720+
<enumerator name='POOL_SCAN_ERRORSCRUB' value='3'/>
5721+
<enumerator name='POOL_SCAN_FUNCS' value='4'/>
57215722
</enum-decl>
57225723
<typedef-decl name='pool_scan_func_t' type-id='1b092565' id='7313fbe2'/>
57235724
<enum-decl name='pool_scrub_cmd' id='a1474cbd'>

0 commit comments

Comments
 (0)