Skip to content

Commit e121723

Browse files
committed
Teach zpool scrub to scrub only blocks in error log
Add a -e flag to zpool scrub that scrubs only the blocks listed in the error log. A user can pause, resume, and cancel the error scrub by passing the additional command-line arguments -p and -s, just as with a regular scrub. This involves adding a new flag, creating new libzfs interfaces, a new ioctl, and the actual iteration and read-issuing logic. Error scrubbing is executed across multiple txgs to make sure pool performance is not affected. Original-patch-by: TulsiJain <tulsi.jain@delphix.com> Signed-off-by: George Amanakis <gamanakis@gmail.com>
1 parent 07a4c76 commit e121723

File tree

26 files changed

+1155
-58
lines changed

26 files changed

+1155
-58
lines changed

cmd/zpool/zpool_main.c

Lines changed: 102 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,7 @@ get_usage(zpool_help_t idx)
395395
return (gettext("\tinitialize [-c | -s] [-w] <pool> "
396396
"[<device> ...]\n"));
397397
case HELP_SCRUB:
398-
return (gettext("\tscrub [-s | -p] [-w] <pool> ...\n"));
398+
return (gettext("\tscrub [-s | -p] [-w] [-e] <pool> ...\n"));
399399
case HELP_RESILVER:
400400
return (gettext("\tresilver <pool> ...\n"));
401401
case HELP_TRIM:
@@ -7145,8 +7145,9 @@ wait_callback(zpool_handle_t *zhp, void *data)
71457145
}
71467146

71477147
/*
7148-
* zpool scrub [-s | -p] [-w] <pool> ...
7148+
* zpool scrub [-s | -p] [-w] [-e] <pool> ...
71497149
*
7150+
* -e Only scrub blocks in the error log.
71507151
* -s Stop. Stops any in-progress scrub.
71517152
* -p Pause. Pause in-progress scrub.
71527153
* -w Wait. Blocks until scrub has completed.
@@ -7162,14 +7163,21 @@ zpool_do_scrub(int argc, char **argv)
71627163
cb.cb_type = POOL_SCAN_SCRUB;
71637164
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
71647165

7166+
boolean_t is_error_scrub = B_FALSE;
7167+
boolean_t is_pause = B_FALSE;
7168+
boolean_t is_stop = B_FALSE;
7169+
71657170
/* check options */
7166-
while ((c = getopt(argc, argv, "spw")) != -1) {
7171+
while ((c = getopt(argc, argv, "spwe")) != -1) {
71677172
switch (c) {
7173+
case 'e':
7174+
is_error_scrub = B_TRUE;
7175+
break;
71687176
case 's':
7169-
cb.cb_type = POOL_SCAN_NONE;
7177+
is_stop = B_TRUE;
71707178
break;
71717179
case 'p':
7172-
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
7180+
is_pause = B_TRUE;
71737181
break;
71747182
case 'w':
71757183
wait = B_TRUE;
@@ -7181,11 +7189,27 @@ zpool_do_scrub(int argc, char **argv)
71817189
}
71827190
}
71837191

7184-
if (cb.cb_type == POOL_SCAN_NONE &&
7185-
cb.cb_scrub_cmd == POOL_SCRUB_PAUSE) {
7186-
(void) fprintf(stderr, gettext("invalid option combination: "
7187-
"-s and -p are mutually exclusive\n"));
7192+
if (is_pause && is_stop) {
7193+
(void) fprintf(stderr, gettext("invalid option "
7194+
"combination: -s and -p are mutually exclusive\n"));
71887195
usage(B_FALSE);
7196+
} else {
7197+
if (is_error_scrub) {
7198+
cb.cb_type = POOL_ERRORSCRUB;
7199+
if (is_pause) {
7200+
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
7201+
} else if (is_stop) {
7202+
cb.cb_scrub_cmd = POOL_ERRORSCRUB_STOP;
7203+
} else {
7204+
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
7205+
}
7206+
} else {
7207+
if (is_pause) {
7208+
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
7209+
} else if (is_stop) {
7210+
cb.cb_type = POOL_SCAN_NONE;
7211+
}
7212+
}
71897213
}
71907214

71917215
if (wait && (cb.cb_type == POOL_SCAN_NONE ||
@@ -7408,6 +7432,70 @@ secs_to_dhms(uint64_t total, char *buf)
74087432
}
74097433
}
74107434

7435+
/*
7436+
* Print out detailed error scrub status.
7437+
*/
7438+
static void
7439+
print_err_scrub_status(pool_scan_stat_t *ps)
7440+
{
7441+
time_t start, end, pause;
7442+
uint64_t total_secs_left;
7443+
uint64_t secs_left, mins_left, hours_left, days_left;
7444+
uint64_t examined, to_be_examined;
7445+
7446+
if (ps == NULL || ps->pss_error_scrub_func != POOL_ERRORSCRUB) {
7447+
return;
7448+
}
7449+
7450+
(void) printf(gettext(" scrub: "));
7451+
7452+
start = ps->pss_error_scrub_start;
7453+
end = ps->pss_error_scrub_end;
7454+
pause = ps->pss_pass_error_scrub_pause;
7455+
examined = ps->pss_error_scrub_examined;
7456+
to_be_examined = ps->pss_error_scrub_to_be_examined;
7457+
7458+
assert(ps->pss_error_scrub_func == POOL_ERRORSCRUB);
7459+
7460+
if (ps->pss_error_scrub_state == DSS_FINISHED) {
7461+
total_secs_left = end - start;
7462+
days_left = total_secs_left / 60 / 60 / 24;
7463+
hours_left = (total_secs_left / 60 / 60) % 24;
7464+
mins_left = (total_secs_left / 60) % 60;
7465+
secs_left = (total_secs_left % 60);
7466+
7467+
(void) printf(gettext("scrubbed %llu error blocks in %llu days "
7468+
"%02llu:%02llu:%02llu on %s"), (u_longlong_t)examined,
7469+
(u_longlong_t)days_left, (u_longlong_t)hours_left,
7470+
(u_longlong_t)mins_left, (u_longlong_t)secs_left,
7471+
ctime(&end));
7472+
7473+
return;
7474+
} else if (ps->pss_error_scrub_state == DSS_CANCELED) {
7475+
(void) printf(gettext("error scrub canceled on %s"),
7476+
ctime(&end));
7477+
return;
7478+
}
7479+
assert(ps->pss_error_scrub_state == DSS_ERRORSCRUBBING);
7480+
7481+
/* Error scrub is in progress. */
7482+
if (pause == 0) {
7483+
(void) printf(gettext("error scrub in progress since %s"),
7484+
ctime(&start));
7485+
} else {
7486+
(void) printf(gettext("error scrub paused since %s"),
7487+
ctime(&pause));
7488+
(void) printf(gettext("\terror scrub started on %s"),
7489+
ctime(&start));
7490+
}
7491+
7492+
double fraction_done = (double)examined / (to_be_examined + examined);
7493+
(void) printf(gettext("\t%.2f%% done, issued I/O for %llu error"
7494+
" blocks"), 100 * fraction_done, (u_longlong_t)examined);
7495+
7496+
(void) printf("\n");
7497+
}
7498+
74117499
/*
74127500
* Print out detailed scrub status.
74137501
*/
@@ -7733,6 +7821,7 @@ print_scan_status(zpool_handle_t *zhp, nvlist_t *nvroot)
77337821
{
77347822
uint64_t rebuild_end_time = 0, resilver_end_time = 0;
77357823
boolean_t have_resilver = B_FALSE, have_scrub = B_FALSE;
7824+
boolean_t have_errorscrub = B_FALSE;
77367825
boolean_t active_resilver = B_FALSE;
77377826
pool_checkpoint_stat_t *pcs = NULL;
77387827
pool_scan_stat_t *ps = NULL;
@@ -7747,6 +7836,7 @@ print_scan_status(zpool_handle_t *zhp, nvlist_t *nvroot)
77477836

77487837
have_resilver = (ps->pss_func == POOL_SCAN_RESILVER);
77497838
have_scrub = (ps->pss_func == POOL_SCAN_SCRUB);
7839+
have_errorscrub = (ps->pss_error_scrub_func == POOL_ERRORSCRUB);
77507840
}
77517841

77527842
boolean_t active_rebuild = check_rebuilding(nvroot, &rebuild_end_time);
@@ -7755,6 +7845,8 @@ print_scan_status(zpool_handle_t *zhp, nvlist_t *nvroot)
77557845
/* Always print the scrub status when available. */
77567846
if (have_scrub)
77577847
print_scan_scrub_resilver_status(ps);
7848+
if (have_errorscrub)
7849+
print_err_scrub_status(ps);
77587850

77597851
/*
77607852
* When there is an active resilver or rebuild print its status.
@@ -8463,6 +8555,7 @@ status_callback(zpool_handle_t *zhp, void *data)
84638555

84648556
(void) nvlist_lookup_uint64_array(nvroot,
84658557
ZPOOL_CONFIG_REMOVAL_STATS, (uint64_t **)&prs, &c);
8558+
84668559
print_removal_status(zhp, prs);
84678560

84688561
(void) nvlist_lookup_uint64_array(nvroot,

include/libzfs.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,11 +126,15 @@ typedef enum zfs_error {
126126
EZFS_THREADCREATEFAILED, /* thread create failed */
127127
EZFS_POSTSPLIT_ONLINE, /* onlining a disk after splitting it */
128128
EZFS_SCRUBBING, /* currently scrubbing */
129+
EZFS_ERRORSCRUBBING, /* currently error scrubbing */
130+
EZFS_ERRORSCRUB_PAUSED, /* error scrub currently paused */
129131
EZFS_NO_SCRUB, /* no active scrub */
132+
EZFS_NO_ERRORSCRUB, /* no active error scrub */
130133
EZFS_DIFF, /* general failure of zfs diff */
131134
EZFS_DIFFDATA, /* bad zfs diff data */
132135
EZFS_POOLREADONLY, /* pool is in read-only mode */
133136
EZFS_SCRUB_PAUSED, /* scrub currently paused */
137+
EZFS_SCRUB_PAUSED_TO_CANCEL, /* scrub currently paused */
134138
EZFS_ACTIVE_POOL, /* pool is imported on a different system */
135139
EZFS_CRYPTOFAILED, /* failed to setup encryption */
136140
EZFS_NO_PENDING, /* cannot cancel, no operation is pending */

include/libzfs_core.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,9 @@ _LIBZFS_CORE_H int lzc_wait_fs(const char *, zfs_wait_activity_t, boolean_t *);
143143

144144
_LIBZFS_CORE_H int lzc_set_bootenv(const char *, const nvlist_t *);
145145
_LIBZFS_CORE_H int lzc_get_bootenv(const char *, nvlist_t **);
146+
147+
_LIBZFS_CORE_H int lzc_scrub(zfs_ioc_t, const char *, nvlist_t *, nvlist_t **);
148+
146149
#ifdef __cplusplus
147150
}
148151
#endif

include/sys/dmu.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,7 @@ typedef struct dmu_buf {
370370
#define DMU_POOL_DDT_STATS "DDT-statistics"
371371
#define DMU_POOL_CREATION_VERSION "creation_version"
372372
#define DMU_POOL_SCAN "scan"
373+
#define DMU_POOL_ERRORSCRUB "error_scrub"
373374
#define DMU_POOL_FREE_BPOBJ "free_bpobj"
374375
#define DMU_POOL_BPTREE_OBJ "bptree_obj"
375376
#define DMU_POOL_EMPTY_BPOBJ "empty_bpobj"

include/sys/dsl_scan.h

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929

3030
#include <sys/zfs_context.h>
3131
#include <sys/zio.h>
32+
#include <sys/zap.h>
3233
#include <sys/ddt.h>
3334
#include <sys/bplist.h>
3435

@@ -78,6 +79,21 @@ typedef enum dsl_scan_flags {
7879

7980
#define DSL_SCAN_FLAGS_MASK (DSF_VISIT_DS_AGAIN)
8081

82+
/*
 * Persistent state of an error scrub.  Every member is a uint64_t, so the
 * structure can be sized in 64-bit words (see ERRORSCRUB_PHYS_NUMINTS).
 * NOTE(review): presumably stored in the MOS under DMU_POOL_ERRORSCRUB, the
 * way dsl_scan_phys_t is stored under DMU_POOL_SCAN -- confirm in dsl_scan.c.
 */
typedef struct dsl_errorscrub_phys {
83+
uint64_t dep_func; /* scan function, a pool_scan_func_t value */
84+
uint64_t dep_state; /* current state, a dsl_scan_state_t value */
85+
uint64_t dep_cursor; /* serialized zap cursor recording scrub progress */
86+
uint64_t dep_start_time; /* error scrub start time, unix timestamp */
87+
uint64_t dep_end_time; /* error scrub end time, unix timestamp */
88+
uint64_t dep_to_examine; /* total error blocks to be scrubbed */
89+
uint64_t dep_examined; /* error blocks scrubbed so far */
90+
uint64_t dep_errors; /* error scrub I/O error count */
91+
/* pause flag(s); presumably non-zero while paused -- TODO confirm */
uint64_t dep_paused_flags;
92+
} dsl_errorscrub_phys_t;
93+
94+
/* Number of uint64_t words occupied by a dsl_errorscrub_phys_t. */
#define ERRORSCRUB_PHYS_NUMINTS (sizeof (dsl_errorscrub_phys_t) \
95+
/ sizeof (uint64_t))
96+
8197
/*
8298
* Every pool will have one dsl_scan_t and this structure will contain
8399
* in-memory information about the scan and a pointer to the on-disk
@@ -151,11 +167,16 @@ typedef struct dsl_scan {
151167
uint64_t scn_avg_zio_size_this_txg;
152168
uint64_t scn_zios_this_txg;
153169

170+
/* zap cursor for tracing error scrub progress */
171+
zap_cursor_t errorscrub_cursor;
154172
/* members needed for syncing scan status to disk */
155173
dsl_scan_phys_t scn_phys; /* on disk representation of scan */
156174
dsl_scan_phys_t scn_phys_cached;
157175
avl_tree_t scn_queue; /* queue of datasets to scan */
158176
uint64_t scn_bytes_pending; /* outstanding data to issue */
177+
178+
/* members needed for syncing error scrub status to disk */
179+
dsl_errorscrub_phys_t errorscrub_phys;
159180
} dsl_scan_t;
160181

161182
typedef struct dsl_scan_io_queue dsl_scan_io_queue_t;
@@ -167,12 +188,16 @@ int dsl_scan_setup_check(void *, dmu_tx_t *);
167188
void dsl_scan_setup_sync(void *, dmu_tx_t *);
168189
void dsl_scan_fini(struct dsl_pool *dp);
169190
void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *);
170-
int dsl_scan_cancel(struct dsl_pool *);
191+
int dsl_scan_cancel(struct dsl_pool *, pool_scan_func_t func);
171192
int dsl_scan(struct dsl_pool *, pool_scan_func_t);
172193
void dsl_scan_assess_vdev(struct dsl_pool *dp, vdev_t *vd);
173194
boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp);
174-
int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd);
195+
boolean_t dsl_errorscrubbing(const struct dsl_pool *dp);
196+
boolean_t dsl_errorscrub_active(dsl_scan_t *scn);
175197
void dsl_scan_restart_resilver(struct dsl_pool *, uint64_t txg);
198+
int dsl_scrub_set_pause_resume(const struct dsl_pool *dp,
199+
pool_scrub_cmd_t cmd, pool_scan_func_t func);
200+
void dsl_errorscrub_sync(struct dsl_pool *, dmu_tx_t *);
176201
boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
177202
boolean_t dsl_scan_resilver_scheduled(struct dsl_pool *dp);
178203
boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);
@@ -184,6 +209,7 @@ void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
184209
struct dmu_tx *tx);
185210
boolean_t dsl_scan_active(dsl_scan_t *scn);
186211
boolean_t dsl_scan_is_paused_scrub(const dsl_scan_t *scn);
212+
boolean_t dsl_errorscrub_is_paused(const dsl_scan_t *scn);
187213
void dsl_scan_freed(spa_t *spa, const blkptr_t *bp);
188214
void dsl_scan_io_queue_destroy(dsl_scan_io_queue_t *queue);
189215
void dsl_scan_io_queue_vdev_xfer(vdev_t *svd, vdev_t *tvd);

include/sys/fs/zfs.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -942,6 +942,7 @@ typedef enum pool_scan_func {
942942
POOL_SCAN_NONE,
943943
POOL_SCAN_SCRUB,
944944
POOL_SCAN_RESILVER,
945+
POOL_ERRORSCRUB,
945946
POOL_SCAN_FUNCS
946947
} pool_scan_func_t;
947948

@@ -951,6 +952,7 @@ typedef enum pool_scan_func {
951952
typedef enum pool_scrub_cmd {
952953
POOL_SCRUB_NORMAL = 0,
953954
POOL_SCRUB_PAUSE,
955+
POOL_ERRORSCRUB_STOP,
954956
POOL_SCRUB_FLAGS_END
955957
} pool_scrub_cmd_t;
956958

@@ -1005,6 +1007,20 @@ typedef struct pool_scan_stat {
10051007
uint64_t pss_pass_scrub_spent_paused;
10061008
uint64_t pss_pass_issued; /* issued bytes per scan pass */
10071009
uint64_t pss_issued; /* total bytes checked by scanner */
1010+
1011+
/* error scrub values stored on disk */
1012+
uint64_t pss_error_scrub_func; /* pool_scan_func_t */
1013+
uint64_t pss_error_scrub_state; /* dsl_scan_state_t */
1014+
uint64_t pss_error_scrub_start; /* error scrub start time */
1015+
uint64_t pss_error_scrub_end; /* error scrub end time */
1016+
uint64_t pss_error_scrub_examined; /* error blocks issued I/O */
1017+
/* error blocks to be issued I/O */
1018+
uint64_t pss_error_scrub_to_be_examined;
1019+
1020+
/* error scrub values not stored on disk */
1021+
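/* error scrub pause time; zpool status passes it to ctime(), i.e. treats it as seconds rather than milliseconds (NOTE(review): confirm unit) */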
1022+
uint64_t pss_pass_error_scrub_pause;
1023+
10081024
} pool_scan_stat_t;
10091025

10101026
typedef struct pool_removal_stat {
@@ -1026,6 +1042,7 @@ typedef enum dsl_scan_state {
10261042
DSS_SCANNING,
10271043
DSS_FINISHED,
10281044
DSS_CANCELED,
1045+
DSS_ERRORSCRUBBING,
10291046
DSS_NUM_STATES
10301047
} dsl_scan_state_t;
10311048

@@ -1357,6 +1374,7 @@ typedef enum zfs_ioc {
13571374
ZFS_IOC_GET_BOOKMARK_PROPS, /* 0x5a52 */
13581375
ZFS_IOC_WAIT, /* 0x5a53 */
13591376
ZFS_IOC_WAIT_FS, /* 0x5a54 */
1377+
ZFS_IOC_POOL_SCRUB, /* 0x5a55 */
13601378

13611379
/*
13621380
* Per-platform (Optional) - 8/128 numbers reserved.

include/sys/spa.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -818,8 +818,9 @@ extern void spa_l2cache_drop(spa_t *spa);
818818

819819
/* scanning */
820820
extern int spa_scan(spa_t *spa, pool_scan_func_t func);
821-
extern int spa_scan_stop(spa_t *spa);
822-
extern int spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t flag);
821+
extern int spa_scan_stop(spa_t *spa, pool_scan_func_t func);
822+
extern int spa_scrub_pause_resume(spa_t *spa, pool_scan_func_t func,
823+
pool_scrub_cmd_t flag);
823824

824825
/* spa syncing */
825826
extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
@@ -1140,6 +1141,7 @@ extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
11401141
extern void zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate);
11411142
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
11421143
extern uint64_t spa_get_errlog_size(spa_t *spa);
1144+
extern uint64_t spa_get_last_errlog_size(spa_t *spa);
11431145
extern int spa_get_errlog(spa_t *spa, void *uaddr, size_t *count);
11441146
extern void spa_errlog_rotate(spa_t *spa);
11451147
extern void spa_errlog_drain(spa_t *spa);

include/sys/spa_impl.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,10 @@ struct spa {
291291
uint64_t spa_scan_pass_exam; /* examined bytes per pass */
292292
uint64_t spa_scan_pass_issued; /* issued bytes per pass */
293293

294+
/* error scrub pause time in milliseconds */
295+
uint64_t spa_scan_pass_errorscrub_pause;
296+
/* total error scrub paused time in milliseconds */
297+
uint64_t spa_scan_pass_errorscrub_spent_paused;
294298
/*
295299
* We are in the middle of a resilver, and another resilver
296300
* is needed once this one completes. This is set iff any

include/sys/sysevent/eventdefs.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,11 @@ extern "C" {
123123
#define ESC_ZFS_TRIM_CANCEL "trim_cancel"
124124
#define ESC_ZFS_TRIM_RESUME "trim_resume"
125125
#define ESC_ZFS_TRIM_SUSPEND "trim_suspend"
126+
#define ESC_ZFS_ERRORSCRUB_START "error_scrub_start"
127+
#define ESC_ZFS_ERRORSCRUB_FINISH "error_scrub_finish"
128+
#define ESC_ZFS_ERRORSCRUB_ABORT "error_scrub_abort"
129+
#define ESC_ZFS_ERRORSCRUB_RESUME "error_scrub_resume"
130+
#define ESC_ZFS_ERRORSCRUB_PAUSED "error_scrub_paused"
126131

127132
/*
128133
* datalink subclass definitions.

0 commit comments

Comments
 (0)