Skip to content

Commit 6231a2c

Browse files
author
TulsiJain
committed
rescrubbing only previously detected error block
Signed-off-by: TulsiJain <tulsi.jain@delphix.com>
1 parent 9fb6abe commit 6231a2c

File tree

20 files changed

+759
-13
lines changed

20 files changed

+759
-13
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,4 +62,7 @@ cscope.*
6262
*.orig
6363
*.log
6464
venv
65+
# Ignore Eclipse files
66+
/.cproject
67+
/.project
6568

cmd/zpool/zpool_main.c

Lines changed: 106 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,8 @@ get_usage(zpool_help_t idx)
381381
return (gettext("\tinitialize [-c | -s] <pool> "
382382
"[<device> ...]\n"));
383383
case HELP_SCRUB:
384-
return (gettext("\tscrub [-s | -p] <pool> ...\n"));
384+
return (gettext("\tscrub [-s | -p | -e | -e -s | -e -p] <pool>"
385+
" ...\n"));
385386
case HELP_RESILVER:
386387
return (gettext("\tresilver <pool> ...\n"));
387388
case HELP_TRIM:
@@ -6691,7 +6692,11 @@ scrub_callback(zpool_handle_t *zhp, void *data)
66916692
return (1);
66926693
}
66936694

6694-
err = zpool_scan(zhp, cb->cb_type, cb->cb_scrub_cmd);
6695+
if (cb->cb_type == POOL_RESCRUB) {
6696+
err = zpool_rescrub(zhp, cb->cb_type, cb->cb_scrub_cmd);
6697+
} else {
6698+
err = zpool_scan(zhp, cb->cb_type, cb->cb_scrub_cmd);
6699+
}
66956700

66966701
if (err == 0 && zpool_has_checkpoint(zhp) &&
66976702
cb->cb_type == POOL_SCAN_SCRUB) {
@@ -6706,6 +6711,7 @@ scrub_callback(zpool_handle_t *zhp, void *data)
67066711
/*
67076712
* zpool scrub [-s | -p] <pool> ...
67086713
*
6714+
* -e For error blocks. Starts/Resumes error blocks scrubbing.
67096715
* -s Stop. Stops any in-progress scrub.
67106716
* -p Pause. Pause in-progress scrub.
67116717
*/
@@ -6718,27 +6724,52 @@ zpool_do_scrub(int argc, char **argv)
67186724
cb.cb_type = POOL_SCAN_SCRUB;
67196725
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
67206726

6727+
boolean_t is_error_scrub = B_FALSE;
6728+
boolean_t is_pause = B_FALSE;
6729+
boolean_t is_stop = B_FALSE;
67216730
/* check options */
6722-
while ((c = getopt(argc, argv, "sp")) != -1) {
6731+
while ((c = getopt(argc, argv, "eps")) != -1) {
67236732
switch (c) {
6724-
case 's':
6725-
cb.cb_type = POOL_SCAN_NONE;
6733+
case 'e': {
6734+
is_error_scrub = B_TRUE;
67266735
break;
6727-
case 'p':
6728-
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
6736+
}
6737+
case 's': {
6738+
is_stop = B_TRUE;
6739+
break;
6740+
}
6741+
case 'p': {
6742+
is_pause = B_TRUE;
67296743
break;
6744+
}
67306745
case '?':
67316746
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
67326747
optopt);
67336748
usage(B_FALSE);
67346749
}
67356750
}
67366751

6737-
if (cb.cb_type == POOL_SCAN_NONE &&
6738-
cb.cb_scrub_cmd == POOL_SCRUB_PAUSE) {
6739-
(void) fprintf(stderr, gettext("invalid option combination: "
6740-
"-s and -p are mutually exclusive\n"));
6752+
if (is_pause && is_stop) {
6753+
(void) fprintf(stderr, gettext("invalid option "
6754+
"combination :-s and -p are mutually exclusive\n"));
67416755
usage(B_FALSE);
6756+
} else {
6757+
if (is_error_scrub) {
6758+
cb.cb_type = POOL_RESCRUB;
6759+
if (is_pause) {
6760+
cb.cb_scrub_cmd = POOL_RESCRUB_PAUSE;
6761+
} else if (is_stop) {
6762+
cb.cb_scrub_cmd = POOL_RESCRUB_STOP;
6763+
} else {
6764+
cb.cb_scrub_cmd = POOL_RESCRUB_NORMAL;
6765+
}
6766+
} else {
6767+
if (is_pause) {
6768+
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
6769+
} else if (is_stop) {
6770+
cb.cb_type = POOL_SCAN_NONE;
6771+
}
6772+
}
67426773
}
67436774

67446775
cb.cb_argc = argc;
@@ -6912,6 +6943,69 @@ zpool_do_trim(int argc, char **argv)
69126943
return (error);
69136944
}
69146945

6946+
static void
6947+
print_err_scrub_status(pool_scan_stat_t *ps)
6948+
{
6949+
time_t start, end, pause;
6950+
uint64_t total_secs_left;
6951+
uint64_t secs_left, mins_left, hours_left, days_left;
6952+
uint64_t examined, to_be_examined;
6953+
6954+
(void) printf(gettext(" error scrub: "));
6955+
6956+
if (ps == NULL || ps->pss_error_scrub_func == POOL_SCAN_NONE ||
6957+
ps->pss_error_scrub_func >= POOL_SCAN_FUNCS) {
6958+
(void) printf(gettext("no error scrubbing requested\n"));
6959+
return;
6960+
}
6961+
6962+
start = ps->pss_error_scrub_start;
6963+
end = ps->pss_error_scrub_end;
6964+
pause = ps->pss_pass_error_scrub_pause;
6965+
examined = ps->pss_error_scrub_examined;
6966+
to_be_examined = ps->pss_error_scrub_to_be_examined;
6967+
6968+
assert(ps->pss_error_scrub_func == POOL_RESCRUB);
6969+
6970+
if (ps->pss_error_scrub_state == DSS_FINISHED) {
6971+
total_secs_left = end - start;
6972+
days_left = total_secs_left / 60 / 60 / 24;
6973+
hours_left = (total_secs_left / 60 / 60) % 24;
6974+
mins_left = (total_secs_left / 60) % 60;
6975+
secs_left = (total_secs_left % 60);
6976+
6977+
(void) printf(gettext("scrubbed %llu error blocks in %llu days "
6978+
"%02llu:%02llu:%02llu on %s"), (u_longlong_t)examined,
6979+
(u_longlong_t)days_left, (u_longlong_t)hours_left,
6980+
(u_longlong_t)mins_left, (u_longlong_t)secs_left,
6981+
ctime(&end));
6982+
6983+
return;
6984+
} else if (ps->pss_error_scrub_state == DSS_CANCELED) {
6985+
(void) printf(gettext("error scrub canceled on %s"),
6986+
ctime(&end));
6987+
return;
6988+
}
6989+
assert(ps->pss_error_scrub_state == DSS_RESCRUBING);
6990+
6991+
/* Error scrub is in progress. */
6992+
if (pause == 0) {
6993+
(void) printf(gettext("error scrub in progress since %s"),
6994+
ctime(&start));
6995+
} else {
6996+
(void) printf(gettext("error scrub paused since %s"),
6997+
ctime(&pause));
6998+
(void) printf(gettext("\terror scrub started on %s"),
6999+
ctime(&start));
7000+
}
7001+
7002+
double fraction_done = (double)examined / (to_be_examined + examined);
7003+
(void) printf(gettext("\t%.2f%% done, issused i/o for %llu error"
7004+
" blocks"), 100 * fraction_done, (u_longlong_t)examined);
7005+
7006+
(void) printf(gettext("\n"));
7007+
}
7008+
69157009
/*
69167010
* Print out detailed scrub status.
69177011
*/
@@ -7687,6 +7781,7 @@ status_callback(zpool_handle_t *zhp, void *data)
76877781
ZPOOL_CONFIG_REMOVAL_STATS, (uint64_t **)&prs, &c);
76887782

76897783
print_scan_status(ps);
7784+
print_err_scrub_status(ps);
76907785
print_checkpoint_scan_warning(ps, pcs);
76917786
print_removal_status(zhp, prs);
76927787
print_checkpoint_status(pcs);

include/libzfs.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,10 @@ typedef enum zfs_error {
125125
EZFS_THREADCREATEFAILED, /* thread create failed */
126126
EZFS_POSTSPLIT_ONLINE, /* onlining a disk after splitting it */
127127
EZFS_SCRUBBING, /* currently scrubbing */
128+
EZFS_RESCRUBBING, /* currently error scrubbing */
129+
EZFS_RESCRUB_PAUSED, /* rescrub currently paused */
128130
EZFS_NO_SCRUB, /* no active scrub */
131+
EZFS_NO_RESCRUB, /* no active rescrub */
129132
EZFS_DIFF, /* general failure of zfs diff */
130133
EZFS_DIFFDATA, /* bad zfs diff data */
131134
EZFS_POOLREADONLY, /* pool is in read-only mode */
@@ -273,6 +276,7 @@ typedef struct trimflags {
273276
* Functions to manipulate pool and vdev state
274277
*/
275278
extern int zpool_scan(zpool_handle_t *, pool_scan_func_t, pool_scrub_cmd_t);
279+
extern int zpool_rescrub(zpool_handle_t *, pool_scan_func_t, pool_scrub_cmd_t);
276280
extern int zpool_initialize(zpool_handle_t *, pool_initialize_func_t,
277281
nvlist_t *);
278282
extern int zpool_trim(zpool_handle_t *, pool_trim_func_t, nvlist_t *,

include/libzfs_core.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,8 @@ int lzc_reopen(const char *, boolean_t);
130130
int lzc_pool_checkpoint(const char *);
131131
int lzc_pool_checkpoint_discard(const char *);
132132

133+
int lzc_ioctl(zfs_ioc_t, const char *, nvlist_t *, nvlist_t **);
134+
133135
#ifdef __cplusplus
134136
}
135137
#endif

include/sys/dmu.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,7 @@ typedef struct dmu_buf {
373373
#define DMU_POOL_DDT_STATS "DDT-statistics"
374374
#define DMU_POOL_CREATION_VERSION "creation_version"
375375
#define DMU_POOL_SCAN "scan"
376+
#define DMU_POOL_RESCRUB "rescrub"
376377
#define DMU_POOL_FREE_BPOBJ "free_bpobj"
377378
#define DMU_POOL_BPTREE_OBJ "bptree_obj"
378379
#define DMU_POOL_EMPTY_BPOBJ "empty_bpobj"

include/sys/dsl_scan.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929

3030
#include <sys/zfs_context.h>
3131
#include <sys/zio.h>
32+
#include <sys/zap.h>
3233
#include <sys/ddt.h>
3334
#include <sys/bplist.h>
3435

@@ -76,6 +77,18 @@ typedef enum dsl_scan_flags {
7677

7778
#define DSL_SCAN_FLAGS_MASK (DSF_VISIT_DS_AGAIN)
7879

80+
/*
 * Bookkeeping for an error scrub ("rescrub"): which function is running,
 * its state, its position, timing and block counters.
 *
 * NOTE(review): the dsl_scan_t member of this type is commented as the
 * on-disk representation, yet rescrub_cursor is a zap_cursor_t; confirm
 * that a zap cursor is meant to be persisted as part of this struct.
 */
typedef struct dsl_rescrub_phys {
81+
uint64_t rescrub_func; /* pool_scan_func_t */
82+
uint64_t rescrub_state; /* dsl_scan_state_t */
83+
zap_cursor_t rescrub_cursor; /* zap cursor; presumably the position in the error log -- TODO confirm */
84+
uint64_t rescrub_start_time;
85+
uint64_t rescrub_end_time;
86+
uint64_t rescrub_to_examine; /* total blocks to be rescrubbed */
87+
uint64_t rescrub_examined; /* presumably blocks examined so far, per the field name -- TODO confirm */
88+
uint64_t rescrub_errors; /* rescrub I/O error count */
89+
uint8_t rescrub_paused_flags; /* flag for paused */
90+
} dsl_rescrub_phys_t;
91+
7992
/*
8093
* Every pool will have one dsl_scan_t and this structure will contain
8194
* in-memory information about the scan and a pointer to the on-disk
@@ -153,6 +166,9 @@ typedef struct dsl_scan {
153166
dsl_scan_phys_t scn_phys_cached;
154167
avl_tree_t scn_queue; /* queue of datasets to scan */
155168
uint64_t scn_bytes_pending; /* outstanding data to issue */
169+
170+
/* members needed for syncing rescrub status to disk */
171+
dsl_rescrub_phys_t rescrub_phys; /* on disk representation of rescrub */
156172
} dsl_scan_t;
157173

158174
typedef struct dsl_scan_io_queue dsl_scan_io_queue_t;
@@ -166,6 +182,11 @@ int dsl_scan_cancel(struct dsl_pool *);
166182
int dsl_scan(struct dsl_pool *, pool_scan_func_t);
167183
boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp);
168184
int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd);
185+
int dsl_rescrub_set_pause_resume(const struct dsl_pool *dp,
186+
pool_scrub_cmd_t cmd);
187+
int dsl_rescrub(struct dsl_pool *, pool_scan_func_t);
188+
void dsl_rescrub_sync(struct dsl_pool *, dmu_tx_t *);
189+
int dsl_rescrub_cancel(struct dsl_pool *);
169190
void dsl_resilver_restart(struct dsl_pool *, uint64_t txg);
170191
boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
171192
boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);

include/sys/fs/zfs.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -899,6 +899,7 @@ typedef enum pool_scan_func {
899899
POOL_SCAN_NONE,
900900
POOL_SCAN_SCRUB,
901901
POOL_SCAN_RESILVER,
902+
POOL_RESCRUB,
902903
POOL_SCAN_FUNCS
903904
} pool_scan_func_t;
904905

@@ -908,6 +909,9 @@ typedef enum pool_scan_func {
908909
typedef enum pool_scrub_cmd {
909910
POOL_SCRUB_NORMAL = 0,
910911
POOL_SCRUB_PAUSE,
912+
POOL_RESCRUB_NORMAL,
913+
POOL_RESCRUB_PAUSE,
914+
POOL_RESCRUB_STOP,
911915
POOL_SCRUB_FLAGS_END
912916
} pool_scrub_cmd_t;
913917

@@ -962,6 +966,16 @@ typedef struct pool_scan_stat {
962966
uint64_t pss_pass_scrub_spent_paused;
963967
uint64_t pss_pass_issued; /* issued bytes per scan pass */
964968
uint64_t pss_issued; /* total bytes checked by scanner */
969+
/* values not stored on disk */
970+
uint64_t pss_error_scrub_func; /* pool_scan_func_t */
971+
uint64_t pss_error_scrub_state; /* dsl_scan_state_t */
972+
uint64_t pss_error_scrub_start; /* error scrub start time */
973+
uint64_t pss_error_scrub_end; /* error scrub end time */
974+
uint64_t pss_error_scrub_examined; /* error blocks examined so far */
975+
uint64_t pss_error_scrub_to_be_examined; /* error blocks still to examine */
976+
uint64_t pss_pass_error_scrub_pause;
977+
uint64_t pss_pass_error_scrub_spent_paused;
978+
965979
} pool_scan_stat_t;
966980

967981
typedef struct pool_removal_stat {
@@ -983,6 +997,7 @@ typedef enum dsl_scan_state {
983997
DSS_SCANNING,
984998
DSS_FINISHED,
985999
DSS_CANCELED,
1000+
DSS_RESCRUBING,
9861001
DSS_NUM_STATES
9871002
} dsl_scan_state_t;
9881003

@@ -1277,6 +1292,7 @@ typedef enum zfs_ioc {
12771292
ZFS_IOC_POOL_TRIM, /* 0x5a50 */
12781293
ZFS_IOC_REDACT, /* 0x5a51 */
12791294
ZFS_IOC_GET_BOOKMARK_PROPS, /* 0x5a52 */
1295+
ZFS_IOC_POOL_RESCRUB, /* 0x5a53 */
12801296

12811297
/*
12821298
* Linux - 3/64 numbers reserved.

include/sys/spa.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -866,6 +866,11 @@ extern int spa_scan(spa_t *spa, pool_scan_func_t func);
866866
extern int spa_scan_stop(spa_t *spa);
867867
extern int spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t flag);
868868

869+
/* rescrubbing */
870+
extern int spa_rescrub(spa_t *spa, pool_scan_func_t func);
871+
extern int spa_rescrub_stop(spa_t *spa);
872+
extern int spa_rescrub_pause_resume(spa_t *spa, pool_scrub_cmd_t flag);
873+
869874
/* spa syncing */
870875
extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
871876
extern void spa_sync_allpools(void);
@@ -1176,6 +1181,7 @@ extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
11761181
extern void zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate);
11771182
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
11781183
extern uint64_t spa_get_errlog_size(spa_t *spa);
1184+
extern uint64_t spa_get_last_errlog_size(spa_t *spa);
11791185
extern int spa_get_errlog(spa_t *spa, void *uaddr, size_t *count);
11801186
extern void spa_errlog_rotate(spa_t *spa);
11811187
extern void spa_errlog_drain(spa_t *spa);

include/sys/spa_impl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,8 @@ struct spa {
285285
uint64_t spa_scan_pass_exam; /* examined bytes per pass */
286286
uint64_t spa_scan_pass_issued; /* issued bytes per pass */
287287

288+
uint64_t spa_scan_pass_rescrub_pause; /* rescrub pause time */
289+
uint64_t spa_scan_pass_rescrub_spent_paused; /* total paused */
288290
/*
289291
* We are in the middle of a resilver, and another resilver
290292
* is needed once this one completes. This is set iff any

include/sys/sysevent/eventdefs.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,11 @@ extern "C" {
123123
#define ESC_ZFS_TRIM_CANCEL "trim_cancel"
124124
#define ESC_ZFS_TRIM_RESUME "trim_resume"
125125
#define ESC_ZFS_TRIM_SUSPEND "trim_suspend"
126+
#define ESC_ZFS_RESCRUB_START "rescrub_start"
127+
#define ESC_ZFS_RESCRUB_FINISH "rescrub_finish"
128+
#define ESC_ZFS_RESCRUB_ABORT "rescrub_abort"
129+
#define ESC_ZFS_RESCRUB_RESUME "rescrub_resume"
130+
#define ESC_ZFS_RESCRUB_PAUSED "rescrub_paused"
126131

127132
/*
128133
* datalink subclass definitions.

0 commit comments

Comments
 (0)