Skip to content

Commit 64b3931

Browse files
author
TulsiJain
committed
Rescrub only previously detected error blocks
Signed-off-by: TulsiJain <tulsi.jain@delphix.com>
1 parent 09276fd commit 64b3931

File tree

16 files changed

+678
-1
lines changed

16 files changed

+678
-1
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,4 +62,7 @@ cscope.*
6262
*.orig
6363
*.log
6464
venv
65+
# Ignore Eclipse files
66+
/.cproject
67+
/.project
6568

cmd/zpool/zpool_main.c

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ static int zpool_do_split(int, char **);
100100

101101
static int zpool_do_initialize(int, char **);
102102
static int zpool_do_scrub(int, char **);
103+
static int zpool_do_rescrub(int, char **);
103104
static int zpool_do_resilver(int, char **);
104105
static int zpool_do_trim(int, char **);
105106

@@ -157,6 +158,7 @@ typedef enum {
157158
HELP_REMOVE,
158159
HELP_INITIALIZE,
159160
HELP_SCRUB,
161+
HELP_RESCRUB,
160162
HELP_RESILVER,
161163
HELP_TRIM,
162164
HELP_STATUS,
@@ -296,6 +298,7 @@ static zpool_command_t command_table[] = {
296298
{ "initialize", zpool_do_initialize, HELP_INITIALIZE },
297299
{ "resilver", zpool_do_resilver, HELP_RESILVER },
298300
{ "scrub", zpool_do_scrub, HELP_SCRUB },
301+
{ "rescrub", zpool_do_rescrub, HELP_RESCRUB },
299302
{ "trim", zpool_do_trim, HELP_TRIM },
300303
{ NULL },
301304
{ "import", zpool_do_import, HELP_IMPORT },
@@ -382,6 +385,8 @@ get_usage(zpool_help_t idx)
382385
"[<device> ...]\n"));
383386
case HELP_SCRUB:
384387
return (gettext("\tscrub [-s | -p] <pool> ...\n"));
388+
case HELP_RESCRUB:
389+
return (gettext("\trescrub [-s | -p] <pool> ...\n"));
385390
case HELP_RESILVER:
386391
return (gettext("\tresilver <pool> ...\n"));
387392
case HELP_TRIM:
@@ -6703,6 +6708,26 @@ scrub_callback(zpool_handle_t *zhp, void *data)
67036708
return (err != 0);
67046709
}
67056710

6711+
int
6712+
rescrub_callback(zpool_handle_t *zhp, void *data)
6713+
{
6714+
scrub_cbdata_t *cb = data;
6715+
int err;
6716+
6717+
/*
6718+
* Ignore faulted pools.
6719+
*/
6720+
if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
6721+
(void) fprintf(stderr, gettext("cannot rescrub '%s': pool is "
6722+
"currently unavailable\n"), zpool_get_name(zhp));
6723+
return (1);
6724+
}
6725+
6726+
err = zpool_rescrub(zhp, cb->cb_type, cb->cb_scrub_cmd);
6727+
6728+
return (err != 0);
6729+
}
6730+
67066731
/*
67076732
* zpool scrub [-s | -p] <pool> ...
67086733
*
@@ -6754,6 +6779,62 @@ zpool_do_scrub(int argc, char **argv)
67546779
return (for_each_pool(argc, argv, B_TRUE, NULL, scrub_callback, &cb));
67556780
}
67566781

6782+
/*
6783+
* zpool rescrub [-s | -p] <pool> ...
6784+
*
6785+
* -s Stop. Stops any in-progress rescrub.
6786+
* -p Pause. Pause in-progress rescrub.
6787+
*/
6788+
int
6789+
zpool_do_rescrub(int argc, char **argv)
6790+
{
6791+
int c;
6792+
scrub_cbdata_t cb;
6793+
6794+
cb.cb_type = POOL_RESCRUB;
6795+
cb.cb_scrub_cmd = POOL_RESCRUB_NORMAL;
6796+
6797+
/* check options */
6798+
while ((c = getopt(argc, argv, "sp")) != -1) {
6799+
switch (c) {
6800+
case 's':
6801+
if (cb.cb_scrub_cmd != POOL_RESCRUB_NORMAL) {
6802+
(void) fprintf(stderr, gettext("invalid option"
6803+
" combination: -s and -p are mutually"
6804+
" exclusive\n"));
6805+
usage(B_FALSE);
6806+
}
6807+
cb.cb_scrub_cmd = POOL_RESCRUB_STOP;
6808+
break;
6809+
case 'p':
6810+
if (cb.cb_scrub_cmd != POOL_RESCRUB_NORMAL) {
6811+
(void) fprintf(stderr, gettext("invalid option"
6812+
" combination: -s and -p are mutually"
6813+
" exclusive\n"));
6814+
usage(B_FALSE);
6815+
}
6816+
cb.cb_scrub_cmd = POOL_RESCRUB_PAUSE;
6817+
break;
6818+
case '?':
6819+
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
6820+
optopt);
6821+
usage(B_FALSE);
6822+
}
6823+
}
6824+
6825+
cb.cb_argc = argc;
6826+
cb.cb_argv = argv;
6827+
argc -= optind;
6828+
argv += optind;
6829+
6830+
if (argc < 1) {
6831+
(void) fprintf(stderr, gettext("missing pool name argument\n"));
6832+
usage(B_FALSE);
6833+
}
6834+
6835+
return (for_each_pool(argc, argv, B_TRUE, NULL, rescrub_callback, &cb));
6836+
}
6837+
67576838
/*
67586839
* zpool resilver <pool> ...
67596840
*

include/libzfs.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,10 @@ typedef enum zfs_error {
125125
EZFS_THREADCREATEFAILED, /* thread create failed */
126126
EZFS_POSTSPLIT_ONLINE, /* onlining a disk after splitting it */
127127
EZFS_SCRUBBING, /* currently scrubbing */
128+
EZFS_RESCRUBBING, /* currently rescrubbing */
129+
EZFS_RESCRUB_PAUSED, /* rescrub currently paused */
128130
EZFS_NO_SCRUB, /* no active scrub */
131+
EZFS_NO_RESCRUB, /* no active rescrub */
129132
EZFS_DIFF, /* general failure of zfs diff */
130133
EZFS_DIFFDATA, /* bad zfs diff data */
131134
EZFS_POOLREADONLY, /* pool is in read-only mode */
@@ -273,6 +276,7 @@ typedef struct trimflags {
273276
* Functions to manipulate pool and vdev state
274277
*/
275278
extern int zpool_scan(zpool_handle_t *, pool_scan_func_t, pool_scrub_cmd_t);
279+
extern int zpool_rescrub(zpool_handle_t *, pool_scan_func_t, pool_scrub_cmd_t);
276280
extern int zpool_initialize(zpool_handle_t *, pool_initialize_func_t,
277281
nvlist_t *);
278282
extern int zpool_trim(zpool_handle_t *, pool_trim_func_t, nvlist_t *,

include/sys/dmu.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,7 @@ typedef struct dmu_buf {
373373
#define DMU_POOL_DDT_STATS "DDT-statistics"
374374
#define DMU_POOL_CREATION_VERSION "creation_version"
375375
#define DMU_POOL_SCAN "scan"
376+
#define DMU_POOL_RESCRUB "rescrub"
376377
#define DMU_POOL_FREE_BPOBJ "free_bpobj"
377378
#define DMU_POOL_BPTREE_OBJ "bptree_obj"
378379
#define DMU_POOL_EMPTY_BPOBJ "empty_bpobj"

include/sys/dsl_scan.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929

3030
#include <sys/zfs_context.h>
3131
#include <sys/zio.h>
32+
#include <sys/zap.h>
3233
#include <sys/ddt.h>
3334
#include <sys/bplist.h>
3435

@@ -67,7 +68,9 @@ typedef struct dsl_scan_phys {
6768
uint64_t scn_flags; /* dsl_scan_flags_t */
6869
} dsl_scan_phys_t;
6970

71+
7072
#define SCAN_PHYS_NUMINTS (sizeof (dsl_scan_phys_t) / sizeof (uint64_t))
73+
#define RESCRUB_BLOCK_IN_TXG 10000
7174

7275
typedef enum dsl_scan_flags {
7376
DSF_VISIT_DS_AGAIN = 1<<0,
@@ -76,6 +79,19 @@ typedef enum dsl_scan_flags {
7679

7780
#define DSL_SCAN_FLAGS_MASK (DSF_VISIT_DS_AGAIN)
7881

82+
/*
 * Rescrub state record.  dsl_scan_t embeds one instance as rescrub_phys,
 * where it is described as the on-disk representation of a rescrub.
 *
 * NOTE(review): field order and sizes presumably define a persistent layout;
 * confirm before reordering or resizing any member.
 */
typedef struct dsl_rescrub_phys {
83+
uint64_t rescrub_func; /* pool_scan_func_t */
84+
uint64_t rescrub_state; /* dsl_scan_state_t */
85+
zap_cursor_t cursor; /* zap cursor; NOTE(review): confirm a zap_cursor_t may be persisted */
86+
uint64_t rescrub_start_time;
87+
uint64_t rescrub_end_time;
88+
uint64_t rescrub_to_examine; /* total blocks to be rescrubbed */
89+
uint64_t rescrub_examined; /* NOTE(review): was "blocks remaining so far"; name suggests blocks examined - confirm */
90+
uint64_t rescrub_errors; /* rescrub I/O error count */
91+
uint8_t rescrub_paused_flags; /* flag for paused */
92+
} dsl_rescrub_phys_t;
93+
94+
7995
/*
8096
* Every pool will have one dsl_scan_t and this structure will contain
8197
* in-memory information about the scan and a pointer to the on-disk
@@ -153,6 +169,9 @@ typedef struct dsl_scan {
153169
dsl_scan_phys_t scn_phys_cached;
154170
avl_tree_t scn_queue; /* queue of datasets to scan */
155171
uint64_t scn_bytes_pending; /* outstanding data to issue */
172+
173+
/* members needed for syncing rescrub status to disk */
174+
dsl_rescrub_phys_t rescrub_phys; /* on disk representation of rescrub */
156175
} dsl_scan_t;
157176

158177
typedef struct dsl_scan_io_queue dsl_scan_io_queue_t;
@@ -166,6 +185,11 @@ int dsl_scan_cancel(struct dsl_pool *);
166185
int dsl_scan(struct dsl_pool *, pool_scan_func_t);
167186
boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp);
168187
int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd);
188+
int dsl_rescrub_set_pause_resume(const struct dsl_pool *dp,
189+
pool_scrub_cmd_t cmd);
190+
int dsl_rescrub(struct dsl_pool *, pool_scan_func_t);
191+
void dsl_rescrub_sync(struct dsl_pool *, dmu_tx_t *);
192+
int dsl_rescrub_cancel(struct dsl_pool *);
169193
void dsl_resilver_restart(struct dsl_pool *, uint64_t txg);
170194
boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
171195
boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);

include/sys/fs/zfs.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -899,6 +899,7 @@ typedef enum pool_scan_func {
899899
POOL_SCAN_NONE,
900900
POOL_SCAN_SCRUB,
901901
POOL_SCAN_RESILVER,
902+
POOL_RESCRUB,
902903
POOL_SCAN_FUNCS
903904
} pool_scan_func_t;
904905

@@ -908,6 +909,9 @@ typedef enum pool_scan_func {
908909
typedef enum pool_scrub_cmd {
909910
POOL_SCRUB_NORMAL = 0,
910911
POOL_SCRUB_PAUSE,
912+
POOL_RESCRUB_NORMAL,
913+
POOL_RESCRUB_PAUSE,
914+
POOL_RESCRUB_STOP,
911915
POOL_SCRUB_FLAGS_END
912916
} pool_scrub_cmd_t;
913917

@@ -983,6 +987,7 @@ typedef enum dsl_scan_state {
983987
DSS_SCANNING,
984988
DSS_FINISHED,
985989
DSS_CANCELED,
990+
DSS_RESCRUBING,
986991
DSS_NUM_STATES
987992
} dsl_scan_state_t;
988993

@@ -1277,6 +1282,7 @@ typedef enum zfs_ioc {
12771282
ZFS_IOC_POOL_TRIM, /* 0x5a50 */
12781283
ZFS_IOC_REDACT, /* 0x5a51 */
12791284
ZFS_IOC_GET_BOOKMARK_PROPS, /* 0x5a52 */
1285+
ZFS_IOC_POOL_RESCRUB, /* 0x5a53 */
12801286

12811287
/*
12821288
* Linux - 3/64 numbers reserved.

include/sys/spa.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -858,6 +858,11 @@ extern int spa_scan(spa_t *spa, pool_scan_func_t func);
858858
extern int spa_scan_stop(spa_t *spa);
859859
extern int spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t flag);
860860

861+
/* rescrubbing */
862+
extern int spa_rescrub(spa_t *spa, pool_scan_func_t func);
863+
extern int spa_rescrub_stop(spa_t *spa);
864+
extern int spa_rescrub_pause_resume(spa_t *spa, pool_scrub_cmd_t flag);
865+
861866
/* spa syncing */
862867
extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
863868
extern void spa_sync_allpools(void);
@@ -1167,11 +1172,13 @@ extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
11671172
extern void zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate);
11681173
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
11691174
extern uint64_t spa_get_errlog_size(spa_t *spa);
1175+
extern uint64_t spa_get_last_errlog_size(spa_t *spa);
11701176
extern int spa_get_errlog(spa_t *spa, void *uaddr, size_t *count);
11711177
extern void spa_errlog_rotate(spa_t *spa);
11721178
extern void spa_errlog_drain(spa_t *spa);
11731179
extern void spa_errlog_sync(spa_t *spa, uint64_t txg);
11741180
extern void spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub);
1181+
// extern void name_to_bookmark(char *buf, zbookmark_phys_t *zb);
11751182

11761183
/* vdev cache */
11771184
extern void vdev_cache_stat_init(void);

include/sys/spa_impl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,8 @@ struct spa {
284284
uint64_t spa_scan_pass_exam; /* examined bytes per pass */
285285
uint64_t spa_scan_pass_issued; /* issued bytes per pass */
286286

287+
uint64_t spa_scan_pass_rescrub_pause; /* rescrub pause time */
288+
uint64_t spa_scan_pass_rescrub_spent_paused; /* total paused */
287289
/*
288290
* We are in the middle of a resilver, and another resilver
289291
* is needed once this one completes. This is set iff any

include/sys/sysevent/eventdefs.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,11 @@ extern "C" {
123123
#define ESC_ZFS_TRIM_CANCEL "trim_cancel"
124124
#define ESC_ZFS_TRIM_RESUME "trim_resume"
125125
#define ESC_ZFS_TRIM_SUSPEND "trim_suspend"
126+
#define ESC_ZFS_RESCRUB_START "rescrub_start"
127+
#define ESC_ZFS_RESCRUB_FINISH "rescrub_finish"
128+
#define ESC_ZFS_RESCRUB_ABORT "rescrub_abort"
129+
#define ESC_ZFS_RESCRUB_RESUME "rescrub_resume"
130+
#define ESC_ZFS_RESCRUB_PAUSED "rescrub_paused"
126131

127132
/*
128133
* datalink subclass definitions.

lib/libzfs/libzfs_pool.c

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2364,6 +2364,7 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
23642364
pool_scan_stat_t *ps = NULL;
23652365
uint_t psc;
23662366

2367+
// needs some improvement because of rescrub
23672368
verify(nvlist_lookup_nvlist(zhp->zpool_config,
23682369
ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
23692370
(void) nvlist_lookup_uint64_array(nvroot,
@@ -2385,6 +2386,83 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
23852386
}
23862387
}
23872388

2389+
2390+
/*
2391+
* rescrub the pool.
2392+
*/
2393+
int
2394+
zpool_rescrub(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
2395+
{
2396+
zfs_cmd_t zc = {"\0"};
2397+
char msg[1024];
2398+
int err;
2399+
libzfs_handle_t *hdl = zhp->zpool_hdl;
2400+
2401+
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2402+
zc.zc_cookie = func;
2403+
zc.zc_flags = cmd;
2404+
2405+
if (zfs_ioctl(hdl, ZFS_IOC_POOL_RESCRUB, &zc) == 0)
2406+
return (0);
2407+
2408+
err = errno;
2409+
2410+
assert(func == POOL_RESCRUB);
2411+
2412+
/*
2413+
* ECANCELED is returned when either we resumed a paused rescrub or
2414+
* has no disk errorlog.
2415+
*/
2416+
if (err == ECANCELED && cmd == POOL_RESCRUB_NORMAL)
2417+
return (0);
2418+
2419+
if (err == ENOENT && cmd == POOL_RESCRUB_NORMAL)
2420+
return (0);
2421+
2422+
if (cmd == POOL_RESCRUB_PAUSE) {
2423+
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
2424+
"cannot pause rescrubbing %s"), zc.zc_name);
2425+
} else if (cmd == POOL_RESCRUB_NORMAL) {
2426+
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
2427+
"cannot rescrub %s"), zc.zc_name);
2428+
} else {
2429+
assert(cmd == POOL_RESCRUB_STOP);
2430+
(void) snprintf(msg, sizeof (msg),
2431+
dgettext(TEXT_DOMAIN, "cannot cancel rescrubbing "
2432+
"%s"), zc.zc_name);
2433+
}
2434+
2435+
if (err == EBUSY) {
2436+
nvlist_t *nvroot;
2437+
pool_scan_stat_t *ps = NULL;
2438+
uint_t psc;
2439+
2440+
// needs improvement working on it
2441+
verify(nvlist_lookup_nvlist(zhp->zpool_config,
2442+
ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
2443+
(void) nvlist_lookup_uint64_array(nvroot,
2444+
ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
2445+
if (ps && ps->pss_func == POOL_SCAN_SCRUB) {
2446+
if (cmd == POOL_SCRUB_PAUSE)
2447+
return (zfs_error(hdl, EZFS_SCRUB_PAUSED, msg));
2448+
else
2449+
return (zfs_error(hdl, EZFS_SCRUBBING, msg));
2450+
} else if (ps && ps->pss_func == POOL_RESCRUB) {
2451+
if (cmd == POOL_RESCRUB_PAUSE)
2452+
return (zfs_error(hdl, EZFS_RESCRUB_PAUSED,
2453+
msg));
2454+
else
2455+
return (zfs_error(hdl, EZFS_RESCRUBBING, msg));
2456+
} else {
2457+
return (zfs_error(hdl, EZFS_RESILVERING, msg));
2458+
}
2459+
} else if (err == ENOENT) {
2460+
return (zfs_error(hdl, EZFS_NO_RESCRUB, msg));
2461+
} else {
2462+
return (zpool_standard_error(hdl, err, msg));
2463+
}
2464+
}
2465+
23882466
/*
23892467
* Find a vdev that matches the search criteria specified. We use the
23902468
* the nvpair name to determine how we should look for the device.

0 commit comments

Comments
 (0)