Skip to content

Commit 16c5858

Browse files
author
TulsiJain
committed
Added a -e flag to zpool scrub to scrub only the blocks listed in the error log.
Signed-off-by: TulsiJain <tulsi.jain@delphix.com>
1 parent 0e37a0f commit 16c5858

File tree

26 files changed

+1213
-65
lines changed

26 files changed

+1213
-65
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,4 +62,7 @@ cscope.*
6262
*.orig
6363
*.log
6464
venv
65+
# Ignore Eclipse files
66+
/.cproject
67+
/.project
6568

cmd/zpool/zpool_main.c

Lines changed: 99 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,8 @@ get_usage(zpool_help_t idx)
381381
return (gettext("\tinitialize [-c | -s] <pool> "
382382
"[<device> ...]\n"));
383383
case HELP_SCRUB:
384-
return (gettext("\tscrub [-s | -p] <pool> ...\n"));
384+
return (gettext("\tscrub [-s | -p] [-e] <pool>"
385+
" ...\n"));
385386
case HELP_RESILVER:
386387
return (gettext("\tresilver <pool> ...\n"));
387388
case HELP_TRIM:
@@ -6704,8 +6705,9 @@ scrub_callback(zpool_handle_t *zhp, void *data)
67046705
}
67056706

67066707
/*
6707-
* zpool scrub [-s | -p] <pool> ...
6708+
* zpool scrub [-s | -p] [-e] <pool> ...
67086709
*
6710+
* -e Only scrub blocks in the error log.
67096711
* -s Stop. Stops any in-progress scrub.
67106712
* -p Pause. Pause in-progress scrub.
67116713
*/
@@ -6718,14 +6720,21 @@ zpool_do_scrub(int argc, char **argv)
67186720
cb.cb_type = POOL_SCAN_SCRUB;
67196721
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
67206722

6723+
boolean_t is_error_scrub = B_FALSE;
6724+
boolean_t is_pause = B_FALSE;
6725+
boolean_t is_stop = B_FALSE;
6726+
67216727
/* check options */
6722-
while ((c = getopt(argc, argv, "sp")) != -1) {
6728+
while ((c = getopt(argc, argv, "eps")) != -1) {
67236729
switch (c) {
6730+
case 'e':
6731+
is_error_scrub = B_TRUE;
6732+
break;
67246733
case 's':
6725-
cb.cb_type = POOL_SCAN_NONE;
6734+
is_stop = B_TRUE;
67266735
break;
67276736
case 'p':
6728-
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
6737+
is_pause = B_TRUE;
67296738
break;
67306739
case '?':
67316740
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
@@ -6734,11 +6743,27 @@ zpool_do_scrub(int argc, char **argv)
67346743
}
67356744
}
67366745

6737-
if (cb.cb_type == POOL_SCAN_NONE &&
6738-
cb.cb_scrub_cmd == POOL_SCRUB_PAUSE) {
6739-
(void) fprintf(stderr, gettext("invalid option combination: "
6740-
"-s and -p are mutually exclusive\n"));
6746+
if (is_pause && is_stop) {
6747+
(void) fprintf(stderr, gettext("invalid option "
6748+
"combination :-s and -p are mutually exclusive\n"));
67416749
usage(B_FALSE);
6750+
} else {
6751+
if (is_error_scrub) {
6752+
cb.cb_type = POOL_ERRORSCRUB;
6753+
if (is_pause) {
6754+
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
6755+
} else if (is_stop) {
6756+
cb.cb_scrub_cmd = POOL_ERRORSCRUB_STOP;
6757+
} else {
6758+
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
6759+
}
6760+
} else {
6761+
if (is_pause) {
6762+
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
6763+
} else if (is_stop) {
6764+
cb.cb_type = POOL_SCAN_NONE;
6765+
}
6766+
}
67426767
}
67436768

67446769
cb.cb_argc = argc;
@@ -6912,6 +6937,70 @@ zpool_do_trim(int argc, char **argv)
69126937
return (error);
69136938
}
69146939

6940+
/*
6941+
* Print out detailed error scrub status.
6942+
*/
6943+
static void
6944+
print_err_scrub_status(pool_scan_stat_t *ps)
6945+
{
6946+
time_t start, end, pause;
6947+
uint64_t total_secs_left;
6948+
uint64_t secs_left, mins_left, hours_left, days_left;
6949+
uint64_t examined, to_be_examined;
6950+
6951+
if (ps == NULL || ps->pss_error_scrub_func != POOL_ERRORSCRUB) {
6952+
return;
6953+
}
6954+
6955+
(void) printf(gettext(" scrub: "));
6956+
6957+
start = ps->pss_error_scrub_start;
6958+
end = ps->pss_error_scrub_end;
6959+
pause = ps->pss_pass_error_scrub_pause;
6960+
examined = ps->pss_error_scrub_examined;
6961+
to_be_examined = ps->pss_error_scrub_to_be_examined;
6962+
6963+
assert(ps->pss_error_scrub_func == POOL_ERRORSCRUB);
6964+
6965+
if (ps->pss_error_scrub_state == DSS_FINISHED) {
6966+
total_secs_left = end - start;
6967+
days_left = total_secs_left / 60 / 60 / 24;
6968+
hours_left = (total_secs_left / 60 / 60) % 24;
6969+
mins_left = (total_secs_left / 60) % 60;
6970+
secs_left = (total_secs_left % 60);
6971+
6972+
(void) printf(gettext("scrubbed %llu error blocks in %llu days "
6973+
"%02llu:%02llu:%02llu on %s"), (u_longlong_t)examined,
6974+
(u_longlong_t)days_left, (u_longlong_t)hours_left,
6975+
(u_longlong_t)mins_left, (u_longlong_t)secs_left,
6976+
ctime(&end));
6977+
6978+
return;
6979+
} else if (ps->pss_error_scrub_state == DSS_CANCELED) {
6980+
(void) printf(gettext("error scrub canceled on %s"),
6981+
ctime(&end));
6982+
return;
6983+
}
6984+
assert(ps->pss_error_scrub_state == DSS_ERRORSCRUBING);
6985+
6986+
/* Error scrub is in progress. */
6987+
if (pause == 0) {
6988+
(void) printf(gettext("error scrub in progress since %s"),
6989+
ctime(&start));
6990+
} else {
6991+
(void) printf(gettext("error scrub paused since %s"),
6992+
ctime(&pause));
6993+
(void) printf(gettext("\terror scrub started on %s"),
6994+
ctime(&start));
6995+
}
6996+
6997+
double fraction_done = (double)examined / (to_be_examined + examined);
6998+
(void) printf(gettext("\t%.2f%% done, issued I/O for %llu error"
6999+
" blocks"), 100 * fraction_done, (u_longlong_t)examined);
7000+
7001+
(void) printf("\n");
7002+
}
7003+
69157004
/*
69167005
* Print out detailed scrub status.
69177006
*/
@@ -7687,6 +7776,7 @@ status_callback(zpool_handle_t *zhp, void *data)
76877776
ZPOOL_CONFIG_REMOVAL_STATS, (uint64_t **)&prs, &c);
76887777

76897778
print_scan_status(ps);
7779+
print_err_scrub_status(ps);
76907780
print_checkpoint_scan_warning(ps, pcs);
76917781
print_removal_status(zhp, prs);
76927782
print_checkpoint_status(pcs);

include/libzfs.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,11 +125,15 @@ typedef enum zfs_error {
125125
EZFS_THREADCREATEFAILED, /* thread create failed */
126126
EZFS_POSTSPLIT_ONLINE, /* onlining a disk after splitting it */
127127
EZFS_SCRUBBING, /* currently scrubbing */
128+
EZFS_ERRORSCRUBBING, /* currently error scrubbing */
129+
EZFS_ERRORSCRUB_PAUSED, /* error scrub currently paused */
128130
EZFS_NO_SCRUB, /* no active scrub */
131+
EZFS_NO_ERRORSCRUB, /* no active error scrub */
129132
EZFS_DIFF, /* general failure of zfs diff */
130133
EZFS_DIFFDATA, /* bad zfs diff data */
131134
EZFS_POOLREADONLY, /* pool is in read-only mode */
132135
EZFS_SCRUB_PAUSED, /* scrub currently paused */
136+
EZFS_SCRUB_PAUSED_TO_CANCEL, /* scrub currently paused */
133137
EZFS_ACTIVE_POOL, /* pool is imported on a different system */
134138
EZFS_CRYPTOFAILED, /* failed to setup encryption */
135139
EZFS_NO_PENDING, /* cannot cancel, no operation is pending */

include/libzfs_core.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,8 @@ int lzc_reopen(const char *, boolean_t);
130130
int lzc_pool_checkpoint(const char *);
131131
int lzc_pool_checkpoint_discard(const char *);
132132

133+
int lzc_scrub(zfs_ioc_t, const char *, nvlist_t *, nvlist_t **);
134+
133135
#ifdef __cplusplus
134136
}
135137
#endif

include/sys/dmu.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,7 @@ typedef struct dmu_buf {
373373
#define DMU_POOL_DDT_STATS "DDT-statistics"
374374
#define DMU_POOL_CREATION_VERSION "creation_version"
375375
#define DMU_POOL_SCAN "scan"
376+
#define DMU_POOL_ERRORSCRUB "error_scrub"
376377
#define DMU_POOL_FREE_BPOBJ "free_bpobj"
377378
#define DMU_POOL_BPTREE_OBJ "bptree_obj"
378379
#define DMU_POOL_EMPTY_BPOBJ "empty_bpobj"

include/sys/dsl_scan.h

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929

3030
#include <sys/zfs_context.h>
3131
#include <sys/zio.h>
32+
#include <sys/zap.h>
3233
#include <sys/ddt.h>
3334
#include <sys/bplist.h>
3435

@@ -76,6 +77,21 @@ typedef enum dsl_scan_flags {
7677

7778
#define DSL_SCAN_FLAGS_MASK (DSF_VISIT_DS_AGAIN)
7879

80+
typedef struct dsl_errorscrub_phys {
81+
uint64_t dep_func; /* pool_scan_func_t */
82+
uint64_t dep_state; /* dsl_scan_state_t */
83+
uint64_t dep_cursor; /* serialized zap cursor for tracing progress */
84+
uint64_t dep_start_time; /* error scrub start time, unix timestamp */
85+
uint64_t dep_end_time; /* error scrub end time, unix timestamp */
86+
uint64_t dep_to_examine; /* total error blocks to be scrubbed */
87+
uint64_t dep_examined; /* blocks scrubbed so far */
88+
uint64_t dep_errors; /* error scrub I/O error count */
89+
uint64_t dep_paused_flags; /* flag for paused */
90+
} dsl_errorscrub_phys_t;
91+
92+
#define ERRORSCRUB_PHYS_NUMINTS (sizeof (dsl_errorscrub_phys_t) \
93+
/ sizeof (uint64_t))
94+
7995
/*
8096
* Every pool will have one dsl_scan_t and this structure will contain
8197
* in-memory information about the scan and a pointer to the on-disk
@@ -148,11 +164,16 @@ typedef struct dsl_scan {
148164
uint64_t scn_avg_zio_size_this_txg;
149165
uint64_t scn_zios_this_txg;
150166

167+
/* zap cursor for tracing error scrub progress */
168+
zap_cursor_t errorscrub_cursor;
151169
/* members needed for syncing scan status to disk */
152170
dsl_scan_phys_t scn_phys; /* on disk representation of scan */
153171
dsl_scan_phys_t scn_phys_cached;
154172
avl_tree_t scn_queue; /* queue of datasets to scan */
155173
uint64_t scn_bytes_pending; /* outstanding data to issue */
174+
175+
/* members needed for syncing error scrub status to disk */
176+
dsl_errorscrub_phys_t errorscrub_phys;
156177
} dsl_scan_t;
157178

158179
typedef struct dsl_scan_io_queue dsl_scan_io_queue_t;
@@ -162,10 +183,12 @@ void scan_fini(void);
162183
int dsl_scan_init(struct dsl_pool *dp, uint64_t txg);
163184
void dsl_scan_fini(struct dsl_pool *dp);
164185
void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *);
165-
int dsl_scan_cancel(struct dsl_pool *);
186+
int dsl_scan_cancel(struct dsl_pool *, pool_scan_func_t func);
166187
int dsl_scan(struct dsl_pool *, pool_scan_func_t);
167188
boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp);
168-
int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd);
189+
int dsl_scrub_set_pause_resume(const struct dsl_pool *dp,
190+
pool_scrub_cmd_t cmd, pool_scan_func_t func);
191+
void dsl_errorscrub_sync(struct dsl_pool *, dmu_tx_t *);
169192
void dsl_resilver_restart(struct dsl_pool *, uint64_t txg);
170193
boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
171194
boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);
@@ -177,6 +200,7 @@ void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
177200
struct dmu_tx *tx);
178201
boolean_t dsl_scan_active(dsl_scan_t *scn);
179202
boolean_t dsl_scan_is_paused_scrub(const dsl_scan_t *scn);
203+
boolean_t dsl_errorscrub_is_paused(const dsl_scan_t *scn);
180204
void dsl_scan_freed(spa_t *spa, const blkptr_t *bp);
181205
void dsl_scan_io_queue_destroy(dsl_scan_io_queue_t *queue);
182206
void dsl_scan_io_queue_vdev_xfer(vdev_t *svd, vdev_t *tvd);

include/sys/fs/zfs.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -899,6 +899,7 @@ typedef enum pool_scan_func {
899899
POOL_SCAN_NONE,
900900
POOL_SCAN_SCRUB,
901901
POOL_SCAN_RESILVER,
902+
POOL_ERRORSCRUB,
902903
POOL_SCAN_FUNCS
903904
} pool_scan_func_t;
904905

@@ -908,6 +909,7 @@ typedef enum pool_scan_func {
908909
typedef enum pool_scrub_cmd {
909910
POOL_SCRUB_NORMAL = 0,
910911
POOL_SCRUB_PAUSE,
912+
POOL_ERRORSCRUB_STOP,
911913
POOL_SCRUB_FLAGS_END
912914
} pool_scrub_cmd_t;
913915

@@ -962,6 +964,20 @@ typedef struct pool_scan_stat {
962964
uint64_t pss_pass_scrub_spent_paused;
963965
uint64_t pss_pass_issued; /* issued bytes per scan pass */
964966
uint64_t pss_issued; /* total bytes checked by scanner */
967+
968+
/* error scrub values stored on disk */
969+
uint64_t pss_error_scrub_func; /* pool_scan_func_t */
970+
uint64_t pss_error_scrub_state; /* dsl_scan_state_t */
971+
uint64_t pss_error_scrub_start; /* error scrub start time */
972+
uint64_t pss_error_scrub_end; /* error scrub end time */
973+
uint64_t pss_error_scrub_examined; /* error blocks issued I/O */
974+
/* error blocks that still need I/O issued */
975+
uint64_t pss_error_scrub_to_be_examined;
976+
977+
/* error scrub values not stored on disk */
978+
/* error scrub pause timestamp; zpool status passes it to ctime(), i.e. seconds since the epoch (0 = not paused) */
979+
uint64_t pss_pass_error_scrub_pause;
980+
965981
} pool_scan_stat_t;
966982

967983
typedef struct pool_removal_stat {
@@ -983,6 +999,7 @@ typedef enum dsl_scan_state {
983999
DSS_SCANNING,
9841000
DSS_FINISHED,
9851001
DSS_CANCELED,
1002+
DSS_ERRORSCRUBING,
9861003
DSS_NUM_STATES
9871004
} dsl_scan_state_t;
9881005

@@ -1277,6 +1294,7 @@ typedef enum zfs_ioc {
12771294
ZFS_IOC_POOL_TRIM, /* 0x5a50 */
12781295
ZFS_IOC_REDACT, /* 0x5a51 */
12791296
ZFS_IOC_GET_BOOKMARK_PROPS, /* 0x5a52 */
1297+
ZFS_IOC_POOL_SCRUB, /* 0x5a53 */
12801298

12811299
/*
12821300
* Linux - 3/64 numbers reserved.

include/sys/spa.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -863,8 +863,9 @@ extern void spa_l2cache_drop(spa_t *spa);
863863

864864
/* scanning */
865865
extern int spa_scan(spa_t *spa, pool_scan_func_t func);
866-
extern int spa_scan_stop(spa_t *spa);
867-
extern int spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t flag);
866+
extern int spa_scan_stop(spa_t *spa, pool_scan_func_t func);
867+
extern int spa_scrub_pause_resume(spa_t *spa, pool_scan_func_t func,
868+
pool_scrub_cmd_t flag);
868869

869870
/* spa syncing */
870871
extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
@@ -1175,6 +1176,7 @@ extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
11751176
extern void zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate);
11761177
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
11771178
extern uint64_t spa_get_errlog_size(spa_t *spa);
1179+
extern uint64_t spa_get_last_errlog_size(spa_t *spa);
11781180
extern int spa_get_errlog(spa_t *spa, void *uaddr, size_t *count);
11791181
extern void spa_errlog_rotate(spa_t *spa);
11801182
extern void spa_errlog_drain(spa_t *spa);

include/sys/spa_impl.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,10 @@ struct spa {
287287
uint64_t spa_scan_pass_exam; /* examined bytes per pass */
288288
uint64_t spa_scan_pass_issued; /* issued bytes per pass */
289289

290+
/* error scrub pause time in milliseconds */
291+
uint64_t spa_scan_pass_errorscrub_pause;
292+
/* total error scrub paused time in milliseconds */
293+
uint64_t spa_scan_pass_errorscrub_spent_paused;
290294
/*
291295
* We are in the middle of a resilver, and another resilver
292296
* is needed once this one completes. This is set iff any

include/sys/sysevent/eventdefs.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,11 @@ extern "C" {
123123
#define ESC_ZFS_TRIM_CANCEL "trim_cancel"
124124
#define ESC_ZFS_TRIM_RESUME "trim_resume"
125125
#define ESC_ZFS_TRIM_SUSPEND "trim_suspend"
126+
#define ESC_ZFS_ERRORSCRUB_START "error_scrub_start"
127+
#define ESC_ZFS_ERRORSCRUB_FINISH "error_scrub_finish"
128+
#define ESC_ZFS_ERRORSCRUB_ABORT "error_scrub_abort"
129+
#define ESC_ZFS_ERRORSCRUB_RESUME "error_scrub_resume"
130+
#define ESC_ZFS_ERRORSCRUB_PAUSED "error_scrub_paused"
126131

127132
/*
128133
* datalink subclass definitions.

0 commit comments

Comments
 (0)