Skip to content

Commit 1b9672a

Browse files
author
TulsiJain
committed
scrubbing only blocks in errorlog
Signed-off-by: TulsiJain <tulsi.jain@delphix.com> m
1 parent 48be0df commit 1b9672a

File tree

25 files changed

+1043
-64
lines changed

25 files changed

+1043
-64
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,4 +62,7 @@ cscope.*
6262
*.orig
6363
*.log
6464
venv
65+
# Ignore Eclipse files
66+
/.cproject
67+
/.project
6568

cmd/zpool/zpool_main.c

Lines changed: 100 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,8 @@ get_usage(zpool_help_t idx)
381381
return (gettext("\tinitialize [-c | -s] <pool> "
382382
"[<device> ...]\n"));
383383
case HELP_SCRUB:
384-
return (gettext("\tscrub [-s | -p] <pool> ...\n"));
384+
return (gettext("\tscrub [-s | -p] [-e] <pool>"
385+
" ...\n"));
385386
case HELP_RESILVER:
386387
return (gettext("\tresilver <pool> ...\n"));
387388
case HELP_TRIM:
@@ -6702,10 +6703,10 @@ scrub_callback(zpool_handle_t *zhp, void *data)
67026703

67036704
return (err != 0);
67046705
}
6705-
67066706
/*
6707-
* zpool scrub [-s | -p] <pool> ...
6707+
* zpool scrub [-s | -p] [-e] <pool> ...
67086708
*
6709+
* -e Only scrub blocks in the error log.
67096710
* -s Stop. Stops any in-progress scrub.
67106711
* -p Pause. Pause in-progress scrub.
67116712
*/
@@ -6718,14 +6719,21 @@ zpool_do_scrub(int argc, char **argv)
67186719
cb.cb_type = POOL_SCAN_SCRUB;
67196720
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
67206721

6722+
boolean_t is_error_scrub = B_FALSE;
6723+
boolean_t is_pause = B_FALSE;
6724+
boolean_t is_stop = B_FALSE;
6725+
67216726
/* check options */
6722-
while ((c = getopt(argc, argv, "sp")) != -1) {
6727+
while ((c = getopt(argc, argv, "pse")) != -1) {
67236728
switch (c) {
6729+
case 'e':
6730+
is_error_scrub = B_TRUE;
6731+
break;
67246732
case 's':
6725-
cb.cb_type = POOL_SCAN_NONE;
6733+
is_stop = B_TRUE;
67266734
break;
67276735
case 'p':
6728-
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
6736+
is_pause = B_TRUE;
67296737
break;
67306738
case '?':
67316739
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
@@ -6734,11 +6742,27 @@ zpool_do_scrub(int argc, char **argv)
67346742
}
67356743
}
67366744

6737-
if (cb.cb_type == POOL_SCAN_NONE &&
6738-
cb.cb_scrub_cmd == POOL_SCRUB_PAUSE) {
6739-
(void) fprintf(stderr, gettext("invalid option combination: "
6740-
"-s and -p are mutually exclusive\n"));
6745+
if (is_pause && is_stop) {
6746+
(void) fprintf(stderr, gettext("invalid option "
6747+
"combination :-s and -p are mutually exclusive\n"));
67416748
usage(B_FALSE);
6749+
} else {
6750+
if (is_error_scrub) {
6751+
cb.cb_type = POOL_ERRORSCRUB;
6752+
if (is_pause) {
6753+
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
6754+
} else if (is_stop) {
6755+
cb.cb_scrub_cmd = POOL_ERRORSCRUB_STOP;
6756+
} else {
6757+
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
6758+
}
6759+
} else {
6760+
if (is_pause) {
6761+
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
6762+
} else if (is_stop) {
6763+
cb.cb_type = POOL_SCAN_NONE;
6764+
}
6765+
}
67426766
}
67436767

67446768
cb.cb_argc = argc;
@@ -6912,6 +6936,71 @@ zpool_do_trim(int argc, char **argv)
69126936
return (error);
69136937
}
69146938

6939+
/*
6940+
* Print out detailed error scrub status.
6941+
*/
6942+
static void
6943+
print_err_scrub_status(pool_scan_stat_t *ps)
6944+
{
6945+
time_t start, end, pause;
6946+
uint64_t total_secs_left;
6947+
uint64_t secs_left, mins_left, hours_left, days_left;
6948+
uint64_t examined, to_be_examined;
6949+
6950+
(void) printf(gettext(" error scrub: "));
6951+
6952+
if (ps == NULL || ps->pss_error_scrub_func != POOL_ERRORSCRUB) {
6953+
(void) printf(gettext("no error scrubbing requested\n"));
6954+
return;
6955+
}
6956+
6957+
start = ps->pss_error_scrub_start;
6958+
end = ps->pss_error_scrub_end;
6959+
pause = ps->pss_pass_error_scrub_pause;
6960+
examined = ps->pss_error_scrub_examined;
6961+
to_be_examined = ps->pss_error_scrub_to_be_examined;
6962+
6963+
assert(ps->pss_error_scrub_func == POOL_ERRORSCRUB);
6964+
6965+
if (ps->pss_error_scrub_state == DSS_FINISHED) {
6966+
total_secs_left = end - start;
6967+
days_left = total_secs_left / 60 / 60 / 24;
6968+
hours_left = (total_secs_left / 60 / 60) % 24;
6969+
mins_left = (total_secs_left / 60) % 60;
6970+
secs_left = (total_secs_left % 60);
6971+
6972+
(void) printf(gettext("scrubbed %llu error blocks in %llu days "
6973+
"%02llu:%02llu:%02llu on %s"), (u_longlong_t)examined,
6974+
(u_longlong_t)days_left, (u_longlong_t)hours_left,
6975+
(u_longlong_t)mins_left, (u_longlong_t)secs_left,
6976+
ctime(&end));
6977+
6978+
return;
6979+
} else if (ps->pss_error_scrub_state == DSS_CANCELED) {
6980+
(void) printf(gettext("error scrub canceled on %s"),
6981+
ctime(&end));
6982+
return;
6983+
}
6984+
assert(ps->pss_error_scrub_state == DSS_ERRORSCRUBING);
6985+
6986+
/* Error scrub is in progress. */
6987+
if (pause == 0) {
6988+
(void) printf(gettext("error scrub in progress since %s"),
6989+
ctime(&start));
6990+
} else {
6991+
(void) printf(gettext("error scrub paused since %s"),
6992+
ctime(&pause));
6993+
(void) printf(gettext("\terror scrub started on %s"),
6994+
ctime(&start));
6995+
}
6996+
6997+
double fraction_done = (double)examined / (to_be_examined + examined);
6998+
(void) printf(gettext("\t%.2f%% done, issused i/o for %llu error"
6999+
" blocks"), 100 * fraction_done, (u_longlong_t)examined);
7000+
7001+
(void) printf(gettext("\n"));
7002+
}
7003+
69157004
/*
69167005
* Print out detailed scrub status.
69177006
*/
@@ -7687,6 +7776,7 @@ status_callback(zpool_handle_t *zhp, void *data)
76877776
ZPOOL_CONFIG_REMOVAL_STATS, (uint64_t **)&prs, &c);
76887777

76897778
print_scan_status(ps);
7779+
print_err_scrub_status(ps);
76907780
print_checkpoint_scan_warning(ps, pcs);
76917781
print_removal_status(zhp, prs);
76927782
print_checkpoint_status(pcs);

include/libzfs.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,11 +125,15 @@ typedef enum zfs_error {
125125
EZFS_THREADCREATEFAILED, /* thread create failed */
126126
EZFS_POSTSPLIT_ONLINE, /* onlining a disk after splitting it */
127127
EZFS_SCRUBBING, /* currently scrubbing */
128+
EZFS_ERRORSCRUBBING, /* currently error scrubbing */
129+
EZFS_ERRORSCRUB_PAUSED, /* error scrub currently paused */
128130
EZFS_NO_SCRUB, /* no active scrub */
131+
EZFS_NO_ERRORSCRUB, /* no active error scrub */
129132
EZFS_DIFF, /* general failure of zfs diff */
130133
EZFS_DIFFDATA, /* bad zfs diff data */
131134
EZFS_POOLREADONLY, /* pool is in read-only mode */
132135
EZFS_SCRUB_PAUSED, /* scrub currently paused */
136+
EZFS_SCRUB_PAUSED_CANCEL, /* scrub currently paused */
133137
EZFS_ACTIVE_POOL, /* pool is imported on a different system */
134138
EZFS_CRYPTOFAILED, /* failed to setup encryption */
135139
EZFS_NO_PENDING, /* cannot cancel, no operation is pending */

include/libzfs_core.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,8 @@ int lzc_reopen(const char *, boolean_t);
130130
int lzc_pool_checkpoint(const char *);
131131
int lzc_pool_checkpoint_discard(const char *);
132132

133+
int lzc_scrub(zfs_ioc_t, const char *, nvlist_t *, nvlist_t **);
134+
133135
#ifdef __cplusplus
134136
}
135137
#endif

include/sys/dmu.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,7 @@ typedef struct dmu_buf {
373373
#define DMU_POOL_DDT_STATS "DDT-statistics"
374374
#define DMU_POOL_CREATION_VERSION "creation_version"
375375
#define DMU_POOL_SCAN "scan"
376+
#define DMU_POOL_ERRORSCRUB "errorscrub"
376377
#define DMU_POOL_FREE_BPOBJ "free_bpobj"
377378
#define DMU_POOL_BPTREE_OBJ "bptree_obj"
378379
#define DMU_POOL_EMPTY_BPOBJ "empty_bpobj"

include/sys/dsl_scan.h

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929

3030
#include <sys/zfs_context.h>
3131
#include <sys/zio.h>
32+
#include <sys/zap.h>
3233
#include <sys/ddt.h>
3334
#include <sys/bplist.h>
3435

@@ -76,6 +77,21 @@ typedef enum dsl_scan_flags {
7677

7778
#define DSL_SCAN_FLAGS_MASK (DSF_VISIT_DS_AGAIN)
7879

80+
typedef struct dsl_errorscrub_phys {
81+
uint64_t dep_func; /* pool_scan_func_t */
82+
uint64_t dep_state; /* dsl_scan_state_t */
83+
uint64_t dep_cursor; /* zap cursor */
84+
uint64_t dep_start_time; /* error scrub start time */
85+
uint64_t dep_end_time; /* error scrub end time */
86+
uint64_t dep_to_examine; /* total error blocks to be scrubbed */
87+
uint64_t dep_examined; /* blocks remaining so far */
88+
uint64_t dep_errors; /* error scrub I/O error count */
89+
uint64_t dep_paused_flags; /* flag for paused */
90+
} dsl_errorscrub_phys_t;
91+
92+
#define ERRORSCRUB_PHYS_NUMINTS (sizeof (dsl_errorscrub_phys_t) \
93+
/ sizeof (uint64_t))
94+
7995
/*
8096
* Every pool will have one dsl_scan_t and this structure will contain
8197
* in-memory information about the scan and a pointer to the on-disk
@@ -148,11 +164,16 @@ typedef struct dsl_scan {
148164
uint64_t scn_avg_zio_size_this_txg;
149165
uint64_t scn_zios_this_txg;
150166

167+
/* zap cursor for tracking error scrub progress */
168+
zap_cursor_t errorscrub_cursor;
151169
/* members needed for syncing scan status to disk */
152170
dsl_scan_phys_t scn_phys; /* on disk representation of scan */
153171
dsl_scan_phys_t scn_phys_cached;
154172
avl_tree_t scn_queue; /* queue of datasets to scan */
155173
uint64_t scn_bytes_pending; /* outstanding data to issue */
174+
175+
/* members needed for syncing error scrub status to disk */
176+
dsl_errorscrub_phys_t errorscrub_phys;
156177
} dsl_scan_t;
157178

158179
typedef struct dsl_scan_io_queue dsl_scan_io_queue_t;
@@ -162,10 +183,12 @@ void scan_fini(void);
162183
int dsl_scan_init(struct dsl_pool *dp, uint64_t txg);
163184
void dsl_scan_fini(struct dsl_pool *dp);
164185
void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *);
165-
int dsl_scan_cancel(struct dsl_pool *);
186+
int dsl_scan_cancel(struct dsl_pool *, pool_scan_func_t func);
166187
int dsl_scan(struct dsl_pool *, pool_scan_func_t);
167188
boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp);
168-
int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd);
189+
int dsl_scrub_set_pause_resume(const struct dsl_pool *dp,
190+
pool_scrub_cmd_t cmd, pool_scan_func_t func);
191+
void dsl_errorscrub_sync(struct dsl_pool *, dmu_tx_t *);
169192
void dsl_resilver_restart(struct dsl_pool *, uint64_t txg);
170193
boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
171194
boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);
@@ -177,6 +200,7 @@ void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
177200
struct dmu_tx *tx);
178201
boolean_t dsl_scan_active(dsl_scan_t *scn);
179202
boolean_t dsl_scan_is_paused_scrub(const dsl_scan_t *scn);
203+
boolean_t dsl_errorscrub_is_paused(const dsl_scan_t *scn);
180204
void dsl_scan_freed(spa_t *spa, const blkptr_t *bp);
181205
void dsl_scan_io_queue_destroy(dsl_scan_io_queue_t *queue);
182206
void dsl_scan_io_queue_vdev_xfer(vdev_t *svd, vdev_t *tvd);

include/sys/fs/zfs.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -899,6 +899,7 @@ typedef enum pool_scan_func {
899899
POOL_SCAN_NONE,
900900
POOL_SCAN_SCRUB,
901901
POOL_SCAN_RESILVER,
902+
POOL_ERRORSCRUB,
902903
POOL_SCAN_FUNCS
903904
} pool_scan_func_t;
904905

@@ -908,6 +909,7 @@ typedef enum pool_scan_func {
908909
typedef enum pool_scrub_cmd {
909910
POOL_SCRUB_NORMAL = 0,
910911
POOL_SCRUB_PAUSE,
912+
POOL_ERRORSCRUB_STOP,
911913
POOL_SCRUB_FLAGS_END
912914
} pool_scrub_cmd_t;
913915

@@ -962,6 +964,16 @@ typedef struct pool_scan_stat {
962964
uint64_t pss_pass_scrub_spent_paused;
963965
uint64_t pss_pass_issued; /* issued bytes per scan pass */
964966
uint64_t pss_issued; /* total bytes checked by scanner */
967+
/* values not stored on disk */
968+
uint64_t pss_error_scrub_func; /* pool_scan_func_t */
969+
uint64_t pss_error_scrub_state; /* dsl_scan_state_t */
970+
uint64_t pss_error_scrub_start; /* error scrub start time */
971+
uint64_t pss_error_scrub_end; /* error scrub end time */
972+
uint64_t pss_error_scrub_examined; /* error blocks examined so far */
973+
uint64_t pss_error_scrub_to_be_examined; /* error blocks still to be examined */
974+
uint64_t pss_pass_error_scrub_pause;
975+
uint64_t pss_pass_error_scrub_spent_paused;
976+
965977
} pool_scan_stat_t;
966978

967979
typedef struct pool_removal_stat {
@@ -983,6 +995,7 @@ typedef enum dsl_scan_state {
983995
DSS_SCANNING,
984996
DSS_FINISHED,
985997
DSS_CANCELED,
998+
DSS_ERRORSCRUBING,
986999
DSS_NUM_STATES
9871000
} dsl_scan_state_t;
9881001

@@ -1277,6 +1290,7 @@ typedef enum zfs_ioc {
12771290
ZFS_IOC_POOL_TRIM, /* 0x5a50 */
12781291
ZFS_IOC_REDACT, /* 0x5a51 */
12791292
ZFS_IOC_GET_BOOKMARK_PROPS, /* 0x5a52 */
1293+
ZFS_IOC_POOL_SCRUB, /* 0x5a53 */
12801294

12811295
/*
12821296
* Linux - 3/64 numbers reserved.

include/sys/spa.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -863,8 +863,9 @@ extern void spa_l2cache_drop(spa_t *spa);
863863

864864
/* scanning */
865865
extern int spa_scan(spa_t *spa, pool_scan_func_t func);
866-
extern int spa_scan_stop(spa_t *spa);
867-
extern int spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t flag);
866+
extern int spa_scan_stop(spa_t *spa, pool_scan_func_t func);
867+
extern int spa_scrub_pause_resume(spa_t *spa, pool_scan_func_t func,
868+
pool_scrub_cmd_t flag);
868869

869870
/* spa syncing */
870871
extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
@@ -1176,6 +1177,7 @@ extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
11761177
extern void zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate);
11771178
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
11781179
extern uint64_t spa_get_errlog_size(spa_t *spa);
1180+
extern uint64_t spa_get_last_errlog_size(spa_t *spa);
11791181
extern int spa_get_errlog(spa_t *spa, void *uaddr, size_t *count);
11801182
extern void spa_errlog_rotate(spa_t *spa);
11811183
extern void spa_errlog_drain(spa_t *spa);

include/sys/spa_impl.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,10 @@ struct spa {
285285
uint64_t spa_scan_pass_exam; /* examined bytes per pass */
286286
uint64_t spa_scan_pass_issued; /* issued bytes per pass */
287287

288+
/* errorscrub pause time */
289+
uint64_t spa_scan_pass_errorscrub_pause;
290+
/* total paused */
291+
uint64_t spa_scan_pass_errorscrub_spent_paused;
288292
/*
289293
* We are in the middle of a resilver, and another resilver
290294
* is needed once this one completes. This is set iff any

include/sys/sysevent/eventdefs.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,11 @@ extern "C" {
123123
#define ESC_ZFS_TRIM_CANCEL "trim_cancel"
124124
#define ESC_ZFS_TRIM_RESUME "trim_resume"
125125
#define ESC_ZFS_TRIM_SUSPEND "trim_suspend"
126+
/*
 * Error scrub event subclasses.  Like the other ESC_ZFS_* subclass names
 * these are single tokens with no embedded whitespace.
 */
#define	ESC_ZFS_ERRORSCRUB_START	"errorscrub_start"
#define	ESC_ZFS_ERRORSCRUB_FINISH	"errorscrub_finish"
#define	ESC_ZFS_ERRORSCRUB_ABORT	"errorscrub_abort"
#define	ESC_ZFS_ERRORSCRUB_RESUME	"errorscrub_resume"
#define	ESC_ZFS_ERRORSCRUB_PAUSED	"errorscrub_paused"
126131

127132
/*
128133
* datalink subclass definitions.

0 commit comments

Comments
 (0)