Skip to content

ZIL: "crash" the ZIL if the pool suspends during fallback #17398

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmd/zilstat.in
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ cols = {
"cec": [5, 1000, "zil_commit_error_count"],
"csc": [5, 1000, "zil_commit_stall_count"],
"cSc": [5, 1000, "zil_commit_suspend_count"],
"cCc": [5, 1000, "zil_commit_crash_count"],
"ic": [5, 1000, "zil_itx_count"],
"iic": [5, 1000, "zil_itx_indirect_count"],
"iib": [5, 1024, "zil_itx_indirect_bytes"],
Expand Down
8 changes: 4 additions & 4 deletions cmd/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -1995,7 +1995,7 @@ ztest_log_write(ztest_ds_t *zd, dmu_tx_t *tx, lr_write_t *lr)
dmu_read(zd->zd_os, lr->lr_foid, lr->lr_offset, lr->lr_length,
((lr_write_t *)&itx->itx_lr) + 1, DMU_READ_NO_PREFETCH |
DMU_KEEP_CACHING) != 0) {
zil_itx_destroy(itx);
zil_itx_destroy(itx, 0);
itx = zil_itx_create(TX_WRITE, sizeof (*lr));
write_state = WR_NEED_COPY;
}
Expand Down Expand Up @@ -2965,7 +2965,7 @@ ztest_zil_commit(ztest_ds_t *zd, uint64_t id)

(void) pthread_rwlock_rdlock(&zd->zd_zilog_lock);

zil_commit(zilog, ztest_random(ZTEST_OBJECTS));
VERIFY0(zil_commit(zilog, ztest_random(ZTEST_OBJECTS)));

/*
* Remember the committed values in zd, which is in parent/child
Expand Down Expand Up @@ -7933,7 +7933,7 @@ ztest_freeze(void)
*/
while (BP_IS_HOLE(&zd->zd_zilog->zl_header->zh_log)) {
ztest_dmu_object_alloc_free(zd, 0);
zil_commit(zd->zd_zilog, 0);
VERIFY0(zil_commit(zd->zd_zilog, 0));
}

txg_wait_synced(spa_get_dsl(spa), 0);
Expand Down Expand Up @@ -7975,7 +7975,7 @@ ztest_freeze(void)
/*
* Commit all of the changes we just generated.
*/
zil_commit(zd->zd_zilog, 0);
VERIFY0(zil_commit(zd->zd_zilog, 0));
txg_wait_synced(spa_get_dsl(spa), 0);

/*
Expand Down
4 changes: 4 additions & 0 deletions include/os/freebsd/spl/sys/debug.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@
#define __maybe_unused __attribute__((unused))
#endif

#ifndef __must_check
#define __must_check __attribute__((__warn_unused_result__))
#endif

/*
* Without this, we see warnings from objtool during normal Linux builds when
* the kernel is built with CONFIG_STACK_VALIDATION=y:
Expand Down
4 changes: 4 additions & 0 deletions include/os/linux/spl/sys/debug.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@
#define __maybe_unused __attribute__((unused))
#endif

#ifndef __must_check
#define __must_check __attribute__((__warn_unused_result__))
#endif

/*
* Without this, we see warnings from objtool during normal Linux builds when
* the kernel is built with CONFIG_STACK_VALIDATION=y:
Expand Down
33 changes: 29 additions & 4 deletions include/sys/zil.h
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@ typedef enum {
WR_NUM_STATES /* number of states */
} itx_wr_state_t;

typedef void (*zil_callback_t)(void *data);
typedef void (*zil_callback_t)(void *data, int err);

typedef struct itx {
list_node_t itx_node; /* linkage on zl_itx_list */
Expand Down Expand Up @@ -498,10 +498,13 @@ typedef struct zil_stats {
* (see zil_commit_writer_stall())
* - suspend: ZIL suspended
* (see zil_commit(), zil_get_commit_list())
* - crash: ZIL crashed
* (see zil_crash(), zil_commit(), ...)
*/
kstat_named_t zil_commit_error_count;
kstat_named_t zil_commit_stall_count;
kstat_named_t zil_commit_suspend_count;
kstat_named_t zil_commit_crash_count;

/*
* Number of transactions (reads, writes, renames, etc.)
Expand Down Expand Up @@ -549,6 +552,7 @@ typedef struct zil_sums {
wmsum_t zil_commit_error_count;
wmsum_t zil_commit_stall_count;
wmsum_t zil_commit_suspend_count;
wmsum_t zil_commit_crash_count;
wmsum_t zil_itx_count;
wmsum_t zil_itx_indirect_count;
wmsum_t zil_itx_indirect_bytes;
Expand Down Expand Up @@ -577,6 +581,25 @@ typedef struct zil_sums {
#define ZIL_STAT_BUMP(zil, stat) \
ZIL_STAT_INCR(zil, stat, 1);

/*
* Flags for zil_commit_flags(). zil_commit() is a shortcut for
* zil_commit_flags(ZIL_COMMIT_FAILMODE), which is the most common use.
*/
typedef enum {
/*
* Try to commit the ZIL. If it fails, fall back to txg_wait_synced().
* If that fails, return EIO.
*/
ZIL_COMMIT_NOW = 0,

/*
* Like ZIL_COMMIT_NOW, but if the ZIL commit fails because the pool
* suspended, act according to the pool's failmode= setting (wait for
* the pool to resume, or return EIO).
*/
ZIL_COMMIT_FAILMODE = (1 << 1),
} zil_commit_flag_t;

typedef int zil_parse_blk_func_t(zilog_t *zilog, const blkptr_t *bp, void *arg,
uint64_t txg);
typedef int zil_parse_lr_func_t(zilog_t *zilog, const lr_t *lr, void *arg,
Expand Down Expand Up @@ -606,14 +629,16 @@ extern boolean_t zil_destroy(zilog_t *zilog, boolean_t keep_first);
extern void zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx);

extern itx_t *zil_itx_create(uint64_t txtype, size_t lrsize);
extern void zil_itx_destroy(itx_t *itx);
extern void zil_itx_destroy(itx_t *itx, int err);
extern void zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx);

extern void zil_async_to_sync(zilog_t *zilog, uint64_t oid);
extern void zil_commit(zilog_t *zilog, uint64_t oid);
extern void zil_commit_impl(zilog_t *zilog, uint64_t oid);
extern void zil_remove_async(zilog_t *zilog, uint64_t oid);

extern int zil_commit_flags(zilog_t *zilog, uint64_t oid,
zil_commit_flag_t flags);
extern int __must_check zil_commit(zilog_t *zilog, uint64_t oid);

extern int zil_reset(const char *osname, void *txarg);
extern int zil_claim(struct dsl_pool *dp,
struct dsl_dataset *ds, void *txarg);
Expand Down
4 changes: 4 additions & 0 deletions include/sys/zil_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ struct zilog {
uint64_t zl_cur_left; /* current burst remaining size */
uint64_t zl_cur_max; /* biggest record in current burst */
list_t zl_lwb_list; /* in-flight log write list */
list_t zl_lwb_crash_list; /* log writes in-flight at crash */
avl_tree_t zl_bp_tree; /* track bps during log parse */
clock_t zl_replay_time; /* lbolt of when replay started */
uint64_t zl_replay_blks; /* number of log blocks replayed */
Expand All @@ -245,6 +246,9 @@ struct zilog {
*/
uint64_t zl_max_block_size;

/* After crash, txg to restart zil */
uint64_t zl_restart_txg;

/* Pointer for per dataset zil sums */
zil_sums_t *zl_sums;
};
Expand Down
4 changes: 4 additions & 0 deletions lib/libspl/include/sys/debug.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,8 @@
#define __maybe_unused __attribute__((unused))
#endif

#ifndef __must_check
#define __must_check __attribute__((warn_unused_result))
#endif

#endif
9 changes: 7 additions & 2 deletions module/os/freebsd/zfs/zfs_vfsops.c
Original file line number Diff line number Diff line change
Expand Up @@ -455,8 +455,13 @@ zfs_sync(vfs_t *vfsp, int waitfor)
return (0);
}

if (zfsvfs->z_log != NULL)
zil_commit(zfsvfs->z_log, 0);
if (zfsvfs->z_log != NULL) {
error = zil_commit(zfsvfs->z_log, 0);
if (error != 0) {
zfs_exit(zfsvfs, FTAG);
return (error);
}
}

zfs_exit(zfsvfs, FTAG);
} else {
Expand Down
65 changes: 43 additions & 22 deletions module/os/freebsd/zfs/zfs_vnops_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -1186,8 +1186,8 @@ zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode,
*zpp = zp;
}

if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);
if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
error = zil_commit(zilog, 0);

zfs_exit(zfsvfs, FTAG);
return (error);
Expand Down Expand Up @@ -1316,9 +1316,8 @@ zfs_remove_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
if (xzp)
vrele(ZTOV(xzp));

if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);

if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
error = zil_commit(zilog, 0);

zfs_exit(zfsvfs, FTAG);
return (error);
Expand Down Expand Up @@ -1549,8 +1548,8 @@ zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp,

getnewvnode_drop_reserve();

if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);
if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
error = zil_commit(zilog, 0);

zfs_exit(zfsvfs, FTAG);
return (error);
Expand Down Expand Up @@ -1630,8 +1629,8 @@ zfs_rmdir_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
if (zfsvfs->z_use_namecache)
cache_vop_rmdir(dvp, vp);
out:
if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);
if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
error = zil_commit(zilog, 0);

zfs_exit(zfsvfs, FTAG);
return (error);
Expand Down Expand Up @@ -3002,8 +3001,8 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr, zidmap_t *mnt_ns)
}

out2:
if (os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);
if (err == 0 && os->os_sync == ZFS_SYNC_ALWAYS)
err = zil_commit(zilog, 0);

zfs_exit(zfsvfs, FTAG);
return (err);
Expand Down Expand Up @@ -3532,7 +3531,7 @@ zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,

out:
if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);
error = zil_commit(zilog, 0);
zfs_exit(zfsvfs, FTAG);

return (error);
Expand Down Expand Up @@ -3724,7 +3723,7 @@ zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap,
*zpp = zp;

if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);
error = zil_commit(zilog, 0);
}

zfs_exit(zfsvfs, FTAG);
Expand Down Expand Up @@ -3914,8 +3913,8 @@ zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr,
vnevent_link(ZTOV(szp), ct);
}

if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);
if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
error = zil_commit(zilog, 0);

zfs_exit(zfsvfs, FTAG);
return (error);
Expand Down Expand Up @@ -4306,7 +4305,7 @@ typedef struct {
} putpage_commit_arg_t;

static void
zfs_putpage_commit_cb(void *arg)
zfs_putpage_commit_cb(void *arg, int err)
{
putpage_commit_arg_t *pca = arg;
vm_object_t object = pca->pca_pages[0]->object;
Expand All @@ -4315,7 +4314,17 @@ zfs_putpage_commit_cb(void *arg)

for (uint_t i = 0; i < pca->pca_npages; i++) {
vm_page_t pp = pca->pca_pages[i];
vm_page_undirty(pp);

if (err == 0) {
/*
* Writeback succeeded, so undirty the page. If it
* fails, we leave it in the same state it was. That's
* most likely dirty, so it will get tried again some
* other time.
*/
vm_page_undirty(pp);
}

vm_page_sunbusy(pp);
}

Expand Down Expand Up @@ -4474,8 +4483,13 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,

out:
zfs_rangelock_exit(lr);
if (commit)
zil_commit(zfsvfs->z_log, zp->z_id);
if (commit) {
err = zil_commit(zfsvfs->z_log, zp->z_id);
if (err != 0) {
zfs_exit(zfsvfs, FTAG);
return (err);
}
}

dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, len);

Expand Down Expand Up @@ -4951,6 +4965,11 @@ struct vop_fsync_args {
static int
zfs_freebsd_fsync(struct vop_fsync_args *ap)
{
/*
* Push any dirty mmap()'d data out to the DMU and ZIL, ready for
* zil_commit() to be called in zfs_fsync().
*/
vn_flush_cached_data(ap->a_vp, B_FALSE);

return (zfs_fsync(VTOZ(ap->a_vp), 0, ap->a_td->td_ucred));
}
Expand Down Expand Up @@ -6466,9 +6485,11 @@ zfs_deallocate(struct vop_deallocate_args *ap)
if (error == 0) {
if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS ||
(ap->a_ioflag & IO_SYNC) != 0)
zil_commit(zilog, zp->z_id);
*ap->a_offset = off + len;
*ap->a_len = 0;
error = zil_commit(zilog, zp->z_id);
if (error == 0) {
*ap->a_offset = off + len;
*ap->a_len = 0;
}
}

zfs_exit(zfsvfs, FTAG);
Expand Down
12 changes: 6 additions & 6 deletions module/os/freebsd/zfs/zvol_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -727,9 +727,9 @@ zvol_strategy_impl(zv_request_t *zvr)
break;
}

if (commit) {
if (error == 0 && commit) {
commit:
zil_commit(zv->zv_zilog, ZVOL_OBJ);
error = zil_commit(zv->zv_zilog, ZVOL_OBJ);
}
resume:
rw_exit(&zv->zv_suspend_lock);
Expand Down Expand Up @@ -906,8 +906,8 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag)
zfs_rangelock_exit(lr);
int64_t nwritten = start_resid - zfs_uio_resid(&uio);
dataset_kstats_update_write_kstats(&zv->zv_kstat, nwritten);
if (commit)
zil_commit(zv->zv_zilog, ZVOL_OBJ);
if (error == 0 && commit)
error = zil_commit(zv->zv_zilog, ZVOL_OBJ);
rw_exit(&zv->zv_suspend_lock);

return (error);
Expand Down Expand Up @@ -1117,7 +1117,7 @@ zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data,
case DIOCGFLUSH:
rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
if (zv->zv_zilog != NULL)
zil_commit(zv->zv_zilog, ZVOL_OBJ);
error = zil_commit(zv->zv_zilog, ZVOL_OBJ);
rw_exit(&zv->zv_suspend_lock);
break;
case DIOCGDELETE:
Expand Down Expand Up @@ -1152,7 +1152,7 @@ zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data,
}
zfs_rangelock_exit(lr);
if (sync)
zil_commit(zv->zv_zilog, ZVOL_OBJ);
error = zil_commit(zv->zv_zilog, ZVOL_OBJ);
rw_exit(&zv->zv_suspend_lock);
break;
case DIOCGSTRIPESIZE:
Expand Down
Loading
Loading