Skip to content

Commit 6ec492c

Browse files
oshogbo, wca, allanjude
committed
zfs: support force exporting pools
This is primarily of use when a pool has lost its disk, while the user doesn't care about any pending (or otherwise) transactions.

Implement various control methods to make this feasible:
- txg_wait can now take a NOSUSPEND flag, in which case the caller will be alerted if their txg can't be committed. This is primarily of interest for callers that would normally pass TXG_WAIT, but don't want to wait if the pool becomes suspended, which allows unwinding in some cases, specifically when one is attempting a non-forced export. Without this, the non-forced export would preclude a forced export by virtue of holding the namespace lock indefinitely.
- txg_wait also returns failure for TXG_WAIT users if a pool is actually being force exported. Adjust most callers to tolerate this.
- spa_config_enter_flags now takes a NOSUSPEND flag to the same effect.
- DMU objset initiator which may be set on an objset being forcibly exported / unmounted.
- SPA export initiator may be set on a pool being forcibly exported.
- DMU send/recv now use an interruption mechanism which relies on the SPA export initiator being able to enumerate datasets and closing any send/recv streams, causing their EINTR paths to be invoked.
- ZIO now has a cancel entry point, which tells all suspended zios to fail, and which suppresses the failures for non-CANFAIL users.
- metaslab, etc. cleanup, which consists of simply throwing away any changes that were not able to be synced out.
- Linux specific: introduce a new tunable, zfs_forced_export_unmount_enabled, which allows the filesystem to remain in a modified 'unmounted' state upon exiting zpl_umount_begin, to achieve parity with FreeBSD and illumos, which have VFS-level support for yanking filesystems out from under users. However, this only helps when the user is actively performing I/O, while not sitting on the filesystem. In particular, this allows test #3 below to pass on Linux.
- Add basic logic to zpool to indicate a force-exporting pool, instead of crashing due to lack of config, etc.

Add tests which cover the basic use cases:
- Force export while a send is in progress
- Force export while a recv is in progress
- Force export while POSIX I/O is in progress

This change modifies the libzfs ABI:
- New ZPOOL_STATUS_FORCE_EXPORTING zpool_status_t enum value.
- New field libzfs_force_export for libzfs_handle.

Co-Authored-by: Will Andrews <will@firepipe.net>
Co-Authored-by: Allan Jude <allan@klarasystems.com>
Sponsored-by: Klara, Inc.
Sponsored-by: Catalogics, Inc.
Sponsored-by: Wasabi Technology, Inc.
Closes openzfs#3461
Signed-off-by: Will Andrews <will@firepipe.net>
Signed-off-by: Allan Jude <allan@klarasystems.com>
Signed-off-by: Mariusz Zaborski <mariusz.zaborski@klarasystems.com>
1 parent 482eeef commit 6ec492c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

91 files changed

+2349
-426
lines changed

cmd/zpool/zpool_main.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,7 @@ get_usage(zpool_help_t idx)
363363
case HELP_DETACH:
364364
return (gettext("\tdetach <pool> <device>\n"));
365365
case HELP_EXPORT:
366-
return (gettext("\texport [-af] <pool> ...\n"));
366+
return (gettext("\texport [-afF] <pool> ...\n"));
367367
case HELP_HISTORY:
368368
return (gettext("\thistory [-il] [<pool>] ...\n"));
369369
case HELP_IMPORT:
@@ -1889,7 +1889,7 @@ zpool_do_destroy(int argc, char **argv)
18891889
return (1);
18901890
}
18911891

1892-
if (zpool_disable_datasets(zhp, force) != 0) {
1892+
if (zpool_disable_datasets(zhp, force, FALSE) != 0) {
18931893
(void) fprintf(stderr, gettext("could not destroy '%s': "
18941894
"could not unmount datasets\n"), zpool_get_name(zhp));
18951895
zpool_close(zhp);
@@ -1919,7 +1919,7 @@ zpool_export_one(zpool_handle_t *zhp, void *data)
19191919
{
19201920
export_cbdata_t *cb = data;
19211921

1922-
if (zpool_disable_datasets(zhp, cb->force) != 0)
1922+
if (zpool_disable_datasets(zhp, cb->force, cb->hardforce) != 0)
19231923
return (1);
19241924

19251925
/* The history must be logged as part of the export */
@@ -1940,10 +1940,13 @@ zpool_export_one(zpool_handle_t *zhp, void *data)
19401940
*
19411941
* -a Export all pools
19421942
* -f Forcefully unmount datasets
1943+
* -F Forcefully export, dropping all outstanding dirty data
19431944
*
19441945
* Export the given pools. By default, the command will attempt to cleanly
19451946
* unmount any active datasets within the pool. If the '-f' flag is specified,
1946-
* then the datasets will be forcefully unmounted.
1947+
* then the datasets will be forcefully unmounted. If the '-F' flag is
1948+
* specified, the pool's dirty data, if any, will simply be dropped after a
1949+
* best-effort attempt to forcibly stop all activity.
19471950
*/
19481951
int
19491952
zpool_do_export(int argc, char **argv)

include/libzfs.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,7 @@ typedef enum {
424424
ZPOOL_STATUS_NON_NATIVE_ASHIFT, /* (e.g. 512e dev with ashift of 9) */
425425
ZPOOL_STATUS_COMPATIBILITY_ERR, /* bad 'compatibility' property */
426426
ZPOOL_STATUS_INCOMPATIBLE_FEAT, /* feature set outside compatibility */
427+
ZPOOL_STATUS_FORCE_EXPORTING, /* pool is being force exported */
427428

428429
/*
429430
* Finally, the following indicates a healthy pool.
@@ -982,10 +983,16 @@ _LIBZFS_H int zfs_smb_acl_rename(libzfs_handle_t *, char *, char *, char *,
982983
* sharing/unsharing them.
983984
*/
984985
_LIBZFS_H int zpool_enable_datasets(zpool_handle_t *, const char *, int);
985-
_LIBZFS_H int zpool_disable_datasets(zpool_handle_t *, boolean_t);
986+
_LIBZFS_H int zpool_disable_datasets(zpool_handle_t *, boolean_t, boolean_t);
986987
_LIBZFS_H void zpool_disable_datasets_os(zpool_handle_t *, boolean_t);
987988
_LIBZFS_H void zpool_disable_volume_os(const char *);
988989

990+
/*
991+
* Procedure to inform os that we have started force unmount (linux specific).
992+
*/
993+
_LIBZFS_H void zpool_unmount_mark_hard_force_begin(zpool_handle_t *zhp);
994+
_LIBZFS_H void zpool_unmount_mark_hard_force_end(zpool_handle_t *zhp);
995+
989996
/*
990997
* Parse a features file for -o compatibility
991998
*/

include/os/freebsd/spl/sys/thread.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,7 @@
3131

3232
#define getcomm() curthread->td_name
3333
#define getpid() curthread->td_tid
34+
#define thread_signal spl_kthread_signal
35+
extern int spl_kthread_signal(kthread_t *tsk, int sig);
36+
3437
#endif

include/os/freebsd/zfs/sys/zfs_znode_impl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,8 @@ zfs_enter(zfsvfs_t *zfsvfs, const char *tag)
135135
return (0);
136136
}
137137

138+
#define zfs_enter_unmountok zfs_enter
139+
138140
/* Must be called before exiting the vop */
139141
static inline void
140142
zfs_exit(zfsvfs_t *zfsvfs, const char *tag)

include/os/linux/spl/sys/thread.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ typedef void (*thread_func_t)(void *);
5353
__thread_create(stk, stksize, (thread_func_t)func, #func, \
5454
arg, len, pp, state, pri)
5555

56+
#define thread_signal(t, s) spl_kthread_signal(t, s)
5657
#define thread_exit() spl_thread_exit()
5758
#define thread_join(t) VERIFY(0)
5859
#define curthread current
@@ -64,6 +65,7 @@ extern kthread_t *__thread_create(caddr_t stk, size_t stksize,
6465
int state, pri_t pri);
6566
extern struct task_struct *spl_kthread_create(int (*func)(void *),
6667
void *data, const char namefmt[], ...);
68+
extern int spl_kthread_signal(kthread_t *tsk, int sig);
6769

6870
static inline __attribute__((noreturn)) void
6971
spl_thread_exit(void)

include/os/linux/zfs/sys/zfs_vfsops_os.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,8 @@ struct zfsvfs {
101101
boolean_t z_utf8; /* utf8-only */
102102
int z_norm; /* normalization flags */
103103
boolean_t z_relatime; /* enable relatime mount option */
104-
boolean_t z_unmounted; /* unmounted */
104+
boolean_t z_unmounted; /* mount status */
105+
boolean_t z_force_unmounted; /* force-unmounted status */
105106
rrmlock_t z_teardown_lock;
106107
krwlock_t z_teardown_inactive_lock;
107108
list_t z_all_znodes; /* all znodes in the fs */

include/os/linux/zfs/sys/zfs_znode_impl.h

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -98,24 +98,39 @@ extern "C" {
9898
#define zhold(zp) VERIFY3P(igrab(ZTOI((zp))), !=, NULL)
9999
#define zrele(zp) iput(ZTOI((zp)))
100100

101+
#define zfsvfs_is_unmounted(zfsvfs) \
102+
((zfsvfs)->z_unmounted || (zfsvfs)->z_force_unmounted)
103+
104+
/* Must be called before exiting the operation. */
105+
static inline void
106+
zfs_exit(zfsvfs_t *zfsvfs, const char *tag)
107+
{
108+
zfs_exit_fs(zfsvfs);
109+
ZFS_TEARDOWN_EXIT_READ(zfsvfs, tag);
110+
}
111+
101112
/* Called on entry to each ZFS inode and vfs operation. */
102113
static inline int
103114
zfs_enter(zfsvfs_t *zfsvfs, const char *tag)
104115
{
105116
ZFS_TEARDOWN_ENTER_READ(zfsvfs, tag);
106-
if (unlikely(zfsvfs->z_unmounted)) {
117+
if (unlikely(zfsvfs_is_unmounted(zfsvfs))) {
107118
ZFS_TEARDOWN_EXIT_READ(zfsvfs, tag);
108119
return (SET_ERROR(EIO));
109120
}
110121
return (0);
111122
}
112123

113-
/* Must be called before exiting the operation. */
114-
static inline void
115-
zfs_exit(zfsvfs_t *zfsvfs, const char *tag)
124+
/* zfs_enter() but ok with forced unmount having begun */
125+
static inline int
126+
zfs_enter_unmountok(zfsvfs_t *zfsvfs, const char *tag)
116127
{
117-
zfs_exit_fs(zfsvfs);
118-
ZFS_TEARDOWN_EXIT_READ(zfsvfs, tag);
128+
ZFS_TEARDOWN_ENTER_READ(zfsvfs, tag);
129+
if (unlikely((zfsvfs)->z_unmounted == B_TRUE)) {
130+
zfs_exit(zfsvfs, tag);
131+
return (SET_ERROR(EIO));
132+
}
133+
return (0);
119134
}
120135

121136
static inline int

include/sys/arc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,7 @@ void l2arc_fini(void);
338338
void l2arc_start(void);
339339
void l2arc_stop(void);
340340
void l2arc_spa_rebuild_start(spa_t *spa);
341+
void l2arc_spa_rebuild_stop(spa_t *spa);
341342

342343
#ifndef _KERNEL
343344
extern boolean_t arc_watch;

include/sys/dmu.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,7 @@ typedef enum dmu_object_type {
283283
#define TXG_NOWAIT (0ULL)
284284
#define TXG_WAIT (1ULL<<0)
285285
#define TXG_NOTHROTTLE (1ULL<<1)
286+
#define TXG_NOSUSPEND (1ULL<<2)
286287

287288
void byteswap_uint64_array(void *buf, size_t size);
288289
void byteswap_uint32_array(void *buf, size_t size);

include/sys/dmu_impl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,7 @@ typedef struct dmu_sendstatus {
241241
list_node_t dss_link;
242242
int dss_outfd;
243243
proc_t *dss_proc;
244+
kthread_t *dss_thread;
244245
offset_t *dss_off;
245246
uint64_t dss_blocks; /* blocks visited during the sending process */
246247
} dmu_sendstatus_t;

0 commit comments

Comments
 (0)