Skip to content

Commit e1dbb3c

Browse files
committed
zfs: support force exporting pools
This is primarily of use when a pool has lost its disk and the user doesn't care about any pending (or otherwise) transactions.

Implement various control methods to make this feasible:

- txg_wait can now take a NOSUSPEND flag, in which case the caller will be alerted if their txg can't be committed. This is primarily of interest for callers that would normally pass TXG_WAIT, but don't want to wait if the pool becomes suspended, which allows unwinding in some cases, specifically when one is attempting a non-forced export. Without this, the non-forced export would preclude a forced export by virtue of holding the namespace lock indefinitely.
- txg_wait also returns failure for TXG_WAIT users if a pool is actually being force exported. Adjust most callers to tolerate this.
- spa_config_enter_flags now takes a NOSUSPEND flag to the same effect.
- A DMU objset initiator may be set on an objset that is being forcibly exported / unmounted.
- An SPA export initiator may be set on a pool that is being forcibly exported.
- DMU send/recv now use an interruption mechanism which relies on the SPA export initiator being able to enumerate datasets and close any send/recv streams, causing their EINTR paths to be invoked.
- ZIO now has a cancel entry point, which tells all suspended zios to fail, and which suppresses the failures for non-CANFAIL users.
- metaslab, etc. cleanup, which consists of simply throwing away any changes that were not able to be synced out.
- Linux specific: introduce a new tunable, zfs_forced_export_unmount_enabled, which allows the filesystem to remain in a modified 'unmounted' state upon exiting zpl_umount_begin, to achieve parity with FreeBSD and illumos, which have VFS-level support for yanking filesystems out from under users. However, this only helps when the user is actively performing I/O, while not sitting on the filesystem. In particular, this allows test #3 below to pass on Linux.
- Add basic logic to zpool to indicate a force-exporting pool, instead of crashing due to lack of config, etc.

Add tests which cover the basic use cases:

- Force export while a send is in progress
- Force export while a recv is in progress
- Force export while POSIX I/O is in progress

This change modifies the libzfs ABI:

- New ZPOOL_STATUS_FORCE_EXPORTING zpool_status_t enum value.
- New field libzfs_force_export for libzfs_handle.

Signed-off-by: Will Andrews <will@firepipe.net>
Sponsored-by: Klara, Inc.
Sponsored-by: Catalogics, Inc.
Closes #3461
1 parent 52cb284 commit e1dbb3c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

85 files changed

+16726
-10409
lines changed

cmd/zpool/zpool_main.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -356,7 +356,7 @@ get_usage(zpool_help_t idx)
356356
case HELP_DETACH:
357357
return (gettext("\tdetach <pool> <device>\n"));
358358
case HELP_EXPORT:
359-
return (gettext("\texport [-af] <pool> ...\n"));
359+
return (gettext("\texport [-afF] <pool> ...\n"));
360360
case HELP_HISTORY:
361361
return (gettext("\thistory [-il] [<pool>] ...\n"));
362362
case HELP_IMPORT:
@@ -1813,7 +1813,7 @@ zpool_export_one(zpool_handle_t *zhp, void *data)
18131813
{
18141814
export_cbdata_t *cb = data;
18151815

1816-
if (zpool_disable_datasets(zhp, cb->force) != 0)
1816+
if (zpool_disable_datasets(zhp, cb->force || cb->hardforce) != 0)
18171817
return (1);
18181818

18191819
/* The history must be logged as part of the export */
@@ -1834,10 +1834,13 @@ zpool_export_one(zpool_handle_t *zhp, void *data)
18341834
*
18351835
* -a Export all pools
18361836
* -f Forcefully unmount datasets
1837+
* -F Forcefully export, dropping all outstanding dirty data
18371838
*
18381839
* Export the given pools. By default, the command will attempt to cleanly
18391840
* unmount any active datasets within the pool. If the '-f' flag is specified,
1840-
* then the datasets will be forcefully unmounted.
1841+
* then the datasets will be forcefully unmounted. If the '-F' flag is
1842+
* specified, the pool's dirty data, if any, will simply be dropped after a
1843+
* best-effort attempt to forcibly stop all activity.
18411844
*/
18421845
int
18431846
zpool_do_export(int argc, char **argv)

include/libzfs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,7 @@ typedef enum {
393393
ZPOOL_STATUS_REBUILD_SCRUB, /* recommend scrubbing the pool */
394394
ZPOOL_STATUS_NON_NATIVE_ASHIFT, /* (e.g. 512e dev with ashift of 9) */
395395
ZPOOL_STATUS_COMPATIBILITY_ERR, /* bad 'compatibility' property */
396+
ZPOOL_STATUS_FORCE_EXPORTING, /* pool is being force exported */
396397

397398
/*
398399
* Finally, the following indicates a healthy pool.

include/libzfs_impl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ struct libzfs_handle {
7272
boolean_t libzfs_prop_debug;
7373
regex_t libzfs_urire;
7474
uint64_t libzfs_max_nvlist;
75+
boolean_t libzfs_force_export;
7576
};
7677

7778
struct zfs_handle {

include/os/freebsd/spl/sys/thread.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,7 @@
3131

3232
#define getcomm() curthread->td_name
3333
#define getpid() curthread->td_tid
34+
#define thread_signal spl_kthread_signal
35+
extern int spl_kthread_signal(kthread_t *tsk, int sig);
36+
3437
#endif

include/os/linux/spl/sys/thread.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ typedef void (*thread_func_t)(void *);
5555
#func, arg, len, pp, state, pri)
5656
/* END CSTYLED */
5757

58+
#define thread_signal(t, s) spl_kthread_signal(t, s)
5859
#define thread_exit() __thread_exit()
5960
#define thread_join(t) VERIFY(0)
6061
#define curthread current
@@ -67,6 +68,7 @@ extern kthread_t *__thread_create(caddr_t stk, size_t stksize,
6768
extern void __thread_exit(void);
6869
extern struct task_struct *spl_kthread_create(int (*func)(void *),
6970
void *data, const char namefmt[], ...);
71+
extern int spl_kthread_signal(kthread_t *tsk, int sig);
7072

7173
extern proc_t p0;
7274

include/os/linux/zfs/sys/zfs_vfsops_os.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,8 @@ struct zfsvfs {
101101
boolean_t z_utf8; /* utf8-only */
102102
int z_norm; /* normalization flags */
103103
boolean_t z_relatime; /* enable relatime mount option */
104-
boolean_t z_unmounted; /* unmounted */
104+
boolean_t z_unmounted; /* mount status */
105+
boolean_t z_force_unmounted; /* force-unmounted status */
105106
rrmlock_t z_teardown_lock;
106107
krwlock_t z_teardown_inactive_lock;
107108
list_t z_all_znodes; /* all znodes in the fs */

include/os/linux/zfs/sys/zfs_znode_impl.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,14 +80,27 @@ extern "C" {
8080
#define ZFS_ENTER_ERROR(zfsvfs, error) \
8181
do { \
8282
rrm_enter_read(&(zfsvfs)->z_teardown_lock, FTAG); \
83-
if ((zfsvfs)->z_unmounted) { \
83+
if ((zfsvfs)->z_unmounted == B_TRUE || \
84+
(zfsvfs)->z_force_unmounted == B_TRUE) { \
8485
ZFS_EXIT(zfsvfs); \
8586
return (error); \
8687
} \
8788
} while (0)
8889
#define ZFS_ENTER(zfsvfs) ZFS_ENTER_ERROR(zfsvfs, EIO)
8990
#define ZPL_ENTER(zfsvfs) ZFS_ENTER_ERROR(zfsvfs, -EIO)
9091

92+
/* ZFS_ENTER but ok with forced unmount having begun */
93+
#define _ZFS_ENTER_UNMOUNTOK(zfsvfs, error) \
94+
do { \
95+
rrm_enter_read(&(zfsvfs)->z_teardown_lock, FTAG); \
96+
if ((zfsvfs)->z_unmounted == B_TRUE) { \
97+
ZFS_EXIT(zfsvfs); \
98+
return (error); \
99+
} \
100+
} while (0)
101+
#define ZFS_ENTER_UNMOUNTOK(zfsvfs) _ZFS_ENTER_UNMOUNTOK(zfsvfs, EIO)
102+
#define ZPL_ENTER_UNMOUNTOK(zfsvfs) _ZFS_ENTER_UNMOUNTOK(zfsvfs, -EIO)
103+
91104
/* Must be called before exiting the operation. */
92105
#define ZFS_EXIT(zfsvfs) \
93106
do { \

include/sys/arc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,7 @@ void l2arc_fini(void);
329329
void l2arc_start(void);
330330
void l2arc_stop(void);
331331
void l2arc_spa_rebuild_start(spa_t *spa);
332+
void l2arc_spa_rebuild_stop(spa_t *spa);
332333

333334
#ifndef _KERNEL
334335
extern boolean_t arc_watch;

include/sys/dmu.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,7 @@ typedef enum dmu_object_type {
276276
#define TXG_NOWAIT (0ULL)
277277
#define TXG_WAIT (1ULL<<0)
278278
#define TXG_NOTHROTTLE (1ULL<<1)
279+
#define TXG_NOSUSPEND (1ULL<<2)
279280

280281
void byteswap_uint64_array(void *buf, size_t size);
281282
void byteswap_uint32_array(void *buf, size_t size);

include/sys/dmu_impl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,7 @@ typedef struct dmu_sendstatus {
241241
list_node_t dss_link;
242242
int dss_outfd;
243243
proc_t *dss_proc;
244+
kthread_t *dss_thread;
244245
offset_t *dss_off;
245246
uint64_t dss_blocks; /* blocks visited during the sending process */
246247
} dmu_sendstatus_t;

0 commit comments

Comments
 (0)