Skip to content

Commit fb53e7c

Browse files
Will Andrewsallanjude
authored andcommitted
Add Linux namespace delegation support
This allows ZFS datasets to be delegated to a user/mount namespace. Within that namespace, only the delegated datasets are visible. This works very similarly to Zones/Jails on other ZFS OSes.

As a user:
```
$ unshare -Um
$ zfs list
no datasets available
$ readlink /proc/$$/ns/user
user:[4026532291]
```

As root:
```
# zfs list
NAME                            ZONED  MOUNTPOINT
containers                      off    /containers
containers/host                 off    /containers/host
containers/host/child           off    /containers/host/child
containers/host/child/gchild    off    /containers/host/child/gchild
containers/unpriv               on     /unpriv
containers/unpriv/child         on     /unpriv/child
containers/unpriv/child/gchild  on     /unpriv/child/gchild
# zfs userns add 4026532291 containers/unpriv
```

Back to the user:
```
$ zfs list
NAME                     USED  AVAIL  REFER  MOUNTPOINT
containers               129M  47.8G    24K  /containers
containers/unpriv        128M  47.8G    24K  /unpriv
containers/unpriv/child  128M  47.8G   128M  /unpriv/child
```

Signed-off-by: Will Andrews <will.andrews@klarasystems.com>
Signed-off-by: Allan Jude <allan@klarasystems.com>
Sponsored-by: Buddy <https://buddy.works>
1 parent ba91311 commit fb53e7c

File tree

21 files changed

+705
-11
lines changed

21 files changed

+705
-11
lines changed

cmd/zfs/zfs_main.c

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,10 @@ static int zfs_do_jail(int argc, char **argv);
127127
static int zfs_do_unjail(int argc, char **argv);
128128
#endif
129129

130+
#ifdef __linux__
131+
static int zfs_do_userns(int argc, char **argv);
132+
#endif
133+
130134
/*
131135
* Enable a reasonable set of defaults for libumem debugging on DEBUG builds.
132136
*/
@@ -184,6 +188,7 @@ typedef enum {
184188
HELP_JAIL,
185189
HELP_UNJAIL,
186190
HELP_WAIT,
191+
HELP_USERNS,
187192
} zfs_help_t;
188193

189194
typedef struct zfs_command {
@@ -254,6 +259,10 @@ static zfs_command_t command_table[] = {
254259
{ "jail", zfs_do_jail, HELP_JAIL },
255260
{ "unjail", zfs_do_unjail, HELP_UNJAIL },
256261
#endif
262+
263+
#ifdef __linux__
264+
{ "userns", zfs_do_userns, HELP_USERNS },
265+
#endif
257266
};
258267

259268
#define NCOMMAND (sizeof (command_table) / sizeof (command_table[0]))
@@ -414,6 +423,8 @@ get_usage(zfs_help_t idx)
414423
return (gettext("\tunjail <jailid|jailname> <filesystem>\n"));
415424
case HELP_WAIT:
416425
return (gettext("\twait [-t <activity>] <filesystem>\n"));
426+
case HELP_USERNS:
427+
return (gettext("\tuserns <add|del> <nsnum> <filesystem>\n"));
417428
}
418429

419430
abort();
@@ -8729,6 +8740,55 @@ main(int argc, char **argv)
87298740
return (ret);
87308741
}
87318742

8743+
/*
8744+
* zfs userns add|del nsnum filesystem
8745+
*
8746+
* Add or delete the given dataset to/from the namespace.
8747+
*/
8748+
#ifdef __linux__
8749+
static int
8750+
zfs_do_userns(int argc, char **argv)
8751+
{
8752+
zfs_handle_t *zhp;
8753+
unsigned long nsnum;
8754+
int ret;
8755+
int attach;
8756+
8757+
if (argc < 4) {
8758+
(void) fprintf(stderr, gettext("missing argument(s)\n"));
8759+
usage(B_FALSE);
8760+
}
8761+
if (argc > 4) {
8762+
(void) fprintf(stderr, gettext("too many arguments\n"));
8763+
usage(B_FALSE);
8764+
}
8765+
8766+
if (strcmp(argv[1], "add") == 0) {
8767+
attach = 1;
8768+
} else if (strcmp(argv[1], "del") == 0) {
8769+
attach = 0;
8770+
} else {
8771+
(void) fprintf(stderr, gettext("invalid subcommand\n"));
8772+
usage(B_FALSE);
8773+
}
8774+
8775+
nsnum = strtoul(argv[2], NULL, 10);
8776+
if (nsnum > UINT_MAX) {
8777+
(void) fprintf(stderr, gettext("invalid namespace number\n"));
8778+
usage(B_FALSE);
8779+
}
8780+
8781+
zhp = zfs_open(g_zfs, argv[3], ZFS_TYPE_FILESYSTEM);
8782+
if (zhp == NULL)
8783+
return (1);
8784+
8785+
ret = (zfs_userns(zhp, (unsigned int)nsnum, attach) != 0);
8786+
8787+
zfs_close(zhp);
8788+
return (ret);
8789+
}
8790+
#endif
8791+
87328792
#ifdef __FreeBSD__
87338793
#include <sys/jail.h>
87348794
#include <jail.h>

config/kernel-user-ns-inum.m4

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
dnl #
2+
dnl # 3.18 API change
3+
dnl # struct user_namespace inum moved from .proc_inum to .ns.inum.
4+
dnl #
5+
dnl # Registers the "user_ns_common_inum" test source: a snippet that
dnl # includes <linux/user_namespace.h> and assigns to the .ns.inum member
dnl # of a struct user_namespace.
AC_DEFUN([ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM], [
6+
ZFS_LINUX_TEST_SRC([user_ns_common_inum], [
7+
#include <linux/user_namespace.h>
8+
], [
9+
struct user_namespace uns;
10+
uns.ns.inum = 0;
11+
])
12+
])
13+
14+
dnl # Reports the outcome of the "user_ns_common_inum" test and defines
dnl # HAVE_USER_NS_COMMON_INUM when the result is positive; nothing is
dnl # defined otherwise.
AC_DEFUN([ZFS_AC_KERNEL_USER_NS_COMMON_INUM], [
15+
AC_MSG_CHECKING([whether user_namespace->ns.inum exists])
16+
ZFS_LINUX_TEST_RESULT([user_ns_common_inum], [
17+
AC_MSG_RESULT(yes)
18+
AC_DEFINE(HAVE_USER_NS_COMMON_INUM, 1,
19+
[user_namespace->ns.inum exists])
20+
],[
21+
AC_MSG_RESULT(no)
22+
])
23+
])

config/kernel.m4

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
132132
ZFS_AC_KERNEL_SRC_SIGNAL_STOP
133133
ZFS_AC_KERNEL_SRC_SIGINFO
134134
ZFS_AC_KERNEL_SRC_SET_SPECIAL_STATE
135+
ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM
135136
136137
AC_MSG_CHECKING([for available kernel interfaces])
137138
ZFS_LINUX_TEST_COMPILE_ALL([kabi])
@@ -237,6 +238,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
237238
ZFS_AC_KERNEL_SIGNAL_STOP
238239
ZFS_AC_KERNEL_SIGINFO
239240
ZFS_AC_KERNEL_SET_SPECIAL_STATE
241+
ZFS_AC_KERNEL_USER_NS_COMMON_INUM
240242
])
241243

242244
dnl #

include/libzfs.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -960,6 +960,15 @@ _LIBZFS_H int zpool_nextboot(libzfs_handle_t *, uint64_t, uint64_t,
960960

961961
#endif /* __FreeBSD__ */
962962

963+
#ifdef __linux__
964+
965+
/*
966+
* Add or delete the given filesystem to/from the given user namespace.
967+
*/
968+
_LIBZFS_H int zfs_userns(zfs_handle_t *zhp, unsigned int nsnum, int attach);
969+
970+
#endif
971+
963972
#ifdef __cplusplus
964973
}
965974
#endif

include/os/linux/spl/sys/zone.h

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,34 @@
2525
#define _SPL_ZONE_H
2626

2727
#include <sys/byteorder.h>
28+
#include <sys/cred.h>
2829

29-
#define GLOBAL_ZONEID 0
30+
#include <linux/cred.h>
31+
#include <linux/user_namespace.h>
3032

31-
#define zone_dataset_visible(x, y) (1)
32-
#define crgetzoneid(x) (GLOBAL_ZONEID)
33-
#define INGLOBALZONE(z) (1)
33+
/*
34+
* Attach the given dataset to the given user namespace.
35+
*/
36+
extern int zone_dataset_attach(cred_t *, const char *, unsigned int);
37+
38+
/*
39+
* Detach the given dataset from the given user namespace.
40+
*/
41+
extern int zone_dataset_detach(cred_t *, const char *, unsigned int);
42+
43+
/*
44+
* Returns true if the named pool/dataset is visible in the current zone.
45+
*/
46+
extern int zone_dataset_visible(const char *dataset, int *write);
47+
48+
int spl_zone_init(void);
49+
void spl_zone_fini(void);
50+
51+
extern unsigned int crgetzoneid(const cred_t *);
52+
extern unsigned int global_zoneid(void);
53+
extern boolean_t inglobalzone(proc_t *);
54+
55+
#define INGLOBALZONE(x) inglobalzone(x)
56+
#define GLOBAL_ZONEID global_zoneid()
3457

3558
#endif /* SPL_ZONE_H */

lib/libspl/include/sys/types.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
#include <inttypes.h>
4545
#endif /* HAVE_INTTYPES */
4646

47-
typedef int zoneid_t;
47+
typedef uint zoneid_t;
4848
typedef int projid_t;
4949

5050
/*

lib/libspl/include/zone.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,13 @@
3333
extern "C" {
3434
#endif
3535

36-
#define GLOBAL_ZONEID 0
36+
/*
37+
* The inode number the kernel hard-codes for its root (initial) user namespace. A "better" way to get
38+
* this would be by using ioctl_ns(2), but this would need to be performed
39+
* recursively on NS_GET_PARENT and then NS_GET_USERNS. Also, that's only
40+
* supported since Linux 4.9.
41+
*/
42+
#define GLOBAL_ZONEID 4026531837U
3743

3844
extern zoneid_t getzoneid(void);
3945

lib/libspl/os/linux/zone.c

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,49 @@
2323
* Use is subject to license terms.
2424
*/
2525

26+
#include <unistd.h>
27+
#include <stdio.h>
28+
#include <errno.h>
29+
#include <stdlib.h>
30+
#include <limits.h>
31+
#include <string.h>
32+
2633
#include <zone.h>
2734

2835
zoneid_t
2936
getzoneid()
3037
{
31-
return (GLOBAL_ZONEID);
38+
zoneid_t z = 0;
39+
char path[PATH_MAX];
40+
char buf[128] = { '\0' };
41+
char *cp, *cp_end;
42+
unsigned long n;
43+
int c;
44+
ssize_t r;
45+
46+
c = snprintf(path, sizeof (path), "/proc/%d/ns/user", getpid());
47+
/* This API doesn't have any error checking... */
48+
if ((size_t)c >= sizeof (path))
49+
goto out;
50+
51+
r = readlink(path, buf, sizeof (buf) - 1);
52+
if ((size_t)r >= sizeof (buf))
53+
goto out;
54+
55+
cp = strchr(buf, '[');
56+
if (cp == NULL)
57+
goto out;
58+
cp++;
59+
cp_end = strchr(cp, ']');
60+
if (cp_end == NULL)
61+
goto out;
62+
63+
*cp_end = '\0';
64+
n = strtoul(cp, NULL, 10);
65+
if (errno == ERANGE)
66+
goto out;
67+
z = (zoneid_t)n;
68+
69+
out:
70+
return (z);
3271
}

lib/libzfs/os/linux/libzfs_util_os.c

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,3 +213,56 @@ zfs_version_kernel(char *version, int len)
213213

214214
return (0);
215215
}
216+
217+
/*
218+
* Add or delete the given filesystem to/from the given user namespace.
219+
*/
220+
int
221+
zfs_userns(zfs_handle_t *zhp, unsigned int nsnum, int attach)
222+
{
223+
libzfs_handle_t *hdl = zhp->zfs_hdl;
224+
zfs_cmd_t zc = {"\0"};
225+
char errbuf[1024];
226+
unsigned long cmd;
227+
int ret;
228+
229+
if (attach) {
230+
(void) snprintf(errbuf, sizeof (errbuf),
231+
dgettext(TEXT_DOMAIN, "cannot add '%s' to namespace"),
232+
zhp->zfs_name);
233+
} else {
234+
(void) snprintf(errbuf, sizeof (errbuf),
235+
dgettext(TEXT_DOMAIN, "cannot remove '%s' from namespace"),
236+
zhp->zfs_name);
237+
}
238+
239+
switch (zhp->zfs_type) {
240+
case ZFS_TYPE_VOLUME:
241+
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
242+
"volumes can not be namespaced"));
243+
return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
244+
case ZFS_TYPE_SNAPSHOT:
245+
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
246+
"snapshots can not be namespaced"));
247+
return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
248+
case ZFS_TYPE_BOOKMARK:
249+
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
250+
"bookmarks can not be namespaced"));
251+
return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
252+
case ZFS_TYPE_POOL:
253+
case ZFS_TYPE_FILESYSTEM:
254+
/* OK */
255+
;
256+
}
257+
assert(zhp->zfs_type == ZFS_TYPE_FILESYSTEM);
258+
259+
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
260+
zc.zc_objset_type = DMU_OST_ZFS;
261+
zc.zc_zoneid = nsnum;
262+
263+
cmd = attach ? ZFS_IOC_JAIL : ZFS_IOC_UNJAIL;
264+
if ((ret = zfs_ioctl(hdl, cmd, &zc)) != 0)
265+
zfs_standard_error(hdl, errno, errbuf);
266+
267+
return (ret);
268+
}

module/os/linux/spl/Makefile.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,4 @@ $(MODULE)-objs += ../os/linux/spl/spl-tsd.o
1515
$(MODULE)-objs += ../os/linux/spl/spl-vmem.o
1616
$(MODULE)-objs += ../os/linux/spl/spl-xdr.o
1717
$(MODULE)-objs += ../os/linux/spl/spl-zlib.o
18+
$(MODULE)-objs += ../os/linux/spl/spl-zone.o

0 commit comments

Comments
 (0)