From 92ebb9a8f174f0e2d9062fdbb06a92266adce6c4 Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Tue, 22 Mar 2022 17:12:33 +0000 Subject: [PATCH] Add an option to disable nested user namespaces by setting limit to 1 Some use-cases of bubblewrap want to ensure that the subprocess can't further re-arrange the filesystem namespace, or do other more complex namespace modification. For example, Flatpak wants to prevent sandboxed processes from altering their /proc/$pid/root/.flatpak-info, so that /.flatpak-info can safely be used as an indicator that a process is part of a Flatpak app. This approach was suggested by lukts30 on containers/bubblewrap#452. The sysctl-controlled maximum numbers of namespaces are themselves namespaced, so we can disable nested user namespaces by setting the limit to 1 and then entering a new, nested user namespace. The resulting process loses its privileges in the namespace where the limit was set to 1, so it is unable to move the limit back up. Signed-off-by: Simon McVittie --- bubblewrap.c | 38 ++++++++++++++++++++++++++++++++++++++ bwrap.xml | 14 ++++++++++++++ completions/bash/bwrap | 1 + completions/zsh/_bwrap | 1 + tests/test-run.sh | 10 +++++++++- 5 files changed, 63 insertions(+), 1 deletion(-) diff --git a/bubblewrap.c b/bubblewrap.c index b17ff990..78ff3081 100644 --- a/bubblewrap.c +++ b/bubblewrap.c @@ -66,6 +66,7 @@ static const char *opt_file_label = NULL; static bool opt_as_pid_1; const char *opt_chdir_path = NULL; +bool opt_disable_userns = FALSE; bool opt_unshare_user = FALSE; bool opt_unshare_user_try = FALSE; bool opt_unshare_pid = FALSE; @@ -301,6 +302,7 @@ usage (int ecode, FILE *out) " --unshare-cgroup-try Create new cgroup namespace if possible else continue by skipping it\n" " --userns FD Use this user namespace (cannot combine with --unshare-user)\n" " --userns2 FD After setup switch to this user namespace, only useful with --userns\n" + " --disable-userns Disable further use of user namespaces inside sandbox\n" " 
--pidns FD Use this user namespace (as parent namespace if using --unshare-pid)\n" " --uid UID Custom uid in the sandbox (requires --unshare-user or --userns)\n" " --gid GID Custom gid in the sandbox (requires --unshare-user or --userns)\n" @@ -1766,6 +1768,10 @@ parse_args_recurse (int *argcp, argv++; argc--; } + else if (strcmp (arg, "--disable-userns") == 0) + { + opt_disable_userns = TRUE; + } else if (strcmp (arg, "--remount-ro") == 0) { if (argc < 2) @@ -2610,6 +2616,9 @@ main (int argc, if (opt_userns_fd != -1 && opt_unshare_user_try) die ("--userns not compatible --unshare-user-try"); + if (opt_disable_userns && !opt_unshare_user) + die ("--disable-userns requires --unshare-user"); + /* Technically using setns() is probably safe even in the privileged * case, because we got passed in a file descriptor to the * namespace, and that can only be gotten if you have ptrace @@ -3103,6 +3112,35 @@ main (int argc, -1, FALSE, FALSE); } + if (opt_disable_userns) + { + cleanup_fd int sysctl_fd = -1; + + sysctl_fd = openat (proc_fd, "sys/user/max_user_namespaces", O_WRONLY); + + if (sysctl_fd < 0) + die_with_error ("cannot open /proc/sys/user/max_user_namespaces"); + + if (write_to_fd (sysctl_fd, "1", 1) < 0) + die_with_error ("sysctl user.max_user_namespaces = 1"); + + if (unshare (CLONE_NEWUSER)) + die_with_error ("unshare user ns"); + + /* Check that we can't make a new userns again */ + res = unshare (CLONE_NEWUSER); + + if (res == 0) + die ("unable to disable creation of new user namespaces"); + + /* We're in a new user namespace, we got back the bounding set, clear it again */ + drop_cap_bounding_set (FALSE); + + write_uid_gid_map (opt_sandbox_uid, opt_sandbox_uid, + opt_sandbox_gid, opt_sandbox_gid, + -1, FALSE, FALSE); + } + /* All privileged ops are done now, so drop caps we don't need */ drop_privs (!is_privileged, TRUE); diff --git a/bwrap.xml b/bwrap.xml index 2baec5d8..c67bf0d6 100644 --- a/bwrap.xml +++ b/bwrap.xml @@ -144,6 +144,20 @@ After setting up the 
new namespace, switch into the specified namespace. For this to work the specified namespace must be a descendant of the user namespace used for the setup, so this is only useful in combination with --userns. This is useful because sometimes bubblewrap itself creates nested user namespaces (to work around some kernel issues) and --userns2 can be used to enter these. + + + + Prevent the process in the sandbox from creating further user namespaces, + so that it cannot rearrange the filesystem namespace or do other more + complex namespace modification. + This is currently implemented by setting the + user.max_user_namespaces sysctl to 1, and then + entering a nested user namespace which is unable to raise that limit + in the outer namespace. + This option requires --unshare-user, and doesn't work + in the setuid version of bubblewrap. + + Use an existing pid namespace instead of creating one. This is often used with --userns, because the pid namespace must be owned by the same user namespace that bwrap uses. 
diff --git a/completions/bash/bwrap b/completions/bash/bwrap index c57d9abe..3ea0cb13 100644 --- a/completions/bash/bwrap +++ b/completions/bash/bwrap @@ -11,6 +11,7 @@ _bwrap() { local boolean_options=" --as-pid-1 --clearenv + --disable-userns --help --new-session --unshare-all diff --git a/completions/zsh/_bwrap b/completions/zsh/_bwrap index 1e365f09..1d7acc98 100644 --- a/completions/zsh/_bwrap +++ b/completions/zsh/_bwrap @@ -29,6 +29,7 @@ _bwrap_args=( '--dev-bind[Bind mount the host path SRC on DEST, allowing device access]:source:_files:destination:_files' '--dev[Mount new dev on DEST]:mount point for /dev:_files -/' "--die-with-parent[Kills with SIGKILL child process (COMMAND) when bwrap or bwrap's parent dies.]" + '--disable-userns[Disable further use of user namespaces inside sandbox]' '--exec-label[Exec label for the sandbox]:SELinux label:_selinux_contexts' '--file-label[File label for temporary sandbox content]:SELinux label:_selinux_contexts' '--gid[Custom gid in the sandbox (requires --unshare-user or --userns)]: :_guard "[0-9]#" "numeric group ID"' diff --git a/tests/test-run.sh b/tests/test-run.sh index da92ffb9..597a3c94 100755 --- a/tests/test-run.sh +++ b/tests/test-run.sh @@ -8,7 +8,7 @@ srcd=$(cd $(dirname "$0") && pwd) bn=$(basename "$0") -echo "1..54" +echo "1..55" # Test help ${BWRAP} --help > help.txt @@ -112,6 +112,7 @@ echo "ok exec failure doesn't include exit-code in json-status" if test -n "${bwrap_is_suid:-}"; then echo "ok - # SKIP no --cap-add support" echo "ok - # SKIP no --cap-add support" + echo "ok - # SKIP no --disable-userns" else BWRAP_RECURSE="$BWRAP --unshare-user --uid 0 --gid 0 --cap-add ALL --bind / / --bind /proc /proc" @@ -123,6 +124,13 @@ else $BWRAP_RECURSE -- /proc/self/exe --unshare-all ${BWRAP_RO_HOST_ARGS} findmnt > recursive-newroot.txt assert_file_has_content recursive-newroot.txt "/usr" echo "ok - can pivot to new rootfs recursively" + + $BWRAP --dev-bind / / -- true + $BWRAP --unshare-user --disable-userns 
--dev-bind / / -- true + ! $BWRAP --unshare-user --disable-userns --dev-bind / / -- $BWRAP --dev-bind / / -- true + ! $BWRAP --unshare-user --disable-userns --dev-bind / / -- sh -c "echo 2 > /proc/sys/user/max_user_namespaces || true; $BWRAP --dev-bind / / -- true" + ! $BWRAP --unshare-user --disable-userns --dev-bind / / -- sh -c "echo 100 > /proc/sys/user/max_user_namespaces || true; $BWRAP --dev-bind / / -- true" + echo "ok - can disable nested userns" fi # Test error prefixing