Skip to content

Commit

Permalink
rootfs: try to scope MkdirAll to stay inside the rootfs
Browse files Browse the repository at this point in the history
While we use SecureJoin to try to make all of our target paths inside
the container safe, SecureJoin is not safe against an attacker than can
change the path after we "resolve" it.

os.MkdirAll can inadvertently follow symlinks and thus an attacker could
end up tricking runc into creating empty directories on the host (note
that the container doesn't get access to these directories, and the host
just sees empty directories). However, this could potentially cause DoS
issues by (for instance) creating a directory in a conf.d directory for
a daemon that doesn't handle subdirectories properly.

In addition, the handling for creating file bind-mounts did a plain
open(O_CREAT) on the SecureJoin'd path, which is even more obviously
unsafe (luckily we didn't use O_TRUNC, or this bug could've allowed an
attacker to cause data loss...). Regardless of the symlink issue,
opening an untrusted file could result in a DoS if the file is a hung
tty or some other "nasty" file. We can use mknodat to safely create a
regular file without opening anything anyway (O_CREAT|O_EXCL would also
work but it makes the logic a bit more complicated, and we don't want to
open the file for any particular reason anyway).

libpathrs[1] is the long-term solution for these kinds of problems, but
for now we can patch this particular issue by creating a more restricted
MkdirAll that refuses to resolve symlinks and does the creation using
file descriptors. This is loosely based on a more secure version that
filepath-securejoin now has[2] and will be added to libpathrs soon[3].

[1]: https://github.com/openSUSE/libpathrs
[2]: https://github.com/cyphar/filepath-securejoin/releases/tag/v0.3.0
[3]: openSUSE/libpathrs#10

Fixes: CVE-2024-45310
Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
  • Loading branch information
cyphar committed Sep 2, 2024
1 parent 346b818 commit 63c2908
Show file tree
Hide file tree
Showing 3 changed files with 174 additions and 10 deletions.
31 changes: 21 additions & 10 deletions libcontainer/rootfs_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ func mountCgroupV1(m *configs.Mount, c *mountConfig) error {
// inside the tmpfs, so we don't want to resolve symlinks).
subsystemPath := filepath.Join(c.root, b.Destination)
subsystemName := filepath.Base(b.Destination)
if err := os.MkdirAll(subsystemPath, 0o755); err != nil {
if err := utils.MkdirAllInRoot(c.root, subsystemPath, 0o755); err != nil {
return err
}
if err := utils.WithProcfd(c.root, b.Destination, func(dstFd string) error {
Expand Down Expand Up @@ -505,15 +505,26 @@ func createMountpoint(rootfs string, m mountEntry) (string, error) {
return "", fmt.Errorf("%w: file bind mount over rootfs", errRootfsToFile)
}
// Make the parent directory.
if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil {
destDir, destBase := filepath.Split(dest)
destDirFd, err := utils.MkdirAllInRootOpen(rootfs, destDir, 0o755)
if err != nil {
return "", fmt.Errorf("make parent dir of file bind-mount: %w", err)
}
// Make the target file.
f, err := os.OpenFile(dest, os.O_CREATE, 0o755)
if err != nil {
return "", fmt.Errorf("create target of file bind-mount: %w", err)
defer destDirFd.Close()
// Make the target file. We want to avoid opening any file that is
// already there because it could be a "bad" file like an invalid
// device or hung tty that might cause a DoS, so we use mknodat.
// destBase does not contain any "/" components, and mknodat does
// not follow trailing symlinks, so we can safely just call mknodat
// here.
if err := unix.Mknodat(int(destDirFd.Fd()), destBase, unix.S_IFREG|0o644, 0); err != nil {
// If we get EEXIST, there was already an inode there and
// we can consider that a success.
if !errors.Is(err, unix.EEXIST) {
err = &os.PathError{Op: "mknod regular file", Path: dest, Err: err}
return "", fmt.Errorf("create target of file bind-mount: %w", err)
}
}
_ = f.Close()
// Nothing left to do.
return dest, nil
}
Expand All @@ -532,7 +543,7 @@ func createMountpoint(rootfs string, m mountEntry) (string, error) {
}
}

if err := os.MkdirAll(dest, 0o755); err != nil {
if err := utils.MkdirAllInRoot(rootfs, dest, 0o755); err != nil {
return "", err
}
return dest, nil
Expand Down Expand Up @@ -565,7 +576,7 @@ func mountToRootfs(c *mountConfig, m mountEntry) error {
} else if !fi.IsDir() {
return fmt.Errorf("filesystem %q must be mounted on ordinary directory", m.Device)
}
if err := os.MkdirAll(dest, 0o755); err != nil {
if err := utils.MkdirAllInRoot(rootfs, dest, 0o755); err != nil {
return err
}
// Selinux kernels do not support labeling of /proc or /sys.
Expand Down Expand Up @@ -928,7 +939,7 @@ func createDeviceNode(rootfs string, node *devices.Device, bind bool) error {
if dest == rootfs {
return fmt.Errorf("%w: mknod over rootfs", errRootfsToFile)
}
if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil {
if err := utils.MkdirAllInRoot(rootfs, filepath.Dir(dest), 0o755); err != nil {
return err
}
if bind {
Expand Down
41 changes: 41 additions & 0 deletions libcontainer/system/linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ import (
"fmt"
"io"
"os"
"runtime"
"strconv"
"strings"
"syscall"
"unsafe"

Expand Down Expand Up @@ -214,3 +216,42 @@ func SetLinuxPersonality(personality int) error {
}
return nil
}

func prepareAt(dir *os.File, path string) (int, string) {
if dir == nil {
return unix.AT_FDCWD, path
}

// Rather than just filepath.Join-ing path here, do it manually so the
// error and handle correctly indicate cases like path=".." as being
// relative to the correct directory. The handle.Name() might end up being
// wrong but because this is (currently) only used in MkdirAllInRoot, that
// isn't a problem.
dirName := dir.Name()
if !strings.HasSuffix(dirName, "/") {
dirName += "/"
}
fullPath := dirName + path

return int(dir.Fd()), fullPath
}

func Openat(dir *os.File, path string, flags int, mode uint32) (*os.File, error) {
dirFd, fullPath := prepareAt(dir, path)
fd, err := unix.Openat(dirFd, path, flags, mode)
if err != nil {
return nil, &os.PathError{Op: "openat", Path: fullPath, Err: err}
}
runtime.KeepAlive(dir)
return os.NewFile(uintptr(fd), fullPath), nil
}

func Mkdirat(dir *os.File, path string, mode uint32) error {
dirFd, fullPath := prepareAt(dir, path)
err := unix.Mkdirat(dirFd, path, mode)
if err != nil {
err = &os.PathError{Op: "mkdirat", Path: fullPath, Err: err}
}
runtime.KeepAlive(dir)
return err
}
112 changes: 112 additions & 0 deletions libcontainer/utils/utils_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
package utils

import (
"errors"
"fmt"
"math"
"os"
Expand All @@ -13,6 +14,8 @@ import (
"sync"
_ "unsafe" // for go:linkname

"github.com/opencontainers/runc/libcontainer/system"

securejoin "github.com/cyphar/filepath-securejoin"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
Expand Down Expand Up @@ -275,3 +278,112 @@ func IsLexicallyInRoot(root, path string) bool {
}
return strings.HasPrefix(path, root)
}

// MkdirAllInRootOpen attempts to make
//
// path, _ := securejoin.SecureJoin(root, unsafePath)
// os.MkdirAll(path, mode)
// os.Open(path)
//
// safer against attacks where components in the path are changed between
// SecureJoin returning and MkdirAll (or Open) being called. In particular, we
// try to detect any symlink components in the path while we are doing the
// MkdirAll.
//
// NOTE: Unlike os.MkdirAll, mode is not Go's os.FileMode, it is the unix mode
// (the suid/sgid/sticky bits are not the same as for os.FileMode).
//
// NOTE: If unsafePath is a subpath of root, we assume that you have already
// called SecureJoin and so we use the provided path verbatim without resolving
// any symlinks (this is done in a way that avoids symlink-exchange races).
// This means that the path also must not contain ".." elements, otherwise an
// error will occur.
//
// This is a somewhat less safe alternative to
// <https://github.com/cyphar/filepath-securejoin/pull/13>, but it should
// detect attempts to trick us into creating directories outside of the root.
// We should migrate to securejoin.MkdirAll once it is merged.
func MkdirAllInRootOpen(root, unsafePath string, mode uint32) (_ *os.File, Err error) {
// If the path is already "within" the root, use it verbatim.
fullPath := unsafePath
if !IsLexicallyInRoot(root, unsafePath) {
var err error
fullPath, err = securejoin.SecureJoin(root, unsafePath)
if err != nil {
return nil, err
}
}
subPath, err := filepath.Rel(root, fullPath)
if err != nil {
return nil, err
}

// Check for any silly mode bits.
if mode&^0o7777 != 0 {
return nil, fmt.Errorf("tried to include non-mode bits in MkdirAll mode: 0o%.3o", mode)
}

currentDir, err := os.OpenFile(root, unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
if err != nil {
return nil, fmt.Errorf("open root handle: %w", err)
}
defer func() {
if Err != nil {
currentDir.Close()
}
}()

for _, part := range strings.Split(subPath, string(filepath.Separator)) {
switch part {
case "", ".":
// Skip over no-op components.
continue
case "..":
return nil, fmt.Errorf("possible breakout detected: found %q component in SecureJoin subpath %s", part, subPath)
}

nextDir, err := system.Openat(currentDir, part, unix.O_DIRECTORY|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
switch {
case err == nil:
// Update the currentDir.
_ = currentDir.Close()
currentDir = nextDir

case errors.Is(err, unix.ENOTDIR):
// This might be a symlink or some other random file. Either way,
// error out.
return nil, fmt.Errorf("cannot mkdir in %s/%s: %w", currentDir.Name(), part, unix.ENOTDIR)

case errors.Is(err, os.ErrNotExist):
// Luckily, mkdirat will not follow trailing symlinks, so this is
// safe to do as-is.
if err := system.Mkdirat(currentDir, part, mode); err != nil {
return nil, err
}
// Open the new directory. There is a race here where an attacker
// could swap the directory with a different directory, but
// MkdirAll's fuzzy semantics mean we don't care about that.
nextDir, err := system.Openat(currentDir, part, unix.O_DIRECTORY|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
if err != nil {
return nil, fmt.Errorf("open newly created directory: %w", err)
}
// Update the currentDir.
_ = currentDir.Close()
currentDir = nextDir

default:
return nil, err
}
}
return currentDir, nil
}

// MkdirAllInRoot is a wrapper around MkdirAllInRootOpen which closes the
// returned handle, for callers that don't need to use it.
func MkdirAllInRoot(root, unsafePath string, mode uint32) error {
f, err := MkdirAllInRootOpen(root, unsafePath, mode)
if err == nil {
_ = f.Close()
}
return err
}

0 comments on commit 63c2908

Please sign in to comment.