Skip to content

Commit 01de9d6

Browse files
committed
rootfs: avoid using os.Create for new device inodes
If an attacker were to make the target of a device inode creation be a symlink to some host path, os.Create would happily truncate the target which could lead to all sorts of issues. This exploit is probably not as exploitable because device inodes are usually only bind-mounted for rootless containers, which cannot overwrite important host files (though user files would still be up for grabs). The regular inode creation logic could also theoretically be tricked into changing the access mode and ownership of host files if the newly-created device inode was swapped with a symlink to a host path. Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
1 parent 77889b5 commit 01de9d6

File tree

3 files changed

+130
-23
lines changed

3 files changed

+130
-23
lines changed

internal/sys/opath_linux.go

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
package sys
2+
3+
import (
4+
"fmt"
5+
"os"
6+
"runtime"
7+
"strconv"
8+
9+
"golang.org/x/sys/unix"
10+
11+
"github.com/opencontainers/runc/internal/pathrs"
12+
)
13+
14+
// FchmodFile is a wrapper around fchmodat2(AT_EMPTY_PATH) with fallbacks for
15+
// older kernels. This is distinct from [File.Chmod] and [unix.Fchmod] in that
16+
// it works on O_PATH file descriptors.
17+
func FchmodFile(f *os.File, mode uint32) error {
18+
err := unix.Fchmodat(int(f.Fd()), "", mode, unix.AT_EMPTY_PATH)
19+
// If fchmodat2(2) is not available at all, golang.org/x/unix (probably
20+
// in order to mirror glibc) returns EOPNOTSUPP rather than EINVAL
21+
// (what the kernel actually returns for invalid flags, which is being
22+
// emulated) or ENOSYS (which is what glibc actually sees).
23+
if err != unix.EINVAL && err != unix.EOPNOTSUPP { //nolint:errorlint // unix errors are bare
24+
// err == nil is implicitly handled
25+
return os.NewSyscallError("fchmodat2 AT_EMPTY_PATH", err)
26+
}
27+
28+
// AT_EMPTY_PATH support was added to fchmodat2 in Linux 6.6
29+
// (5daeb41a6fc9d0d81cb2291884b7410e062d8fa1). The alternative for
30+
// older kernels is to go through /proc.
31+
fdDir, closer, err2 := pathrs.ProcThreadSelfOpen("fd/", unix.O_DIRECTORY)
32+
if err2 != nil {
33+
return fmt.Errorf("fchmodat2 AT_EMPTY_PATH fallback: %w", err2)
34+
}
35+
defer closer()
36+
defer fdDir.Close()
37+
38+
err = unix.Fchmodat(int(fdDir.Fd()), strconv.Itoa(int(f.Fd())), mode, 0)
39+
if err != nil {
40+
err = fmt.Errorf("fchmodat /proc/self/fd/%d: %w", f.Fd(), err)
41+
}
42+
runtime.KeepAlive(f)
43+
return err
44+
}
45+
46+
// FchownFile is a wrapper around fchownat(AT_EMPTY_PATH). This is distinct
47+
// from [File.Chown] and [unix.Fchown] in that it works on O_PATH file
48+
// descriptors.
49+
func FchownFile(f *os.File, uid, gid int) error {
50+
err := unix.Fchownat(int(f.Fd()), "", uid, gid, unix.AT_EMPTY_PATH)
51+
runtime.KeepAlive(f)
52+
return os.NewSyscallError("fchownat AT_EMPTY_PATH", err)
53+
}

libcontainer/rootfs_linux.go

Lines changed: 57 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"os"
88
"path"
99
"path/filepath"
10+
"runtime"
1011
"strconv"
1112
"strings"
1213
"syscall"
@@ -932,17 +933,18 @@ func createDevices(config *configs.Config) error {
932933
return nil
933934
}
934935

935-
func bindMountDeviceNode(rootfs, dest string, node *devices.Device) error {
936-
f, err := os.Create(dest)
937-
if err != nil && !os.IsExist(err) {
938-
return err
939-
}
940-
if f != nil {
941-
_ = f.Close()
936+
func bindMountDeviceNode(destDir *os.File, destName string, node *devices.Device) error {
937+
dstFile, err := utils.Openat(destDir, destName, unix.O_CREAT|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0o000)
938+
if err != nil {
939+
return fmt.Errorf("create device inode %s: %w", node.Path, err)
942940
}
943-
return utils.WithProcfd(rootfs, dest, func(dstFd string) error {
944-
return mountViaFds(node.Path, nil, dest, dstFd, "bind", unix.MS_BIND, "")
945-
})
941+
defer dstFile.Close()
942+
943+
dstFd, closer := utils.ProcThreadSelfFd(dstFile.Fd())
944+
defer closer()
945+
946+
dstPath := filepath.Join(destDir.Name(), destName)
947+
return mountViaFds(node.Path, nil, dstPath, dstFd, "bind", unix.MS_BIND, "")
946948
}
947949

948950
// Creates the device node in the rootfs of the container.
@@ -951,31 +953,33 @@ func createDeviceNode(rootfs string, node *devices.Device, bind bool) error {
951953
// The node only exists for cgroup reasons, ignore it here.
952954
return nil
953955
}
954-
dest, err := securejoin.SecureJoin(rootfs, node.Path)
956+
destPath, err := securejoin.SecureJoin(rootfs, node.Path)
955957
if err != nil {
956958
return err
957959
}
958-
if dest == rootfs {
960+
if destPath == rootfs {
959961
return fmt.Errorf("%w: mknod over rootfs", errRootfsToFile)
960962
}
961-
if err := pathrs.MkdirAllInRoot(rootfs, filepath.Dir(dest), 0o755); err != nil {
962-
return err
963+
destDirPath, destName := filepath.Split(destPath)
964+
destDir, err := pathrs.MkdirAllInRootOpen(rootfs, destDirPath, 0o755)
965+
if err != nil {
966+
return fmt.Errorf("mkdir parent of device inode %q: %w", node.Path, err)
963967
}
964968
if bind {
965-
return bindMountDeviceNode(rootfs, dest, node)
969+
return bindMountDeviceNode(destDir, destName, node)
966970
}
967-
if err := mknodDevice(dest, node); err != nil {
971+
if err := mknodDevice(destDir, destName, node); err != nil {
968972
if errors.Is(err, os.ErrExist) {
969973
return nil
970974
} else if errors.Is(err, os.ErrPermission) {
971-
return bindMountDeviceNode(rootfs, dest, node)
975+
return bindMountDeviceNode(destDir, destName, node)
972976
}
973977
return err
974978
}
975979
return nil
976980
}
977981

978-
func mknodDevice(dest string, node *devices.Device) error {
982+
func mknodDevice(destDir *os.File, destName string, node *devices.Device) error {
979983
fileMode := node.FileMode
980984
switch node.Type {
981985
case devices.BlockDevice:
@@ -991,14 +995,44 @@ func mknodDevice(dest string, node *devices.Device) error {
991995
if err != nil {
992996
return err
993997
}
994-
if err := unix.Mknod(dest, uint32(fileMode), int(dev)); err != nil {
995-
return &os.PathError{Op: "mknod", Path: dest, Err: err}
998+
if err := unix.Mknodat(int(destDir.Fd()), destName, uint32(fileMode), int(dev)); err != nil {
999+
return &os.PathError{Op: "mknodat", Path: filepath.Join(destDir.Name(), destName), Err: err}
9961000
}
997-
// Ensure permission bits (can be different because of umask).
998-
if err := os.Chmod(dest, fileMode); err != nil {
1001+
1002+
// Get a handle and verify that it matches the expected inode type and
1003+
// major:minor before we operate on it.
1004+
devFile, err := utils.Openat(destDir, destName, unix.O_NOFOLLOW|unix.O_PATH, 0)
1005+
if err != nil {
1006+
return fmt.Errorf("open new %c device inode %s: %w", node.Type, node.Path, err)
1007+
}
1008+
defer devFile.Close()
1009+
1010+
if err := sys.VerifyInode(devFile, func(stat *unix.Stat_t, _ *unix.Statfs_t) error {
1011+
if stat.Mode&unix.S_IFMT != uint32(fileMode)&unix.S_IFMT {
1012+
return fmt.Errorf("new %c device inode %s has incorrect ftype: %#x doesn't match expected %#v",
1013+
node.Type, node.Path,
1014+
stat.Mode&unix.S_IFMT, fileMode&unix.S_IFMT)
1015+
}
1016+
if stat.Rdev != dev {
1017+
return fmt.Errorf("new %c device inode %s has incorrect major:minor: %d:%d doesn't match expected %d:%d",
1018+
node.Type, node.Path,
1019+
unix.Major(stat.Rdev), unix.Minor(stat.Rdev),
1020+
unix.Major(dev), unix.Minor(dev))
1021+
}
1022+
return nil
1023+
}); err != nil {
9991024
return err
10001025
}
1001-
return os.Chown(dest, int(node.Uid), int(node.Gid))
1026+
1027+
// Ensure permission bits (can be different because of umask).
1028+
if err := sys.FchmodFile(devFile, uint32(fileMode)); err != nil {
1029+
return fmt.Errorf("update new %c device inode %s file mode: %w", node.Type, node.Path, err)
1030+
}
1031+
if err := sys.FchownFile(devFile, int(node.Uid), int(node.Gid)); err != nil {
1032+
return fmt.Errorf("update new %c device inode %s owner: %w", node.Type, node.Path, err)
1033+
}
1034+
runtime.KeepAlive(devFile)
1035+
return nil
10021036
}
10031037

10041038
// rootfsParentMountPrivate ensures rootfs parent mount is private.

libcontainer/system/linux.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,3 +160,23 @@ func SetLinuxPersonality(personality int) error {
160160
}
161161
return nil
162162
}
163+
164+
// GetPtyPeer is a wrapper for ioctl(TIOCGPTPEER).
165+
func GetPtyPeer(ptyFd uintptr, unsafePeerPath string, flags int) (*os.File, error) {
166+
// Make sure O_NOCTTY is always set -- otherwise runc might accidentally
167+
// gain it as a controlling terminal. O_CLOEXEC also needs to be set to
168+
// make sure we don't leak the handle either.
169+
flags |= unix.O_NOCTTY | unix.O_CLOEXEC
170+
171+
// There is no nice wrapper for this kind of ioctl in unix.
172+
peerFd, _, errno := unix.Syscall(
173+
unix.SYS_IOCTL,
174+
ptyFd,
175+
uintptr(unix.TIOCGPTPEER),
176+
uintptr(flags),
177+
)
178+
if errno != 0 {
179+
return nil, os.NewSyscallError("ioctl TIOCGPTPEER", errno)
180+
}
181+
return os.NewFile(peerFd, unsafePeerPath), nil
182+
}

0 commit comments

Comments
 (0)