77 "os"
88 "path"
99 "path/filepath"
10+ "runtime"
1011 "strconv"
1112 "strings"
1213 "syscall"
@@ -932,17 +933,18 @@ func createDevices(config *configs.Config) error {
932933 return nil
933934}
934935
935- func bindMountDeviceNode (rootfs , dest string , node * devices.Device ) error {
936- f , err := os .Create (dest )
937- if err != nil && ! os .IsExist (err ) {
938- return err
939- }
940- if f != nil {
941- _ = f .Close ()
936+ func bindMountDeviceNode (destDir * os.File , destName string , node * devices.Device ) error {
937+ dstFile , err := utils .Openat (destDir , destName , unix .O_CREAT | unix .O_NOFOLLOW | unix .O_CLOEXEC , 0o000 )
938+ if err != nil {
939+ return fmt .Errorf ("create device inode %s: %w" , node .Path , err )
942940 }
943- return utils .WithProcfd (rootfs , dest , func (dstFd string ) error {
944- return mountViaFds (node .Path , nil , dest , dstFd , "bind" , unix .MS_BIND , "" )
945- })
941+ defer dstFile .Close ()
942+
943+ dstFd , closer := utils .ProcThreadSelfFd (dstFile .Fd ())
944+ defer closer ()
945+
946+ dstPath := filepath .Join (destDir .Name (), destName )
947+ return mountViaFds (node .Path , nil , dstPath , dstFd , "bind" , unix .MS_BIND , "" )
946948}
947949
948950// Creates the device node in the rootfs of the container.
@@ -951,31 +953,33 @@ func createDeviceNode(rootfs string, node *devices.Device, bind bool) error {
951953 // The node only exists for cgroup reasons, ignore it here.
952954 return nil
953955 }
954- dest , err := securejoin .SecureJoin (rootfs , node .Path )
956+ destPath , err := securejoin .SecureJoin (rootfs , node .Path )
955957 if err != nil {
956958 return err
957959 }
958- if dest == rootfs {
960+ if destPath == rootfs {
959961 return fmt .Errorf ("%w: mknod over rootfs" , errRootfsToFile )
960962 }
961- if err := pathrs .MkdirAllInRoot (rootfs , filepath .Dir (dest ), 0o755 ); err != nil {
962- return err
963+ destDirPath , destName := filepath .Split (destPath )
964+ destDir , err := pathrs .MkdirAllInRootOpen (rootfs , destDirPath , 0o755 )
965+ if err != nil {
966+ return fmt .Errorf ("mkdir parent of device inode %q: %w" , node .Path , err )
963967 }
964968 if bind {
965- return bindMountDeviceNode (rootfs , dest , node )
969+ return bindMountDeviceNode (destDir , destName , node )
966970 }
967- if err := mknodDevice (dest , node ); err != nil {
971+ if err := mknodDevice (destDir , destName , node ); err != nil {
968972 if errors .Is (err , os .ErrExist ) {
969973 return nil
970974 } else if errors .Is (err , os .ErrPermission ) {
971- return bindMountDeviceNode (rootfs , dest , node )
975+ return bindMountDeviceNode (destDir , destName , node )
972976 }
973977 return err
974978 }
975979 return nil
976980}
977981
978- func mknodDevice (dest string , node * devices.Device ) error {
982+ func mknodDevice (destDir * os. File , destName string , node * devices.Device ) error {
979983 fileMode := node .FileMode
980984 switch node .Type {
981985 case devices .BlockDevice :
@@ -991,14 +995,44 @@ func mknodDevice(dest string, node *devices.Device) error {
991995 if err != nil {
992996 return err
993997 }
994- if err := unix .Mknod ( dest , uint32 (fileMode ), int (dev )); err != nil {
995- return & os.PathError {Op : "mknod " , Path : dest , Err : err }
998+ if err := unix .Mknodat ( int ( destDir . Fd ()), destName , uint32 (fileMode ), int (dev )); err != nil {
999+ return & os.PathError {Op : "mknodat " , Path : filepath . Join ( destDir . Name (), destName ) , Err : err }
9961000 }
997- // Ensure permission bits (can be different because of umask).
998- if err := os .Chmod (dest , fileMode ); err != nil {
1001+
1002+ // Get a handle and verify that it matches the expected inode type and
1003+ // major:minor before we operate on it.
1004+ devFile , err := utils .Openat (destDir , destName , unix .O_NOFOLLOW | unix .O_PATH , 0 )
1005+ if err != nil {
1006+ return fmt .Errorf ("open new %c device inode %s: %w" , node .Type , node .Path , err )
1007+ }
1008+ defer devFile .Close ()
1009+
1010+ if err := sys .VerifyInode (devFile , func (stat * unix.Stat_t , _ * unix.Statfs_t ) error {
1011+ if stat .Mode & unix .S_IFMT != uint32 (fileMode )& unix .S_IFMT {
1012+ return fmt .Errorf ("new %c device inode %s has incorrect ftype: %#x doesn't match expected %#v" ,
1013+ node .Type , node .Path ,
1014+ stat .Mode & unix .S_IFMT , fileMode & unix .S_IFMT )
1015+ }
1016+ if stat .Rdev != dev {
1017+ return fmt .Errorf ("new %c device inode %s has incorrect major:minor: %d:%d doesn't match expected %d:%d" ,
1018+ node .Type , node .Path ,
1019+ unix .Major (stat .Rdev ), unix .Minor (stat .Rdev ),
1020+ unix .Major (dev ), unix .Minor (dev ))
1021+ }
1022+ return nil
1023+ }); err != nil {
9991024 return err
10001025 }
1001- return os .Chown (dest , int (node .Uid ), int (node .Gid ))
1026+
1027+ // Ensure permission bits (can be different because of umask).
1028+ if err := sys .FchmodFile (devFile , uint32 (fileMode )); err != nil {
1029+ return fmt .Errorf ("update new %c device inode %s file mode: %w" , node .Type , node .Path , err )
1030+ }
1031+ if err := sys .FchownFile (devFile , int (node .Uid ), int (node .Gid )); err != nil {
1032+ return fmt .Errorf ("update new %c device inode %s owner: %w" , node .Type , node .Path , err )
1033+ }
1034+ runtime .KeepAlive (devFile )
1035+ return nil
10021036}
10031037
10041038// rootfsParentMountPrivate ensures rootfs parent mount is private.
0 commit comments