diff --git a/.cirrus.yml b/.cirrus.yml index 9912d74da8d..275b83e6c12 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -1,85 +1,19 @@ --- -# We use Cirrus for RHEL clones (native) and Fedora (in Vagrant), because -# neither is available on GHA natively, so the only option is VM. -# In GHA, nested virtualization is only supported on macOS instances, which -# are slow and flaky. +# We use Cirrus for RHEL clones because Cirrus can directly run them +# without depending on nested virtualization. # NOTE Cirrus execution environments lack a terminal, needed for # some integration tests. So we use `ssh -tt` command to fake a terminal. -task: - timeout_in: 30m - - env: - DEBIAN_FRONTEND: noninteractive - HOME: /root - # yamllint disable rule:key-duplicates - matrix: - DISTRO: fedora - - name: vagrant DISTRO:$DISTRO - - compute_engine_instance: - image_project: cirrus-images - image: family/docker-kvm - platform: linux - nested_virtualization: true - # CPU limit: `16 / NTASK`: see https://cirrus-ci.org/faq/#are-there-any-limits - cpu: 4 - # Memory limit: `4GB * NCPU` - memory: 16G - - host_info_script: | - uname -a - # ----- - cat /etc/os-release - # ----- - df -T - # ----- - cat /proc/cpuinfo - install_libvirt_vagrant_script: | - curl -fsSL https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg - echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list - sudo sed -i 's/^# deb-src/deb-src/' /etc/apt/sources.list - apt-get update - apt-get install -y libvirt-daemon libvirt-daemon-system vagrant - systemctl enable --now libvirtd - apt-get build-dep -y vagrant ruby-libvirt - apt-get install -y --no-install-recommends libxslt-dev libxml2-dev libvirt-dev ruby-bundler ruby-dev zlib1g-dev - vagrant plugin install vagrant-libvirt - vagrant_cache: - fingerprint_script: cat Vagrantfile.$DISTRO - folder: 
/root/.vagrant.d/boxes - vagrant_up_script: | - ln -sf Vagrantfile.$DISTRO Vagrantfile - # Retry if it fails (download.fedoraproject.org returns 404 sometimes) - vagrant up --no-tty || vagrant up --no-tty - mkdir -p -m 0700 /root/.ssh - vagrant ssh-config >> /root/.ssh/config - guest_info_script: | - ssh default 'sh -exc "uname -a && systemctl --version && df -T && cat /etc/os-release && go version && sestatus && rpm -q container-selinux"' - check_config_script: | - ssh default /vagrant/script/check-config.sh - unit_tests_script: | - ssh default 'sudo -i make -C /vagrant localunittest' - integration_systemd_script: | - ssh -tt default "sudo -i make -C /vagrant localintegration RUNC_USE_SYSTEMD=yes" - integration_fs_script: | - ssh -tt default "sudo -i make -C /vagrant localintegration" - integration_systemd_rootless_script: | - ssh -tt default "sudo -i make -C /vagrant localrootlessintegration RUNC_USE_SYSTEMD=yes" - integration_fs_rootless_script: | - ssh -tt default "sudo -i make -C /vagrant localrootlessintegration" - task: timeout_in: 30m env: HOME: /root CIRRUS_WORKING_DIR: /home/runc - GO_VERSION: "1.22" + GO_VERSION: "1.23" BATS_VERSION: "v1.9.0" - RPMS: gcc git iptables jq glibc-static libseccomp-devel make criu fuse-sshfs container-selinux + RPMS: gcc git-core iptables jq glibc-static libseccomp-devel make criu fuse-sshfs container-selinux # yamllint disable rule:key-duplicates matrix: DISTRO: almalinux-8 @@ -97,7 +31,7 @@ task: install_dependencies_script: | case $DISTRO in *-8) - yum config-manager --set-enabled powertools # for glibc-static + dnf config-manager --set-enabled powertools # for glibc-static ;; *-9) dnf config-manager --set-enabled crb # for glibc-static @@ -112,10 +46,19 @@ task: # Work around dnf mirror failures by retrying a few times. for i in $(seq 0 2); do sleep $i - yum install -y $RPMS && break + yum install -y --setopt=install_weak_deps=False --setopt=tsflags=nodocs $RPMS && break done [ $? 
-eq 0 ] # fail if yum failed + case $DISTRO in + *-8) + # Use newer criu (with https://github.com/checkpoint-restore/criu/pull/2545). + # Alas we have to disable container-tools for that. + dnf -y module disable container-tools + dnf -y copr enable adrian/criu-el8 + dnf -y install criu + esac + # Install Go. PREFIX="https://go.dev/dl/" # Find out the latest minor release URL. @@ -128,14 +71,8 @@ task: git checkout $BATS_VERSION ./install.sh /usr/local cd - - # Add a user for rootless tests - useradd -u2000 -m -d/home/rootless -s/bin/bash rootless - # Allow root and rootless itself to execute `ssh rootless@localhost` in tests/rootless.sh - ssh-keygen -t ecdsa -N "" -f /root/rootless.key - mkdir -m 0700 -p /home/rootless/.ssh - cp /root/rootless.key /home/rootless/.ssh/id_ecdsa - cat /root/rootless.key.pub >> /home/rootless/.ssh/authorized_keys - chown -R rootless.rootless /home/rootless + # Setup rootless tests. + /home/runc/script/setup_rootless.sh # set PATH echo 'export PATH=/usr/local/go/bin:/usr/local/bin:$PATH' >> /root/.bashrc # Setup ssh localhost for terminal emulation (script -e did not work) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 754fd87f155..864d6b3cbb9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -23,8 +23,8 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-20.04, ubuntu-24.04, actuated-arm64-6cpu-8gb] - go-version: [1.22.x, 1.23.x] + os: [ubuntu-24.04, ubuntu-24.04-arm] + go-version: [1.22.x, 1.23.x, 1.24.x] rootless: ["rootless", ""] race: ["-race", ""] criu: ["", "criu-dev"] @@ -34,38 +34,18 @@ jobs: - criu: criu-dev go-version: 1.22.x - criu: criu-dev - rootless: rootless - - criu: criu-dev - race: -race - - go-version: 1.22.x - os: actuated-arm64-6cpu-8gb - - race: "-race" - os: actuated-arm64-6cpu-8gb + go-version: 1.23.x - criu: criu-dev - os: actuated-arm64-6cpu-8gb + rootless: rootless + # Do race detection only on latest Go. 
+ - race: -race + go-version: 1.22.x + - race: -race + go-version: 1.23.x runs-on: ${{ matrix.os }} steps: -# https://gist.github.com/alexellis/1f33e581c75e11e161fe613c46180771#file-metering-gha-md -# vmmeter start - - name: Prepare arkade - uses: alexellis/arkade-get@master - if: matrix.os == 'actuated-arm64-6cpu-8gb' - with: - crane: latest - print-summary: false - - - name: Install vmmeter - if: matrix.os == 'actuated-arm64-6cpu-8gb' - run: | - crane export --platform linux/arm64 ghcr.io/openfaasltd/vmmeter:latest | sudo tar -xvf - -C /usr/local/bin - - - name: Run vmmeter - uses: self-actuated/vmmeter-action@master - if: matrix.os == 'actuated-arm64-6cpu-8gb' -# vmmeter end - - name: checkout uses: actions/checkout@v4 @@ -91,17 +71,6 @@ jobs: # kernel config script/check-config.sh - - name: start sshd (used for testing rootless with systemd user session) - if: ${{ matrix.os == 'actuated-arm64-6cpu-8gb' && matrix.rootless == 'rootless' }} - run: | - # Generate new keys to fix "sshd: no hostkeys available -- exiting." - sudo ssh-keygen -A - if ! 
sudo systemctl start ssh.service; then - sudo journalctl -xeu ssh.service - exit 1 - fi - ps auxw | grep sshd - - name: install deps run: | sudo apt update @@ -118,11 +87,11 @@ jobs: sudo apt update sudo apt -y install criu - - name: install CRIU (criu ${{ matrix.criu }}) + - name: install CRIU (${{ matrix.criu }}) if: ${{ matrix.criu != '' }} run: | sudo apt -qy install \ - libcap-dev libnet1-dev libnl-3-dev \ + libcap-dev libnet1-dev libnl-3-dev uuid-dev \ libprotobuf-c-dev libprotobuf-dev protobuf-c-compiler protobuf-compiler git clone https://github.com/checkpoint-restore/criu.git ~/criu (cd ~/criu && git checkout ${{ matrix.criu }} && sudo make install-criu) @@ -148,7 +117,7 @@ jobs: - name: Allow userns for runc # https://discourse.ubuntu.com/t/ubuntu-24-04-lts-noble-numbat-release-notes/39890#unprivileged-user-namespace-restrictions-15 - if: matrix.os == 'ubuntu-24.04' + if: startsWith(matrix.os, 'ubuntu-24.04') run: | sed "s;^profile runc /usr/sbin/;profile runc-test $PWD/;" < /etc/apparmor.d/runc | sudo apparmor_parser @@ -159,21 +128,13 @@ jobs: - name: add rootless user if: matrix.rootless == 'rootless' run: | - sudo useradd -u2000 -m -d/home/rootless -s/bin/bash rootless - # Allow root and rootless itself to execute `ssh rootless@localhost` in tests/rootless.sh - ssh-keygen -t ecdsa -N "" -f $HOME/rootless.key - sudo mkdir -m 0700 -p /home/rootless/.ssh - sudo cp $HOME/rootless.key /home/rootless/.ssh/id_ecdsa - sudo cp $HOME/rootless.key.pub /home/rootless/.ssh/authorized_keys - sudo chown -R rootless.rootless /home/rootless + ./script/setup_rootless.sh sudo chmod a+X $HOME # for Ubuntu 22.04 and later - name: integration test (fs driver) run: sudo -E PATH="$PATH" script -e -c 'make local${{ matrix.rootless }}integration' - name: integration test (systemd driver) - # Skip rootless+systemd for ubuntu 20.04 because of cgroup v1. 
- if: ${{ !(matrix.os == 'ubuntu-20.04' && matrix.rootless == 'rootless') }} run: | # Delegate all cgroup v2 controllers to rootless user via --systemd-cgroup. # The default (since systemd v252) is "pids memory cpu". @@ -215,10 +176,70 @@ jobs: - name: unit test run: sudo -E PATH="$PATH" -- make GOARCH=386 localunittest + fedora: + timeout-minutes: 30 + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v4 + + - uses: lima-vm/lima-actions/setup@v1 + id: lima-actions-setup + + - uses: actions/cache@v4 + with: + path: ~/.cache/lima + key: lima-${{ steps.lima-actions-setup.outputs.version }} + + - name: "Start VM" + # --plain is set to disable file sharing, port forwarding, built-in containerd, etc. for faster start up + # + # CPUs: min(4, host CPU cores) + # RAM: min(4 GiB, half of host memory) + # Disk: 100 GiB + run: limactl start --plain --name=default template://fedora + + - name: "Initialize VM" + run: | + set -eux -o pipefail + limactl cp -r . default:/tmp/runc + lima sudo /tmp/runc/script/setup_host_fedora.sh + + - name: "Show guest info" + run: | + set -eux -o pipefail + lima uname -a + lima systemctl --version + lima df -T + lima cat /etc/os-release + lima go version + lima sestatus + lima rpm -q container-selinux + + - name: "Check config" + run: lima /tmp/runc/script/check-config.sh + + # NOTE the execution environment lacks a terminal, needed for + # some integration tests. So we use `ssh -tt` command to fake a terminal. 
+ - name: "Run unit tests" + run: ssh -tt lima-default sudo -i make -C /tmp/runc localunittest + + - name: "Run integration tests (systemd driver)" + run: ssh -tt lima-default sudo -i make -C /tmp/runc localintegration RUNC_USE_SYSTEMD=yes + + - name: "Run integration tests (fs driver)" + run: ssh -tt lima-default sudo -i make -C /tmp/runc localintegration + + - name: "Run integration tests (systemd driver, rootless)" + run: ssh -tt lima-default sudo -i make -C /tmp/runc localrootlessintegration RUNC_USE_SYSTEMD=yes + + - name: "Run integration tests (fs driver, rootless)" + run: ssh -tt lima-default sudo -i make -C /tmp/runc localrootlessintegration + all-done: needs: - test - cross-i386 + - fedora runs-on: ubuntu-24.04 steps: - run: echo "All jobs completed" diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml index 9f6c7ffc62e..e96a072174b 100644 --- a/.github/workflows/validate.yml +++ b/.github/workflows/validate.yml @@ -8,7 +8,7 @@ on: - release-* pull_request: env: - GO_VERSION: 1.22.x + GO_VERSION: 1.23.x permissions: contents: read @@ -40,10 +40,10 @@ jobs: sudo apt -qy install libseccomp-dev - uses: golangci/golangci-lint-action@v6 with: - version: v1.60 - # Extra linters, only checking new code from a pull request. + version: v1.64 + # Extra linters, only checking new code from a pull request to main. 
- name: lint-extra - if: github.event_name == 'pull_request' + if: github.event_name == 'pull_request' && github.base_ref == 'main' run: | golangci-lint run --config .golangci-extra.yml --new-from-rev=HEAD~1 @@ -102,9 +102,9 @@ jobs: - uses: actions/checkout@v4 - name: install shellcheck env: - VERSION: v0.9.0 + VERSION: v0.10.0 BASEURL: https://github.com/koalaman/shellcheck/releases/download - SHA256: 7087178d54de6652b404c306233264463cb9e7a9afeb259bb663cc4dbfd64149 + SHA256: f35ae15a4677945428bdfe61ccc297490d89dd1e544cc06317102637638c6deb run: | mkdir ~/bin curl -sSfL --retry 5 $BASEURL/$VERSION/shellcheck-$VERSION.linux.x86_64.tar.xz | diff --git a/CHANGELOG.md b/CHANGELOG.md index 6193d35678b..bdda19dfbe1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,144 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased 1.2.z] +## [1.2.8] - 2025-11-05 + +> 鳥籠の中に囚われた屈辱を + +### Security + +This release includes fixes for the following high-severity security issues: + +* [CVE-2025-31133][] exploits an issue with how masked paths are implemented in + runc. When masking files, runc will bind-mount the container's `/dev/null` + inode on top of the file. However, if an attacker can replace `/dev/null` + with a symlink to some other procfs file, runc will instead bind-mount the + symlink target read-write. This issue affected all known runc versions. + +* [CVE-2025-52565][] is very similar in concept and application to + [CVE-2025-31133][], except that it exploits a flaw in `/dev/console` + bind-mounts. When creating the `/dev/console` bind-mount (to `/dev/pts/$n`), + if an attacker replaces `/dev/pts/$n` with a symlink then runc will + bind-mount the symlink target over `/dev/console`. This issue affected all + versions of runc >= 1.0.0-rc3. 
+ +* [CVE-2025-52881][] is a more sophisticated variant of [CVE-2019-19921][], + which was a flaw that allowed an attacker to trick runc into writing the LSM + process labels for a container process into a dummy tmpfs file and thus not + apply the correct LSM labels to the container process. The mitigation we + applied for [CVE-2019-19921][] was fairly limited and effectively only caused + runc to verify that when we write LSM labels that those labels are actual + procfs files. This issue affects all known runc versions. + +[CVE-2019-19921]: https://github.com/opencontainers/runc/security/advisories/GHSA-fh74-hm69-rqjw +[CVE-2025-31133]: https://github.com/opencontainers/runc/security/advisories/GHSA-9493-h29p-rfm2 +[CVE-2025-52565]: https://github.com/opencontainers/runc/security/advisories/GHSA-qw9x-cqr3-wc7r +[CVE-2025-52881]: https://github.com/opencontainers/runc/security/advisories/GHSA-cgrx-mc8f-2prm + +## [1.2.7] - 2025-09-05 + +> さんをつけろよデコ助野郎! + +### Fixed + * Removed preemptive "full access to cgroups" warning when calling `runc + pause` or `runc unpause` as an unprivileged user without + `--systemd-cgroups`. Now the warning is only emitted if an actual permission + error was encountered. (#4709, #4720) + * Add time namespace to container config after checkpoint/restore. CRIU since + version 3.14 uses a time namespace for checkpoint/restore, however it was + not joining the time namespace in runc. (#4696, #4714) + * Container processes will no longer inherit the CPU affinity of runc by + default. Instead, the default CPU affinity of container processes will be + the largest set of CPUs permitted by the container's cpuset cgroup and any + other system restrictions (such as isolated CPUs). (#4041, #4815, #4858) + * Close seccomp agent connection to prevent resource leaks. (#4796, #4800) + * Several fixes to our CI, mainly related to AlmaLinux and CRIU. 
(#4670, + #4728, #4736, #4742) + * Setting `linux.rootfsPropagation` to `shared` or `unbindable` now functions + properly. (#1755, #1815, #4724, #4791) + * `runc update` will no longer clear intelRdt state information. (#4828, + #4834) + +### Changed + * In runc 1.2, we changed our mount behaviour to correctly handle clearing + flags. However, the error messages we returned did not provide as much + information to users about what clearing flags were conflicting with locked + mount flags. We now provide more diagnostic information if there is an error + when in the fallback path to handle locked mount flags. (#4734, #4740) + * Ignore the dmem controller in our cgroup tests, as systemd does not yet + support it. (#4806, #4811) + * `/proc/net/dev` is no longer included in the permitted procfs overmount + list. Its inclusion was almost certainly an error, and because `/proc/net` + is a symlink to `/proc/self/net`, overmounting this was almost certainly + never useful (and will be blocked by future kernel versions). (#4817, #4820) + * CI: Switch to GitHub-hosted ARM runners. Thanks again to @alexellis for + supporting runc's ARM CI up until now. (#4844, #4856, #4867) + * Simplify the `prepareCriuRestoreMounts` logic for checkpoint-restore. + (#4765, #4872) + +## [1.2.6] - 2025-03-17 + +> Hasta la victoria, siempre. + +### Fixed + * Fix a stall issue that would happen if setting `O_CLOEXEC` with + `CloseExecFrom` failed (#4647). + * `runc` now properly handles joining time namespaces (such as with `runc + exec`). Previously we would attempt to set the time offsets when joining, + which would fail. (#4635, #4649) + * Handle `EINTR` retries correctly for socket-related direct + `golang.org/x/sys/unix` system calls. (#4650) + * We no longer use `F_SEAL_FUTURE_WRITE` when sealing the runc binary, as it + turns out this had some unfortunate bugs in older kernel versions and was + never necessary in the first place. 
(#4651, #4640) + +### Removed + * Remove `Fexecve` helper from `libcontainer/system`. Runc 1.2.1 removed + runc-dmz, but we forgot to remove this helper added only for that. (#4646) + +### Changed + * Use Go 1.23 for official builds, run CI with Go 1.24 and drop Ubuntu 20.04 + from CI. We need to drop Ubuntu 20.04 from CI because GitHub Actions + announced it's already deprecated and it will be discontinued soon. (#4648) + +## [1.2.5] - 2025-02-13 + +> Мороз и солнце; день чудесный! + +### Fixed +* There was a regression in systemd v230 which made the way we define device + rule restrictions require a systemctl daemon-reload for our transient + units. This caused issues for workloads using NVIDIA GPUs. Work around the + upstream regression by re-arranging how the unit properties are defined. + (#4568, #4612, #4615) + * Dependency github.com/cyphar/filepath-securejoin is updated to v0.4.1, + allowing projects that vendor runc to bump it as well. (#4608) + * CI: fixed criu-dev compilation. (#4611) + +### Changed + * Dependency golang.org/x/net is updated to 0.33.0. (#4632) + +## [1.2.4] - 2025-01-07 + +> Христос се роди! + +### Fixed + * Re-add tun/tap devices to built-in allowed devices lists. + + In runc 1.2.0 we removed these devices from the default allow-list (which + were added seemingly by accident early in Docker's history) as a precaution + in order to try to reduce the attack surface of device inodes available to + most containers (#3468). At the time we thought that the vast majority of + users using tun/tap would already be specifying what devices they need (such + as by using `--device` with Docker/Podman) as opposed to doing the `mknod` + manually, and thus there would've been no user-visible change. + + Unfortunately, it seems that this regressed a noticeable number of users + (and not all higher-level tools provide easy ways to specify devices to + allow) and so this change needed to be reverted. 
Users that do not need + these devices are recommended to explicitly disable them by adding deny + rules in their container configuration. (#4555, #4556) + ## [1.2.3] - 2024-12-12 > Winter is not a season, it's a celebration. @@ -951,7 +1089,12 @@ implementation (libcontainer) is *not* covered by this policy. [1.1.0-rc.1]: https://github.com/opencontainers/runc/compare/v1.0.0...v1.1.0-rc.1 -[Unreleased 1.2.z]: https://github.com/opencontainers/runc/compare/v1.2.3...release-1.2 +[Unreleased 1.2.z]: https://github.com/opencontainers/runc/compare/v1.2.8...release-1.2 +[1.2.8]: https://github.com/opencontainers/runc/compare/v1.2.7...v1.2.8 +[1.2.7]: https://github.com/opencontainers/runc/compare/v1.2.6...v1.2.7 +[1.2.6]: https://github.com/opencontainers/runc/compare/v1.2.5...v1.2.6 +[1.2.5]: https://github.com/opencontainers/runc/compare/v1.2.4...v1.2.5 +[1.2.4]: https://github.com/opencontainers/runc/compare/v1.2.3...v1.2.4 [1.2.3]: https://github.com/opencontainers/runc/compare/v1.2.2...v1.2.3 [1.2.2]: https://github.com/opencontainers/runc/compare/v1.2.1...v1.2.2 [1.2.1]: https://github.com/opencontainers/runc/compare/v1.2.0...v1.2.1 diff --git a/Dockerfile b/Dockerfile index d04a958892d..f51f5956c85 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -ARG GO_VERSION=1.22 +ARG GO_VERSION=1.23 ARG BATS_VERSION=v1.9.0 ARG LIBSECCOMP_VERSION=2.5.5 diff --git a/Makefile b/Makefile index 0a15fd908ea..9ff8bad3f70 100644 --- a/Makefile +++ b/Makefile @@ -220,7 +220,7 @@ shellcheck: shfmt: $(CONTAINER_ENGINE) run $(CONTAINER_ENGINE_RUN_FLAGS) \ --rm -v $(CURDIR):/src -w /src \ - mvdan/shfmt:v3.5.1 -d -w . + mvdan/shfmt:v3.11.0 -d -w . 
.PHONY: localshfmt localshfmt: diff --git a/README.md b/README.md index 50fcd4e9222..59d9fe55e77 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,6 @@ [![gha/validate](https://github.com/opencontainers/runc/workflows/validate/badge.svg)](https://github.com/opencontainers/runc/actions?query=workflow%3Avalidate) [![gha/ci](https://github.com/opencontainers/runc/workflows/ci/badge.svg)](https://github.com/opencontainers/runc/actions?query=workflow%3Aci) [![CirrusCI](https://api.cirrus-ci.com/github/opencontainers/runc.svg)](https://cirrus-ci.com/github/opencontainers/runc) -Arm CI sponsored by Actuated ## Introduction diff --git a/VERSION b/VERSION index 0495c4a88ca..db6fb4a9113 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.2.3 +1.2.8 diff --git a/Vagrantfile.fedora b/Vagrantfile.fedora deleted file mode 100644 index f0099721521..00000000000 --- a/Vagrantfile.fedora +++ /dev/null @@ -1,53 +0,0 @@ -# -*- mode: ruby -*- -# vi: set ft=ruby : - -Vagrant.configure("2") do |config| - config.vm.box = "fedora-41" - # For URL, check https://www.fedoraproject.org/cloud/download - config.vm.box_url = "https://download.fedoraproject.org/pub/fedora/linux/releases/41/Cloud/x86_64/images/Fedora-Cloud-Base-Vagrant-libvirt-41-1.4.x86_64.vagrant.libvirt.box" - config.vm.provider :virtualbox do |v| - v.memory = 2048 - v.cpus = 2 - end - config.vm.provider :libvirt do |v| - v.memory = 2048 - v.cpus = 2 - end - config.vm.provision "shell", inline: <<-SHELL - set -e -u -o pipefail - DNF_OPTS="-y --setopt=install_weak_deps=False --setopt=tsflags=nodocs --exclude=kernel,kernel-core" - RPMS="bats git-core glibc-static golang jq libseccomp-devel make" - # Work around dnf mirror failures by retrying a few times. - for i in $(seq 0 2); do - sleep $i - dnf $DNF_OPTS update && dnf $DNF_OPTS install $RPMS && break - done - dnf clean all - - # To avoid "avc: denied { nosuid_transition }" from SELinux as we run tests on /tmp. 
- mount -o remount,suid /tmp - - # Prevent the "fatal: unsafe repository" git complain during build. - git config --global --add safe.directory /vagrant - - # Add a user for rootless tests - useradd -u2000 -m -d/home/rootless -s/bin/bash rootless - - # Allow root and rootless itself to execute `ssh rootless@localhost` in tests/rootless.sh - ssh-keygen -t ecdsa -N "" -f /root/rootless.key - mkdir -m 0700 -p /home/rootless/.ssh - cp /root/rootless.key /home/rootless/.ssh/id_ecdsa - cat /root/rootless.key.pub >> /home/rootless/.ssh/authorized_keys - chown -R rootless.rootless /home/rootless - - # Delegate cgroup v2 controllers to rootless user via --systemd-cgroup - mkdir -p /etc/systemd/system/user@.service.d - cat > /etc/systemd/system/user@.service.d/delegate.conf << EOF -[Service] -# default: Delegate=pids memory -# NOTE: delegation of cpuset requires systemd >= 244 (Fedora >= 32, Ubuntu >= 20.04). -Delegate=yes -EOF - systemctl daemon-reload - SHELL -end diff --git a/go.mod b/go.mod index 348bc9c6a0b..fabaf535fba 100644 --- a/go.mod +++ b/go.mod @@ -10,9 +10,9 @@ toolchain go1.22.4 require ( github.com/checkpoint-restore/go-criu/v6 v6.3.0 github.com/cilium/ebpf v0.16.0 - github.com/containerd/console v1.0.4 + github.com/containerd/console v1.0.5 github.com/coreos/go-systemd/v22 v22.5.0 - github.com/cyphar/filepath-securejoin v0.3.5 + github.com/cyphar/filepath-securejoin v0.5.1 github.com/docker/go-units v0.5.0 github.com/godbus/dbus/v5 v5.1.0 github.com/moby/sys/mountinfo v0.7.1 @@ -20,13 +20,13 @@ require ( github.com/moby/sys/userns v0.1.0 github.com/mrunalp/fileutils v0.5.1 github.com/opencontainers/runtime-spec v1.2.0 - github.com/opencontainers/selinux v1.11.0 + github.com/opencontainers/selinux v1.12.0 github.com/seccomp/libseccomp-golang v0.10.0 github.com/sirupsen/logrus v1.9.3 github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 github.com/urfave/cli v1.22.14 github.com/vishvananda/netlink v1.1.0 - golang.org/x/net v0.24.0 + 
golang.org/x/net v0.33.0 golang.org/x/sys v0.28.0 google.golang.org/protobuf v1.33.0 ) @@ -37,3 +37,8 @@ require ( github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df // indirect golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 // indirect ) + +// FIXME: This is only intended as a short-term solution to include a patch for +// CVE-2025-52881 in go-selinux without pushing the patches upstream. This +// should be removed as soon as possible after the embargo is lifted. +replace github.com/opencontainers/selinux => ./internal/third_party/selinux diff --git a/go.sum b/go.sum index 225d5860eb6..2349ee645cf 100644 --- a/go.sum +++ b/go.sum @@ -3,14 +3,14 @@ github.com/checkpoint-restore/go-criu/v6 v6.3.0 h1:mIdrSO2cPNWQY1truPg6uHLXyKHk3 github.com/checkpoint-restore/go-criu/v6 v6.3.0/go.mod h1:rrRTN/uSwY2X+BPRl/gkulo9gsKOSAeVp9/K2tv7xZI= github.com/cilium/ebpf v0.16.0 h1:+BiEnHL6Z7lXnlGUsXQPPAE7+kenAd4ES8MQ5min0Ok= github.com/cilium/ebpf v0.16.0/go.mod h1:L7u2Blt2jMM/vLAVgjxluxtBKlz3/GWjB0dMOEngfwE= -github.com/containerd/console v1.0.4 h1:F2g4+oChYvBTsASRTz8NP6iIAi97J3TtSAsLbIFn4ro= -github.com/containerd/console v1.0.4/go.mod h1:YynlIjWYF8myEu6sdkwKIvGQq+cOckRm6So2avqoYAk= +github.com/containerd/console v1.0.5 h1:R0ymNeydRqH2DmakFNdmjR2k0t7UPuiOV/N/27/qqsc= +github.com/containerd/console v1.0.5/go.mod h1:YynlIjWYF8myEu6sdkwKIvGQq+cOckRm6So2avqoYAk= github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/cyphar/filepath-securejoin v0.3.5 h1:L81NHjquoQmcPgXcttUS9qTSR/+bXry6pbSINQGpjj4= -github.com/cyphar/filepath-securejoin v0.3.5/go.mod h1:edhVd3c6OXKjUmSrVa/tGJRS9joFTxlslFCAyaxigkE= +github.com/cyphar/filepath-securejoin v0.5.1 
h1:eYgfMq5yryL4fbWfkLpFFy2ukSELzaJOTaUTuh+oF48= +github.com/cyphar/filepath-securejoin v0.5.1/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -48,8 +48,6 @@ github.com/mrunalp/fileutils v0.5.1 h1:F+S7ZlNKnrwHfSwdlgNSkKo67ReVf8o9fel6C3dkm github.com/mrunalp/fileutils v0.5.1/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk= github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= -github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU= -github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= @@ -68,9 +66,8 @@ github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpE github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= -github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/syndtr/gocapability 
v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI= github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= github.com/urfave/cli v1.22.14 h1:ebbhrRiGK2i4naQJr+1Xj92HXZCrK7MsyTS/ob3HnAk= @@ -81,8 +78,8 @@ github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df h1:OviZH7qLw/7Zo github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU= golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 h1:Jvc7gsqn21cJHCmAWx0LiimpP18LZmUxkT5Mp7EZ1mI= golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc= -golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w= -golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= +golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= +golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/internal/linux/doc.go b/internal/linux/doc.go new file mode 100644 index 00000000000..4d1eb900108 --- /dev/null +++ b/internal/linux/doc.go @@ -0,0 +1,3 @@ +// Package linux provides minimal wrappers around Linux system calls, primarily +// to provide support for automatic EINTR-retries. +package linux diff --git a/internal/linux/linux.go b/internal/linux/linux.go new file mode 100644 index 00000000000..f9e67534271 --- /dev/null +++ b/internal/linux/linux.go @@ -0,0 +1,44 @@ +package linux + +import ( + "os" + + "golang.org/x/sys/unix" +) + +// Readlinkat wraps [unix.Readlinkat]. 
+func Readlinkat(dir *os.File, path string) (string, error) { + size := 4096 + for { + linkBuf := make([]byte, size) + n, err := unix.Readlinkat(int(dir.Fd()), path, linkBuf) + if err != nil { + return "", &os.PathError{Op: "readlinkat", Path: dir.Name() + "/" + path, Err: err} + } + if n != size { + return string(linkBuf[:n]), nil + } + // Possible truncation, resize the buffer. + size *= 2 + } +} + +// GetPtyPeer is a wrapper for ioctl(TIOCGPTPEER). +func GetPtyPeer(ptyFd uintptr, unsafePeerPath string, flags int) (*os.File, error) { + // Make sure O_NOCTTY is always set -- otherwise runc might accidentally + // gain it as a controlling terminal. O_CLOEXEC also needs to be set to + // make sure we don't leak the handle either. + flags |= unix.O_NOCTTY | unix.O_CLOEXEC + + // There is no nice wrapper for this kind of ioctl in unix. + peerFd, _, errno := unix.Syscall( + unix.SYS_IOCTL, + ptyFd, + uintptr(unix.TIOCGPTPEER), + uintptr(flags), + ) + if errno != 0 { + return nil, os.NewSyscallError("ioctl TIOCGPTPEER", errno) + } + return os.NewFile(peerFd, unsafePeerPath), nil +} diff --git a/internal/pathrs/doc.go b/internal/pathrs/doc.go new file mode 100644 index 00000000000..496ca59510d --- /dev/null +++ b/internal/pathrs/doc.go @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: Apache-2.0 +/* + * Copyright (C) 2024-2025 Aleksa Sarai + * Copyright (C) 2024-2025 SUSE LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// Package pathrs provides wrappers around filepath-securejoin to add the +// minimum set of features needed from libpathrs that are not provided by +// filepath-securejoin, with the eventual goal being that these can be used to +// ease the transition by converting them to stubs when enabling libpathrs builds. +package pathrs diff --git a/internal/pathrs/mkdirall_pathrslite.go b/internal/pathrs/mkdirall_pathrslite.go new file mode 100644 index 00000000000..a9a0157c681 --- /dev/null +++ b/internal/pathrs/mkdirall_pathrslite.go @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: Apache-2.0 +/* + * Copyright (C) 2024-2025 Aleksa Sarai + * Copyright (C) 2024-2025 SUSE LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package pathrs + +import ( + "fmt" + "os" + "path/filepath" + + "github.com/cyphar/filepath-securejoin/pathrs-lite" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +// MkdirAllInRootOpen attempts to make +// +// path, _ := securejoin.SecureJoin(root, unsafePath) +// os.MkdirAll(path, mode) +// os.Open(path) +// +// safer against attacks where components in the path are changed between +// SecureJoin returning and MkdirAll (or Open) being called. In particular, we +// try to detect any symlink components in the path while we are doing the +// MkdirAll.
+// +// NOTE: If unsafePath is a subpath of root, we assume that you have already +// called SecureJoin and so we use the provided path verbatim without resolving +// any symlinks (this is done in a way that avoids symlink-exchange races). +// This means that the path also must not contain ".." elements, otherwise an +// error will occur. +// +// This uses (pathrs-lite).MkdirAllHandle under the hood, but it has special +// handling if unsafePath has already been scoped within the rootfs (this is +// needed for a lot of runc callers and fixing this would require reworking a +// lot of path logic). +func MkdirAllInRootOpen(root, unsafePath string, mode os.FileMode) (*os.File, error) { + // If the path is already "within" the root, get the path relative to the + // root and use that as the unsafe path. This is necessary because a lot of + // MkdirAllInRootOpen callers have already done SecureJoin, and refactoring + // all of them to stop using these SecureJoin'd paths would require a fair + // amount of work. + // TODO(cyphar): Do the refactor to libpathrs once it's ready. + if IsLexicallyInRoot(root, unsafePath) { + subPath, err := filepath.Rel(root, unsafePath) + if err != nil { + return nil, err + } + unsafePath = subPath + } + + // Check for any silly mode bits. + if mode&^0o7777 != 0 { + return nil, fmt.Errorf("tried to include non-mode bits in MkdirAll mode: 0o%.3o", mode) + } + // Linux (and thus os.MkdirAll) silently ignores the suid and sgid bits if + // passed. While it would make sense to return an error in that case (since + // the user has asked for a mode that won't be applied), for compatibility + // reasons we have to ignore these bits. 
+ if ignoredBits := mode &^ 0o1777; ignoredBits != 0 { + logrus.Warnf("MkdirAll called with no-op mode bits that are ignored by Linux: 0o%.3o", ignoredBits) + mode &= 0o1777 + } + + rootDir, err := os.OpenFile(root, unix.O_DIRECTORY|unix.O_CLOEXEC, 0) + if err != nil { + return nil, fmt.Errorf("open root handle: %w", err) + } + defer rootDir.Close() + + return retryEAGAIN(func() (*os.File, error) { + return pathrs.MkdirAllHandle(rootDir, unsafePath, mode) + }) +} + +// MkdirAllInRoot is a wrapper around MkdirAllInRootOpen which closes the +// returned handle, for callers that don't need to use it. +func MkdirAllInRoot(root, unsafePath string, mode os.FileMode) error { + f, err := MkdirAllInRootOpen(root, unsafePath, mode) + if err == nil { + _ = f.Close() + } + return err +} diff --git a/internal/pathrs/path.go b/internal/pathrs/path.go new file mode 100644 index 00000000000..1ee7c795d5b --- /dev/null +++ b/internal/pathrs/path.go @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: Apache-2.0 +/* + * Copyright (C) 2024-2025 Aleksa Sarai + * Copyright (C) 2024-2025 SUSE LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package pathrs + +import ( + "strings" +) + +// IsLexicallyInRoot is shorthand for strings.HasPrefix(path+"/", root+"/"), +// but properly handling the case where path or root have a "/" suffix. +// +// NOTE: The return value only makes sense if the path is already mostly cleaned +// (i.e., doesn't contain "..", ".", nor unneeded "/"s).
+func IsLexicallyInRoot(root, path string) bool { + root = strings.TrimRight(root, "/") + path = strings.TrimRight(path, "/") + return strings.HasPrefix(path+"/", root+"/") +} diff --git a/internal/pathrs/path_test.go b/internal/pathrs/path_test.go new file mode 100644 index 00000000000..19d577fba3b --- /dev/null +++ b/internal/pathrs/path_test.go @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: Apache-2.0 +/* + * Copyright (C) 2024-2025 Aleksa Sarai + * Copyright (C) 2024-2025 SUSE LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package pathrs + +import "testing" + +func TestIsLexicallyInRoot(t *testing.T) { + for _, test := range []struct { + name string + root, path string + expected bool + }{ + {"Equal1", "/foo", "/foo", true}, + {"Equal2", "/bar/baz", "/bar/baz", true}, + {"Equal3", "/bar/baz/", "/bar/baz/", true}, + {"Root", "/", "/foo/bar", true}, + {"Root-Equal", "/", "/", true}, + {"InRoot-Basic1", "/foo/bar", "/foo/bar/baz/abcd", true}, + {"InRoot-Basic2", "/a/b/c/d", "/a/b/c/d/e/f/g/h", true}, + {"InRoot-Long", "/var/lib/docker/container/1234abcde/rootfs", "/var/lib/docker/container/1234abcde/rootfs/a/b/c", true}, + {"InRoot-TrailingSlash1", "/foo/bar/", "/foo/bar", true}, + {"InRoot-TrailingSlash2", "/foo/", "/foo/bar/baz/boop", true}, + {"NotInRoot-Basic1", "/foo", "/bar", false}, + {"NotInRoot-Basic2", "/foo", "/bar", false}, + {"NotInRoot-Basic3", "/foo/bar/baz", "/foo/boo/baz/abc", false}, + {"NotInRoot-Long", "/var/lib/docker/container/1234abcde/rootfs", "/a/b/c", false}, + {"NotInRoot-Tricky1", "/foo/bar", "/foo/bara", false}, + {"NotInRoot-Tricky2", "/foo/bar", "/foo/ba/r", false}, + } { + t.Run(test.name, func(t *testing.T) { + got := IsLexicallyInRoot(test.root, test.path) + if test.expected != got { + t.Errorf("IsLexicallyInRoot(%q, %q) = %v (expected %v)", test.root, test.path, got, test.expected) + } + }) + } +} diff --git a/internal/pathrs/procfs_pathrslite.go b/internal/pathrs/procfs_pathrslite.go new file mode 100644 index 00000000000..37450a0eca7 --- /dev/null +++ b/internal/pathrs/procfs_pathrslite.go @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: Apache-2.0 +/* + * Copyright (C) 2025 Aleksa Sarai + * Copyright (C) 2025 SUSE LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package pathrs + +import ( + "fmt" + "os" + + "github.com/cyphar/filepath-securejoin/pathrs-lite" + "github.com/cyphar/filepath-securejoin/pathrs-lite/procfs" +) + +func procOpenReopen(openFn func(subpath string) (*os.File, error), subpath string, flags int) (*os.File, error) { + handle, err := retryEAGAIN(func() (*os.File, error) { + return openFn(subpath) + }) + if err != nil { + return nil, err + } + defer handle.Close() + + f, err := Reopen(handle, flags) + if err != nil { + return nil, fmt.Errorf("reopen %s: %w", handle.Name(), err) + } + return f, nil +} + +// ProcSelfOpen is a wrapper around [procfs.Handle.OpenSelf] and +// [pathrs.Reopen], to let you one-shot open a procfs file with the given +// flags. +func ProcSelfOpen(subpath string, flags int) (*os.File, error) { + proc, err := retryEAGAIN(procfs.OpenProcRoot) + if err != nil { + return nil, err + } + defer proc.Close() + return procOpenReopen(proc.OpenSelf, subpath, flags) +} + +// ProcPidOpen is a wrapper around [procfs.Handle.OpenPid] and [pathrs.Reopen], +// to let you one-shot open a procfs file with the given flags. 
+func ProcPidOpen(pid int, subpath string, flags int) (*os.File, error) { + proc, err := retryEAGAIN(procfs.OpenProcRoot) + if err != nil { + return nil, err + } + defer proc.Close() + return procOpenReopen(func(subpath string) (*os.File, error) { + return proc.OpenPid(pid, subpath) + }, subpath, flags) +} + +// ProcThreadSelfOpen is a wrapper around [procfs.Handle.OpenThreadSelf] and +// [pathrs.Reopen], to let you one-shot open a procfs file with the given +// flags. The returned [procfs.ProcThreadSelfCloser] needs the same handling as +// when using pathrs-lite. +func ProcThreadSelfOpen(subpath string, flags int) (_ *os.File, _ procfs.ProcThreadSelfCloser, Err error) { + proc, err := retryEAGAIN(procfs.OpenProcRoot) + if err != nil { + return nil, nil, err + } + defer proc.Close() + + handle, closer, err := retryEAGAIN2(func() (*os.File, procfs.ProcThreadSelfCloser, error) { + return proc.OpenThreadSelf(subpath) + }) + if err != nil { + return nil, nil, err + } + if closer != nil { + defer func() { + if Err != nil { + closer() + } + }() + } + defer handle.Close() + + f, err := Reopen(handle, flags) + if err != nil { + return nil, nil, fmt.Errorf("reopen %s: %w", handle.Name(), err) + } + return f, closer, nil +} + +// Reopen is a wrapper around pathrs.Reopen. +func Reopen(file *os.File, flags int) (*os.File, error) { + return retryEAGAIN(func() (*os.File, error) { + return pathrs.Reopen(file, flags) + }) +} diff --git a/internal/pathrs/retry.go b/internal/pathrs/retry.go new file mode 100644 index 00000000000..a51d335c0df --- /dev/null +++ b/internal/pathrs/retry.go @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: Apache-2.0 +/* + * Copyright (C) 2024-2025 Aleksa Sarai + * Copyright (C) 2024-2025 SUSE LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package pathrs + +import ( + "errors" + "fmt" + "time" + + "golang.org/x/sys/unix" +) + +// Based on >50k tests running "runc run" on a 16-core system with very heavy +// rename(2) load, the single longest latency caused by -EAGAIN retries was +// ~800us (with the vast majority being closer to 400us). So, a 2ms limit +// should give more than enough headroom for any real system in practice. +const retryDeadline = 2 * time.Millisecond + +// retryEAGAIN is a top-level retry loop for pathrs to try to avoid returning +// spurious errors in most normal user cases when using openat2 (libpathrs +// itself does up to 128 retries already, but this method takes a +// wallclock-deadline approach to simply retry until a timer elapses). +func retryEAGAIN[T any](fn func() (T, error)) (T, error) { + deadline := time.After(retryDeadline) + for { + v, err := fn() + if !errors.Is(err, unix.EAGAIN) { + return v, err + } + select { + case <-deadline: + return *new(T), fmt.Errorf("%v retry deadline exceeded: %w", retryDeadline, err) + default: + // retry + } + } +} + +// retryEAGAIN2 is like retryEAGAIN except it returns two values.
+func retryEAGAIN2[T1, T2 any](fn func() (T1, T2, error)) (T1, T2, error) { + type ret struct { + v1 T1 + v2 T2 + } + v, err := retryEAGAIN(func() (ret, error) { + v1, v2, err := fn() + return ret{v1: v1, v2: v2}, err + }) + return v.v1, v.v2, err +} diff --git a/internal/pathrs/root_pathrslite.go b/internal/pathrs/root_pathrslite.go new file mode 100644 index 00000000000..899af270369 --- /dev/null +++ b/internal/pathrs/root_pathrslite.go @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: Apache-2.0 +/* + * Copyright (C) 2024-2025 Aleksa Sarai + * Copyright (C) 2024-2025 SUSE LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package pathrs + +import ( + "fmt" + "os" + "path/filepath" + + "github.com/cyphar/filepath-securejoin/pathrs-lite" + "golang.org/x/sys/unix" +) + +// OpenInRoot opens the given path inside the root with the provided flags. It +// is effectively shorthand for [securejoin.OpenInRoot] followed by +// [securejoin.Reopen]. +func OpenInRoot(root, subpath string, flags int) (*os.File, error) { + handle, err := retryEAGAIN(func() (*os.File, error) { + return pathrs.OpenInRoot(root, subpath) + }) + if err != nil { + return nil, err + } + defer handle.Close() + + return Reopen(handle, flags) +} + +// CreateInRoot creates a new file inside a root (as well as any missing parent +// directories) and returns a handle to said file. This effectively has +// open(O_CREAT|O_NOFOLLOW) semantics. 
If you want the creation to use O_EXCL, +// include it in the passed flags. The fileMode argument uses unix.* mode bits, +// *not* os.FileMode. +func CreateInRoot(root, subpath string, flags int, fileMode uint32) (*os.File, error) { + dir, filename := filepath.Split(subpath) + if filepath.Join("/", filename) == "/" { + return nil, fmt.Errorf("create in root subpath %q has bad trailing component %q", subpath, filename) + } + + dirFd, err := MkdirAllInRootOpen(root, dir, 0o755) + if err != nil { + return nil, err + } + defer dirFd.Close() + + // We know that the filename does not have any "/" components, and that + // dirFd is inside the root. O_NOFOLLOW will stop us from following + // trailing symlinks, so this is safe to do. libpathrs's Root::create_file + // works the same way. + flags |= unix.O_CREAT | unix.O_NOFOLLOW + fd, err := unix.Openat(int(dirFd.Fd()), filename, flags, fileMode) + if err != nil { + return nil, err + } + return os.NewFile(uintptr(fd), root+"/"+subpath), nil +} diff --git a/internal/sys/doc.go b/internal/sys/doc.go new file mode 100644 index 00000000000..075387f7a3b --- /dev/null +++ b/internal/sys/doc.go @@ -0,0 +1,5 @@ +// Package sys is an internal package that contains helper methods for dealing +// with Linux that are more complicated than basic wrappers. Basic wrappers +// usually belong in internal/linux. If you feel something belongs in +// libcontainer/utils or libcontainer/system, it probably belongs here instead. +package sys diff --git a/internal/sys/opath_linux.go b/internal/sys/opath_linux.go new file mode 100644 index 00000000000..17a216bc505 --- /dev/null +++ b/internal/sys/opath_linux.go @@ -0,0 +1,53 @@ +package sys + +import ( + "fmt" + "os" + "runtime" + "strconv" + + "golang.org/x/sys/unix" + + "github.com/opencontainers/runc/internal/pathrs" +) + +// FchmodFile is a wrapper around fchmodat2(AT_EMPTY_PATH) with fallbacks for +// older kernels. 
This is distinct from [File.Chmod] and [unix.Fchmod] in that +// it works on O_PATH file descriptors. +func FchmodFile(f *os.File, mode uint32) error { + err := unix.Fchmodat(int(f.Fd()), "", mode, unix.AT_EMPTY_PATH) + // If fchmodat2(2) is not available at all, golang.org/x/unix (probably + // in order to mirror glibc) returns EOPNOTSUPP rather than EINVAL + // (what the kernel actually returns for invalid flags, which is being + // emulated) or ENOSYS (which is what glibc actually sees). + if err != unix.EINVAL && err != unix.EOPNOTSUPP { //nolint:errorlint // unix errors are bare + // err == nil is implicitly handled + return os.NewSyscallError("fchmodat2 AT_EMPTY_PATH", err) + } + + // AT_EMPTY_PATH support was added to fchmodat2 in Linux 6.6 + // (5daeb41a6fc9d0d81cb2291884b7410e062d8fa1). The alternative for + // older kernels is to go through /proc. + fdDir, closer, err2 := pathrs.ProcThreadSelfOpen("fd/", unix.O_DIRECTORY) + if err2 != nil { + return fmt.Errorf("fchmodat2 AT_EMPTY_PATH fallback: %w", err2) + } + defer closer() + defer fdDir.Close() + + err = unix.Fchmodat(int(fdDir.Fd()), strconv.Itoa(int(f.Fd())), mode, 0) + if err != nil { + err = fmt.Errorf("fchmodat /proc/self/fd/%d: %w", f.Fd(), err) + } + runtime.KeepAlive(f) + return err +} + +// FchownFile is a wrapper around fchownat(AT_EMPTY_PATH). This is distinct +// from [File.Chown] and [unix.Fchown] in that it works on O_PATH file +// descriptors. 
+func FchownFile(f *os.File, uid, gid int) error { + err := unix.Fchownat(int(f.Fd()), "", uid, gid, unix.AT_EMPTY_PATH) + runtime.KeepAlive(f) + return os.NewSyscallError("fchownat AT_EMPTY_PATH", err) +} diff --git a/internal/sys/sysctl_linux.go b/internal/sys/sysctl_linux.go new file mode 100644 index 00000000000..96876a55fff --- /dev/null +++ b/internal/sys/sysctl_linux.go @@ -0,0 +1,54 @@ +package sys + +import ( + "fmt" + "io" + "os" + "strings" + + "golang.org/x/sys/unix" + + "github.com/cyphar/filepath-securejoin/pathrs-lite" + "github.com/cyphar/filepath-securejoin/pathrs-lite/procfs" +) + +func procfsOpenRoot(proc *procfs.Handle, subpath string, flags int) (*os.File, error) { + handle, err := proc.OpenRoot(subpath) + if err != nil { + return nil, err + } + defer handle.Close() + + return pathrs.Reopen(handle, flags) +} + +// WriteSysctls sets the given sysctls to the requested values. +func WriteSysctls(sysctls map[string]string) error { + // We are going to write multiple sysctls, which require writing to an + // unmasked procfs which is not going to be cached. To avoid creating a new + // procfs instance for each one, just allocate one handle for all of them. 
+ proc, err := procfs.OpenUnsafeProcRoot() + if err != nil { + return err + } + defer proc.Close() + + for key, value := range sysctls { + keyPath := strings.ReplaceAll(key, ".", "/") + + sysctlFile, err := procfsOpenRoot(proc, "sys/"+keyPath, unix.O_WRONLY|unix.O_TRUNC|unix.O_CLOEXEC) + if err != nil { + return fmt.Errorf("open sysctl %s file: %w", key, err) + } + defer sysctlFile.Close() + + n, err := io.WriteString(sysctlFile, value) + if n != len(value) && err == nil { + err = fmt.Errorf("short write to file (%d bytes != %d bytes)", n, len(value)) + } + if err != nil { + return fmt.Errorf("failed to write sysctl %s = %q: %w", key, value, err) + } + } + return nil +} diff --git a/internal/sys/verify_inode_unix.go b/internal/sys/verify_inode_unix.go new file mode 100644 index 00000000000..d5019db57e0 --- /dev/null +++ b/internal/sys/verify_inode_unix.go @@ -0,0 +1,30 @@ +package sys + +import ( + "fmt" + "os" + "runtime" + + "golang.org/x/sys/unix" +) + +// VerifyInodeFunc is the callback passed to [VerifyInode] to check if the +// inode is the expected type (and on the correct filesystem type, in the case +// of filesystem-specific inodes). +type VerifyInodeFunc func(stat *unix.Stat_t, statfs *unix.Statfs_t) error + +// VerifyInode verifies that the underlying inode for the given file matches an +// expected inode type (possibly on a particular kind of filesystem). This is +// mainly a wrapper around [VerifyInodeFunc]. 
+func VerifyInode(file *os.File, checkFunc VerifyInodeFunc) error { + var stat unix.Stat_t + if err := unix.Fstat(int(file.Fd()), &stat); err != nil { + return fmt.Errorf("fstat %q: %w", file.Name(), err) + } + var statfs unix.Statfs_t + if err := unix.Fstatfs(int(file.Fd()), &statfs); err != nil { + return fmt.Errorf("fstatfs %q: %w", file.Name(), err) + } + runtime.KeepAlive(file) + return checkFunc(&stat, &statfs) +} diff --git a/internal/third_party/selinux/.codespellrc b/internal/third_party/selinux/.codespellrc new file mode 100644 index 00000000000..8f0866a3e67 --- /dev/null +++ b/internal/third_party/selinux/.codespellrc @@ -0,0 +1,2 @@ +[codespell] +skip = ./.git,./go.sum,./go-selinux/testdata diff --git a/internal/third_party/selinux/.github/dependabot.yml b/internal/third_party/selinux/.github/dependabot.yml new file mode 100644 index 00000000000..b534a2b9ff0 --- /dev/null +++ b/internal/third_party/selinux/.github/dependabot.yml @@ -0,0 +1,10 @@ +# Please see the documentation for all configuration options: +# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file + +version: 2 +updates: + # Dependencies listed in .github/workflows/*.yml + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "daily" diff --git a/internal/third_party/selinux/.github/workflows/validate.yml b/internal/third_party/selinux/.github/workflows/validate.yml new file mode 100644 index 00000000000..fab1cb49422 --- /dev/null +++ b/internal/third_party/selinux/.github/workflows/validate.yml @@ -0,0 +1,163 @@ +name: validate +on: + push: + tags: + - v* + branches: + - master + pull_request: + +jobs: + + commit: + runs-on: ubuntu-24.04 + # Only check commits on pull requests. 
+ if: github.event_name == 'pull_request' + steps: + - name: get pr commits + id: 'get-pr-commits' + uses: tim-actions/get-pr-commits@v1.3.1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + + - name: check subject line length + uses: tim-actions/commit-message-checker-with-regex@v0.3.2 + with: + commits: ${{ steps.get-pr-commits.outputs.commits }} + pattern: '^.{0,72}(\n.*)*$' + error: 'Subject too long (max 72)' + + lint: + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v5 + - uses: actions/setup-go@v6 + with: + go-version: 1.24.x + - uses: golangci/golangci-lint-action@v7 + with: + version: v2.0 + + codespell: + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v5 + - name: install deps + # Version of codespell bundled with Ubuntu is way old, so use pip. + run: pip install codespell + - name: run codespell + run: codespell + + cross: + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v5 + - name: cross + run: make build-cross + + test-stubs: + runs-on: macos-latest + steps: + - uses: actions/checkout@v5 + - uses: actions/setup-go@v6 + with: + go-version: 1.24.x + - uses: golangci/golangci-lint-action@v7 + with: + version: v2.0 + - name: test-stubs + run: make test + + test: + strategy: + fail-fast: false + matrix: + go-version: [1.19.x, 1.23.x, 1.24.x] + race: ["-race", ""] + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v5 + + - name: install go ${{ matrix.go-version }} + uses: actions/setup-go@v6 + with: + go-version: ${{ matrix.go-version }} + + - name: build + run: make BUILDFLAGS="${{ matrix.race }}" build + + - name: test + run: make TESTFLAGS="${{ matrix.race }}" test + + vm: + name: "VM" + strategy: + fail-fast: false + matrix: + template: + - template://almalinux-8 + - template://centos-stream-9 + - template://fedora + - template://experimental/opensuse-tumbleweed + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v5 + + - name: "Install Lima" + uses: lima-vm/lima-actions/setup@v1 + id: lima-actions-setup + 
+ - name: "Cache ~/.cache/lima" + uses: actions/cache@v4 + with: + path: ~/.cache/lima + key: lima-${{ steps.lima-actions-setup.outputs.version }}-${{ matrix.template }} + + - name: "Start VM" + # --plain is set to disable file sharing, port forwarding, built-in containerd, etc. for faster start up + run: limactl start --plain --name=default ${{ matrix.template }} + + - name: "Initialize VM" + run: | + set -eux -o pipefail + # Sync the current directory to /tmp/selinux in the guest + limactl cp -r . default:/tmp/selinux + # Install packages + if lima command -v dnf >/dev/null; then + lima sudo dnf install --setopt=install_weak_deps=false --setopt=tsflags=nodocs -y git-core make golang + elif lima command -v zypper >/dev/null; then + lima sudo zypper install -y git make go + else + echo >&2 "Unsupported distribution" + exit 1 + fi + + - name: "make test" + continue-on-error: true + run: lima make -C /tmp/selinux test + + - name: "32-bit test" + continue-on-error: true + run: lima make -C /tmp/selinux GOARCH=386 test + + # https://github.com/opencontainers/selinux/issues/222 + # https://github.com/opencontainers/selinux/issues/225 + - name: "racy test" + continue-on-error: true + run: lima bash -c 'cd /tmp/selinux && go test -timeout 10m -count 100000 ./go-selinux' + + - name: "Show AVC denials" + run: lima sudo ausearch -m AVC,USER_AVC || true + + all-done: + needs: + - commit + - lint + - codespell + - cross + - test-stubs + - test + - vm + runs-on: ubuntu-24.04 + steps: + - run: echo "All jobs completed" diff --git a/internal/third_party/selinux/.gitignore b/internal/third_party/selinux/.gitignore new file mode 100644 index 00000000000..378eac25d31 --- /dev/null +++ b/internal/third_party/selinux/.gitignore @@ -0,0 +1 @@ +build diff --git a/internal/third_party/selinux/.golangci.yml b/internal/third_party/selinux/.golangci.yml new file mode 100644 index 00000000000..b1b98925140 --- /dev/null +++ b/internal/third_party/selinux/.golangci.yml @@ -0,0 +1,44 @@ 
+version: "2" + +formatters: + enable: + - gofumpt + +linters: + enable: + # - copyloopvar # Detects places where loop variables are copied. TODO enable for Go 1.22+ + - dupword # Detects duplicate words. + - errorlint # Detects code that may cause problems with Go 1.13 error wrapping. + - gocritic # Metalinter; detects bugs, performance, and styling issues. + - gosec # Detects security problems. + - misspell # Detects commonly misspelled English words in comments. + - nilerr # Detects code that returns nil even if it checks that the error is not nil. + - nolintlint # Detects ill-formed or insufficient nolint directives. + - prealloc # Detects slice declarations that could potentially be pre-allocated. + - predeclared # Detects code that shadows one of Go's predeclared identifiers + - revive # Metalinter; drop-in replacement for golint. + - thelper # Detects test helpers without t.Helper(). + - tparallel # Detects inappropriate usage of t.Parallel(). + - unconvert # Detects unnecessary type conversions. + - usetesting # Reports uses of functions with replacement inside the testing package. 
+ settings: + govet: + enable-all: true + settings: + shadow: + strict: true + exclusions: + generated: strict + presets: + - comments + - common-false-positives + - legacy + - std-error-handling + rules: + - linters: + - govet + text: '^shadow: declaration of "err" shadows declaration' + +issues: + max-issues-per-linter: 0 + max-same-issues: 0 diff --git a/internal/third_party/selinux/CODEOWNERS b/internal/third_party/selinux/CODEOWNERS new file mode 100644 index 00000000000..14392178a11 --- /dev/null +++ b/internal/third_party/selinux/CODEOWNERS @@ -0,0 +1 @@ +* @kolyshkin @mrunalp @rhatdan @runcom @thajeztah diff --git a/internal/third_party/selinux/CONTRIBUTING.md b/internal/third_party/selinux/CONTRIBUTING.md new file mode 100644 index 00000000000..dc3ff6a516e --- /dev/null +++ b/internal/third_party/selinux/CONTRIBUTING.md @@ -0,0 +1,119 @@ +## Contribution Guidelines + +### Security issues + +If you are reporting a security issue, do not create an issue or file a pull +request on GitHub. Instead, disclose the issue responsibly by sending an email +to security@opencontainers.org (which is inhabited only by the maintainers of +the various OCI projects). + +### Pull requests are always welcome + +We are always thrilled to receive pull requests, and do our best to +process them as fast as possible. Not sure if that typo is worth a pull +request? Do it! We will appreciate it. + +If your pull request is not accepted on the first try, don't be +discouraged! If there's a problem with the implementation, hopefully you +received feedback on what to improve. + +We're trying very hard to keep the project lean and focused. We don't want it +to do everything for everybody. This means that we might decide against +incorporating a new feature. + + +### Conventions + +Fork the repo and make changes on your fork in a feature branch. 
+For larger bugs and enhancements, consider filing a leader issue or mailing-list thread for discussion that is independent of the implementation. +Small changes or changes that have been discussed on the project mailing list may be submitted without a leader issue. + +If the project has a test suite, submit unit tests for your changes. Take a +look at existing tests for inspiration. Run the full test suite on your branch +before submitting a pull request. + +Update the documentation when creating or modifying features. Test +your documentation changes for clarity, concision, and correctness, as +well as a clean documentation build. See ``docs/README.md`` for more +information on building the docs and how docs get released. + +Write clean code. Universally formatted code promotes ease of writing, reading, +and maintenance. Always run `gofmt -s -w file.go` on each changed file before +committing your changes. Most editors have plugins that do this automatically. + +Pull requests descriptions should be as clear as possible and include a +reference to all the issues that they address. + +Commit messages must start with a capitalized and short summary +written in the imperative, followed by an optional, more detailed +explanatory text which is separated from the summary by an empty line. + +Code review comments may be added to your pull request. Discuss, then make the +suggested modifications and push additional commits to your feature branch. Be +sure to post a comment after pushing. The new commits will show up in the pull +request automatically, but the reviewers will not be notified unless you +comment. + +Before the pull request is merged, make sure that you squash your commits into +logical units of work using `git rebase -i` and `git push -f`. After every +commit the test suite (if any) should be passing. Include documentation changes +in the same commit so that a revert would remove all traces of the feature or +fix. 
+ +Commits that fix or close an issue should include a reference like `Closes #XXX` +or `Fixes #XXX`, which will automatically close the issue when merged. + +### Sign your work + +The sign-off is a simple line at the end of the explanation for the +patch, which certifies that you wrote it or otherwise have the right to +pass it on as an open-source patch. The rules are pretty simple: if you +can certify the below (from +[developercertificate.org](http://developercertificate.org/)): + +``` +Developer Certificate of Origin +Version 1.1 + +Copyright (C) 2004, 2006 The Linux Foundation and its contributors. +660 York Street, Suite 102, +San Francisco, CA 94110 USA + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. + + +Developer's Certificate of Origin 1.1 + +By making a contribution to this project, I certify that: + +(a) The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or + +(b) The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or + +(c) The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified + it. + +(d) I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. 
+``` + +then you just add a line to every git commit message: + + Signed-off-by: Joe Smith + +using your real name (sorry, no pseudonyms or anonymous contributions.) + +You can add the sign off when creating the git commit via `git commit -s`. diff --git a/internal/third_party/selinux/LICENSE b/internal/third_party/selinux/LICENSE new file mode 100644 index 00000000000..8dada3edaf5 --- /dev/null +++ b/internal/third_party/selinux/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/internal/third_party/selinux/MAINTAINERS b/internal/third_party/selinux/MAINTAINERS new file mode 100644 index 00000000000..748c18b4cd3 --- /dev/null +++ b/internal/third_party/selinux/MAINTAINERS @@ -0,0 +1,5 @@ +Antonio Murdaca (@runcom) +Daniel J Walsh (@rhatdan) +Mrunal Patel (@mrunalp) +Sebastiaan van Stijn (@thaJeztah) +Kirill Kolyshkin (@kolyshkin) diff --git a/internal/third_party/selinux/Makefile b/internal/third_party/selinux/Makefile new file mode 100644 index 00000000000..f7b9c3dac56 --- /dev/null +++ b/internal/third_party/selinux/Makefile @@ -0,0 +1,37 @@ +GO ?= go + +all: build build-cross + +define go-build + GOOS=$(1) GOARCH=$(2) $(GO) build ${BUILDFLAGS} ./... +endef + +.PHONY: build +build: + $(call go-build,linux,amd64) + +.PHONY: build-cross +build-cross: + $(call go-build,linux,386) + $(call go-build,linux,arm) + $(call go-build,linux,arm64) + $(call go-build,linux,ppc64le) + $(call go-build,linux,s390x) + $(call go-build,linux,mips64le) + $(call go-build,linux,riscv64) + $(call go-build,windows,amd64) + $(call go-build,windows,386) + + +.PHONY: test +test: + $(GO) test -timeout 3m ${TESTFLAGS} -v ./...
+ +.PHONY: lint +lint: + golangci-lint run + +.PHONY: vendor +vendor: + $(GO) mod tidy + $(GO) mod verify diff --git a/internal/third_party/selinux/README.md b/internal/third_party/selinux/README.md new file mode 100644 index 00000000000..cd6a60f805d --- /dev/null +++ b/internal/third_party/selinux/README.md @@ -0,0 +1,23 @@ +# selinux + +[![GoDoc](https://godoc.org/github.com/opencontainers/selinux?status.svg)](https://godoc.org/github.com/opencontainers/selinux) [![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/selinux)](https://goreportcard.com/report/github.com/opencontainers/selinux) [![Build Status](https://travis-ci.org/opencontainers/selinux.svg?branch=master)](https://travis-ci.org/opencontainers/selinux) + +Common SELinux package used across the container ecosystem. + +## Usage + +Prior to v1.8.0, the `selinux` build tag had to be used to enable selinux functionality for compiling consumers of this project. +Starting with v1.8.0, the `selinux` build tag is no longer needed. + +For complete documentation, see [godoc](https://godoc.org/github.com/opencontainers/selinux). + +## Code of Conduct + +Participation in the OpenContainers community is governed by [OpenContainer's Code of Conduct][code-of-conduct]. + +## Security + +If you find an issue, please follow the [security][security] protocol to report it. + +[security]: https://github.com/opencontainers/org/blob/master/SECURITY.md +[code-of-conduct]: https://github.com/opencontainers/org/blob/master/CODE_OF_CONDUCT.md diff --git a/internal/third_party/selinux/go-selinux/doc.go b/internal/third_party/selinux/go-selinux/doc.go new file mode 100644 index 00000000000..57a15c9a11e --- /dev/null +++ b/internal/third_party/selinux/go-selinux/doc.go @@ -0,0 +1,13 @@ +/* +Package selinux provides a high-level interface for interacting with selinux. + +Usage: + + import "github.com/opencontainers/selinux/go-selinux" + + // Ensure that selinux is enforcing mode. 
+ if selinux.EnforceMode() != selinux.Enforcing { + selinux.SetEnforceMode(selinux.Enforcing) + } +*/ +package selinux diff --git a/internal/third_party/selinux/go-selinux/label/label.go b/internal/third_party/selinux/go-selinux/label/label.go new file mode 100644 index 00000000000..884a8b80593 --- /dev/null +++ b/internal/third_party/selinux/go-selinux/label/label.go @@ -0,0 +1,48 @@ +package label + +import ( + "fmt" + + "github.com/opencontainers/selinux/go-selinux" +) + +// Init initialises the labeling system +func Init() { + _ = selinux.GetEnabled() +} + +// FormatMountLabel returns a string to be used by the mount command, using +// the SELinux `context` mount option. Labels of files on mount +// points with this option can never be changed. +// FormatMountLabel returns a string to be used by the mount command. +// The format of this string will be used to alter the labeling of the mountpoint. +// The string returned is suitable to be used as the options field of the mount command. +// If you need to have additional mount point options, you can pass them in as +// the first parameter. Second parameter is the label that you wish to apply +// to all content in the mount point. +func FormatMountLabel(src, mountLabel string) string { + return FormatMountLabelByType(src, mountLabel, "context") +} + +// FormatMountLabelByType returns a string to be used by the mount command. +// Allows the caller to specify the mount options. For example, using the SELinux +// `fscontext` mount option would allow certain container processes to change +// labels of files created on the mount points, whereas the `context` option does +// not. +// FormatMountLabelByType returns a string to be used by the mount command. +// The format of this string will be used to alter the labeling of the mountpoint. +// The string returned is suitable to be used as the options field of the mount command.
+// If you need to have additional mount point options, you can pass them in as +// the first parameter. Second parameter is the label that you wish to apply +// to all content in the mount point. +func FormatMountLabelByType(src, mountLabel, contextType string) string { + if mountLabel != "" { + switch src { + case "": + src = fmt.Sprintf("%s=%q", contextType, mountLabel) + default: + src = fmt.Sprintf("%s,%s=%q", src, contextType, mountLabel) + } + } + return src +} diff --git a/internal/third_party/selinux/go-selinux/label/label_linux.go b/internal/third_party/selinux/go-selinux/label/label_linux.go new file mode 100644 index 00000000000..95f29e21f4e --- /dev/null +++ b/internal/third_party/selinux/go-selinux/label/label_linux.go @@ -0,0 +1,136 @@ +package label + +import ( + "errors" + "fmt" + "strings" + + "github.com/opencontainers/selinux/go-selinux" +) + +// Valid Label Options +var validOptions = map[string]bool{ + "disable": true, + "type": true, + "filetype": true, + "user": true, + "role": true, + "level": true, +} + +var ErrIncompatibleLabel = errors.New("bad SELinux option: z and Z can not be used together") + +// InitLabels returns the process label and file labels to be used within +// the container. A list of options can be passed into this function to alter +// the labels. The labels returned will include a random MCS String, that is +// guaranteed to be unique. +// If the disabled flag is passed in, the process label will not be set, but the mount label will be set +// to the container_file label with the maximum category. This label is not usable by any confined label. 
+func InitLabels(options []string) (plabel string, mlabel string, retErr error) { + if !selinux.GetEnabled() { + return "", "", nil + } + processLabel, mountLabel := selinux.ContainerLabels() + if processLabel != "" { + defer func() { + if retErr != nil { + selinux.ReleaseLabel(mountLabel) + } + }() + pcon, err := selinux.NewContext(processLabel) + if err != nil { + return "", "", err + } + mcsLevel := pcon["level"] + mcon, err := selinux.NewContext(mountLabel) + if err != nil { + return "", "", err + } + for _, opt := range options { + if opt == "disable" { + selinux.ReleaseLabel(mountLabel) + return "", selinux.PrivContainerMountLabel(), nil + } + if i := strings.Index(opt, ":"); i == -1 { + return "", "", fmt.Errorf("bad label option %q, valid options 'disable' or \n'user, role, level, type, filetype' followed by ':' and a value", opt) + } + con := strings.SplitN(opt, ":", 2) + if !validOptions[con[0]] { + return "", "", fmt.Errorf("bad label option %q, valid options 'disable, user, role, level, type, filetype'", con[0]) + } + if con[0] == "filetype" { + mcon["type"] = con[1] + continue + } + pcon[con[0]] = con[1] + if con[0] == "level" || con[0] == "user" { + mcon[con[0]] = con[1] + } + } + if pcon.Get() != processLabel { + if pcon["level"] != mcsLevel { + selinux.ReleaseLabel(processLabel) + } + processLabel = pcon.Get() + selinux.ReserveLabel(processLabel) + } + mountLabel = mcon.Get() + } + return processLabel, mountLabel, nil +} + +// SetFileLabel modifies the "path" label to the specified file label +func SetFileLabel(path string, fileLabel string) error { + if !selinux.GetEnabled() || fileLabel == "" { + return nil + } + return selinux.SetFileLabel(path, fileLabel) +} + +// SetFileCreateLabel tells the kernel the label for all files to be created +func SetFileCreateLabel(fileLabel string) error { + if !selinux.GetEnabled() { + return nil + } + return selinux.SetFSCreateLabel(fileLabel) +} + +// Relabel changes the label of path and all the entries beneath 
the path. +// It changes the MCS label to s0 if shared is true. +// This will allow all containers to share the content. +// +// The path itself is guaranteed to be relabeled last. +func Relabel(path string, fileLabel string, shared bool) error { + if !selinux.GetEnabled() || fileLabel == "" { + return nil + } + + if shared { + c, err := selinux.NewContext(fileLabel) + if err != nil { + return err + } + + c["level"] = "s0" + fileLabel = c.Get() + } + return selinux.Chcon(path, fileLabel, true) +} + +// Validate checks that the label does not include unexpected options +func Validate(label string) error { + if strings.Contains(label, "z") && strings.Contains(label, "Z") { + return ErrIncompatibleLabel + } + return nil +} + +// RelabelNeeded checks whether the user requested a relabel +func RelabelNeeded(label string) bool { + return strings.Contains(label, "z") || strings.Contains(label, "Z") +} + +// IsShared checks that the label includes a "shared" mark +func IsShared(label string) bool { + return strings.Contains(label, "z") +} diff --git a/internal/third_party/selinux/go-selinux/label/label_linux_test.go b/internal/third_party/selinux/go-selinux/label/label_linux_test.go new file mode 100644 index 00000000000..e25ead7951b --- /dev/null +++ b/internal/third_party/selinux/go-selinux/label/label_linux_test.go @@ -0,0 +1,130 @@ +package label + +import ( + "errors" + "os" + "testing" + + "github.com/opencontainers/selinux/go-selinux" +) + +func needSELinux(t *testing.T) { + t.Helper() + if !selinux.GetEnabled() { + t.Skip("SELinux not enabled, skipping.") + } +} + +func TestInit(t *testing.T) { + needSELinux(t) + + var testNull []string + _, _, err := InitLabels(testNull) + if err != nil { + t.Fatalf("InitLabels failed: %v:", err) + } + testDisabled := []string{"disable"} + if selinux.ROFileLabel() == "" { + t.Fatal("selinux.ROFileLabel: empty") + } + plabel, mlabel, err := InitLabels(testDisabled) + if err != nil { + t.Fatalf("InitLabels(disabled) failed: %v", 
err) + } + if plabel != "" { + t.Fatalf("InitLabels(disabled): %q not empty", plabel) + } + if mlabel != "system_u:object_r:container_file_t:s0:c1022,c1023" { + t.Fatalf("InitLabels Disabled mlabel Failed, %s", mlabel) + } + + testUser := []string{"user:user_u", "role:user_r", "type:user_t", "level:s0:c1,c15"} + plabel, mlabel, err = InitLabels(testUser) + if err != nil { + t.Fatalf("InitLabels(user) failed: %v", err) + } + if plabel != "user_u:user_r:user_t:s0:c1,c15" || (mlabel != "user_u:object_r:container_file_t:s0:c1,c15" && mlabel != "user_u:object_r:svirt_sandbox_file_t:s0:c1,c15") { + t.Fatalf("InitLabels(user) failed (plabel=%q, mlabel=%q)", plabel, mlabel) + } + + testBadData := []string{"user", "role:user_r", "type:user_t", "level:s0:c1,c15"} + if _, _, err = InitLabels(testBadData); err == nil { + t.Fatal("InitLabels(bad): expected error, got nil") + } +} + +func TestRelabel(t *testing.T) { + needSELinux(t) + + testdir := t.TempDir() + label := "system_u:object_r:container_file_t:s0:c1,c2" + if err := Relabel(testdir, "", true); err != nil { + t.Fatalf("Relabel with no label failed: %v", err) + } + if err := Relabel(testdir, label, true); err != nil { + t.Fatalf("Relabel shared failed: %v", err) + } + if err := Relabel(testdir, label, false); err != nil { + t.Fatalf("Relabel unshared failed: %v", err) + } + if err := Relabel("/etc", label, false); err == nil { + t.Fatalf("Relabel /etc succeeded") + } + if err := Relabel("/", label, false); err == nil { + t.Fatalf("Relabel / succeeded") + } + if err := Relabel("/usr", label, false); err == nil { + t.Fatalf("Relabel /usr succeeded") + } + if err := Relabel("/usr/", label, false); err == nil { + t.Fatalf("Relabel /usr/ succeeded") + } + if err := Relabel("/etc/passwd", label, false); err == nil { + t.Fatalf("Relabel /etc/passwd succeeded") + } + if home := os.Getenv("HOME"); home != "" { + if err := Relabel(home, label, false); err == nil { + t.Fatalf("Relabel %s succeeded", home) + } + } +} + +func 
TestValidate(t *testing.T) { + if err := Validate("zZ"); !errors.Is(err, ErrIncompatibleLabel) { + t.Fatalf("Expected incompatible error, got %v", err) + } + if err := Validate("Z"); err != nil { + t.Fatal(err) + } + if err := Validate("z"); err != nil { + t.Fatal(err) + } + if err := Validate(""); err != nil { + t.Fatal(err) + } +} + +func TestIsShared(t *testing.T) { + if shared := IsShared("Z"); shared { + t.Fatalf("Expected label `Z` to not be shared, got %v", shared) + } + if shared := IsShared("z"); !shared { + t.Fatalf("Expected label `z` to be shared, got %v", shared) + } + if shared := IsShared("Zz"); !shared { + t.Fatalf("Expected label `Zz` to be shared, got %v", shared) + } +} + +func TestFileLabel(t *testing.T) { + needSELinux(t) + + testUser := []string{"filetype:test_file_t", "level:s0:c1,c15"} + _, mlabel, err := InitLabels(testUser) + if err != nil { + t.Fatalf("InitLabels(user) failed: %v", err) + } + if mlabel != "system_u:object_r:test_file_t:s0:c1,c15" { + t.Fatalf("InitLabels(filetype) failed: unexpected mlabel %q", mlabel) + } +} diff --git a/internal/third_party/selinux/go-selinux/label/label_stub.go b/internal/third_party/selinux/go-selinux/label/label_stub.go new file mode 100644 index 00000000000..7a54afc5e6d --- /dev/null +++ b/internal/third_party/selinux/go-selinux/label/label_stub.go @@ -0,0 +1,44 @@ +//go:build !linux +// +build !linux + +package label + +// InitLabels returns the process label and file labels to be used within +// the container. A list of options can be passed into this function to alter +// the labels.
+func InitLabels([]string) (string, string, error) { + return "", "", nil +} + +func SetFileLabel(string, string) error { + return nil +} + +func SetFileCreateLabel(string) error { + return nil +} + +func Relabel(string, string, bool) error { + return nil +} + +// DisableSecOpt returns a security opt that can disable labeling +// support for future container processes +func DisableSecOpt() []string { + return nil +} + +// Validate checks that the label does not include unexpected options +func Validate(string) error { + return nil +} + +// RelabelNeeded checks whether the user requested a relabel +func RelabelNeeded(string) bool { + return false +} + +// IsShared checks that the label includes a "shared" mark +func IsShared(string) bool { + return false +} diff --git a/internal/third_party/selinux/go-selinux/label/label_stub_test.go b/internal/third_party/selinux/go-selinux/label/label_stub_test.go new file mode 100644 index 00000000000..e92cc8b9455 --- /dev/null +++ b/internal/third_party/selinux/go-selinux/label/label_stub_test.go @@ -0,0 +1,76 @@ +//go:build !linux +// +build !linux + +package label + +import ( + "testing" + + "github.com/opencontainers/selinux/go-selinux" +) + +const testLabel = "system_u:object_r:container_file_t:s0:c1,c2" + +func TestInit(t *testing.T) { + var testNull []string + _, _, err := InitLabels(testNull) + if err != nil { + t.Log("InitLabels Failed") + t.Fatal(err) + } + testDisabled := []string{"disable"} + if selinux.ROFileLabel() != "" { + t.Error("selinux.ROFileLabel Failed") + } + plabel, mlabel, err := InitLabels(testDisabled) + if err != nil { + t.Log("InitLabels Disabled Failed") + t.Fatal(err) + } + if plabel != "" { + t.Fatal("InitLabels Disabled Failed") + } + if mlabel != "" { + t.Fatal("InitLabels Disabled mlabel Failed") + } + testUser := []string{"user:user_u", "role:user_r", "type:user_t", "level:s0:c1,c15"} + _, _, err = InitLabels(testUser) + if err != nil { + t.Log("InitLabels User Failed") + t.Fatal(err) + } +} + 
+func TestRelabel(t *testing.T) { + if err := Relabel("/etc", testLabel, false); err != nil { + t.Fatalf("Relabel /etc failed: %v", err) + } +} + +func TestCheckLabelCompile(t *testing.T) { + if _, _, err := InitLabels(nil); err != nil { + t.Fatal(err) + } + + tmpDir := t.TempDir() + + if err := SetFileLabel(tmpDir, "foobar"); err != nil { + t.Fatal(err) + } + + if err := SetFileCreateLabel("foobar"); err != nil { + t.Fatal(err) + } + + DisableSecOpt() + + if err := Validate("foobar"); err != nil { + t.Fatal(err) + } + if relabel := RelabelNeeded("foobar"); relabel { + t.Fatal("Relabel failed") + } + if shared := IsShared("foobar"); shared { + t.Fatal("isshared failed") + } +} diff --git a/internal/third_party/selinux/go-selinux/label/label_test.go b/internal/third_party/selinux/go-selinux/label/label_test.go new file mode 100644 index 00000000000..fb172f3ff23 --- /dev/null +++ b/internal/third_party/selinux/go-selinux/label/label_test.go @@ -0,0 +1,35 @@ +package label + +import "testing" + +func TestFormatMountLabel(t *testing.T) { + expected := `context="foobar"` + if test := FormatMountLabel("", "foobar"); test != expected { + t.Fatalf("Format failed. Expected %s, got %s", expected, test) + } + + expected = `src,context="foobar"` + if test := FormatMountLabel("src", "foobar"); test != expected { + t.Fatalf("Format failed. Expected %s, got %s", expected, test) + } + + expected = `src` + if test := FormatMountLabel("src", ""); test != expected { + t.Fatalf("Format failed. Expected %s, got %s", expected, test) + } + + expected = `fscontext="foobar"` + if test := FormatMountLabelByType("", "foobar", "fscontext"); test != expected { + t.Fatalf("Format failed. Expected %s, got %s", expected, test) + } + + expected = `src,fscontext="foobar"` + if test := FormatMountLabelByType("src", "foobar", "fscontext"); test != expected { + t.Fatalf("Format failed.
Expected %s, got %s", expected, test) + } + + expected = `src` + if test := FormatMountLabelByType("src", "", "rootcontext"); test != expected { + t.Fatalf("Format failed. Expected %s, got %s", expected, test) + } +} diff --git a/internal/third_party/selinux/go-selinux/selinux.go b/internal/third_party/selinux/go-selinux/selinux.go new file mode 100644 index 00000000000..15150d47528 --- /dev/null +++ b/internal/third_party/selinux/go-selinux/selinux.go @@ -0,0 +1,322 @@ +package selinux + +import ( + "errors" +) + +const ( + // Enforcing constant to indicate SELinux is in enforcing mode + Enforcing = 1 + // Permissive constant to indicate SELinux is in permissive mode + Permissive = 0 + // Disabled constant to indicate SELinux is disabled + Disabled = -1 + // maxCategory is the maximum number of categories used within containers + maxCategory = 1024 + // DefaultCategoryRange is the upper bound on the category range + DefaultCategoryRange = uint32(maxCategory) +) + +var ( + // ErrMCSAlreadyExists is returned when trying to allocate a duplicate MCS. + ErrMCSAlreadyExists = errors.New("MCS label already exists") + // ErrEmptyPath is returned when an empty path has been specified. + ErrEmptyPath = errors.New("empty path") + + // ErrInvalidLabel is returned when an invalid label is specified. + ErrInvalidLabel = errors.New("invalid Label") + + // InvalidLabel is returned when an invalid label is specified. + // + // Deprecated: use [ErrInvalidLabel]. + InvalidLabel = ErrInvalidLabel + + // ErrIncomparable is returned when two levels are not comparable + ErrIncomparable = errors.New("incomparable levels") + // ErrLevelSyntax is returned when a sensitivity or category does not have correct syntax in a level + ErrLevelSyntax = errors.New("invalid level syntax") + + // ErrContextMissing is returned if a requested context is not found in a file. + ErrContextMissing = errors.New("context does not have a match") + // ErrVerifierNil is returned when a context verifier function is nil.
+ ErrVerifierNil = errors.New("verifier function is nil") + + // ErrNotTGLeader is returned by [SetKeyLabel] if the calling thread + // is not the thread group leader. + ErrNotTGLeader = errors.New("calling thread is not the thread group leader") + + // CategoryRange allows the upper bound on the category range to be adjusted + CategoryRange = DefaultCategoryRange + + privContainerMountLabel string +) + +// Context is a representation of the SELinux label broken into 4 parts +type Context map[string]string + +// SetDisabled disables SELinux support for the package +func SetDisabled() { + setDisabled() +} + +// GetEnabled returns whether SELinux is currently enabled. +func GetEnabled() bool { + return getEnabled() +} + +// ClassIndex returns the int index for an object class in the loaded policy, +// or -1 and an error +func ClassIndex(class string) (int, error) { + return classIndex(class) +} + +// SetFileLabel sets the SELinux label for this path, following symlinks, +// or returns an error. +func SetFileLabel(fpath string, label string) error { + return setFileLabel(fpath, label) +} + +// LsetFileLabel sets the SELinux label for this path, not following symlinks, +// or returns an error. +func LsetFileLabel(fpath string, label string) error { + return lSetFileLabel(fpath, label) +} + +// FileLabel returns the SELinux label for this path, following symlinks, +// or returns an error. +func FileLabel(fpath string) (string, error) { + return fileLabel(fpath) +} + +// LfileLabel returns the SELinux label for this path, not following symlinks, +// or returns an error. +func LfileLabel(fpath string) (string, error) { + return lFileLabel(fpath) +} + +// SetFSCreateLabel tells the kernel what label to use for all file system objects +// created by this task. +// Set the label to an empty string to return to the default label. 
Calls to SetFSCreateLabel +// should be wrapped in runtime.LockOSThread()/runtime.UnlockOSThread() until file system +// objects created by this task are finished to guarantee another goroutine does not migrate +// to the current thread before execution is complete. +func SetFSCreateLabel(label string) error { + return setFSCreateLabel(label) +} + +// FSCreateLabel returns the default label the kernel which the kernel is using +// for file system objects created by this task. "" indicates default. +func FSCreateLabel() (string, error) { + return fsCreateLabel() +} + +// CurrentLabel returns the SELinux label of the current process thread, or an error. +func CurrentLabel() (string, error) { + return currentLabel() +} + +// PidLabel returns the SELinux label of the given pid, or an error. +func PidLabel(pid int) (string, error) { + return pidLabel(pid) +} + +// ExecLabel returns the SELinux label that the kernel will use for any programs +// that are executed by the current process thread, or an error. +func ExecLabel() (string, error) { + return execLabel() +} + +// CanonicalizeContext takes a context string and writes it to the kernel +// the function then returns the context that the kernel will use. Use this +// function to check if two contexts are equivalent +func CanonicalizeContext(val string) (string, error) { + return canonicalizeContext(val) +} + +// ComputeCreateContext requests the type transition from source to target for +// class from the kernel. +func ComputeCreateContext(source string, target string, class string) (string, error) { + return computeCreateContext(source, target, class) +} + +// CalculateGlbLub computes the glb (greatest lower bound) and lub (least upper bound) +// of a source and target range. +// The glblub is calculated as the greater of the low sensitivities and +// the lower of the high sensitivities and the and of each category bitset. 
func CalculateGlbLub(sourceRange, targetRange string) (string, error) {
	return calculateGlbLub(sourceRange, targetRange)
}

// SetExecLabel sets the SELinux label that the kernel will use for any programs
// that are executed by the current process thread, and returns an error on
// failure. The label is written to attr/exec of the calling thread.
//
// Calls to SetExecLabel should be wrapped in
// runtime.LockOSThread()/runtime.UnlockOSThread() until execution of the
// program is finished, to guarantee another goroutine does not migrate to the
// current thread before execution is complete.
func SetExecLabel(label string) error {
	return writeConThreadSelf("attr/exec", label)
}

// SetTaskLabel sets the SELinux label for the current thread and returns an
// error on failure. The label is written to attr/current of the calling
// thread. This requires the dyntransition permission.
//
// Calls to SetTaskLabel should be wrapped in
// runtime.LockOSThread()/runtime.UnlockOSThread() to guarantee the current
// thread does not run in a new mislabeled thread.
func SetTaskLabel(label string) error {
	return writeConThreadSelf("attr/current", label)
}

// SetSocketLabel takes a process label and tells the kernel to assign the
// label to the next socket that gets created. The label is written to
// attr/sockcreate of the calling thread.
//
// Calls to SetSocketLabel should be wrapped in
// runtime.LockOSThread()/runtime.UnlockOSThread() until the socket is created,
// to guarantee another goroutine does not migrate to the current thread before
// execution is complete.
func SetSocketLabel(label string) error {
	return writeConThreadSelf("attr/sockcreate", label)
}

// SocketLabel retrieves the current socket label setting (attr/sockcreate of
// the calling thread).
func SocketLabel() (string, error) {
	return readConThreadSelf("attr/sockcreate")
}

// PeerLabel retrieves the label of the client on the other side of the socket
// identified by the file descriptor fd.
func PeerLabel(fd uintptr) (string, error) {
	return peerLabel(fd)
}

// SetKeyLabel takes a process label and tells the kernel to assign the
// label to the next kernel keyring that gets created.
//
// Calls to SetKeyLabel should be wrapped in
// runtime.LockOSThread()/runtime.UnlockOSThread() until the kernel keyring is
// created to guarantee another goroutine does not migrate to the current
// thread before execution is complete.
//
// Only the thread group leader can set the key label.
func SetKeyLabel(label string) error {
	return setKeyLabel(label)
}

// KeyLabel retrieves the current kernel keyring label setting.
func KeyLabel() (string, error) {
	return keyLabel()
}

// Get returns the Context as a string.
func (c Context) Get() string {
	return c.get()
}

// NewContext creates a new Context from the specified label.
func NewContext(label string) (Context, error) {
	return newContext(label)
}

// ClearLabels clears all reserved labels.
func ClearLabels() {
	clearLabels()
}

// ReserveLabel reserves the MLS/MCS level component of the specified label.
func ReserveLabel(label string) {
	reserveLabel(label)
}

// MLSEnabled reports whether MLS is enabled.
func MLSEnabled() bool {
	return isMLSEnabled()
}

// EnforceMode returns the current SELinux mode: Enforcing, Permissive or
// Disabled.
func EnforceMode() int {
	return enforceMode()
}

// SetEnforceMode sets the current SELinux mode to Enforcing or Permissive.
// Disabled is not valid, since this needs to be set at boot time.
func SetEnforceMode(mode int) error {
	return setEnforceMode(mode)
}

// DefaultEnforceMode returns the system's default SELinux mode: Enforcing,
// Permissive or Disabled. Note this is just the default at boot time;
// EnforceMode tells you the system's current mode.
func DefaultEnforceMode() int {
	return defaultEnforceMode()
}

// ReleaseLabel un-reserves the MLS/MCS Level field of the specified label,
// allowing it to be used by another process.
func ReleaseLabel(label string) {
	releaseLabel(label)
}

// ROFileLabel returns the SELinux label used for read-only files.
func ROFileLabel() string {
	return roFileLabel()
}

// KVMContainerLabels returns the default processLabel and mountLabel to be used
// for kvm containers by the calling process.
func KVMContainerLabels() (string, string) {
	return kvmContainerLabels()
}

// InitContainerLabels returns the default processLabel and file labels to be
// used for containers running an init system like systemd by the calling process.
func InitContainerLabels() (string, string) {
	return initContainerLabels()
}

// ContainerLabels returns an allocated processLabel and fileLabel to be used for
// container labeling by the calling process.
func ContainerLabels() (processLabel string, fileLabel string) {
	return containerLabels()
}

// SecurityCheckContext validates that the SELinux label is understood by the
// kernel.
func SecurityCheckContext(val string) error {
	return securityCheckContext(val)
}

// CopyLevel returns the dest label with its MLS/MCS level replaced by the
// level taken from the src label.
func CopyLevel(src, dest string) (string, error) {
	return copyLevel(src, dest)
}

// Chcon changes the fpath file object to the SELinux label.
// If fpath is a directory and recurse is true, then Chcon walks the
// directory tree setting the label.
//
// The fpath itself is guaranteed to be relabeled last.
func Chcon(fpath string, label string, recurse bool) error {
	return chcon(fpath, label, recurse)
}

// DupSecOpt takes an SELinux process label and returns security options that
// can be used to set the SELinux Type and Level for future container processes.
func DupSecOpt(src string) ([]string, error) {
	return dupSecOpt(src)
}

// DisableSecOpt returns a security opt that can be used to disable SELinux
// labeling support for future container processes.
+func DisableSecOpt() []string { + return []string{"disable"} +} + +// GetDefaultContextWithLevel gets a single context for the specified SELinux user +// identity that is reachable from the specified scon context. The context is based +// on the per-user /etc/selinux/{SELINUXTYPE}/contexts/users/ if it exists, +// and falls back to the global /etc/selinux/{SELINUXTYPE}/contexts/default_contexts +// file. +func GetDefaultContextWithLevel(user, level, scon string) (string, error) { + return getDefaultContextWithLevel(user, level, scon) +} + +// PrivContainerMountLabel returns mount label for privileged containers +func PrivContainerMountLabel() string { + // Make sure label is initialized. + _ = label("") + return privContainerMountLabel +} diff --git a/internal/third_party/selinux/go-selinux/selinux_linux.go b/internal/third_party/selinux/go-selinux/selinux_linux.go new file mode 100644 index 00000000000..70392d98904 --- /dev/null +++ b/internal/third_party/selinux/go-selinux/selinux_linux.go @@ -0,0 +1,1405 @@ +package selinux + +import ( + "bufio" + "bytes" + "crypto/rand" + "encoding/binary" + "errors" + "fmt" + "io" + "io/fs" + "math/big" + "os" + "os/user" + "path/filepath" + "strconv" + "strings" + "sync" + + "github.com/cyphar/filepath-securejoin/pathrs-lite" + "github.com/cyphar/filepath-securejoin/pathrs-lite/procfs" + "golang.org/x/sys/unix" + + "github.com/opencontainers/selinux/pkg/pwalkdir" +) + +const ( + minSensLen = 2 + contextFile = "/usr/share/containers/selinux/contexts" + selinuxDir = "/etc/selinux/" + selinuxUsersDir = "contexts/users" + defaultContexts = "contexts/default_contexts" + selinuxConfig = selinuxDir + "config" + selinuxfsMount = "/sys/fs/selinux" + selinuxTypeTag = "SELINUXTYPE" + selinuxTag = "SELINUX" + xattrNameSelinux = "security.selinux" +) + +type selinuxState struct { + mcsList map[string]bool + selinuxfs string + selinuxfsOnce sync.Once + enabledSet bool + enabled bool + sync.Mutex +} + +type level struct { + cats *big.Int + 
sens int +} + +type mlsRange struct { + low *level + high *level +} + +type defaultSECtx struct { + userRdr io.Reader + verifier func(string) error + defaultRdr io.Reader + user, level, scon string +} + +type levelItem byte + +const ( + sensitivity levelItem = 's' + category levelItem = 'c' +) + +var ( + readOnlyFileLabel string + state = selinuxState{ + mcsList: make(map[string]bool), + } + + // for policyRoot() + policyRootOnce sync.Once + policyRootVal string + + // for label() + loadLabelsOnce sync.Once + labels map[string]string +) + +func policyRoot() string { + policyRootOnce.Do(func() { + policyRootVal = filepath.Join(selinuxDir, readConfig(selinuxTypeTag)) + }) + + return policyRootVal +} + +func (s *selinuxState) setEnable(enabled bool) bool { + s.Lock() + defer s.Unlock() + s.enabledSet = true + s.enabled = enabled + return s.enabled +} + +func (s *selinuxState) getEnabled() bool { + s.Lock() + enabled := s.enabled + enabledSet := s.enabledSet + s.Unlock() + if enabledSet { + return enabled + } + + enabled = false + if fs := getSelinuxMountPoint(); fs != "" { + if con, _ := CurrentLabel(); con != "kernel" { + enabled = true + } + } + return s.setEnable(enabled) +} + +// setDisabled disables SELinux support for the package +func setDisabled() { + state.setEnable(false) +} + +func verifySELinuxfsMount(mnt string) bool { + var buf unix.Statfs_t + for { + err := unix.Statfs(mnt, &buf) + if err == nil { + break + } + if err == unix.EAGAIN || err == unix.EINTR { + continue + } + return false + } + + //#nosec G115 -- there is no overflow here. 
+ if uint32(buf.Type) != uint32(unix.SELINUX_MAGIC) { + return false + } + if (buf.Flags & unix.ST_RDONLY) != 0 { + return false + } + + return true +} + +func findSELinuxfs() string { + // fast path: check the default mount first + if verifySELinuxfsMount(selinuxfsMount) { + return selinuxfsMount + } + + // check if selinuxfs is available before going the slow path + fs, err := os.ReadFile("/proc/filesystems") + if err != nil { + return "" + } + if !bytes.Contains(fs, []byte("\tselinuxfs\n")) { + return "" + } + + // slow path: try to find among the mounts + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return "" + } + defer f.Close() + + scanner := bufio.NewScanner(f) + for { + mnt := findSELinuxfsMount(scanner) + if mnt == "" { // error or not found + return "" + } + if verifySELinuxfsMount(mnt) { + return mnt + } + } +} + +// findSELinuxfsMount returns a next selinuxfs mount point found, +// if there is one, or an empty string in case of EOF or error. +func findSELinuxfsMount(s *bufio.Scanner) string { + for s.Scan() { + txt := s.Bytes() + // The first field after - is fs type. + // Safe as spaces in mountpoints are encoded as \040 + if !bytes.Contains(txt, []byte(" - selinuxfs ")) { + continue + } + const mPos = 5 // mount point is 5th field + fields := bytes.SplitN(txt, []byte(" "), mPos+1) + if len(fields) < mPos+1 { + continue + } + return string(fields[mPos-1]) + } + + return "" +} + +func (s *selinuxState) getSELinuxfs() string { + s.selinuxfsOnce.Do(func() { + s.selinuxfs = findSELinuxfs() + }) + + return s.selinuxfs +} + +// getSelinuxMountPoint returns the path to the mountpoint of an selinuxfs +// filesystem or an empty string if no mountpoint is found. Selinuxfs is +// a proc-like pseudo-filesystem that exposes the SELinux policy API to +// processes. The existence of an selinuxfs mount is used to determine +// whether SELinux is currently enabled or not. 
+func getSelinuxMountPoint() string { + return state.getSELinuxfs() +} + +// getEnabled returns whether SELinux is currently enabled. +func getEnabled() bool { + return state.getEnabled() +} + +func readConfig(target string) string { + in, err := os.Open(selinuxConfig) + if err != nil { + return "" + } + defer in.Close() + + scanner := bufio.NewScanner(in) + + for scanner.Scan() { + line := bytes.TrimSpace(scanner.Bytes()) + if len(line) == 0 { + // Skip blank lines + continue + } + if line[0] == ';' || line[0] == '#' { + // Skip comments + continue + } + fields := bytes.SplitN(line, []byte{'='}, 2) + if len(fields) != 2 { + continue + } + if bytes.Equal(fields[0], []byte(target)) { + return string(bytes.Trim(fields[1], `"`)) + } + } + return "" +} + +func readConFd(in *os.File) (string, error) { + data, err := io.ReadAll(in) + if err != nil { + return "", err + } + return string(bytes.TrimSuffix(data, []byte{0})), nil +} + +func writeConFd(out *os.File, val string) error { + var err error + if val != "" { + _, err = out.Write([]byte(val)) + } else { + _, err = out.Write(nil) + } + return err +} + +// openProcThreadSelf is a small wrapper around [OpenThreadSelf] and +// [pathrs.Reopen] to make "one-shot opens" slightly more ergonomic. The +// provided mode must be os.O_* flags to indicate what mode the returned file +// should be opened with (flags like os.O_CREAT and os.O_EXCL are not +// supported). +// +// If no error occurred, the returned handle is guaranteed to be exactly +// /proc/thread-self/ with no tricky mounts or symlinks causing you to +// operate on an unexpected path (with some caveats on pre-openat2 or +// pre-fsopen kernels). 
//
// [OpenThreadSelf]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs#Handle.OpenThreadSelf
func openProcThreadSelf(subpath string, mode int) (*os.File, procfs.ProcThreadSelfCloser, error) {
	// An empty subpath is rejected up front.
	if subpath == "" {
		return nil, nil, ErrEmptyPath
	}

	proc, err := procfs.OpenProcRoot()
	if err != nil {
		return nil, nil, err
	}
	defer proc.Close()

	// On success the closer is handed back to the caller, who is expected to
	// invoke it once done with the file (both callers in this file defer it).
	handle, closer, err := proc.OpenThreadSelf(subpath)
	if err != nil {
		return nil, nil, fmt.Errorf("open /proc/thread-self/%s handle: %w", subpath, err)
	}
	defer handle.Close() // we will return a re-opened handle

	file, err := pathrs.Reopen(handle, mode)
	if err != nil {
		// The caller never sees the closer on this path, so run it here.
		closer()
		return nil, nil, fmt.Errorf("reopen /proc/thread-self/%s handle (%#x): %w", subpath, mode, err)
	}
	return file, closer, nil
}

// readConThreadSelf reads the contents of /proc/thread-self/<fpath> and
// returns them via readConFd.
func readConThreadSelf(fpath string) (string, error) {
	in, closer, err := openProcThreadSelf(fpath, os.O_RDONLY|unix.O_CLOEXEC)
	if err != nil {
		return "", err
	}
	// Deferred calls run in reverse order: the file is closed first, then the
	// thread-self closer runs.
	defer closer()
	defer in.Close()

	return readConFd(in)
}

// writeConThreadSelf writes val to /proc/thread-self/<fpath>.
//
// Writing an empty val is a no-op (returning nil) when SELinux is not
// enabled.
func writeConThreadSelf(fpath, val string) error {
	if val == "" {
		if !getEnabled() {
			return nil
		}
	}

	out, closer, err := openProcThreadSelf(fpath, os.O_WRONLY|unix.O_CLOEXEC)
	if err != nil {
		return err
	}
	// Deferred calls run in reverse order: the file is closed first, then the
	// thread-self closer runs.
	defer closer()
	defer out.Close()

	return writeConFd(out, val)
}

// openProcSelf is a small wrapper around [OpenSelf] and [pathrs.Reopen] to
// make "one-shot opens" slightly more ergonomic. The provided mode must be
// os.O_* flags to indicate what mode the returned file should be opened with
// (flags like os.O_CREAT and os.O_EXCL are not supported).
//
// If no error occurred, the returned handle is guaranteed to be exactly
// /proc/self/<subpath> with no tricky mounts or symlinks causing you to
// operate on an unexpected path (with some caveats on pre-openat2 or
// pre-fsopen kernels).
//
// [OpenSelf]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs#Handle.OpenSelf
func openProcSelf(subpath string, mode int) (*os.File, error) {
	// An empty subpath is rejected up front.
	if subpath == "" {
		return nil, ErrEmptyPath
	}

	proc, err := procfs.OpenProcRoot()
	if err != nil {
		return nil, err
	}
	defer proc.Close()

	handle, err := proc.OpenSelf(subpath)
	if err != nil {
		return nil, fmt.Errorf("open /proc/self/%s handle: %w", subpath, err)
	}
	defer handle.Close() // we will return a re-opened handle

	file, err := pathrs.Reopen(handle, mode)
	if err != nil {
		return nil, fmt.Errorf("reopen /proc/self/%s handle (%#x): %w", subpath, mode, err)
	}
	return file, nil
}

// readConSelf reads the contents of /proc/self/<fpath> and returns them via
// readConFd.
func readConSelf(fpath string) (string, error) {
	in, err := openProcSelf(fpath, os.O_RDONLY|unix.O_CLOEXEC)
	if err != nil {
		return "", err
	}
	defer in.Close()

	return readConFd(in)
}

// writeConSelf writes val to /proc/self/<fpath>.
//
// Writing an empty val is a no-op (returning nil) when SELinux is not
// enabled.
func writeConSelf(fpath, val string) error {
	if val == "" {
		if !getEnabled() {
			return nil
		}
	}

	out, err := openProcSelf(fpath, os.O_WRONLY|unix.O_CLOEXEC)
	if err != nil {
		return err
	}
	defer out.Close()

	return writeConFd(out, val)
}

// openProcPid is a small wrapper around [OpenPid] and [pathrs.Reopen] to make
// "one-shot opens" slightly more ergonomic. The provided mode must be os.O_*
// flags to indicate what mode the returned file should be opened with (flags
// like os.O_CREAT and os.O_EXCL are not supported).
//
// If no error occurred, the returned handle is guaranteed to be exactly
// /proc/<pid>/<subpath> with no tricky mounts or symlinks causing you to
// operate on an unexpected path (with some caveats on pre-openat2 or
// pre-fsopen kernels).
//
// [OpenPid]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs#Handle.OpenPid
func openProcPid(pid int, subpath string, mode int) (*os.File, error) {
	// An empty subpath is rejected up front.
	if subpath == "" {
		return nil, ErrEmptyPath
	}

	proc, err := procfs.OpenProcRoot()
	if err != nil {
		return nil, err
	}
	defer proc.Close()

	handle, err := proc.OpenPid(pid, subpath)
	if err != nil {
		return nil, fmt.Errorf("open /proc/%d/%s handle: %w", pid, subpath, err)
	}
	defer handle.Close() // we will return a re-opened handle

	file, err := pathrs.Reopen(handle, mode)
	if err != nil {
		return nil, fmt.Errorf("reopen /proc/%d/%s handle (%#x): %w", pid, subpath, mode, err)
	}
	return file, nil
}

// classIndex returns the int index for an object class in the loaded policy,
// or -1 and an error. The index is read from class/<class>/index below the
// selinuxfs mount point.
func classIndex(class string) (int, error) {
	permpath := fmt.Sprintf("class/%s/index", class)
	indexpath := filepath.Join(getSelinuxMountPoint(), permpath)

	indexB, err := os.ReadFile(indexpath)
	if err != nil {
		return -1, err
	}
	index, err := strconv.Atoi(string(indexB))
	if err != nil {
		return -1, err
	}

	return index, nil
}

// lSetFileLabel sets the SELinux label for this path, not following symlinks,
// or returns an error. An empty fpath yields ErrEmptyPath; any other failure
// is reported as an *os.PathError.
func lSetFileLabel(fpath string, label string) error {
	if fpath == "" {
		return ErrEmptyPath
	}
	// Retry for as long as the call is interrupted (EINTR).
	for {
		err := unix.Lsetxattr(fpath, xattrNameSelinux, []byte(label), 0)
		if err == nil {
			break
		}
		if err != unix.EINTR {
			return &os.PathError{Op: fmt.Sprintf("lsetxattr(label=%s)", label), Path: fpath, Err: err}
		}
	}

	return nil
}

// setFileLabel sets the SELinux label for this path, following symlinks,
// or returns an error.
+func setFileLabel(fpath string, label string) error { + if fpath == "" { + return ErrEmptyPath + } + for { + err := unix.Setxattr(fpath, xattrNameSelinux, []byte(label), 0) + if err == nil { + break + } + if err != unix.EINTR { + return &os.PathError{Op: fmt.Sprintf("setxattr(label=%s)", label), Path: fpath, Err: err} + } + } + + return nil +} + +// fileLabel returns the SELinux label for this path, following symlinks, +// or returns an error. +func fileLabel(fpath string) (string, error) { + if fpath == "" { + return "", ErrEmptyPath + } + + label, err := getxattr(fpath, xattrNameSelinux) + if err != nil { + return "", &os.PathError{Op: "getxattr", Path: fpath, Err: err} + } + // Trim the NUL byte at the end of the byte buffer, if present. + if len(label) > 0 && label[len(label)-1] == '\x00' { + label = label[:len(label)-1] + } + return string(label), nil +} + +// lFileLabel returns the SELinux label for this path, not following symlinks, +// or returns an error. +func lFileLabel(fpath string) (string, error) { + if fpath == "" { + return "", ErrEmptyPath + } + + label, err := lgetxattr(fpath, xattrNameSelinux) + if err != nil { + return "", &os.PathError{Op: "lgetxattr", Path: fpath, Err: err} + } + // Trim the NUL byte at the end of the byte buffer, if present. + if len(label) > 0 && label[len(label)-1] == '\x00' { + label = label[:len(label)-1] + } + return string(label), nil +} + +func setFSCreateLabel(label string) error { + return writeConThreadSelf("attr/fscreate", label) +} + +// fsCreateLabel returns the default label the kernel which the kernel is using +// for file system objects created by this task. "" indicates default. +func fsCreateLabel() (string, error) { + return readConThreadSelf("attr/fscreate") +} + +// currentLabel returns the SELinux label of the current process thread, or an error. +func currentLabel() (string, error) { + return readConThreadSelf("attr/current") +} + +// pidLabel returns the SELinux label of the given pid, or an error. 
+func pidLabel(pid int) (string, error) { + it, err := openProcPid(pid, "attr/current", os.O_RDONLY|unix.O_CLOEXEC) + if err != nil { + return "", nil + } + defer it.Close() + return readConFd(it) +} + +// ExecLabel returns the SELinux label that the kernel will use for any programs +// that are executed by the current process thread, or an error. +func execLabel() (string, error) { + return readConThreadSelf("exec") +} + +// canonicalizeContext takes a context string and writes it to the kernel +// the function then returns the context that the kernel will use. Use this +// function to check if two contexts are equivalent +func canonicalizeContext(val string) (string, error) { + return readWriteCon(filepath.Join(getSelinuxMountPoint(), "context"), val) +} + +// computeCreateContext requests the type transition from source to target for +// class from the kernel. +func computeCreateContext(source string, target string, class string) (string, error) { + classidx, err := classIndex(class) + if err != nil { + return "", err + } + + return readWriteCon(filepath.Join(getSelinuxMountPoint(), "create"), fmt.Sprintf("%s %s %d", source, target, classidx)) +} + +// catsToBitset stores categories in a bitset. 
+func catsToBitset(cats string) (*big.Int, error) { + bitset := new(big.Int) + + catlist := strings.Split(cats, ",") + for _, r := range catlist { + ranges := strings.SplitN(r, ".", 2) + if len(ranges) > 1 { + catstart, err := parseLevelItem(ranges[0], category) + if err != nil { + return nil, err + } + catend, err := parseLevelItem(ranges[1], category) + if err != nil { + return nil, err + } + for i := catstart; i <= catend; i++ { + bitset.SetBit(bitset, i, 1) + } + } else { + cat, err := parseLevelItem(ranges[0], category) + if err != nil { + return nil, err + } + bitset.SetBit(bitset, cat, 1) + } + } + + return bitset, nil +} + +// parseLevelItem parses and verifies that a sensitivity or category are valid +func parseLevelItem(s string, sep levelItem) (int, error) { + if len(s) < minSensLen || levelItem(s[0]) != sep { + return 0, ErrLevelSyntax + } + const bitSize = 31 // Make sure the result fits into signed int32. + val, err := strconv.ParseUint(s[1:], 10, bitSize) + if err != nil { + return 0, err + } + + return int(val), nil +} + +// parseLevel fills a level from a string that contains +// a sensitivity and categories +func (l *level) parseLevel(levelStr string) error { + lvl := strings.SplitN(levelStr, ":", 2) + sens, err := parseLevelItem(lvl[0], sensitivity) + if err != nil { + return fmt.Errorf("failed to parse sensitivity: %w", err) + } + l.sens = sens + if len(lvl) > 1 { + cats, err := catsToBitset(lvl[1]) + if err != nil { + return fmt.Errorf("failed to parse categories: %w", err) + } + l.cats = cats + } + + return nil +} + +// rangeStrToMLSRange marshals a string representation of a range. +func rangeStrToMLSRange(rangeStr string) (*mlsRange, error) { + r := &mlsRange{} + l := strings.SplitN(rangeStr, "-", 2) + + switch len(l) { + // rangeStr that has a low and a high level, e.g. 
s4:c0.c1023-s6:c0.c1023 + case 2: + r.high = &level{} + if err := r.high.parseLevel(l[1]); err != nil { + return nil, fmt.Errorf("failed to parse high level %q: %w", l[1], err) + } + fallthrough + // rangeStr that is single level, e.g. s6:c0,c3,c5,c30.c1023 + case 1: + r.low = &level{} + if err := r.low.parseLevel(l[0]); err != nil { + return nil, fmt.Errorf("failed to parse low level %q: %w", l[0], err) + } + } + + if r.high == nil { + r.high = r.low + } + + return r, nil +} + +// bitsetToStr takes a category bitset and returns it in the +// canonical selinux syntax +func bitsetToStr(c *big.Int) string { + var str string + + length := 0 + i0 := int(c.TrailingZeroBits()) //#nosec G115 -- don't expect TralingZeroBits to return values with highest bit set. + for i := i0; i < c.BitLen(); i++ { + if c.Bit(i) == 0 { + continue + } + if length == 0 { + if str != "" { + str += "," + } + str += "c" + strconv.Itoa(i) + } + if c.Bit(i+1) == 1 { + length++ + continue + } + if length == 1 { + str += ",c" + strconv.Itoa(i) + } else if length > 1 { + str += ".c" + strconv.Itoa(i) + } + length = 0 + } + + return str +} + +func (l *level) equal(l2 *level) bool { + if l2 == nil || l == nil { + return l == l2 + } + if l2.sens != l.sens { + return false + } + if l2.cats == nil || l.cats == nil { + return l2.cats == l.cats + } + return l.cats.Cmp(l2.cats) == 0 +} + +// String returns an mlsRange as a string. +func (m mlsRange) String() string { + low := "s" + strconv.Itoa(m.low.sens) + if m.low.cats != nil && m.low.cats.BitLen() > 0 { + low += ":" + bitsetToStr(m.low.cats) + } + + if m.low.equal(m.high) { + return low + } + + high := "s" + strconv.Itoa(m.high.sens) + if m.high.cats != nil && m.high.cats.BitLen() > 0 { + high += ":" + bitsetToStr(m.high.cats) + } + + return low + "-" + high +} + +// TODO: remove these in favor of built-in min/max +// once we stop supporting Go < 1.21. 
+func maxInt(a, b int) int { + if a > b { + return a + } + return b +} + +func minInt(a, b int) int { + if a < b { + return a + } + return b +} + +// calculateGlbLub computes the glb (greatest lower bound) and lub (least upper bound) +// of a source and target range. +// The glblub is calculated as the greater of the low sensitivities and +// the lower of the high sensitivities and the and of each category bitset. +func calculateGlbLub(sourceRange, targetRange string) (string, error) { + s, err := rangeStrToMLSRange(sourceRange) + if err != nil { + return "", err + } + t, err := rangeStrToMLSRange(targetRange) + if err != nil { + return "", err + } + + if s.high.sens < t.low.sens || t.high.sens < s.low.sens { + /* these ranges have no common sensitivities */ + return "", ErrIncomparable + } + + outrange := &mlsRange{low: &level{}, high: &level{}} + + /* take the greatest of the low */ + outrange.low.sens = maxInt(s.low.sens, t.low.sens) + + /* take the least of the high */ + outrange.high.sens = minInt(s.high.sens, t.high.sens) + + /* find the intersecting categories */ + if s.low.cats != nil && t.low.cats != nil { + outrange.low.cats = new(big.Int) + outrange.low.cats.And(s.low.cats, t.low.cats) + } + if s.high.cats != nil && t.high.cats != nil { + outrange.high.cats = new(big.Int) + outrange.high.cats.And(s.high.cats, t.high.cats) + } + + return outrange.String(), nil +} + +func readWriteCon(fpath string, val string) (string, error) { + if fpath == "" { + return "", ErrEmptyPath + } + f, err := os.OpenFile(fpath, os.O_RDWR, 0) + if err != nil { + return "", err + } + defer f.Close() + + _, err = f.Write([]byte(val)) + if err != nil { + return "", err + } + + return readConFd(f) +} + +// peerLabel retrieves the label of the client on the other side of a socket +func peerLabel(fd uintptr) (string, error) { + l, err := unix.GetsockoptString(int(fd), unix.SOL_SOCKET, unix.SO_PEERSEC) + if err != nil { + return "", &os.PathError{Op: "getsockopt", Path: "fd " + 
strconv.Itoa(int(fd)), Err: err} + } + return l, nil +} + +// setKeyLabel takes a process label and tells the kernel to assign the +// label to the next kernel keyring that gets created +func setKeyLabel(label string) error { + // Rather than using /proc/thread-self, we want to use /proc/self to + // operate on the thread-group leader. + err := writeConSelf("attr/keycreate", label) + if errors.Is(err, os.ErrNotExist) { + return nil + } + if label == "" && errors.Is(err, os.ErrPermission) { + return nil + } + if errors.Is(err, unix.EACCES) && unix.Getpid() != unix.Gettid() { + return ErrNotTGLeader + } + return err +} + +// KeyLabel retrieves the current kernel keyring label setting for this +// thread-group. +func keyLabel() (string, error) { + // Rather than using /proc/thread-self, we want to use /proc/self to + // operate on the thread-group leader. + return readConSelf("attr/keycreate") +} + +// get returns the Context as a string +func (c Context) get() string { + if l := c["level"]; l != "" { + return c["user"] + ":" + c["role"] + ":" + c["type"] + ":" + l + } + return c["user"] + ":" + c["role"] + ":" + c["type"] +} + +// newContext creates a new Context struct from the specified label +func newContext(label string) (Context, error) { + c := make(Context) + + if len(label) != 0 { + con := strings.SplitN(label, ":", 4) + if len(con) < 3 { + return c, ErrInvalidLabel + } + c["user"] = con[0] + c["role"] = con[1] + c["type"] = con[2] + if len(con) > 3 { + c["level"] = con[3] + } + } + return c, nil +} + +// clearLabels clears all reserved labels +func clearLabels() { + state.Lock() + state.mcsList = make(map[string]bool) + state.Unlock() +} + +// reserveLabel reserves the MLS/MCS level component of the specified label +func reserveLabel(label string) { + if len(label) != 0 { + con := strings.SplitN(label, ":", 4) + if len(con) > 3 { + _ = mcsAdd(con[3]) + } + } +} + +func selinuxEnforcePath() string { + return filepath.Join(getSelinuxMountPoint(), "enforce") +} 
+ +// isMLSEnabled checks if MLS is enabled. +func isMLSEnabled() bool { + enabledB, err := os.ReadFile(filepath.Join(getSelinuxMountPoint(), "mls")) + if err != nil { + return false + } + return bytes.Equal(enabledB, []byte{'1'}) +} + +// enforceMode returns the current SELinux mode Enforcing, Permissive, Disabled +func enforceMode() int { + var enforce int + + enforceB, err := os.ReadFile(selinuxEnforcePath()) + if err != nil { + return -1 + } + enforce, err = strconv.Atoi(string(enforceB)) + if err != nil { + return -1 + } + return enforce +} + +// setEnforceMode sets the current SELinux mode Enforcing, Permissive. +// Disabled is not valid, since this needs to be set at boot time. +func setEnforceMode(mode int) error { + return os.WriteFile(selinuxEnforcePath(), []byte(strconv.Itoa(mode)), 0) +} + +// defaultEnforceMode returns the systems default SELinux mode Enforcing, +// Permissive or Disabled. Note this is just the default at boot time. +// EnforceMode tells you the systems current mode. 
+func defaultEnforceMode() int { + switch readConfig(selinuxTag) { + case "enforcing": + return Enforcing + case "permissive": + return Permissive + } + return Disabled +} + +func mcsAdd(mcs string) error { + if mcs == "" { + return nil + } + state.Lock() + defer state.Unlock() + if state.mcsList[mcs] { + return ErrMCSAlreadyExists + } + state.mcsList[mcs] = true + return nil +} + +func mcsDelete(mcs string) { + if mcs == "" { + return + } + state.Lock() + defer state.Unlock() + state.mcsList[mcs] = false +} + +func intToMcs(id int, catRange uint32) string { + var ( + SETSIZE = int(catRange) + TIER = SETSIZE + ORD = id + ) + + if id < 1 || id > 523776 { + return "" + } + + for ORD > TIER { + ORD -= TIER + TIER-- + } + TIER = SETSIZE - TIER + ORD += TIER + return fmt.Sprintf("s0:c%d,c%d", TIER, ORD) +} + +func uniqMcs(catRange uint32) string { + var ( + n uint32 + c1, c2 uint32 + mcs string + ) + + for { + _ = binary.Read(rand.Reader, binary.LittleEndian, &n) + c1 = n % catRange + _ = binary.Read(rand.Reader, binary.LittleEndian, &n) + c2 = n % catRange + if c1 == c2 { + continue + } else if c1 > c2 { + c1, c2 = c2, c1 + } + mcs = fmt.Sprintf("s0:c%d,c%d", c1, c2) + if err := mcsAdd(mcs); err != nil { + continue + } + break + } + return mcs +} + +// releaseLabel un-reserves the MLS/MCS Level field of the specified label, +// allowing it to be used by another process. 
+func releaseLabel(label string) { + if len(label) != 0 { + con := strings.SplitN(label, ":", 4) + if len(con) > 3 { + mcsDelete(con[3]) + } + } +} + +// roFileLabel returns the specified SELinux readonly file label +func roFileLabel() string { + return readOnlyFileLabel +} + +func openContextFile() (*os.File, error) { + if f, err := os.Open(contextFile); err == nil { + return f, nil + } + return os.Open(filepath.Join(policyRoot(), "contexts", "lxc_contexts")) +} + +func loadLabels() { + labels = make(map[string]string) + in, err := openContextFile() + if err != nil { + return + } + defer in.Close() + + scanner := bufio.NewScanner(in) + + for scanner.Scan() { + line := bytes.TrimSpace(scanner.Bytes()) + if len(line) == 0 { + // Skip blank lines + continue + } + if line[0] == ';' || line[0] == '#' { + // Skip comments + continue + } + fields := bytes.SplitN(line, []byte{'='}, 2) + if len(fields) != 2 { + continue + } + key, val := bytes.TrimSpace(fields[0]), bytes.TrimSpace(fields[1]) + labels[string(key)] = string(bytes.Trim(val, `"`)) + } + + con, _ := NewContext(labels["file"]) + con["level"] = fmt.Sprintf("s0:c%d,c%d", maxCategory-2, maxCategory-1) + privContainerMountLabel = con.get() + reserveLabel(privContainerMountLabel) +} + +func label(key string) string { + loadLabelsOnce.Do(func() { + loadLabels() + }) + return labels[key] +} + +// kvmContainerLabels returns the default processLabel and mountLabel to be used +// for kvm containers by the calling process. +func kvmContainerLabels() (string, string) { + processLabel := label("kvm_process") + if processLabel == "" { + processLabel = label("process") + } + + return addMcs(processLabel, label("file")) +} + +// initContainerLabels returns the default processLabel and file labels to be +// used for containers running an init system like systemd by the calling process. 
+func initContainerLabels() (string, string) { + processLabel := label("init_process") + if processLabel == "" { + processLabel = label("process") + } + + return addMcs(processLabel, label("file")) +} + +// containerLabels returns an allocated processLabel and fileLabel to be used for +// container labeling by the calling process. +func containerLabels() (processLabel string, fileLabel string) { + if !getEnabled() { + return "", "" + } + + processLabel = label("process") + fileLabel = label("file") + readOnlyFileLabel = label("ro_file") + + if processLabel == "" || fileLabel == "" { + return "", fileLabel + } + + if readOnlyFileLabel == "" { + readOnlyFileLabel = fileLabel + } + + return addMcs(processLabel, fileLabel) +} + +func addMcs(processLabel, fileLabel string) (string, string) { + scon, _ := NewContext(processLabel) + if scon["level"] != "" { + mcs := uniqMcs(CategoryRange) + scon["level"] = mcs + processLabel = scon.Get() + scon, _ = NewContext(fileLabel) + scon["level"] = mcs + fileLabel = scon.Get() + } + return processLabel, fileLabel +} + +// securityCheckContext validates that the SELinux label is understood by the kernel +func securityCheckContext(val string) error { + return os.WriteFile(filepath.Join(getSelinuxMountPoint(), "context"), []byte(val), 0) +} + +// copyLevel returns a label with the MLS/MCS level from src label replaced on +// the dest label. +func copyLevel(src, dest string) (string, error) { + if src == "" { + return "", nil + } + if err := SecurityCheckContext(src); err != nil { + return "", err + } + if err := SecurityCheckContext(dest); err != nil { + return "", err + } + scon, err := NewContext(src) + if err != nil { + return "", err + } + tcon, err := NewContext(dest) + if err != nil { + return "", err + } + mcsDelete(tcon["level"]) + _ = mcsAdd(scon["level"]) + tcon["level"] = scon["level"] + return tcon.Get(), nil +} + +// chcon changes the fpath file object to the SELinux label. 
+// If fpath is a directory and recurse is true, then chcon walks the +// directory tree setting the label. +func chcon(fpath string, label string, recurse bool) error { + if fpath == "" { + return ErrEmptyPath + } + if label == "" { + return nil + } + + excludePaths := map[string]bool{ + "/": true, + "/bin": true, + "/boot": true, + "/dev": true, + "/etc": true, + "/etc/passwd": true, + "/etc/pki": true, + "/etc/shadow": true, + "/home": true, + "/lib": true, + "/lib64": true, + "/media": true, + "/opt": true, + "/proc": true, + "/root": true, + "/run": true, + "/sbin": true, + "/srv": true, + "/sys": true, + "/tmp": true, + "/usr": true, + "/var": true, + "/var/lib": true, + "/var/log": true, + } + + if home := os.Getenv("HOME"); home != "" { + excludePaths[home] = true + } + + if sudoUser := os.Getenv("SUDO_USER"); sudoUser != "" { + if usr, err := user.Lookup(sudoUser); err == nil { + excludePaths[usr.HomeDir] = true + } + } + + if fpath != "/" { + fpath = strings.TrimSuffix(fpath, "/") + } + if excludePaths[fpath] { + return fmt.Errorf("SELinux relabeling of %s is not allowed", fpath) + } + + if !recurse { + err := lSetFileLabel(fpath, label) + if err != nil { + // Check if file doesn't exist, must have been removed + if errors.Is(err, os.ErrNotExist) { + return nil + } + // Check if current label is correct on disk + flabel, nerr := lFileLabel(fpath) + if nerr == nil && flabel == label { + return nil + } + // Check if file doesn't exist, must have been removed + if errors.Is(nerr, os.ErrNotExist) { + return nil + } + return err + } + return nil + } + + return rchcon(fpath, label) +} + +func rchcon(fpath, label string) error { //revive:disable:cognitive-complexity + fastMode := false + // If the current label matches the new label, assume + // other labels are correct. 
+ if cLabel, err := lFileLabel(fpath); err == nil && cLabel == label { + fastMode = true + } + return pwalkdir.Walk(fpath, func(p string, _ fs.DirEntry, _ error) error { + if fastMode { + if cLabel, err := lFileLabel(p); err == nil && cLabel == label { + return nil + } + } + err := lSetFileLabel(p, label) + // Walk a file tree can race with removal, so ignore ENOENT. + if errors.Is(err, os.ErrNotExist) { + return nil + } + return err + }) +} + +// dupSecOpt takes an SELinux process label and returns security options that +// can be used to set the SELinux Type and Level for future container processes. +func dupSecOpt(src string) ([]string, error) { + if src == "" { + return nil, nil + } + con, err := NewContext(src) + if err != nil { + return nil, err + } + if con["user"] == "" || + con["role"] == "" || + con["type"] == "" { + return nil, nil + } + dup := []string{ + "user:" + con["user"], + "role:" + con["role"], + "type:" + con["type"], + } + + if con["level"] != "" { + dup = append(dup, "level:"+con["level"]) + } + + return dup, nil +} + +// findUserInContext scans the reader for a valid SELinux context +// match that is verified with the verifier. Invalid contexts are +// skipped. It returns a matched context or an empty string if no +// match is found. If a scanner error occurs, it is returned. +func findUserInContext(context Context, r io.Reader, verifier func(string) error) (string, error) { + fromRole := context["role"] + fromType := context["type"] + scanner := bufio.NewScanner(r) + + for scanner.Scan() { + fromConns := strings.Fields(scanner.Text()) + if len(fromConns) == 0 { + // Skip blank lines + continue + } + + line := fromConns[0] + + if line[0] == ';' || line[0] == '#' { + // Skip comments + continue + } + + // user context files contexts are formatted as + // role_r:type_t:s0 where the user is missing. 
+ lineArr := strings.SplitN(line, ":", 4) + // skip context with typo, or role and type do not match + if len(lineArr) != 3 || + lineArr[0] != fromRole || + lineArr[1] != fromType { + continue + } + + for _, cc := range fromConns[1:] { + toConns := strings.SplitN(cc, ":", 4) + if len(toConns) != 3 { + continue + } + + context["role"] = toConns[0] + context["type"] = toConns[1] + + outConn := context.get() + if err := verifier(outConn); err != nil { + continue + } + + return outConn, nil + } + } + if err := scanner.Err(); err != nil { + return "", fmt.Errorf("failed to scan for context: %w", err) + } + + return "", nil +} + +func getDefaultContextFromReaders(c *defaultSECtx) (string, error) { + if c.verifier == nil { + return "", ErrVerifierNil + } + + context, err := newContext(c.scon) + if err != nil { + return "", fmt.Errorf("failed to create label for %s: %w", c.scon, err) + } + + // set so the verifier validates the matched context with the provided user and level. + context["user"] = c.user + context["level"] = c.level + + conn, err := findUserInContext(context, c.userRdr, c.verifier) + if err != nil { + return "", err + } + + if conn != "" { + return conn, nil + } + + conn, err = findUserInContext(context, c.defaultRdr, c.verifier) + if err != nil { + return "", err + } + + if conn != "" { + return conn, nil + } + + return "", fmt.Errorf("context %q not found: %w", c.scon, ErrContextMissing) +} + +func getDefaultContextWithLevel(user, level, scon string) (string, error) { + userPath := filepath.Join(policyRoot(), selinuxUsersDir, user) + fu, err := os.Open(userPath) + if err != nil { + return "", err + } + defer fu.Close() + + defaultPath := filepath.Join(policyRoot(), defaultContexts) + fd, err := os.Open(defaultPath) + if err != nil { + return "", err + } + defer fd.Close() + + c := defaultSECtx{ + user: user, + level: level, + scon: scon, + userRdr: fu, + defaultRdr: fd, + verifier: securityCheckContext, + } + + return getDefaultContextFromReaders(&c) +} 
diff --git a/internal/third_party/selinux/go-selinux/selinux_linux_test.go b/internal/third_party/selinux/go-selinux/selinux_linux_test.go new file mode 100644 index 00000000000..71aa0b82cc6 --- /dev/null +++ b/internal/third_party/selinux/go-selinux/selinux_linux_test.go @@ -0,0 +1,711 @@ +package selinux + +import ( + "bufio" + "bytes" + "errors" + "fmt" + "os" + "path/filepath" + "runtime" + "strconv" + "strings" + "testing" + + "golang.org/x/sys/unix" +) + +func TestSetFileLabel(t *testing.T) { + if !GetEnabled() { + t.Skip("SELinux not enabled, skipping.") + } + + const ( + tmpFile = "selinux_test" + tmpLink = "selinux_test_link" + con = "system_u:object_r:bin_t:s0:c1,c2" + con2 = "system_u:object_r:bin_t:s0:c3,c4" + ) + + _ = os.Remove(tmpFile) + out, err := os.OpenFile(tmpFile, os.O_WRONLY|os.O_CREATE, 0) + if err != nil { + t.Fatal(err) + } + out.Close() + defer os.Remove(tmpFile) + + _ = os.Remove(tmpLink) + if err := os.Symlink(tmpFile, tmpLink); err != nil { + t.Fatal(err) + } + defer os.Remove(tmpLink) + + if err := SetFileLabel(tmpLink, con); err != nil { + t.Fatalf("SetFileLabel failed: %s", err) + } + filelabel, err := FileLabel(tmpLink) + if err != nil { + t.Fatalf("FileLabel failed: %s", err) + } + if filelabel != con { + t.Fatalf("FileLabel failed, returned %s expected %s", filelabel, con) + } + + // Using LfileLabel to verify that the symlink itself is not labeled. + linkLabel, err := LfileLabel(tmpLink) + if err != nil { + t.Fatalf("LfileLabel failed: %s", err) + } + if linkLabel == con { + t.Fatalf("Label on symlink should not be set, got: %q", linkLabel) + } + + // Use LsetFileLabel to set a label on the symlink itself. 
+ if err := LsetFileLabel(tmpLink, con2); err != nil { + t.Fatalf("LsetFileLabel failed: %s", err) + } + filelabel, err = FileLabel(tmpFile) + if err != nil { + t.Fatalf("FileLabel failed: %s", err) + } + if filelabel != con { + t.Fatalf("FileLabel was updated, returned %s expected %s", filelabel, con) + } + + linkLabel, err = LfileLabel(tmpLink) + if err != nil { + t.Fatalf("LfileLabel failed: %s", err) + } + if linkLabel != con2 { + t.Fatalf("LfileLabel failed: returned %s expected %s", linkLabel, con2) + } +} + +func TestKVMLabels(t *testing.T) { + if !GetEnabled() { + t.Skip("SELinux not enabled, skipping.") + } + + plabel, flabel := KVMContainerLabels() + if plabel == "" { + t.Log("Failed to read kvm label") + } + t.Log(plabel) + t.Log(flabel) + if _, err := CanonicalizeContext(plabel); err != nil { + t.Fatal(err) + } + if _, err := CanonicalizeContext(flabel); err != nil { + t.Fatal(err) + } + + ReleaseLabel(plabel) +} + +func TestInitLabels(t *testing.T) { + if !GetEnabled() { + t.Skip("SELinux not enabled, skipping.") + } + + plabel, flabel := InitContainerLabels() + if plabel == "" { + t.Log("Failed to read init label") + } + t.Log(plabel) + t.Log(flabel) + if _, err := CanonicalizeContext(plabel); err != nil { + t.Fatal(err) + } + if _, err := CanonicalizeContext(flabel); err != nil { + t.Fatal(err) + } + ReleaseLabel(plabel) +} + +func TestDuplicateLabel(t *testing.T) { + secopt, err := DupSecOpt("system_u:system_r:container_t:s0:c1,c2") + if err != nil { + t.Fatalf("DupSecOpt: %v", err) + } + for _, opt := range secopt { + con := strings.SplitN(opt, ":", 2) + if con[0] == "user" { + if con[1] != "system_u" { + t.Errorf("DupSecOpt Failed user incorrect") + } + continue + } + if con[0] == "role" { + if con[1] != "system_r" { + t.Errorf("DupSecOpt Failed role incorrect") + } + continue + } + if con[0] == "type" { + if con[1] != "container_t" { + t.Errorf("DupSecOpt Failed type incorrect") + } + continue + } + if con[0] == "level" { + if con[1] != 
"s0:c1,c2" { + t.Errorf("DupSecOpt Failed level incorrect") + } + continue + } + t.Errorf("DupSecOpt failed: invalid field %q", con[0]) + } + secopt = DisableSecOpt() + if secopt[0] != "disable" { + t.Errorf(`DisableSecOpt failed: want "disable", got %q`, secopt[0]) + } +} + +func TestSELinuxNoLevel(t *testing.T) { + if !GetEnabled() { + t.Skip("SELinux not enabled, skipping.") + } + + tlabel := "system_u:system_r:container_t" + dup, err := DupSecOpt(tlabel) + if err != nil { + t.Fatal(err) + } + + if len(dup) != 3 { + t.Errorf("DupSecOpt failed on non mls label: want 3, got %d", len(dup)) + } + con, err := NewContext(tlabel) + if err != nil { + t.Fatal(err) + } + if con.Get() != tlabel { + t.Errorf("NewContext and con.Get() failed on non mls label: want %q, got %q", tlabel, con.Get()) + } +} + +func TestSocketLabel(t *testing.T) { + if !GetEnabled() { + t.Skip("SELinux not enabled, skipping.") + } + + // Ensure the thread stays the same for duration of the test. + // Otherwise Go runtime can switch this to a different thread, + // which results in EACCES in call to SetSocketLabel. + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + label := "system_u:object_r:container_t:s0:c1,c2" + if err := SetSocketLabel(label); err != nil { + t.Fatal(err) + } + nlabel, err := SocketLabel() + if err != nil { + t.Fatal(err) + } + if label != nlabel { + t.Errorf("SocketLabel %s != %s", nlabel, label) + } +} + +func TestKeyLabel(t *testing.T) { + if !GetEnabled() { + t.Skip("SELinux not enabled, skipping.") + } + + // Ensure the thread stays the same for duration of the test. + // Otherwise Go runtime can switch this to a different thread, + // which results in EACCES in call to SetKeyLabel. 
+ runtime.LockOSThread() + defer runtime.UnlockOSThread() + + if unix.Getpid() != unix.Gettid() { + t.Skip(ErrNotTGLeader) + } + + label := "system_u:object_r:container_t:s0:c1,c2" + if err := SetKeyLabel(label); err != nil { + t.Fatal(err) + } + nlabel, err := KeyLabel() + if err != nil { + t.Fatal(err) + } + if label != nlabel { + t.Errorf("KeyLabel: want %q, got %q", label, nlabel) + } +} + +func BenchmarkContextGet(b *testing.B) { + ctx, err := NewContext("system_u:object_r:container_file_t:s0:c1022,c1023") + if err != nil { + b.Fatal(err) + } + str := "" + for i := 0; i < b.N; i++ { + str = ctx.get() + } + b.Log(str) +} + +func TestSELinux(t *testing.T) { + if !GetEnabled() { + t.Skip("SELinux not enabled, skipping.") + } + + // Ensure the thread stays the same for duration of the test. + // Otherwise Go runtime can switch this to a different thread, + // which results in EACCES in call to SetFSCreateLabel. + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + var ( + err error + plabel, flabel string + ) + + plabel, flabel = ContainerLabels() + t.Log(plabel) + t.Log(flabel) + plabel, flabel = ContainerLabels() + t.Log(plabel) + t.Log(flabel) + ReleaseLabel(plabel) + + plabel, flabel = ContainerLabels() + t.Log(plabel) + t.Log(flabel) + ClearLabels() + t.Log("ClearLabels") + plabel, flabel = ContainerLabels() + t.Log(plabel) + t.Log(flabel) + ReleaseLabel(plabel) + + pid := os.Getpid() + t.Logf("PID:%d MCS:%s", pid, intToMcs(pid, 1023)) + err = SetFSCreateLabel("unconfined_u:unconfined_r:unconfined_t:s0") + if err != nil { + t.Fatal("SetFSCreateLabel failed:", err) + } + t.Log(FSCreateLabel()) + err = SetFSCreateLabel("") + if err != nil { + t.Fatal("SetFSCreateLabel failed:", err) + } + t.Log(FSCreateLabel()) + t.Log(PidLabel(1)) +} + +func TestSetEnforceMode(t *testing.T) { + if !GetEnabled() { + t.Skip("SELinux not enabled, skipping.") + } + if os.Geteuid() != 0 { + t.Skip("root required, skipping") + } + + t.Log("Enforcing Mode:", EnforceMode()) + 
mode := DefaultEnforceMode() + t.Log("Default Enforce Mode:", mode) + defer func() { + _ = SetEnforceMode(mode) + }() + + if err := SetEnforceMode(Enforcing); err != nil { + t.Fatalf("setting selinux mode to enforcing failed: %v", err) + } + if err := SetEnforceMode(Permissive); err != nil { + t.Fatalf("setting selinux mode to permissive failed: %v", err) + } +} + +func TestCanonicalizeContext(t *testing.T) { + if !GetEnabled() { + t.Skip("SELinux not enabled, skipping.") + } + + con := "system_u:object_r:bin_t:s0:c1,c2,c3" + checkcon := "system_u:object_r:bin_t:s0:c1.c3" + newcon, err := CanonicalizeContext(con) + if err != nil { + t.Fatal(err) + } + if newcon != checkcon { + t.Fatalf("CanonicalizeContext(%s) returned %s expected %s", con, newcon, checkcon) + } + con = "system_u:object_r:bin_t:s0:c5,c2" + checkcon = "system_u:object_r:bin_t:s0:c2,c5" + newcon, err = CanonicalizeContext(con) + if err != nil { + t.Fatal(err) + } + if newcon != checkcon { + t.Fatalf("CanonicalizeContext(%s) returned %s expected %s", con, newcon, checkcon) + } +} + +func TestFindSELinuxfsInMountinfo(t *testing.T) { + //nolint:dupword // ignore duplicate words (sysfs sysfs) + const mountinfo = `18 62 0:17 / /sys rw,nosuid,nodev,noexec,relatime shared:6 - sysfs sysfs rw,seclabel +19 62 0:3 / /proc rw,nosuid,nodev,noexec,relatime shared:5 - proc proc rw +20 62 0:5 / /dev rw,nosuid shared:2 - devtmpfs devtmpfs rw,seclabel,size=3995472k,nr_inodes=998868,mode=755 +21 18 0:16 / /sys/kernel/security rw,nosuid,nodev,noexec,relatime shared:7 - securityfs securityfs rw +22 20 0:18 / /dev/shm rw,nosuid,nodev shared:3 - tmpfs tmpfs rw,seclabel +23 20 0:11 / /dev/pts rw,nosuid,noexec,relatime shared:4 - devpts devpts rw,seclabel,gid=5,mode=620,ptmxmode=000 +24 62 0:19 / /run rw,nosuid,nodev shared:23 - tmpfs tmpfs rw,seclabel,mode=755 +25 18 0:20 / /sys/fs/cgroup ro,nosuid,nodev,noexec shared:8 - tmpfs tmpfs ro,seclabel,mode=755 +26 25 0:21 / /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime 
shared:9 - cgroup cgroup rw,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd +27 18 0:22 / /sys/fs/pstore rw,nosuid,nodev,noexec,relatime shared:20 - pstore pstore rw +28 25 0:23 / /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime shared:10 - cgroup cgroup rw,perf_event +29 25 0:24 / /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:11 - cgroup cgroup rw,devices +30 25 0:25 / /sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime shared:12 - cgroup cgroup rw,cpuacct,cpu +31 25 0:26 / /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime shared:13 - cgroup cgroup rw,freezer +32 25 0:27 / /sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime shared:14 - cgroup cgroup rw,net_prio,net_cls +33 25 0:28 / /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime shared:15 - cgroup cgroup rw,cpuset +34 25 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory +35 25 0:30 / /sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:17 - cgroup cgroup rw,pids +36 25 0:31 / /sys/fs/cgroup/hugetlb rw,nosuid,nodev,noexec,relatime shared:18 - cgroup cgroup rw,hugetlb +37 25 0:32 / /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime shared:19 - cgroup cgroup rw,blkio +59 18 0:33 / /sys/kernel/config rw,relatime shared:21 - configfs configfs rw +62 1 253:1 / / rw,relatime shared:1 - ext4 /dev/vda1 rw,seclabel,data=ordered +38 18 0:15 / /sys/fs/selinux rw,relatime shared:22 - selinuxfs selinuxfs rw +39 19 0:35 / /proc/sys/fs/binfmt_misc rw,relatime shared:24 - autofs systemd-1 rw,fd=29,pgrp=1,timeout=0,minproto=5,maxproto=5,direct,pipe_ino=11601 +40 20 0:36 / /dev/hugepages rw,relatime shared:25 - hugetlbfs hugetlbfs rw,seclabel +41 20 0:14 / /dev/mqueue rw,relatime shared:26 - mqueue mqueue rw,seclabel +42 18 0:6 / /sys/kernel/debug rw,relatime shared:27 - debugfs debugfs rw +112 62 253:1 /var/lib/docker/plugins /var/lib/docker/plugins rw,relatime - ext4 /dev/vda1 
rw,seclabel,data=ordered +115 62 253:1 /var/lib/docker/overlay2 /var/lib/docker/overlay2 rw,relatime - ext4 /dev/vda1 rw,seclabel,data=ordered +118 62 7:0 / /root/mnt rw,relatime shared:66 - ext4 /dev/loop0 rw,seclabel,data=ordered +121 115 0:38 / /var/lib/docker/overlay2/8cdbabf81bc89b14ea54eaf418c1922068f06917fff57e184aa26541ff291073/merged rw,relatime - overlay overlay rw,seclabel,lowerdir=/var/lib/docker/overlay2/l/CPD4XI7UD4GGTGSJVPQSHWZKTK:/var/lib/docker/overlay2/l/NQKORR3IS7KNQDER35AZECLH4Z,upperdir=/var/lib/docker/overlay2/8cdbabf81bc89b14ea54eaf418c1922068f06917fff57e184aa26541ff291073/diff,workdir=/var/lib/docker/overlay2/8cdbabf81bc89b14ea54eaf418c1922068f06917fff57e184aa26541ff291073/work +125 62 0:39 / /var/lib/docker/containers/5e3fce422957c291a5b502c2cf33d512fc1fcac424e4113136c808360e5b7215/shm rw,nosuid,nodev,noexec,relatime shared:68 - tmpfs shm rw,seclabel,size=65536k +186 24 0:3 / /run/docker/netns/0a08e7496c6d rw,nosuid,nodev,noexec,relatime shared:5 - proc proc rw +130 62 0:15 / /root/chroot/selinux rw,relatime shared:22 - selinuxfs selinuxfs rw +109 24 0:37 / /run/user/0 rw,nosuid,nodev,relatime shared:62 - tmpfs tmpfs rw,seclabel,size=801032k,mode=700 +` + s := bufio.NewScanner(bytes.NewBuffer([]byte(mountinfo))) + for _, expected := range []string{"/sys/fs/selinux", "/root/chroot/selinux", ""} { + mnt := findSELinuxfsMount(s) + t.Logf("found %q", mnt) + if mnt != expected { + t.Fatalf("expected %q, got %q", expected, mnt) + } + } +} + +func TestSecurityCheckContext(t *testing.T) { + if !GetEnabled() { + t.Skip("SELinux not enabled, skipping.") + } + + // check with valid context + context, err := CurrentLabel() + if err != nil { + t.Fatalf("CurrentLabel() error: %v", err) + } + if context != "" { + t.Logf("SecurityCheckContext(%q)", context) + err = SecurityCheckContext(context) + if err != nil { + t.Errorf("SecurityCheckContext(%q) error: %v", context, err) + } + } + + context = "not-syntactically-valid" + err = 
SecurityCheckContext(context) + if err == nil { + t.Errorf("SecurityCheckContext(%q) succeeded, expected to fail", context) + } +} + +func TestClassIndex(t *testing.T) { + if !GetEnabled() { + t.Skip("SELinux not enabled, skipping.") + } + + idx, err := ClassIndex("process") + if err != nil { + t.Errorf("Classindex error: %v", err) + } + // Every known policy has process as index 2, but it isn't guaranteed + if idx != 2 { + t.Errorf("ClassIndex unexpected answer %d, possibly not reference policy", idx) + } + + _, err = ClassIndex("foobar") + if err == nil { + t.Errorf("ClassIndex(\"foobar\") succeeded, expected to fail:") + } +} + +func TestComputeCreateContext(t *testing.T) { + if !GetEnabled() { + t.Skip("SELinux not enabled, skipping.") + } + + // This may or may not be in the loaded policy but any refpolicy based policy should have it + init := "system_u:system_r:init_t:s0" + tmp := "system_u:object_r:tmp_t:s0" + file := "file" + t.Logf("ComputeCreateContext(%s, %s, %s)", init, tmp, file) + context, err := ComputeCreateContext(init, tmp, file) + if err != nil { + t.Errorf("ComputeCreateContext error: %v", err) + } + if context != "system_u:object_r:init_tmp_t:s0" { + t.Errorf("ComputeCreateContext unexpected answer %s, possibly not reference policy", context) + } + + badcon := "badcon" + process := "process" + // Test to ensure that a bad context returns an error + t.Logf("ComputeCreateContext(%s, %s, %s)", badcon, tmp, process) + _, err = ComputeCreateContext(badcon, tmp, process) + if err == nil { + t.Errorf("ComputeCreateContext(%s, %s, %s) succeeded, expected failure", badcon, tmp, process) + } +} + +func TestGlbLub(t *testing.T) { + tests := []struct { + expectedErr error + sourceRange string + targetRange string + expectedRange string + }{ + { + sourceRange: "s0:c0.c100-s10:c0.c150", + targetRange: "s5:c50.c100-s15:c0.c149", + expectedRange: "s5:c50.c100-s10:c0.c149", + }, + { + sourceRange: "s5:c50.c100-s15:c0.c149", + targetRange: 
"s0:c0.c100-s10:c0.c150", + expectedRange: "s5:c50.c100-s10:c0.c149", + }, + { + sourceRange: "s0:c0.c100-s10:c0.c150", + targetRange: "s0", + expectedRange: "s0", + }, + { + sourceRange: "s6:c0.c1023", + targetRange: "s6:c0,c2,c11,c201.c429,c431.c511", + expectedRange: "s6:c0,c2,c11,c201.c429,c431.c511", + }, + { + sourceRange: "s0-s15:c0.c1023", + targetRange: "s6:c0,c2,c11,c201.c429,c431.c511", + expectedRange: "s6-s6:c0,c2,c11,c201.c429,c431.c511", + }, + { + sourceRange: "s0:c0.c100,c125,c140,c150-s10", + targetRange: "s4:c0.c50,c140", + expectedRange: "s4:c0.c50,c140-s4", + }, + { + sourceRange: "s5:c512.c550,c552.c1023-s5:c0.c550,c552.c1023", + targetRange: "s5:c512.c550,c553.c1023-s5:c0,c1,c4,c5,c6,c512.c550,c553.c1023", + expectedRange: "s5:c512.c550,c553.c1023-s5:c0,c1,c4.c6,c512.c550,c553.c1023", + }, + { + sourceRange: "s5:c512.c540,c542,c543,c552.c1023-s5:c0.c550,c552.c1023", + targetRange: "s5:c512.c550,c553.c1023-s5:c0,c1,c4,c5,c6,c512.c550,c553.c1023", + expectedRange: "s5:c512.c540,c542,c543,c553.c1023-s5:c0,c1,c4.c6,c512.c550,c553.c1023", + }, + { + sourceRange: "s5:c50.c100-s15:c0.c149", + targetRange: "s5:c512.c550,c552.c1023-s5:c0.c550,c552.c1023", + expectedRange: "s5-s5:c0.c149", + }, + { + sourceRange: "s5-s15", + targetRange: "s6-s7", + expectedRange: "s6-s7", + }, + { + sourceRange: "s5:c50.c100-s15:c0.c149", + targetRange: "s4-s4:c0.c1023", + expectedErr: ErrIncomparable, + }, + { + sourceRange: "s4-s4:c0.c1023", + targetRange: "s5:c50.c100-s15:c0.c149", + expectedErr: ErrIncomparable, + }, + { + sourceRange: "s4-s4:c0.c1023.c10000", + targetRange: "s5:c50.c100-s15:c0.c149", + expectedErr: strconv.ErrSyntax, + }, + { + sourceRange: "s4-s4:c0.c1023.c10000-s4", + targetRange: "s5:c50.c100-s15:c0.c149-s5", + expectedErr: strconv.ErrSyntax, + }, + { + sourceRange: "4-4", + targetRange: "s5:c50.c100-s15:c0.c149", + expectedErr: ErrLevelSyntax, + }, + { + sourceRange: "t4-t4", + targetRange: "s5:c50.c100-s15:c0.c149", + expectedErr: 
ErrLevelSyntax, + }, + { + sourceRange: "s5:x50.x100-s15:c0.c149", + targetRange: "s5:c50.c100-s15:c0.c149", + expectedErr: ErrLevelSyntax, + }, + } + + for _, tt := range tests { + got, err := CalculateGlbLub(tt.sourceRange, tt.targetRange) + if !errors.Is(err, tt.expectedErr) { + // Go 1.13 strconv errors are not unwrappable, + // so do that manually. + // TODO remove this once we stop supporting Go 1.13. + var numErr *strconv.NumError + if errors.As(err, &numErr) && numErr.Err == tt.expectedErr { //nolint:errorlint // see above + continue + } + t.Fatalf("want %q got %q: src: %q tgt: %q", tt.expectedErr, err, tt.sourceRange, tt.targetRange) + } + + if got != tt.expectedRange { + t.Errorf("want %q got %q", tt.expectedRange, got) + } + } +} + +func TestContextWithLevel(t *testing.T) { + want := "bob:sysadm_r:sysadm_t:SystemLow-SystemHigh" + + goodDefaultBuff := ` +foo_r:foo_t:s0 sysadm_r:sysadm_t:s0 +staff_r:staff_t:s0 baz_r:baz_t:s0 sysadm_r:sysadm_t:s0 +` + + verifier := func(con string) error { + if con != want { + return fmt.Errorf("invalid context %s", con) + } + + return nil + } + + tests := []struct { + name, userBuff, defaultBuff string + }{ + { + name: "match exists in user context file", + userBuff: `# COMMENT +foo_r:foo_t:s0 sysadm_r:sysadm_t:s0 + +staff_r:staff_t:s0 baz_r:baz_t:s0 sysadm_r:sysadm_t:s0 +`, + defaultBuff: goodDefaultBuff, + }, + { + name: "match exists in default context file, but not in user file", + userBuff: `# COMMENT +foo_r:foo_t:s0 sysadm_r:sysadm_t:s0 +fake_r:fake_t:s0 baz_r:baz_t:s0 sysadm_r:sysadm_t:s0 +`, + defaultBuff: goodDefaultBuff, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + c := defaultSECtx{ + user: "bob", + level: "SystemLow-SystemHigh", + scon: "system_u:staff_r:staff_t:s0", + userRdr: bytes.NewBufferString(tt.userBuff), + defaultRdr: bytes.NewBufferString(tt.defaultBuff), + verifier: verifier, + } + + got, err := getDefaultContextFromReaders(&c) + if err != nil { + t.Fatalf("err 
should not exist but is: %v", err) + } + + if got != want { + t.Fatalf("got context: %q but expected %q", got, want) + } + }) + } + + t.Run("no match in user or default context files", func(t *testing.T) { + badUserBuff := "" + + badDefaultBuff := ` + foo_r:foo_t:s0 sysadm_r:sysadm_t:s0 + dne_r:dne_t:s0 baz_r:baz_t:s0 sysadm_r:sysadm_t:s0 + ` + c := defaultSECtx{ + user: "bob", + level: "SystemLow-SystemHigh", + scon: "system_u:staff_r:staff_t:s0", + userRdr: bytes.NewBufferString(badUserBuff), + defaultRdr: bytes.NewBufferString(badDefaultBuff), + verifier: verifier, + } + + _, err := getDefaultContextFromReaders(&c) + if err == nil { + t.Fatalf("err was expected") + } + }) +} + +func BenchmarkChcon(b *testing.B) { + file, err := filepath.Abs(os.Args[0]) + if err != nil { + b.Fatalf("filepath.Abs: %v", err) + } + dir := filepath.Dir(file) + con, err := FileLabel(file) + if err != nil { + b.Fatalf("FileLabel(%q): %v", file, err) + } + b.Logf("Chcon(%q, %q)", dir, con) + b.ResetTimer() + for n := 0; n < b.N; n++ { + if err := Chcon(dir, con, true); err != nil { + b.Fatal(err) + } + } +} + +func BenchmarkCurrentLabel(b *testing.B) { + var ( + l string + err error + ) + for n := 0; n < b.N; n++ { + l, err = CurrentLabel() + if err != nil { + b.Fatal(err) + } + } + b.Log(l) +} + +func BenchmarkReadConfig(b *testing.B) { + str := "" + for n := 0; n < b.N; n++ { + str = readConfig(selinuxTypeTag) + } + b.Log(str) +} + +func BenchmarkLoadLabels(b *testing.B) { + for n := 0; n < b.N; n++ { + loadLabels() + } +} diff --git a/internal/third_party/selinux/go-selinux/selinux_stub.go b/internal/third_party/selinux/go-selinux/selinux_stub.go new file mode 100644 index 00000000000..267921239c2 --- /dev/null +++ b/internal/third_party/selinux/go-selinux/selinux_stub.go @@ -0,0 +1,159 @@ +//go:build !linux +// +build !linux + +package selinux + +func attrPath(string) string { + return "" +} + +func readConThreadSelf(string) (string, error) { + return "", nil +} + +func 
writeConThreadSelf(string, string) error { + return nil +} + +func setDisabled() {} + +func getEnabled() bool { + return false +} + +func classIndex(string) (int, error) { + return -1, nil +} + +func setFileLabel(string, string) error { + return nil +} + +func lSetFileLabel(string, string) error { + return nil +} + +func fileLabel(string) (string, error) { + return "", nil +} + +func lFileLabel(string) (string, error) { + return "", nil +} + +func setFSCreateLabel(string) error { + return nil +} + +func fsCreateLabel() (string, error) { + return "", nil +} + +func currentLabel() (string, error) { + return "", nil +} + +func pidLabel(int) (string, error) { + return "", nil +} + +func execLabel() (string, error) { + return "", nil +} + +func canonicalizeContext(string) (string, error) { + return "", nil +} + +func computeCreateContext(string, string, string) (string, error) { + return "", nil +} + +func calculateGlbLub(string, string) (string, error) { + return "", nil +} + +func peerLabel(uintptr) (string, error) { + return "", nil +} + +func setKeyLabel(string) error { + return nil +} + +func keyLabel() (string, error) { + return "", nil +} + +func (c Context) get() string { + return "" +} + +func newContext(string) (Context, error) { + return Context{}, nil +} + +func clearLabels() { +} + +func reserveLabel(string) { +} + +func isMLSEnabled() bool { + return false +} + +func enforceMode() int { + return Disabled +} + +func setEnforceMode(int) error { + return nil +} + +func defaultEnforceMode() int { + return Disabled +} + +func releaseLabel(string) { +} + +func roFileLabel() string { + return "" +} + +func kvmContainerLabels() (string, string) { + return "", "" +} + +func initContainerLabels() (string, string) { + return "", "" +} + +func containerLabels() (string, string) { + return "", "" +} + +func securityCheckContext(string) error { + return nil +} + +func copyLevel(string, string) (string, error) { + return "", nil +} + +func chcon(string, string, bool) 
error { + return nil +} + +func dupSecOpt(string) ([]string, error) { + return nil, nil +} + +func getDefaultContextWithLevel(string, string, string) (string, error) { + return "", nil +} + +func label(_ string) string { + return "" +} diff --git a/internal/third_party/selinux/go-selinux/selinux_stub_test.go b/internal/third_party/selinux/go-selinux/selinux_stub_test.go new file mode 100644 index 00000000000..19ea636a49a --- /dev/null +++ b/internal/third_party/selinux/go-selinux/selinux_stub_test.go @@ -0,0 +1,127 @@ +//go:build !linux +// +build !linux + +package selinux + +import ( + "testing" +) + +const testLabel = "foobar" + +func TestSELinuxStubs(t *testing.T) { + if GetEnabled() { + t.Error("SELinux enabled on non-linux.") + } + + tmpDir := t.TempDir() + if _, err := FileLabel(tmpDir); err != nil { + t.Error(err) + } + + if err := SetFileLabel(tmpDir, testLabel); err != nil { + t.Error(err) + } + + if _, err := LfileLabel(tmpDir); err != nil { + t.Error(err) + } + if err := LsetFileLabel(tmpDir, testLabel); err != nil { + t.Error(err) + } + + if err := SetFSCreateLabel(testLabel); err != nil { + t.Error(err) + } + + if _, err := FSCreateLabel(); err != nil { + t.Error(err) + } + if _, err := CurrentLabel(); err != nil { + t.Error(err) + } + + if _, err := PidLabel(0); err != nil { + t.Error(err) + } + + ClearLabels() + + ReserveLabel(testLabel) + ReleaseLabel(testLabel) + if _, err := DupSecOpt(testLabel); err != nil { + t.Error(err) + } + if v := DisableSecOpt(); len(v) != 1 || v[0] != "disable" { + t.Errorf(`expected "disabled", got %v`, v) + } + SetDisabled() + if enabled := GetEnabled(); enabled { + t.Error("Should not be enabled") + } + if err := SetExecLabel(testLabel); err != nil { + t.Error(err) + } + if err := SetTaskLabel(testLabel); err != nil { + t.Error(err) + } + if _, err := ExecLabel(); err != nil { + t.Error(err) + } + if _, err := CanonicalizeContext(testLabel); err != nil { + t.Error(err) + } + if _, err := ComputeCreateContext("foo", 
"bar", testLabel); err != nil { + t.Error(err) + } + if err := SetSocketLabel(testLabel); err != nil { + t.Error(err) + } + if _, err := ClassIndex(testLabel); err != nil { + t.Error(err) + } + if _, err := SocketLabel(); err != nil { + t.Error(err) + } + if _, err := PeerLabel(0); err != nil { + t.Error(err) + } + if err := SetKeyLabel(testLabel); err != nil { + t.Error(err) + } + if _, err := KeyLabel(); err != nil { + t.Error(err) + } + if err := SetExecLabel(testLabel); err != nil { + t.Error(err) + } + if _, err := ExecLabel(); err != nil { + t.Error(err) + } + con, err := NewContext(testLabel) + if err != nil { + t.Error(err) + } + con.Get() + if err = SetEnforceMode(1); err != nil { + t.Error(err) + } + if v := DefaultEnforceMode(); v != Disabled { + t.Errorf("expected %d, got %d", Disabled, v) + } + if v := EnforceMode(); v != Disabled { + t.Errorf("expected %d, got %d", Disabled, v) + } + if v := ROFileLabel(); v != "" { + t.Errorf(`expected "", got %q`, v) + } + if processLbl, fileLbl := ContainerLabels(); processLbl != "" || fileLbl != "" { + t.Errorf(`expected fileLbl="", fileLbl="" got processLbl=%q, fileLbl=%q`, processLbl, fileLbl) + } + if err = SecurityCheckContext(testLabel); err != nil { + t.Error(err) + } + if _, err = CopyLevel("foo", "bar"); err != nil { + t.Error(err) + } +} diff --git a/internal/third_party/selinux/go-selinux/xattrs_linux.go b/internal/third_party/selinux/go-selinux/xattrs_linux.go new file mode 100644 index 00000000000..559c851075e --- /dev/null +++ b/internal/third_party/selinux/go-selinux/xattrs_linux.go @@ -0,0 +1,71 @@ +package selinux + +import ( + "golang.org/x/sys/unix" +) + +// lgetxattr returns a []byte slice containing the value of +// an extended attribute attr set for path. 
+func lgetxattr(path, attr string) ([]byte, error) { + // Start with a 128-byte buffer; it is replaced below if the value does not fit. + dest := make([]byte, 128) + sz, errno := doLgetxattr(path, attr, dest) + for errno == unix.ERANGE { //nolint:errorlint // unix errors are bare + // Buffer too small: call again with a zero-sized buffer to learn the actual size. + sz, errno = doLgetxattr(path, attr, []byte{}) + if errno != nil { + return nil, errno + } + + dest = make([]byte, sz) + sz, errno = doLgetxattr(path, attr, dest) + } + if errno != nil { + return nil, errno + } + + return dest[:sz], nil +} + +// doLgetxattr wraps unix.Lgetxattr and repeats the call for as long as it fails with EINTR. +func doLgetxattr(path, attr string, dest []byte) (int, error) { + for { + sz, err := unix.Lgetxattr(path, attr, dest) + if err != unix.EINTR { + return sz, err + } + } +} + +// getxattr returns a []byte slice containing the value of +// an extended attribute attr set for path. +func getxattr(path, attr string) ([]byte, error) { + // Start with a 128-byte buffer; it is replaced below if the value does not fit. + dest := make([]byte, 128) + sz, errno := dogetxattr(path, attr, dest) + for errno == unix.ERANGE { //nolint:errorlint // unix errors are bare + // Buffer too small: call again with a zero-sized buffer to learn the actual size. + sz, errno = dogetxattr(path, attr, []byte{}) + if errno != nil { + return nil, errno + } + + dest = make([]byte, sz) + sz, errno = dogetxattr(path, attr, dest) + } + if errno != nil { + return nil, errno + } + + return dest[:sz], nil +} + +// dogetxattr wraps unix.Getxattr and repeats the call for as long as it fails with EINTR. +func dogetxattr(path, attr string, dest []byte) (int, error) { + for { + sz, err := unix.Getxattr(path, attr, dest) + if err != unix.EINTR { + return sz, err + } + } +} diff --git a/internal/third_party/selinux/go.mod b/internal/third_party/selinux/go.mod new file mode 100644 index 00000000000..24d3261a942 --- /dev/null +++ b/internal/third_party/selinux/go.mod @@ -0,0 +1,8 @@ +module github.com/opencontainers/selinux + +go 1.19 + +require ( + github.com/cyphar/filepath-securejoin v0.5.0 + golang.org/x/sys v0.18.0 +) 
diff --git a/internal/third_party/selinux/go.sum b/internal/third_party/selinux/go.sum new file mode 100644 index 00000000000..b9ae09877a4 --- /dev/null +++ b/internal/third_party/selinux/go.sum @@ -0,0 +1,8 @@ +github.com/cyphar/filepath-securejoin v0.5.0 h1:hIAhkRBMQ8nIeuVwcAoymp7MY4oherZdAxD+m0u9zaw= +github.com/cyphar/filepath-securejoin v0.5.0/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY= +golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= +golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/internal/third_party/selinux/pkg/pwalk/README.md b/internal/third_party/selinux/pkg/pwalk/README.md new file mode 100644 index 00000000000..a060ad364cd --- /dev/null +++ b/internal/third_party/selinux/pkg/pwalk/README.md @@ -0,0 +1,52 @@ +## pwalk: parallel implementation of filepath.Walk + +This is a wrapper for [filepath.Walk](https://pkg.go.dev/path/filepath?tab=doc#Walk) +which may speed it up by calling multiple callback functions (WalkFunc) in parallel, +utilizing goroutines. + +By default, it utilizes 2\*runtime.NumCPU() goroutines for callbacks. +This can be changed by using WalkN function which has the additional +parameter, specifying the number of goroutines (concurrency). + +### pwalk vs pwalkdir + +This package is deprecated in favor of +[pwalkdir](https://pkg.go.dev/github.com/opencontainers/selinux/pkg/pwalkdir), +which is faster, but requires at least Go 1.16. 
+ +### Caveats + +Please note the following limitations of this code: + +* Unlike filepath.Walk, the order of calls is non-deterministic; + +* Only primitive error handling is supported: + + * filepath.SkipDir is not supported; + + * ErrNotExist errors from filepath.Walk are silently ignored for any path + except the top directory (Walk argument); any other error is returned to + the caller of Walk; + + * no errors are ever passed to WalkFunc; + + * once any error is returned from any WalkFunc instance, no more new calls + to WalkFunc are made, and the error is returned to the caller of Walk; + + * if more than one walkFunc instance will return an error, only one + of such errors will be propagated and returned by Walk, others + will be silently discarded. + +### Documentation + +For the official documentation, see +https://pkg.go.dev/github.com/opencontainers/selinux/pkg/pwalk?tab=doc + +### Benchmarks + +For a WalkFunc that consists solely of the return statement, this +implementation is about 10% slower than the standard library's +filepath.Walk. + +Otherwise (if a WalkFunc is doing something) this is usually faster, +except when the WalkN(..., 1) is used. diff --git a/internal/third_party/selinux/pkg/pwalk/pwalk.go b/internal/third_party/selinux/pkg/pwalk/pwalk.go new file mode 100644 index 00000000000..686c8bac32c --- /dev/null +++ b/internal/third_party/selinux/pkg/pwalk/pwalk.go @@ -0,0 +1,131 @@ +package pwalk + +import ( + "errors" + "fmt" + "os" + "path/filepath" + "runtime" + "sync" +) + +// WalkFunc is the type of the function called by Walk to visit each +// file or directory. It is an alias for [filepath.WalkFunc]. +// +// Deprecated: use [github.com/opencontainers/selinux/pkg/pwalkdir] and [fs.WalkDirFunc]. +type WalkFunc = filepath.WalkFunc + +// Walk is a wrapper for filepath.Walk which can call multiple walkFn +// in parallel, allowing to handle each item concurrently. 
A maximum of +// twice the runtime.NumCPU() walkFn will be called at any one time. +// If you want to change the maximum, use WalkN instead. +// +// The order of calls is non-deterministic. +// +// Note that this implementation only supports primitive error handling: +// +// - no errors are ever passed to walkFn; +// +// - once a walkFn returns any error, all further processing stops +// and the error is returned to the caller of Walk; +// +// - filepath.SkipDir is not supported; +// +// - if more than one walkFn instance will return an error, only one +// of such errors will be propagated and returned by Walk, others +// will be silently discarded. +// +// Deprecated: use [github.com/opencontainers/selinux/pkg/pwalkdir.Walk] +func Walk(root string, walkFn WalkFunc) error { + return WalkN(root, walkFn, runtime.NumCPU()*2) +} + +// WalkN is a wrapper for filepath.Walk which can call multiple walkFn +// in parallel, allowing to handle each item concurrently. A maximum of +// num walkFn will be called at any one time. +// +// Please see Walk documentation for caveats of using this function. +// +// Deprecated: use [github.com/opencontainers/selinux/pkg/pwalkdir.WalkN] +func WalkN(root string, walkFn WalkFunc, num int) error { + // make sure limit is sensible + if num < 1 { + return fmt.Errorf("walk(%q): num must be > 0", root) + } + + files := make(chan *walkArgs, 2*num) + errCh := make(chan error, 1) // get the first error, ignore others + + // Start walking a tree asap + var ( + err error + wg sync.WaitGroup + + rootLen = len(root) + rootEntry *walkArgs + ) + wg.Add(1) + go func() { + err = filepath.Walk(root, func(p string, info os.FileInfo, err error) error { + if err != nil { + // Walking a file tree can race with removal, + // so ignore ENOENT, except for root. + // https://github.com/opencontainers/selinux/issues/199. 
+ if errors.Is(err, os.ErrNotExist) && len(p) != rootLen { + return nil + } + + close(files) + return err + } + if len(p) == rootLen { + // Root entry is processed separately below. + rootEntry = &walkArgs{path: p, info: &info} + return nil + } + // add a file to the queue unless a callback sent an error + select { + case e := <-errCh: + close(files) + return e + default: + files <- &walkArgs{path: p, info: &info} + return nil + } + }) + if err == nil { + close(files) + } + wg.Done() + }() + + wg.Add(num) + for i := 0; i < num; i++ { + go func() { + for file := range files { + if e := walkFn(file.path, *file.info, nil); e != nil { + select { + case errCh <- e: // sent ok + default: // buffer full + } + } + } + wg.Done() + }() + } + + wg.Wait() + + if err == nil { + err = walkFn(rootEntry.path, *rootEntry.info, nil) + } + + return err +} + +// walkArgs holds the arguments that were passed to the Walk or WalkN +// functions. +type walkArgs struct { + info *os.FileInfo + path string +} diff --git a/internal/third_party/selinux/pkg/pwalk/pwalk_test.go b/internal/third_party/selinux/pkg/pwalk/pwalk_test.go new file mode 100644 index 00000000000..9cca3b6b15a --- /dev/null +++ b/internal/third_party/selinux/pkg/pwalk/pwalk_test.go @@ -0,0 +1,236 @@ +package pwalk + +import ( + "errors" + "math/rand" + "os" + "path/filepath" + "runtime" + "sync/atomic" + "testing" + "time" +) + +func TestWalk(t *testing.T) { + var ac atomic.Uint32 + concurrency := runtime.NumCPU() * 2 + + dir, total := prepareTestSet(t, 3, 2, 1) + + err := WalkN(dir, + func(_ string, _ os.FileInfo, _ error) error { + ac.Add(1) + return nil + }, + concurrency) + if err != nil { + t.Errorf("Walk failed: %v", err) + } + count := ac.Load() + if count != total { + t.Errorf("File count mismatch: found %d, expected %d", count, total) + } + + t.Logf("concurrency: %d, files found: %d", concurrency, count) +} + +func TestWalkTopLevelErrNotExistNotIgnored(t *testing.T) { + if WalkN("non-existent-directory", cbEmpty, 
8) == nil { + t.Fatal("expected ErrNotExist, got nil") + } +} + +// https://github.com/opencontainers/selinux/issues/199 +func TestWalkRaceWithRemoval(t *testing.T) { + var ac atomic.Uint32 + concurrency := runtime.NumCPU() * 2 + // This test is still on a best-effort basis, meaning it can still pass + // when there is a bug in the code, but the larger the test set is, the + // higher the probability that this test fails (without a fix). + // + // With this set (4, 5, 6), and the fix commented out, it fails + // 100 out of 100 runs on my machine. + dir, total := prepareTestSet(t, 4, 5, 6) + + // Race walk with removal. + go os.RemoveAll(dir) + err := WalkN(dir, + func(_ string, _ os.FileInfo, _ error) error { + ac.Add(1) + return nil + }, + concurrency) + count := int(ac.Load()) + t.Logf("found %d of %d files", count, total) + if err != nil { + t.Fatalf("expected nil, got %v", err) + } +} + +func TestWalkDirManyErrors(t *testing.T) { + var ac atomic.Uint32 + + dir, total := prepareTestSet(t, 3, 3, 2) + + maxFiles := total / 2 + e42 := errors.New("42") + err := Walk(dir, + func(_ string, _ os.FileInfo, _ error) error { + if ac.Add(1) > maxFiles { + return e42 + } + return nil + }) + count := ac.Load() + t.Logf("found %d of %d files", count, total) + + if err == nil { + t.Errorf("Walk succeeded, but error is expected") + if count != total { + t.Errorf("File count mismatch: found %d, expected %d", count, total) + } + } +} + +func makeManyDirs(prefix string, levels, dirs, files int) (count uint32, err error) { + for d := 0; d < dirs; d++ { + var dir string + dir, err = os.MkdirTemp(prefix, "d-") + if err != nil { + return count, err + } + count++ + for f := 0; f < files; f++ { + var fi *os.File + fi, err = os.CreateTemp(dir, "f-") + if err != nil { + return count, err + } + _ = fi.Close() + count++ + } + if levels == 0 { + continue + } + var c uint32 + if c, err = makeManyDirs(dir, levels-1, dirs, files); err != nil { + return count, err + } + count += c + } + + return 
count, err +} + +// prepareTestSet() creates a directory tree of shallow files, +// to be used for testing or benchmarking. +// +// Total dirs: dirs^levels + dirs^(levels-1) + ... + dirs^1 +// Total files: total_dirs * files +func prepareTestSet(tb testing.TB, levels, dirs, files int) (dir string, total uint32) { + tb.Helper() + var err error + + dir = tb.TempDir() + total, err = makeManyDirs(dir, levels, dirs, files) + if err != nil { + tb.Fatal(err) + } + total++ // this dir + + return dir, total +} + +type walkerFunc func(root string, walkFn WalkFunc) error + +func genWalkN(n int) walkerFunc { + return func(root string, walkFn WalkFunc) error { + return WalkN(root, walkFn, n) + } +} + +func BenchmarkWalk(b *testing.B) { + const ( + levels = 5 // how deep + dirs = 3 // dirs on each levels + files = 8 // files on each levels + ) + + benchmarks := []struct { + walk filepath.WalkFunc + name string + }{ + {name: "Empty", walk: cbEmpty}, + {name: "ReadFile", walk: cbReadFile}, + {name: "ChownChmod", walk: cbChownChmod}, + {name: "RandomSleep", walk: cbRandomSleep}, + } + + walkers := []struct { + walker walkerFunc + name string + }{ + {name: "filepath.Walk", walker: filepath.Walk}, + {name: "pwalk.Walk", walker: Walk}, + // test WalkN with various values of N + {name: "pwalk.Walk1", walker: genWalkN(1)}, + {name: "pwalk.Walk2", walker: genWalkN(2)}, + {name: "pwalk.Walk4", walker: genWalkN(4)}, + {name: "pwalk.Walk8", walker: genWalkN(8)}, + {name: "pwalk.Walk16", walker: genWalkN(16)}, + {name: "pwalk.Walk32", walker: genWalkN(32)}, + {name: "pwalk.Walk64", walker: genWalkN(64)}, + {name: "pwalk.Walk128", walker: genWalkN(128)}, + {name: "pwalk.Walk256", walker: genWalkN(256)}, + } + + dir, total := prepareTestSet(b, levels, dirs, files) + b.Logf("dataset: %d levels x %d dirs x %d files, total entries: %d", levels, dirs, files, total) + + for _, bm := range benchmarks { + for _, w := range walkers { + walker := w.walker + walkFn := bm.walk + // preheat + if err := 
w.walker(dir, bm.walk); err != nil { + b.Errorf("walk failed: %v", err) + } + // benchmark + b.Run(bm.name+"/"+w.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + if err := walker(dir, walkFn); err != nil { + b.Errorf("walk failed: %v", err) + } + } + }) + } + } +} + +func cbEmpty(_ string, _ os.FileInfo, _ error) error { + return nil +} + +func cbChownChmod(path string, info os.FileInfo, _ error) error { + _ = os.Chown(path, 0, 0) + mode := os.FileMode(0o644) + if info.Mode().IsDir() { + mode = os.FileMode(0o755) + } + _ = os.Chmod(path, mode) + + return nil +} + +func cbReadFile(path string, info os.FileInfo, _ error) error { + var err error + if info.Mode().IsRegular() { + _, err = os.ReadFile(path) + } + return err +} + +func cbRandomSleep(_ string, _ os.FileInfo, _ error) error { + time.Sleep(time.Duration(rand.Intn(500)) * time.Microsecond) //nolint:gosec // ignore G404: Use of weak random number generator + return nil +} diff --git a/internal/third_party/selinux/pkg/pwalkdir/README.md b/internal/third_party/selinux/pkg/pwalkdir/README.md new file mode 100644 index 00000000000..b827e7dd73f --- /dev/null +++ b/internal/third_party/selinux/pkg/pwalkdir/README.md @@ -0,0 +1,56 @@ +## pwalkdir: parallel implementation of filepath.WalkDir + +This is a wrapper for [filepath.WalkDir](https://pkg.go.dev/path/filepath#WalkDir) +which may speed it up by calling multiple callback functions (WalkDirFunc) +in parallel, utilizing goroutines. + +By default, it utilizes 2\*runtime.NumCPU() goroutines for callbacks. +This can be changed by using WalkN function which has the additional +parameter, specifying the number of goroutines (concurrency). + +### pwalk vs pwalkdir + +This package is very similar to +[pwalk](https://pkg.go.dev/github.com/opencontainers/selinux/pkg/pwalk), +but utilizes `filepath.WalkDir` (added in Go 1.16), which does not call stat(2) +on every entry and is therefore faster (up to 3x, depending on usage scenario). 
+ +Users who are OK with requiring Go 1.16+ should switch to this +implementation. + +### Caveats + +Please note the following limitations of this code: + +* Unlike filepath.WalkDir, the order of calls is non-deterministic; + +* Only primitive error handling is supported: + + * fs.SkipDir is not supported; + + * ErrNotExist errors from filepath.WalkDir are silently ignored for any path + except the top directory (WalkDir argument); any other error is returned to + the caller of Walk; + + * once any error is returned from any WalkDirFunc instance, no more calls + to WalkDirFunc are made, and the error is returned to the caller of Walk; + + * if more than one WalkDirFunc instance will return an error, only one + of such errors will be propagated to and returned by Walk, others + will be silently discarded. + +### Documentation + +For the official documentation, see +https://pkg.go.dev/github.com/opencontainers/selinux/pkg/pwalkdir + +### Benchmarks + +For a WalkDirFunc that consists solely of the return statement, this +implementation is about 15% slower than the standard library's +filepath.WalkDir. + +Otherwise (if a WalkDirFunc is actually doing something) this is usually +faster, except when WalkN(..., 1) is used. Run `go test -bench .` +to see how different operations can benefit from it, as well as how the +level of parallelism affects the speed. diff --git a/internal/third_party/selinux/pkg/pwalkdir/pwalkdir.go b/internal/third_party/selinux/pkg/pwalkdir/pwalkdir.go new file mode 100644 index 00000000000..5d2d09a2985 --- /dev/null +++ b/internal/third_party/selinux/pkg/pwalkdir/pwalkdir.go @@ -0,0 +1,123 @@ +//go:build go1.16 +// +build go1.16 + +package pwalkdir + +import ( + "errors" + "fmt" + "io/fs" + "path/filepath" + "runtime" + "sync" +) + +// Walk is a wrapper for filepath.WalkDir which can call multiple walkFn +// in parallel, allowing to handle each item concurrently. 
A maximum of +// twice the runtime.NumCPU() walkFn will be called at any one time. +// If you want to change the maximum, use WalkN instead. +// +// The order of calls is non-deterministic. +// +// Note that this implementation only supports primitive error handling: +// +// - no errors are ever passed to walkFn; +// +// - once a walkFn returns any error, all further processing stops +// and the error is returned to the caller of Walk; +// +// - filepath.SkipDir is not supported; +// +// - if more than one walkFn instance will return an error, only one +// of such errors will be propagated and returned by Walk, others +// will be silently discarded. +func Walk(root string, walkFn fs.WalkDirFunc) error { + return WalkN(root, walkFn, runtime.NumCPU()*2) +} + +// WalkN is a wrapper for filepath.WalkDir which can call multiple walkFn +// in parallel, allowing to handle each item concurrently. A maximum of +// num walkFn will be called at any one time. +// +// Please see Walk documentation for caveats of using this function. +func WalkN(root string, walkFn fs.WalkDirFunc, num int) error { + // make sure limit is sensible + if num < 1 { + return fmt.Errorf("walk(%q): num must be > 0", root) + } + + files := make(chan *walkArgs, 2*num) + errCh := make(chan error, 1) // Get the first error, ignore others. + + // Start walking a tree asap. + var ( + err error + wg sync.WaitGroup + + rootLen = len(root) + rootEntry *walkArgs + ) + wg.Add(1) + go func() { + err = filepath.WalkDir(root, func(p string, entry fs.DirEntry, err error) error { + if err != nil { + // Walking a file tree can race with removal, + // so ignore ENOENT, except for root. + // https://github.com/opencontainers/selinux/issues/199. + if errors.Is(err, fs.ErrNotExist) && len(p) != rootLen { + return nil + } + close(files) + return err + } + if len(p) == rootLen { + // Root entry is processed separately below. 
+ rootEntry = &walkArgs{path: p, entry: entry} + return nil + } + // Add a file to the queue unless a callback sent an error. + select { + case e := <-errCh: + close(files) + return e + default: + files <- &walkArgs{path: p, entry: entry} + return nil + } + }) + if err == nil { + close(files) + } + wg.Done() + }() + + wg.Add(num) + for i := 0; i < num; i++ { + go func() { + for file := range files { + if e := walkFn(file.path, file.entry, nil); e != nil { + select { + case errCh <- e: // sent ok + default: // buffer full + } + } + } + wg.Done() + }() + } + + wg.Wait() + + if err == nil { + err = walkFn(rootEntry.path, rootEntry.entry, nil) + } + + return err +} + +// walkArgs holds the arguments that were passed to the Walk or WalkN +// functions. +type walkArgs struct { + entry fs.DirEntry + path string +} diff --git a/internal/third_party/selinux/pkg/pwalkdir/pwalkdir_test.go b/internal/third_party/selinux/pkg/pwalkdir/pwalkdir_test.go new file mode 100644 index 00000000000..e66a80d1ab3 --- /dev/null +++ b/internal/third_party/selinux/pkg/pwalkdir/pwalkdir_test.go @@ -0,0 +1,239 @@ +//go:build go1.16 +// +build go1.16 + +package pwalkdir + +import ( + "errors" + "io/fs" + "math/rand" + "os" + "path/filepath" + "runtime" + "sync/atomic" + "testing" + "time" +) + +func TestWalkDir(t *testing.T) { + var ac atomic.Uint32 + concurrency := runtime.NumCPU() * 2 + dir, total := prepareTestSet(t, 3, 2, 1) + + err := WalkN(dir, + func(_ string, _ fs.DirEntry, _ error) error { + ac.Add(1) + return nil + }, + concurrency) + if err != nil { + t.Errorf("Walk failed: %v", err) + } + count := ac.Load() + if count != total { + t.Errorf("File count mismatch: found %d, expected %d", count, total) + } + + t.Logf("concurrency: %d, files found: %d", concurrency, count) +} + +func TestWalkDirTopLevelErrNotExistNotIgnored(t *testing.T) { + err := WalkN("non-existent-directory", cbEmpty, 8) + if err == nil { + t.Fatal("expected ErrNotExist, got nil") + } +} + +// 
https://github.com/opencontainers/selinux/issues/199 +func TestWalkDirRaceWithRemoval(t *testing.T) { + var ac atomic.Uint32 + concurrency := runtime.NumCPU() * 2 + // This test is still on a best-effort basis, meaning it can still pass + // when there is a bug in the code, but the larger the test set is, the + // higher the probability that this test fails (without a fix). + // + // With this set (4, 5, 6), and the fix commented out, it fails + // about 90 out of 100 runs on my machine. + dir, total := prepareTestSet(t, 4, 5, 6) + + // Make walk race with removal. + go os.RemoveAll(dir) + err := WalkN(dir, + func(_ string, _ fs.DirEntry, _ error) error { + ac.Add(1) + return nil + }, + concurrency) + count := ac.Load() + t.Logf("found %d of %d files", count, total) + if err != nil { + t.Fatalf("expected nil, got %v", err) + } +} + +func TestWalkDirManyErrors(t *testing.T) { + var ac atomic.Uint32 + dir, total := prepareTestSet(t, 3, 3, 2) + + maxFiles := total / 2 + e42 := errors.New("42") + err := Walk(dir, + func(_ string, _ fs.DirEntry, _ error) error { + if ac.Add(1) > maxFiles { + return e42 + } + return nil + }) + count := ac.Load() + t.Logf("found %d of %d files", count, total) + + if err == nil { + t.Error("Walk succeeded, but error is expected") + if count != total { + t.Errorf("File count mismatch: found %d, expected %d", count, total) + } + } +} + +func makeManyDirs(prefix string, levels, dirs, files int) (count uint32, err error) { + for d := 0; d < dirs; d++ { + var dir string + dir, err = os.MkdirTemp(prefix, "d-") + if err != nil { + return count, err + } + count++ + for f := 0; f < files; f++ { + var fi *os.File + fi, err = os.CreateTemp(dir, "f-") + if err != nil { + return count, err + } + fi.Close() + count++ + } + if levels == 0 { + continue + } + var c uint32 + if c, err = makeManyDirs(dir, levels-1, dirs, files); err != nil { + return count, err + } + count += c + } + + return count, err +} + +// prepareTestSet() creates a directory tree of 
shallow files, +// to be used for testing or benchmarking. +// +// Total dirs: dirs^levels + dirs^(levels-1) + ... + dirs^1 +// Total files: total_dirs * files +func prepareTestSet(tb testing.TB, levels, dirs, files int) (dir string, total uint32) { + tb.Helper() + var err error + + dir = tb.TempDir() + total, err = makeManyDirs(dir, levels, dirs, files) + if err != nil { + tb.Fatal(err) + } + total++ // this dir + + return dir, total +} + +type walkerFunc func(root string, walkFn fs.WalkDirFunc) error + +func genWalkN(n int) walkerFunc { + return func(root string, walkFn fs.WalkDirFunc) error { + return WalkN(root, walkFn, n) + } +} + +func BenchmarkWalk(b *testing.B) { + const ( + levels = 5 // how deep + dirs = 3 // dirs on each levels + files = 8 // files on each levels + ) + + benchmarks := []struct { + walk fs.WalkDirFunc + name string + }{ + {name: "Empty", walk: cbEmpty}, + {name: "ReadFile", walk: cbReadFile}, + {name: "ChownChmod", walk: cbChownChmod}, + {name: "RandomSleep", walk: cbRandomSleep}, + } + + walkers := []struct { + walker walkerFunc + name string + }{ + {name: "filepath.WalkDir", walker: filepath.WalkDir}, + {name: "pwalkdir.Walk", walker: Walk}, + // test WalkN with various values of N + {name: "pwalkdir.Walk1", walker: genWalkN(1)}, + {name: "pwalkdir.Walk2", walker: genWalkN(2)}, + {name: "pwalkdir.Walk4", walker: genWalkN(4)}, + {name: "pwalkdir.Walk8", walker: genWalkN(8)}, + {name: "pwalkdir.Walk16", walker: genWalkN(16)}, + {name: "pwalkdir.Walk32", walker: genWalkN(32)}, + {name: "pwalkdir.Walk64", walker: genWalkN(64)}, + {name: "pwalkdir.Walk128", walker: genWalkN(128)}, + {name: "pwalkdir.Walk256", walker: genWalkN(256)}, + } + + dir, total := prepareTestSet(b, levels, dirs, files) + b.Logf("dataset: %d levels x %d dirs x %d files, total entries: %d", levels, dirs, files, total) + + for _, bm := range benchmarks { + for _, w := range walkers { + walker := w.walker + walkFn := bm.walk + // preheat + if err := w.walker(dir, 
bm.walk); err != nil { + b.Errorf("walk failed: %v", err) + } + // benchmark + b.Run(bm.name+"/"+w.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + if err := walker(dir, walkFn); err != nil { + b.Errorf("walk failed: %v", err) + } + } + }) + } + } +} + +func cbEmpty(_ string, _ fs.DirEntry, _ error) error { + return nil +} + +func cbChownChmod(path string, e fs.DirEntry, _ error) error { + _ = os.Chown(path, 0, 0) + mode := os.FileMode(0o644) + if e.IsDir() { + mode = os.FileMode(0o755) + } + _ = os.Chmod(path, mode) + + return nil +} + +func cbReadFile(path string, e fs.DirEntry, _ error) error { + var err error + if e.Type().IsRegular() { + _, err = os.ReadFile(path) + } + return err +} + +func cbRandomSleep(_ string, _ fs.DirEntry, _ error) error { + time.Sleep(time.Duration(rand.Intn(500)) * time.Microsecond) //nolint:gosec // ignore G404: Use of weak random number generator + return nil +} diff --git a/libcontainer/apparmor/apparmor_linux.go b/libcontainer/apparmor/apparmor_linux.go index 17d36ed15a3..a3a8e93258e 100644 --- a/libcontainer/apparmor/apparmor_linux.go +++ b/libcontainer/apparmor/apparmor_linux.go @@ -6,6 +6,9 @@ import ( "os" "sync" + "golang.org/x/sys/unix" + + "github.com/opencontainers/runc/internal/pathrs" "github.com/opencontainers/runc/libcontainer/utils" ) @@ -36,19 +39,13 @@ func setProcAttr(attr, value string) error { // Under AppArmor you can only change your own attr, so there's no reason // to not use /proc/thread-self/ (instead of /proc//, like libapparmor // does). 
- attrPath, closer := utils.ProcThreadSelf(attrSubPath) - defer closer() - - f, err := os.OpenFile(attrPath, os.O_WRONLY, 0) + f, closer, err := pathrs.ProcThreadSelfOpen(attrSubPath, unix.O_WRONLY|unix.O_CLOEXEC) if err != nil { return err } + defer closer() defer f.Close() - if err := utils.EnsureProcHandle(f); err != nil { - return err - } - _, err = f.WriteString(value) return err } diff --git a/libcontainer/cgroups/devices/devicefilter_test.go b/libcontainer/cgroups/devices/devicefilter_test.go index 3a415a71eed..23ad92ea06e 100644 --- a/libcontainer/cgroups/devices/devicefilter_test.go +++ b/libcontainer/cgroups/devices/devicefilter_test.go @@ -120,14 +120,21 @@ block-8: 51: MovImm32 dst: r0 imm: 1 52: Exit block-9: -// /dev/pts (c, 136, wildcard, rwm, true) +// tuntap (c, 10, 200, rwm, true) 53: JNEImm dst: r2 off: -1 imm: 2 - 54: JNEImm dst: r4 off: -1 imm: 136 - 55: MovImm32 dst: r0 imm: 1 - 56: Exit + 54: JNEImm dst: r4 off: -1 imm: 10 + 55: JNEImm dst: r5 off: -1 imm: 200 + 56: MovImm32 dst: r0 imm: 1 + 57: Exit block-10: - 57: MovImm32 dst: r0 imm: 0 - 58: Exit +// /dev/pts (c, 136, wildcard, rwm, true) + 58: JNEImm dst: r2 off: -1 imm: 2 + 59: JNEImm dst: r4 off: -1 imm: 136 + 60: MovImm32 dst: r0 imm: 1 + 61: Exit +block-11: + 62: MovImm32 dst: r0 imm: 0 + 63: Exit ` var devices []*devices.Rule for _, device := range specconv.AllowedDevices { diff --git a/libcontainer/cgroups/devices/systemd.go b/libcontainer/cgroups/devices/systemd.go index 5e7e46ae250..5ec6cb4e0bc 100644 --- a/libcontainer/cgroups/devices/systemd.go +++ b/libcontainer/cgroups/devices/systemd.go @@ -23,10 +23,18 @@ func systemdProperties(r *configs.Resources, sdVer int) ([]systemdDbus.Property, } properties := []systemdDbus.Property{ + // When we later add DeviceAllow=/dev/foo properties, we are + // appending devices to the allow list for the unit. 
However, + // if this is an existing unit, it already has DeviceAllow= + // entries, and we need to clear them all before applying the + // new set. (We also do this for new units, mainly for safety + // to ensure we only enable the devices we expect.) + // + // To clear any existing DeviceAllow= rules, we have to add an + // empty DeviceAllow= property. + newProp("DeviceAllow", []deviceAllowEntry{}), // Always run in the strictest white-list mode. newProp("DevicePolicy", "strict"), - // Empty the DeviceAllow array before filling it. - newProp("DeviceAllow", []deviceAllowEntry{}), } // Figure out the set of rules. @@ -239,7 +247,7 @@ func allowAllDevices() []systemdDbus.Property { // Setting mode to auto and removing all DeviceAllow rules // results in allowing access to all devices. return []systemdDbus.Property{ - newProp("DevicePolicy", "auto"), newProp("DeviceAllow", []deviceAllowEntry{}), + newProp("DevicePolicy", "auto"), } } diff --git a/libcontainer/console_linux.go b/libcontainer/console_linux.go index e506853e45a..c93151bc6b2 100644 --- a/libcontainer/console_linux.go +++ b/libcontainer/console_linux.go @@ -1,43 +1,164 @@ package libcontainer import ( + "errors" + "fmt" "os" + "runtime" + "github.com/containerd/console" "golang.org/x/sys/unix" + + "github.com/opencontainers/runc/internal/linux" + "github.com/opencontainers/runc/internal/pathrs" + "github.com/opencontainers/runc/internal/sys" + "github.com/opencontainers/runc/libcontainer/utils" ) -// mount initializes the console inside the rootfs mounting with the specified mount label -// and applying the correct ownership of the console. -func mountConsole(slavePath string) error { - f, err := os.Create("/dev/console") - if err != nil && !os.IsExist(err) { - return err +// checkPtmxHandle checks that the given file handle points to a real +// /dev/pts/ptmx device inode on a real devpts mount. 
We cannot (trivially) +// check that it is *the* /dev/pts for the container itself, but this is good +// enough. +func checkPtmxHandle(ptmx *os.File) error { + //nolint:revive,staticcheck,nolintlint // ignore "don't use ALL_CAPS" warning // nolintlint is needed to work around the different lint configs + const ( + PTMX_MAJOR = 5 // from TTYAUX_MAJOR in + PTMX_MINOR = 2 // from mknod_ptmx in fs/devpts/inode.c + PTMX_INO = 2 // from mknod_ptmx in fs/devpts/inode.c + ) + return sys.VerifyInode(ptmx, func(stat *unix.Stat_t, statfs *unix.Statfs_t) error { + if statfs.Type != unix.DEVPTS_SUPER_MAGIC { + return fmt.Errorf("ptmx handle is not on a real devpts mount: super magic is %#x", statfs.Type) + } + if stat.Ino != PTMX_INO { + return fmt.Errorf("ptmx handle has wrong inode number: %v", stat.Ino) + } + if stat.Mode&unix.S_IFMT != unix.S_IFCHR || stat.Rdev != unix.Mkdev(PTMX_MAJOR, PTMX_MINOR) { + return fmt.Errorf("ptmx handle is not a real char ptmx device: ftype %#x %d:%d", + stat.Mode&unix.S_IFMT, unix.Major(stat.Rdev), unix.Minor(stat.Rdev)) + } + return nil + }) +} + +func isPtyNoIoctlError(err error) bool { + // The kernel converts -ENOIOCTLCMD to -ENOTTY automatically, but handle + // -EINVAL just in case (which some drivers do, including pty). + return errors.Is(err, unix.EINVAL) || errors.Is(err, unix.ENOTTY) +} + +func getPtyPeer(pty console.Console, unsafePeerPath string, flags int) (*os.File, error) { + peer, err := linux.GetPtyPeer(pty.Fd(), unsafePeerPath, flags) + if err == nil || !isPtyNoIoctlError(err) { + return peer, err } - if f != nil { - // Ensure permission bits (can be different because of umask). - if err := f.Chmod(0o666); err != nil { - return err + + // On pre-TIOCGPTPEER kernels (Linux < 4.13), we need to fall back to using + // the /dev/pts/$n path generated using TIOCGPTN. We can do some validation + // that the inode is correct because the Unix-98 pty has a consistent + // numbering scheme for the device number of the peer.
+ + peerNum, err := unix.IoctlGetUint32(int(pty.Fd()), unix.TIOCGPTN) + if err != nil { + return nil, fmt.Errorf("get peer number of pty: %w", err) + } + //nolint:revive,staticcheck,nolintlint // ignore "don't use ALL_CAPS" warning // nolintlint is needed to work around the different lint configs + const ( + UNIX98_PTY_SLAVE_MAJOR = 136 // from + ) + wantPeerDev := unix.Mkdev(UNIX98_PTY_SLAVE_MAJOR, peerNum) + + // Use O_PATH to avoid opening a bad inode before we validate it. + peerHandle, err := os.OpenFile(unsafePeerPath, unix.O_PATH|unix.O_CLOEXEC, 0) + if err != nil { + return nil, err + } + defer peerHandle.Close() + + if err := sys.VerifyInode(peerHandle, func(stat *unix.Stat_t, statfs *unix.Statfs_t) error { + if statfs.Type != unix.DEVPTS_SUPER_MAGIC { + return fmt.Errorf("pty peer handle is not on a real devpts mount: super magic is %#x", statfs.Type) + } + if stat.Mode&unix.S_IFMT != unix.S_IFCHR || stat.Rdev != wantPeerDev { + return fmt.Errorf("pty peer handle is not the real char device for pty %d: ftype %#x %d:%d", + peerNum, stat.Mode&unix.S_IFMT, unix.Major(stat.Rdev), unix.Minor(stat.Rdev)) } - f.Close() + return nil + }); err != nil { + return nil, err } - return mount(slavePath, "/dev/console", "bind", unix.MS_BIND, "") + + return pathrs.Reopen(peerHandle, flags) } -// dupStdio opens the slavePath for the console and dups the fds to the current -// processes stdio, fd 0,1,2. -func dupStdio(slavePath string) error { - fd, err := unix.Open(slavePath, unix.O_RDWR, 0) +// safeAllocPty returns a new (ptmx, peer pty) allocation for use inside a +// container. +func safeAllocPty() (pty console.Console, peer *os.File, Err error) { + // TODO: Use openat2(RESOLVE_NO_SYMLINKS|RESOLVE_NO_XDEV). 
+ ptmxHandle, err := os.OpenFile("/dev/pts/ptmx", unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) + if err != nil { + return nil, nil, err + } + defer ptmxHandle.Close() + + if err := checkPtmxHandle(ptmxHandle); err != nil { + return nil, nil, fmt.Errorf("verify ptmx handle: %w", err) + } + + ptyFile, err := pathrs.Reopen(ptmxHandle, unix.O_RDWR|unix.O_NOCTTY) + if err != nil { + return nil, nil, fmt.Errorf("reopen ptmx to get new pty pair: %w", err) + } + // On success, the ownership is transferred to pty. + defer func() { + if Err != nil { + _ = ptyFile.Close() + } + }() + + pty, unsafePeerPath, err := console.NewPtyFromFile(ptyFile) if err != nil { - return &os.PathError{ - Op: "open", - Path: slavePath, - Err: err, + return nil, nil, err + } + defer func() { + if Err != nil { + _ = pty.Close() } + }() + + peer, err = getPtyPeer(pty, unsafePeerPath, unix.O_RDWR|unix.O_NOCTTY) + if err != nil { + return nil, nil, fmt.Errorf("failed to get peer end of newly-allocated console: %w", err) + } + return pty, peer, nil +} + +// mountConsole bind-mounts the provided pty on top of /dev/console so programs +// that operate on /dev/console operate on the correct container pty. +func mountConsole(peerPty *os.File) error { + console, err := os.OpenFile("/dev/console", unix.O_NOFOLLOW|unix.O_CREAT|unix.O_CLOEXEC, 0o666) + if err != nil { + return fmt.Errorf("create /dev/console mount target: %w", err) } + defer console.Close() + + dstFd, closer := utils.ProcThreadSelfFd(console.Fd()) + defer closer() + + mntSrc := &mountSource{ + Type: mountSourcePlain, + file: peerPty, + } + return mountViaFds(peerPty.Name(), mntSrc, "/dev/console", dstFd, "bind", unix.MS_BIND, "") +} + +// dupStdio replaces stdio with the given peerPty. 
+func dupStdio(peerPty *os.File) error { for _, i := range []int{0, 1, 2} { - if err := unix.Dup3(fd, i, 0); err != nil { + if err := unix.Dup3(int(peerPty.Fd()), i, 0); err != nil { return err } } + runtime.KeepAlive(peerPty) return nil } diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index c02116177ad..3a51ec1b1b9 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -324,16 +324,6 @@ func (c *Container) start(process *Process) (retErr error) { defer process.closeClonedExes() logsDone := parent.forwardChildLogs() - if logsDone != nil { - defer func() { - // Wait for log forwarder to finish. This depends on - // runc init closing the _LIBCONTAINER_LOGPIPE log fd. - err := <-logsDone - if err != nil && retErr == nil { - retErr = fmt.Errorf("unable to forward init logs: %w", err) - } - }() - } // Before starting "runc init", mark all non-stdio open files as O_CLOEXEC // to make sure we don't leak any files into "runc init". Any files to be @@ -348,6 +338,17 @@ func (c *Container) start(process *Process) (retErr error) { return fmt.Errorf("unable to start container process: %w", err) } + if logsDone != nil { + defer func() { + // Wait for log forwarder to finish. This depends on + // runc init closing the _LIBCONTAINER_LOGPIPE log fd. + err := <-logsDone + if err != nil && retErr == nil { + retErr = fmt.Errorf("unable to forward init logs: %w", err) + } + }() + } + if process.Init { c.fifo.Close() if c.config.Hooks != nil { @@ -1114,8 +1115,9 @@ func (c *Container) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Namespa Value: c.config.RootlessEUID, }) - // write boottime and monotonic time ns offsets. 
- if c.config.TimeOffsets != nil { + // write boottime and monotonic time ns offsets only when we are not joining an existing time ns + _, joinExistingTime := nsMaps[configs.NEWTIME] + if !joinExistingTime && c.config.TimeOffsets != nil { var offsetSpec bytes.Buffer for clock, offset := range c.config.TimeOffsets { fmt.Fprintf(&offsetSpec, "%s %d %d\n", clock, offset.Secs, offset.Nanosecs) diff --git a/libcontainer/criu_linux.go b/libcontainer/criu_linux.go index 4c6ae71465f..819c49c3d6a 100644 --- a/libcontainer/criu_linux.go +++ b/libcontainer/criu_linux.go @@ -519,34 +519,9 @@ func (c *Container) restoreNetwork(req *criurpc.CriuReq, criuOpts *CriuOpts) { } } -// makeCriuRestoreMountpoints makes the actual mountpoints for the -// restore using CRIU. This function is inspired from the code in -// rootfs_linux.go. -func (c *Container) makeCriuRestoreMountpoints(m *configs.Mount) error { - if m.Device == "cgroup" { - // No mount point(s) need to be created: - // - // * for v1, mount points are saved by CRIU because - // /sys/fs/cgroup is a tmpfs mount - // - // * for v2, /sys/fs/cgroup is a real mount, but - // the mountpoint appears as soon as /sys is mounted - return nil - } - // TODO: pass srcFD? Not sure if criu is impacted by issue #2484. - me := mountEntry{Mount: m} - // For all other filesystems, just make the target. - if _, err := createMountpoint(c.config.Rootfs, me); err != nil { - return fmt.Errorf("create criu restore mountpoint for %s mount: %w", me.Destination, err) - } - return nil -} - -// isPathInPrefixList is a small function for CRIU restore to make sure -// mountpoints, which are on a tmpfs, are not created in the roofs. 
-func isPathInPrefixList(path string, prefix []string) bool { - for _, p := range prefix { - if strings.HasPrefix(path, p+"/") { +func isOnTmpfs(path string, mounts []*configs.Mount) bool { + for _, m := range mounts { + if m.Device == "tmpfs" && strings.HasPrefix(path, m.Destination+"/") { return true } } @@ -560,17 +535,6 @@ func isPathInPrefixList(path string, prefix []string) bool { // This function also creates missing mountpoints as long as they // are not on top of a tmpfs, as CRIU will restore tmpfs content anyway. func (c *Container) prepareCriuRestoreMounts(mounts []*configs.Mount) error { - // First get a list of a all tmpfs mounts - tmpfs := []string{} - for _, m := range mounts { - switch m.Device { - case "tmpfs": - tmpfs = append(tmpfs, m.Destination) - } - } - // Now go through all mounts and create the mountpoints - // if the mountpoints are not on a tmpfs, as CRIU will - // restore the complete tmpfs content from its checkpoint. umounts := []string{} defer func() { for _, u := range umounts { @@ -586,28 +550,51 @@ func (c *Container) prepareCriuRestoreMounts(mounts []*configs.Mount) error { }) } }() + // Now go through all mounts and create the required mountpoints. for _, m := range mounts { - if !isPathInPrefixList(m.Destination, tmpfs) { - if err := c.makeCriuRestoreMountpoints(m); err != nil { + // No cgroup mount point(s) need to be created: + // * for v1, mount points are saved by CRIU because + // /sys/fs/cgroup is a tmpfs mount; + // * for v2, /sys/fs/cgroup is a real mount, but + // the mountpoint appears as soon as /sys is mounted. + if m.Device == "cgroup" { + continue + } + // If the mountpoint is on a tmpfs, skip it as CRIU will + // restore the complete tmpfs content from its checkpoint. 
+ if isOnTmpfs(m.Destination, mounts) { + continue + } + me := mountEntry{Mount: m} + if err := me.createOpenMountpoint(c.config.Rootfs); err != nil { + return fmt.Errorf("create criu restore mountpoint for %s mount: %w", me.Destination, err) + } + if me.dstFile != nil { + defer me.dstFile.Close() + } + // If the mount point is a bind mount, we need to mount + // it now so that runc can create the necessary mount + // points for mounts in bind mounts. + // This also happens during initial container creation. + // Without this CRIU restore will fail + // See: https://github.com/opencontainers/runc/issues/2748 + // It is also not necessary to order the mount points + // because during initial container creation mounts are + // set up in the order they are configured. + if m.Device == "bind" { + if err := utils.WithProcfdFile(me.dstFile, func(dstFd string) error { + return mountViaFds(m.Source, nil, m.Destination, dstFd, "", unix.MS_BIND|unix.MS_REC, "") + }); err != nil { return err } - // If the mount point is a bind mount, we need to mount - // it now so that runc can create the necessary mount - // points for mounts in bind mounts. - // This also happens during initial container creation. - // Without this CRIU restore will fail - // See: https://github.com/opencontainers/runc/issues/2748 - // It is also not necessary to order the mount points - // because during initial container creation mounts are - // set up in the order they are configured. 
- if m.Device == "bind" { - if err := utils.WithProcfd(c.config.Rootfs, m.Destination, func(dstFd string) error { - return mountViaFds(m.Source, nil, m.Destination, dstFd, "", unix.MS_BIND|unix.MS_REC, "") - }); err != nil { - return err - } - umounts = append(umounts, m.Destination) - } + umounts = append(umounts, m.Destination) + } + if me.dstFile != nil { + // As this is being done in a loop, the defer earlier will be + // delayed until all mountpoints are handled -- for a config with + // many mountpoints this could result in a lot of open files. So we + // opportunistically close the file as well as deferring it. + _ = me.dstFile.Close() } } return nil @@ -1146,6 +1133,13 @@ func (c *Container) criuNotifications(resp *criurpc.CriuResp, process *Process, } // create a timestamp indicating when the restored checkpoint was started c.created = time.Now().UTC() + if !c.config.Namespaces.Contains(configs.NEWTIME) && + configs.IsNamespaceSupported(configs.NEWTIME) && + c.checkCriuVersion(31400) == nil { + // CRIU restores processes into a time namespace. + c.config.Namespaces = append(c.config.Namespaces, + configs.Namespace{Type: configs.NEWTIME}) + } if _, err := c.updateState(r); err != nil { return err } diff --git a/libcontainer/dmz/cloned_binary_linux.go b/libcontainer/dmz/cloned_binary_linux.go index 1c034e4e6e5..24eddb80645 100644 --- a/libcontainer/dmz/cloned_binary_linux.go +++ b/libcontainer/dmz/cloned_binary_linux.go @@ -10,6 +10,7 @@ import ( "github.com/sirupsen/logrus" "golang.org/x/sys/unix" + "github.com/opencontainers/runc/internal/pathrs" "github.com/opencontainers/runc/libcontainer/system" ) @@ -47,11 +48,15 @@ func sealMemfd(f **os.File) error { // errors because they are not needed and we want to continue // to work on older kernels. 
fd := (*f).Fd() - // F_SEAL_FUTURE_WRITE -- Linux 5.1 - _, _ = unix.FcntlInt(fd, unix.F_ADD_SEALS, unix.F_SEAL_FUTURE_WRITE) + + // Skip F_SEAL_FUTURE_WRITE, it is not needed because we already use the + // stronger F_SEAL_WRITE (and is buggy on Linux <5.5 -- see kernel commit + // 05d351102dbe and ). + // F_SEAL_EXEC -- Linux 6.3 const F_SEAL_EXEC = 0x20 //nolint:revive // this matches the unix.* name _, _ = unix.FcntlInt(fd, unix.F_ADD_SEALS, F_SEAL_EXEC) + // Apply all original memfd seals. _, err := unix.FcntlInt(fd, unix.F_ADD_SEALS, baseMemfdSeals) return os.NewSyscallError("fcntl(F_ADD_SEALS)", err) @@ -67,7 +72,7 @@ func sealFile(f **os.File) error { // When sealing an O_TMPFILE-style descriptor we need to // re-open the path as O_PATH to clear the existing write // handle we have. - opath, err := os.OpenFile(fmt.Sprintf("/proc/self/fd/%d", (*f).Fd()), unix.O_PATH|unix.O_CLOEXEC, 0) + opath, err := pathrs.Reopen(*f, unix.O_PATH|unix.O_CLOEXEC) if err != nil { return fmt.Errorf("reopen tmpfile: %w", err) } diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index 1eb0279d9e0..d80922ddf99 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -5,6 +5,7 @@ import ( "encoding/json" "errors" "fmt" + "io" "net" "os" "path/filepath" @@ -21,6 +22,7 @@ import ( "github.com/vishvananda/netlink" "golang.org/x/sys/unix" + "github.com/opencontainers/runc/internal/pathrs" "github.com/opencontainers/runc/libcontainer/capabilities" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" @@ -378,12 +380,13 @@ func setupConsole(socket *os.File, config *initConfig, mount bool) error { // the UID owner of the console to be the user the process will run as (so // they can actually control their console). - pty, slavePath, err := console.NewPty() + pty, peerPty, err := safeAllocPty() if err != nil { return err } // After we return from here, we don't need the console anymore.
defer pty.Close() + defer peerPty.Close() if config.ConsoleHeight != 0 && config.ConsoleWidth != 0 { err = pty.Resize(console.WinSize{ @@ -397,7 +400,7 @@ func setupConsole(socket *os.File, config *initConfig, mount bool) error { // Mount the console inside our rootfs. if mount { - if err := mountConsole(slavePath); err != nil { + if err := mountConsole(peerPty); err != nil { return err } } @@ -408,7 +411,7 @@ func setupConsole(socket *os.File, config *initConfig, mount bool) error { runtime.KeepAlive(pty) // Now, dup over all the things. - return dupStdio(slavePath) + return dupStdio(peerPty) } // syncParentReady sends to the given pipe a JSON payload which indicates that @@ -510,7 +513,12 @@ func setupUser(config *initConfig) error { // We don't need to use /proc/thread-self here because setgroups is a // per-userns file and thus is global to all threads in a thread-group. // This lets us avoid having to do runtime.LockOSThread. - setgroups, err := os.ReadFile("/proc/self/setgroups") + var setgroups []byte + setgroupsFile, err := pathrs.ProcSelfOpen("setgroups", unix.O_RDONLY) + if err == nil { + setgroups, err = io.ReadAll(setgroupsFile) + _ = setgroupsFile.Close() + } if err != nil && !os.IsNotExist(err) { return err } @@ -554,19 +562,16 @@ func setupUser(config *initConfig) error { // The ownership needs to match because it is created outside of the container and needs to be // localized. func fixStdioPermissions(u *user.ExecUser) error { - var null unix.Stat_t - if err := unix.Stat("/dev/null", &null); err != nil { - return &os.PathError{Op: "stat", Path: "/dev/null", Err: err} - } for _, file := range []*os.File{os.Stdin, os.Stdout, os.Stderr} { var s unix.Stat_t if err := unix.Fstat(int(file.Fd()), &s); err != nil { return &os.PathError{Op: "fstat", Path: file.Name(), Err: err} } - // Skip chown if uid is already the one we want or any of the STDIO descriptors - // were redirected to /dev/null. 
- if int(s.Uid) == u.Uid || s.Rdev == null.Rdev { + // Skip chown if: + // - uid is already the one we want, or + // - fd is opened to /dev/null. + if int(s.Uid) == u.Uid || isDevNull(&s) { continue } diff --git a/libcontainer/integration/exec_test.go b/libcontainer/integration/exec_test.go index e8a2dc53c80..c0fbd101ec8 100644 --- a/libcontainer/integration/exec_test.go +++ b/libcontainer/integration/exec_test.go @@ -14,12 +14,13 @@ import ( "syscall" "testing" + "github.com/opencontainers/runc/internal/linux" + "github.com/opencontainers/runc/internal/pathrs" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups/systemd" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/internal/userns" - "github.com/opencontainers/runc/libcontainer/utils" "github.com/opencontainers/runtime-spec/specs-go" "golang.org/x/sys/unix" @@ -1695,11 +1696,9 @@ func TestFdLeaksSystemd(t *testing.T) { } func fdList(t *testing.T) []string { - procSelfFd, closer := utils.ProcThreadSelf("fd") - defer closer() - - fdDir, err := os.Open(procSelfFd) + fdDir, closer, err := pathrs.ProcThreadSelfOpen("fd/", unix.O_DIRECTORY|unix.O_CLOEXEC) ok(t, err) + defer closer() defer fdDir.Close() fds, err := fdDir.Readdirnames(-1) @@ -1738,8 +1737,10 @@ func testFdLeaks(t *testing.T, systemd bool) { count := 0 - procSelfFd, closer := utils.ProcThreadSelf("fd/") + procSelfFd, closer, err := pathrs.ProcThreadSelfOpen("fd/", unix.O_DIRECTORY|unix.O_CLOEXEC) + ok(t, err) defer closer() + defer procSelfFd.Close() next_fd: for _, fd1 := range fds1 { @@ -1748,7 +1749,7 @@ next_fd: continue next_fd } } - dst, _ := os.Readlink(filepath.Join(procSelfFd, fd1)) + dst, _ := linux.Readlinkat(procSelfFd, fd1) for _, ex := range excludedPaths { if ex == dst { continue next_fd diff --git a/libcontainer/internal/userns/userns_maps.c 
b/libcontainer/internal/userns/userns_maps_linux.c similarity index 99% rename from libcontainer/internal/userns/userns_maps.c rename to libcontainer/internal/userns/userns_maps_linux.c index 84f2c6188c3..fdb20aecad8 100644 --- a/libcontainer/internal/userns/userns_maps.c +++ b/libcontainer/internal/userns/userns_maps_linux.c @@ -1,3 +1,5 @@ +//go:build linux + #define _GNU_SOURCE #include #include diff --git a/libcontainer/mount_linux.go b/libcontainer/mount_linux.go index f2eaa937ee6..683b5e62425 100644 --- a/libcontainer/mount_linux.go +++ b/libcontainer/mount_linux.go @@ -6,6 +6,7 @@ import ( "io/fs" "os" "strconv" + "strings" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" @@ -45,6 +46,64 @@ type mountError struct { err error } +// int32plus is a collection of int types with >=32 bits. +type int32plus interface { + int | uint | int32 | uint32 | int64 | uint64 | uintptr +} + +// stringifyMountFlags converts mount(2) flags to a string that you can use in +// error messages. +func stringifyMountFlags[Int int32plus](flags Int) string { + flagNames := []struct { + name string + bits Int + }{ + {"MS_RDONLY", unix.MS_RDONLY}, + {"MS_NOSUID", unix.MS_NOSUID}, + {"MS_NODEV", unix.MS_NODEV}, + {"MS_NOEXEC", unix.MS_NOEXEC}, + {"MS_SYNCHRONOUS", unix.MS_SYNCHRONOUS}, + {"MS_REMOUNT", unix.MS_REMOUNT}, + {"MS_MANDLOCK", unix.MS_MANDLOCK}, + {"MS_DIRSYNC", unix.MS_DIRSYNC}, + {"MS_NOSYMFOLLOW", unix.MS_NOSYMFOLLOW}, + // No (1 << 9) flag. + {"MS_NOATIME", unix.MS_NOATIME}, + {"MS_NODIRATIME", unix.MS_NODIRATIME}, + {"MS_BIND", unix.MS_BIND}, + {"MS_MOVE", unix.MS_MOVE}, + {"MS_REC", unix.MS_REC}, + // MS_VERBOSE was deprecated and swapped to MS_SILENT. + {"MS_SILENT", unix.MS_SILENT}, + {"MS_POSIXACL", unix.MS_POSIXACL}, + {"MS_UNBINDABLE", unix.MS_UNBINDABLE}, + {"MS_PRIVATE", unix.MS_PRIVATE}, + {"MS_SLAVE", unix.MS_SLAVE}, + {"MS_SHARED", unix.MS_SHARED}, + {"MS_RELATIME", unix.MS_RELATIME}, + // MS_KERNMOUNT (1 << 22) is internal to the kernel. 
+ {"MS_I_VERSION", unix.MS_I_VERSION}, + {"MS_STRICTATIME", unix.MS_STRICTATIME}, + {"MS_LAZYTIME", unix.MS_LAZYTIME}, + } + var ( + flagSet []string + seenBits Int + ) + for _, flag := range flagNames { + if flags&flag.bits == flag.bits { + seenBits |= flag.bits + flagSet = append(flagSet, flag.name) + } + } + // If there were any remaining flags specified we don't know the name of, + // just add them in an 0x... format. + if remaining := flags &^ seenBits; remaining != 0 { + flagSet = append(flagSet, "0x"+strconv.FormatUint(uint64(remaining), 16)) + } + return strings.Join(flagSet, "|") +} + // Error provides a string error representation. func (e *mountError) Error() string { out := e.op + " " @@ -62,7 +121,7 @@ func (e *mountError) Error() string { } if e.flags != uintptr(0) { - out += ", flags=0x" + strconv.FormatUint(uint64(e.flags), 16) + out += ", flags=" + stringifyMountFlags(e.flags) } if e.data != "" { out += ", data=" + e.data diff --git a/libcontainer/mount_linux_test.go b/libcontainer/mount_linux_test.go new file mode 100644 index 00000000000..6bddfc2f08a --- /dev/null +++ b/libcontainer/mount_linux_test.go @@ -0,0 +1,54 @@ +package libcontainer + +import ( + "testing" + + "golang.org/x/sys/unix" +) + +func TestStringifyMountFlags(t *testing.T) { + for _, test := range []struct { + name string + flags uintptr + expected string + }{ + {"Empty", 0, ""}, + // Single valid flags. 
+ {"Single-MS_RDONLY", unix.MS_RDONLY, "MS_RDONLY"}, + {"Single-MS_NOSUID", unix.MS_NOSUID, "MS_NOSUID"}, + {"Single-MS_NODEV", unix.MS_NODEV, "MS_NODEV"}, + {"Single-MS_NOEXEC", unix.MS_NOEXEC, "MS_NOEXEC"}, + {"Single-MS_SYNCHRONOUS", unix.MS_SYNCHRONOUS, "MS_SYNCHRONOUS"}, + {"Single-MS_REMOUNT", unix.MS_REMOUNT, "MS_REMOUNT"}, + {"Single-MS_MANDLOCK", unix.MS_MANDLOCK, "MS_MANDLOCK"}, + {"Single-MS_DIRSYNC", unix.MS_DIRSYNC, "MS_DIRSYNC"}, + {"Single-MS_NOSYMFOLLOW", unix.MS_NOSYMFOLLOW, "MS_NOSYMFOLLOW"}, + {"Single-MS_NOATIME", unix.MS_NOATIME, "MS_NOATIME"}, + {"Single-MS_NODIRATIME", unix.MS_NODIRATIME, "MS_NODIRATIME"}, + {"Single-MS_BIND", unix.MS_BIND, "MS_BIND"}, + {"Single-MS_MOVE", unix.MS_MOVE, "MS_MOVE"}, + {"Single-MS_REC", unix.MS_REC, "MS_REC"}, + {"Single-MS_SILENT", unix.MS_SILENT, "MS_SILENT"}, + {"Single-MS_POSIXACL", unix.MS_POSIXACL, "MS_POSIXACL"}, + {"Single-MS_UNBINDABLE", unix.MS_UNBINDABLE, "MS_UNBINDABLE"}, + {"Single-MS_PRIVATE", unix.MS_PRIVATE, "MS_PRIVATE"}, + {"Single-MS_SLAVE", unix.MS_SLAVE, "MS_SLAVE"}, + {"Single-MS_SHARED", unix.MS_SHARED, "MS_SHARED"}, + {"Single-MS_RELATIME", unix.MS_RELATIME, "MS_RELATIME"}, + {"Single-MS_KERNMOUNT", unix.MS_KERNMOUNT, "0x400000"}, + {"Single-MS_I_VERSION", unix.MS_I_VERSION, "MS_I_VERSION"}, + {"Single-MS_STRICTATIME", unix.MS_STRICTATIME, "MS_STRICTATIME"}, + {"Single-MS_LAZYTIME", unix.MS_LAZYTIME, "MS_LAZYTIME"}, + // Invalid flag value. + {"Unknown-512", 1 << 9, "0x200"}, + // Multiple flag values at the same time. 
+ {"Multiple-Valid1", unix.MS_RDONLY | unix.MS_REC | unix.MS_BIND, "MS_RDONLY|MS_BIND|MS_REC"}, + {"Multiple-Valid2", unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC | unix.MS_REC | unix.MS_NODIRATIME | unix.MS_I_VERSION, "MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_NODIRATIME|MS_REC|MS_I_VERSION"}, + {"Multiple-Mixed", unix.MS_REC | unix.MS_BIND | (1 << 9) | (1 << 31), "MS_BIND|MS_REC|0x80000200"}, + } { + got := stringifyMountFlags(test.flags) + if got != test.expected { + t.Errorf("%s: stringifyMountFlags(0x%x) = %q, expected %q", test.name, test.flags, got, test.expected) + } + } +} diff --git a/libcontainer/notify_v2_linux.go b/libcontainer/notify_v2_linux.go index 821536c8da0..8d15833ab43 100644 --- a/libcontainer/notify_v2_linux.go +++ b/libcontainer/notify_v2_linux.go @@ -2,6 +2,7 @@ package libcontainer import ( "fmt" + "os" "path/filepath" "unsafe" @@ -40,7 +41,11 @@ func registerMemoryEventV2(cgDir, evName, cgEvName string) (<-chan struct{}, err for { n, err := unix.Read(fd, buffer[:]) + if err == unix.EINTR { //nolint:errorlint // unix errors are bare + continue + } if err != nil { + err = os.NewSyscallError("read", err) logrus.Warnf("unable to read event data from inotify, got error: %v", err) return } diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go index fcbb54a3e41..8dab3caefa1 100644 --- a/libcontainer/process_linux.go +++ b/libcontainer/process_linux.go @@ -122,6 +122,46 @@ func (p *setnsProcess) signal(sig os.Signal) error { return unix.Kill(p.pid(), s) } +// tryResetCPUAffinity tries to reset the CPU affinity of the process +// identified by pid to include all possible CPUs (notwithstanding cgroup +// cpuset restrictions and isolated CPUs). 
+func tryResetCPUAffinity(pid int) { + // When resetting the CPU affinity, we want to match the configured cgroup + // cpuset (or the default set of all CPUs, if no cpuset is configured) + // rather than some more restrictive affinity we were spawned in (such as + // one that may have been inherited from systemd). The cpuset cgroup used + // to reconfigure the cpumask automatically for joining processes, but + // kcommit da019032819a ("sched: Enforce user requested affinity") changed + // this behaviour in Linux 6.2. + // + // Parsing cpuset.cpus.effective is quite inefficient (and looking at + // things like /proc/stat would be wrong for most nested containers), but + // luckily sched_setaffinity(2) will implicitly: + // + // * Clamp the cpumask so that it matches the current number of CPUs on + // the system. + // * Mask out any CPUs that are not a member of the target task's + // configured cgroup cpuset. + // + // So we can just pass a very large array of set cpumask bits and the + // kernel will silently convert that to the correct value very cheaply. + + // Ideally, we would just set the array to 0xFF...FF. Unfortunately, the + // size depends on the architecture. It is also a private newtype, so we + // can't use (^0) or generics since those require us to be able to name the + // type. However, we can just underflow the zero value instead. + // TODO: Once is merged, switch to that. 
+ cpuset := unix.CPUSet{} + for i := range cpuset { + cpuset[i]-- // underflow to 0xFF..FF + } + if err := unix.SchedSetaffinity(pid, &cpuset); err != nil { + logrus.WithError( + os.NewSyscallError("sched_setaffinity", err), + ).Warnf("resetting the CPU affinity of pid %d failed -- the container process may inherit runc's CPU affinity", pid) + } +} + func (p *setnsProcess) start() (retErr error) { defer p.comm.closeParent() @@ -184,6 +224,9 @@ func (p *setnsProcess) start() (retErr error) { } } } + // Reset the CPU affinity after cgroups are configured to make sure it + // matches any configured cpuset. + tryResetCPUAffinity(p.pid()) if p.intelRdtPath != "" { // if Intel RDT "resource control" filesystem path exists _, err := os.Stat(p.intelRdtPath) @@ -578,6 +621,9 @@ func (p *initProcess) start() (retErr error) { return fmt.Errorf("unable to apply cgroup configuration: %w", err) } } + // Reset the CPU affinity after cgroups are configured to make sure it + // matches any configured cpuset. 
+ tryResetCPUAffinity(p.pid()) if p.intelRdtManager != nil { if err := p.intelRdtManager.Apply(p.pid()); err != nil { return fmt.Errorf("unable to apply Intel RDT configuration: %w", err) @@ -871,6 +917,7 @@ func sendContainerProcessState(listenerPath string, state *specs.ContainerProces if err != nil { return fmt.Errorf("failed to connect with seccomp agent specified in the seccomp profile: %w", err) } + defer conn.Close() socket, err := conn.(*net.UnixConn).File() if err != nil { diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go index f7cd95dd189..255c2f8ab07 100644 --- a/libcontainer/rootfs_linux.go +++ b/libcontainer/rootfs_linux.go @@ -5,14 +5,15 @@ import ( "errors" "fmt" "os" - "path" "path/filepath" + "runtime" "strconv" "strings" "syscall" "time" securejoin "github.com/cyphar/filepath-securejoin" + "github.com/cyphar/filepath-securejoin/pathrs-lite/procfs" "github.com/moby/sys/mountinfo" "github.com/moby/sys/userns" "github.com/mrunalp/fileutils" @@ -21,6 +22,8 @@ import ( "github.com/sirupsen/logrus" "golang.org/x/sys/unix" + "github.com/opencontainers/runc/internal/pathrs" + "github.com/opencontainers/runc/internal/sys" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups/fs2" "github.com/opencontainers/runc/libcontainer/configs" @@ -43,6 +46,7 @@ type mountConfig struct { type mountEntry struct { *configs.Mount srcFile *mountSource + dstFile *os.File } // srcName is only meant for error messages, it returns a "friendly" name. @@ -213,6 +217,18 @@ func prepareRootfs(pipe *syncSocket, iConfig *initConfig) (err error) { return fmt.Errorf("error jailing process inside rootfs: %w", err) } + // Apply root mount propagation flags. + // This must be done after pivot_root/chroot because the mount propagation flag is applied + // to the current root ("/"), and not to the old rootfs before it becomes "/". 
Applying the + // flag in prepareRoot would affect the host mount namespace if the container's + // root mount is shared. + // MS_PRIVATE is skipped as rootfsParentMountPrivate() is already called. + if config.RootPropagation != 0 && config.RootPropagation&unix.MS_PRIVATE == 0 { + if err := mount("", "/", "", uintptr(config.RootPropagation), ""); err != nil { + return fmt.Errorf("unable to apply root propagation flags: %w", err) + } + } + if setupDev { if err := reOpenDevNull(); err != nil { return fmt.Errorf("error reopening /dev/null inside container: %w", err) @@ -281,8 +297,8 @@ func cleanupTmp(tmpdir string) { _ = os.RemoveAll(tmpdir) } -func mountCgroupV1(m *configs.Mount, c *mountConfig) error { - binds, err := getCgroupMounts(m) +func mountCgroupV1(m mountEntry, c *mountConfig) error { + binds, err := getCgroupMounts(m.Mount) if err != nil { return err } @@ -313,7 +329,7 @@ func mountCgroupV1(m *configs.Mount, c *mountConfig) error { // inside the tmpfs, so we don't want to resolve symlinks). 
subsystemPath := filepath.Join(c.root, b.Destination) subsystemName := filepath.Base(b.Destination) - if err := utils.MkdirAllInRoot(c.root, subsystemPath, 0o755); err != nil { + if err := pathrs.MkdirAllInRoot(c.root, subsystemPath, 0o755); err != nil { return err } if err := utils.WithProcfd(c.root, b.Destination, func(dstFd string) error { @@ -352,8 +368,8 @@ func mountCgroupV1(m *configs.Mount, c *mountConfig) error { return nil } -func mountCgroupV2(m *configs.Mount, c *mountConfig) error { - err := utils.WithProcfd(c.root, m.Destination, func(dstFd string) error { +func mountCgroupV2(m mountEntry, c *mountConfig) error { + err := utils.WithProcfdFile(m.dstFile, func(dstFd string) error { return mountViaFds(m.Source, nil, m.Destination, dstFd, "cgroup2", uintptr(m.Flags), m.Data) }) if err == nil || !(errors.Is(err, unix.EPERM) || errors.Is(err, unix.EBUSY)) { @@ -382,14 +398,14 @@ func mountCgroupV2(m *configs.Mount, c *mountConfig) error { // // Mask `/sys/fs/cgroup` to ensure it is read-only, even when `/sys` is mounted // with `rbind,ro` (`runc spec --rootless` produces `rbind,ro` for `/sys`). - err = utils.WithProcfd(c.root, m.Destination, func(procfd string) error { - return maskPath(procfd, c.label) + err = utils.WithProcfdFile(m.dstFile, func(procfd string) error { + return maskPaths([]string{procfd}, c.label) }) } return err } -func doTmpfsCopyUp(m mountEntry, rootfs, mountLabel string) (Err error) { +func doTmpfsCopyUp(m mountEntry, mountLabel string) (Err error) { // Set up a scratch dir for the tmpfs on the host. tmpdir, err := prepareTmp("/tmp") if err != nil { @@ -402,13 +418,19 @@ func doTmpfsCopyUp(m mountEntry, rootfs, mountLabel string) (Err error) { } defer os.RemoveAll(tmpDir) - // Configure the *host* tmpdir as if it's the container mount. We change - // m.Destination since we are going to mount *on the host*. 
- oldDest := m.Destination - m.Destination = tmpDir - err = mountPropagate(m, "/", mountLabel) - m.Destination = oldDest + tmpDirFile, err := os.OpenFile(tmpDir, unix.O_DIRECTORY|unix.O_CLOEXEC, 0) if err != nil { + return fmt.Errorf("tmpcopyup: %w", err) + } + defer tmpDirFile.Close() + + // Configure the *host* tmpdir as if it's the container mount. We change + // m.dstFile since we are going to mount *on the host*. + hostMount := mountEntry{ + Mount: m.Mount, + dstFile: tmpDirFile, + } + if err := hostMount.mountPropagate("/", mountLabel); err != nil { return err } defer func() { @@ -419,7 +441,7 @@ func doTmpfsCopyUp(m mountEntry, rootfs, mountLabel string) (Err error) { } }() - return utils.WithProcfd(rootfs, m.Destination, func(dstFd string) (Err error) { + return utils.WithProcfdFile(m.dstFile, func(dstFd string) (Err error) { // Copy the container data to the host tmpdir. We append "/" to force // CopyDirectory to resolve the symlink rather than trying to copy the // symlink itself. 
@@ -481,72 +503,87 @@ func statfsToMountFlags(st unix.Statfs_t) int { var errRootfsToFile = errors.New("config tries to change rootfs to file") -func createMountpoint(rootfs string, m mountEntry) (string, error) { - dest, err := securejoin.SecureJoin(rootfs, m.Destination) +func (m *mountEntry) createOpenMountpoint(rootfs string) (Err error) { + unsafePath := utils.StripRoot(rootfs, m.Destination) + dstFile, err := pathrs.OpenInRoot(rootfs, unsafePath, unix.O_PATH) + defer func() { + if dstFile != nil && Err != nil { + _ = dstFile.Close() + } + }() if err != nil { - return "", err - } - if err := checkProcMount(rootfs, dest, m); err != nil { - return "", fmt.Errorf("check proc-safety of %s mount: %w", m.Destination, err) - } + if !errors.Is(err, unix.ENOENT) { + return fmt.Errorf("lookup mountpoint target: %w", err) + } - switch m.Device { - case "bind": - fi, _, err := m.srcStat() - if err != nil { - // Error out if the source of a bind mount does not exist as we - // will be unable to bind anything to it. - return "", err - } - // If the original source is not a directory, make the target a file. - if !fi.IsDir() { - // Make sure we aren't tricked into trying to make the root a file. - if rootfs == dest { - return "", fmt.Errorf("%w: file bind mount over rootfs", errRootfsToFile) - } - // Make the parent directory. - destDir, destBase := filepath.Split(dest) - destDirFd, err := utils.MkdirAllInRootOpen(rootfs, destDir, 0o755) + // If the mountpoint doesn't already exist, we want to create a mountpoint + // that makes sense for the source. For file bind-mounts this is an empty + // file, for everything else it's a directory. + dstIsFile := false + if m.Device == "bind" { + fi, _, err := m.srcStat() if err != nil { - return "", fmt.Errorf("make parent dir of file bind-mount: %w", err) - } - defer destDirFd.Close() - // Make the target file. 
We want to avoid opening any file that is - // already there because it could be a "bad" file like an invalid - // device or hung tty that might cause a DoS, so we use mknodat. - // destBase does not contain any "/" components, and mknodat does - // not follow trailing symlinks, so we can safely just call mknodat - // here. - if err := unix.Mknodat(int(destDirFd.Fd()), destBase, unix.S_IFREG|0o644, 0); err != nil { - // If we get EEXIST, there was already an inode there and - // we can consider that a success. - if !errors.Is(err, unix.EEXIST) { - err = &os.PathError{Op: "mknod regular file", Path: dest, Err: err} - return "", fmt.Errorf("create target of file bind-mount: %w", err) - } + // Error out if the source of a bind mount does not exist as we + // will be unable to bind anything to it. + return err } - // Nothing left to do. - return dest, nil + dstIsFile = !fi.IsDir() } - case "tmpfs": - // If the original target exists, copy the mode for the tmpfs mount. - if stat, err := os.Stat(dest); err == nil { - dt := fmt.Sprintf("mode=%04o", syscallMode(stat.Mode())) - if m.Data != "" { - dt = dt + "," + m.Data - } - m.Data = dt + // In previous runc versions, we would tolerate nonsense paths with + // dangling symlinks as path components. pathrs-lite does not support + // this, so instead we have to emulate this behaviour by doing + // SecureJoin *purely to get a semi-reasonable path to use* and then we + // use pathrs-lite to operate on the path safely. + newUnsafePath, err := securejoin.SecureJoin(rootfs, unsafePath) + if err != nil { + return err + } + unsafePath = utils.StripRoot(rootfs, newUnsafePath) - // Nothing left to do. 
- return dest, nil + if dstIsFile { + dstFile, err = pathrs.CreateInRoot(rootfs, unsafePath, unix.O_CREAT|unix.O_EXCL|unix.O_NOFOLLOW, 0o644) + } else { + dstFile, err = pathrs.MkdirAllInRootOpen(rootfs, unsafePath, 0o755) + } + if err != nil { + return fmt.Errorf("make mountpoint %q: %w", m.Destination, err) } } - if err := utils.MkdirAllInRoot(rootfs, dest, 0o755); err != nil { - return "", err + if m.Device == "tmpfs" { + // If the original target exists, copy the mode for the tmpfs mount. + stat, err := dstFile.Stat() + if err != nil { + return fmt.Errorf("check tmpfs source mode: %w", err) + } + dt := fmt.Sprintf("mode=%04o", syscallMode(stat.Mode())) + if m.Data != "" { + dt = dt + "," + m.Data + } + m.Data = dt } - return dest, nil + + dstFullPath, err := procfs.ProcSelfFdReadlink(dstFile) + if err != nil { + return fmt.Errorf("get mount destination real path: %w", err) + } + if !pathrs.IsLexicallyInRoot(rootfs, dstFullPath) { + return fmt.Errorf("mountpoint %q is outside of rootfs %q", dstFullPath, rootfs) + } + if relPath, err := filepath.Rel(rootfs, dstFullPath); err != nil { + return fmt.Errorf("get relative path of %q: %w", dstFullPath, err) + } else if relPath == "." { + return fmt.Errorf("mountpoint %q is on the top of rootfs %q", dstFullPath, rootfs) + } + // TODO: Make checkProcMount use dstFile directly to avoid the need to + // operate on paths here. + if err := checkProcMount(rootfs, dstFullPath, *m); err != nil { + return fmt.Errorf("check proc-safety of %s mount: %w", m.Destination, err) + } + // Update mountEntry. + m.dstFile = dstFile + return nil } func mountToRootfs(c *mountConfig, m mountEntry) error { @@ -562,7 +599,7 @@ func mountToRootfs(c *mountConfig, m mountEntry) error { // TODO: This won't be necessary once we switch to libpathrs and we can // stop all of these symlink-exchange attacks. 
dest := filepath.Clean(m.Destination) - if !utils.IsLexicallyInRoot(rootfs, dest) { + if !pathrs.IsLexicallyInRoot(rootfs, dest) { // Do not use securejoin as it resolves symlinks. dest = filepath.Join(rootfs, dest) } @@ -576,36 +613,47 @@ func mountToRootfs(c *mountConfig, m mountEntry) error { } else if !fi.IsDir() { return fmt.Errorf("filesystem %q must be mounted on ordinary directory", m.Device) } - if err := utils.MkdirAllInRoot(rootfs, dest, 0o755); err != nil { + dstFile, err := pathrs.MkdirAllInRootOpen(rootfs, dest, 0o755) + if err != nil { return err } - // Selinux kernels do not support labeling of /proc or /sys. - return mountPropagate(m, rootfs, "") + defer dstFile.Close() + // "proc" and "sys" mounts need special handling (without resolving the + // destination) to avoid attacks. + m.dstFile = dstFile + return m.mountPropagate(rootfs, "") } - dest, err := createMountpoint(rootfs, m) - if err != nil { + mountLabel := c.label + if err := m.createOpenMountpoint(rootfs); err != nil { return fmt.Errorf("create mountpoint for %s mount: %w", m.Destination, err) } - mountLabel := c.label + defer func() { + if m.dstFile != nil { + _ = m.dstFile.Close() + m.dstFile = nil + } + }() switch m.Device { case "mqueue": - if err := mountPropagate(m, rootfs, ""); err != nil { + if err := m.mountPropagate(rootfs, ""); err != nil { return err } - return label.SetFileLabel(dest, mountLabel) + return utils.WithProcfdFile(m.dstFile, func(dstFd string) error { + return label.SetFileLabel(dstFd, mountLabel) + }) case "tmpfs": + var err error if m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP { - err = doTmpfsCopyUp(m, rootfs, mountLabel) + err = doTmpfsCopyUp(m, mountLabel) } else { - err = mountPropagate(m, rootfs, mountLabel) + err = m.mountPropagate(rootfs, mountLabel) } - return err case "bind": // open_tree()-related shenanigans are all handled in mountViaFds. 
- if err := mountPropagate(m, rootfs, mountLabel); err != nil { + if err := m.mountPropagate(rootfs, mountLabel); err != nil { return err } @@ -619,7 +667,7 @@ func mountToRootfs(c *mountConfig, m mountEntry) error { // contrast to mount(8)'s current behaviour, but is what users probably // expect. See . if m.Flags & ^(unix.MS_BIND|unix.MS_REC|unix.MS_REMOUNT) != 0 || m.ClearedFlags != 0 { - if err := utils.WithProcfd(rootfs, m.Destination, func(dstFd string) error { + if err := utils.WithProcfdFile(m.dstFile, func(dstFd string) error { flags := m.Flags | unix.MS_BIND | unix.MS_REMOUNT // The runtime-spec says we SHOULD map to the relevant mount(8) // behaviour. However, it's not clear whether we want the @@ -664,11 +712,17 @@ func mountToRootfs(c *mountConfig, m mountEntry) error { return err } srcFlags := statfsToMountFlags(*st) + + logrus.Debugf( + "working around failure to set vfs flags on bind-mount %s: srcFlags=%s flagsSet=%s flagsClr=%s: %v", + m.Destination, stringifyMountFlags(srcFlags), + stringifyMountFlags(m.Flags), stringifyMountFlags(m.ClearedFlags), mountErr) + // If the user explicitly request one of the locked flags *not* // be set, we need to return an error to avoid producing mounts // that don't match the user's request. - if srcFlags&m.ClearedFlags&mntLockFlags != 0 { - return mountErr + if cannotClearFlags := srcFlags & m.ClearedFlags & mntLockFlags; cannotClearFlags != 0 { + return fmt.Errorf("cannot clear locked flags %s: %w", stringifyMountFlags(cannotClearFlags), mountErr) } // If an MS_*ATIME flag was requested, it must match the @@ -689,17 +743,19 @@ func mountToRootfs(c *mountConfig, m mountEntry) error { // MS_STRICTATIME mounts even if the user requested MS_RELATIME // or MS_NOATIME. 
if m.Flags&mntAtimeFlags != 0 && m.Flags&mntAtimeFlags != srcFlags&mntAtimeFlags { - return mountErr + return fmt.Errorf("cannot change locked atime flags %s: %w", stringifyMountFlags(srcFlags&mntAtimeFlags), mountErr) } // Retry the mount with the existing lockable mount flags // applied. flags |= srcFlags & mntLockFlags mountErr = mountViaFds("", nil, m.Destination, dstFd, "", uintptr(flags), "") - logrus.Debugf("remount retry: srcFlags=0x%x flagsSet=0x%x flagsClr=0x%x: %v", srcFlags, m.Flags, m.ClearedFlags, mountErr) + if mountErr != nil { + mountErr = fmt.Errorf("remount with locked flags %s re-applied: %w", stringifyMountFlags(srcFlags&mntLockFlags), mountErr) + } return mountErr }); err != nil { - return err + return fmt.Errorf("failed to set user-requested vfs flags on bind-mount: %w", err) } } @@ -712,14 +768,14 @@ func mountToRootfs(c *mountConfig, m mountEntry) error { return err } } - return setRecAttr(m.Mount, rootfs) + return setRecAttr(m) case "cgroup": if cgroups.IsCgroup2UnifiedMode() { - return mountCgroupV2(m.Mount, c) + return mountCgroupV2(m, c) } - return mountCgroupV1(m.Mount, c) + return mountCgroupV1(m, c) default: - return mountPropagate(m, rootfs, mountLabel) + return m.mountPropagate(rootfs, mountLabel) } } @@ -818,7 +874,6 @@ func checkProcMount(rootfs, dest string, m mountEntry) error { "/proc/uptime", "/proc/loadavg", "/proc/slabinfo", - "/proc/net/dev", "/proc/sys/kernel/ns_last_pid", "/proc/sys/crypto/fips_enabled", } @@ -867,20 +922,20 @@ func setupDevSymlinks(rootfs string) error { // needs to be called after we chroot/pivot into the container's rootfs so that any // symlinks are resolved locally. 
func reOpenDevNull() error { - var stat, devNullStat unix.Stat_t file, err := os.OpenFile("/dev/null", os.O_RDWR, 0) if err != nil { return err } - defer file.Close() //nolint: errcheck - if err := unix.Fstat(int(file.Fd()), &devNullStat); err != nil { - return &os.PathError{Op: "fstat", Path: file.Name(), Err: err} + defer file.Close() + if err := verifyDevNull(file); err != nil { + return fmt.Errorf("can't reopen /dev/null: %w", err) } for fd := 0; fd < 3; fd++ { + var stat unix.Stat_t if err := unix.Fstat(fd, &stat); err != nil { return &os.PathError{Op: "fstat", Path: "fd " + strconv.Itoa(fd), Err: err} } - if stat.Rdev == devNullStat.Rdev { + if isDevNull(&stat) { // Close and re-open the fd. if err := unix.Dup3(int(file.Fd()), fd, 0); err != nil { return &os.PathError{ @@ -913,16 +968,15 @@ func createDevices(config *configs.Config) error { return nil } -func bindMountDeviceNode(rootfs, dest string, node *devices.Device) error { - f, err := os.Create(dest) - if err != nil && !os.IsExist(err) { - return err - } - if f != nil { - _ = f.Close() +func bindMountDeviceNode(destDir *os.File, destName string, node *devices.Device) error { + dstFile, err := utils.Openat(destDir, destName, unix.O_CREAT|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0o000) + if err != nil { + return fmt.Errorf("create device inode %s: %w", node.Path, err) } - return utils.WithProcfd(rootfs, dest, func(dstFd string) error { - return mountViaFds(node.Path, nil, dest, dstFd, "bind", unix.MS_BIND, "") + defer dstFile.Close() + + return utils.WithProcfdFile(dstFile, func(dstFd string) error { + return mountViaFds(node.Path, nil, dstFile.Name(), dstFd, "bind", unix.MS_BIND, "") }) } @@ -932,31 +986,33 @@ func createDeviceNode(rootfs string, node *devices.Device, bind bool) error { // The node only exists for cgroup reasons, ignore it here. 
return nil } - dest, err := securejoin.SecureJoin(rootfs, node.Path) + destPath, err := securejoin.SecureJoin(rootfs, node.Path) if err != nil { return err } - if dest == rootfs { + if destPath == rootfs { return fmt.Errorf("%w: mknod over rootfs", errRootfsToFile) } - if err := utils.MkdirAllInRoot(rootfs, filepath.Dir(dest), 0o755); err != nil { - return err + destDirPath, destName := filepath.Split(destPath) + destDir, err := pathrs.MkdirAllInRootOpen(rootfs, destDirPath, 0o755) + if err != nil { + return fmt.Errorf("mkdir parent of device inode %q: %w", node.Path, err) } if bind { - return bindMountDeviceNode(rootfs, dest, node) + return bindMountDeviceNode(destDir, destName, node) } - if err := mknodDevice(dest, node); err != nil { + if err := mknodDevice(destDir, destName, node); err != nil { if errors.Is(err, os.ErrExist) { return nil } else if errors.Is(err, os.ErrPermission) { - return bindMountDeviceNode(rootfs, dest, node) + return bindMountDeviceNode(destDir, destName, node) } return err } return nil } -func mknodDevice(dest string, node *devices.Device) error { +func mknodDevice(destDir *os.File, destName string, node *devices.Device) error { fileMode := node.FileMode switch node.Type { case devices.BlockDevice: @@ -972,14 +1028,44 @@ func mknodDevice(dest string, node *devices.Device) error { if err != nil { return err } - if err := unix.Mknod(dest, uint32(fileMode), int(dev)); err != nil { - return &os.PathError{Op: "mknod", Path: dest, Err: err} + if err := unix.Mknodat(int(destDir.Fd()), destName, uint32(fileMode), int(dev)); err != nil { + return &os.PathError{Op: "mknodat", Path: filepath.Join(destDir.Name(), destName), Err: err} } - // Ensure permission bits (can be different because of umask). - if err := os.Chmod(dest, fileMode); err != nil { + + // Get a handle and verify that it matches the expected inode type and + // major:minor before we operate on it. 
+ devFile, err := utils.Openat(destDir, destName, unix.O_NOFOLLOW|unix.O_PATH, 0) + if err != nil { + return fmt.Errorf("open new %c device inode %s: %w", node.Type, node.Path, err) + } + defer devFile.Close() + + if err := sys.VerifyInode(devFile, func(stat *unix.Stat_t, _ *unix.Statfs_t) error { + if stat.Mode&unix.S_IFMT != uint32(fileMode)&unix.S_IFMT { + return fmt.Errorf("new %c device inode %s has incorrect ftype: %#x doesn't match expected %#v", + node.Type, node.Path, + stat.Mode&unix.S_IFMT, fileMode&unix.S_IFMT) + } + if stat.Rdev != dev { + return fmt.Errorf("new %c device inode %s has incorrect major:minor: %d:%d doesn't match expected %d:%d", + node.Type, node.Path, + unix.Major(stat.Rdev), unix.Minor(stat.Rdev), + unix.Major(dev), unix.Minor(dev)) + } + return nil + }); err != nil { return err } - return os.Chown(dest, int(node.Uid), int(node.Gid)) + + // Ensure permission bits (can be different because of umask). + if err := sys.FchmodFile(devFile, uint32(fileMode)); err != nil { + return fmt.Errorf("update new %c device inode %s file mode: %w", node.Type, node.Path, err) + } + if err := sys.FchownFile(devFile, int(node.Uid), int(node.Gid)); err != nil { + return fmt.Errorf("update new %c device inode %s owner: %w", node.Type, node.Path, err) + } + runtime.KeepAlive(devFile) + return nil } // rootfsParentMountPrivate ensures rootfs parent mount is private. 
@@ -1233,31 +1319,111 @@ func remountReadonly(m *configs.Mount) error { return fmt.Errorf("unable to mount %s as readonly max retries reached", dest) } -// maskPath masks the top of the specified path inside a container to avoid +func isDevNull(st *unix.Stat_t) bool { + return st.Mode&unix.S_IFMT == unix.S_IFCHR && st.Rdev == unix.Mkdev(1, 3) +} + +func verifyDevNull(f *os.File) error { + return sys.VerifyInode(f, func(st *unix.Stat_t, _ *unix.Statfs_t) error { + if !isDevNull(st) { + return errors.New("container's /dev/null is invalid") + } + return nil + }) +} + +// maskPaths masks the top of the specified paths inside a container to avoid // security issues from processes reading information from non-namespace aware // mounts ( proc/kcore ). // For files, maskPath bind mounts /dev/null over the top of the specified path. // For directories, maskPath mounts read-only tmpfs over the top of the specified path. -func maskPath(path string, mountLabel string) error { - if err := mount("/dev/null", path, "", unix.MS_BIND, ""); err != nil && !errors.Is(err, os.ErrNotExist) { - if errors.Is(err, unix.ENOTDIR) { - return mount("tmpfs", path, "tmpfs", unix.MS_RDONLY, label.FormatMountLabel("", mountLabel)) +func maskPaths(paths []string, mountLabel string) error { + devNull, err := os.OpenFile("/dev/null", unix.O_PATH, 0) + if err != nil { + return fmt.Errorf("can't mask paths: %w", err) + } + defer devNull.Close() + if err := verifyDevNull(devNull); err != nil { + return fmt.Errorf("can't mask paths: %w", err) + } + devNullSrc := &mountSource{Type: mountSourcePlain, file: devNull} + procSelfFd, closer := utils.ProcThreadSelf("fd/") + defer closer() + + for _, path := range paths { + // Open the target path; skip if it doesn't exist. 
+ dstFh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + continue + } + return fmt.Errorf("can't mask path %q: %w", path, err) + } + st, err := dstFh.Stat() + if err != nil { + dstFh.Close() + return fmt.Errorf("can't mask path %q: %w", path, err) + } + var dstType string + if st.IsDir() { + // Destination is a directory: mount a read-only tmpfs over it. + dstType = "dir" + err = mount("tmpfs", path, "tmpfs", unix.MS_RDONLY, label.FormatMountLabel("", mountLabel)) + } else { + // Destination is a file: bind mount /dev/null over it. + dstType = "path" + dstFd := filepath.Join(procSelfFd, strconv.Itoa(int(dstFh.Fd()))) + err = mountViaFds("", devNullSrc, path, dstFd, "", unix.MS_BIND, "") + } + dstFh.Close() + if err != nil { + return fmt.Errorf("can't mask %s %q: %w", dstType, path, err) } - return err } + return nil } -// writeSystemProperty writes the value to a path under /proc/sys as determined from the key. -// For e.g. net.ipv4.ip_forward translated to /proc/sys/net/ipv4/ip_forward. 
-func writeSystemProperty(key, value string) error { - keyPath := strings.Replace(key, ".", "/", -1) - return os.WriteFile(path.Join("/proc/sys", keyPath), []byte(value), 0o644) +func reopenAfterMount(rootfs string, f *os.File, flags int) (_ *os.File, Err error) { + fullPath, err := procfs.ProcSelfFdReadlink(f) + if err != nil { + return nil, fmt.Errorf("get full path: %w", err) + } + if !pathrs.IsLexicallyInRoot(rootfs, fullPath) { + return nil, fmt.Errorf("mountpoint %q is outside of rootfs %q", fullPath, rootfs) + } + unsafePath := utils.StripRoot(rootfs, fullPath) + reopened, err := pathrs.OpenInRoot(rootfs, unsafePath, flags) + if err != nil { + return nil, fmt.Errorf("re-open mountpoint %q: %w", unsafePath, err) + } + defer func() { + if Err != nil { + _ = reopened.Close() + } + }() + + // NOTE: The best we can do here is confirm that the new mountpoint handle + // matches the original target handle, but an attacker could've swapped a + // different path to replace it. In the worst case this could result in us + // applying later vfsmount flags onto the wrong mount. + // + // This is far from ideal, but the only way of doing this in a race-free + // way is to switch to the new mount API (move_mount(2) does not require this + // re-opening step, and thus no such races are possible). + reopenedFullPath, err := procfs.ProcSelfFdReadlink(reopened) + if err != nil { + return nil, fmt.Errorf("check full path of re-opened mountpoint: %w", err) + } + if reopenedFullPath != fullPath { + return nil, fmt.Errorf("mountpoint %q was moved while re-opening", unsafePath) + } + return reopened, nil } // Do the mount operation followed by additional mounts required to take care // of propagation flags. This will always be scoped inside the container rootfs. 
-func mountPropagate(m mountEntry, rootfs string, mountLabel string) error { +func (m *mountEntry) mountPropagate(rootfs string, mountLabel string) error { var ( data = label.FormatMountLabel(m.Data, mountLabel) flags = m.Flags @@ -1270,19 +1436,30 @@ func mountPropagate(m mountEntry, rootfs string, mountLabel string) error { flags &= ^unix.MS_RDONLY } - // Because the destination is inside a container path which might be - // mutating underneath us, we verify that we are actually going to mount - // inside the container with WithProcfd() -- mounting through a procfd - // mounts on the target. - if err := utils.WithProcfd(rootfs, m.Destination, func(dstFd string) error { + if err := utils.WithProcfdFile(m.dstFile, func(dstFd string) error { return mountViaFds(m.Source, m.srcFile, m.Destination, dstFd, m.Device, uintptr(flags), data) }); err != nil { return err } + + // We need to re-open the mountpoint after doing the mount, in order for us + // to operate on the new mount we just created. However, we cannot use + // pathrs.Reopen because we need to re-resolve from the parent directory to + // get a new handle to the top mount. + // + // TODO: Use move_mount(2) on newer kernels so that this is no longer + // necessary on modern systems. + newDstFile, err := reopenAfterMount(rootfs, m.dstFile, unix.O_PATH) + if err != nil { + return fmt.Errorf("reopen mountpoint after mount: %w", err) + } + _ = m.dstFile.Close() + m.dstFile = newDstFile + // We have to apply mount propagation flags in a separate WithProcfd() call // because the previous call invalidates the passed procfd -- the mount // target needs to be re-opened. 
- if err := utils.WithProcfd(rootfs, m.Destination, func(dstFd string) error { + if err := utils.WithProcfdFile(m.dstFile, func(dstFd string) error { for _, pflag := range m.PropagationFlags { if err := mountViaFds("", nil, m.Destination, dstFd, "", uintptr(pflag), ""); err != nil { return err @@ -1295,11 +1472,11 @@ func mountPropagate(m mountEntry, rootfs string, mountLabel string) error { return nil } -func setRecAttr(m *configs.Mount, rootfs string) error { +func setRecAttr(m mountEntry) error { if m.RecAttr == nil { return nil } - return utils.WithProcfd(rootfs, m.Destination, func(procfd string) error { + return utils.WithProcfdFile(m.dstFile, func(procfd string) error { return unix.MountSetattr(-1, procfd, unix.AT_RECURSIVE, m.RecAttr) }) } diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index e7c6faae347..95ada4997dc 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -315,6 +315,23 @@ var AllowedDevices = []*devices.Device{ Allow: true, }, }, + // The following entry for /dev/net/tun device was there from the + // very early days of Docker, but got removed in runc 1.2.0-rc1, + // causing a number of regressions for users (see + // https://github.com/opencontainers/runc/pull/3468). + // + // Some upper-level orcherstration tools makes it either impossible + // or cumbersome to supply additional device rules, so we have to + // keep this for the sake of backward compatibility. 
+ { + Rule: devices.Rule{ + Type: devices.CharDevice, + Major: 10, + Minor: 200, + Permissions: "rwm", + Allow: true, + }, + }, } type CreateOpts struct { diff --git a/libcontainer/standard_init_linux.go b/libcontainer/standard_init_linux.go index 9f7fa45d533..6a46eff7664 100644 --- a/libcontainer/standard_init_linux.go +++ b/libcontainer/standard_init_linux.go @@ -11,6 +11,8 @@ import ( "github.com/sirupsen/logrus" "golang.org/x/sys/unix" + "github.com/opencontainers/runc/internal/pathrs" + "github.com/opencontainers/runc/internal/sys" "github.com/opencontainers/runc/libcontainer/apparmor" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/keys" @@ -130,20 +132,17 @@ func (l *linuxStandardInit) Init() error { return fmt.Errorf("unable to apply apparmor profile: %w", err) } - for key, value := range l.config.Config.Sysctl { - if err := writeSystemProperty(key, value); err != nil { - return err - } + if err := sys.WriteSysctls(l.config.Config.Sysctl); err != nil { + return err } for _, path := range l.config.Config.ReadonlyPaths { if err := readonlyPath(path); err != nil { return fmt.Errorf("can't make %q read-only: %w", path, err) } } - for _, path := range l.config.Config.MaskPaths { - if err := maskPath(path, l.config.Config.MountLabel); err != nil { - return fmt.Errorf("can't mask path %s: %w", path, err) - } + + if err := maskPaths(l.config.Config.MaskPaths, l.config.Config.MountLabel); err != nil { + return err } pdeath, err := system.GetParentDeathSignal() if err != nil { @@ -244,19 +243,17 @@ func (l *linuxStandardInit) Init() error { return fmt.Errorf("close log pipe: %w", err) } - fifoPath, closer := utils.ProcThreadSelfFd(l.fifoFile.Fd()) - defer closer() - // Wait for the FIFO to be opened on the other side before exec-ing the // user process. We open it through /proc/self/fd/$fd, because the fd that // was given to us was an O_PATH fd to the fifo itself. 
Linux allows us to // re-open an O_PATH fd through /proc. - fd, err := unix.Open(fifoPath, unix.O_WRONLY|unix.O_CLOEXEC, 0) + fifoFile, err := pathrs.Reopen(l.fifoFile, unix.O_WRONLY|unix.O_CLOEXEC) if err != nil { - return &os.PathError{Op: "open exec fifo", Path: fifoPath, Err: err} + return fmt.Errorf("reopen exec fifo: %w", err) } - if _, err := unix.Write(fd, []byte("0")); err != nil { - return &os.PathError{Op: "write exec fifo", Path: fifoPath, Err: err} + defer fifoFile.Close() + if _, err := fifoFile.Write([]byte("0")); err != nil { + return &os.PathError{Op: "write exec fifo", Path: fifoFile.Name(), Err: err} } // Close the O_PATH fifofd fd before exec because the kernel resets @@ -265,6 +262,7 @@ func (l *linuxStandardInit) Init() error { // N.B. the core issue itself (passing dirfds to the host filesystem) has // since been resolved. // https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318 + _ = fifoFile.Close() _ = l.fifoFile.Close() s := l.config.SpecState diff --git a/libcontainer/sync_unix.go b/libcontainer/sync_unix.go index c5d8f55ec95..69c0228dbe1 100644 --- a/libcontainer/sync_unix.go +++ b/libcontainer/sync_unix.go @@ -42,9 +42,20 @@ func (s *syncSocket) WritePacket(b []byte) (int, error) { } func (s *syncSocket) ReadPacket() ([]byte, error) { - size, _, err := unix.Recvfrom(int(s.f.Fd()), nil, unix.MSG_TRUNC|unix.MSG_PEEK) + var ( + size int + err error + ) + + for { + size, _, err = unix.Recvfrom(int(s.f.Fd()), nil, unix.MSG_TRUNC|unix.MSG_PEEK) + if err != unix.EINTR { //nolint:errorlint // unix errors are bare + break + } + } + if err != nil { - return nil, fmt.Errorf("fetch packet length from socket: %w", err) + return nil, fmt.Errorf("fetch packet length from socket: %w", os.NewSyscallError("recvfrom", err)) } // We will only get a zero size if the socket has been closed from the // other end (otherwise recvfrom(2) will block until a packet is ready). 
In diff --git a/libcontainer/system/linux.go b/libcontainer/system/linux.go index 7bbf92a3d30..5e558c4f993 100644 --- a/libcontainer/system/linux.go +++ b/libcontainer/system/linux.go @@ -6,8 +6,6 @@ import ( "fmt" "io" "os" - "strconv" - "syscall" "unsafe" "github.com/sirupsen/logrus" @@ -43,49 +41,6 @@ func Exec(cmd string, args []string, env []string) error { } } -func execveat(fd uintptr, pathname string, args []string, env []string, flags int) error { - pathnamep, err := syscall.BytePtrFromString(pathname) - if err != nil { - return err - } - - argvp, err := syscall.SlicePtrFromStrings(args) - if err != nil { - return err - } - - envp, err := syscall.SlicePtrFromStrings(env) - if err != nil { - return err - } - - _, _, errno := syscall.Syscall6( - unix.SYS_EXECVEAT, - fd, - uintptr(unsafe.Pointer(pathnamep)), - uintptr(unsafe.Pointer(&argvp[0])), - uintptr(unsafe.Pointer(&envp[0])), - uintptr(flags), - 0, - ) - return errno -} - -func Fexecve(fd uintptr, args []string, env []string) error { - var err error - for { - err = execveat(fd, "", args, env, unix.AT_EMPTY_PATH) - if err != unix.EINTR { // nolint:errorlint // unix errors are bare - break - } - } - if err == unix.ENOSYS { // nolint:errorlint // unix errors are bare - // Fallback to classic /proc/self/fd/... exec. - return Exec("/proc/self/fd/"+strconv.Itoa(int(fd)), args, env) - } - return os.NewSyscallError("execveat", err) -} - func SetParentDeathSignal(sig uintptr) error { if err := unix.Prctl(unix.PR_SET_PDEATHSIG, sig, 0, 0, 0); err != nil { return err @@ -214,3 +169,23 @@ func SetLinuxPersonality(personality int) error { } return nil } + +// GetPtyPeer is a wrapper for ioctl(TIOCGPTPEER). +func GetPtyPeer(ptyFd uintptr, unsafePeerPath string, flags int) (*os.File, error) { + // Make sure O_NOCTTY is always set -- otherwise runc might accidentally + // gain it as a controlling terminal. O_CLOEXEC also needs to be set to + // make sure we don't leak the handle either. 
+ flags |= unix.O_NOCTTY | unix.O_CLOEXEC + + // There is no nice wrapper for this kind of ioctl in unix. + peerFd, _, errno := unix.Syscall( + unix.SYS_IOCTL, + ptyFd, + uintptr(unix.TIOCGPTPEER), + uintptr(flags), + ) + if errno != 0 { + return nil, os.NewSyscallError("ioctl TIOCGPTPEER", errno) + } + return os.NewFile(peerFd, unsafePeerPath), nil +} diff --git a/libcontainer/system/proc.go b/libcontainer/system/proc.go index 774443ec9d2..34850dd8317 100644 --- a/libcontainer/system/proc.go +++ b/libcontainer/system/proc.go @@ -2,10 +2,12 @@ package system import ( "fmt" + "io" "os" - "path/filepath" "strconv" "strings" + + "github.com/opencontainers/runc/internal/pathrs" ) // State is the status of a process. @@ -66,8 +68,16 @@ type Stat_t struct { } // Stat returns a Stat_t instance for the specified process. -func Stat(pid int) (stat Stat_t, err error) { - bytes, err := os.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat")) +func Stat(pid int) (Stat_t, error) { + var stat Stat_t + + statFile, err := pathrs.ProcPidOpen(pid, "stat", os.O_RDONLY) + if err != nil { + return stat, err + } + defer statFile.Close() + + bytes, err := io.ReadAll(statFile) if err != nil { return stat, err } diff --git a/libcontainer/utils/cmsg.go b/libcontainer/utils/cmsg.go index 2edd1417af3..3aca5bdaccd 100644 --- a/libcontainer/utils/cmsg.go +++ b/libcontainer/utils/cmsg.go @@ -42,9 +42,20 @@ func RecvFile(socket *os.File) (_ *os.File, Err error) { oob := make([]byte, oobSpace) sockfd := socket.Fd() - n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, unix.MSG_CMSG_CLOEXEC) + var ( + n, oobn int + err error + ) + + for { + n, oobn, _, _, err = unix.Recvmsg(int(sockfd), name, oob, unix.MSG_CMSG_CLOEXEC) + if err != unix.EINTR { //nolint:errorlint // unix errors are bare + break + } + } + if err != nil { - return nil, err + return nil, os.NewSyscallError("recvmsg", err) } if n >= MaxNameLen || oobn != oobSpace { return nil, fmt.Errorf("recvfile: incorrect number of 
bytes read (n=%d oobn=%d)", n, oobn) @@ -115,5 +126,10 @@ func SendFile(socket *os.File, file *os.File) error { // SendRawFd sends a specific file descriptor over the given AF_UNIX socket. func SendRawFd(socket *os.File, msg string, fd uintptr) error { oob := unix.UnixRights(int(fd)) - return unix.Sendmsg(int(socket.Fd()), []byte(msg), oob, nil, 0) + for { + err := unix.Sendmsg(int(socket.Fd()), []byte(msg), oob, nil, 0) + if err != unix.EINTR { //nolint:errorlint // unix errors are bare + return os.NewSyscallError("sendmsg", err) + } + } } diff --git a/libcontainer/utils/utils.go b/libcontainer/utils/utils.go index db420ea688d..3e008bd49a9 100644 --- a/libcontainer/utils/utils.go +++ b/libcontainer/utils/utils.go @@ -65,11 +65,11 @@ func CleanPath(path string) string { return filepath.Clean(path) } -// stripRoot returns the passed path, stripping the root path if it was +// StripRoot returns the passed path, stripping the root path if it was // (lexicially) inside it. Note that both passed paths will always be treated // as absolute, and the returned path will also always be absolute. In // addition, the paths are cleaned before stripping the root. -func stripRoot(root, path string) string { +func StripRoot(root, path string) string { // Make the paths clean and absolute. 
root, path = CleanPath("/"+root), CleanPath("/"+path) switch { diff --git a/libcontainer/utils/utils_test.go b/libcontainer/utils/utils_test.go index 06c042f5fe3..4b5fd833cdf 100644 --- a/libcontainer/utils/utils_test.go +++ b/libcontainer/utils/utils_test.go @@ -131,9 +131,9 @@ func TestStripRoot(t *testing.T) { {"/foo/bar", "foo/bar/baz/beef", "/baz/beef"}, {"foo/bar", "foo/bar/baz/beets", "/baz/beets"}, } { - got := stripRoot(test.root, test.path) + got := StripRoot(test.root, test.path) if got != test.out { - t.Errorf("stripRoot(%q, %q) -- got %q, expected %q", test.root, test.path, got, test.out) + t.Errorf("StripRoot(%q, %q) -- got %q, expected %q", test.root, test.path, got, test.out) } } } diff --git a/libcontainer/utils/utils_unix.go b/libcontainer/utils/utils_unix.go index c8ad559d931..638878d77df 100644 --- a/libcontainer/utils/utils_unix.go +++ b/libcontainer/utils/utils_unix.go @@ -9,27 +9,15 @@ import ( "path/filepath" "runtime" "strconv" - "strings" "sync" _ "unsafe" // for go:linkname securejoin "github.com/cyphar/filepath-securejoin" + "github.com/opencontainers/runc/internal/pathrs" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" ) -// EnsureProcHandle returns whether or not the given file handle is on procfs. -func EnsureProcHandle(fh *os.File) error { - var buf unix.Statfs_t - if err := unix.Fstatfs(int(fh.Fd()), &buf); err != nil { - return fmt.Errorf("ensure %s is on procfs: %w", fh.Name(), err) - } - if buf.Type != unix.PROC_SUPER_MAGIC { - return fmt.Errorf("%s is not on procfs", fh.Name()) - } - return nil -} - var ( haveCloseRangeCloexecBool bool haveCloseRangeCloexecOnce sync.Once @@ -59,19 +47,13 @@ type fdFunc func(fd int) // fdRangeFrom calls the passed fdFunc for each file descriptor that is open in // the current process. 
func fdRangeFrom(minFd int, fn fdFunc) error { - procSelfFd, closer := ProcThreadSelf("fd") - defer closer() - - fdDir, err := os.Open(procSelfFd) + fdDir, closer, err := pathrs.ProcThreadSelfOpen("fd/", unix.O_DIRECTORY|unix.O_CLOEXEC) if err != nil { - return err + return fmt.Errorf("get handle to /proc/thread-self/fd: %w", err) } + defer closer() defer fdDir.Close() - if err := EnsureProcHandle(fdDir); err != nil { - return err - } - fdList, err := fdDir.Readdirnames(-1) if err != nil { return err @@ -164,8 +146,8 @@ func NewSockPair(name string) (parent, child *os.File, err error) { // the passed closure (the file handle will be freed once the closure returns). func WithProcfd(root, unsafePath string, fn func(procfd string) error) error { // Remove the root then forcefully resolve inside the root. - unsafePath = stripRoot(root, unsafePath) - path, err := securejoin.SecureJoin(root, unsafePath) + unsafePath = StripRoot(root, unsafePath) + fullPath, err := securejoin.SecureJoin(root, unsafePath) if err != nil { return fmt.Errorf("resolving path inside rootfs failed: %w", err) } @@ -174,7 +156,7 @@ func WithProcfd(root, unsafePath string, fn func(procfd string) error) error { defer closer() // Open the target path. - fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0) + fh, err := os.OpenFile(fullPath, unix.O_PATH|unix.O_CLOEXEC, 0) if err != nil { return fmt.Errorf("open o_path procfd: %w", err) } @@ -184,13 +166,24 @@ func WithProcfd(root, unsafePath string, fn func(procfd string) error) error { // Double-check the path is the one we expected. 
if realpath, err := os.Readlink(procfd); err != nil { return fmt.Errorf("procfd verification failed: %w", err) - } else if realpath != path { + } else if realpath != fullPath { return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath) } return fn(procfd) } +// WithProcfdFile is a very minimal wrapper around [ProcThreadSelfFd], intended +// to make migrating from [WithProcfd] and [WithProcfdPath] usage easier. The +// caller is responsible for making sure that the provided file handle is +// actually safe to operate on. +func WithProcfdFile(file *os.File, fn func(procfd string) error) error { + fdpath, closer := ProcThreadSelfFd(file.Fd()) + defer closer() + + return fn(fdpath) +} + type ProcThreadSelfCloser func() var ( @@ -262,91 +255,6 @@ func ProcThreadSelfFd(fd uintptr) (string, ProcThreadSelfCloser) { return ProcThreadSelf("fd/" + strconv.FormatUint(uint64(fd), 10)) } -// IsLexicallyInRoot is shorthand for strings.HasPrefix(path+"/", root+"/"), -// but properly handling the case where path or root are "/". -// -// NOTE: The return value only make sense if the path doesn't contain "..". -func IsLexicallyInRoot(root, path string) bool { - if root != "/" { - root += "/" - } - if path != "/" { - path += "/" - } - return strings.HasPrefix(path, root) -} - -// MkdirAllInRootOpen attempts to make -// -// path, _ := securejoin.SecureJoin(root, unsafePath) -// os.MkdirAll(path, mode) -// os.Open(path) -// -// safer against attacks where components in the path are changed between -// SecureJoin returning and MkdirAll (or Open) being called. In particular, we -// try to detect any symlink components in the path while we are doing the -// MkdirAll. -// -// NOTE: Unlike os.MkdirAll, mode is not Go's os.FileMode, it is the unix mode -// (the suid/sgid/sticky bits are not the same as for os.FileMode). 
-// -// NOTE: If unsafePath is a subpath of root, we assume that you have already -// called SecureJoin and so we use the provided path verbatim without resolving -// any symlinks (this is done in a way that avoids symlink-exchange races). -// This means that the path also must not contain ".." elements, otherwise an -// error will occur. -// -// This uses securejoin.MkdirAllHandle under the hood, but it has special -// handling if unsafePath has already been scoped within the rootfs (this is -// needed for a lot of runc callers and fixing this would require reworking a -// lot of path logic). -func MkdirAllInRootOpen(root, unsafePath string, mode uint32) (_ *os.File, Err error) { - // If the path is already "within" the root, get the path relative to the - // root and use that as the unsafe path. This is necessary because a lot of - // MkdirAllInRootOpen callers have already done SecureJoin, and refactoring - // all of them to stop using these SecureJoin'd paths would require a fair - // amount of work. - // TODO(cyphar): Do the refactor to libpathrs once it's ready. - if IsLexicallyInRoot(root, unsafePath) { - subPath, err := filepath.Rel(root, unsafePath) - if err != nil { - return nil, err - } - unsafePath = subPath - } - - // Check for any silly mode bits. - if mode&^0o7777 != 0 { - return nil, fmt.Errorf("tried to include non-mode bits in MkdirAll mode: 0o%.3o", mode) - } - // Linux (and thus os.MkdirAll) silently ignores the suid and sgid bits if - // passed. While it would make sense to return an error in that case (since - // the user has asked for a mode that won't be applied), for compatibility - // reasons we have to ignore these bits. 
- if ignoredBits := mode &^ 0o1777; ignoredBits != 0 { - logrus.Warnf("MkdirAll called with no-op mode bits that are ignored by Linux: 0o%.3o", ignoredBits) - mode &= 0o1777 - } - - rootDir, err := os.OpenFile(root, unix.O_DIRECTORY|unix.O_CLOEXEC, 0) - if err != nil { - return nil, fmt.Errorf("open root handle: %w", err) - } - defer rootDir.Close() - - return securejoin.MkdirAllHandle(rootDir, unsafePath, int(mode)) -} - -// MkdirAllInRoot is a wrapper around MkdirAllInRootOpen which closes the -// returned handle, for callers that don't need to use it. -func MkdirAllInRoot(root, unsafePath string, mode uint32) error { - f, err := MkdirAllInRootOpen(root, unsafePath, mode) - if err == nil { - _ = f.Close() - } - return err -} - // Openat is a Go-friendly openat(2) wrapper. func Openat(dir *os.File, path string, flags int, mode uint32) (*os.File, error) { dirFd := unix.AT_FDCWD diff --git a/pause.go b/pause.go index a7f0aaccc4e..b5d354db6c9 100644 --- a/pause.go +++ b/pause.go @@ -1,7 +1,6 @@ package main import ( - "github.com/sirupsen/logrus" "github.com/urfave/cli" ) @@ -19,18 +18,16 @@ Use runc list to identify instances of containers and their current status.`, if err := checkArgs(context, 1, exactArgs); err != nil { return err } - rootlessCg, err := shouldUseRootlessCgroupManager(context) + container, err := getContainer(context) if err != nil { return err } - if rootlessCg { - logrus.Warnf("runc pause may fail if you don't have the full access to cgroups") - } - container, err := getContainer(context) + err = container.Pause() if err != nil { + maybeLogCgroupWarning("pause", err) return err } - return container.Pause() + return nil }, } @@ -48,17 +45,15 @@ Use runc list to identify instances of containers and their current status.`, if err := checkArgs(context, 1, exactArgs); err != nil { return err } - rootlessCg, err := shouldUseRootlessCgroupManager(context) + container, err := getContainer(context) if err != nil { return err } - if rootlessCg { - 
logrus.Warn("runc resume may fail if you don't have the full access to cgroups") - } - container, err := getContainer(context) + err = container.Resume() if err != nil { + maybeLogCgroupWarning("resume", err) return err } - return container.Resume() + return nil }, } diff --git a/ps.go b/ps.go index 4083e559d75..2588496acc8 100644 --- a/ps.go +++ b/ps.go @@ -9,7 +9,6 @@ import ( "strconv" "strings" - "github.com/sirupsen/logrus" "github.com/urfave/cli" ) @@ -28,13 +27,6 @@ var psCommand = cli.Command{ if err := checkArgs(context, 1, minArgs); err != nil { return err } - rootlessCg, err := shouldUseRootlessCgroupManager(context) - if err != nil { - return err - } - if rootlessCg { - logrus.Warn("runc ps may fail if you don't have the full access to cgroups") - } container, err := getContainer(context) if err != nil { @@ -43,6 +35,7 @@ var psCommand = cli.Command{ pids, err := container.Processes() if err != nil { + maybeLogCgroupWarning("ps", err) return err } diff --git a/runc.keyring b/runc.keyring index 3fb6d283636..4219f48265a 100644 --- a/runc.keyring +++ b/runc.keyring @@ -122,10 +122,10 @@ lxxclgJYU604APsFzpoLD0oUlfMn5Fh75ftkKPrwiHpTj4rRU6oIQu1/Bg== =Ab7w -----END PGP PUBLIC KEY BLOCK----- -pub rsa2048 2020-04-28 [SC] [expires: 2025-04-18] +pub rsa2048 2020-04-28 [SC] [expires: 2028-04-18] C2428CD75720FACDCF76B6EA17DE5ECB75A1100E uid [ultimate] Kir Kolyshkin -sub rsa2048 2020-04-28 [E] [expires: 2025-04-18] +sub rsa2048 2020-04-28 [E] [expires: 2028-04-18] -----BEGIN PGP PUBLIC KEY BLOCK----- Comment: github=kolyshkin @@ -137,26 +137,26 @@ ppTSiCl8/x/gKoXiJ+7MyvOZozUavkVHdim1NKCzwD014VOB8RXz+heUjS+HDXY9 SbTL4jCsN/x0bq+ZNp4lunihVY5WqX+BGLcx7xPnJ0Rp9Ju1mAhKrbKUmOG3rkWu DIJuVP8HQfCoffsBLUKQ0V4fh18kfq1bo3JvABEBAAG0I0tpciBLb2x5c2hraW4g PGtvbHlzaGtpbkBnbWFpbC5jb20+iQFUBBMBCAA+AhsDBQsJCAcCBhUKCQgLAgQW -AgMBAh4BAheAFiEEwkKM11cg+s3PdrbqF95ey3WhEA4FAmRAbOgFCQlaGGoACgkQ -F95ey3WhEA6dRQf+P+OHI3QiZu3TnrNBTsf+V8HhFBWKqafrjKbIE1A5HOHzcK2F 
-t2afYG+MZQILwSuCQOObgr3o7hGlqkwMwGtHt5nqG6/Z0bmkowG4JJmYIg9FhvQW -JEm/7lSBtxvFkw05H90UlzCM7AigD+PrLs96Zb0+FqdzEDWTMJeU7yYUFRNbXEu3 -wqpOZpHlYCJGKzFJBbGxYphlmljexRlWdZPwACKg7lBsVkM8JDPGxmmEe7/5tXPt -Oa1yS13SleLv4muHH3KO3cgJGqBfY/XIExZUQUF0GdL0yppBDbn0oZ/wvRuibCR0 -1P7rW88csSjAjhNjja4v/zWleSIpyWVi8IvYLLkBDQReqLt+AQgAtKUDLyUFxQ9k +AgMBAh4BAheAFiEEwkKM11cg+s3PdrbqF95ey3WhEA4FAmdcs+gFCQ7+0bIACgkQ +F95ey3WhEA6rRwf8CxnbLB/uqPZfmmiTzTk7luWaIo6YxtnNz3bn2rTByEo+rBgO +gbgtKaV4REYeKhtbdstkMTX3zr+zlqwuqaPaag/Cz20HLkD04bI+JCPoRH/dPadd +3nOdbdRfdWZeDDSFKjVunVpXlLxwvZ1WaaYKCfF06U3F7/z7MTAuKHrHTG9SrNPJ +UPJTy63dNnuiPpVNNtOyftLGEGgD1JH2tcosVEwEpAlXpIpJy4Lad9ajaRVoYNtT +qZr26sRFYNOQqWgl25QM8LyLFyYry9HfEXkbilW0OpkAkUvv0yAe97UPZ0beP8D+ +d5rMbZps6Ph1TtosdE/Gx8xWs7ALNDmXyCI/F7kBDQReqLt+AQgAtKUDLyUFxQ9k p8OwI/MsPTLLoYfjilJaXnmtzQjGYFrEuU3lt7omRUBldNChkjGghEukGTq0RD7Z s6Qv5PM5dtOypPJM0lmz2j7seun3AfDV44h/bjOFwTUjab3Nr9fQ52qESmRS03ik 6+5YNwq2D/+2kHVJ2vkUoo6KvioA1vPU311oW/Yfky8dLS5NguikE3to6YElWW38 oqFUVdMScCbf9a6CPXSQEz/rH4TgAhwyTo6oegv+8L/szGFy5ToNGiA0D45HcFDc yXs1d+b3bYRuGfC1l/z+WZWwbeHt1fKEQ8pCLDLRre5y0hPRHeN2CG4U7iyI5B5h 8LITPcZ66wARAQABiQE8BBgBCAAmAhsMFiEEwkKM11cg+s3PdrbqF95ey3WhEA4F -AmRAbRQFCQlaGJYACgkQF95ey3WhEA7vywf9FFTeRgNji8ZIPMM2vIlns+CMkP5R -uXakU6Q0O6Wmbb/ULOkobTqJ/Jcze8OuembuU3V6MiOQKgUIDrN7itjnJPQBneKT -iqJdPK8KOiGIzqa0aRekvOu2nCz9n87Bf48pviH922yfs8gXYRCUnSV/i7/p+N8r -5Fy7dJen5SXksN2/rUCEgU9FD17l2uMAoQbRqZg74/GwSDLnhrZ9eMrbPnguSQF4 -S1NPMeS7+G/gPN9Ze9qFmOF2p57cmEa+8mriZCYY3BcUBOiMOV5HSBKJwqA2M8au -2dAKmFWb/G+K/dgBdkAulQ/BfCpwgFmmgJ5dAeaS3y8Xd86aBE0/eLCrhQ== -=GkpD +AmdctAIFCQ7+0bIACgkQF95ey3WhEA7PDggAlZxK7mCYThh7Z75mWftIaT3ms5jR +cuQcCQYy2Z7qCaNxJtRklhsaAwpO0NQdNdQEfVXlNYLXRuFDq+hemhZKMu4lzQbZ +3atm5swWcB8+9q+aCMP5nppwUXxCxHdhp4VxIYEv+wNjTF/6Fxu66fYPQPDKVacS +H9NLjHsVoDFSi9rvtAy/Bs2aVn0hZkwpxzHJNVPnNcMAEnYXfM+kXu3761J61FAr +o8zT9XXXnUYRuxHRAsrpa3atQj7jDHvFlcc3VfPmUFPs0aLRy19/44xRE1FZOSur +f7jJ1HOKSJA9zx0xWaURRTRkMTIVuMnQKZofxC96GavBDVTtZlgLzeWVnQ== +=eHgH -----END PGP PUBLIC 
KEY BLOCK----- pub rsa3072 2019-07-25 [SC] [expires: 2025-07-27] diff --git a/script/release_sign.sh b/script/release_sign.sh index 39f806fa642..883d0169a35 100755 --- a/script/release_sign.sh +++ b/script/release_sign.sh @@ -108,7 +108,7 @@ trap 'rm -r "$tmp_gpgdir"' EXIT tmp_runc_gpgflags=("--homedir=$tmp_gpgdir" "--no-default-keyring" "--keyring=$project.keyring") gpg "${tmp_runc_gpgflags[@]}" --import <"$root/$project.keyring" -tmp_seccomp_gpgflags=("--homedir=$tmp_gpgdir" "--no-default-keyring" "--keyring=seccomp.keyring") +tmp_seccomp_gpgflags=("--homedir=$tmp_gpgdir" "--no-default-keyring" "--keyring=seccomp.keyring" "--keyserver=keys.openpgp.org") gpg "${tmp_seccomp_gpgflags[@]}" --recv-keys 0x47A68FCE37C7D7024FD65E11356CE62C2B524099 gpg "${tmp_seccomp_gpgflags[@]}" --recv-keys 0x7100AADFAE6E6E940D2E0AD655E45A5AE8CA7C8A diff --git a/script/setup_host_fedora.sh b/script/setup_host_fedora.sh new file mode 100755 index 00000000000..efe6a001dc1 --- /dev/null +++ b/script/setup_host_fedora.sh @@ -0,0 +1,33 @@ +#!/bin/bash +set -eux -o pipefail +DNF=(dnf -y --setopt=install_weak_deps=False --setopt=tsflags=nodocs --exclude="kernel,kernel-core") +RPMS=(bats git-core glibc-static golang jq libseccomp-devel make) +# Work around dnf mirror failures by retrying a few times. +for i in $(seq 0 2); do + sleep "$i" + "${DNF[@]}" update && "${DNF[@]}" install "${RPMS[@]}" && break +done + +# criu-4.1-1 has a known bug (https://github.com/checkpoint-restore/criu/issues/2650) +# which is fixed in criu-4.1-2 (currently in updates-testing). TODO: remove this later. +if [[ $(rpm -q criu) == "criu-4.1-1.fc"* ]]; then + "${DNF[@]}" --enablerepo=updates-testing update criu +fi + +dnf clean all + +# To avoid "avc: denied { nosuid_transition }" from SELinux as we run tests on /tmp. +mount -o remount,suid /tmp + +# Setup rootless user. 
+"$(dirname "${BASH_SOURCE[0]}")"/setup_rootless.sh + +# Delegate cgroup v2 controllers to rootless user via --systemd-cgroup +mkdir -p /etc/systemd/system/user@.service.d +cat >/etc/systemd/system/user@.service.d/delegate.conf <= 244 (Fedora >= 32, Ubuntu >= 20.04). +Delegate=yes +EOF +systemctl daemon-reload diff --git a/script/setup_rootless.sh b/script/setup_rootless.sh new file mode 100755 index 00000000000..842d2ecf16b --- /dev/null +++ b/script/setup_rootless.sh @@ -0,0 +1,15 @@ +#!/bin/bash +set -eux -o pipefail + +# Add a user for rootless tests. +sudo useradd -u2000 -m -d/home/rootless -s/bin/bash rootless + +# Allow both the current user and rootless itself to use +# ssh rootless@localhost in tests/rootless.sh. +# shellcheck disable=SC2174 # Silence "-m only applies to the deepest directory". +mkdir -p -m 0700 "$HOME/.ssh" +ssh-keygen -t ecdsa -N "" -f "$HOME/.ssh/rootless.key" +sudo mkdir -p -m 0700 /home/rootless/.ssh +sudo cp "$HOME/.ssh/rootless.key" /home/rootless/.ssh/id_ecdsa +sudo cp "$HOME/.ssh/rootless.key.pub" /home/rootless/.ssh/authorized_keys +sudo chown -R rootless.rootless /home/rootless diff --git a/tests/integration/cgroups.bats b/tests/integration/cgroups.bats index 70e3ee9f5e0..a2b207109d5 100644 --- a/tests/integration/cgroups.bats +++ b/tests/integration/cgroups.bats @@ -50,7 +50,7 @@ function setup() { check_cgroup_value "cgroup.controllers" "$(cat /sys/fs/cgroup/machine.slice/cgroup.controllers)" else # Filter out controllers that systemd is unable to delegate. - check_cgroup_value "cgroup.controllers" "$(sed 's/ \(hugetlb\|misc\|rdma\)//g' /dev/null & echo $!' + [ "$status" -eq 0 ] + execed_pid=$output + done +} diff --git a/tests/integration/cpu_affinity.bats b/tests/integration/cpu_affinity.bats new file mode 100644 index 00000000000..5df65374352 --- /dev/null +++ b/tests/integration/cpu_affinity.bats @@ -0,0 +1,127 @@ +#!/usr/bin/env bats +# Exec CPU affinity tests. 
For more details, see: +# - https://github.com/opencontainers/runtime-spec/pull/1253 + +load helpers + +INITIAL_CPU_MASK="$(grep -F Cpus_allowed_list: /proc/self/status | awk '{ print $2 }')" + +function setup() { + requires smp cgroups_cpuset + setup_busybox + + echo "Initial CPU mask: $INITIAL_CPU_MASK" >&2 + echo "---" >&2 +} + +function teardown() { + teardown_bundle +} + +function first_cpu() { + sed 's/[-,].*//g' &2 + echo "$cmdname $* (status=$status)" >&2 # shellcheck disable=SC2154 echo "$output" >&2 } +# Wrapper for runc. +function runc() { + CMDNAME="$(basename "$RUNC")" sane_run __runc "$@" +} + +function setup_runc_cmdline() { + RUNC_CMDLINE=("$RUNC") + [[ -v RUNC_USE_SYSTEMD ]] && RUNC_CMDLINE+=("--systemd-cgroup") + [[ -n "${ROOT:-}" ]] && RUNC_CMDLINE+=("--root" "$ROOT/state") + export RUNC_CMDLINE +} + # Raw wrapper for runc. function __runc() { - "$RUNC" ${RUNC_USE_SYSTEMD+--systemd-cgroup} \ - ${ROOT:+--root "$ROOT/state"} "$@" + setup_runc_cmdline + "${RUNC_CMDLINE[@]}" "$@" } # Wrapper for runc spec. diff --git a/tests/integration/hooks.bats b/tests/integration/hooks.bats index 099337a2b2c..fed50782b2e 100644 --- a/tests/integration/hooks.bats +++ b/tests/integration/hooks.bats @@ -21,7 +21,6 @@ function teardown() { @test "runc create [hook fails]" { for hook in prestart createRuntime createContainer; do echo "testing hook $hook" - # shellcheck disable=SC2016 update_config '.hooks |= {"'$hook'": [{"path": "/bin/true"}, {"path": "/bin/false"}]}' runc create --console-socket "$CONSOLE_SOCKET" test_hooks [ "$status" -ne 0 ] @@ -34,7 +33,6 @@ function teardown() { # All hooks except Poststop. 
for hook in prestart createRuntime createContainer startContainer poststart; do echo "testing hook $hook" - # shellcheck disable=SC2016 update_config '.hooks |= {"'$hook'": [{"path": "/bin/true"}, {"path": "/bin/false"}]}' runc run "test_hook-$hook" [[ "$output" != "Hello World" ]] diff --git a/tests/integration/mounts_propagation.bats b/tests/integration/mounts_propagation.bats new file mode 100644 index 00000000000..909b0d9d514 --- /dev/null +++ b/tests/integration/mounts_propagation.bats @@ -0,0 +1,22 @@ +#!/usr/bin/env bats + +load helpers + +function setup() { + requires root + setup_debian +} + +function teardown() { + teardown_bundle +} + +@test "runc run [rootfsPropagation shared]" { + update_config ' .linux.rootfsPropagation = "shared" ' + + update_config ' .process.args = ["findmnt", "--noheadings", "-o", "PROPAGATION", "/"] ' + + runc run test_shared_rootfs + [ "$status" -eq 0 ] + [ "$output" = "shared" ] +} diff --git a/tests/integration/run.bats b/tests/integration/run.bats index c6e30709402..28964a22a8a 100644 --- a/tests/integration/run.bats +++ b/tests/integration/run.bats @@ -84,7 +84,6 @@ function teardown() { chmod 'a=rwx,ug+s,+t' rootfs/tmp # set all bits mode=$(stat -c %A rootfs/tmp) - # shellcheck disable=SC2016 update_config '.process.args = ["sh", "-c", "stat -c %A /tmp"]' update_config '.mounts += [{"destination": "/tmp", "type": "tmpfs", "source": "tmpfs", "options":["noexec","nosuid","nodev","rprivate"]}]' @@ -94,7 +93,6 @@ function teardown() { } @test "runc run with tmpfs perms" { - # shellcheck disable=SC2016 update_config '.process.args = ["sh", "-c", "stat -c %a /tmp/test"]' update_config '.mounts += [{"destination": "/tmp/test", "type": "tmpfs", "source": "tmpfs", "options": ["mode=0444"]}]' @@ -113,14 +111,12 @@ function teardown() { # so it should use the directory's perms. 
update_config '.mounts[-1].options = []' chmod 0710 rootfs/tmp/test - # shellcheck disable=SC2016 runc run test_tmpfs [ "$status" -eq 0 ] [ "${lines[0]}" = "710" ] # Add back the mode on the mount, and it should use that instead. # Just for fun, use different perms than was used earlier. - # shellcheck disable=SC2016 update_config '.mounts[-1].options = ["mode=0410"]' runc run test_tmpfs [ "$status" -eq 0 ] diff --git a/tests/integration/seccomp-notify.bats b/tests/integration/seccomp-notify.bats index fcbd5664907..b6992e5752a 100644 --- a/tests/integration/seccomp-notify.bats +++ b/tests/integration/seccomp-notify.bats @@ -183,7 +183,7 @@ function scmp_act_notify_template() { @test "runc run [seccomp] (SCMP_ACT_NOTIFY startContainer hook)" { # shellcheck disable=SC2016 # We use single quotes to properly delimit the $1 param to - # update_config(), but this shellshcheck is quite silly and fails if the + # update_config(), but shellcheck is quite silly and fails if the # multi-line string includes some $var (even when it is properly outside of the # single quotes) or when we use this syntax to execute commands in the # string: $(command). 
diff --git a/tests/integration/timens.bats b/tests/integration/timens.bats index fb33e2b2783..10f3fc057d8 100644 --- a/tests/integration/timens.bats +++ b/tests/integration/timens.bats @@ -54,6 +54,26 @@ function teardown() { grep -E '^boottime\s+1337\s+3141519$' <<<"$output" } +# https://github.com/opencontainers/runc/issues/4635 +@test "runc exec [simple timens]" { + requires timens + + update_config '.process.args = ["sleep", "inf"]' + update_config '.linux.namespaces += [{"type": "time"}] + | .linux.timeOffsets = { + "monotonic": { "secs": 7881, "nanosecs": 2718281 }, + "boottime": { "secs": 1337, "nanosecs": 3141519 } + }' + + runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox + [ "$status" -eq 0 ] + + runc exec test_busybox cat /proc/self/timens_offsets + [ "$status" -eq 0 ] + grep -E '^monotonic\s+7881\s+2718281$' <<<"$output" + grep -E '^boottime\s+1337\s+3141519$' <<<"$output" +} + @test "runc run [simple timens + userns]" { requires root requires timens diff --git a/tests/integration/tty.bats b/tests/integration/tty.bats index c95c19a1eea..fba8d8046b3 100644 --- a/tests/integration/tty.bats +++ b/tests/integration/tty.bats @@ -123,7 +123,6 @@ function teardown() { # replace "uid": 0 with "uid": 1000 # and do a similar thing for gid. - # shellcheck disable=SC2016 update_config ' (.. | select(.uid? == 0)) .uid |= 1000 | (.. | select(.gid? == 0)) .gid |= 100' diff --git a/tests/rootless.sh b/tests/rootless.sh index 7f0408508ce..e54cf48bc2e 100755 --- a/tests/rootless.sh +++ b/tests/rootless.sh @@ -185,7 +185,7 @@ for enabled_features in $features_powerset; do # We use `ssh rootless@localhost` instead of `sudo -u rootless` for creating systemd user session. 
# Alternatively we could use `machinectl shell`, but it is known not to work well on SELinux-enabled hosts as of April 2020: # https://bugzilla.redhat.com/show_bug.cgi?id=1788616 - ssh -t -t -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -i "$HOME/rootless.key" rootless@localhost -- PATH="$PATH" RUNC_USE_SYSTEMD="$RUNC_USE_SYSTEMD" bats -t "$ROOT/tests/integration$ROOTLESS_TESTPATH" + ssh -t -t -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -i "$HOME/.ssh/rootless.key" rootless@localhost -- PATH="$PATH" RUNC_USE_SYSTEMD="$RUNC_USE_SYSTEMD" bats -t "$ROOT/tests/integration$ROOTLESS_TESTPATH" else sudo -HE -u rootless PATH="$PATH" "$(which bats)" -t "$ROOT/tests/integration$ROOTLESS_TESTPATH" fi diff --git a/update.go b/update.go index 4fef85f71fb..c816280d1da 100644 --- a/update.go +++ b/update.go @@ -367,8 +367,12 @@ other options are ignored. return err } } - config.IntelRdt.L3CacheSchema = l3CacheSchema - config.IntelRdt.MemBwSchema = memBwSchema + if l3CacheSchema != "" { + config.IntelRdt.L3CacheSchema = l3CacheSchema + } + if memBwSchema != "" { + config.IntelRdt.MemBwSchema = memBwSchema + } } // XXX(kolyshkin@): currently "runc update" is unable to change diff --git a/utils_linux.go b/utils_linux.go index feb6ef80c4a..cf6bcfc5a93 100644 --- a/utils_linux.go +++ b/utils_linux.go @@ -3,6 +3,7 @@ package main import ( "errors" "fmt" + "io/fs" "net" "os" "path/filepath" @@ -15,6 +16,7 @@ import ( "github.com/urfave/cli" "golang.org/x/sys/unix" + "github.com/opencontainers/runc/internal/pathrs" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/specconv" @@ -228,10 +230,14 @@ func (r *runner) run(config *specs.Process) (int, error) { process.ExtraFiles = append(process.ExtraFiles, r.listenFDs...) 
} baseFd := 3 + len(process.ExtraFiles) - procSelfFd, closer := utils.ProcThreadSelf("fd/") + procSelfFd, closer, err := pathrs.ProcThreadSelfOpen("fd/", unix.O_DIRECTORY|unix.O_CLOEXEC) + if err != nil { + return -1, err + } defer closer() + defer procSelfFd.Close() for i := baseFd; i < baseFd+r.preserveFDs; i++ { - _, err = os.Stat(filepath.Join(procSelfFd, strconv.Itoa(i))) + err := unix.Faccessat(int(procSelfFd.Fd()), strconv.Itoa(i), unix.F_OK, 0) if err != nil { return -1, fmt.Errorf("unable to stat preserved-fd %d (of %d): %w", i-baseFd, r.preserveFDs, err) } @@ -444,3 +450,9 @@ func setupPidfdSocket(process *libcontainer.Process, sockpath string) (_clean fu conn.Close() }, nil } + +func maybeLogCgroupWarning(op string, err error) { + if errors.Is(err, fs.ErrPermission) { + logrus.Warn("runc " + op + " failure might be caused by lack of full access to cgroups") + } +} diff --git a/vendor/github.com/containerd/console/console_other.go b/vendor/github.com/containerd/console/console_other.go index 933dfadddae..968c5771c8d 100644 --- a/vendor/github.com/containerd/console/console_other.go +++ b/vendor/github.com/containerd/console/console_other.go @@ -1,5 +1,5 @@ -//go:build !darwin && !freebsd && !linux && !netbsd && !openbsd && !solaris && !windows && !zos -// +build !darwin,!freebsd,!linux,!netbsd,!openbsd,!solaris,!windows,!zos +//go:build !darwin && !freebsd && !linux && !netbsd && !openbsd && !windows && !zos +// +build !darwin,!freebsd,!linux,!netbsd,!openbsd,!windows,!zos /* Copyright The containerd Authors. 
diff --git a/vendor/github.com/containerd/console/console_unix.go b/vendor/github.com/containerd/console/console_unix.go index 161f5d126cb..aa4c696234a 100644 --- a/vendor/github.com/containerd/console/console_unix.go +++ b/vendor/github.com/containerd/console/console_unix.go @@ -31,6 +31,15 @@ func NewPty() (Console, string, error) { if err != nil { return nil, "", err } + return NewPtyFromFile(f) +} + +// NewPtyFromFile creates a new pty pair, just like [NewPty] except that the +// provided [os.File] is used as the master rather than automatically creating +// a new master from /dev/ptmx. The ownership of [os.File] is passed to the +// returned [Console], so the caller must be careful to not call Close on the +// underlying file. +func NewPtyFromFile(f File) (Console, string, error) { slave, err := ptsname(f) if err != nil { return nil, "", err diff --git a/vendor/github.com/containerd/console/tc_darwin.go b/vendor/github.com/containerd/console/tc_darwin.go index 787154580f6..77c695a40fb 100644 --- a/vendor/github.com/containerd/console/tc_darwin.go +++ b/vendor/github.com/containerd/console/tc_darwin.go @@ -18,7 +18,6 @@ package console import ( "fmt" - "os" "golang.org/x/sys/unix" ) @@ -30,12 +29,12 @@ const ( // unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. // unlockpt should be called before opening the slave side of a pty. -func unlockpt(f *os.File) error { +func unlockpt(f File) error { return unix.IoctlSetPointerInt(int(f.Fd()), unix.TIOCPTYUNLK, 0) } // ptsname retrieves the name of the first available pts for the given master. 
-func ptsname(f *os.File) (string, error) { +func ptsname(f File) (string, error) { n, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCPTYGNAME) if err != nil { return "", err diff --git a/vendor/github.com/containerd/console/tc_freebsd_cgo.go b/vendor/github.com/containerd/console/tc_freebsd_cgo.go index 33282579411..627f7d55a99 100644 --- a/vendor/github.com/containerd/console/tc_freebsd_cgo.go +++ b/vendor/github.com/containerd/console/tc_freebsd_cgo.go @@ -21,7 +21,6 @@ package console import ( "fmt" - "os" "golang.org/x/sys/unix" ) @@ -39,7 +38,7 @@ const ( // unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. // unlockpt should be called before opening the slave side of a pty. -func unlockpt(f *os.File) error { +func unlockpt(f File) error { fd := C.int(f.Fd()) if _, err := C.unlockpt(fd); err != nil { C.close(fd) @@ -49,7 +48,7 @@ func unlockpt(f *os.File) error { } // ptsname retrieves the name of the first available pts for the given master. -func ptsname(f *os.File) (string, error) { +func ptsname(f File) (string, error) { n, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCGPTN) if err != nil { return "", err diff --git a/vendor/github.com/containerd/console/tc_freebsd_nocgo.go b/vendor/github.com/containerd/console/tc_freebsd_nocgo.go index 18a9b9cbea9..434ba46efc1 100644 --- a/vendor/github.com/containerd/console/tc_freebsd_nocgo.go +++ b/vendor/github.com/containerd/console/tc_freebsd_nocgo.go @@ -21,7 +21,6 @@ package console import ( "fmt" - "os" "golang.org/x/sys/unix" ) @@ -42,12 +41,12 @@ const ( // unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. // unlockpt should be called before opening the slave side of a pty. -func unlockpt(f *os.File) error { +func unlockpt(f File) error { panic("unlockpt() support requires cgo.") } // ptsname retrieves the name of the first available pts for the given master. 
-func ptsname(f *os.File) (string, error) { +func ptsname(f File) (string, error) { n, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCGPTN) if err != nil { return "", err diff --git a/vendor/github.com/containerd/console/tc_linux.go b/vendor/github.com/containerd/console/tc_linux.go index 7d552ea4ba1..e98dc022dc6 100644 --- a/vendor/github.com/containerd/console/tc_linux.go +++ b/vendor/github.com/containerd/console/tc_linux.go @@ -18,7 +18,6 @@ package console import ( "fmt" - "os" "unsafe" "golang.org/x/sys/unix" @@ -31,7 +30,7 @@ const ( // unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. // unlockpt should be called before opening the slave side of a pty. -func unlockpt(f *os.File) error { +func unlockpt(f File) error { var u int32 // XXX do not use unix.IoctlSetPointerInt here, see commit dbd69c59b81. if _, _, err := unix.Syscall(unix.SYS_IOCTL, f.Fd(), unix.TIOCSPTLCK, uintptr(unsafe.Pointer(&u))); err != 0 { @@ -41,7 +40,7 @@ func unlockpt(f *os.File) error { } // ptsname retrieves the name of the first available pts for the given master. -func ptsname(f *os.File) (string, error) { +func ptsname(f File) (string, error) { var u uint32 // XXX do not use unix.IoctlGetInt here, see commit dbd69c59b81. if _, _, err := unix.Syscall(unix.SYS_IOCTL, f.Fd(), unix.TIOCGPTN, uintptr(unsafe.Pointer(&u))); err != 0 { diff --git a/vendor/github.com/containerd/console/tc_netbsd.go b/vendor/github.com/containerd/console/tc_netbsd.go index 71227aefdff..73cf4397772 100644 --- a/vendor/github.com/containerd/console/tc_netbsd.go +++ b/vendor/github.com/containerd/console/tc_netbsd.go @@ -18,7 +18,6 @@ package console import ( "bytes" - "os" "golang.org/x/sys/unix" ) @@ -31,12 +30,12 @@ const ( // unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. // unlockpt should be called before opening the slave side of a pty. 
// This does not exist on NetBSD, it does not allocate controlling terminals on open -func unlockpt(f *os.File) error { +func unlockpt(f File) error { return nil } // ptsname retrieves the name of the first available pts for the given master. -func ptsname(f *os.File) (string, error) { +func ptsname(f File) (string, error) { ptm, err := unix.IoctlGetPtmget(int(f.Fd()), unix.TIOCPTSNAME) if err != nil { return "", err diff --git a/vendor/github.com/containerd/console/tc_openbsd_cgo.go b/vendor/github.com/containerd/console/tc_openbsd_cgo.go index 0e76f6cc3e0..46f4250c4d6 100644 --- a/vendor/github.com/containerd/console/tc_openbsd_cgo.go +++ b/vendor/github.com/containerd/console/tc_openbsd_cgo.go @@ -20,8 +20,6 @@ package console import ( - "os" - "golang.org/x/sys/unix" ) @@ -34,7 +32,7 @@ const ( ) // ptsname retrieves the name of the first available pts for the given master. -func ptsname(f *os.File) (string, error) { +func ptsname(f File) (string, error) { ptspath, err := C.ptsname(C.int(f.Fd())) if err != nil { return "", err @@ -44,7 +42,7 @@ func ptsname(f *os.File) (string, error) { // unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. // unlockpt should be called before opening the slave side of a pty. 
-func unlockpt(f *os.File) error { +func unlockpt(f File) error { if _, err := C.grantpt(C.int(f.Fd())); err != nil { return err } diff --git a/vendor/github.com/containerd/console/tc_openbsd_nocgo.go b/vendor/github.com/containerd/console/tc_openbsd_nocgo.go index dca92418b0e..a8f9f6c25cb 100644 --- a/vendor/github.com/containerd/console/tc_openbsd_nocgo.go +++ b/vendor/github.com/containerd/console/tc_openbsd_nocgo.go @@ -29,8 +29,6 @@ package console import ( - "os" - "golang.org/x/sys/unix" ) @@ -39,10 +37,10 @@ const ( cmdTcSet = unix.TIOCSETA ) -func ptsname(f *os.File) (string, error) { +func ptsname(f File) (string, error) { panic("ptsname() support requires cgo.") } -func unlockpt(f *os.File) error { +func unlockpt(f File) error { panic("unlockpt() support requires cgo.") } diff --git a/vendor/github.com/containerd/console/tc_zos.go b/vendor/github.com/containerd/console/tc_zos.go index fc90ba5fb86..23b0bd2820b 100644 --- a/vendor/github.com/containerd/console/tc_zos.go +++ b/vendor/github.com/containerd/console/tc_zos.go @@ -17,7 +17,6 @@ package console import ( - "os" "strings" "golang.org/x/sys/unix" @@ -29,11 +28,11 @@ const ( ) // unlockpt is a no-op on zos. -func unlockpt(_ *os.File) error { +func unlockpt(File) error { return nil } // ptsname retrieves the name of the first available pts for the given master. -func ptsname(f *os.File) (string, error) { +func ptsname(f File) (string, error) { return "/dev/ttyp" + strings.TrimPrefix(f.Name(), "/dev/ptyp"), nil } diff --git a/vendor/github.com/cyphar/filepath-securejoin/.golangci.yml b/vendor/github.com/cyphar/filepath-securejoin/.golangci.yml new file mode 100644 index 00000000000..e965034ed36 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/.golangci.yml @@ -0,0 +1,56 @@ +# SPDX-License-Identifier: MPL-2.0 + +# Copyright (C) 2025 Aleksa Sarai +# Copyright (C) 2025 SUSE LLC +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. + +version: "2" + +linters: + enable: + - asasalint + - asciicheck + - containedctx + - contextcheck + - errcheck + - errorlint + - exhaustive + - forcetypeassert + - godot + - goprintffuncname + - govet + - importas + - ineffassign + - makezero + - misspell + - musttag + - nilerr + - nilnesserr + - nilnil + - noctx + - prealloc + - revive + - staticcheck + - testifylint + - unconvert + - unparam + - unused + - usetesting + settings: + govet: + enable: + - nilness + testifylint: + enable-all: true + +formatters: + enable: + - gofumpt + - goimports + settings: + goimports: + local-prefixes: + - github.com/cyphar/filepath-securejoin diff --git a/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md b/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md index 05657248fa3..3faee0bc55b 100644 --- a/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md +++ b/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md @@ -4,9 +4,221 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). -## [Unreleased] ## +## [Unreleased 0.5.z] ## + +## [0.5.1] - 2025-10-31 ## + +> Spooky scary skeletons send shivers down your spine! + +### Changed ### +- `openat2` can return `-EAGAIN` if it detects a possible attack in certain + scenarios (namely if there was a rename or mount while walking a path with a + `..` component). While this is necessary to avoid a denial-of-service in the + kernel, it does require retry loops in userspace. + + In previous versions, `pathrs-lite` would retry `openat2` 32 times before + returning an error, but we've received user reports that this limit can be + hit on systems with very heavy load. 
In some synthetic benchmarks (testing + the worst-case of an attacker doing renames in a tight loop on every core of + a 16-core machine) we managed to get a ~3% failure rate in runc. We have + improved this situation in two ways: + + * We have now increased this limit to 128, which should be good enough for + most use-cases without becoming a denial-of-service vector (the number of + syscalls called by the `O_PATH` resolver in a typical case is within the + same ballpark). The same benchmarks show a failure rate of ~0.12% which + (while not zero) is probably sufficient for most users. + + * In addition, we now return a `unix.EAGAIN` error that is bubbled up and can + be detected by callers. This means that callers with stricter requirements + to avoid spurious errors can choose to do their own infinite `EAGAIN` retry + loop (though we would strongly recommend users use time-based deadlines in + such retry loops to avoid potentially unbounded denials-of-service). + +## [0.5.0] - 2025-09-26 ## + +> Let the past die. Kill it if you have to. + +> **NOTE**: With this release, some parts of +> `github.com/cyphar/filepath-securejoin` are now licensed under the Mozilla +> Public License (version 2). Please see [COPYING.md][] as well as the +> license header in each file for more details. + +[COPYING.md]: ./COPYING.md + +### Breaking ### +- The new API introduced in the [0.3.0][] release has been moved to a new + subpackage called `pathrs-lite`. This was primarily done to better indicate + the split between the new and old APIs, as well as indicate to users the + purpose of this subpackage (it is a less complete version of [libpathrs][]). + + We have added some wrappers to the top-level package to ease the transition, + but those are deprecated and will be removed in the next minor release of + filepath-securejoin. Users should update their import paths.
+ + This new subpackage has also been relicensed under the Mozilla Public License + (version 2), please see [COPYING.md][] for more details. + +### Added ### +- Most of the key bits of the safe `procfs` API have now been exported and are + available in `github.com/cyphar/filepath-securejoin/pathrs-lite/procfs`. At + the moment this primarily consists of a new `procfs.Handle` API: + + * `OpenProcRoot` returns a new handle to `/proc`, endeavouring to make it + safe if possible (`subset=pid` to protect against mistaken write attacks + and leaks, as well as using `fsopen(2)` to avoid racing mount attacks). + + `OpenUnsafeProcRoot` returns a handle without attempting to create one + with `subset=pid`, which makes it more dangerous to leak. Most users + should use `OpenProcRoot` (even if you need to use `ProcRoot` as the base + of an operation, as filepath-securejoin will internally open a handle when + necessary). + + * The `(*procfs.Handle).Open*` family of methods lets you get a safe + `O_PATH` handle to subpaths within `/proc` for certain subpaths. + + For `OpenThreadSelf`, the returned `ProcThreadSelfCloser` needs to be + called after you completely finish using the handle (this is necessary + because Go is multi-threaded and `ProcThreadSelf` references + `/proc/thread-self` which may disappear if we do not + `runtime.LockOSThread` -- `ProcThreadSelfCloser` is currently equivalent + to `runtime.UnlockOSThread`). + + Note that you cannot open any `procfs` symlinks (most notably magic-links) + using this API. At the moment, filepath-securejoin does not support this + feature (but [libpathrs][] does). + + * `ProcSelfFdReadlink` lets you get the in-kernel path representation of a + file descriptor (think `readlink("/proc/self/fd/...")`), except that we + verify that there aren't any tricky overmounts that could fool the + process.
+ + Please be aware that the returned string is simply a snapshot at that + particular moment, and an attacker could move the file being pointed to. + In addition, complex namespace configurations could result in non-sensical + or confusing paths to be returned. The value received from this function + should only be used as secondary verification of some security property, + not as proof that a particular handle has a particular path. + + The procfs handle used internally by the API is the same as the rest of + `filepath-securejoin` (for privileged programs this is usually a private + in-process `procfs` instance created with `fsopen(2)`). + + As before, this is intended as a stop-gap before users migrate to + [libpathrs][], which provides a far more extensive safe `procfs` API and is + generally more robust. + +- Previously, the hardened procfs implementation (used internally within + `Reopen` and `Open(at)InRoot`) only protected against overmount attacks on + systems with `openat2(2)` (Linux 5.6) or systems with `fsopen(2)` or + `open_tree(2)` (Linux 5.2) and programs with privileges to use them (with + some caveats about locked mounts that probably affect very few users). For + other users, an attacker with the ability to create malicious mounts (on most + systems, a sysadmin) could trick you into operating on files you didn't + expect. This attack only really makes sense in the context of container + runtime implementations. + + This was considered a reasonable trade-off, as the long-term intention was to + get all users to just switch to [libpathrs][] if they wanted to use the safe + `procfs` API (which had more extensive protections, and is what these new + protections in `filepath-securejoin` are based on). However, as the API + is now being exported it seems unwise to advertise the API as "safe" if we do + not protect against known attacks. + + The procfs API is now more protected against attackers on systems lacking the + aforementioned protections. 
However, the most comprehensive of these + protections effectively rely on [`statx(STATX_MNT_ID)`][statx.2] (Linux 5.8). + On older kernel versions, there is no effective protection (there is some + minimal protection against non-`procfs` filesystem components but a + sufficiently clever attacker can work around those). In addition, + `STATX_MNT_ID` is vulnerable to mount ID reuse attacks by sufficiently + motivated and privileged attackers -- this problem is mitigated with + `STATX_MNT_ID_UNIQUE` (Linux 6.8) but that raises the minimum kernel version + for more protection. + + The fact that these protections are quite limited despite needing a fair bit + of extra code to handle was one of the primary reasons we did not initially + implement this in `filepath-securejoin` ([libpathrs][] supports all of this, + of course). + +### Fixed ### +- RHEL 8 kernels have backports of `fsopen(2)` but in some testing we've found + that it has very bad (and very difficult to debug) performance issues, and so + we will explicitly refuse to use `fsopen(2)` if the running kernel version is + pre-5.2 and will instead fall back to `open("/proc")`. + +[CVE-2024-21626]: https://github.com/opencontainers/runc/security/advisories/GHSA-xr7r-f8xq-vfvv +[libpathrs]: https://github.com/cyphar/libpathrs +[statx.2]: https://www.man7.org/linux/man-pages/man2/statx.2.html + +## [0.4.1] - 2025-01-28 ## + +### Fixed ### +- The restriction added for `root` paths passed to `SecureJoin` in 0.4.0 was + found to be too strict and caused some regressions when folks tried to + update, so this restriction has been relaxed to only return an error if the + path contains a `..` component. We still recommend users use `filepath.Clean` + (and even `filepath.EvalSymlinks`) on the `root` path they are using, but at + least you will no longer be punished for "trivial" unclean paths.
+ +## [0.4.0] - 2025-01-13 ## + +### Breaking #### +- `SecureJoin(VFS)` will now return an error if the provided `root` is not a + `filepath.Clean`'d path. + + While it is ultimately the responsibility of the caller to ensure the root is + a safe path to use, passing a path like `/symlink/..` as a root would result + in the `SecureJoin`'d path being placed in `/` even though `/symlink/..` + might be a different directory, and so we should more strongly discourage + such usage. + + All major users of `securejoin.SecureJoin` already ensure that the paths they + provide are safe (and this is ultimately a question of user error), but + removing this foot-gun is probably a good idea. Of course, this is + necessarily a breaking API change (though we expect no real users to be + affected by it). + + Thanks to [Erik Sjölund](https://github.com/eriksjolund), who initially + reported this issue as a possible security issue. + +- `MkdirAll` and `MkdirHandle` now take an `os.FileMode`-style mode argument + instead of a raw `unix.S_*`-style mode argument, which may cause compile-time + type errors depending on how you use `filepath-securejoin`. For most users, + there will be no change in behaviour aside from the type change (as the + bottom `0o777` bits are the same in both formats, and most users are probably + only using those bits). + + However, if you were using `unix.S_ISVTX` to set the sticky bit with + `MkdirAll(Handle)` you will need to switch to `os.ModeSticky` otherwise you + will get a runtime error with this update. In addition, the error message you + will get from passing `unix.S_ISUID` and `unix.S_ISGID` will be different as + they are treated as invalid bits now (note that previously passing said bits + was also an error). + +## [0.3.6] - 2024-12-17 ## + +### Compatibility ### +- The minimum Go version requirement for `filepath-securejoin` is now Go 1.18 + (we use generics internally). 
+ + For reference, `filepath-securejoin@v0.3.0` somewhat-arbitrarily bumped the + Go version requirement to 1.21. + + While we did make some use of Go 1.21 stdlib features (and in principle Go + versions <= 1.21 are no longer even supported by upstream anymore), some + downstreams have complained that the version bump has meant that they have to + do workarounds when backporting fixes that use the new `filepath-securejoin` + API onto old branches. This is not an ideal situation, but since using this + library is probably better for most downstreams than a hand-rolled + workaround, we now have compatibility shims that allow us to build on older + Go versions. +- Lower minimum version requirement for `golang.org/x/sys` to `v0.18.0` (we + need the wrappers for `fsconfig(2)`), which should also make backporting + patches to older branches easier. ## [0.3.5] - 2024-12-06 ## + ### Fixed ### - `MkdirAll` will now no longer return an `EEXIST` error if two racing processes are creating the same directory. We will still verify that the path @@ -106,7 +318,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/). safe to start migrating to as we have extensive tests ensuring they behave correctly and are safe against various races and other attacks. -[libpathrs]: https://github.com/openSUSE/libpathrs +[libpathrs]: https://github.com/cyphar/libpathrs [open.2]: https://www.man7.org/linux/man-pages/man2/open.2.html ## [0.2.5] - 2024-05-03 ## @@ -171,7 +383,12 @@ This is our first release of `github.com/cyphar/filepath-securejoin`, containing a full implementation with a coverage of 93.5% (the only missing cases are the error cases, which are hard to mocktest at the moment). 
-[Unreleased]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.5...HEAD +[Unreleased 0.5.z]: https://github.com/cyphar/filepath-securejoin/compare/v0.5.1...release-0.5 +[0.5.1]: https://github.com/cyphar/filepath-securejoin/compare/v0.5.0...v0.5.1 +[0.5.0]: https://github.com/cyphar/filepath-securejoin/compare/v0.4.1...v0.5.0 +[0.4.1]: https://github.com/cyphar/filepath-securejoin/compare/v0.4.0...v0.4.1 +[0.4.0]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.6...v0.4.0 +[0.3.6]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.5...v0.3.6 [0.3.5]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.4...v0.3.5 [0.3.4]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.3...v0.3.4 [0.3.3]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.2...v0.3.3 diff --git a/vendor/github.com/cyphar/filepath-securejoin/COPYING.md b/vendor/github.com/cyphar/filepath-securejoin/COPYING.md new file mode 100644 index 00000000000..520e822b184 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/COPYING.md @@ -0,0 +1,447 @@ +## COPYING ## + +`SPDX-License-Identifier: BSD-3-Clause AND MPL-2.0` + +This project is made up of code licensed under different licenses. Which code +you use will have an impact on whether only one or both licenses apply to your +usage of this library. + +Note that **each file** in this project individually has a code comment at the +start describing the license of that particular file -- this is the most +accurate license information of this project; in case there is any conflict +between this document and the comment at the start of a file, the comment shall +take precedence. The only purpose of this document is to work around [a known +technical limitation of pkg.go.dev's license checking tool when dealing with +non-trivial project licenses][go75067]. 
+ +[go75067]: https://go.dev/issue/75067 + +### `BSD-3-Clause` ### + +At time of writing, the following files and directories are licensed under the +BSD-3-Clause license: + + * `doc.go` + * `join*.go` + * `vfs.go` + * `internal/consts/*.go` + * `pathrs-lite/internal/gocompat/*.go` + * `pathrs-lite/internal/kernelversion/*.go` + +The text of the BSD-3-Clause license used by this project is the following (the +text is also available from the [`LICENSE.BSD`](./LICENSE.BSD) file): + +``` +Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved. +Copyright (C) 2017-2024 SUSE LLC. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +``` + +### `MPL-2.0` ### + +All other files (unless otherwise marked) are licensed under the Mozilla Public +License (version 2.0). + +The text of the Mozilla Public License (version 2.0) is the following (the text +is also available from the [`LICENSE.MPL-2.0`](./LICENSE.MPL-2.0) file): + +``` +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. 
"Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. 
Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. 
Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. 
Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. 
However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. 
Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. 
* +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. 
Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at https://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. 
+ +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. +``` diff --git a/vendor/github.com/cyphar/filepath-securejoin/LICENSE b/vendor/github.com/cyphar/filepath-securejoin/LICENSE.BSD similarity index 100% rename from vendor/github.com/cyphar/filepath-securejoin/LICENSE rename to vendor/github.com/cyphar/filepath-securejoin/LICENSE.BSD diff --git a/vendor/github.com/cyphar/filepath-securejoin/LICENSE.MPL-2.0 b/vendor/github.com/cyphar/filepath-securejoin/LICENSE.MPL-2.0 new file mode 100644 index 00000000000..d0a1fa1482e --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/LICENSE.MPL-2.0 @@ -0,0 +1,373 @@ +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. 
"Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. 
+ +2. License Grants and Conditions +-------------------------------- + +2.1. Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. 
Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. 
Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. 
However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. 
Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. 
* +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. 
Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at https://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. 
+ +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/vendor/github.com/cyphar/filepath-securejoin/README.md b/vendor/github.com/cyphar/filepath-securejoin/README.md index eaeb53fcd0a..6673abfc842 100644 --- a/vendor/github.com/cyphar/filepath-securejoin/README.md +++ b/vendor/github.com/cyphar/filepath-securejoin/README.md @@ -67,7 +67,8 @@ func SecureJoin(root, unsafePath string) (string, error) { [libpathrs]: https://github.com/openSUSE/libpathrs [go#20126]: https://github.com/golang/go/issues/20126 -### New API ### +### New API ### +[#new-api]: #new-api While we recommend users switch to [libpathrs][libpathrs] as soon as it has a stable release, some methods implemented by libpathrs have been ported to this @@ -165,5 +166,19 @@ after `MkdirAll`). ### License ### -The license of this project is the same as Go, which is a BSD 3-clause license -available in the `LICENSE` file. +`SPDX-License-Identifier: BSD-3-Clause AND MPL-2.0` + +Some of the code in this project is derived from Go, and is licensed under a +BSD 3-clause license (available in `LICENSE.BSD`). Other files (many of which +are derived from [libpathrs][libpathrs]) are licensed under the Mozilla Public +License version 2.0 (available in `LICENSE.MPL-2.0`). If you are using the +["New API" described above][#new-api], you are probably using code from files +released under this license. + +Every source file in this project has a copyright header describing its +license. Please check the license headers of each file to see what license +applies to it. + +See [COPYING.md](./COPYING.md) for some more details. 
+ +[umoci]: https://github.com/opencontainers/umoci diff --git a/vendor/github.com/cyphar/filepath-securejoin/VERSION b/vendor/github.com/cyphar/filepath-securejoin/VERSION index c2c0004f0e2..4b9fcbec101 100644 --- a/vendor/github.com/cyphar/filepath-securejoin/VERSION +++ b/vendor/github.com/cyphar/filepath-securejoin/VERSION @@ -1 +1 @@ -0.3.5 +0.5.1 diff --git a/vendor/github.com/cyphar/filepath-securejoin/codecov.yml b/vendor/github.com/cyphar/filepath-securejoin/codecov.yml new file mode 100644 index 00000000000..ff284dbfaf9 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/codecov.yml @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: MPL-2.0 + +# Copyright (C) 2025 Aleksa Sarai +# Copyright (C) 2025 SUSE LLC +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. + +comment: + layout: "condensed_header, reach, diff, components, condensed_files, condensed_footer" + require_changes: true + branches: + - main + +coverage: + range: 60..100 + status: + project: + default: + target: 85% + threshold: 0% + patch: + default: + target: auto + informational: true + +github_checks: + annotations: false diff --git a/vendor/github.com/cyphar/filepath-securejoin/deprecated_linux.go b/vendor/github.com/cyphar/filepath-securejoin/deprecated_linux.go new file mode 100644 index 00000000000..3e427b16409 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/deprecated_linux.go @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +package securejoin + +import ( + "github.com/cyphar/filepath-securejoin/pathrs-lite" +) + +var ( + // MkdirAll is a wrapper around [pathrs.MkdirAll]. + // + // Deprecated: You should use [pathrs.MkdirAll] directly instead. This + // wrapper will be removed in filepath-securejoin v0.6. + MkdirAll = pathrs.MkdirAll + + // MkdirAllHandle is a wrapper around [pathrs.MkdirAllHandle]. + // + // Deprecated: You should use [pathrs.MkdirAllHandle] directly instead. + // This wrapper will be removed in filepath-securejoin v0.6. + MkdirAllHandle = pathrs.MkdirAllHandle + + // OpenInRoot is a wrapper around [pathrs.OpenInRoot]. + // + // Deprecated: You should use [pathrs.OpenInRoot] directly instead. This + // wrapper will be removed in filepath-securejoin v0.6. + OpenInRoot = pathrs.OpenInRoot + + // OpenatInRoot is a wrapper around [pathrs.OpenatInRoot]. + // + // Deprecated: You should use [pathrs.OpenatInRoot] directly instead. This + // wrapper will be removed in filepath-securejoin v0.6. + OpenatInRoot = pathrs.OpenatInRoot + + // Reopen is a wrapper around [pathrs.Reopen]. + // + // Deprecated: You should use [pathrs.Reopen] directly instead. This + // wrapper will be removed in filepath-securejoin v0.6. + Reopen = pathrs.Reopen +) diff --git a/vendor/github.com/cyphar/filepath-securejoin/doc.go b/vendor/github.com/cyphar/filepath-securejoin/doc.go index 1ec7d065ef4..1438fc9c09c 100644 --- a/vendor/github.com/cyphar/filepath-securejoin/doc.go +++ b/vendor/github.com/cyphar/filepath-securejoin/doc.go @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: BSD-3-Clause + // Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved. // Copyright (C) 2017-2024 SUSE LLC. All rights reserved. // Use of this source code is governed by a BSD-style @@ -14,14 +16,13 @@ // **not** safe against race conditions where an attacker changes the // filesystem after (or during) the [SecureJoin] operation. 
// -// The new API is made up of [OpenInRoot] and [MkdirAll] (and derived -// functions). These are safe against racing attackers and have several other -// protections that are not provided by the legacy API. There are many more -// operations that most programs expect to be able to do safely, but we do not -// provide explicit support for them because we want to encourage users to -// switch to [libpathrs](https://github.com/openSUSE/libpathrs) which is a -// cross-language next-generation library that is entirely designed around -// operating on paths safely. +// The new API is available in the [pathrs-lite] subpackage, and provides +// protections against racing attackers as well as several other key +// protections against attacks often seen by container runtimes. As the name +// suggests, [pathrs-lite] is a stripped down (pure Go) reimplementation of +// [libpathrs]. The main APIs provided are [OpenInRoot], [MkdirAll], and +// [procfs.Handle] -- other APIs are not planned to be ported. The long-term +// goal is for users to migrate to [libpathrs] which is more fully-featured. // // securejoin has been used by several container runtimes (Docker, runc, // Kubernetes, etc) for quite a few years as a de-facto standard for operating @@ -31,9 +32,16 @@ // API as soon as possible (or even better, switch to libpathrs). // // This project was initially intended to be included in the Go standard -// library, but [it was rejected](https://go.dev/issue/20126). There is now a -// [new Go proposal](https://go.dev/issue/67002) for a safe path resolution API -// that shares some of the goals of filepath-securejoin. However, that design -// is intended to work like `openat2(RESOLVE_BENEATH)` which does not fit the -// usecase of container runtimes and most system tools. +// library, but it was rejected (see https://go.dev/issue/20126). Much later, +// [os.Root] was added to the Go stdlib that shares some of the goals of +// filepath-securejoin.
However, its design is intended to work like +// openat2(RESOLVE_BENEATH) which does not fit the usecase of container +// runtimes and most system tools. +// +// [pathrs-lite]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin/pathrs-lite +// [libpathrs]: https://github.com/openSUSE/libpathrs +// [OpenInRoot]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin/pathrs-lite#OpenInRoot +// [MkdirAll]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin/pathrs-lite#MkdirAll +// [procfs.Handle]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs#Handle +// [os.Root]: https://pkg.go.dev/os#Root package securejoin diff --git a/vendor/github.com/cyphar/filepath-securejoin/internal/consts/consts.go b/vendor/github.com/cyphar/filepath-securejoin/internal/consts/consts.go new file mode 100644 index 00000000000..c69c4da91ee --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/internal/consts/consts.go @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: BSD-3-Clause + +// Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved. +// Copyright (C) 2017-2025 SUSE LLC. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package consts contains the definitions of internal constants used +// throughout filepath-securejoin. +package consts + +// MaxSymlinkLimit is the maximum number of symlinks that can be encountered +// during a single lookup before returning -ELOOP. At time of writing, Linux +// has an internal limit of 40. +const MaxSymlinkLimit = 255 diff --git a/vendor/github.com/cyphar/filepath-securejoin/join.go b/vendor/github.com/cyphar/filepath-securejoin/join.go index e0ee3f2b57a..199c1d83924 100644 --- a/vendor/github.com/cyphar/filepath-securejoin/join.go +++ b/vendor/github.com/cyphar/filepath-securejoin/join.go @@ -1,5 +1,7 @@ +// SPDX-License-Identifier: BSD-3-Clause + // Copyright (C) 2014-2015 Docker Inc & Go Authors.
All rights reserved. -// Copyright (C) 2017-2024 SUSE LLC. All rights reserved. +// Copyright (C) 2017-2025 SUSE LLC. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -11,9 +13,9 @@ import ( "path/filepath" "strings" "syscall" -) -const maxSymlinkLimit = 255 + "github.com/cyphar/filepath-securejoin/internal/consts" +) // IsNotExist tells you if err is an error that implies that either the path // accessed does not exist (or path components don't exist). This is @@ -24,12 +26,38 @@ func IsNotExist(err error) bool { return errors.Is(err, os.ErrNotExist) || errors.Is(err, syscall.ENOTDIR) || errors.Is(err, syscall.ENOENT) } -// SecureJoinVFS joins the two given path components (similar to [filepath.Join]) except -// that the returned path is guaranteed to be scoped inside the provided root -// path (when evaluated). Any symbolic links in the path are evaluated with the -// given root treated as the root of the filesystem, similar to a chroot. The -// filesystem state is evaluated through the given [VFS] interface (if nil, the -// standard [os].* family of functions are used). +// errUnsafeRoot is returned if the user provides SecureJoinVFS with a path +// that contains ".." components. +var errUnsafeRoot = errors.New("root path provided to SecureJoin contains '..' components") + +// stripVolume just gets rid of the Windows volume included in a path. Based on +// some godbolt tests, the Go compiler is smart enough to make this a no-op on +// Linux. +func stripVolume(path string) string { + return path[len(filepath.VolumeName(path)):] +} + +// hasDotDot checks if the path contains ".." components in a platform-agnostic +// way. +func hasDotDot(path string) bool { + // If we are on Windows, strip any volume letters. It turns out that + // C:..\foo may (or may not) be a valid pathname and we need to handle that + // leading "..". 
+ path = stripVolume(path) + // Look for "/../" in the path, but we need to handle leading and trailing + // ".."s by adding separators. Doing this with filepath.Separator is ugly + // so just convert to Unix-style "/" first. + path = filepath.ToSlash(path) + return strings.Contains("/"+path+"/", "/../") +} + +// SecureJoinVFS joins the two given path components (similar to +// [filepath.Join]) except that the returned path is guaranteed to be scoped +// inside the provided root path (when evaluated). Any symbolic links in the +// path are evaluated with the given root treated as the root of the +// filesystem, similar to a chroot. The filesystem state is evaluated through +// the given [VFS] interface (if nil, the standard [os].* family of functions +// are used). // // Note that the guarantees provided by this function only apply if the path // components in the returned string are not modified (in other words are not @@ -46,7 +74,22 @@ func IsNotExist(err error) bool { // provided via direct input or when evaluating symlinks. Therefore: // // "C:\Temp" + "D:\path\to\file.txt" results in "C:\Temp\path\to\file.txt" -func SecureJoinVFS(root, unsafePath string, vfs VFS) (string, error) { +// +// If the provided root is not [filepath.Clean] then an error will be returned, +// as such root paths are bordering on somewhat unsafe and using such paths is +// not best practice. We also strongly suggest that any root path is first +// fully resolved using [filepath.EvalSymlinks] or otherwise constructed to +// avoid containing symlink components. Of course, the root also *must not* be +// attacker-controlled. +func SecureJoinVFS(root, unsafePath string, vfs VFS) (string, error) { //nolint:revive // name is part of public API + // The root path must not contain ".." components, otherwise when we join + // the subpath we will end up with a weird path. We could work around this + // in other ways but users shouldn't be giving us non-lexical root paths in + // the first place. 
+ if hasDotDot(root) { + return "", errUnsafeRoot + } + // Use the os.* VFS implementation if none was specified. if vfs == nil { vfs = osVFS{} @@ -59,9 +102,10 @@ func SecureJoinVFS(root, unsafePath string, vfs VFS) (string, error) { linksWalked int ) for remainingPath != "" { - if v := filepath.VolumeName(remainingPath); v != "" { - remainingPath = remainingPath[len(v):] - } + // On Windows, if we managed to end up at a path referencing a volume, + // drop the volume to make sure we don't end up with broken paths or + // escaping the root volume. + remainingPath = stripVolume(remainingPath) // Get the next path component. var part string @@ -97,7 +141,7 @@ func SecureJoinVFS(root, unsafePath string, vfs VFS) (string, error) { // It's a symlink, so get its contents and expand it by prepending it // to the yet-unparsed path. linksWalked++ - if linksWalked > maxSymlinkLimit { + if linksWalked > consts.MaxSymlinkLimit { return "", &os.PathError{Op: "SecureJoin", Path: root + string(filepath.Separator) + unsafePath, Err: syscall.ELOOP} } diff --git a/vendor/github.com/cyphar/filepath-securejoin/openat2_linux.go b/vendor/github.com/cyphar/filepath-securejoin/openat2_linux.go deleted file mode 100644 index ae3b381efe6..00000000000 --- a/vendor/github.com/cyphar/filepath-securejoin/openat2_linux.go +++ /dev/null @@ -1,128 +0,0 @@ -//go:build linux - -// Copyright (C) 2024 SUSE LLC. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package securejoin - -import ( - "errors" - "fmt" - "os" - "path/filepath" - "strings" - "sync" - - "golang.org/x/sys/unix" -) - -var hasOpenat2 = sync.OnceValue(func() bool { - fd, err := unix.Openat2(unix.AT_FDCWD, ".", &unix.OpenHow{ - Flags: unix.O_PATH | unix.O_CLOEXEC, - Resolve: unix.RESOLVE_NO_SYMLINKS | unix.RESOLVE_IN_ROOT, - }) - if err != nil { - return false - } - _ = unix.Close(fd) - return true -}) - -func scopedLookupShouldRetry(how *unix.OpenHow, err error) bool { - // RESOLVE_IN_ROOT (and RESOLVE_BENEATH) can return -EAGAIN if we resolve - // ".." while a mount or rename occurs anywhere on the system. This could - // happen spuriously, or as the result of an attacker trying to mess with - // us during lookup. - // - // In addition, scoped lookups have a "safety check" at the end of - // complete_walk which will return -EXDEV if the final path is not in the - // root. - return how.Resolve&(unix.RESOLVE_IN_ROOT|unix.RESOLVE_BENEATH) != 0 && - (errors.Is(err, unix.EAGAIN) || errors.Is(err, unix.EXDEV)) -} - -const scopedLookupMaxRetries = 10 - -func openat2File(dir *os.File, path string, how *unix.OpenHow) (*os.File, error) { - fullPath := dir.Name() + "/" + path - // Make sure we always set O_CLOEXEC. - how.Flags |= unix.O_CLOEXEC - var tries int - for tries < scopedLookupMaxRetries { - fd, err := unix.Openat2(int(dir.Fd()), path, how) - if err != nil { - if scopedLookupShouldRetry(how, err) { - // We retry a couple of times to avoid the spurious errors, and - // if we are being attacked then returning -EAGAIN is the best - // we can do. - tries++ - continue - } - return nil, &os.PathError{Op: "openat2", Path: fullPath, Err: err} - } - // If we are using RESOLVE_IN_ROOT, the name we generated may be wrong. - // NOTE: The procRoot code MUST NOT use RESOLVE_IN_ROOT, otherwise - // you'll get infinite recursion here. 
- if how.Resolve&unix.RESOLVE_IN_ROOT == unix.RESOLVE_IN_ROOT { - if actualPath, err := rawProcSelfFdReadlink(fd); err == nil { - fullPath = actualPath - } - } - return os.NewFile(uintptr(fd), fullPath), nil - } - return nil, &os.PathError{Op: "openat2", Path: fullPath, Err: errPossibleAttack} -} - -func lookupOpenat2(root *os.File, unsafePath string, partial bool) (*os.File, string, error) { - if !partial { - file, err := openat2File(root, unsafePath, &unix.OpenHow{ - Flags: unix.O_PATH | unix.O_CLOEXEC, - Resolve: unix.RESOLVE_IN_ROOT | unix.RESOLVE_NO_MAGICLINKS, - }) - return file, "", err - } - return partialLookupOpenat2(root, unsafePath) -} - -// partialLookupOpenat2 is an alternative implementation of -// partialLookupInRoot, using openat2(RESOLVE_IN_ROOT) to more safely get a -// handle to the deepest existing child of the requested path within the root. -func partialLookupOpenat2(root *os.File, unsafePath string) (*os.File, string, error) { - // TODO: Implement this as a git-bisect-like binary search. - - unsafePath = filepath.ToSlash(unsafePath) // noop - endIdx := len(unsafePath) - var lastError error - for endIdx > 0 { - subpath := unsafePath[:endIdx] - - handle, err := openat2File(root, subpath, &unix.OpenHow{ - Flags: unix.O_PATH | unix.O_CLOEXEC, - Resolve: unix.RESOLVE_IN_ROOT | unix.RESOLVE_NO_MAGICLINKS, - }) - if err == nil { - // Jump over the slash if we have a non-"" remainingPath. - if endIdx < len(unsafePath) { - endIdx += 1 - } - // We found a subpath! - return handle, unsafePath[endIdx:], lastError - } - if errors.Is(err, unix.ENOENT) || errors.Is(err, unix.ENOTDIR) { - // That path doesn't exist, let's try the next directory up. - endIdx = strings.LastIndexByte(subpath, '/') - lastError = err - continue - } - return nil, "", fmt.Errorf("open subpath: %w", err) - } - // If we couldn't open anything, the whole subpath is missing. Return a - // copy of the root fd so that the caller doesn't close this one by - // accident. 
- rootClone, err := dupFile(root) - if err != nil { - return nil, "", err - } - return rootClone, unsafePath, lastError -} diff --git a/vendor/github.com/cyphar/filepath-securejoin/openat_linux.go b/vendor/github.com/cyphar/filepath-securejoin/openat_linux.go deleted file mode 100644 index 949fb5f2d82..00000000000 --- a/vendor/github.com/cyphar/filepath-securejoin/openat_linux.go +++ /dev/null @@ -1,59 +0,0 @@ -//go:build linux - -// Copyright (C) 2024 SUSE LLC. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package securejoin - -import ( - "os" - "path/filepath" - - "golang.org/x/sys/unix" -) - -func dupFile(f *os.File) (*os.File, error) { - fd, err := unix.FcntlInt(f.Fd(), unix.F_DUPFD_CLOEXEC, 0) - if err != nil { - return nil, os.NewSyscallError("fcntl(F_DUPFD_CLOEXEC)", err) - } - return os.NewFile(uintptr(fd), f.Name()), nil -} - -func openatFile(dir *os.File, path string, flags int, mode int) (*os.File, error) { - // Make sure we always set O_CLOEXEC. - flags |= unix.O_CLOEXEC - fd, err := unix.Openat(int(dir.Fd()), path, flags, uint32(mode)) - if err != nil { - return nil, &os.PathError{Op: "openat", Path: dir.Name() + "/" + path, Err: err} - } - // All of the paths we use with openatFile(2) are guaranteed to be - // lexically safe, so we can use path.Join here. 
- fullPath := filepath.Join(dir.Name(), path) - return os.NewFile(uintptr(fd), fullPath), nil -} - -func fstatatFile(dir *os.File, path string, flags int) (unix.Stat_t, error) { - var stat unix.Stat_t - if err := unix.Fstatat(int(dir.Fd()), path, &stat, flags); err != nil { - return stat, &os.PathError{Op: "fstatat", Path: dir.Name() + "/" + path, Err: err} - } - return stat, nil -} - -func readlinkatFile(dir *os.File, path string) (string, error) { - size := 4096 - for { - linkBuf := make([]byte, size) - n, err := unix.Readlinkat(int(dir.Fd()), path, linkBuf) - if err != nil { - return "", &os.PathError{Op: "readlinkat", Path: dir.Name() + "/" + path, Err: err} - } - if n != size { - return string(linkBuf[:n]), nil - } - // Possible truncation, resize the buffer. - size *= 2 - } -} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/README.md b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/README.md new file mode 100644 index 00000000000..1be727e75b3 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/README.md @@ -0,0 +1,33 @@ +## `pathrs-lite` ## + +`github.com/cyphar/filepath-securejoin/pathrs-lite` provides a minimal **pure +Go** implementation of the core bits of [libpathrs][]. This is not intended to +be a complete replacement for libpathrs, instead it is mainly intended to be +useful as a transition tool for existing Go projects. + +The long-term plan for `pathrs-lite` is to provide a build tag that will cause +all `pathrs-lite` operations to call into libpathrs directly, thus removing +code duplication for projects that wish to make use of libpathrs (and providing +the ability for software packagers to opt-in to libpathrs support without +needing to patch upstream). + +[libpathrs]: https://github.com/cyphar/libpathrs + +### License ### + +Most of this subpackage is licensed under the Mozilla Public License (version +2.0). 
For more information, see the top-level [COPYING.md][] and +[LICENSE.MPL-2.0][] files, as well as the individual license headers for each +file. + +``` +Copyright (C) 2024-2025 Aleksa Sarai +Copyright (C) 2024-2025 SUSE LLC + +This Source Code Form is subject to the terms of the Mozilla Public +License, v. 2.0. If a copy of the MPL was not distributed with this +file, You can obtain one at https://mozilla.org/MPL/2.0/. +``` + +[COPYING.md]: ../COPYING.md +[LICENSE.MPL-2.0]: ../LICENSE.MPL-2.0 diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/doc.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/doc.go new file mode 100644 index 00000000000..d3d74517500 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/doc.go @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Package pathrs (pathrs-lite) is a less complete pure Go implementation of +// some of the APIs provided by [libpathrs]. +package pathrs diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/assert/assert.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/assert/assert.go new file mode 100644 index 00000000000..595dfbf1acf --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/assert/assert.go @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: MPL-2.0 + +// Copyright (C) 2025 Aleksa Sarai +// Copyright (C) 2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +// Package assert provides some basic assertion helpers for Go. +package assert + +import ( + "fmt" +) + +// Assert panics if the predicate is false with the provided argument. +func Assert(predicate bool, msg any) { + if !predicate { + panic(msg) + } +} + +// Assertf panics if the predicate is false and formats the message using the +// same formatting as [fmt.Printf]. +// +// [fmt.Printf]: https://pkg.go.dev/fmt#Printf +func Assertf(predicate bool, fmtMsg string, args ...any) { + Assert(predicate, fmt.Sprintf(fmtMsg, args...)) +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/errors_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/errors_linux.go new file mode 100644 index 00000000000..d0b200f4f9a --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/errors_linux.go @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Package internal contains unexported common code for filepath-securejoin. +package internal + +import ( + "errors" + + "golang.org/x/sys/unix" +) + +type xdevErrorish struct { + description string +} + +func (err xdevErrorish) Error() string { return err.description } +func (err xdevErrorish) Is(target error) bool { return target == unix.EXDEV } + +var ( + // ErrPossibleAttack indicates that some attack was detected. + ErrPossibleAttack error = xdevErrorish{"possible attack detected"} + + // ErrPossibleBreakout indicates that during an operation we ended up in a + // state that could be a breakout but we detected it. 
+ ErrPossibleBreakout error = xdevErrorish{"possible breakout detected"} + + // ErrInvalidDirectory indicates an unlinked directory. + ErrInvalidDirectory = errors.New("wandered into deleted directory") + + // ErrDeletedInode indicates an unlinked file (non-directory). + ErrDeletedInode = errors.New("cannot verify path of deleted inode") +) diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/at_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/at_linux.go new file mode 100644 index 00000000000..09105491304 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/at_linux.go @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +package fd + +import ( + "fmt" + "os" + "path/filepath" + "runtime" + + "golang.org/x/sys/unix" + + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat" +) + +// prepareAt returns -EBADF (an invalid fd) if dir is nil, otherwise using +// the dir.Fd(). We use -EBADF because in filepath-securejoin we generally +// don't want to allow relative-to-cwd paths. The returned path is an +// *informational* string that describes a reasonable pathname for the given +// *at(2) arguments. You must not use the full path for any actual filesystem +// operations. +func prepareAt(dir Fd, path string) (dirFd int, unsafeUnmaskedPath string) { + dirFd, dirPath := -int(unix.EBADF), "." + if dir != nil { + dirFd, dirPath = int(dir.Fd()), dir.Name() + } + if !filepath.IsAbs(path) { + // only prepend the dirfd path for relative paths + path = dirPath + "/" + path + } + // NOTE: If path is "."
or "", the returned path won't be filepath.Clean, + // but that's okay since this path is either used for errors (in which case + // a trailing "/" or "/." is important information) or will be + // filepath.Clean'd later (in the case of fd.Openat). + return dirFd, path +} + +// Openat is an [Fd]-based wrapper around unix.Openat. +func Openat(dir Fd, path string, flags int, mode int) (*os.File, error) { //nolint:unparam // wrapper func + dirFd, fullPath := prepareAt(dir, path) + // Make sure we always set O_CLOEXEC. + flags |= unix.O_CLOEXEC + fd, err := unix.Openat(dirFd, path, flags, uint32(mode)) + if err != nil { + return nil, &os.PathError{Op: "openat", Path: fullPath, Err: err} + } + runtime.KeepAlive(dir) + // openat is only used with lexically-safe paths so we can use + // filepath.Clean here, and also the path itself is not going to be used + // for actual path operations. + fullPath = filepath.Clean(fullPath) + return os.NewFile(uintptr(fd), fullPath), nil +} + +// Fstatat is an [Fd]-based wrapper around unix.Fstatat. +func Fstatat(dir Fd, path string, flags int) (unix.Stat_t, error) { + dirFd, fullPath := prepareAt(dir, path) + var stat unix.Stat_t + if err := unix.Fstatat(dirFd, path, &stat, flags); err != nil { + return stat, &os.PathError{Op: "fstatat", Path: fullPath, Err: err} + } + runtime.KeepAlive(dir) + return stat, nil +} + +// Faccessat is an [Fd]-based wrapper around unix.Faccessat. +func Faccessat(dir Fd, path string, mode uint32, flags int) error { + dirFd, fullPath := prepareAt(dir, path) + err := unix.Faccessat(dirFd, path, mode, flags) + if err != nil { + err = &os.PathError{Op: "faccessat", Path: fullPath, Err: err} + } + runtime.KeepAlive(dir) + return err +} + +// Readlinkat is an [Fd]-based wrapper around unix.Readlinkat. 
+func Readlinkat(dir Fd, path string) (string, error) { + dirFd, fullPath := prepareAt(dir, path) + size := 4096 + for { + linkBuf := make([]byte, size) + n, err := unix.Readlinkat(dirFd, path, linkBuf) + if err != nil { + return "", &os.PathError{Op: "readlinkat", Path: fullPath, Err: err} + } + runtime.KeepAlive(dir) + if n != size { + return string(linkBuf[:n]), nil + } + // Possible truncation, resize the buffer. + size *= 2 + } +} + +const ( + // STATX_MNT_ID_UNIQUE is provided in golang.org/x/sys@v0.20.0, but in order to + // avoid bumping the requirement for a single constant we can just define it + // ourselves. + _STATX_MNT_ID_UNIQUE = 0x4000 //nolint:revive // unix.* name + + // We don't care which mount ID we get. The kernel will give us the unique + // one if it is supported. If the kernel doesn't support + // STATX_MNT_ID_UNIQUE, the bit is ignored and the returned request mask + // will only contain STATX_MNT_ID (if supported). + wantStatxMntMask = _STATX_MNT_ID_UNIQUE | unix.STATX_MNT_ID +) + +var hasStatxMountID = gocompat.SyncOnceValue(func() bool { + var stx unix.Statx_t + err := unix.Statx(-int(unix.EBADF), "/", 0, wantStatxMntMask, &stx) + return err == nil && stx.Mask&wantStatxMntMask != 0 +}) + +// GetMountID gets the mount identifier associated with the fd and path +// combination. It is effectively a wrapper around fetching +// STATX_MNT_ID{,_UNIQUE} with unix.Statx, but with a fallback to 0 if the +// kernel doesn't support the feature. +func GetMountID(dir Fd, path string) (uint64, error) { + // If we don't have statx(STATX_MNT_ID*) support, we can't do anything. + if !hasStatxMountID() { + return 0, nil + } + + dirFd, fullPath := prepareAt(dir, path) + + var stx unix.Statx_t + err := unix.Statx(dirFd, path, unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW, wantStatxMntMask, &stx) + if stx.Mask&wantStatxMntMask == 0 { + // It's not a kernel limitation, for some reason we couldn't get a + // mount ID. Assume it's some kind of attack. 
+ err = fmt.Errorf("could not get mount id: %w", err) + } + if err != nil { + return 0, &os.PathError{Op: "statx(STATX_MNT_ID_...)", Path: fullPath, Err: err} + } + runtime.KeepAlive(dir) + return stx.Mnt_id, nil +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/fd.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/fd.go new file mode 100644 index 00000000000..d2206a386f9 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/fd.go @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: MPL-2.0 + +// Copyright (C) 2025 Aleksa Sarai +// Copyright (C) 2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Package fd provides a drop-in interface-based replacement of [*os.File] that +// allows for things like noop-Close wrappers to be used. +// +// [*os.File]: https://pkg.go.dev/os#File +package fd + +import ( + "io" + "os" +) + +// Fd is an interface that mirrors most of the API of [*os.File], allowing you +// to create wrappers that can be used in place of [*os.File]. +// +// [*os.File]: https://pkg.go.dev/os#File +type Fd interface { + io.Closer + Name() string + Fd() uintptr +} + +// Compile-time interface checks. +var ( + _ Fd = (*os.File)(nil) + _ Fd = noClose{} +) + +type noClose struct{ inner Fd } + +func (f noClose) Name() string { return f.inner.Name() } +func (f noClose) Fd() uintptr { return f.inner.Fd() } + +func (f noClose) Close() error { return nil } + +// NopCloser returns an [*os.File]-like object where the [Close] method is now +// a no-op. +// +// Note that for [*os.File] and similar objects, the Go garbage collector will +// still call [Close] on the underlying file unless you use +// [runtime.SetFinalizer] to disable this behaviour. This is up to the caller +// to do (if necessary). 
+// +// [*os.File]: https://pkg.go.dev/os#File +// [Close]: https://pkg.go.dev/io#Closer +// [runtime.SetFinalizer]: https://pkg.go.dev/runtime#SetFinalizer +func NopCloser(f Fd) Fd { return noClose{inner: f} } diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/fd_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/fd_linux.go new file mode 100644 index 00000000000..e1ec3c0b8e4 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/fd_linux.go @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +package fd + +import ( + "fmt" + "os" + "runtime" + + "golang.org/x/sys/unix" + + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal" +) + +// DupWithName creates a new file descriptor referencing the same underlying +// file, but with the provided name instead of fd.Name(). +func DupWithName(fd Fd, name string) (*os.File, error) { + fd2, err := unix.FcntlInt(fd.Fd(), unix.F_DUPFD_CLOEXEC, 0) + if err != nil { + return nil, os.NewSyscallError("fcntl(F_DUPFD_CLOEXEC)", err) + } + runtime.KeepAlive(fd) + return os.NewFile(uintptr(fd2), name), nil +} + +// Dup creates a new file descriptor referencing the same underlying file. +func Dup(fd Fd) (*os.File, error) { + return DupWithName(fd, fd.Name()) +} + +// Fstat is an [Fd]-based wrapper around unix.Fstat. +func Fstat(fd Fd) (unix.Stat_t, error) { + var stat unix.Stat_t + if err := unix.Fstat(int(fd.Fd()), &stat); err != nil { + return stat, &os.PathError{Op: "fstat", Path: fd.Name(), Err: err} + } + runtime.KeepAlive(fd) + return stat, nil +} + +// Fstatfs is an [Fd]-based wrapper around unix.Fstatfs.
+func Fstatfs(fd Fd) (unix.Statfs_t, error) { + var statfs unix.Statfs_t + if err := unix.Fstatfs(int(fd.Fd()), &statfs); err != nil { + return statfs, &os.PathError{Op: "fstatfs", Path: fd.Name(), Err: err} + } + runtime.KeepAlive(fd) + return statfs, nil +} + +// IsDeadInode detects whether the file has been unlinked from a filesystem and +// is thus a "dead inode" from the kernel's perspective. +func IsDeadInode(file Fd) error { + // If the nlink of a file drops to 0, there is an attacker deleting + // directories during our walk, which could result in weird /proc values. + // It's better to error out in this case. + stat, err := Fstat(file) + if err != nil { + return fmt.Errorf("check for dead inode: %w", err) + } + if stat.Nlink == 0 { + err := internal.ErrDeletedInode + if stat.Mode&unix.S_IFMT == unix.S_IFDIR { + err = internal.ErrInvalidDirectory + } + return fmt.Errorf("%w %q", err, file.Name()) + } + return nil +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/mount_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/mount_linux.go new file mode 100644 index 00000000000..77549c7a993 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/mount_linux.go @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +package fd + +import ( + "os" + "runtime" + + "golang.org/x/sys/unix" +) + +// Fsopen is an [Fd]-based wrapper around unix.Fsopen. +func Fsopen(fsName string, flags int) (*os.File, error) { + // Make sure we always set O_CLOEXEC. 
+ flags |= unix.FSOPEN_CLOEXEC + fd, err := unix.Fsopen(fsName, flags) + if err != nil { + return nil, os.NewSyscallError("fsopen "+fsName, err) + } + return os.NewFile(uintptr(fd), "fscontext:"+fsName), nil +} + +// Fsmount is an [Fd]-based wrapper around unix.Fsmount. +func Fsmount(ctx Fd, flags, mountAttrs int) (*os.File, error) { + // Make sure we always set O_CLOEXEC. + flags |= unix.FSMOUNT_CLOEXEC + fd, err := unix.Fsmount(int(ctx.Fd()), flags, mountAttrs) + if err != nil { + return nil, os.NewSyscallError("fsmount "+ctx.Name(), err) + } + return os.NewFile(uintptr(fd), "fsmount:"+ctx.Name()), nil +} + +// OpenTree is an [Fd]-based wrapper around unix.OpenTree. +func OpenTree(dir Fd, path string, flags uint) (*os.File, error) { + dirFd, fullPath := prepareAt(dir, path) + // Make sure we always set O_CLOEXEC. + flags |= unix.OPEN_TREE_CLOEXEC + fd, err := unix.OpenTree(dirFd, path, flags) + if err != nil { + return nil, &os.PathError{Op: "open_tree", Path: fullPath, Err: err} + } + runtime.KeepAlive(dir) + return os.NewFile(uintptr(fd), fullPath), nil +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/openat2_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/openat2_linux.go new file mode 100644 index 00000000000..3e937fe3c16 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/openat2_linux.go @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +package fd + +import ( + "errors" + "os" + "runtime" + + "golang.org/x/sys/unix" +) + +func scopedLookupShouldRetry(how *unix.OpenHow, err error) bool { + // RESOLVE_IN_ROOT (and RESOLVE_BENEATH) can return -EAGAIN if we resolve + // ".." while a mount or rename occurs anywhere on the system. This could + // happen spuriously, or as the result of an attacker trying to mess with + // us during lookup. + // + // In addition, scoped lookups have a "safety check" at the end of + // complete_walk which will return -EXDEV if the final path is not in the + // root. + return how.Resolve&(unix.RESOLVE_IN_ROOT|unix.RESOLVE_BENEATH) != 0 && + (errors.Is(err, unix.EAGAIN) || errors.Is(err, unix.EXDEV)) +} + +// This is a fairly arbitrary limit we have just to avoid an attacker being +// able to make us spin in an infinite retry loop -- callers can choose to +// retry on EAGAIN if they prefer. +const scopedLookupMaxRetries = 128 + +// Openat2 is an [Fd]-based wrapper around unix.Openat2, but with some retry +// logic in case of EAGAIN errors. +func Openat2(dir Fd, path string, how *unix.OpenHow) (*os.File, error) { + dirFd, fullPath := prepareAt(dir, path) + // Make sure we always set O_CLOEXEC. + how.Flags |= unix.O_CLOEXEC + var tries int + for { + fd, err := unix.Openat2(dirFd, path, how) + if err != nil { + if scopedLookupShouldRetry(how, err) && tries < scopedLookupMaxRetries { + // We retry a couple of times to avoid the spurious errors, and + // if we are being attacked then returning -EAGAIN is the best + // we can do. 
+ tries++ + continue + } + return nil, &os.PathError{Op: "openat2", Path: fullPath, Err: err} + } + runtime.KeepAlive(dir) + return os.NewFile(uintptr(fd), fullPath), nil + } +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/README.md b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/README.md new file mode 100644 index 00000000000..5dcb6ae0070 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/README.md @@ -0,0 +1,10 @@ +## gocompat ## + +This directory contains backports of stdlib functions from later Go versions so +the filepath-securejoin can continue to be used by projects that are stuck with +Go 1.18 support. Note that often filepath-securejoin is added in security +patches for old releases, so avoiding the need to bump Go compiler requirements +is a huge plus to downstreams. + +The source code is licensed under the same license as the Go stdlib. See the +source files for the precise license information. diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/doc.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/doc.go new file mode 100644 index 00000000000..4b1803f580a --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/doc.go @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: BSD-3-Clause +//go:build linux && go1.20 + +// Copyright (C) 2025 SUSE LLC. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package gocompat includes compatibility shims (backported from future Go +// stdlib versions) to permit filepath-securejoin to be used with older Go +// versions (often filepath-securejoin is added in security patches for old +// releases, so avoiding the need to bump Go compiler requirements is a huge +// plus to downstreams). 
+package gocompat diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_errors_go120.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_errors_go120.go new file mode 100644 index 00000000000..4a114bd3da9 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_errors_go120.go @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: BSD-3-Clause +//go:build linux && go1.20 + +// Copyright (C) 2024 SUSE LLC. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gocompat + +import ( + "fmt" +) + +// WrapBaseError is a helper that is equivalent to fmt.Errorf("%w: %w"), except +// that on pre-1.20 Go versions only errors.Is() works properly (errors.Unwrap) +// is only guaranteed to give you baseErr. +func WrapBaseError(baseErr, extraErr error) error { + return fmt.Errorf("%w: %w", extraErr, baseErr) +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_errors_unsupported.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_errors_unsupported.go new file mode 100644 index 00000000000..3061016a6a6 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_errors_unsupported.go @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: BSD-3-Clause + +//go:build linux && !go1.20 + +// Copyright (C) 2024 SUSE LLC. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package gocompat + +import ( + "fmt" +) + +type wrappedError struct { + inner error + isError error +} + +func (err wrappedError) Is(target error) bool { + return err.isError == target +} + +func (err wrappedError) Unwrap() error { + return err.inner +} + +func (err wrappedError) Error() string { + return fmt.Sprintf("%v: %v", err.isError, err.inner) +} + +// WrapBaseError is a helper that is equivalent to fmt.Errorf("%w: %w"), except +// that on pre-1.20 Go versions only errors.Is() works properly (errors.Unwrap) +// is only guaranteed to give you baseErr. +func WrapBaseError(baseErr, extraErr error) error { + return wrappedError{ + inner: baseErr, + isError: extraErr, + } +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_generics_go121.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_generics_go121.go new file mode 100644 index 00000000000..d4a938186e4 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_generics_go121.go @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: BSD-3-Clause + +//go:build linux && go1.21 + +// Copyright (C) 2024-2025 SUSE LLC. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gocompat + +import ( + "cmp" + "slices" + "sync" +) + +// SlicesDeleteFunc is equivalent to Go 1.21's slices.DeleteFunc. +func SlicesDeleteFunc[S ~[]E, E any](slice S, delFn func(E) bool) S { + return slices.DeleteFunc(slice, delFn) +} + +// SlicesContains is equivalent to Go 1.21's slices.Contains. +func SlicesContains[S ~[]E, E comparable](slice S, val E) bool { + return slices.Contains(slice, val) +} + +// SlicesClone is equivalent to Go 1.21's slices.Clone. +func SlicesClone[S ~[]E, E any](slice S) S { + return slices.Clone(slice) +} + +// SyncOnceValue is equivalent to Go 1.21's sync.OnceValue. 
+func SyncOnceValue[T any](f func() T) func() T { + return sync.OnceValue(f) +} + +// SyncOnceValues is equivalent to Go 1.21's sync.OnceValues. +func SyncOnceValues[T1, T2 any](f func() (T1, T2)) func() (T1, T2) { + return sync.OnceValues(f) +} + +// CmpOrdered is equivalent to Go 1.21's cmp.Ordered generic type definition. +type CmpOrdered = cmp.Ordered + +// CmpCompare is equivalent to Go 1.21's cmp.Compare. +func CmpCompare[T CmpOrdered](x, y T) int { + return cmp.Compare(x, y) +} + +// Max2 is equivalent to Go 1.21's max builtin (but only for two parameters). +func Max2[T CmpOrdered](x, y T) T { + return max(x, y) +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_generics_unsupported.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_generics_unsupported.go new file mode 100644 index 00000000000..0ea6218aa6c --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_generics_unsupported.go @@ -0,0 +1,187 @@ +// SPDX-License-Identifier: BSD-3-Clause + +//go:build linux && !go1.21 + +// Copyright (C) 2021, 2022 The Go Authors. All rights reserved. +// Copyright (C) 2024-2025 SUSE LLC. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE.BSD file. + +package gocompat + +import ( + "sync" +) + +// These are very minimal implementations of functions that appear in Go 1.21's +// stdlib, included so that we can build on older Go versions. Most are +// borrowed directly from the stdlib, and a few are modified to be "obviously +// correct" without needing to copy too many other helpers. + +// clearSlice is equivalent to Go 1.21's builtin clear. +// Copied from the Go 1.24 stdlib implementation. +func clearSlice[S ~[]E, E any](slice S) { + var zero E + for i := range slice { + slice[i] = zero + } +} + +// slicesIndexFunc is equivalent to Go 1.21's slices.IndexFunc. 
+// Copied from the Go 1.24 stdlib implementation. +func slicesIndexFunc[S ~[]E, E any](s S, f func(E) bool) int { + for i := range s { + if f(s[i]) { + return i + } + } + return -1 +} + +// SlicesDeleteFunc is equivalent to Go 1.21's slices.DeleteFunc. +// Copied from the Go 1.24 stdlib implementation. +func SlicesDeleteFunc[S ~[]E, E any](s S, del func(E) bool) S { + i := slicesIndexFunc(s, del) + if i == -1 { + return s + } + // Don't start copying elements until we find one to delete. + for j := i + 1; j < len(s); j++ { + if v := s[j]; !del(v) { + s[i] = v + i++ + } + } + clearSlice(s[i:]) // zero/nil out the obsolete elements, for GC + return s[:i] +} + +// SlicesContains is equivalent to Go 1.21's slices.Contains. +// Similar to the stdlib slices.Contains, except that we don't have +// slices.Index so we need to use slices.IndexFunc for this non-Func helper. +func SlicesContains[S ~[]E, E comparable](s S, v E) bool { + return slicesIndexFunc(s, func(e E) bool { return e == v }) >= 0 +} + +// SlicesClone is equivalent to Go 1.21's slices.Clone. +// Copied from the Go 1.24 stdlib implementation. +func SlicesClone[S ~[]E, E any](s S) S { + // Preserve nil in case it matters. + if s == nil { + return nil + } + return append(S([]E{}), s...) +} + +// SyncOnceValue is equivalent to Go 1.21's sync.OnceValue. +// Copied from the Go 1.25 stdlib implementation. +func SyncOnceValue[T any](f func() T) func() T { + // Use a struct so that there's a single heap allocation. + d := struct { + f func() T + once sync.Once + valid bool + p any + result T + }{ + f: f, + } + return func() T { + d.once.Do(func() { + defer func() { + d.f = nil + d.p = recover() + if !d.valid { + panic(d.p) + } + }() + d.result = d.f() + d.valid = true + }) + if !d.valid { + panic(d.p) + } + return d.result + } +} + +// SyncOnceValues is equivalent to Go 1.21's sync.OnceValues. +// Copied from the Go 1.25 stdlib implementation. 
+func SyncOnceValues[T1, T2 any](f func() (T1, T2)) func() (T1, T2) { + // Use a struct so that there's a single heap allocation. + d := struct { + f func() (T1, T2) + once sync.Once + valid bool + p any + r1 T1 + r2 T2 + }{ + f: f, + } + return func() (T1, T2) { + d.once.Do(func() { + defer func() { + d.f = nil + d.p = recover() + if !d.valid { + panic(d.p) + } + }() + d.r1, d.r2 = d.f() + d.valid = true + }) + if !d.valid { + panic(d.p) + } + return d.r1, d.r2 + } +} + +// CmpOrdered is equivalent to Go 1.21's cmp.Ordered generic type definition. +// Copied from the Go 1.25 stdlib implementation. +type CmpOrdered interface { + ~int | ~int8 | ~int16 | ~int32 | ~int64 | + ~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 | ~uintptr | + ~float32 | ~float64 | + ~string +} + +// isNaN reports whether x is a NaN without requiring the math package. +// This will always return false if T is not floating-point. +// Copied from the Go 1.25 stdlib implementation. +func isNaN[T CmpOrdered](x T) bool { + return x != x +} + +// CmpCompare is equivalent to Go 1.21's cmp.Compare. +// Copied from the Go 1.25 stdlib implementation. +func CmpCompare[T CmpOrdered](x, y T) int { + xNaN := isNaN(x) + yNaN := isNaN(y) + if xNaN { + if yNaN { + return 0 + } + return -1 + } + if yNaN { + return +1 + } + if x < y { + return -1 + } + if x > y { + return +1 + } + return 0 +} + +// Max2 is equivalent to Go 1.21's max builtin for two parameters. +func Max2[T CmpOrdered](x, y T) T { + m := x + if y > m { + m = y + } + return m +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/kernelversion/kernel_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/kernelversion/kernel_linux.go new file mode 100644 index 00000000000..cb6de41861f --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/kernelversion/kernel_linux.go @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: BSD-3-Clause + +// Copyright (C) 2022 The Go Authors. 
All rights reserved. +// Copyright (C) 2025 SUSE LLC. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE.BSD file. + +// The parsing logic is very loosely based on the Go stdlib's +// src/internal/syscall/unix/kernel_version_linux.go but with an API that looks +// a bit like runc's libcontainer/system/kernelversion. +// +// TODO(cyphar): This API has been copied around to a lot of different projects +// (Docker, containerd, runc, and now filepath-securejoin) -- maybe we should +// put it in a separate project? + +// Package kernelversion provides a simple mechanism for checking whether the +// running kernel is at least as new as some baseline kernel version. This is +// often useful when checking for features that would be too complicated to +// test support for (or in cases where we know that some kernel features in +// backport-heavy kernels are broken and need to be avoided). +package kernelversion + +import ( + "bytes" + "errors" + "fmt" + "strconv" + "strings" + + "golang.org/x/sys/unix" + + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat" +) + +// KernelVersion is a numeric representation of the key numerical elements of a +// kernel version (for instance, "4.1.2-default-1" would be represented as +// KernelVersion{4, 1, 2}). +type KernelVersion []uint64 + +func (kver KernelVersion) String() string { + var str strings.Builder + for idx, elem := range kver { + if idx != 0 { + _, _ = str.WriteRune('.') + } + _, _ = str.WriteString(strconv.FormatUint(elem, 10)) + } + return str.String() +} + +var errInvalidKernelVersion = errors.New("invalid kernel version") + +// parseKernelVersion parses a string and creates a KernelVersion based on it. 
+func parseKernelVersion(kverStr string) (KernelVersion, error) { + kver := make(KernelVersion, 1, 3) + for idx, ch := range kverStr { + if '0' <= ch && ch <= '9' { + v := &kver[len(kver)-1] + *v = (*v * 10) + uint64(ch-'0') + } else { + if idx == 0 || kverStr[idx-1] < '0' || '9' < kverStr[idx-1] { + // "." must be preceded by a digit while in version section + return nil, fmt.Errorf("%w %q: kernel version has dot(s) followed by non-digit in version section", errInvalidKernelVersion, kverStr) + } + if ch != '.' { + break + } + kver = append(kver, 0) + } + } + if len(kver) < 2 { + return nil, fmt.Errorf("%w %q: kernel versions must contain at least two components", errInvalidKernelVersion, kverStr) + } + return kver, nil +} + +// getKernelVersion gets the current kernel version. +var getKernelVersion = gocompat.SyncOnceValues(func() (KernelVersion, error) { + var uts unix.Utsname + if err := unix.Uname(&uts); err != nil { + return nil, err + } + // Remove the \x00 from the release. + release := uts.Release[:] + return parseKernelVersion(string(release[:bytes.IndexByte(release, 0)])) +}) + +// GreaterEqualThan returns true if the host kernel version is greater than +// or equal to the provided [KernelVersion]. When doing this comparison, any +// non-numerical suffixes of the host kernel version are ignored. +// +// If the number of components provided is not equal to the number of numerical +// components of the host kernel version, any missing components are treated as +// 0. This means that GreaterEqualThan(KernelVersion{4}) will be treated the +// same as GreaterEqualThan(KernelVersion{4, 0, 0, ..., 0, 0}), and that if the +// host kernel version is "4" then GreaterEqualThan(KernelVersion{4, 1}) will +// return false (because the host version will be treated as "4.0").
+func GreaterEqualThan(wantKver KernelVersion) (bool, error) { + hostKver, err := getKernelVersion() + if err != nil { + return false, err + } + + // Pad out the kernel version lengths to match one another. + cmpLen := gocompat.Max2(len(hostKver), len(wantKver)) + hostKver = append(hostKver, make(KernelVersion, cmpLen-len(hostKver))...) + wantKver = append(wantKver, make(KernelVersion, cmpLen-len(wantKver))...) + + for i := 0; i < cmpLen; i++ { + switch gocompat.CmpCompare(hostKver[i], wantKver[i]) { + case -1: + // host < want + return false, nil + case +1: + // host > want + return true, nil + case 0: + continue + } + } + // equal version values + return true, nil +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/doc.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/doc.go new file mode 100644 index 00000000000..4635714f626 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/doc.go @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: MPL-2.0 + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Package linux returns information about what features are supported on the +// running kernel. 
+package linux diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/mount_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/mount_linux.go new file mode 100644 index 00000000000..b29905bff66 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/mount_linux.go @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +package linux + +import ( + "golang.org/x/sys/unix" + + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/kernelversion" +) + +// HasNewMountAPI returns whether the new fsopen(2) mount API is supported on +// the running kernel. +var HasNewMountAPI = gocompat.SyncOnceValue(func() bool { + // All of the pieces of the new mount API we use (fsopen, fsconfig, + // fsmount, open_tree) were added together in Linux 5.2[1,2], so we can + // just check for one of the syscalls and the others should also be + // available. + // + // Just try to use open_tree(2) to open a file without OPEN_TREE_CLONE. + // This is equivalent to openat(2), but tells us if open_tree is + // available (and thus all of the other basic new mount API syscalls). + // open_tree(2) is the most light-weight syscall to test here. + // + // [1]: merge commit 400913252d09 + // [2]: + fd, err := unix.OpenTree(-int(unix.EBADF), "/", unix.OPEN_TREE_CLOEXEC) + if err != nil { + return false + } + _ = unix.Close(fd) + + // RHEL 8 has a backport of fsopen(2) that appears to have some very + // difficult to debug performance pathology. As such, it seems prudent to + // simply reject pre-5.2 kernels.
+ isNotBackport, _ := kernelversion.GreaterEqualThan(kernelversion.KernelVersion{5, 2}) + return isNotBackport +}) diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/openat2_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/openat2_linux.go new file mode 100644 index 00000000000..399609dc361 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/openat2_linux.go @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +package linux + +import ( + "golang.org/x/sys/unix" + + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat" +) + +// HasOpenat2 returns whether openat2(2) is supported on the running kernel. +var HasOpenat2 = gocompat.SyncOnceValue(func() bool { + fd, err := unix.Openat2(unix.AT_FDCWD, ".", &unix.OpenHow{ + Flags: unix.O_PATH | unix.O_CLOEXEC, + Resolve: unix.RESOLVE_NO_SYMLINKS | unix.RESOLVE_IN_ROOT, + }) + if err != nil { + return false + } + _ = unix.Close(fd) + return true +}) diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs/procfs_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs/procfs_linux.go new file mode 100644 index 00000000000..21e0a62e8ec --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs/procfs_linux.go @@ -0,0 +1,544 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Package procfs provides a safe API for operating on /proc on Linux. Note +// that this is the *internal* procfs API, mainly needed due to Go's +// restrictions on cyclic dependencies and its incredibly minimal visibility +// system without making a separate internal/ package. +package procfs + +import ( + "errors" + "fmt" + "io" + "os" + "runtime" + "strconv" + + "golang.org/x/sys/unix" + + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/assert" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux" +) + +// The kernel guarantees that the root inode of a procfs mount has an +// f_type of PROC_SUPER_MAGIC and st_ino of PROC_ROOT_INO. +const ( + procSuperMagic = 0x9fa0 // PROC_SUPER_MAGIC + procRootIno = 1 // PROC_ROOT_INO +) + +// verifyProcHandle checks that the handle is from a procfs filesystem. +// Contrast this to [verifyProcRoot], which also verifies that the handle is +// the root of a procfs mount. +func verifyProcHandle(procHandle fd.Fd) error { + if statfs, err := fd.Fstatfs(procHandle); err != nil { + return err + } else if statfs.Type != procSuperMagic { + return fmt.Errorf("%w: incorrect procfs root filesystem type 0x%x", errUnsafeProcfs, statfs.Type) + } + return nil +} + +// verifyProcRoot verifies that the handle is the root of a procfs filesystem. +// Contrast this to [verifyProcHandle], which only verifies if the handle is +// some file on procfs (regardless of what file it is).
+func verifyProcRoot(procRoot fd.Fd) error { + if err := verifyProcHandle(procRoot); err != nil { + return err + } + if stat, err := fd.Fstat(procRoot); err != nil { + return err + } else if stat.Ino != procRootIno { + return fmt.Errorf("%w: incorrect procfs root inode number %d", errUnsafeProcfs, stat.Ino) + } + return nil +} + +type procfsFeatures struct { + // hasSubsetPid was added in Linux 5.8, along with hidepid=ptraceable (and + // string-based hidepid= values). Before this patchset, it was not really + // safe to try to modify procfs superblock flags because the superblock was + // shared -- so if this feature is not available, **you should not set any + // superblock flags**. + // + // 6814ef2d992a ("proc: add option to mount only a pids subset") + // fa10fed30f25 ("proc: allow to mount many instances of proc in one pid namespace") + // 24a71ce5c47f ("proc: instantiate only pids that we can ptrace on 'hidepid=4' mount option") + // 1c6c4d112e81 ("proc: use human-readable values for hidepid") + // 9ff7258575d5 ("Merge branch 'proc-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace") + hasSubsetPid bool +} + +var getProcfsFeatures = gocompat.SyncOnceValue(func() procfsFeatures { + if !linux.HasNewMountAPI() { + return procfsFeatures{} + } + procfsCtx, err := fd.Fsopen("proc", unix.FSOPEN_CLOEXEC) + if err != nil { + return procfsFeatures{} + } + defer procfsCtx.Close() //nolint:errcheck // close failures aren't critical here + + return procfsFeatures{ + hasSubsetPid: unix.FsconfigSetString(int(procfsCtx.Fd()), "subset", "pid") == nil, + } +}) + +func newPrivateProcMount(subset bool) (_ *Handle, Err error) { + procfsCtx, err := fd.Fsopen("proc", unix.FSOPEN_CLOEXEC) + if err != nil { + return nil, err + } + defer procfsCtx.Close() //nolint:errcheck // close failures aren't critical here + + if subset && getProcfsFeatures().hasSubsetPid { + // Try to configure hidepid=ptraceable,subset=pid if possible, but + // ignore errors. 
+ _ = unix.FsconfigSetString(int(procfsCtx.Fd()), "hidepid", "ptraceable") + _ = unix.FsconfigSetString(int(procfsCtx.Fd()), "subset", "pid") + } + + // Get an actual handle. + if err := unix.FsconfigCreate(int(procfsCtx.Fd())); err != nil { + return nil, os.NewSyscallError("fsconfig create procfs", err) + } + // TODO: Output any information from the fscontext log to debug logs. + procRoot, err := fd.Fsmount(procfsCtx, unix.FSMOUNT_CLOEXEC, unix.MS_NODEV|unix.MS_NOEXEC|unix.MS_NOSUID) + if err != nil { + return nil, err + } + defer func() { + if Err != nil { + _ = procRoot.Close() + } + }() + return newHandle(procRoot) +} + +func clonePrivateProcMount() (_ *Handle, Err error) { + // Try to make a clone without using AT_RECURSIVE if we can. If this works, + // we can be sure there are no over-mounts and so if the root is valid then + // we're golden. Otherwise, we have to deal with over-mounts. + procRoot, err := fd.OpenTree(nil, "/proc", unix.OPEN_TREE_CLONE) + if err != nil || hookForcePrivateProcRootOpenTreeAtRecursive(procRoot) { + procRoot, err = fd.OpenTree(nil, "/proc", unix.OPEN_TREE_CLONE|unix.AT_RECURSIVE) + } + if err != nil { + return nil, fmt.Errorf("creating a detached procfs clone: %w", err) + } + defer func() { + if Err != nil { + _ = procRoot.Close() + } + }() + return newHandle(procRoot) +} + +func privateProcRoot(subset bool) (*Handle, error) { + if !linux.HasNewMountAPI() || hookForceGetProcRootUnsafe() { + return nil, fmt.Errorf("new mount api: %w", unix.ENOTSUP) + } + // Try to create a new procfs mount from scratch if we can. This ensures we + // can get a procfs mount even if /proc is fake (for whatever reason). + procRoot, err := newPrivateProcMount(subset) + if err != nil || hookForcePrivateProcRootOpenTree(procRoot) { + // Try to clone /proc then... 
+ procRoot, err = clonePrivateProcMount() + } + return procRoot, err +} + +func unsafeHostProcRoot() (_ *Handle, Err error) { + procRoot, err := os.OpenFile("/proc", unix.O_PATH|unix.O_NOFOLLOW|unix.O_DIRECTORY|unix.O_CLOEXEC, 0) + if err != nil { + return nil, err + } + defer func() { + if Err != nil { + _ = procRoot.Close() + } + }() + return newHandle(procRoot) +} + +// Handle is a wrapper around an *os.File handle to "/proc", which can be used +// to do further procfs-related operations in a safe way. +type Handle struct { + Inner fd.Fd + // Does this handle have subset=pid set? + isSubset bool +} + +func newHandle(procRoot fd.Fd) (*Handle, error) { + if err := verifyProcRoot(procRoot); err != nil { + // This is only used in methods that + _ = procRoot.Close() + return nil, err + } + proc := &Handle{Inner: procRoot} + // With subset=pid we can be sure that /proc/uptime will not exist. + if err := fd.Faccessat(proc.Inner, "uptime", unix.F_OK, unix.AT_SYMLINK_NOFOLLOW); err != nil { + proc.isSubset = errors.Is(err, os.ErrNotExist) + } + return proc, nil +} + +// Close closes the underlying file for the Handle. +func (proc *Handle) Close() error { return proc.Inner.Close() } + +var getCachedProcRoot = gocompat.SyncOnceValue(func() *Handle { + procRoot, err := getProcRoot(true) + if err != nil { + return nil // just don't cache if we see an error + } + if !procRoot.isSubset { + return nil // we only cache verified subset=pid handles + } + + // Disarm (*Handle).Close() to stop someone from accidentally closing + // the global handle. + procRoot.Inner = fd.NopCloser(procRoot.Inner) + return procRoot +}) + +// OpenProcRoot tries to open a "safer" handle to "/proc". +func OpenProcRoot() (*Handle, error) { + if proc := getCachedProcRoot(); proc != nil { + return proc, nil + } + return getProcRoot(true) +} + +// OpenUnsafeProcRoot opens a handle to "/proc" without any overmounts or +// masked paths (but also without "subset=pid"). 
+func OpenUnsafeProcRoot() (*Handle, error) { return getProcRoot(false) } + +func getProcRoot(subset bool) (*Handle, error) { + proc, err := privateProcRoot(subset) + if err != nil { + // Fall back to using a /proc handle if making a private mount failed. + // If we have openat2, at least we can avoid some kinds of over-mount + // attacks, but without openat2 there's not much we can do. + proc, err = unsafeHostProcRoot() + } + return proc, err +} + +var hasProcThreadSelf = gocompat.SyncOnceValue(func() bool { + return unix.Access("/proc/thread-self/", unix.F_OK) == nil +}) + +var errUnsafeProcfs = errors.New("unsafe procfs detected") + +// lookup is a very minimal wrapper around [procfsLookupInRoot] which is +// intended to be called from the external API. +func (proc *Handle) lookup(subpath string) (*os.File, error) { + handle, err := procfsLookupInRoot(proc.Inner, subpath) + if err != nil { + return nil, err + } + return handle, nil +} + +// procfsBase is an enum indicating the prefix of a subpath in operations +// involving [Handle]s. +type procfsBase string + +const ( + // ProcRoot refers to the root of the procfs (i.e., "/proc/"). + ProcRoot procfsBase = "/proc" + // ProcSelf refers to the current process' subdirectory (i.e., + // "/proc/self/"). + ProcSelf procfsBase = "/proc/self" + // ProcThreadSelf refers to the current thread's subdirectory (i.e., + // "/proc/thread-self/"). In multi-threaded programs (i.e., all Go + // programs) where one thread has a different CLONE_FS, it is possible for + // "/proc/self" to point the wrong thread and so "/proc/thread-self" may be + // necessary. Note that on pre-3.17 kernels, "/proc/thread-self" doesn't + // exist and so a fallback will be used in that case. + ProcThreadSelf procfsBase = "/proc/thread-self" + // TODO: Switch to an interface setup so we can have a more type-safe + // version of ProcPid and remove the need to worry about invalid string + // values. 
+) + +// prefix returns a prefix that can be used with the given [Handle]. +func (base procfsBase) prefix(proc *Handle) (string, error) { + switch base { + case ProcRoot: + return ".", nil + case ProcSelf: + return "self", nil + case ProcThreadSelf: + threadSelf := "thread-self" + if !hasProcThreadSelf() || hookForceProcSelfTask() { + // Pre-3.17 kernels don't have /proc/thread-self, so do it + // manually. + threadSelf = "self/task/" + strconv.Itoa(unix.Gettid()) + if err := fd.Faccessat(proc.Inner, threadSelf, unix.F_OK, unix.AT_SYMLINK_NOFOLLOW); err != nil || hookForceProcSelf() { + // In this case, we are running in a pid namespace that doesn't + // match the /proc mount we have. This can happen inside runc. + // + // Unfortunately, there is no nice way to get the correct TID + // to use here because of the age of the kernel, so we have to + // just use /proc/self and hope that it works. + threadSelf = "self" + } + } + return threadSelf, nil + } + return "", fmt.Errorf("invalid procfs base %q", base) +} + +// ProcThreadSelfCloser is a callback that needs to be called when you are done +// operating on an [os.File] fetched using [ProcThreadSelf]. +// +// [os.File]: https://pkg.go.dev/os#File +type ProcThreadSelfCloser func() + +// open is the core lookup operation for [Handle]. It returns a handle to +// "/proc/$base/$subpath". If the returned [ProcThreadSelfCloser] is non-nil, +// you should call it after you are done interacting with the returned handle. +// +// In general you should prefer to use the other helpers, as they remove +// the need to interact with [procfsBase] and do not return a nil +// [ProcThreadSelfCloser] for [procfsBase] values other than [ProcThreadSelf] +// where it is necessary.
+func (proc *Handle) open(base procfsBase, subpath string) (_ *os.File, closer ProcThreadSelfCloser, Err error) { + prefix, err := base.prefix(proc) + if err != nil { + return nil, nil, err + } + subpath = prefix + "/" + subpath + + switch base { + case ProcRoot: + file, err := proc.lookup(subpath) + if errors.Is(err, os.ErrNotExist) { + // The Handle handle in use might be a subset=pid one, which will + // result in spurious errors. In this case, just open a temporary + // unmasked procfs handle for this operation. + proc, err2 := OpenUnsafeProcRoot() // !subset=pid + if err2 != nil { + return nil, nil, err + } + defer proc.Close() //nolint:errcheck // close failures aren't critical here + + file, err = proc.lookup(subpath) + } + return file, nil, err + + case ProcSelf: + file, err := proc.lookup(subpath) + return file, nil, err + + case ProcThreadSelf: + // We need to lock our thread until the caller is done with the handle + // because between getting the handle and using it we could get + // interrupted by the Go runtime and hit the case where the underlying + // thread is swapped out and the original thread is killed, resulting + // in pull-your-hair-out-hard-to-debug issues in the caller. + runtime.LockOSThread() + defer func() { + if Err != nil { + runtime.UnlockOSThread() + closer = nil + } + }() + + file, err := proc.lookup(subpath) + return file, runtime.UnlockOSThread, err + } + // should never be reached + return nil, nil, fmt.Errorf("[internal error] invalid procfs base %q", base) +} + +// OpenThreadSelf returns a handle to "/proc/thread-self/" (or an +// equivalent handle on older kernels where "/proc/thread-self" doesn't exist). +// Once finished with the handle, you must call the returned closer function +// (runtime.UnlockOSThread). You must not pass the returned *os.File to other +// Go threads or use the handle after calling the closer. 
+func (proc *Handle) OpenThreadSelf(subpath string) (_ *os.File, _ ProcThreadSelfCloser, Err error) { + return proc.open(ProcThreadSelf, subpath) +} + +// OpenSelf returns a handle to /proc/self/. +func (proc *Handle) OpenSelf(subpath string) (*os.File, error) { + file, closer, err := proc.open(ProcSelf, subpath) + assert.Assert(closer == nil, "closer for ProcSelf must be nil") + return file, err +} + +// OpenRoot returns a handle to /proc/. +func (proc *Handle) OpenRoot(subpath string) (*os.File, error) { + file, closer, err := proc.open(ProcRoot, subpath) + assert.Assert(closer == nil, "closer for ProcRoot must be nil") + return file, err +} + +// OpenPid returns a handle to /proc/$pid/ (pid can be a pid or tid). +// This is mainly intended for usage when operating on other processes. +func (proc *Handle) OpenPid(pid int, subpath string) (*os.File, error) { + return proc.OpenRoot(strconv.Itoa(pid) + "/" + subpath) +} + +// checkSubpathOvermount checks if the dirfd and path combination is on the +// same mount as the given root. +func checkSubpathOvermount(root, dir fd.Fd, path string) error { + // Get the mntID of our procfs handle. + expectedMountID, err := fd.GetMountID(root, "") + if err != nil { + return fmt.Errorf("get root mount id: %w", err) + } + // Get the mntID of the target magic-link. + gotMountID, err := fd.GetMountID(dir, path) + if err != nil { + return fmt.Errorf("get subpath mount id: %w", err) + } + // As long as the directory mount is alive, even with wrapping mount IDs, + // we would expect to see a different mount ID here. (Of course, if we're + // using unsafeHostProcRoot() then an attaker could change this after we + // did this check.) 
+ if expectedMountID != gotMountID { + return fmt.Errorf("%w: subpath %s/%s has an overmount obscuring the real path (mount ids do not match %d != %d)", + errUnsafeProcfs, dir.Name(), path, expectedMountID, gotMountID) + } + return nil +} + +// Readlink performs a readlink operation on "/proc//" in a way +// that should be free from race attacks. This is most commonly used to get the +// real path of a file by looking at "/proc/self/fd/$n", with the same safety +// protections as [Open] (as well as some additional checks against +// overmounts). +func (proc *Handle) Readlink(base procfsBase, subpath string) (string, error) { + link, closer, err := proc.open(base, subpath) + if closer != nil { + defer closer() + } + if err != nil { + return "", fmt.Errorf("get safe %s/%s handle: %w", base, subpath, err) + } + defer link.Close() //nolint:errcheck // close failures aren't critical here + + // Try to detect if there is a mount on top of the magic-link. This should + // be safe in general (a mount on top of the path afterwards would not + // affect the handle itself) and will definitely be safe if we are using + // privateProcRoot() (at least since Linux 5.12[1], when anonymous mount + // namespaces were completely isolated from external mounts including mount + // propagation events). + // + // [1]: Linux commit ee2e3f50629f ("mount: fix mounting of detached mounts + // onto targets that reside on shared mounts"). + if err := checkSubpathOvermount(proc.Inner, link, ""); err != nil { + return "", fmt.Errorf("check safety of %s/%s magiclink: %w", base, subpath, err) + } + + // readlinkat implies AT_EMPTY_PATH since Linux 2.6.39. See Linux commit + // 65cfc6722361 ("readlinkat(), fchownat() and fstatat() with empty + // relative pathnames"). + return fd.Readlinkat(link, "") +} + +// ProcSelfFdReadlink gets the real path of the given file by looking at +// readlink(/proc/thread-self/fd/$n). +// +// This is just a wrapper around [Handle.Readlink]. 
+func ProcSelfFdReadlink(fd fd.Fd) (string, error) { + procRoot, err := OpenProcRoot() // subset=pid + if err != nil { + return "", err + } + defer procRoot.Close() //nolint:errcheck // close failures aren't critical here + + fdPath := "fd/" + strconv.Itoa(int(fd.Fd())) + return procRoot.Readlink(ProcThreadSelf, fdPath) +} + +// CheckProcSelfFdPath returns whether the given file handle matches the +// expected path. (This is inherently racy.) +func CheckProcSelfFdPath(path string, file fd.Fd) error { + if err := fd.IsDeadInode(file); err != nil { + return err + } + actualPath, err := ProcSelfFdReadlink(file) + if err != nil { + return fmt.Errorf("get path of handle: %w", err) + } + if actualPath != path { + return fmt.Errorf("%w: handle path %q doesn't match expected path %q", internal.ErrPossibleBreakout, actualPath, path) + } + return nil +} + +// ReopenFd takes an existing file descriptor and "re-opens" it through +// /proc/thread-self/fd/. This allows for O_PATH file descriptors to be +// upgraded to regular file descriptors, as well as changing the open mode of a +// regular file descriptor. Some filesystems have unique handling of open(2) +// which make this incredibly useful (such as /dev/ptmx). +func ReopenFd(handle fd.Fd, flags int) (*os.File, error) { + procRoot, err := OpenProcRoot() // subset=pid + if err != nil { + return nil, err + } + defer procRoot.Close() //nolint:errcheck // close failures aren't critical here + + // We can't operate on /proc/thread-self/fd/$n directly when doing a + // re-open, so we need to open /proc/thread-self/fd and then open a single + // final component. + procFdDir, closer, err := procRoot.OpenThreadSelf("fd/") + if err != nil { + return nil, fmt.Errorf("get safe /proc/thread-self/fd handle: %w", err) + } + defer procFdDir.Close() //nolint:errcheck // close failures aren't critical here + defer closer() + + // Try to detect if there is a mount on top of the magic-link we are about + // to open. 
If we are using unsafeHostProcRoot(), this could change after + // we check it (and there's nothing we can do about that) but for + // privateProcRoot() this should be guaranteed to be safe (at least since + // Linux 5.12[1], when anonymous mount namespaces were completely isolated + // from external mounts including mount propagation events). + // + // [1]: Linux commit ee2e3f50629f ("mount: fix mounting of detached mounts + // onto targets that reside on shared mounts"). + fdStr := strconv.Itoa(int(handle.Fd())) + if err := checkSubpathOvermount(procRoot.Inner, procFdDir, fdStr); err != nil { + return nil, fmt.Errorf("check safety of /proc/thread-self/fd/%s magiclink: %w", fdStr, err) + } + + flags |= unix.O_CLOEXEC + // Rather than just wrapping fd.Openat, open-code it so we can copy + // handle.Name(). + reopenFd, err := unix.Openat(int(procFdDir.Fd()), fdStr, flags, 0) + if err != nil { + return nil, fmt.Errorf("reopen fd %d: %w", handle.Fd(), err) + } + return os.NewFile(uintptr(reopenFd), handle.Name()), nil +} + +// Test hooks used in the procfs tests to verify that the fallback logic works. +// See testing_mocks_linux_test.go and procfs_linux_test.go for more details. 
+var ( + hookForcePrivateProcRootOpenTree = hookDummyFile + hookForcePrivateProcRootOpenTreeAtRecursive = hookDummyFile + hookForceGetProcRootUnsafe = hookDummy + + hookForceProcSelfTask = hookDummy + hookForceProcSelf = hookDummy +) + +func hookDummy() bool { return false } +func hookDummyFile(_ io.Closer) bool { return false } diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs/procfs_lookup_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs/procfs_lookup_linux.go new file mode 100644 index 00000000000..1ad1f18eee6 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs/procfs_lookup_linux.go @@ -0,0 +1,222 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// This code is adapted to be a minimal version of the libpathrs proc resolver +// . +// As we only need O_PATH|O_NOFOLLOW support, this is not too much to port. + +package procfs + +import ( + "fmt" + "os" + "path" + "path/filepath" + "strings" + + "golang.org/x/sys/unix" + + "github.com/cyphar/filepath-securejoin/internal/consts" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux" +) + +// procfsLookupInRoot is a stripped down version of completeLookupInRoot, +// entirely designed to support the very small set of features necessary to +// make procfs handling work. Unlike completeLookupInRoot, we always have +// O_PATH|O_NOFOLLOW behaviour for trailing symlinks. 
+// +// The main restrictions are: +// +// - ".." is not supported (as it requires either os.Root-style replays, +// which is more bug-prone; or procfs verification, which is not possible +// due to re-entrancy issues). +// - Absolute symlinks for the same reason (and all absolute symlinks in +// procfs are magic-links, which we want to skip anyway). +// - If statx is supported (checkSymlinkOvermount), any mount-point crossings +// (which is the main attack of concern against /proc). +// - Partial lookups are not supported, so the symlink stack is not needed. +// - Trailing slash special handling is not necessary in most cases (if we +// operating on procfs, it's usually with programmer-controlled strings +// that will then be re-opened), so we skip it since whatever re-opens it +// can deal with it. It's a creature comfort anyway. +// +// If the system supports openat2(), this is implemented using equivalent flags +// (RESOLVE_BENEATH | RESOLVE_NO_XDEV | RESOLVE_NO_MAGICLINKS). +func procfsLookupInRoot(procRoot fd.Fd, unsafePath string) (Handle *os.File, _ error) { + unsafePath = filepath.ToSlash(unsafePath) // noop + + // Make sure that an empty unsafe path still returns something sane, even + // with openat2 (which doesn't have AT_EMPTY_PATH semantics yet). + if unsafePath == "" { + unsafePath = "." + } + + // This is already checked by getProcRoot, but make sure here since the + // core security of this lookup is based on this assumption. + if err := verifyProcRoot(procRoot); err != nil { + return nil, err + } + + if linux.HasOpenat2() { + // We prefer being able to use RESOLVE_NO_XDEV if we can, to be + // absolutely sure we are operating on a clean /proc handle that + // doesn't have any cheeky overmounts that could trick us (including + // symlink mounts on top of /proc/thread-self). RESOLVE_BENEATH isn't + // strictly needed, but just use it since we have it. 
+ // + // NOTE: /proc/self is technically a magic-link (the contents of the + // symlink are generated dynamically), but it doesn't use + // nd_jump_link() so RESOLVE_NO_MAGICLINKS allows it. + // + // TODO: It would be nice to have RESOLVE_NO_DOTDOT, purely for + // self-consistency with the backup O_PATH resolver. + handle, err := fd.Openat2(procRoot, unsafePath, &unix.OpenHow{ + Flags: unix.O_PATH | unix.O_NOFOLLOW | unix.O_CLOEXEC, + Resolve: unix.RESOLVE_BENEATH | unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_MAGICLINKS, + }) + if err != nil { + // TODO: Once we bump the minimum Go version to 1.20, we can use + // multiple %w verbs for this wrapping. For now we need to use a + // compatibility shim for older Go versions. + // err = fmt.Errorf("%w: %w", errUnsafeProcfs, err) + return nil, gocompat.WrapBaseError(err, errUnsafeProcfs) + } + return handle, nil + } + + // To mirror openat2(RESOLVE_BENEATH), we need to return an error if the + // path is absolute. + if path.IsAbs(unsafePath) { + return nil, fmt.Errorf("%w: cannot resolve absolute paths in procfs resolver", internal.ErrPossibleBreakout) + } + + currentDir, err := fd.Dup(procRoot) + if err != nil { + return nil, fmt.Errorf("clone root fd: %w", err) + } + defer func() { + // If a handle is not returned, close the internal handle. + if Handle == nil { + _ = currentDir.Close() + } + }() + + var ( + linksWalked int + currentPath string + remainingPath = unsafePath + ) + for remainingPath != "" { + // Get the next path component. + var part string + if i := strings.IndexByte(remainingPath, '/'); i == -1 { + part, remainingPath = remainingPath, "" + } else { + part, remainingPath = remainingPath[:i], remainingPath[i+1:] + } + if part == "" { + // no-op component, but treat it the same as "." + part = "." + } + if part == ".." { + // not permitted + return nil, fmt.Errorf("%w: cannot walk into '..' 
in procfs resolver", internal.ErrPossibleBreakout) + } + + // Apply the component lexically to the path we are building. + // currentPath does not contain any symlinks, and we are lexically + // dealing with a single component, so it's okay to do a filepath.Clean + // here. (Not to mention that ".." isn't allowed.) + nextPath := path.Join("/", currentPath, part) + // If we logically hit the root, just clone the root rather than + // opening the part and doing all of the other checks. + if nextPath == "/" { + // Jump to root. + rootClone, err := fd.Dup(procRoot) + if err != nil { + return nil, fmt.Errorf("clone root fd: %w", err) + } + _ = currentDir.Close() + currentDir = rootClone + currentPath = nextPath + continue + } + + // Try to open the next component. + nextDir, err := fd.Openat(currentDir, part, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) + if err != nil { + return nil, err + } + + // Make sure we are still on procfs and haven't crossed mounts. + if err := verifyProcHandle(nextDir); err != nil { + _ = nextDir.Close() + return nil, fmt.Errorf("check %q component is on procfs: %w", part, err) + } + if err := checkSubpathOvermount(procRoot, nextDir, ""); err != nil { + _ = nextDir.Close() + return nil, fmt.Errorf("check %q component is not overmounted: %w", part, err) + } + + // We are emulating O_PATH|O_NOFOLLOW, so we only need to traverse into + // trailing symlinks if we are not the final component. Otherwise we + // can just return the currentDir. + if remainingPath != "" { + st, err := nextDir.Stat() + if err != nil { + _ = nextDir.Close() + return nil, fmt.Errorf("stat component %q: %w", part, err) + } + + if st.Mode()&os.ModeType == os.ModeSymlink { + // readlinkat implies AT_EMPTY_PATH since Linux 2.6.39. See + // Linux commit 65cfc6722361 ("readlinkat(), fchownat() and + // fstatat() with empty relative pathnames"). + linkDest, err := fd.Readlinkat(nextDir, "") + // We don't need the handle anymore. 
+ _ = nextDir.Close() + if err != nil { + return nil, err + } + + linksWalked++ + if linksWalked > consts.MaxSymlinkLimit { + return nil, &os.PathError{Op: "securejoin.procfsLookupInRoot", Path: "/proc/" + unsafePath, Err: unix.ELOOP} + } + + // Update our logical remaining path. + remainingPath = linkDest + "/" + remainingPath + // Absolute symlinks are probably magiclinks, we reject them. + if path.IsAbs(linkDest) { + return nil, fmt.Errorf("%w: cannot jump to / in procfs resolver -- possible magiclink", internal.ErrPossibleBreakout) + } + continue + } + } + + // Walk into the next component. + _ = currentDir.Close() + currentDir = nextDir + currentPath = nextPath + } + + // One final sanity-check. + if err := verifyProcHandle(currentDir); err != nil { + return nil, fmt.Errorf("check final handle is on procfs: %w", err) + } + if err := checkSubpathOvermount(procRoot, currentDir, ""); err != nil { + return nil, fmt.Errorf("check final handle is not overmounted: %w", err) + } + return currentDir, nil +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/lookup_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/lookup_linux.go similarity index 86% rename from vendor/github.com/cyphar/filepath-securejoin/lookup_linux.go rename to vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/lookup_linux.go index 290befa1547..f47504e663c 100644 --- a/vendor/github.com/cyphar/filepath-securejoin/lookup_linux.go +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/lookup_linux.go @@ -1,10 +1,15 @@ +// SPDX-License-Identifier: MPL-2.0 + //go:build linux -// Copyright (C) 2024 SUSE LLC. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. -package securejoin +package pathrs import ( "errors" @@ -12,10 +17,15 @@ import ( "os" "path" "path/filepath" - "slices" "strings" "golang.org/x/sys/unix" + + "github.com/cyphar/filepath-securejoin/internal/consts" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs" ) type symlinkStackEntry struct { @@ -113,12 +123,12 @@ func (s *symlinkStack) push(dir *os.File, remainingPath, linkTarget string) erro return nil } // Split the link target and clean up any "" parts. - linkTargetParts := slices.DeleteFunc( + linkTargetParts := gocompat.SlicesDeleteFunc( strings.Split(linkTarget, "/"), func(part string) bool { return part == "" || part == "." }) // Copy the directory so the caller doesn't close our copy. - dirCopy, err := dupFile(dir) + dirCopy, err := fd.Dup(dir) if err != nil { return err } @@ -160,11 +170,11 @@ func (s *symlinkStack) PopTopSymlink() (*os.File, string, bool) { // within the provided root (a-la RESOLVE_IN_ROOT) and opens the final existing // component of the requested path, returning a file handle to the final // existing component and a string containing the remaining path components. 
-func partialLookupInRoot(root *os.File, unsafePath string) (*os.File, string, error) { +func partialLookupInRoot(root fd.Fd, unsafePath string) (*os.File, string, error) { return lookupInRoot(root, unsafePath, true) } -func completeLookupInRoot(root *os.File, unsafePath string) (*os.File, error) { +func completeLookupInRoot(root fd.Fd, unsafePath string) (*os.File, error) { handle, remainingPath, err := lookupInRoot(root, unsafePath, false) if remainingPath != "" && err == nil { // should never happen @@ -175,7 +185,7 @@ func completeLookupInRoot(root *os.File, unsafePath string) (*os.File, error) { return handle, err } -func lookupInRoot(root *os.File, unsafePath string, partial bool) (Handle *os.File, _ string, _ error) { +func lookupInRoot(root fd.Fd, unsafePath string, partial bool) (Handle *os.File, _ string, _ error) { unsafePath = filepath.ToSlash(unsafePath) // noop // This is very similar to SecureJoin, except that we operate on the @@ -183,20 +193,20 @@ func lookupInRoot(root *os.File, unsafePath string, partial bool) (Handle *os.Fi // managed open, along with the remaining path components not opened. // Try to use openat2 if possible. - if hasOpenat2() { + if linux.HasOpenat2() { return lookupOpenat2(root, unsafePath, partial) } // Get the "actual" root path from /proc/self/fd. This is necessary if the // root is some magic-link like /proc/$pid/root, in which case we want to - // make sure when we do checkProcSelfFdPath that we are using the correct - // root path. - logicalRootPath, err := procSelfFdReadlink(root) + // make sure when we do procfs.CheckProcSelfFdPath that we are using the + // correct root path. 
+ logicalRootPath, err := procfs.ProcSelfFdReadlink(root) if err != nil { return nil, "", fmt.Errorf("get real root path: %w", err) } - currentDir, err := dupFile(root) + currentDir, err := fd.Dup(root) if err != nil { return nil, "", fmt.Errorf("clone root fd: %w", err) } @@ -261,7 +271,7 @@ func lookupInRoot(root *os.File, unsafePath string, partial bool) (Handle *os.Fi return nil, "", fmt.Errorf("walking into root with part %q failed: %w", part, err) } // Jump to root. - rootClone, err := dupFile(root) + rootClone, err := fd.Dup(root) if err != nil { return nil, "", fmt.Errorf("clone root fd: %w", err) } @@ -272,21 +282,21 @@ func lookupInRoot(root *os.File, unsafePath string, partial bool) (Handle *os.Fi } // Try to open the next component. - nextDir, err := openatFile(currentDir, part, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) - switch { - case err == nil: + nextDir, err := fd.Openat(currentDir, part, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) + switch err { + case nil: st, err := nextDir.Stat() if err != nil { _ = nextDir.Close() return nil, "", fmt.Errorf("stat component %q: %w", part, err) } - switch st.Mode() & os.ModeType { + switch st.Mode() & os.ModeType { //nolint:exhaustive // just a glorified if statement case os.ModeSymlink: // readlinkat implies AT_EMPTY_PATH since Linux 2.6.39. See // Linux commit 65cfc6722361 ("readlinkat(), fchownat() and // fstatat() with empty relative pathnames"). - linkDest, err := readlinkatFile(nextDir, "") + linkDest, err := fd.Readlinkat(nextDir, "") // We don't need the handle anymore. 
_ = nextDir.Close() if err != nil { @@ -294,7 +304,7 @@ func lookupInRoot(root *os.File, unsafePath string, partial bool) (Handle *os.Fi } linksWalked++ - if linksWalked > maxSymlinkLimit { + if linksWalked > consts.MaxSymlinkLimit { return nil, "", &os.PathError{Op: "securejoin.lookupInRoot", Path: logicalRootPath + "/" + unsafePath, Err: unix.ELOOP} } @@ -308,7 +318,7 @@ func lookupInRoot(root *os.File, unsafePath string, partial bool) (Handle *os.Fi // Absolute symlinks reset any work we've already done. if path.IsAbs(linkDest) { // Jump to root. - rootClone, err := dupFile(root) + rootClone, err := fd.Dup(root) if err != nil { return nil, "", fmt.Errorf("clone root fd: %w", err) } @@ -336,12 +346,12 @@ func lookupInRoot(root *os.File, unsafePath string, partial bool) (Handle *os.Fi // rename or mount on the system. if part == ".." { // Make sure the root hasn't moved. - if err := checkProcSelfFdPath(logicalRootPath, root); err != nil { + if err := procfs.CheckProcSelfFdPath(logicalRootPath, root); err != nil { return nil, "", fmt.Errorf("root path moved during lookup: %w", err) } // Make sure the path is what we expect. fullPath := logicalRootPath + nextPath - if err := checkProcSelfFdPath(fullPath, currentDir); err != nil { + if err := procfs.CheckProcSelfFdPath(fullPath, currentDir); err != nil { return nil, "", fmt.Errorf("walking into %q had unexpected result: %w", part, err) } } @@ -372,7 +382,7 @@ func lookupInRoot(root *os.File, unsafePath string, partial bool) (Handle *os.Fi // context of openat2, a trailing slash and a trailing "/." are completely // equivalent. 
if strings.HasSuffix(unsafePath, "/") { - nextDir, err := openatFile(currentDir, ".", unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) + nextDir, err := fd.Openat(currentDir, ".", unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) if err != nil { if !partial { _ = currentDir.Close() diff --git a/vendor/github.com/cyphar/filepath-securejoin/mkdir_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/mkdir_linux.go similarity index 71% rename from vendor/github.com/cyphar/filepath-securejoin/mkdir_linux.go rename to vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/mkdir_linux.go index 6dfe8c42b36..f3c62b0dac6 100644 --- a/vendor/github.com/cyphar/filepath-securejoin/mkdir_linux.go +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/mkdir_linux.go @@ -1,27 +1,59 @@ +// SPDX-License-Identifier: MPL-2.0 + //go:build linux -// Copyright (C) 2024 SUSE LLC. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. -package securejoin +package pathrs import ( "errors" "fmt" "os" "path/filepath" - "slices" "strings" "golang.org/x/sys/unix" -) -var ( - errInvalidMode = errors.New("invalid permission mode") - errPossibleAttack = errors.New("possible attack detected") + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux" ) +var errInvalidMode = errors.New("invalid permission mode") + +// modePermExt is like os.ModePerm except that it also includes the set[ug]id +// and sticky bits. 
+const modePermExt = os.ModePerm | os.ModeSetuid | os.ModeSetgid | os.ModeSticky + +//nolint:cyclop // this function needs to handle a lot of cases +func toUnixMode(mode os.FileMode) (uint32, error) { + sysMode := uint32(mode.Perm()) + if mode&os.ModeSetuid != 0 { + sysMode |= unix.S_ISUID + } + if mode&os.ModeSetgid != 0 { + sysMode |= unix.S_ISGID + } + if mode&os.ModeSticky != 0 { + sysMode |= unix.S_ISVTX + } + // We don't allow file type bits. + if mode&os.ModeType != 0 { + return 0, fmt.Errorf("%w %+.3o (%s): type bits not permitted", errInvalidMode, mode, mode) + } + // We don't allow other unknown modes. + if mode&^modePermExt != 0 || sysMode&unix.S_IFMT != 0 { + return 0, fmt.Errorf("%w %+.3o (%s): unknown mode bits", errInvalidMode, mode, mode) + } + return sysMode, nil +} + // MkdirAllHandle is equivalent to [MkdirAll], except that it is safer to use // in two respects: // @@ -40,17 +72,19 @@ var ( // a brand new lookup of unsafePath (such as with [SecureJoin] or openat2) after // doing [MkdirAll]. If you intend to open the directory after creating it, you // should use MkdirAllHandle. -func MkdirAllHandle(root *os.File, unsafePath string, mode int) (_ *os.File, Err error) { - // Make sure there are no os.FileMode bits set. - if mode&^0o7777 != 0 { - return nil, fmt.Errorf("%w for mkdir 0o%.3o", errInvalidMode, mode) +// +// [SecureJoin]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin#SecureJoin +func MkdirAllHandle(root *os.File, unsafePath string, mode os.FileMode) (_ *os.File, Err error) { + unixMode, err := toUnixMode(mode) + if err != nil { + return nil, err } // On Linux, mkdirat(2) (and os.Mkdir) silently ignore the suid and sgid // bits. We could also silently ignore them but since we have very few // users it seems more prudent to return an error so users notice that // these bits will not be set. 
- if mode&^0o1777 != 0 { - return nil, fmt.Errorf("%w for mkdir 0o%.3o: suid and sgid are ignored by mkdir", errInvalidMode, mode) + if unixMode&^0o1777 != 0 { + return nil, fmt.Errorf("%w for mkdir %+.3o: suid and sgid are ignored by mkdir", errInvalidMode, mode) } // Try to open as much of the path as possible. @@ -76,7 +110,7 @@ func MkdirAllHandle(root *os.File, unsafePath string, mode int) (_ *os.File, Err // // This is mostly a quality-of-life check, because mkdir will simply fail // later if the attacker deletes the tree after this check. - if err := isDeadInode(currentDir); err != nil { + if err := fd.IsDeadInode(currentDir); err != nil { return nil, fmt.Errorf("finding existing subpath of %q: %w", unsafePath, err) } @@ -87,13 +121,13 @@ func MkdirAllHandle(root *os.File, unsafePath string, mode int) (_ *os.File, Err return nil, fmt.Errorf("cannot create subdirectories in %q: %w", currentDir.Name(), unix.ENOTDIR) } else if err != nil { return nil, fmt.Errorf("re-opening handle to %q: %w", currentDir.Name(), err) - } else { + } else { //nolint:revive // indent-error-flow lint doesn't make sense here _ = currentDir.Close() currentDir = reopenDir } remainingParts := strings.Split(remainingPath, string(filepath.Separator)) - if slices.Contains(remainingParts, "..") { + if gocompat.SlicesContains(remainingParts, "..") { // The path contained ".." components after the end of the "real" // components. We could try to safely resolve ".." here but that would // add a bunch of extra logic for something that it's not clear even @@ -105,9 +139,6 @@ func MkdirAllHandle(root *os.File, unsafePath string, mode int) (_ *os.File, Err return nil, fmt.Errorf("%w: yet-to-be-created path %q contains '..' components", unix.ENOENT, remainingPath) } - // Make sure the mode doesn't have any type bits. - mode &^= unix.S_IFMT - // Create the remaining components. 
for _, part := range remainingParts { switch part { @@ -124,11 +155,15 @@ func MkdirAllHandle(root *os.File, unsafePath string, mode int) (_ *os.File, Err // directory at the same time as us. In that case, just continue on as // if we created it (if the created inode is not a directory, the // following open call will fail). - if err := unix.Mkdirat(int(currentDir.Fd()), part, uint32(mode)); err != nil && !errors.Is(err, unix.EEXIST) { + if err := unix.Mkdirat(int(currentDir.Fd()), part, unixMode); err != nil && !errors.Is(err, unix.EEXIST) { err = &os.PathError{Op: "mkdirat", Path: currentDir.Name() + "/" + part, Err: err} // Make the error a bit nicer if the directory is dead. - if err2 := isDeadInode(currentDir); err2 != nil { - err = fmt.Errorf("%w (%w)", err, err2) + if deadErr := fd.IsDeadInode(currentDir); deadErr != nil { + // TODO: Once we bump the minimum Go version to 1.20, we can use + // multiple %w verbs for this wrapping. For now we need to use a + // compatibility shim for older Go versions. + // err = fmt.Errorf("%w (%w)", err, deadErr) + err = gocompat.WrapBaseError(err, deadErr) } return nil, err } @@ -136,13 +171,13 @@ func MkdirAllHandle(root *os.File, unsafePath string, mode int) (_ *os.File, Err // Get a handle to the next component. O_DIRECTORY means we don't need // to use O_PATH. 
var nextDir *os.File - if hasOpenat2() { - nextDir, err = openat2File(currentDir, part, &unix.OpenHow{ + if linux.HasOpenat2() { + nextDir, err = openat2(currentDir, part, &unix.OpenHow{ Flags: unix.O_NOFOLLOW | unix.O_DIRECTORY | unix.O_CLOEXEC, Resolve: unix.RESOLVE_BENEATH | unix.RESOLVE_NO_SYMLINKS | unix.RESOLVE_NO_XDEV, }) } else { - nextDir, err = openatFile(currentDir, part, unix.O_NOFOLLOW|unix.O_DIRECTORY|unix.O_CLOEXEC, 0) + nextDir, err = fd.Openat(currentDir, part, unix.O_NOFOLLOW|unix.O_DIRECTORY|unix.O_CLOEXEC, 0) } if err != nil { return nil, err @@ -194,14 +229,13 @@ func MkdirAllHandle(root *os.File, unsafePath string, mode int) (_ *os.File, Err // an open directory handle as the root, you should use [MkdirAllHandle] instead. // This function is a wrapper around [MkdirAllHandle]. // -// NOTE: The mode argument must be set the unix mode bits (unix.S_I...), not -// the Go generic mode bits ([os.FileMode]...). -func MkdirAll(root, unsafePath string, mode int) error { +// [SecureJoin]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin#SecureJoin +func MkdirAll(root, unsafePath string, mode os.FileMode) error { rootDir, err := os.OpenFile(root, unix.O_PATH|unix.O_DIRECTORY|unix.O_CLOEXEC, 0) if err != nil { return err } - defer rootDir.Close() + defer rootDir.Close() //nolint:errcheck // close failures aren't critical here f, err := MkdirAllHandle(rootDir, unsafePath, mode) if err != nil { diff --git a/vendor/github.com/cyphar/filepath-securejoin/open_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/open_linux.go similarity index 56% rename from vendor/github.com/cyphar/filepath-securejoin/open_linux.go rename to vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/open_linux.go index 230be73f0eb..7492d8cfa06 100644 --- a/vendor/github.com/cyphar/filepath-securejoin/open_linux.go +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/open_linux.go @@ -1,17 +1,22 @@ +// SPDX-License-Identifier: MPL-2.0 + //go:build 
linux -// Copyright (C) 2024 SUSE LLC. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. -package securejoin +package pathrs import ( - "fmt" "os" - "strconv" "golang.org/x/sys/unix" + + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs" ) // OpenatInRoot is equivalent to [OpenInRoot], except that the root is provided @@ -40,12 +45,14 @@ func OpenatInRoot(root *os.File, unsafePath string) (*os.File, error) { // disconnected TTY that could cause a DoS, or some other issue). In order to // use the returned handle, you can "upgrade" it to a proper handle using // [Reopen]. +// +// [SecureJoin]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin#SecureJoin func OpenInRoot(root, unsafePath string) (*os.File, error) { rootDir, err := os.OpenFile(root, unix.O_PATH|unix.O_DIRECTORY|unix.O_CLOEXEC, 0) if err != nil { return nil, err } - defer rootDir.Close() + defer rootDir.Close() //nolint:errcheck // close failures aren't critical here return OpenatInRoot(rootDir, unsafePath) } @@ -63,41 +70,5 @@ func OpenInRoot(root, unsafePath string) (*os.File, error) { // // [CVE-2019-19921]: https://github.com/advisories/GHSA-fh74-hm69-rqjw func Reopen(handle *os.File, flags int) (*os.File, error) { - procRoot, err := getProcRoot() - if err != nil { - return nil, err - } - - // We can't operate on /proc/thread-self/fd/$n directly when doing a - // re-open, so we need to open /proc/thread-self/fd and then open a single - // final component. 
- procFdDir, closer, err := procThreadSelf(procRoot, "fd/") - if err != nil { - return nil, fmt.Errorf("get safe /proc/thread-self/fd handle: %w", err) - } - defer procFdDir.Close() - defer closer() - - // Try to detect if there is a mount on top of the magic-link we are about - // to open. If we are using unsafeHostProcRoot(), this could change after - // we check it (and there's nothing we can do about that) but for - // privateProcRoot() this should be guaranteed to be safe (at least since - // Linux 5.12[1], when anonymous mount namespaces were completely isolated - // from external mounts including mount propagation events). - // - // [1]: Linux commit ee2e3f50629f ("mount: fix mounting of detached mounts - // onto targets that reside on shared mounts"). - fdStr := strconv.Itoa(int(handle.Fd())) - if err := checkSymlinkOvermount(procRoot, procFdDir, fdStr); err != nil { - return nil, fmt.Errorf("check safety of /proc/thread-self/fd/%s magiclink: %w", fdStr, err) - } - - flags |= unix.O_CLOEXEC - // Rather than just wrapping openatFile, open-code it so we can copy - // handle.Name(). - reopenFd, err := unix.Openat(int(procFdDir.Fd()), fdStr, flags, 0) - if err != nil { - return nil, fmt.Errorf("reopen fd %d: %w", handle.Fd(), err) - } - return os.NewFile(uintptr(reopenFd), handle.Name()), nil + return procfs.ReopenFd(handle, flags) } diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/openat2_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/openat2_linux.go new file mode 100644 index 00000000000..937bc435f2b --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/openat2_linux.go @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +package pathrs + +import ( + "errors" + "fmt" + "os" + "path/filepath" + "strings" + + "golang.org/x/sys/unix" + + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd" + "github.com/cyphar/filepath-securejoin/pathrs-lite/procfs" +) + +func openat2(dir fd.Fd, path string, how *unix.OpenHow) (*os.File, error) { + file, err := fd.Openat2(dir, path, how) + if err != nil { + return nil, err + } + // If we are using RESOLVE_IN_ROOT, the name we generated may be wrong. + if how.Resolve&unix.RESOLVE_IN_ROOT == unix.RESOLVE_IN_ROOT { + if actualPath, err := procfs.ProcSelfFdReadlink(file); err == nil { + // TODO: Ideally we would not need to dup the fd, but you cannot + // easily just swap an *os.File with one from the same fd + // (the GC will close the old one, and you cannot clear the + // finaliser easily because it is associated with an internal + // field of *os.File not *os.File itself). + newFile, err := fd.DupWithName(file, actualPath) + if err != nil { + return nil, err + } + file = newFile + } + } + return file, nil +} + +func lookupOpenat2(root fd.Fd, unsafePath string, partial bool) (*os.File, string, error) { + if !partial { + file, err := openat2(root, unsafePath, &unix.OpenHow{ + Flags: unix.O_PATH | unix.O_CLOEXEC, + Resolve: unix.RESOLVE_IN_ROOT | unix.RESOLVE_NO_MAGICLINKS, + }) + return file, "", err + } + return partialLookupOpenat2(root, unsafePath) +} + +// partialLookupOpenat2 is an alternative implementation of +// partialLookupInRoot, using openat2(RESOLVE_IN_ROOT) to more safely get a +// handle to the deepest existing child of the requested path within the root. +func partialLookupOpenat2(root fd.Fd, unsafePath string) (*os.File, string, error) { + // TODO: Implement this as a git-bisect-like binary search. 
+ + unsafePath = filepath.ToSlash(unsafePath) // noop + endIdx := len(unsafePath) + var lastError error + for endIdx > 0 { + subpath := unsafePath[:endIdx] + + handle, err := openat2(root, subpath, &unix.OpenHow{ + Flags: unix.O_PATH | unix.O_CLOEXEC, + Resolve: unix.RESOLVE_IN_ROOT | unix.RESOLVE_NO_MAGICLINKS, + }) + if err == nil { + // Jump over the slash if we have a non-"" remainingPath. + if endIdx < len(unsafePath) { + endIdx++ + } + // We found a subpath! + return handle, unsafePath[endIdx:], lastError + } + if errors.Is(err, unix.ENOENT) || errors.Is(err, unix.ENOTDIR) { + // That path doesn't exist, let's try the next directory up. + endIdx = strings.LastIndexByte(subpath, '/') + lastError = err + continue + } + return nil, "", fmt.Errorf("open subpath: %w", err) + } + // If we couldn't open anything, the whole subpath is missing. Return a + // copy of the root fd so that the caller doesn't close this one by + // accident. + rootClone, err := fd.Dup(root) + if err != nil { + return nil, "", err + } + return rootClone, unsafePath, lastError +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs/procfs_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs/procfs_linux.go new file mode 100644 index 00000000000..ec187a414c5 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs/procfs_linux.go @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Package procfs provides a safe API for operating on /proc on Linux. 
+package procfs + +import ( + "os" + + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs" +) + +// This package mostly just wraps internal/procfs APIs. This is necessary +// because we are forced to export some things from internal/procfs in order to +// avoid some dependency cycle issues, but we don't want users to see or use +// them. + +// ProcThreadSelfCloser is a callback that needs to be called when you are done +// operating on an [os.File] fetched using [Handle.OpenThreadSelf]. +// +// [os.File]: https://pkg.go.dev/os#File +type ProcThreadSelfCloser = procfs.ProcThreadSelfCloser + +// Handle is a wrapper around an *os.File handle to "/proc", which can be used +// to do further procfs-related operations in a safe way. +type Handle struct { + inner *procfs.Handle +} + +// Close close the resources associated with this [Handle]. Note that if this +// [Handle] was created with [OpenProcRoot], on some kernels the underlying +// procfs handle is cached and so this Close operation may be a no-op. However, +// you should always call Close on [Handle]s once you are done with them. +func (proc *Handle) Close() error { return proc.inner.Close() } + +// OpenProcRoot tries to open a "safer" handle to "/proc" (i.e., one with the +// "subset=pid" mount option applied, available from Linux 5.8). Unless you +// plan to do many [Handle.OpenRoot] operations, users should prefer to use +// this over [OpenUnsafeProcRoot] which is far more dangerous to keep open. +// +// If a safe handle cannot be opened, OpenProcRoot will fall back to opening a +// regular "/proc" handle. +// +// Note that using [Handle.OpenRoot] will still work with handles returned by +// this function. If a subpath cannot be operated on with a safe "/proc" +// handle, then [OpenUnsafeProcRoot] will be called internally and a temporary +// unsafe handle will be used. 
+func OpenProcRoot() (*Handle, error) { + proc, err := procfs.OpenProcRoot() + if err != nil { + return nil, err + } + return &Handle{inner: proc}, nil +} + +// OpenUnsafeProcRoot opens a handle to "/proc" without any overmounts or +// masked paths. You must be extremely careful to make sure this handle is +// never leaked to a container and that you program cannot be tricked into +// writing to arbitrary paths within it. +// +// This is not necessary if you just wish to use [Handle.OpenRoot], as handles +// returned by [OpenProcRoot] will fall back to using a *temporary* unsafe +// handle in that case. You should only really use this if you need to do many +// operations with [Handle.OpenRoot] and the performance overhead of making +// many procfs handles is an issue. If you do use OpenUnsafeProcRoot, you +// should make sure to close the handle as soon as possible to avoid +// known-fd-number attacks. +func OpenUnsafeProcRoot() (*Handle, error) { + proc, err := procfs.OpenUnsafeProcRoot() + if err != nil { + return nil, err + } + return &Handle{inner: proc}, nil +} + +// OpenThreadSelf returns a handle to "/proc/thread-self/" (or an +// equivalent handle on older kernels where "/proc/thread-self" doesn't exist). +// Once finished with the handle, you must call the returned closer function +// ([runtime.UnlockOSThread]). You must not pass the returned *os.File to other +// Go threads or use the handle after calling the closer. +// +// [runtime.UnlockOSThread]: https://pkg.go.dev/runtime#UnlockOSThread +func (proc *Handle) OpenThreadSelf(subpath string) (*os.File, ProcThreadSelfCloser, error) { + return proc.inner.OpenThreadSelf(subpath) +} + +// OpenSelf returns a handle to /proc/self/. +// +// Note that in Go programs with non-homogenous threads, this may result in +// spurious errors. 
If you are monkeying around with APIs that are +// thread-specific, you probably want to use [Handle.OpenThreadSelf] instead +// which will guarantee that the handle refers to the same thread as the caller +// is executing on. +func (proc *Handle) OpenSelf(subpath string) (*os.File, error) { + return proc.inner.OpenSelf(subpath) +} + +// OpenRoot returns a handle to /proc/. +// +// You should only use this when you need to operate on global procfs files +// (such as sysctls in /proc/sys). Unlike [Handle.OpenThreadSelf], +// [Handle.OpenSelf], and [Handle.OpenPid], the procfs handle used internally +// for this operation will never use "subset=pid", which makes it a more juicy +// target for [CVE-2024-21626]-style attacks (and doing something like opening +// a directory with OpenRoot effectively leaks [OpenUnsafeProcRoot] as long as +// the file descriptor is open). +// +// [CVE-2024-21626]: https://github.com/opencontainers/runc/security/advisories/GHSA-xr7r-f8xq-vfvv +func (proc *Handle) OpenRoot(subpath string) (*os.File, error) { + return proc.inner.OpenRoot(subpath) +} + +// OpenPid returns a handle to /proc/$pid/ (pid can be a pid or tid). +// This is mainly intended for usage when operating on other processes. +// +// You should not use this for the current thread, as special handling is +// needed for /proc/thread-self (or /proc/self/task/) when dealing with +// goroutine scheduling -- use [Handle.OpenThreadSelf] instead. +// +// To refer to the current thread-group, you should use prefer +// [Handle.OpenSelf] to passing os.Getpid as the pid argument. +func (proc *Handle) OpenPid(pid int, subpath string) (*os.File, error) { + return proc.inner.OpenPid(pid, subpath) +} + +// ProcSelfFdReadlink gets the real path of the given file by looking at +// /proc/self/fd/ with [readlink]. 
It is effectively just shorthand for +// something along the lines of: +// +// proc, err := procfs.OpenProcRoot() +// if err != nil { +// return err +// } +// link, err := proc.OpenThreadSelf(fmt.Sprintf("fd/%d", f.Fd())) +// if err != nil { +// return err +// } +// defer link.Close() +// var buf [4096]byte +// n, err := unix.Readlinkat(int(link.Fd()), "", buf[:]) +// if err != nil { +// return err +// } +// pathname := buf[:n] +// +// [readlink]: https://pkg.go.dev/golang.org/x/sys/unix#Readlinkat +func ProcSelfFdReadlink(f *os.File) (string, error) { + return procfs.ProcSelfFdReadlink(f) +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/procfs_linux.go b/vendor/github.com/cyphar/filepath-securejoin/procfs_linux.go deleted file mode 100644 index 8cc827d7046..00000000000 --- a/vendor/github.com/cyphar/filepath-securejoin/procfs_linux.go +++ /dev/null @@ -1,440 +0,0 @@ -//go:build linux - -// Copyright (C) 2024 SUSE LLC. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package securejoin - -import ( - "errors" - "fmt" - "os" - "runtime" - "strconv" - "sync" - - "golang.org/x/sys/unix" -) - -func fstat(f *os.File) (unix.Stat_t, error) { - var stat unix.Stat_t - if err := unix.Fstat(int(f.Fd()), &stat); err != nil { - return stat, &os.PathError{Op: "fstat", Path: f.Name(), Err: err} - } - return stat, nil -} - -func fstatfs(f *os.File) (unix.Statfs_t, error) { - var statfs unix.Statfs_t - if err := unix.Fstatfs(int(f.Fd()), &statfs); err != nil { - return statfs, &os.PathError{Op: "fstatfs", Path: f.Name(), Err: err} - } - return statfs, nil -} - -// The kernel guarantees that the root inode of a procfs mount has an -// f_type of PROC_SUPER_MAGIC and st_ino of PROC_ROOT_INO. 
-const ( - procSuperMagic = 0x9fa0 // PROC_SUPER_MAGIC - procRootIno = 1 // PROC_ROOT_INO -) - -func verifyProcRoot(procRoot *os.File) error { - if statfs, err := fstatfs(procRoot); err != nil { - return err - } else if statfs.Type != procSuperMagic { - return fmt.Errorf("%w: incorrect procfs root filesystem type 0x%x", errUnsafeProcfs, statfs.Type) - } - if stat, err := fstat(procRoot); err != nil { - return err - } else if stat.Ino != procRootIno { - return fmt.Errorf("%w: incorrect procfs root inode number %d", errUnsafeProcfs, stat.Ino) - } - return nil -} - -var hasNewMountApi = sync.OnceValue(func() bool { - // All of the pieces of the new mount API we use (fsopen, fsconfig, - // fsmount, open_tree) were added together in Linux 5.1[1,2], so we can - // just check for one of the syscalls and the others should also be - // available. - // - // Just try to use open_tree(2) to open a file without OPEN_TREE_CLONE. - // This is equivalent to openat(2), but tells us if open_tree is - // available (and thus all of the other basic new mount API syscalls). - // open_tree(2) is most light-weight syscall to test here. - // - // [1]: merge commit 400913252d09 - // [2]: - fd, err := unix.OpenTree(-int(unix.EBADF), "/", unix.OPEN_TREE_CLOEXEC) - if err != nil { - return false - } - _ = unix.Close(fd) - return true -}) - -func fsopen(fsName string, flags int) (*os.File, error) { - // Make sure we always set O_CLOEXEC. - flags |= unix.FSOPEN_CLOEXEC - fd, err := unix.Fsopen(fsName, flags) - if err != nil { - return nil, os.NewSyscallError("fsopen "+fsName, err) - } - return os.NewFile(uintptr(fd), "fscontext:"+fsName), nil -} - -func fsmount(ctx *os.File, flags, mountAttrs int) (*os.File, error) { - // Make sure we always set O_CLOEXEC. 
- flags |= unix.FSMOUNT_CLOEXEC - fd, err := unix.Fsmount(int(ctx.Fd()), flags, mountAttrs) - if err != nil { - return nil, os.NewSyscallError("fsmount "+ctx.Name(), err) - } - return os.NewFile(uintptr(fd), "fsmount:"+ctx.Name()), nil -} - -func newPrivateProcMount() (*os.File, error) { - procfsCtx, err := fsopen("proc", unix.FSOPEN_CLOEXEC) - if err != nil { - return nil, err - } - defer procfsCtx.Close() - - // Try to configure hidepid=ptraceable,subset=pid if possible, but ignore errors. - _ = unix.FsconfigSetString(int(procfsCtx.Fd()), "hidepid", "ptraceable") - _ = unix.FsconfigSetString(int(procfsCtx.Fd()), "subset", "pid") - - // Get an actual handle. - if err := unix.FsconfigCreate(int(procfsCtx.Fd())); err != nil { - return nil, os.NewSyscallError("fsconfig create procfs", err) - } - return fsmount(procfsCtx, unix.FSMOUNT_CLOEXEC, unix.MS_RDONLY|unix.MS_NODEV|unix.MS_NOEXEC|unix.MS_NOSUID) -} - -func openTree(dir *os.File, path string, flags uint) (*os.File, error) { - dirFd := -int(unix.EBADF) - dirName := "." - if dir != nil { - dirFd = int(dir.Fd()) - dirName = dir.Name() - } - // Make sure we always set O_CLOEXEC. - flags |= unix.OPEN_TREE_CLOEXEC - fd, err := unix.OpenTree(dirFd, path, flags) - if err != nil { - return nil, &os.PathError{Op: "open_tree", Path: path, Err: err} - } - return os.NewFile(uintptr(fd), dirName+"/"+path), nil -} - -func clonePrivateProcMount() (_ *os.File, Err error) { - // Try to make a clone without using AT_RECURSIVE if we can. If this works, - // we can be sure there are no over-mounts and so if the root is valid then - // we're golden. Otherwise, we have to deal with over-mounts. 
- procfsHandle, err := openTree(nil, "/proc", unix.OPEN_TREE_CLONE) - if err != nil || hookForcePrivateProcRootOpenTreeAtRecursive(procfsHandle) { - procfsHandle, err = openTree(nil, "/proc", unix.OPEN_TREE_CLONE|unix.AT_RECURSIVE) - } - if err != nil { - return nil, fmt.Errorf("creating a detached procfs clone: %w", err) - } - defer func() { - if Err != nil { - _ = procfsHandle.Close() - } - }() - if err := verifyProcRoot(procfsHandle); err != nil { - return nil, err - } - return procfsHandle, nil -} - -func privateProcRoot() (*os.File, error) { - if !hasNewMountApi() || hookForceGetProcRootUnsafe() { - return nil, fmt.Errorf("new mount api: %w", unix.ENOTSUP) - } - // Try to create a new procfs mount from scratch if we can. This ensures we - // can get a procfs mount even if /proc is fake (for whatever reason). - procRoot, err := newPrivateProcMount() - if err != nil || hookForcePrivateProcRootOpenTree(procRoot) { - // Try to clone /proc then... - procRoot, err = clonePrivateProcMount() - } - return procRoot, err -} - -func unsafeHostProcRoot() (_ *os.File, Err error) { - procRoot, err := os.OpenFile("/proc", unix.O_PATH|unix.O_NOFOLLOW|unix.O_DIRECTORY|unix.O_CLOEXEC, 0) - if err != nil { - return nil, err - } - defer func() { - if Err != nil { - _ = procRoot.Close() - } - }() - if err := verifyProcRoot(procRoot); err != nil { - return nil, err - } - return procRoot, nil -} - -func doGetProcRoot() (*os.File, error) { - procRoot, err := privateProcRoot() - if err != nil { - // Fall back to using a /proc handle if making a private mount failed. - // If we have openat2, at least we can avoid some kinds of over-mount - // attacks, but without openat2 there's not much we can do. 
- procRoot, err = unsafeHostProcRoot() - } - return procRoot, err -} - -var getProcRoot = sync.OnceValues(func() (*os.File, error) { - return doGetProcRoot() -}) - -var hasProcThreadSelf = sync.OnceValue(func() bool { - return unix.Access("/proc/thread-self/", unix.F_OK) == nil -}) - -var errUnsafeProcfs = errors.New("unsafe procfs detected") - -type procThreadSelfCloser func() - -// procThreadSelf returns a handle to /proc/thread-self/ (or an -// equivalent handle on older kernels where /proc/thread-self doesn't exist). -// Once finished with the handle, you must call the returned closer function -// (runtime.UnlockOSThread). You must not pass the returned *os.File to other -// Go threads or use the handle after calling the closer. -// -// This is similar to ProcThreadSelf from runc, but with extra hardening -// applied and using *os.File. -func procThreadSelf(procRoot *os.File, subpath string) (_ *os.File, _ procThreadSelfCloser, Err error) { - // We need to lock our thread until the caller is done with the handle - // because between getting the handle and using it we could get interrupted - // by the Go runtime and hit the case where the underlying thread is - // swapped out and the original thread is killed, resulting in - // pull-your-hair-out-hard-to-debug issues in the caller. - runtime.LockOSThread() - defer func() { - if Err != nil { - runtime.UnlockOSThread() - } - }() - - // Figure out what prefix we want to use. - threadSelf := "thread-self/" - if !hasProcThreadSelf() || hookForceProcSelfTask() { - /// Pre-3.17 kernels don't have /proc/thread-self, so do it manually. - threadSelf = "self/task/" + strconv.Itoa(unix.Gettid()) + "/" - if _, err := fstatatFile(procRoot, threadSelf, unix.AT_SYMLINK_NOFOLLOW); err != nil || hookForceProcSelf() { - // In this case, we running in a pid namespace that doesn't match - // the /proc mount we have. This can happen inside runc. 
- // - // Unfortunately, there is no nice way to get the correct TID to - // use here because of the age of the kernel, so we have to just - // use /proc/self and hope that it works. - threadSelf = "self/" - } - } - - // Grab the handle. - var ( - handle *os.File - err error - ) - if hasOpenat2() { - // We prefer being able to use RESOLVE_NO_XDEV if we can, to be - // absolutely sure we are operating on a clean /proc handle that - // doesn't have any cheeky overmounts that could trick us (including - // symlink mounts on top of /proc/thread-self). RESOLVE_BENEATH isn't - // strictly needed, but just use it since we have it. - // - // NOTE: /proc/self is technically a magic-link (the contents of the - // symlink are generated dynamically), but it doesn't use - // nd_jump_link() so RESOLVE_NO_MAGICLINKS allows it. - // - // NOTE: We MUST NOT use RESOLVE_IN_ROOT here, as openat2File uses - // procSelfFdReadlink to clean up the returned f.Name() if we use - // RESOLVE_IN_ROOT (which would lead to an infinite recursion). - handle, err = openat2File(procRoot, threadSelf+subpath, &unix.OpenHow{ - Flags: unix.O_PATH | unix.O_NOFOLLOW | unix.O_CLOEXEC, - Resolve: unix.RESOLVE_BENEATH | unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_MAGICLINKS, - }) - if err != nil { - return nil, nil, fmt.Errorf("%w: %w", errUnsafeProcfs, err) - } - } else { - handle, err = openatFile(procRoot, threadSelf+subpath, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) - if err != nil { - return nil, nil, fmt.Errorf("%w: %w", errUnsafeProcfs, err) - } - defer func() { - if Err != nil { - _ = handle.Close() - } - }() - // We can't detect bind-mounts of different parts of procfs on top of - // /proc (a-la RESOLVE_NO_XDEV), but we can at least be sure that we - // aren't on the wrong filesystem here. 
- if statfs, err := fstatfs(handle); err != nil { - return nil, nil, err - } else if statfs.Type != procSuperMagic { - return nil, nil, fmt.Errorf("%w: incorrect /proc/self/fd filesystem type 0x%x", errUnsafeProcfs, statfs.Type) - } - } - return handle, runtime.UnlockOSThread, nil -} - -var hasStatxMountId = sync.OnceValue(func() bool { - var ( - stx unix.Statx_t - // We don't care which mount ID we get. The kernel will give us the - // unique one if it is supported. - wantStxMask uint32 = unix.STATX_MNT_ID_UNIQUE | unix.STATX_MNT_ID - ) - err := unix.Statx(-int(unix.EBADF), "/", 0, int(wantStxMask), &stx) - return err == nil && stx.Mask&wantStxMask != 0 -}) - -func getMountId(dir *os.File, path string) (uint64, error) { - // If we don't have statx(STATX_MNT_ID*) support, we can't do anything. - if !hasStatxMountId() { - return 0, nil - } - - var ( - stx unix.Statx_t - // We don't care which mount ID we get. The kernel will give us the - // unique one if it is supported. - wantStxMask uint32 = unix.STATX_MNT_ID_UNIQUE | unix.STATX_MNT_ID - ) - - err := unix.Statx(int(dir.Fd()), path, unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW, int(wantStxMask), &stx) - if stx.Mask&wantStxMask == 0 { - // It's not a kernel limitation, for some reason we couldn't get a - // mount ID. Assume it's some kind of attack. - err = fmt.Errorf("%w: could not get mount id", errUnsafeProcfs) - } - if err != nil { - return 0, &os.PathError{Op: "statx(STATX_MNT_ID_...)", Path: dir.Name() + "/" + path, Err: err} - } - return stx.Mnt_id, nil -} - -func checkSymlinkOvermount(procRoot *os.File, dir *os.File, path string) error { - // Get the mntId of our procfs handle. - expectedMountId, err := getMountId(procRoot, "") - if err != nil { - return err - } - // Get the mntId of the target magic-link. 
- gotMountId, err := getMountId(dir, path) - if err != nil { - return err - } - // As long as the directory mount is alive, even with wrapping mount IDs, - // we would expect to see a different mount ID here. (Of course, if we're - // using unsafeHostProcRoot() then an attaker could change this after we - // did this check.) - if expectedMountId != gotMountId { - return fmt.Errorf("%w: symlink %s/%s has an overmount obscuring the real link (mount ids do not match %d != %d)", errUnsafeProcfs, dir.Name(), path, expectedMountId, gotMountId) - } - return nil -} - -func doRawProcSelfFdReadlink(procRoot *os.File, fd int) (string, error) { - fdPath := fmt.Sprintf("fd/%d", fd) - procFdLink, closer, err := procThreadSelf(procRoot, fdPath) - if err != nil { - return "", fmt.Errorf("get safe /proc/thread-self/%s handle: %w", fdPath, err) - } - defer procFdLink.Close() - defer closer() - - // Try to detect if there is a mount on top of the magic-link. Since we use the handle directly - // provide to the closure. If the closure uses the handle directly, this - // should be safe in general (a mount on top of the path afterwards would - // not affect the handle itself) and will definitely be safe if we are - // using privateProcRoot() (at least since Linux 5.12[1], when anonymous - // mount namespaces were completely isolated from external mounts including - // mount propagation events). - // - // [1]: Linux commit ee2e3f50629f ("mount: fix mounting of detached mounts - // onto targets that reside on shared mounts"). - if err := checkSymlinkOvermount(procRoot, procFdLink, ""); err != nil { - return "", fmt.Errorf("check safety of /proc/thread-self/fd/%d magiclink: %w", fd, err) - } - - // readlinkat implies AT_EMPTY_PATH since Linux 2.6.39. See Linux commit - // 65cfc6722361 ("readlinkat(), fchownat() and fstatat() with empty - // relative pathnames"). 
- return readlinkatFile(procFdLink, "") -} - -func rawProcSelfFdReadlink(fd int) (string, error) { - procRoot, err := getProcRoot() - if err != nil { - return "", err - } - return doRawProcSelfFdReadlink(procRoot, fd) -} - -func procSelfFdReadlink(f *os.File) (string, error) { - return rawProcSelfFdReadlink(int(f.Fd())) -} - -var ( - errPossibleBreakout = errors.New("possible breakout detected") - errInvalidDirectory = errors.New("wandered into deleted directory") - errDeletedInode = errors.New("cannot verify path of deleted inode") -) - -func isDeadInode(file *os.File) error { - // If the nlink of a file drops to 0, there is an attacker deleting - // directories during our walk, which could result in weird /proc values. - // It's better to error out in this case. - stat, err := fstat(file) - if err != nil { - return fmt.Errorf("check for dead inode: %w", err) - } - if stat.Nlink == 0 { - err := errDeletedInode - if stat.Mode&unix.S_IFMT == unix.S_IFDIR { - err = errInvalidDirectory - } - return fmt.Errorf("%w %q", err, file.Name()) - } - return nil -} - -func checkProcSelfFdPath(path string, file *os.File) error { - if err := isDeadInode(file); err != nil { - return err - } - actualPath, err := procSelfFdReadlink(file) - if err != nil { - return fmt.Errorf("get path of handle: %w", err) - } - if actualPath != path { - return fmt.Errorf("%w: handle path %q doesn't match expected path %q", errPossibleBreakout, actualPath, path) - } - return nil -} - -// Test hooks used in the procfs tests to verify that the fallback logic works. -// See testing_mocks_linux_test.go and procfs_linux_test.go for more details. 
-var ( - hookForcePrivateProcRootOpenTree = hookDummyFile - hookForcePrivateProcRootOpenTreeAtRecursive = hookDummyFile - hookForceGetProcRootUnsafe = hookDummy - - hookForceProcSelfTask = hookDummy - hookForceProcSelf = hookDummy -) - -func hookDummy() bool { return false } -func hookDummyFile(_ *os.File) bool { return false } diff --git a/vendor/github.com/cyphar/filepath-securejoin/vfs.go b/vendor/github.com/cyphar/filepath-securejoin/vfs.go index 36373f8c517..4d89a481ca7 100644 --- a/vendor/github.com/cyphar/filepath-securejoin/vfs.go +++ b/vendor/github.com/cyphar/filepath-securejoin/vfs.go @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: BSD-3-Clause + // Copyright (C) 2017-2024 SUSE LLC. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/label/label.go b/vendor/github.com/opencontainers/selinux/go-selinux/label/label.go index 07e0f77dc27..884a8b80593 100644 --- a/vendor/github.com/opencontainers/selinux/go-selinux/label/label.go +++ b/vendor/github.com/opencontainers/selinux/go-selinux/label/label.go @@ -6,78 +6,11 @@ import ( "github.com/opencontainers/selinux/go-selinux" ) -// Deprecated: use selinux.ROFileLabel -var ROMountLabel = selinux.ROFileLabel - -// SetProcessLabel takes a process label and tells the kernel to assign the -// label to the next program executed by the current process. -// Deprecated: use selinux.SetExecLabel -var SetProcessLabel = selinux.SetExecLabel - -// ProcessLabel returns the process label that the kernel will assign -// to the next program executed by the current process. If "" is returned -// this indicates that the default labeling will happen for the process. 
-// Deprecated: use selinux.ExecLabel -var ProcessLabel = selinux.ExecLabel - -// SetSocketLabel takes a process label and tells the kernel to assign the -// label to the next socket that gets created -// Deprecated: use selinux.SetSocketLabel -var SetSocketLabel = selinux.SetSocketLabel - -// SocketLabel retrieves the current default socket label setting -// Deprecated: use selinux.SocketLabel -var SocketLabel = selinux.SocketLabel - -// SetKeyLabel takes a process label and tells the kernel to assign the -// label to the next kernel keyring that gets created -// Deprecated: use selinux.SetKeyLabel -var SetKeyLabel = selinux.SetKeyLabel - -// KeyLabel retrieves the current default kernel keyring label setting -// Deprecated: use selinux.KeyLabel -var KeyLabel = selinux.KeyLabel - -// FileLabel returns the label for specified path -// Deprecated: use selinux.FileLabel -var FileLabel = selinux.FileLabel - -// PidLabel will return the label of the process running with the specified pid -// Deprecated: use selinux.PidLabel -var PidLabel = selinux.PidLabel - // Init initialises the labeling system func Init() { _ = selinux.GetEnabled() } -// ClearLabels will clear all reserved labels -// Deprecated: use selinux.ClearLabels -var ClearLabels = selinux.ClearLabels - -// ReserveLabel will record the fact that the MCS label has already been used. -// This will prevent InitLabels from using the MCS label in a newly created -// container -// Deprecated: use selinux.ReserveLabel -func ReserveLabel(label string) error { - selinux.ReserveLabel(label) - return nil -} - -// ReleaseLabel will remove the reservation of the MCS label. 
-// This will allow InitLabels to use the MCS label in a newly created -// containers -// Deprecated: use selinux.ReleaseLabel -func ReleaseLabel(label string) error { - selinux.ReleaseLabel(label) - return nil -} - -// DupSecOpt takes a process label and returns security options that -// can be used to set duplicate labels on future container processes -// Deprecated: use selinux.DupSecOpt -var DupSecOpt = selinux.DupSecOpt - // FormatMountLabel returns a string to be used by the mount command. Using // the SELinux `context` mount option. Changing labels of files on mount // points with this option can never be changed. diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/label/label_linux.go b/vendor/github.com/opencontainers/selinux/go-selinux/label/label_linux.go index f61a560158b..95f29e21f4e 100644 --- a/vendor/github.com/opencontainers/selinux/go-selinux/label/label_linux.go +++ b/vendor/github.com/opencontainers/selinux/go-selinux/label/label_linux.go @@ -18,7 +18,7 @@ var validOptions = map[string]bool{ "level": true, } -var ErrIncompatibleLabel = errors.New("Bad SELinux option z and Z can not be used together") +var ErrIncompatibleLabel = errors.New("bad SELinux option: z and Z can not be used together") // InitLabels returns the process label and file labels to be used within // the container. 
A list of options can be passed into this function to alter @@ -52,11 +52,11 @@ func InitLabels(options []string) (plabel string, mlabel string, retErr error) { return "", selinux.PrivContainerMountLabel(), nil } if i := strings.Index(opt, ":"); i == -1 { - return "", "", fmt.Errorf("Bad label option %q, valid options 'disable' or \n'user, role, level, type, filetype' followed by ':' and a value", opt) + return "", "", fmt.Errorf("bad label option %q, valid options 'disable' or \n'user, role, level, type, filetype' followed by ':' and a value", opt) } con := strings.SplitN(opt, ":", 2) if !validOptions[con[0]] { - return "", "", fmt.Errorf("Bad label option %q, valid options 'disable, user, role, level, type, filetype'", con[0]) + return "", "", fmt.Errorf("bad label option %q, valid options 'disable, user, role, level, type, filetype'", con[0]) } if con[0] == "filetype" { mcon["type"] = con[1] @@ -79,12 +79,6 @@ func InitLabels(options []string) (plabel string, mlabel string, retErr error) { return processLabel, mountLabel, nil } -// Deprecated: The GenLabels function is only to be used during the transition -// to the official API. Use InitLabels(strings.Fields(options)) instead. 
-func GenLabels(options string) (string, string, error) { - return InitLabels(strings.Fields(options)) -} - // SetFileLabel modifies the "path" label to the specified file label func SetFileLabel(path string, fileLabel string) error { if !selinux.GetEnabled() || fileLabel == "" { @@ -120,17 +114,9 @@ func Relabel(path string, fileLabel string, shared bool) error { c["level"] = "s0" fileLabel = c.Get() } - if err := selinux.Chcon(path, fileLabel, true); err != nil { - return err - } - return nil + return selinux.Chcon(path, fileLabel, true) } -// DisableSecOpt returns a security opt that can disable labeling -// support for future container processes -// Deprecated: use selinux.DisableSecOpt -var DisableSecOpt = selinux.DisableSecOpt - // Validate checks that the label does not include unexpected options func Validate(label string) error { if strings.Contains(label, "z") && strings.Contains(label, "Z") { diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/label/label_stub.go b/vendor/github.com/opencontainers/selinux/go-selinux/label/label_stub.go index f21c80c5ab0..7a54afc5e6d 100644 --- a/vendor/github.com/opencontainers/selinux/go-selinux/label/label_stub.go +++ b/vendor/github.com/opencontainers/selinux/go-selinux/label/label_stub.go @@ -6,25 +6,19 @@ package label // InitLabels returns the process label and file labels to be used within // the container. A list of options can be passed into this function to alter // the labels. -func InitLabels(options []string) (string, string, error) { +func InitLabels([]string) (string, string, error) { return "", "", nil } -// Deprecated: The GenLabels function is only to be used during the transition -// to the official API. Use InitLabels(strings.Fields(options)) instead. 
-func GenLabels(options string) (string, string, error) { - return "", "", nil -} - -func SetFileLabel(path string, fileLabel string) error { +func SetFileLabel(string, string) error { return nil } -func SetFileCreateLabel(fileLabel string) error { +func SetFileCreateLabel(string) error { return nil } -func Relabel(path string, fileLabel string, shared bool) error { +func Relabel(string, string, bool) error { return nil } @@ -35,16 +29,16 @@ func DisableSecOpt() []string { } // Validate checks that the label does not include unexpected options -func Validate(label string) error { +func Validate(string) error { return nil } // RelabelNeeded checks whether the user requested a relabel -func RelabelNeeded(label string) bool { +func RelabelNeeded(string) bool { return false } // IsShared checks that the label includes a "shared" mark -func IsShared(label string) bool { +func IsShared(string) bool { return false } diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/selinux.go b/vendor/github.com/opencontainers/selinux/go-selinux/selinux.go index af058b84b13..15150d47528 100644 --- a/vendor/github.com/opencontainers/selinux/go-selinux/selinux.go +++ b/vendor/github.com/opencontainers/selinux/go-selinux/selinux.go @@ -41,6 +41,10 @@ var ( // ErrVerifierNil is returned when a context verifier function is nil. ErrVerifierNil = errors.New("verifier function is nil") + // ErrNotTGLeader is returned by [SetKeyLabel] if the calling thread + // is not the thread group leader. + ErrNotTGLeader = errors.New("calling thread is not the thread group leader") + // CategoryRange allows the upper bound on the category range to be adjusted CategoryRange = DefaultCategoryRange @@ -149,7 +153,7 @@ func CalculateGlbLub(sourceRange, targetRange string) (string, error) { // of the program is finished to guarantee another goroutine does not migrate to the current // thread before execution is complete. 
func SetExecLabel(label string) error { - return writeCon(attrPath("exec"), label) + return writeConThreadSelf("attr/exec", label) } // SetTaskLabel sets the SELinux label for the current thread, or an error. @@ -157,7 +161,7 @@ func SetExecLabel(label string) error { // be wrapped in runtime.LockOSThread()/runtime.UnlockOSThread() to guarantee // the current thread does not run in a new mislabeled thread. func SetTaskLabel(label string) error { - return writeCon(attrPath("current"), label) + return writeConThreadSelf("attr/current", label) } // SetSocketLabel takes a process label and tells the kernel to assign the @@ -166,12 +170,12 @@ func SetTaskLabel(label string) error { // the socket is created to guarantee another goroutine does not migrate // to the current thread before execution is complete. func SetSocketLabel(label string) error { - return writeCon(attrPath("sockcreate"), label) + return writeConThreadSelf("attr/sockcreate", label) } // SocketLabel retrieves the current socket label setting func SocketLabel() (string, error) { - return readCon(attrPath("sockcreate")) + return readConThreadSelf("attr/sockcreate") } // PeerLabel retrieves the label of the client on the other side of a socket @@ -180,17 +184,21 @@ func PeerLabel(fd uintptr) (string, error) { } // SetKeyLabel takes a process label and tells the kernel to assign the -// label to the next kernel keyring that gets created. Calls to SetKeyLabel -// should be wrapped in runtime.LockOSThread()/runtime.UnlockOSThread() until -// the kernel keyring is created to guarantee another goroutine does not migrate -// to the current thread before execution is complete. +// label to the next kernel keyring that gets created. +// +// Calls to SetKeyLabel should be wrapped in +// runtime.LockOSThread()/runtime.UnlockOSThread() until the kernel keyring is +// created to guarantee another goroutine does not migrate to the current +// thread before execution is complete. 
+// +// Only the thread group leader can set key label. func SetKeyLabel(label string) error { return setKeyLabel(label) } // KeyLabel retrieves the current kernel keyring label setting func KeyLabel() (string, error) { - return readCon("/proc/self/attr/keycreate") + return keyLabel() } // Get returns the Context as a string diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/selinux_linux.go b/vendor/github.com/opencontainers/selinux/go-selinux/selinux_linux.go index f1e95977d3b..70392d98904 100644 --- a/vendor/github.com/opencontainers/selinux/go-selinux/selinux_linux.go +++ b/vendor/github.com/opencontainers/selinux/go-selinux/selinux_linux.go @@ -17,8 +17,11 @@ import ( "strings" "sync" - "github.com/opencontainers/selinux/pkg/pwalkdir" + "github.com/cyphar/filepath-securejoin/pathrs-lite" + "github.com/cyphar/filepath-securejoin/pathrs-lite/procfs" "golang.org/x/sys/unix" + + "github.com/opencontainers/selinux/pkg/pwalkdir" ) const ( @@ -45,7 +48,7 @@ type selinuxState struct { type level struct { cats *big.Int - sens uint + sens int } type mlsRange struct { @@ -73,10 +76,6 @@ var ( mcsList: make(map[string]bool), } - // for attrPath() - attrPathOnce sync.Once - haveThreadSelf bool - // for policyRoot() policyRootOnce sync.Once policyRootVal string @@ -132,12 +131,13 @@ func verifySELinuxfsMount(mnt string) bool { if err == nil { break } - if err == unix.EAGAIN || err == unix.EINTR { //nolint:errorlint // unix errors are bare + if err == unix.EAGAIN || err == unix.EINTR { continue } return false } + //#nosec G115 -- there is no overflow here. 
if uint32(buf.Type) != uint32(unix.SELINUX_MAGIC) { return false } @@ -255,48 +255,187 @@ func readConfig(target string) string { return "" } -func isProcHandle(fh *os.File) error { - var buf unix.Statfs_t +func readConFd(in *os.File) (string, error) { + data, err := io.ReadAll(in) + if err != nil { + return "", err + } + return string(bytes.TrimSuffix(data, []byte{0})), nil +} - for { - err := unix.Fstatfs(int(fh.Fd()), &buf) - if err == nil { - break - } - if err != unix.EINTR { //nolint:errorlint // unix errors are bare - return &os.PathError{Op: "fstatfs", Path: fh.Name(), Err: err} - } +func writeConFd(out *os.File, val string) error { + var err error + if val != "" { + _, err = out.Write([]byte(val)) + } else { + _, err = out.Write(nil) } - if buf.Type != unix.PROC_SUPER_MAGIC { - return fmt.Errorf("file %q is not on procfs", fh.Name()) + return err +} + +// openProcThreadSelf is a small wrapper around [OpenThreadSelf] and +// [pathrs.Reopen] to make "one-shot opens" slightly more ergonomic. The +// provided mode must be os.O_* flags to indicate what mode the returned file +// should be opened with (flags like os.O_CREAT and os.O_EXCL are not +// supported). +// +// If no error occurred, the returned handle is guaranteed to be exactly +// /proc/thread-self/ with no tricky mounts or symlinks causing you to +// operate on an unexpected path (with some caveats on pre-openat2 or +// pre-fsopen kernels). 
+// +// [OpenThreadSelf]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs#Handle.OpenThreadSelf +func openProcThreadSelf(subpath string, mode int) (*os.File, procfs.ProcThreadSelfCloser, error) { + if subpath == "" { + return nil, nil, ErrEmptyPath + } + + proc, err := procfs.OpenProcRoot() + if err != nil { + return nil, nil, err } + defer proc.Close() - return nil -} + handle, closer, err := proc.OpenThreadSelf(subpath) + if err != nil { + return nil, nil, fmt.Errorf("open /proc/thread-self/%s handle: %w", subpath, err) + } + defer handle.Close() // we will return a re-opened handle -func readCon(fpath string) (string, error) { - if fpath == "" { - return "", ErrEmptyPath + file, err := pathrs.Reopen(handle, mode) + if err != nil { + closer() + return nil, nil, fmt.Errorf("reopen /proc/thread-self/%s handle (%#x): %w", subpath, mode, err) } + return file, closer, nil +} - in, err := os.Open(fpath) +// Read the contents of /proc/thread-self/. +func readConThreadSelf(fpath string) (string, error) { + in, closer, err := openProcThreadSelf(fpath, os.O_RDONLY|unix.O_CLOEXEC) if err != nil { return "", err } + defer closer() defer in.Close() - if err := isProcHandle(in); err != nil { + return readConFd(in) +} + +// Write to /proc/thread-self/. +func writeConThreadSelf(fpath, val string) error { + if val == "" { + if !getEnabled() { + return nil + } + } + + out, closer, err := openProcThreadSelf(fpath, os.O_WRONLY|unix.O_CLOEXEC) + if err != nil { + return err + } + defer closer() + defer out.Close() + + return writeConFd(out, val) +} + +// openProcSelf is a small wrapper around [OpenSelf] and [pathrs.Reopen] to +// make "one-shot opens" slightly more ergonomic. The provided mode must be +// os.O_* flags to indicate what mode the returned file should be opened with +// (flags like os.O_CREAT and os.O_EXCL are not supported). 
+// +// If no error occurred, the returned handle is guaranteed to be exactly +// /proc/self/ with no tricky mounts or symlinks causing you to +// operate on an unexpected path (with some caveats on pre-openat2 or +// pre-fsopen kernels). +// +// [OpenSelf]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs#Handle.OpenSelf +func openProcSelf(subpath string, mode int) (*os.File, error) { + if subpath == "" { + return nil, ErrEmptyPath + } + + proc, err := procfs.OpenProcRoot() + if err != nil { + return nil, err + } + defer proc.Close() + + handle, err := proc.OpenSelf(subpath) + if err != nil { + return nil, fmt.Errorf("open /proc/self/%s handle: %w", subpath, err) + } + defer handle.Close() // we will return a re-opened handle + + file, err := pathrs.Reopen(handle, mode) + if err != nil { + return nil, fmt.Errorf("reopen /proc/self/%s handle (%#x): %w", subpath, mode, err) + } + return file, nil +} + +// Read the contents of /proc/self/. +func readConSelf(fpath string) (string, error) { + in, err := openProcSelf(fpath, os.O_RDONLY|unix.O_CLOEXEC) + if err != nil { return "", err } + defer in.Close() + return readConFd(in) } -func readConFd(in *os.File) (string, error) { - data, err := io.ReadAll(in) +// Write to /proc/self/. +func writeConSelf(fpath, val string) error { + if val == "" { + if !getEnabled() { + return nil + } + } + + out, err := openProcSelf(fpath, os.O_WRONLY|unix.O_CLOEXEC) if err != nil { - return "", err + return err } - return string(bytes.TrimSuffix(data, []byte{0})), nil + defer out.Close() + + return writeConFd(out, val) +} + +// openProcPid is a small wrapper around [OpenPid] and [pathrs.Reopen] to make +// "one-shot opens" slightly more ergonomic. The provided mode must be os.O_* +// flags to indicate what mode the returned file should be opened with (flags +// like os.O_CREAT and os.O_EXCL are not supported). 
+// +// If no error occurred, the returned handle is guaranteed to be exactly +// /proc/self/ with no tricky mounts or symlinks causing you to +// operate on an unexpected path (with some caveats on pre-openat2 or +// pre-fsopen kernels). +// +// [OpenPid]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs#Handle.OpenPid +func openProcPid(pid int, subpath string, mode int) (*os.File, error) { + if subpath == "" { + return nil, ErrEmptyPath + } + + proc, err := procfs.OpenProcRoot() + if err != nil { + return nil, err + } + defer proc.Close() + + handle, err := proc.OpenPid(pid, subpath) + if err != nil { + return nil, fmt.Errorf("open /proc/%d/%s handle: %w", pid, subpath, err) + } + defer handle.Close() // we will return a re-opened handle + + file, err := pathrs.Reopen(handle, mode) + if err != nil { + return nil, fmt.Errorf("reopen /proc/%d/%s handle (%#x): %w", pid, subpath, mode, err) + } + return file, nil } // classIndex returns the int index for an object class in the loaded policy, @@ -328,8 +467,8 @@ func lSetFileLabel(fpath string, label string) error { if err == nil { break } - if err != unix.EINTR { //nolint:errorlint // unix errors are bare - return &os.PathError{Op: "lsetxattr", Path: fpath, Err: err} + if err != unix.EINTR { + return &os.PathError{Op: fmt.Sprintf("lsetxattr(label=%s)", label), Path: fpath, Err: err} } } @@ -347,8 +486,8 @@ func setFileLabel(fpath string, label string) error { if err == nil { break } - if err != unix.EINTR { //nolint:errorlint // unix errors are bare - return &os.PathError{Op: "setxattr", Path: fpath, Err: err} + if err != unix.EINTR { + return &os.PathError{Op: fmt.Sprintf("setxattr(label=%s)", label), Path: fpath, Err: err} } } @@ -392,78 +531,34 @@ func lFileLabel(fpath string) (string, error) { } func setFSCreateLabel(label string) error { - return writeCon(attrPath("fscreate"), label) + return writeConThreadSelf("attr/fscreate", label) } // fsCreateLabel returns the default label the 
kernel which the kernel is using // for file system objects created by this task. "" indicates default. func fsCreateLabel() (string, error) { - return readCon(attrPath("fscreate")) + return readConThreadSelf("attr/fscreate") } // currentLabel returns the SELinux label of the current process thread, or an error. func currentLabel() (string, error) { - return readCon(attrPath("current")) + return readConThreadSelf("attr/current") } // pidLabel returns the SELinux label of the given pid, or an error. func pidLabel(pid int) (string, error) { - return readCon(fmt.Sprintf("/proc/%d/attr/current", pid)) + it, err := openProcPid(pid, "attr/current", os.O_RDONLY|unix.O_CLOEXEC) + if err != nil { + return "", nil + } + defer it.Close() + return readConFd(it) } // ExecLabel returns the SELinux label that the kernel will use for any programs // that are executed by the current process thread, or an error. func execLabel() (string, error) { - return readCon(attrPath("exec")) -} - -func writeCon(fpath, val string) error { - if fpath == "" { - return ErrEmptyPath - } - if val == "" { - if !getEnabled() { - return nil - } - } - - out, err := os.OpenFile(fpath, os.O_WRONLY, 0) - if err != nil { - return err - } - defer out.Close() - - if err := isProcHandle(out); err != nil { - return err - } - - if val != "" { - _, err = out.Write([]byte(val)) - } else { - _, err = out.Write(nil) - } - if err != nil { - return err - } - return nil -} - -func attrPath(attr string) string { - // Linux >= 3.17 provides this - const threadSelfPrefix = "/proc/thread-self/attr" - - attrPathOnce.Do(func() { - st, err := os.Stat(threadSelfPrefix) - if err == nil && st.Mode().IsDir() { - haveThreadSelf = true - } - }) - - if haveThreadSelf { - return filepath.Join(threadSelfPrefix, attr) - } - - return filepath.Join("/proc/self/task", strconv.Itoa(unix.Gettid()), "attr", attr) + return readConThreadSelf("exec") } // canonicalizeContext takes a context string and writes it to the kernel @@ -501,14 +596,14 
@@ func catsToBitset(cats string) (*big.Int, error) { return nil, err } for i := catstart; i <= catend; i++ { - bitset.SetBit(bitset, int(i), 1) + bitset.SetBit(bitset, i, 1) } } else { cat, err := parseLevelItem(ranges[0], category) if err != nil { return nil, err } - bitset.SetBit(bitset, int(cat), 1) + bitset.SetBit(bitset, cat, 1) } } @@ -516,16 +611,17 @@ func catsToBitset(cats string) (*big.Int, error) { } // parseLevelItem parses and verifies that a sensitivity or category are valid -func parseLevelItem(s string, sep levelItem) (uint, error) { +func parseLevelItem(s string, sep levelItem) (int, error) { if len(s) < minSensLen || levelItem(s[0]) != sep { return 0, ErrLevelSyntax } - val, err := strconv.ParseUint(s[1:], 10, 32) + const bitSize = 31 // Make sure the result fits into signed int32. + val, err := strconv.ParseUint(s[1:], 10, bitSize) if err != nil { return 0, err } - return uint(val), nil + return int(val), nil } // parseLevel fills a level from a string that contains @@ -582,7 +678,8 @@ func bitsetToStr(c *big.Int) string { var str string length := 0 - for i := int(c.TrailingZeroBits()); i < c.BitLen(); i++ { + i0 := int(c.TrailingZeroBits()) //#nosec G115 -- don't expect TralingZeroBits to return values with highest bit set. + for i := i0; i < c.BitLen(); i++ { if c.Bit(i) == 0 { continue } @@ -622,7 +719,7 @@ func (l *level) equal(l2 *level) bool { // String returns an mlsRange as a string. 
func (m mlsRange) String() string { - low := "s" + strconv.Itoa(int(m.low.sens)) + low := "s" + strconv.Itoa(m.low.sens) if m.low.cats != nil && m.low.cats.BitLen() > 0 { low += ":" + bitsetToStr(m.low.cats) } @@ -631,7 +728,7 @@ func (m mlsRange) String() string { return low } - high := "s" + strconv.Itoa(int(m.high.sens)) + high := "s" + strconv.Itoa(m.high.sens) if m.high.cats != nil && m.high.cats.BitLen() > 0 { high += ":" + bitsetToStr(m.high.cats) } @@ -639,14 +736,16 @@ func (m mlsRange) String() string { return low + "-" + high } -func max(a, b uint) uint { +// TODO: remove these in favor of built-in min/max +// once we stop supporting Go < 1.21. +func maxInt(a, b int) int { if a > b { return a } return b } -func min(a, b uint) uint { +func minInt(a, b int) int { if a < b { return a } @@ -675,10 +774,10 @@ func calculateGlbLub(sourceRange, targetRange string) (string, error) { outrange := &mlsRange{low: &level{}, high: &level{}} /* take the greatest of the low */ - outrange.low.sens = max(s.low.sens, t.low.sens) + outrange.low.sens = maxInt(s.low.sens, t.low.sens) /* take the least of the high */ - outrange.high.sens = min(s.high.sens, t.high.sens) + outrange.high.sens = minInt(s.high.sens, t.high.sens) /* find the intersecting categories */ if s.low.cats != nil && t.low.cats != nil { @@ -723,16 +822,29 @@ func peerLabel(fd uintptr) (string, error) { // setKeyLabel takes a process label and tells the kernel to assign the // label to the next kernel keyring that gets created func setKeyLabel(label string) error { - err := writeCon("/proc/self/attr/keycreate", label) + // Rather than using /proc/thread-self, we want to use /proc/self to + // operate on the thread-group leader. 
+ err := writeConSelf("attr/keycreate", label) if errors.Is(err, os.ErrNotExist) { return nil } if label == "" && errors.Is(err, os.ErrPermission) { return nil } + if errors.Is(err, unix.EACCES) && unix.Getpid() != unix.Gettid() { + return ErrNotTGLeader + } return err } +// KeyLabel retrieves the current kernel keyring label setting for this +// thread-group. +func keyLabel() (string, error) { + // Rather than using /proc/thread-self, we want to use /proc/self to + // operate on the thread-group leader. + return readConSelf("attr/keycreate") +} + // get returns the Context as a string func (c Context) get() string { if l := c["level"]; l != "" { @@ -808,8 +920,7 @@ func enforceMode() int { // setEnforceMode sets the current SELinux mode Enforcing, Permissive. // Disabled is not valid, since this needs to be set at boot time. func setEnforceMode(mode int) error { - //nolint:gosec // ignore G306: permissions to be 0600 or less. - return os.WriteFile(selinuxEnforcePath(), []byte(strconv.Itoa(mode)), 0o644) + return os.WriteFile(selinuxEnforcePath(), []byte(strconv.Itoa(mode)), 0) } // defaultEnforceMode returns the systems default SELinux mode Enforcing, @@ -1016,8 +1127,7 @@ func addMcs(processLabel, fileLabel string) (string, string) { // securityCheckContext validates that the SELinux label is understood by the kernel func securityCheckContext(val string) error { - //nolint:gosec // ignore G306: permissions to be 0600 or less. 
- return os.WriteFile(filepath.Join(getSelinuxMountPoint(), "context"), []byte(val), 0o644) + return os.WriteFile(filepath.Join(getSelinuxMountPoint(), "context"), []byte(val), 0) } // copyLevel returns a label with the MLS/MCS level from src label replaced on @@ -1134,7 +1244,7 @@ func rchcon(fpath, label string) error { //revive:disable:cognitive-complexity } return pwalkdir.Walk(fpath, func(p string, _ fs.DirEntry, _ error) error { if fastMode { - if cLabel, err := lFileLabel(fpath); err == nil && cLabel == label { + if cLabel, err := lFileLabel(p); err == nil && cLabel == label { return nil } } diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/selinux_stub.go b/vendor/github.com/opencontainers/selinux/go-selinux/selinux_stub.go index bc3fd3b3701..267921239c2 100644 --- a/vendor/github.com/opencontainers/selinux/go-selinux/selinux_stub.go +++ b/vendor/github.com/opencontainers/selinux/go-selinux/selinux_stub.go @@ -7,11 +7,11 @@ func attrPath(string) string { return "" } -func readCon(fpath string) (string, error) { +func readConThreadSelf(string) (string, error) { return "", nil } -func writeCon(string, string) error { +func writeConThreadSelf(string, string) error { return nil } @@ -21,27 +21,27 @@ func getEnabled() bool { return false } -func classIndex(class string) (int, error) { +func classIndex(string) (int, error) { return -1, nil } -func setFileLabel(fpath string, label string) error { +func setFileLabel(string, string) error { return nil } -func lSetFileLabel(fpath string, label string) error { +func lSetFileLabel(string, string) error { return nil } -func fileLabel(fpath string) (string, error) { +func fileLabel(string) (string, error) { return "", nil } -func lFileLabel(fpath string) (string, error) { +func lFileLabel(string) (string, error) { return "", nil } -func setFSCreateLabel(label string) error { +func setFSCreateLabel(string) error { return nil } @@ -53,7 +53,7 @@ func currentLabel() (string, error) { return "", nil } -func 
pidLabel(pid int) (string, error) { +func pidLabel(int) (string, error) { return "", nil } @@ -61,38 +61,42 @@ func execLabel() (string, error) { return "", nil } -func canonicalizeContext(val string) (string, error) { +func canonicalizeContext(string) (string, error) { return "", nil } -func computeCreateContext(source string, target string, class string) (string, error) { +func computeCreateContext(string, string, string) (string, error) { return "", nil } -func calculateGlbLub(sourceRange, targetRange string) (string, error) { +func calculateGlbLub(string, string) (string, error) { return "", nil } -func peerLabel(fd uintptr) (string, error) { +func peerLabel(uintptr) (string, error) { return "", nil } -func setKeyLabel(label string) error { +func setKeyLabel(string) error { return nil } +func keyLabel() (string, error) { + return "", nil +} + func (c Context) get() string { return "" } -func newContext(label string) (Context, error) { +func newContext(string) (Context, error) { return Context{}, nil } func clearLabels() { } -func reserveLabel(label string) { +func reserveLabel(string) { } func isMLSEnabled() bool { @@ -103,7 +107,7 @@ func enforceMode() int { return Disabled } -func setEnforceMode(mode int) error { +func setEnforceMode(int) error { return nil } @@ -111,7 +115,7 @@ func defaultEnforceMode() int { return Disabled } -func releaseLabel(label string) { +func releaseLabel(string) { } func roFileLabel() string { @@ -126,27 +130,27 @@ func initContainerLabels() (string, string) { return "", "" } -func containerLabels() (processLabel string, fileLabel string) { +func containerLabels() (string, string) { return "", "" } -func securityCheckContext(val string) error { +func securityCheckContext(string) error { return nil } -func copyLevel(src, dest string) (string, error) { +func copyLevel(string, string) (string, error) { return "", nil } -func chcon(fpath string, label string, recurse bool) error { +func chcon(string, string, bool) error { return nil } 
-func dupSecOpt(src string) ([]string, error) { +func dupSecOpt(string) ([]string, error) { return nil, nil } -func getDefaultContextWithLevel(user, level, scon string) (string, error) { +func getDefaultContextWithLevel(string, string, string) (string, error) { return "", nil } diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/xattrs_linux.go b/vendor/github.com/opencontainers/selinux/go-selinux/xattrs_linux.go index 9e473ca168f..559c851075e 100644 --- a/vendor/github.com/opencontainers/selinux/go-selinux/xattrs_linux.go +++ b/vendor/github.com/opencontainers/selinux/go-selinux/xattrs_linux.go @@ -31,7 +31,7 @@ func lgetxattr(path, attr string) ([]byte, error) { func doLgetxattr(path, attr string, dest []byte) (int, error) { for { sz, err := unix.Lgetxattr(path, attr, dest) - if err != unix.EINTR { //nolint:errorlint // unix errors are bare + if err != unix.EINTR { return sz, err } } @@ -64,7 +64,7 @@ func getxattr(path, attr string) ([]byte, error) { func dogetxattr(path, attr string, dest []byte) (int, error) { for { sz, err := unix.Getxattr(path, attr, dest) - if err != unix.EINTR { //nolint:errorlint // unix errors are bare + if err != unix.EINTR { return sz, err } } diff --git a/vendor/github.com/opencontainers/selinux/pkg/pwalkdir/README.md b/vendor/github.com/opencontainers/selinux/pkg/pwalkdir/README.md index 068ac400565..b827e7dd73f 100644 --- a/vendor/github.com/opencontainers/selinux/pkg/pwalkdir/README.md +++ b/vendor/github.com/opencontainers/selinux/pkg/pwalkdir/README.md @@ -28,7 +28,9 @@ Please note the following limitations of this code: * fs.SkipDir is not supported; - * no errors are ever passed to WalkDirFunc; + * ErrNotExist errors from filepath.WalkDir are silently ignored for any path + except the top directory (WalkDir argument); any other error is returned to + the caller of WalkDir; * once any error is returned from any walkDirFunc instance, no more calls to WalkDirFunc are made, and the error is returned to the caller of 
WalkDir; @@ -51,4 +53,4 @@ filepath.WalkDir. Otherwise (if a WalkDirFunc is actually doing something) this is usually faster, except when the WalkDirN(..., 1) is used. Run `go test -bench .` to see how different operations can benefit from it, as well as how the -level of paralellism affects the speed. +level of parallelism affects the speed. diff --git a/vendor/github.com/opencontainers/selinux/pkg/pwalkdir/pwalkdir.go b/vendor/github.com/opencontainers/selinux/pkg/pwalkdir/pwalkdir.go index 0f5d9f580d1..5d2d09a2985 100644 --- a/vendor/github.com/opencontainers/selinux/pkg/pwalkdir/pwalkdir.go +++ b/vendor/github.com/opencontainers/selinux/pkg/pwalkdir/pwalkdir.go @@ -4,6 +4,7 @@ package pwalkdir import ( + "errors" "fmt" "io/fs" "path/filepath" @@ -60,6 +61,12 @@ func WalkN(root string, walkFn fs.WalkDirFunc, num int) error { go func() { err = filepath.WalkDir(root, func(p string, entry fs.DirEntry, err error) error { if err != nil { + // Walking a file tree can race with removal, + // so ignore ENOENT, except for root. + // https://github.com/opencontainers/selinux/issues/199. + if errors.Is(err, fs.ErrNotExist) && len(p) != rootLen { + return nil + } close(files) return err } diff --git a/vendor/golang.org/x/net/LICENSE b/vendor/golang.org/x/net/LICENSE index 6a66aea5eaf..2a7cf70da6e 100644 --- a/vendor/golang.org/x/net/LICENSE +++ b/vendor/golang.org/x/net/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2009 The Go Authors. All rights reserved. +Copyright 2009 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -10,7 +10,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. 
nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. diff --git a/vendor/modules.txt b/vendor/modules.txt index 3b245e0d120..a8371c8a7a8 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -15,7 +15,7 @@ github.com/cilium/ebpf/internal/sysenc github.com/cilium/ebpf/internal/tracefs github.com/cilium/ebpf/internal/unix github.com/cilium/ebpf/link -# github.com/containerd/console v1.0.4 +# github.com/containerd/console v1.0.5 ## explicit; go 1.13 github.com/containerd/console # github.com/coreos/go-systemd/v22 v22.5.0 @@ -25,9 +25,19 @@ github.com/coreos/go-systemd/v22/dbus # github.com/cpuguy83/go-md2man/v2 v2.0.2 ## explicit; go 1.11 github.com/cpuguy83/go-md2man/v2/md2man -# github.com/cyphar/filepath-securejoin v0.3.5 -## explicit; go 1.21 +# github.com/cyphar/filepath-securejoin v0.5.1 +## explicit; go 1.18 github.com/cyphar/filepath-securejoin +github.com/cyphar/filepath-securejoin/internal/consts +github.com/cyphar/filepath-securejoin/pathrs-lite +github.com/cyphar/filepath-securejoin/pathrs-lite/internal +github.com/cyphar/filepath-securejoin/pathrs-lite/internal/assert +github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd +github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat +github.com/cyphar/filepath-securejoin/pathrs-lite/internal/kernelversion +github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux +github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs +github.com/cyphar/filepath-securejoin/pathrs-lite/procfs # github.com/docker/go-units v0.5.0 ## explicit github.com/docker/go-units @@ -50,7 +60,7 @@ github.com/mrunalp/fileutils ## explicit github.com/opencontainers/runtime-spec/specs-go github.com/opencontainers/runtime-spec/specs-go/features -# github.com/opencontainers/selinux v1.11.0 +# github.com/opencontainers/selinux v1.12.0 => 
./internal/third_party/selinux ## explicit; go 1.19 github.com/opencontainers/selinux/go-selinux github.com/opencontainers/selinux/go-selinux/label @@ -81,7 +91,7 @@ github.com/vishvananda/netns # golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 ## explicit; go 1.18 golang.org/x/exp/constraints -# golang.org/x/net v0.24.0 +# golang.org/x/net v0.33.0 ## explicit; go 1.18 golang.org/x/net/bpf # golang.org/x/sys v0.28.0 @@ -116,3 +126,4 @@ google.golang.org/protobuf/reflect/protoreflect google.golang.org/protobuf/reflect/protoregistry google.golang.org/protobuf/runtime/protoiface google.golang.org/protobuf/runtime/protoimpl +# github.com/opencontainers/selinux => ./internal/third_party/selinux