diff --git a/Makefile b/Makefile index 82cd3be4..7c1ced94 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ REGISTRY ?= ghcr.io -USERNAME ?= talos-systems +USERNAME ?= siderolabs SHA ?= $(shell git describe --match=none --always --abbrev=8 --dirty) TAG ?= $(shell git describe --tag --always --dirty) BRANCH ?= $(shell git rev-parse --abbrev-ref HEAD) @@ -20,7 +20,7 @@ empty := space = $(empty) $(empty) TARGETS = amd-ucode bnx2-bnx2x gvisor hello-world-service intel-ucode -NONFREE_TARGETS = +NONFREE_TARGETS = nvidia-container-toolkit all: $(TARGETS) ## Builds all known pkgs. @@ -51,15 +51,6 @@ $(TARGETS) $(NONFREE_TARGETS): deps.png: bldr graph | dot -Tpng > deps.png -kernel-%: ## Updates the kernel configs: e.g. make kernel-olddefconfig; make kernel-menuconfig; etc. - for platform in $(subst $(,),$(space),$(PLATFORM)); do \ - arch=`basename $$platform` ; \ - $(MAKE) docker-kernel-prepare PLATFORM=$$platform TARGET_ARGS="--tag=$(REGISTRY)/$(USERNAME)/kernel:$(TAG)-$$arch --load"; \ - docker run --rm -it --entrypoint=/toolchain/bin/bash -e PATH=/toolchain/bin:/bin -w /src -v $$PWD/kernel/build/config-$$arch:/host/.hostconfig $(REGISTRY)/$(USERNAME)/kernel:$(TAG)-$$arch -c 'cp /host/.hostconfig .config && make $* && cp .config /host/.hostconfig'; \ - done - -# Utilities - .PHONY: conformance conformance: ## Performs policy checks against the commit and source code. 
- docker run --rm -it -v $(PWD):/src -w /src ghcr.io/talos-systems/conform:v0.1.0-alpha.22 enforce + docker run --rm -it -v $(PWD):/src -w /src ghcr.io/siderolabs/conform:latest enforce diff --git a/Pkgfile b/Pkgfile index 5d138044..945e191b 100644 --- a/Pkgfile +++ b/Pkgfile @@ -1,10 +1,12 @@ -# syntax = ghcr.io/talos-systems/bldr:v0.2.0-alpha.6-frontend +# syntax = ghcr.io/siderolabs/bldr:v0.2.0-alpha.7-1-g9d49478-frontend format: v1alpha2 vars: - TOOLS_IMAGE: ghcr.io/talos-systems/tools:v0.10.0-alpha.0-5-g8197edb - LINUX_FIRMWARE_IMAGE: ghcr.io/talos-systems/linux-firmware:v0.9.0-2-g447ce75 + TOOLS_IMAGE: ghcr.io/siderolabs/tools:v1.1.0-alpha.0-2-gbfc99ca + LINUX_FIRMWARE_IMAGE: ghcr.io/siderolabs/linux-firmware:v1.0.0-5-g615d1a0 + NVIDIA_DRIVER_VERSION_MAJOR: 510 + NVIDIA_DRIVER_VERSION_MINOR: 54 labels: - org.opencontainers.image.source: https://github.com/talos-systems/extensions + org.opencontainers.image.source: https://github.com/siderolabs/extensions diff --git a/README.md b/README.md index a1386866..03db4154 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ The image is composed of a `manifest.yaml` file that provides information and co ## Building Extensions -In the current form, building extensions requires the use of our [bldr](https://github.com/talos-systems/bldr) tool. +In the current form, building extensions requires the use of our [bldr](https://github.com/siderolabs/bldr) tool. It is highly recommended to take a look at an existing extensions as a template for building your own. The rough flow should look like the following: @@ -44,7 +44,7 @@ metadata: ### Creating `pkg.yaml` Creating a `pkg.yaml` file is the normal process from bldr. -See instructions [here](https://github.com/talos-systems/bldr#pkgyaml) for details and examples on this format. +See instructions [here](https://github.com/siderolabs/bldr#pkgyaml) for details and examples on this format. Using other existing extensions in this repo for tips is also highly recommended. 
One important note is that the final directory tree of the generated package should look like this example from the `gvisor` package: diff --git a/container-runtime/gvisor/README.md b/container-runtime/gvisor/README.md index c6c411ef..2a9f9806 100644 --- a/container-runtime/gvisor/README.md +++ b/container-runtime/gvisor/README.md @@ -8,7 +8,7 @@ Enable the extension in the machine configuration before installing Talos: machine: install: extensions: - - image: ghcr.io/talos-systems/gvisor: + - image: ghcr.io/siderolabs/gvisor: ``` gVisor requires unprivileged user namespace creation, so Talos default setting diff --git a/container-runtime/gvisor/runsc.toml b/container-runtime/gvisor/runsc.toml index 07082d01..3826c6af 100644 --- a/container-runtime/gvisor/runsc.toml +++ b/container-runtime/gvisor/runsc.toml @@ -1,3 +1,3 @@ [runsc_config] -# See https://github.com/talos-systems/extensions/issues/4 +# See https://github.com/siderolabs/extensions/issues/4 ignore-cgroups = "true" diff --git a/examples/hello-world-service/README.md b/examples/hello-world-service/README.md index 21781f96..48120cb1 100644 --- a/examples/hello-world-service/README.md +++ b/examples/hello-world-service/README.md @@ -10,7 +10,7 @@ Enable the extension in the machine configuration before installing Talos: machine: install: extensions: - - image: ghcr.io/talos-systems/hello-world-service: + - image: ghcr.io/siderolabs/hello-world-service: ``` Once this example extension is installed, it will provide simple HTTP server which responds with a message on port 80: diff --git a/examples/hello-world-service/src/go.mod b/examples/hello-world-service/src/go.mod index 2e0d1602..958738c4 100644 --- a/examples/hello-world-service/src/go.mod +++ b/examples/hello-world-service/src/go.mod @@ -1,3 +1,3 @@ -module github.com/talos-systems/hello-world +module github.com/siderolabs/hello-world go 1.17 diff --git a/nvidia-container-toolkit/DEVELOPMENT.md b/nvidia-container-toolkit/DEVELOPMENT.md new file mode 
100644 index 00000000..5bbc8c5f --- /dev/null +++ b/nvidia-container-toolkit/DEVELOPMENT.md @@ -0,0 +1,48 @@ +# development + +This document is intended as a guide to updating the `nvidia-container-toolkit` dependencies. + +## Components + +### [nvidia-container-cli](./nvidia-container-cli/) + +`nvidia-container-cli` is called by the `nvidia-container-runtime` to set up the required NVIDIA library mounts and NVIDIA device files for a workload container + +### [nvidia-container-runtime](./nvidia-container-runtime/) + +`nvidia-container-runtime` is the runtime used by `containerd` to run workload containers. It's mostly a wrapper around `runc` + +It also ships a tool called `nvidia-container-runtime-hook` which is used to set up OCI hooks; it's a symlink to `nvidia-container-toolkit`, which eventually calls `nvidia-container-cli` + +### [nvidia-device-create](./nvidia-device-create/) + +This is used to create the required NVIDIA device files under `/dev`. This requires udev rules. + +### [glibc](./glibc/) + +`nvidia-container-cli` is fully dependent on `glibc` to be able to access the NVIDIA shared objects. 
+ +## Updating the nvidia driver version + +- Update the driver version in the `pkgs` repo [here](https://github.com/siderolabs/pkgs/blob/master/nonfree/kmod-nvidia/pkg.yaml) +- Update the driver version [here](../Pkgfile) + +## Updating the nvidia-container-toolkit version + +- Update the `libnvidia-container` version [here](./nvidia-container-cli/pkg.yaml) +- Update the `container-toolkit` version [here](./nvidia-container-runtime/pkg.yaml) + +Make sure to also update the `nvidia-device-create` [here](./nvidia-device-create/pkg.yaml) + +### Patches + +- [nvidia-container-cli](./nvidia-container-cli/patches/libnvidia-container/) + - `common.h.patch` - use custom glibc interpreter path + - `Makefile.patch` - build statically linked with `libcap` and `libseccomp` + - `nvc_ldcache.c.patch` - use the standard `ld.so.cache` path inside the container +- [container-runtime](./nvidia-container-runtime/patches/nvidia-container-runtime/) + - `main.go.patch` - use custom path for the nvidia-container-runtime config +- [container-runtime](./nvidia-container-runtime/patches/nvidia-container-toolkit/) + - `hook_config.go.patch` - use custom path for the nvidia-container-runtime config +- [nvidia-device-create](./nvidia-device-create/patches/nvidia-graphics-drivers-build/) + - `Makefile.patch` - build statically linked with `libpciaccess` diff --git a/nvidia-container-toolkit/README.md b/nvidia-container-toolkit/README.md new file mode 100644 index 00000000..472cd8a7 --- /dev/null +++ b/nvidia-container-toolkit/README.md @@ -0,0 +1,83 @@ +# NVIDIA Container toolkit extension + +## Usage + +Enable the extension in the machine configuration before installing Talos: + +```yaml +machine: + install: + extensions: + - image: ghcr.io/siderolabs/nvidia-container-toolkit: +``` + +The following NVIDIA modules need to be loaded, so add this to the Talos config: + +```yaml +machine: + kernel: + modules: + - name: nvidia + - name: nvidia_uvm + - name: nvidia_drm + - name: nvidia_modeset +``` + 
+`nvidia-container-cli` loads BPF programs and requires relaxed KSPP setting for [bpf_jit_harden](https://sysctl-explorer.net/net/core/bpf_jit_harden/), so Talos default setting +should be overridden: + +```yaml +machine: + sysctls: + net.core.bpf_jit_harden: 1 +``` + +> Warning! This disables [KSPP best practices](https://kernsec.org/wiki/index.php/Kernel_Self_Protection_Project/Recommended_Settings#sysctls) setting. + +## Testing + +Apply the following manifest to create a runtime class that uses the extension: + +```yaml +--- +apiVersion: node.k8s.io/v1 +kind: RuntimeClass +metadata: + name: nvidia +handler: nvidia +``` + +Install the NVIDIA device plugin: + +```bash +helm repo add nvdp https://nvidia.github.io/k8s-device-plugin +helm repo update +helm install nvidia-device-plugin nvdp/nvidia-device-plugin --version=0.11.0 --set=runtimeClassName=nvidia +``` + +Apply the following manifest to run CUDA pod via nvidia runtime: + +```yaml +--- +apiVersion: v1 +kind: Pod +metadata: + name: cuda-vector-add +spec: + restartPolicy: OnFailure + runtimeClassName: nvidia + containers: + - name: cuda-vector-add + image: "quay.io/giantswarm/nvidia-gpu-demo:latest" + resources: + limits: + nvidia.com/gpu: 1 +``` + +The pod should be up and running: + +```bash +❯ kubectl get pods +NAME READY STATUS RESTARTS AGE +cuda-vector-add 0/1 Completed 0 17m +``` diff --git a/nvidia-container-toolkit/glibc/ld.so.conf b/nvidia-container-toolkit/glibc/ld.so.conf new file mode 100644 index 00000000..084b5953 --- /dev/null +++ b/nvidia-container-toolkit/glibc/ld.so.conf @@ -0,0 +1,6 @@ +# libc default configuration +/usr/local/lib + +/usr/local/glibc/lib +/usr/lib +/lib diff --git a/nvidia-container-toolkit/glibc/pkg.yaml b/nvidia-container-toolkit/glibc/pkg.yaml new file mode 100644 index 00000000..98886469 --- /dev/null +++ b/nvidia-container-toolkit/glibc/pkg.yaml @@ -0,0 +1,63 @@ +name: glibc +variant: scratch +shell: /bin/bash +dependencies: + - image: ubuntu:22.04 +steps: + - sources: 
+ - url: https://ftpmirror.gnu.org/libc/glibc-2.35.tar.gz + destination: glibc.tar.gz + sha256: 3e8e0c6195da8dfbd31d77c56fb8d99576fb855fafd47a9e0a895e51fd5942d4 + sha512: 45bf782aeda508e17fd51b45cf5ad96bd1067cf96b758b5c2d5def681af713df15e75c253d9c85de047f0a1dd22cf4f2239d70ae392cdb9291092e6570734d43 + env: + DEBIAN_FRONTEND: noninteractive + prepare: + - | + apt update && \ + apt install -y \ + bison \ + build-essential \ + gawk \ + gettext \ + openssl \ + python3 \ + texinfo + - | + mkdir -p glibc glibc-build + + tar -xzf glibc.tar.gz --strip-components=1 -C glibc + build: + - | + # unset the variables bldr sets by default + unset CXXFLAGS + unset LDFLAGS + unset CFLAGS + unset TARGET + unset HOST + + cd glibc-build + + ../glibc/configure \ + --prefix=/usr/local/glibc \ + --libdir=/usr/local/glibc/lib \ + --libexecdir=/usr/local/glibc/lib \ + --enable-stack-protector=strong + + make -j $(nproc) + install: + - | + mkdir -p /rootfs + + cd glibc-build + make install DESTDIR=/rootfs + + cp /pkg/ld.so.conf /rootfs/usr/local/glibc/etc/ld.so.conf + + # cleanup include, var and share + rm -rf /rootfs/usr/local/glibc/include + rm -rf /rootfs/usr/local/glibc/share + rm -rf /rootfs/usr/local/glibc/var +finalize: + - from: /rootfs + to: /rootfs + diff --git a/nvidia-container-toolkit/manifest.yaml b/nvidia-container-toolkit/manifest.yaml new file mode 100644 index 00000000..a9367eb7 --- /dev/null +++ b/nvidia-container-toolkit/manifest.yaml @@ -0,0 +1,11 @@ +version: v1alpha1 +metadata: + name: nvidia-container-toolkit + # the first part is the driver version and the second the container-toolkit version + version: 510.54-v1.9.0 + author: Andrew Rynhard + description: | + This system extension provides nvidia runtime and it's dependencies using NVIDIA's runtime handler. 
+ compatibility: + talos: + version: "> v0.15.0-alpha.0" diff --git a/nvidia-container-toolkit/nvidia-container-cli/patches/libnvidia-container/Makefile.patch b/nvidia-container-toolkit/nvidia-container-cli/patches/libnvidia-container/Makefile.patch new file mode 100644 index 00000000..dd8d48bb --- /dev/null +++ b/nvidia-container-toolkit/nvidia-container-cli/patches/libnvidia-container/Makefile.patch @@ -0,0 +1,13 @@ +diff --git Makefile Makefile +index 6fb6976..c7b9ffa 100644 +--- Makefile ++++ Makefile +@@ -184,7 +184,7 @@ LIB_LDLIBS = $(LIB_LDLIBS_STATIC) $(LIB_LDLIBS_SHARED) + BIN_CPPFLAGS = -include $(BUILD_DEFS) $(CPPFLAGS) + BIN_CFLAGS = -I$(SRCS_DIR) -fPIE -flto $(CFLAGS) + BIN_LDFLAGS = -L. -pie $(LDFLAGS) -Wl,-rpath='$$ORIGIN/../$$LIB' +-BIN_LDLIBS = -l:$(LIB_SHARED) -ldl -lcap $(LDLIBS) ++BIN_LDLIBS = -l:$(LIB_STATIC) -ldl -l:libcap.a -l:libseccomp.a $(LDLIBS) + + $(word 1,$(LIB_RPC_SRCS)): RPCGENFLAGS=-h + $(word 2,$(LIB_RPC_SRCS)): RPCGENFLAGS=-c diff --git a/nvidia-container-toolkit/nvidia-container-cli/patches/libnvidia-container/common.h.patch b/nvidia-container-toolkit/nvidia-container-cli/patches/libnvidia-container/common.h.patch new file mode 100644 index 00000000..6b3d698d --- /dev/null +++ b/nvidia-container-toolkit/nvidia-container-cli/patches/libnvidia-container/common.h.patch @@ -0,0 +1,22 @@ +diff --git src/common.h src/common.h +index c91d349..461b2a5 100644 +--- src/common.h ++++ src/common.h +@@ -24,7 +24,7 @@ + #define LDCONFIG_PATH "/sbin/ldconfig" + #define LDCONFIG_ALT_PATH "/sbin/ldconfig.real" + +-#define LIB_DIR "/lib64" ++#define LIB_DIR "/usr/local/glibc/lib" + #define USR_BIN_DIR "/usr/bin" + #define USR_LIB_DIR "/usr/lib64" + #define USR_LIB32_DIR "/usr/lib32" +@@ -33,7 +33,7 @@ + #if defined(__x86_64__) + # define LIB_ARCH LD_X8664_LIB64 + # define LIB32_ARCH LD_I386_LIB32 +-# define USR_LIB_MULTIARCH_DIR "/usr/lib/x86_64-linux-gnu" ++# define USR_LIB_MULTIARCH_DIR "/usr/local/lib" + # define USR_LIB32_MULTIARCH_DIR 
"/usr/lib/i386-linux-gnu" + # if !defined(__NR_execveat) + # define __NR_execveat 322 diff --git a/nvidia-container-toolkit/nvidia-container-cli/patches/libnvidia-container/nvc_ldcache.c.patch b/nvidia-container-toolkit/nvidia-container-cli/patches/libnvidia-container/nvc_ldcache.c.patch new file mode 100644 index 00000000..390c0c2e --- /dev/null +++ b/nvidia-container-toolkit/nvidia-container-cli/patches/libnvidia-container/nvc_ldcache.c.patch @@ -0,0 +1,13 @@ +diff --git src/nvc_ldcache.c src/nvc_ldcache.c +index d73d0f1..c28e982 100644 +--- src/nvc_ldcache.c ++++ src/nvc_ldcache.c +@@ -349,7 +349,7 @@ nvc_ldcache_update(struct nvc_context *ctx, const struct nvc_container *cnt) + if (validate_args(ctx, cnt != NULL) < 0) + return (-1); + +- argv = (char * []){cnt->cfg.ldconfig, cnt->cfg.libs_dir, cnt->cfg.libs32_dir, NULL}; ++ argv = (char * []){cnt->cfg.ldconfig, cnt->cfg.libs_dir, cnt->cfg.libs32_dir, "-C", "/etc/ld.so.cache", NULL}; + if (*argv[0] == '@') { + /* + * We treat this path specially to be relative to the host filesystem. 
diff --git a/nvidia-container-toolkit/nvidia-container-cli/pkg.yaml b/nvidia-container-toolkit/nvidia-container-cli/pkg.yaml new file mode 100644 index 00000000..54252fb8 --- /dev/null +++ b/nvidia-container-toolkit/nvidia-container-cli/pkg.yaml @@ -0,0 +1,86 @@ +name: nvidia-container-cli +variant: scratch +dependencies: + - image: ubuntu:20.04 + # nvidia-pkgs depends on glibc, + # so any stage depending on nvidia-container-cli will have the updated cache, + # from both nvidia-pkgs and nvidia-container-cli + - stage: nvidia-pkgs +shell: /bin/bash +steps: + - sources: + - url: https://gitlab.com/nvidia/container-toolkit/libnvidia-container/-/archive/v1.9.0/libnvidia-container-v1.9.0.tar.gz + destination: libnvidia-container.tar.gz + sha256: b33b83e90d5e07271c28f1fb330c2d32958fa91b3a8f3238e43d691728829299 + sha512: c1a2a43733580eb2bc1d0798c164dd5f8f5b65bfb3d6e47a067fd5a6a93cc18fd0d718e314881f9d4e596c1376363340d4a35f89973e0e43cdb25b07922c8430 + env: + DEBIAN_FRONTEND: noninteractive + GOPATH: /go + REVISION: abd4e14d8cb923e2a70b7dcfee55fbc16bffa353 + WITH_TIRPC: no + WITH_NVCGO: yes + WITH_SECCOMP: yes + WITH_LIBELF: no + prepare: + - | + apt-get update + apt-get install -y \ + apt-utils \ + bmake \ + build-essential \ + bzip2 \ + ca-certificates \ + curl \ + devscripts \ + dh-make \ + fakeroot \ + git \ + libcap-dev \ + libelf-dev \ + libseccomp-dev \ + lintian \ + lsb-release \ + m4 \ + pkg-config \ + xz-utils \ + patchelf + + # {{ if eq .ARCH "x86_64" }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr + curl https://storage.googleapis.com/golang/go1.17.8.linux-amd64.tar.gz | tar -C /usr/local -xz + # {{ else }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr + curl https://storage.googleapis.com/golang/go1.17.8.linux-arm64.tar.gz | tar -C /usr/local -xz + # {{ end }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr + + mkdir libnvidia-container + tar -xzf 
libnvidia-container.tar.gz --strip-components=1 -C libnvidia-container + build: + - | + export PATH=$GOPATH/bin:/usr/local/go/bin:$PATH + + cd libnvidia-container + + patch -p0 < /pkg/patches/libnvidia-container/Makefile.patch + patch -p0 < /pkg/patches/libnvidia-container/common.h.patch + patch -p0 < /pkg/patches/libnvidia-container/nvc_ldcache.c.patch + make + install: + - | + export PATH=$GOPATH/bin:/usr/local/go/bin:$PATH + mkdir -p /rootfs + + cd libnvidia-container + + make install DESTDIR=/rootfs + + rm -rf /rootfs/usr/local/lib/debug + rm -rf /rootfs/usr/local/lib/pkgconfig + rm -rf /rootfs/usr/local/include + rm -rf /rootfs/usr/local/share + + patchelf --set-interpreter /usr/local/glibc/lib/ld-linux-x86-64.so.2 /rootfs/usr/local/bin/nvidia-container-cli + + # run ldconfig to update the cache + /rootfs/usr/local/glibc/sbin/ldconfig -r /rootfs +finalize: + - from: /rootfs + to: /rootfs diff --git a/nvidia-container-toolkit/nvidia-container-runtime/nvidia-container-runtime.part b/nvidia-container-toolkit/nvidia-container-runtime/nvidia-container-runtime.part new file mode 100644 index 00000000..a84165b5 --- /dev/null +++ b/nvidia-container-toolkit/nvidia-container-runtime/nvidia-container-runtime.part @@ -0,0 +1,7 @@ +[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia] + privileged_without_host_devices = false + runtime_engine = "" + runtime_root = "" + runtime_type = "io.containerd.runc.v2" + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options] + BinaryName = "/usr/local/bin/nvidia-container-runtime" diff --git a/nvidia-container-toolkit/nvidia-container-runtime/nvidia-container-runtime.toml b/nvidia-container-toolkit/nvidia-container-runtime/nvidia-container-runtime.toml new file mode 100644 index 00000000..56e05438 --- /dev/null +++ b/nvidia-container-toolkit/nvidia-container-runtime/nvidia-container-runtime.toml @@ -0,0 +1,15 @@ +disable-require = false + +[nvidia-container-cli] +#root = "/run/nvidia/driver" +#path = 
"/usr/bin/nvidia-container-cli" +environment = [] +debug = "/var/log/nvidia-container-cli.log" +ldcache = "/usr/local/glibc/etc/ld.so.cache" +ldconfig = "@/usr/local/glibc/sbin/ldconfig" +load-kmods = false +#no-cgroups = false +#user = "root:video" + +[nvidia-container-runtime] +debug = "/var/log/nvidia-container-runtime.log" diff --git a/nvidia-container-toolkit/nvidia-container-runtime/patches/nvidia-container-runtime/main.go.patch b/nvidia-container-toolkit/nvidia-container-runtime/patches/nvidia-container-runtime/main.go.patch new file mode 100644 index 00000000..0c611ce1 --- /dev/null +++ b/nvidia-container-toolkit/nvidia-container-runtime/patches/nvidia-container-runtime/main.go.patch @@ -0,0 +1,13 @@ +diff --git cmd/nvidia-container-runtime/main.go cmd/nvidia-container-runtime/main.go +index cbb52b7..789a2b0 100644 +--- cmd/nvidia-container-runtime/main.go ++++ cmd/nvidia-container-runtime/main.go +@@ -16,7 +16,7 @@ const ( + ) + + var ( +- configDir = "/etc/" ++ configDir = "/usr/local/etc/" + ) + + var logger = NewLogger() diff --git a/nvidia-container-toolkit/nvidia-container-runtime/patches/nvidia-container-toolkit/hook_config.go.patch b/nvidia-container-toolkit/nvidia-container-runtime/patches/nvidia-container-toolkit/hook_config.go.patch new file mode 100644 index 00000000..0a552ebd --- /dev/null +++ b/nvidia-container-toolkit/nvidia-container-runtime/patches/nvidia-container-toolkit/hook_config.go.patch @@ -0,0 +1,13 @@ +diff --git cmd/nvidia-container-toolkit/hook_config.go cmd/nvidia-container-toolkit/hook_config.go +index b4cb013..fdf1664 100644 +--- cmd/nvidia-container-toolkit/hook_config.go ++++ cmd/nvidia-container-toolkit/hook_config.go +@@ -10,7 +10,7 @@ import ( + ) + + const ( +- configPath = "/etc/nvidia-container-runtime/config.toml" ++ configPath = "/usr/local/etc/nvidia-container-runtime/config.toml" + driverPath = "/run/nvidia/driver" + ) + diff --git a/nvidia-container-toolkit/nvidia-container-runtime/pkg.yaml 
b/nvidia-container-toolkit/nvidia-container-runtime/pkg.yaml new file mode 100644 index 00000000..fd4856d4 --- /dev/null +++ b/nvidia-container-toolkit/nvidia-container-runtime/pkg.yaml @@ -0,0 +1,47 @@ +name: nvidia-container-runtime +variant: scratch +shell: /toolchain/bin/bash +dependencies: + - stage: base +steps: + - sources: + - url: https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/archive/v1.9.0/container-toolkit-v1.9.0.tar.gz + destination: container-toolkit.tar.gz + sha256: e95ad56d08bee14f54d3c15e5201ba502676afda3392a40c09f112a29a3e3679 + sha512: d742d297dc9cb29e2d775c64fe5dc668a230cd8b0a193eb9b44518efd1e092817038c8ed015ab28b0cf2a45ad03b99d0e215df9e18586bb917d8c5b5d783aa90 + env: + GOPATH: /go + prepare: + - | + mkdir -p container-toolkit + tar -xzf container-toolkit.tar.gz --strip-components=1 -C container-toolkit + build: + - | + export PATH=${PATH}:${TOOLCHAIN}/go/bin + + cd container-toolkit + + patch -p0 < /pkg/patches/nvidia-container-runtime/main.go.patch + patch -p0 < /pkg/patches/nvidia-container-toolkit/hook_config.go.patch + + make cmds + install: + - | + mkdir -p /rootfs/usr/local/bin + + cd container-toolkit + + cp ./nvidia-container-runtime /rootfs/usr/local/bin/nvidia-container-runtime + cp ./nvidia-container-toolkit /rootfs/usr/local/bin/nvidia-container-toolkit + + ln -s nvidia-container-toolkit /rootfs/usr/local/bin/nvidia-container-runtime-hook + + chmod +x /rootfs/usr/local/bin/nvidia-container-runtime + chmod +x /rootfs/usr/local/bin/nvidia-container-toolkit +finalize: + - from: /rootfs + to: /rootfs + - from: /pkg/nvidia-container-runtime.part + to: /rootfs/etc/cri/conf.d/nvidia-container-runtime.part + - from: /pkg/nvidia-container-runtime.toml + to: /rootfs/usr/local/etc/nvidia-container-runtime/config.toml diff --git a/nvidia-container-toolkit/nvidia-device-create/15-nvidia-device.rules b/nvidia-container-toolkit/nvidia-device-create/15-nvidia-device.rules new file mode 100644 index 00000000..b651ef34 --- /dev/null 
+++ b/nvidia-container-toolkit/nvidia-device-create/15-nvidia-device.rules @@ -0,0 +1,8 @@ +# This will create the nvidia device nodes +ACTION=="add", DEVPATH=="/bus/pci/drivers/nvidia", RUN+="/usr/local/bin/ub-device-create" + +# Create the device node for the nvidia-uvm module +ACTION=="add", DEVPATH=="/module/nvidia_uvm", SUBSYSTEM=="module", RUN+="/usr/local/bin/ub-device-create" + +# https://download.nvidia.com/XFree86/Linux-x86_64/510.54/README/nvidia-persistenced.html +ACTION=="add", DEVPATH=="/bus/pci/drivers/nvidia", RUN+="/usr/local/bin/nvidia-persistenced --no-persistence-mode --verbose" diff --git a/nvidia-container-toolkit/nvidia-device-create/patches/nvidia-graphics-drivers-build/Makefile.patch b/nvidia-container-toolkit/nvidia-device-create/patches/nvidia-graphics-drivers-build/Makefile.patch new file mode 100644 index 00000000..a76dec3f --- /dev/null +++ b/nvidia-container-toolkit/nvidia-device-create/patches/nvidia-graphics-drivers-build/Makefile.patch @@ -0,0 +1,13 @@ +diff --git Makefile Makefile +index cc7610bd..0c325c75 100644 +--- Makefile ++++ Makefile +@@ -3,7 +3,7 @@ + PROGRAM = ub-device-create + PROGRAM_FILES = ub-device-create.c + CC = gcc +-CFLAGS =-g -Wall $(shell pkg-config --cflags --libs pciaccess libkmod) ++CFLAGS =-g -static -Wall -l:libpciaccess.a + + all: build + diff --git a/nvidia-container-toolkit/nvidia-device-create/pkg.yaml b/nvidia-container-toolkit/nvidia-device-create/pkg.yaml new file mode 100644 index 00000000..4a2c208f --- /dev/null +++ b/nvidia-container-toolkit/nvidia-device-create/pkg.yaml @@ -0,0 +1,47 @@ +# https://download.nvidia.com/XFree86/Linux-x86_64/510.54/README/faq.html +# check the section under NVIDIA-INSTALLER -> How and when are the NVIDIA device files created? 
+name: nvidia-device-create +variant: scratch +dependencies: + - image: ubuntu:21.10 +shell: /bin/bash +steps: + - sources: + # https://github.com/tseliot/nvidia-graphics-drivers/commit/6b655df8a905f2ea00298d44ad6003f7b51fd37a + - url: https://github.com/tseliot/nvidia-graphics-drivers/archive/6b655df8a905f2ea00298d44ad6003f7b51fd37a.tar.gz + destination: nvidia-graphics-drivers-build.tar.gz + sha256: 48a293271d0f38d0f7029c798bad01cbbe39290a36116fda53503463357b47d9 + sha512: 764ffba4851ff76d4461da7eefcb818ba36e7fc1d3651643a5501e8fdd1ed459aab647656e297887d8a0c805e825bb27794f7c0952d6896b77415b9f2737ab5f + env: + DEBIAN_FRONTEND: noninteractive + prepare: + - | + apt-get update + apt-get install -y libpciaccess-dev \ + libkmod-dev \ + build-essential + + # https://download.nvidia.com/XFree86/Linux-x86_64/510.54/README/faq.html#devicenodes + # https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#runfile-verifications + mkdir nvidia-graphics-drivers-build + + tar -xzf nvidia-graphics-drivers-build.tar.gz --strip-components=1 -C nvidia-graphics-drivers-build + build: + - | + cd nvidia-graphics-drivers-build/debian/device-create + + patch -p0 < /pkg/patches/nvidia-graphics-drivers-build/Makefile.patch + make -j $(nproc) + install: + - | + mkdir -p /rootfs/usr/local/bin \ + /rootfs/usr/etc/udev/rules.d # single mkdir: the '\' joins both lines into one command + + cd nvidia-graphics-drivers-build/debian/device-create + + # https://github.com/tseliot/nvidia-graphics-drivers/blob/510/debian/nvidia-kernel-common-510.install + cp ub-device-create /rootfs/usr/local/bin + cp /pkg/15-nvidia-device.rules /rootfs/usr/etc/udev/rules.d +finalize: + - from: /rootfs + to: /rootfs diff --git a/nvidia-container-toolkit/nvidia-pkgs/pkg.yaml b/nvidia-container-toolkit/nvidia-pkgs/pkg.yaml new file mode 100644 index 00000000..86749c52 --- /dev/null +++ b/nvidia-container-toolkit/nvidia-pkgs/pkg.yaml @@ -0,0 +1,167 @@ +name: nvidia-pkgs +variant: scratch +dependencies: + - image: ubuntu:22.04 + # depends on glibc to update 
ld.so.cache + # so any stage depending on nvidia-pkgs will have the updated cache + - stage: glibc +shell: /bin/bash +steps: + - sources: + # {{ if eq .ARCH "aarch64" }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr + - url: https://download.nvidia.com/XFree86/Linux-aarch64/{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }}/NVIDIA-Linux-aarch64-{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }}.run + destination: nvidia.run + sha256: bff7a5640445b3e38b35d9d589b52c7353bb473703b7a5d050aa96aaa35ca896 + sha512: 9d872748dc0957c0754561582a1984a8f912c345a5e616116edfd86d629efb5674c29723c80ae47d458b23aaec6fc5a71724441dd7e11ba4f5b94f8b04591f81 + # {{ else }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr + - url: https://download.nvidia.com/XFree86/Linux-x86_64/{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }}/NVIDIA-Linux-x86_64-{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }}.run + destination: nvidia.run + sha256: 4c20deccae3fe347adfd0e6989a306f9024fdadf831adc1e8e60855675335161 + sha512: 1e65e96c1ae1cccd5cd483f2b65927e3594d28f3774459dfd094530f445f4b0f5368a3b7eebf4970bd2632438cca2cbb93af50a59a3a87a2c13d08ed5155164c + # {{ end }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr + env: + DEBIAN_FRONTEND: noninteractive + prepare: + - | + apt update && \ + apt install -y \ + patchelf + + bash nvidia.run --extract-only + install: + - | + mkdir -p /rootfs/usr/local/bin \ + /rootfs/usr/local/lib/gbm \ + /rootfs/usr/local/lib/nvidia/xorg \ + /rootfs/lib/firmware/nvidia/{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} \ + /rootfs/usr/local/lib/containers/nvidia-persistenced \ + /rootfs/usr/local/etc/containers + + cd NVIDIA-Linux* + + # https://github.com/tseliot/nvidia-graphics-drivers/blob/510/debian/libnvidia-cfg1-510.install + cp libnvidia-cfg.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR 
}}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib + + # https://github.com/tseliot/nvidia-graphics-drivers/blob/510/debian/libnvidia-cfg1-510.links + ln -s libnvidia-cfg.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib/libnvidia-cfg.so.1 + ln -s libnvidia-cfg.so.1 /rootfs/usr/local/lib/libnvidia-cfg.so + + # https://github.com/tseliot/nvidia-graphics-drivers/blob/510/debian/libnvidia-compute-510.install + cp libcuda.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib + # {{ if eq .ARCH "x86_64" }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr + cp libnvidia-compiler-next.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib + cp libnvidia-compiler.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib + # {{ end }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr + cp libnvidia-ml.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib + cp libnvidia-nvvm.so.4.0.0 /rootfs/usr/local/lib + cp libnvidia-opencl.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib + cp libnvidia-ptxjitcompiler.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib + + # https://github.com/tseliot/nvidia-graphics-drivers/blob/510/debian/libnvidia-compute-510.links + ln -s libnvidia-nvvm.so.4.0.0 /rootfs/usr/local/lib/libnvidia-nvvm.so.4 + ln -s libnvidia-nvvm.so.4 /rootfs/usr/local/lib/libnvidia-nvvm.so + ln -s libcuda.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib/libcuda.so.1 + ln -s libcuda.so.1 /rootfs/usr/local/lib/libcuda.so + ln -s libnvidia-ml.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib/libnvidia-ml.so.1 + ln -s 
libnvidia-ml.so.1 /rootfs/usr/local/lib/libnvidia-ml.so + ln -s libnvidia-opencl.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib/libnvidia-opencl.so.1 + ln -s libnvidia-ptxjitcompiler.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib/libnvidia-ptxjitcompiler.so.1 + ln -s libnvidia-ptxjitcompiler.so.1 /rootfs/usr/local/lib/libnvidia-ptxjitcompiler.so + + # https://github.com/tseliot/nvidia-graphics-drivers/blob/510/debian/libnvidia-decode-510.install + cp libnvcuvid.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib + cp libnvidia-opticalflow.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib + cp libvdpau_nvidia.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib + + # https://github.com/tseliot/nvidia-graphics-drivers/blob/510/debian/libnvidia-decode-510.links + ln -s libnvcuvid.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib/libnvcuvid.so.1 + ln -s libnvcuvid.so.1 /rootfs/usr/local/lib/libnvcuvid.so + ln -s libnvidia-opticalflow.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib/libnvidia-opticalflow.so.1 + ln -s libnvidia-opticalflow.so.1 /rootfs/usr/local/lib/libnvidia-opticalflow.so + ln -s libvdpau_nvidia.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib/libvdpau_nvidia.so.1 + ln -s libvdpau_nvidia.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib/libvdpau_nvidia.so + + # https://github.com/tseliot/nvidia-graphics-drivers/blob/510/debian/libnvidia-encode-510.install + cp libnvidia-encode.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib/libnvidia-encode.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ 
.NVIDIA_DRIVER_VERSION_MINOR }} + + # https://github.com/tseliot/nvidia-graphics-drivers/blob/510/debian/libnvidia-encode-510.links + ln -s libnvidia-encode.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib/libnvidia-encode.so.1 + ln -s libnvidia-encode.so.1 /rootfs/usr/local/lib/libnvidia-encode.so + + # https://github.com/tseliot/nvidia-graphics-drivers/blob/510/debian/libnvidia-extra-510.install + cp libnvidia-allocator.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib/libnvidia-allocator.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} + + # https://github.com/tseliot/nvidia-graphics-drivers/blob/510/debian/libnvidia-extra-510.links + ln -s libnvidia-allocator.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib/libnvidia-allocator.so.1 + ln -s libnvidia-allocator.so.1 /rootfs/usr/local/lib/libnvidia-allocator.so + # relative target: from inside gbm/ it resolves to /usr/local/lib/libnvidia-allocator.so.1; NOTE(review): the gbm/ directory is presumably created earlier in this script - confirm + ln -s ../libnvidia-allocator.so.1 /rootfs/usr/local/lib/gbm/nvidia-drm_gbm.so + + # https://github.com/tseliot/nvidia-graphics-drivers/blob/510/debian/libnvidia-fbc1-510.install + cp libnvidia-fbc.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib + + # https://github.com/tseliot/nvidia-graphics-drivers/blob/510/debian/libnvidia-fbc1-510.links + ln -s libnvidia-fbc.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib/libnvidia-fbc.so.1 + ln -s libnvidia-fbc.so.1 /rootfs/usr/local/lib/libnvidia-fbc.so + + # set the ELF interpreter of the binary to the loader under /usr/local/glibc; NOTE(review): the loader file name is x86_64-specific - confirm what other .ARCH values need + patchelf --set-interpreter /usr/local/glibc/lib/ld-linux-x86-64.so.2 nvidia-ngx-updater + + # https://github.com/tseliot/nvidia-graphics-drivers/blob/510/debian/libnvidia-gl-510.install + cp libEGL_nvidia.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib + cp libGLESv1_CM_nvidia.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} 
/rootfs/usr/local/lib + cp libGLESv2_nvidia.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib + cp libGLX_nvidia.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib + cp libglxserver_nvidia.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib + cp libnvidia-egl-gbm.so.1.1.0 /rootfs/usr/local/lib + cp libnvidia-eglcore.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib + cp libnvidia-glcore.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib + cp libnvidia-glsi.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib + cp libnvidia-glvkspirv.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib + cp libnvidia-ngx.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib + cp libnvidia-rtcore.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib + cp libnvidia-tls.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib + cp libnvidia-vulkan-producer.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib + cp libnvoptix.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib + cp nvidia-ngx-updater /rootfs/usr/local/bin + + # https://github.com/tseliot/nvidia-graphics-drivers/blob/510/debian/libnvidia-gl-510.links + ln -s libnvidia-vulkan-producer.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib/libnvidia-vulkan-producer.so + ln -s libnvoptix.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib/libnvoptix.so.1 + # libglxserver_nvidia is copied to /rootfs/usr/local/lib above, and a relative symlink target resolves from the directory holding the link, so the link placed in nvidia/xorg/ has to climb two levels with ../../ to reach it + ln -s ../../libglxserver_nvidia.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ 
.NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib/nvidia/xorg/libglxserver_nvidia.so + ln -s libEGL_nvidia.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib/libEGL_nvidia.so.0 + ln -s libGLESv1_CM_nvidia.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib/libGLESv1_CM_nvidia.so.1 + ln -s libGLESv2_nvidia.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib/libGLESv2_nvidia.so.2 + ln -s libGLX_nvidia.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib/libGLX_nvidia.so.0 + + # set the ELF interpreter of each utility to the loader under /usr/local/glibc before it is copied into /rootfs/usr/local/bin below + patchelf --set-interpreter /usr/local/glibc/lib/ld-linux-x86-64.so.2 nvidia-cuda-mps-control + patchelf --set-interpreter /usr/local/glibc/lib/ld-linux-x86-64.so.2 nvidia-cuda-mps-server + patchelf --set-interpreter /usr/local/glibc/lib/ld-linux-x86-64.so.2 nvidia-persistenced + + # https://github.com/tseliot/nvidia-graphics-drivers/blob/510/debian/nvidia-compute-utils-510.install + cp nvidia-cuda-mps-control /rootfs/usr/local/bin/ + cp nvidia-cuda-mps-server /rootfs/usr/local/bin/ + cp nvidia-persistenced /rootfs/usr/local/bin/ + + # https://github.com/tseliot/nvidia-graphics-drivers/blob/510/debian/nvidia-kernel-common-510.install + cp firmware/gsp.bin /rootfs/lib/firmware/nvidia/{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }}/gsp.bin + + # same interpreter patch for the next group of utilities, again applied before they are copied + patchelf --set-interpreter /usr/local/glibc/lib/ld-linux-x86-64.so.2 nvidia-debugdump + patchelf --set-interpreter /usr/local/glibc/lib/ld-linux-x86-64.so.2 nvidia-smi + patchelf --set-interpreter /usr/local/glibc/lib/ld-linux-x86-64.so.2 nvidia-xconfig + + # https://github.com/tseliot/nvidia-graphics-drivers/blob/510/debian/nvidia-utils-510.install + cp nvidia-debugdump /rootfs/usr/local/bin/ + cp nvidia-smi /rootfs/usr/local/bin/ + cp nvidia-xconfig /rootfs/usr/local/bin/ + + # 
https://github.com/tseliot/nvidia-graphics-drivers/blob/510/debian/xserver-xorg-video-nvidia-510.install + cp nvidia_drv.so /rootfs/usr/local/lib/nvidia/xorg/nvidia_drv.so + + # missing ones + ln -s libnvidia-ngx.so.{{ .NVIDIA_DRIVER_VERSION_MAJOR }}.{{ .NVIDIA_DRIVER_VERSION_MINOR }} /rootfs/usr/local/lib/libnvidia-ngx.so.1 + ln -s libnvidia-egl-gbm.so.1.1.0 /rootfs/usr/local/lib/libnvidia-egl-gbm.so.1 + + # run the ldconfig shipped inside /rootfs with /rootfs as its root directory (-r) to update the cache + /rootfs/usr/local/glibc/sbin/ldconfig -r /rootfs +finalize: + - from: /rootfs + to: /rootfs diff --git a/nvidia-container-toolkit/pkg.yaml b/nvidia-container-toolkit/pkg.yaml new file mode 100644 index 00000000..770a0708 --- /dev/null +++ b/nvidia-container-toolkit/pkg.yaml @@ -0,0 +1,17 @@ +name: nvidia-container-toolkit +variant: scratch +shell: /toolchain/bin/bash +dependencies: # stages this package depends on + - stage: base + - stage: nvidia-container-cli + - stage: nvidia-container-runtime + - stage: nvidia-device-create +steps: + - install: + - | + mkdir -p /rootfs # only ensures /rootfs exists; NOTE(review): its contents presumably come from the stage dependencies above - confirm +finalize: + - from: /rootfs + to: /rootfs + - from: /pkg/manifest.yaml # the manifest is placed at the image root, next to /rootfs + to: /