-
Notifications
You must be signed in to change notification settings - Fork 117
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
chore: nvidia-persistenced as an extension service
Run `nvidia-persistenced` as a Talos Extension Service. Bump the NVIDIA drivers to 510.68.02 (the pkgs repo was bumped [here](siderolabs/pkgs#470)). Use the patch from <https://gitlab.com/nvidia/container-toolkit/libnvidia-container/-/merge_requests/165>. Signed-off-by: Noel Georgi <git@frezbo.dev>
- Loading branch information
Showing
12 changed files
with
183 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
5 changes: 5 additions & 0 deletions
5
nonfree/nvidia-container-toolkit/nvidia-persistenced-wrapper/go.mod
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
module nvidia-persistenced-wrapper | ||
|
||
go 1.18 | ||
|
||
require golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6 |
2 changes: 2 additions & 0 deletions
2
nonfree/nvidia-container-toolkit/nvidia-persistenced-wrapper/go.sum
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6 h1:nonptSpoQ4vQjyraW20DXPAglgQfVnM9ZC6MmNLMR60= | ||
golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= |
90 changes: 90 additions & 0 deletions
90
nonfree/nvidia-container-toolkit/nvidia-persistenced-wrapper/main.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
package main | ||
|
||
import (
	"errors"
	"io/ioutil"
	"log"
	"os"
	"os/exec"
	"os/signal"
	"strconv"
	"strings"

	"golang.org/x/sys/unix"
)
|
||
// stateFolder is the directory that main removes after killing a previously
// recorded nvidia-persistenced process.
const stateFolder = "/var/run/nvidia-persistenced"

// pidFile is the path, inside stateFolder, of the file from which the process
// ID of a previously started instance is read.
const pidFile = stateFolder + "/" + "nvidia-persistenced.pid"
|
||
func main() { | ||
// first check if the pid file exists, | ||
// then check if the process is running, | ||
// if running try to kill it, then start the new process | ||
if _, err := os.Stat(pidFile); err != nil { | ||
if !errors.Is(err, os.ErrNotExist) { | ||
log.Fatalf("nvidia-persistenced-wrapper: failed to stat pid file: %s%v\n", pidFile, err) | ||
} | ||
} else { | ||
pid, err := getProcessId() | ||
|
||
if err != nil { | ||
log.Fatalf("nvidia-persistenced-wrapper: error reading pid file: %s%v\n", pidFile, err) | ||
} | ||
if err := killProcess(pid); err != nil { | ||
log.Fatalf("nvidia-persistenced-wrapper: error killing process: %d%v\n", pid, err) | ||
} | ||
// now we can remove the state directory | ||
if err := os.RemoveAll(stateFolder); err != nil { | ||
log.Fatalf("nvidia-persistenced-wrapper: error removing state directory: %s%v\n", stateFolder, err) | ||
} | ||
} | ||
|
||
cmd := exec.Command("/usr/local/bin/nvidia-persistenced", | ||
[]string{ | ||
"--no-persistence-mode", | ||
"--verbose", | ||
}...) | ||
|
||
cmd.Stdout = os.Stdout | ||
cmd.Stderr = os.Stderr | ||
|
||
if err := cmd.Start(); err != nil { | ||
log.Fatalf("nvidia-persistenced-wrapper: error starting nvidia-persistenced: %v\n", err) | ||
} | ||
|
||
ch := make(chan os.Signal, 1) | ||
signal.Notify(ch, unix.SIGINT, unix.SIGTERM) | ||
|
||
if err := cmd.Process.Signal(<-ch); err != nil { | ||
log.Fatalf("nvidia-persistenced-wrapper: error sending signal to nvidia-persistenced: %v\n", err) | ||
} | ||
|
||
if _, err := cmd.Process.Wait(); err != nil { | ||
log.Fatalf("nvidia-persistenced-wrapper: error waiting for nvidia-persistenced to exit: %v\n", err) | ||
} | ||
} | ||
|
||
func getProcessId() (int, error) { | ||
pidData, err := ioutil.ReadFile(pidFile) | ||
if err != nil { | ||
return 0, err | ||
} | ||
pid, err := strconv.Atoi(string(pidData)) | ||
if err != nil { | ||
return 0, err | ||
} | ||
return int(pid), nil | ||
} | ||
|
||
// killProcess sends a kill to the process identified by pid.
//
// It returns nil when the kill was delivered or when the process had already
// finished (os.ErrProcessDone). Any other failure, whether from looking the
// process up or from killing it, is returned unchanged.
func killProcess(pid int) error {
	proc, lookupErr := os.FindProcess(pid)
	if lookupErr != nil {
		return lookupErr
	}

	killErr := proc.Kill()

	// A process that is already gone counts as success.
	if killErr == nil || errors.Is(killErr, os.ErrProcessDone) {
		return nil
	}

	return killErr
}
22 changes: 22 additions & 0 deletions
22
nonfree/nvidia-container-toolkit/nvidia-persistenced-wrapper/pkg.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
name: nvidia-persistenced-wrapper | ||
variant: scratch | ||
shell: /toolchain/bin/bash | ||
dependencies: | ||
- stage: base | ||
steps: | ||
- build: | ||
- | | ||
export PATH=${PATH}:${TOOLCHAIN}/go/bin | ||
cp -r /pkg/* . | ||
CGO_ENABLED=0 go build -o nvidia-persistenced-wrapper main.go | ||
install: | ||
- | | ||
mkdir -p /rootfs/usr/local/bin | ||
mkdir -p /rootfs/usr/lib/containers/nvidia-persistenced | ||
cp nvidia-persistenced-wrapper /rootfs/usr/local/bin/nvidia-persistenced-wrapper | ||
finalize: | ||
- from: /rootfs | ||
to: /rootfs |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
# https://download.nvidia.com/XFree86/Linux-x86_64/510.54/README/nvidia-persistenced.html | ||
name: nvidia-persistenced | ||
container: | ||
entrypoint: /usr/local/bin/nvidia-persistenced-wrapper | ||
mounts: | ||
# device files | ||
- source: /dev | ||
destination: /dev | ||
type: bind | ||
options: | ||
- rshared | ||
- rbind | ||
- rw | ||
# shared libraries | ||
- source: /usr/local/glibc | ||
destination: /usr/local/glibc | ||
type: bind | ||
options: | ||
- bind | ||
- ro | ||
# shared libraries | ||
- source: /usr/local/lib | ||
destination: /usr/local/lib | ||
type: bind | ||
options: | ||
- bind | ||
- ro | ||
# service state file | ||
- source: /var/run/nvidia-persistenced | ||
destination: /var/run/nvidia-persistenced | ||
type: bind | ||
options: | ||
- rshared | ||
- rbind | ||
- rw | ||
# binaries | ||
- source: /usr/local/bin | ||
destination: /usr/local/bin | ||
type: bind | ||
options: | ||
- bind | ||
- ro | ||
depends: | ||
- service: cri | ||
# we need to depend on udevd so that the nvidia device files are created | ||
- service: udevd | ||
- path: /sys/bus/pci/drivers/nvidia | ||
restart: always |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
NVIDIA_DRIVER_VERSION_MAJOR: 510 | ||
NVIDIA_DRIVER_VERSION_MINOR: 60.02 | ||
NVIDIA_DRIVER_VERSION_MINOR: 68.02 | ||
NVIDIA_CONTAINER_TOOLKIT_VERSION: v1.9.0 |