From 1cfc65abef54277028f27c3f96dcf845e4a7ea72 Mon Sep 17 00:00:00 2001 From: Landon Clipp <11232769+LandonTClipp@users.noreply.github.com> Date: Tue, 12 Nov 2024 11:26:45 -0600 Subject: [PATCH] feat(inputs.nvidia-smi): REVERT -- Add `probe_on_startup` option (#16165) --- plugins/inputs/nvidia_smi/README.md | 6 --- plugins/inputs/nvidia_smi/nvidia_smi.go | 26 ++++------- plugins/inputs/nvidia_smi/nvidia_smi_test.go | 49 -------------------- plugins/inputs/nvidia_smi/sample.conf | 6 --- 4 files changed, 9 insertions(+), 78 deletions(-) diff --git a/plugins/inputs/nvidia_smi/README.md b/plugins/inputs/nvidia_smi/README.md index cb07033a680b6..c493ff357edb2 100644 --- a/plugins/inputs/nvidia_smi/README.md +++ b/plugins/inputs/nvidia_smi/README.md @@ -37,12 +37,6 @@ using the `startup_error_behavior` setting. Available values are: ## Optional: timeout for GPU polling # timeout = "5s" - - ## Optional: Attempt to run nvidia-smi once on startup. If nvidia-smi returns a non-zero - ## exit code, the plugin will return an error. This is particularly useful - ## if used in conjunction with `startup_error_behavior` to allow the plugin to be - ## disabled if nvidia-smi cannot run successfully. - # probe_on_startup = false ``` ### Linux diff --git a/plugins/inputs/nvidia_smi/nvidia_smi.go b/plugins/inputs/nvidia_smi/nvidia_smi.go index 375ca8455987b..695b8c6f601ee 100644 --- a/plugins/inputs/nvidia_smi/nvidia_smi.go +++ b/plugins/inputs/nvidia_smi/nvidia_smi.go @@ -27,14 +27,12 @@ var sampleConfig string // NvidiaSMI holds the methods for this plugin type NvidiaSMI struct { - BinPath string `toml:"bin_path"` - Timeout config.Duration `toml:"timeout"` - ProbeOnStartup bool `toml:"probe_on_startup"` - Log telegraf.Logger `toml:"-"` - - ignorePlugin bool - once sync.Once - nvidiaSMIArgs []string + BinPath string `toml:"bin_path"` + Timeout config.Duration `toml:"timeout"` + Log telegraf.Logger `toml:"-"` + + ignorePlugin bool + once sync.Once } func (*NvidiaSMI) SampleConfig() string { @@ -49,11 +47,6 @@ func (smi *NvidiaSMI) Start(telegraf.Accumulator) error { } smi.BinPath = binPath } - if smi.ProbeOnStartup { - if _, err := internal.CombinedOutputTimeout(exec.Command(smi.BinPath, smi.nvidiaSMIArgs...), time.Duration(smi.Timeout)); err != nil { - return &internal.StartupError{Err: err} - } - } return nil } @@ -67,7 +60,7 @@ func (smi *NvidiaSMI) Gather(acc telegraf.Accumulator) error { } // Construct and execute metrics query - data, err := internal.CombinedOutputTimeout(exec.Command(smi.BinPath, smi.nvidiaSMIArgs...), time.Duration(smi.Timeout)) + data, err := internal.CombinedOutputTimeout(exec.Command(smi.BinPath, "-q", "-x"), time.Duration(smi.Timeout)) if err != nil { return fmt.Errorf("calling %q failed: %w", smi.BinPath, err) } @@ -126,9 +119,8 @@ func (smi *NvidiaSMI) parse(acc telegraf.Accumulator, data []byte) error { func init() { inputs.Add("nvidia_smi", func() telegraf.Input { return &NvidiaSMI{ - BinPath: "/usr/bin/nvidia-smi", - Timeout: config.Duration(5 * time.Second), - nvidiaSMIArgs: []string{"-q", "-x"}, + BinPath: "/usr/bin/nvidia-smi", + Timeout: config.Duration(5 * time.Second), } }) } diff --git a/plugins/inputs/nvidia_smi/nvidia_smi_test.go b/plugins/inputs/nvidia_smi/nvidia_smi_test.go index b30673666f3fe..23c57e5f6a3b6 100644 --- a/plugins/inputs/nvidia_smi/nvidia_smi_test.go +++ b/plugins/inputs/nvidia_smi/nvidia_smi_test.go @@ -4,65 +4,16 @@ import ( "errors" "os" "path/filepath" - "runtime" "testing" "time" "github.com/influxdata/telegraf" - "github.com/influxdata/telegraf/config" "github.com/influxdata/telegraf/internal" "github.com/influxdata/telegraf/models" "github.com/influxdata/telegraf/testutil" "github.com/stretchr/testify/require" ) -func TestOnStartupError(t *testing.T) { - var binPath string - var nvidiaSMIArgs []string - if runtime.GOOS == "windows" { - binPath = `C:\Windows\System32\WindowsPowerShell\v1.0\powershell.exe` - nvidiaSMIArgs = []string{"-Command", "exit 1"} - } else { - binPath = "/bin/bash" - nvidiaSMIArgs = []string{"-c", "exit 1"} - } - - tests := []struct { - ProbeOnStartup bool - }{ - { - ProbeOnStartup: true, - }, - { - ProbeOnStartup: false, - }, - } - for _, tt := range tests { - plugin := &NvidiaSMI{ - BinPath: binPath, - ProbeOnStartup: tt.ProbeOnStartup, - Timeout: config.Duration(time.Second), - Log: &testutil.Logger{}, - nvidiaSMIArgs: nvidiaSMIArgs, - } - model := models.NewRunningInput(plugin, &models.InputConfig{ - Name: "nvidia_smi", - }) - require.NoError(t, model.Init()) - - var acc testutil.Accumulator - var ferr *internal.FatalError - err := model.Start(&acc) - - if tt.ProbeOnStartup { - require.False(t, errors.As(err, &ferr)) - require.ErrorIs(t, model.Gather(&acc), internal.ErrNotConnected) - } else { - require.NoError(t, err) - } - } -} - func TestErrorBehaviorDefault(t *testing.T) { // make sure we can't find nvidia-smi in $PATH somewhere os.Unsetenv("PATH") diff --git a/plugins/inputs/nvidia_smi/sample.conf b/plugins/inputs/nvidia_smi/sample.conf index 0582ac3cb8bf7..8879b3923a2cc 100644 --- a/plugins/inputs/nvidia_smi/sample.conf +++ b/plugins/inputs/nvidia_smi/sample.conf @@ -7,9 +7,3 @@ ## Optional: timeout for GPU polling # timeout = "5s" - - ## Optional: Attempt to run nvidia-smi once on startup. If nvidia-smi returns a non-zero - ## exit code, the plugin will return an error. This is particularly useful - ## if used in conjunction with `startup_error_behavior` to allow the plugin to be - ## disabled if nvidia-smi cannot run successfully. - # probe_on_startup = false