Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(outputs.syslog): Implement startup error behavior options #15787

Merged
merged 2 commits into from
Aug 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions plugins/outputs/syslog/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,20 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.

[CONFIGURATION.md]: ../../../docs/CONFIGURATION.md#plugins

## Startup error behavior options <!-- @/docs/includes/startup_error_behavior.md -->

In addition to the plugin-specific and global configuration settings the plugin
supports options for specifying the behavior when experiencing startup errors
using the `startup_error_behavior` setting. Available values are:

- `error`: Telegraf will stop and exit in case of startup errors. This is the
default behavior.
- `ignore`: Telegraf will ignore startup errors for this plugin and disable it
but continues processing for all other plugins.
- `retry`: Telegraf will try to start up the plugin in every gather or write
cycle in case of startup errors. The plugin is disabled until
the startup succeeds.

## Configuration

```toml @sample.conf
Expand Down
3 changes: 2 additions & 1 deletion plugins/outputs/syslog/syslog.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (

"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/config"
"github.com/influxdata/telegraf/internal"
tlsint "github.com/influxdata/telegraf/plugins/common/tls"
"github.com/influxdata/telegraf/plugins/outputs"
)
Expand Down Expand Up @@ -75,7 +76,7 @@ func (s *Syslog) Connect() error {
c, err = tls.Dial(spl[0], spl[1], tlsCfg)
}
if err != nil {
return err
return &internal.StartupError{Err: err, Retry: true}
}

if err := s.setKeepAlive(c); err != nil {
Expand Down
184 changes: 184 additions & 0 deletions plugins/outputs/syslog/syslog_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ import (
"github.com/stretchr/testify/require"

"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/models"
"github.com/influxdata/telegraf/testutil"
"github.com/leodido/go-syslog/v4/nontransparent"
)
Expand Down Expand Up @@ -244,3 +246,185 @@ func TestSyslogWriteReconnect(t *testing.T) {
require.NoError(t, err)
require.Equal(t, string(messageBytesWithFraming), string(buf[:n]))
}

// TestStartupErrorBehaviorDefault checks that, when no startup error behavior
// is configured on the output, a refused connection makes Connect return an
// error that can be unwrapped to *internal.StartupError.
func TestStartupErrorBehaviorDefault(t *testing.T) {
	// Grab a free local port and release it immediately, so the address is
	// valid but nothing is listening on it when the plugin dials.
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	require.NoError(t, err)
	addr := ln.Addr().String()
	ln.Close()

	// The plugin is wrapped in a running-output model so that the startup
	// error behavior handling is part of the code path under test.
	syslogOutput := &Syslog{
		Address:             "tcp://" + addr,
		Trailer:             nontransparent.LF,
		Separator:           "_",
		DefaultSeverityCode: uint8(5), // notice
		DefaultFacilityCode: uint8(1), // user-level
		DefaultAppname:      "Telegraf",
	}
	cfg := &models.OutputConfig{Name: "syslog"}
	output := models.NewRunningOutput(syslogOutput, cfg, 10, 100)
	require.NoError(t, output.Init())

	// With nobody listening, connecting must fail with a startup error.
	var startupErr *internal.StartupError
	connectErr := output.Connect()
	require.Error(t, connectErr, "connection should be refused")
	require.ErrorAs(t, connectErr, &startupErr)
}

// TestStartupErrorBehaviorError checks that, with StartupErrorBehavior set to
// "error", a refused connection makes Connect return an error that can be
// unwrapped to *internal.StartupError.
func TestStartupErrorBehaviorError(t *testing.T) {
	// Grab a free local port and release it immediately, so the address is
	// valid but nothing is listening on it when the plugin dials.
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	require.NoError(t, err)
	addr := ln.Addr().String()
	ln.Close()

	// The plugin is wrapped in a running-output model so that the startup
	// error behavior handling is part of the code path under test.
	syslogOutput := &Syslog{
		Address:             "tcp://" + addr,
		Trailer:             nontransparent.LF,
		Separator:           "_",
		DefaultSeverityCode: uint8(5), // notice
		DefaultFacilityCode: uint8(1), // user-level
		DefaultAppname:      "Telegraf",
	}
	cfg := &models.OutputConfig{
		Name:                 "syslog",
		StartupErrorBehavior: "error",
	}
	output := models.NewRunningOutput(syslogOutput, cfg, 10, 100)
	require.NoError(t, output.Init())

	// With nobody listening, connecting must fail with a startup error.
	var startupErr *internal.StartupError
	connectErr := output.Connect()
	require.Error(t, connectErr, "connection should be refused")
	require.ErrorAs(t, connectErr, &startupErr)
}

// TestStartupErrorBehaviorIgnore checks that, with StartupErrorBehavior set to
// "ignore", a refused connection makes Connect return an error that can be
// unwrapped to *internal.FatalError.
func TestStartupErrorBehaviorIgnore(t *testing.T) {
	// Grab a free local port and release it immediately, so the address is
	// valid but nothing is listening on it when the plugin dials.
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	require.NoError(t, err)
	addr := ln.Addr().String()
	ln.Close()

	// The plugin is wrapped in a running-output model so that the startup
	// error behavior handling is part of the code path under test.
	syslogOutput := &Syslog{
		Address:             "tcp://" + addr,
		Trailer:             nontransparent.LF,
		Separator:           "_",
		DefaultSeverityCode: uint8(5), // notice
		DefaultFacilityCode: uint8(1), // user-level
		DefaultAppname:      "Telegraf",
	}
	cfg := &models.OutputConfig{
		Name:                 "syslog",
		StartupErrorBehavior: "ignore",
	}
	output := models.NewRunningOutput(syslogOutput, cfg, 10, 100)
	require.NoError(t, output.Init())

	// The connection attempt fails because nothing is listening. The model
	// is expected to surface this as a fatal error, which the agent uses to
	// remove the plugin.
	var fatal *internal.FatalError
	connectErr := output.Connect()
	require.Error(t, connectErr, "connection should be refused")
	require.ErrorAs(t, connectErr, &fatal)
}

// TestStartupErrorBehaviorRetry checks the "retry" startup error behavior:
// Connect succeeds even though the server is unreachable, the first write
// fails with internal.ErrNotConnected, and once a reachable address is
// configured the next write succeeds and data arrives at the server.
func TestStartupErrorBehaviorRetry(t *testing.T) {
	// Grab a free local port and release it immediately, so the address is
	// valid but nothing is listening on it when the plugin dials.
	listener, err := net.Listen("tcp", "127.0.0.1:0")
	require.NoError(t, err)
	address := listener.Addr().String()
	listener.Close()

	// Setup the plugin and the model to be able to use the startup retry strategy
	plugin := &Syslog{
		Address:             "tcp://" + address,
		Trailer:             nontransparent.LF,
		Separator:           "_",
		DefaultSeverityCode: uint8(5), // notice
		DefaultFacilityCode: uint8(1), // user-level
		DefaultAppname:      "Telegraf",
	}

	model := models.NewRunningOutput(
		plugin,
		&models.OutputConfig{
			Name:                 "syslog",
			StartupErrorBehavior: "retry",
		},
		10, 100,
	)
	require.NoError(t, model.Init())

	// Starting the plugin will return no error because the plugin will
	// retry to connect in every write cycle.
	require.NoError(t, model.Connect())
	defer model.Close()

	// Writing metrics in this state should fail because we are not fully
	// started up
	metrics := testutil.MockMetrics()
	for _, m := range metrics {
		model.AddMetric(m)
	}
	require.ErrorIs(t, model.WriteBatch(), internal.ErrNotConnected)

	// Startup an actually working listener we can connect and write to
	listener, err = net.Listen("tcp", "127.0.0.1:0")
	require.NoError(t, err)
	defer listener.Close()

	var wg sync.WaitGroup
	buf := make([]byte, 256)
	// Number of bytes actually read by the server goroutine. It is written
	// before wg.Done() and only read after wg.Wait(), so no extra locking
	// is needed.
	var received int

	wg.Add(1)
	go func() {
		defer wg.Done()

		conn, err := listener.Accept()
		if err != nil {
			t.Errorf("accepting connection failed: %v", err)
			return
		}
		// Release the accepted connection once the read is done.
		defer conn.Close()

		if err := conn.SetReadDeadline(time.Now().Add(3 * time.Second)); err != nil {
			t.Errorf("setting read deadline failed: %v", err)
			return
		}

		n, err := conn.Read(buf)
		received = n
		if err != nil {
			t.Errorf("reading failed: %v", err)
		}
	}()

	// Update the plugin's address and write again. This time the write should
	// succeed.
	plugin.Address = "tcp://" + listener.Addr().String()
	require.NoError(t, model.WriteBatch())
	wg.Wait()

	// Only look at the bytes that were really received; the full
	// pre-allocated buffer is never empty and would make this check vacuous.
	require.NotEmpty(t, string(buf[:received]))
}
Loading