From 5e6d25c5e3c49699790d6f1f2481df8a2aa9cbb5 Mon Sep 17 00:00:00 2001 From: Evan Bradley <11745660+evan-bradley@users.noreply.github.com> Date: Thu, 16 May 2024 12:03:33 -0400 Subject: [PATCH] Revert "[cmd/opampsupervisor]: Persist the instance ID between restarts (#32618)" This reverts commit 125ff4927e012e0a1b9a7a0fec9684f433a43d2e. --- ..._opamp-supervisor-persist-instance-id.yaml | 13 -- cmd/opampsupervisor/README.md | 11 -- cmd/opampsupervisor/e2e_test.go | 158 +----------------- cmd/opampsupervisor/go.mod | 2 +- .../supervisor/config/config.go | 28 +--- cmd/opampsupervisor/supervisor/persistence.go | 92 ---------- .../supervisor/persistence_test.go | 73 -------- cmd/opampsupervisor/supervisor/supervisor.go | 105 ++++++------ .../supervisor/supervisor_accepts_conn.yaml | 3 - .../testdata/supervisor/supervisor_basic.yaml | 3 - .../testdata/supervisor/supervisor_nocap.yaml | 3 - .../testdata/supervisor/supervisor_test.yaml | 3 - 12 files changed, 62 insertions(+), 432 deletions(-) delete mode 100644 .chloggen/feat_opamp-supervisor-persist-instance-id.yaml delete mode 100644 cmd/opampsupervisor/supervisor/persistence.go delete mode 100644 cmd/opampsupervisor/supervisor/persistence_test.go diff --git a/.chloggen/feat_opamp-supervisor-persist-instance-id.yaml b/.chloggen/feat_opamp-supervisor-persist-instance-id.yaml deleted file mode 100644 index 4461e759891d..000000000000 --- a/.chloggen/feat_opamp-supervisor-persist-instance-id.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Use this changelog template to create an entry for release notes. - -# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' -change_type: enhancement - -# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) -component: opampsupervisor - -# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). -note: Allows the supervisor to persist its instance ID between restarts. - -# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. -issues: [21073] diff --git a/cmd/opampsupervisor/README.md b/cmd/opampsupervisor/README.md index a292623fc26a..10d4761aadaf 100644 --- a/cmd/opampsupervisor/README.md +++ b/cmd/opampsupervisor/README.md @@ -33,17 +33,6 @@ The supervisor is currently undergoing heavy development and is not ready for an 4. The supervisor should connect to the OpAMP server and start a Collector instance. -## Persistent data storage -The supervisor persists some data to disk in order to mantain state between restarts. The directory where this data is stored may be specified via the supervisor configuration: -```yaml -storage: - directory: "/path/to/storage/dir" -``` - -By default, the supervisor will use `/var/lib/otelcol/supervisor` on posix systems, and `%ProgramData%/Otelcol/Supervisor` on Windows. - -This directory will be created on supervisor startup if it does not exist. - ## Status The OpenTelemetry OpAMP Supervisor is intended to be the reference diff --git a/cmd/opampsupervisor/e2e_test.go b/cmd/opampsupervisor/e2e_test.go index 6b089555166b..9e90c412405a 100644 --- a/cmd/opampsupervisor/e2e_test.go +++ b/cmd/opampsupervisor/e2e_test.go @@ -158,10 +158,9 @@ func getSupervisorConfig(t *testing.T, configType string, extraConfigData map[st extension = ".exe" } configData := map[string]string{ - "goos": runtime.GOOS, - "goarch": runtime.GOARCH, - "extension": extension, - "storage_dir": t.TempDir(), + "goos": runtime.GOOS, + "goarch": runtime.GOARCH, + "extension": extension, } for key, val := range extraConfigData { @@ -782,154 +781,3 @@ func TestSupervisorRestartsWithLastReceivedConfig(t *testing.T) { }, 10*time.Second, 500*time.Millisecond, "Collector was not started with the last received remote config") } - -func TestSupervisorPersistsInstanceID(t *testing.T) { - // Tests shutting down and starting up a new supervisor will - // persist and re-use the same instance ID. - storageDir := t.TempDir() - - agentIDChan := make(chan string, 1) - server := newOpAMPServer( - t, - defaultConnectingHandler, - server.ConnectionCallbacksStruct{ - OnMessageFunc: func(_ context.Context, _ types.Connection, message *protobufs.AgentToServer) *protobufs.ServerToAgent { - - select { - case agentIDChan <- message.InstanceUid: - default: - } - - return &protobufs.ServerToAgent{} - }, - }) - - s := newSupervisor(t, "basic", map[string]string{ - "url": server.addr, - "storage_dir": storageDir, - }) - - waitForSupervisorConnection(server.supervisorConnected, true) - - t.Logf("Supervisor connected") - - var firstAgentID string - select { - case firstAgentID = <-agentIDChan: - case <-time.After(1 * time.Second): - t.Fatalf("failed to get first agent ID") - } - - t.Logf("Got agent ID %s, shutting down supervisor", firstAgentID) - - s.Shutdown() - - waitForSupervisorConnection(server.supervisorConnected, false) - - t.Logf("Supervisor disconnected") - - // Drain agent ID channel so we get a fresh ID from the new supervisor - select { - case <-agentIDChan: - default: - } - - s = newSupervisor(t, "basic", map[string]string{ - "url": server.addr, - "storage_dir": storageDir, - }) - defer s.Shutdown() - - waitForSupervisorConnection(server.supervisorConnected, true) - - t.Logf("Supervisor connected") - - var secondAgentID string - select { - case secondAgentID = <-agentIDChan: - case <-time.After(1 * time.Second): - t.Fatalf("failed to get second agent ID") - } - - require.Equal(t, firstAgentID, secondAgentID) -} - -func TestSupervisorPersistsNewInstanceID(t *testing.T) { - // Tests that an agent ID that is given from the server to the agent in an AgentIdentification message - // is properly persisted. - storageDir := t.TempDir() - - newID := "01HW3GS9NWD840C5C2BZS3KYPW" - - agentIDChan := make(chan string, 1) - server := newOpAMPServer( - t, - defaultConnectingHandler, - server.ConnectionCallbacksStruct{ - OnMessageFunc: func(_ context.Context, _ types.Connection, message *protobufs.AgentToServer) *protobufs.ServerToAgent { - - select { - case agentIDChan <- message.InstanceUid: - default: - } - - if message.InstanceUid != newID { - return &protobufs.ServerToAgent{ - InstanceUid: message.InstanceUid, - AgentIdentification: &protobufs.AgentIdentification{ - NewInstanceUid: newID, - }, - } - } - - return &protobufs.ServerToAgent{} - }, - }) - - s := newSupervisor(t, "basic", map[string]string{ - "url": server.addr, - "storage_dir": storageDir, - }) - - waitForSupervisorConnection(server.supervisorConnected, true) - - t.Logf("Supervisor connected") - - for id := range agentIDChan { - if id == newID { - t.Logf("Agent ID was changed to new ID") - break - } - } - - s.Shutdown() - - waitForSupervisorConnection(server.supervisorConnected, false) - - t.Logf("Supervisor disconnected") - - // Drain agent ID channel so we get a fresh ID from the new supervisor - select { - case <-agentIDChan: - default: - } - - s = newSupervisor(t, "basic", map[string]string{ - "url": server.addr, - "storage_dir": storageDir, - }) - defer s.Shutdown() - - waitForSupervisorConnection(server.supervisorConnected, true) - - t.Logf("Supervisor connected") - - var newRecievedAgentID string - select { - case newRecievedAgentID = <-agentIDChan: - case <-time.After(1 * time.Second): - t.Fatalf("failed to get second agent ID") - } - - require.Equal(t, newID, newRecievedAgentID) -} diff --git a/cmd/opampsupervisor/go.mod b/cmd/opampsupervisor/go.mod index 044d38d8bc2e..19ac4b90213c 100644 --- a/cmd/opampsupervisor/go.mod +++ b/cmd/opampsupervisor/go.mod @@ -17,7 +17,6 @@ require ( go.uber.org/goleak v1.3.0 go.uber.org/zap v1.27.0 google.golang.org/protobuf v1.34.1 - gopkg.in/yaml.v3 v3.0.1 ) require ( @@ -33,4 +32,5 @@ require ( go.uber.org/multierr v1.11.0 // indirect golang.org/x/net v0.23.0 // indirect golang.org/x/sys v0.18.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/cmd/opampsupervisor/supervisor/config/config.go b/cmd/opampsupervisor/supervisor/config/config.go index 00e8d6a3ec8d..415a7e59fd6b 100644 --- a/cmd/opampsupervisor/supervisor/config/config.go +++ b/cmd/opampsupervisor/supervisor/config/config.go @@ -5,9 +5,6 @@ package config import ( "net/http" - "os" - "path/filepath" - "runtime" "go.opentelemetry.io/collector/config/configtls" ) @@ -17,7 +14,7 @@ type Supervisor struct { Server *OpAMPServer Agent *Agent Capabilities *Capabilities `mapstructure:"capabilities"` - Storage Storage `mapstructure:"storage"` + Storage *Storage `mapstructure:"storage"` } type Storage struct { @@ -25,29 +22,6 @@ type Storage struct { Directory string `mapstructure:"directory"` } -// DirectoryOrDefault returns the configured storage directory if it was configured, -// otherwise it returns the system default. -func (s Storage) DirectoryOrDefault() string { - if s.Directory == "" { - switch runtime.GOOS { - case "windows": - // Windows default is "%ProgramData%\Otelcol\Supervisor" - // If the ProgramData environment variable is not set, - // it falls back to C:\ProgramData - programDataDir := os.Getenv("ProgramData") - if programDataDir == "" { - programDataDir = `C:\ProgramData` - } - return filepath.Join(programDataDir, "Otelcol", "Supervisor") - default: - // Default for non-windows systems - return "/var/lib/otelcol/supervisor" - } - } - - return s.Directory -} - // Capabilities is the set of capabilities that the Supervisor supports. type Capabilities struct { AcceptsRemoteConfig *bool `mapstructure:"accepts_remote_config"` diff --git a/cmd/opampsupervisor/supervisor/persistence.go b/cmd/opampsupervisor/supervisor/persistence.go deleted file mode 100644 index e3f4077adc09..000000000000 --- a/cmd/opampsupervisor/supervisor/persistence.go +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package supervisor - -import ( - "crypto/rand" - "errors" - "os" - "time" - - "github.com/oklog/ulid/v2" - "gopkg.in/yaml.v3" -) - -// persistentState represents persistent state for the supervisor -type persistentState struct { - InstanceID ulid.ULID `yaml:"instance_id"` - - // Path to the config file that the state should be saved to. - // This is not marshaled. - configPath string `yaml:"-"` -} - -func (p *persistentState) SetInstanceID(id ulid.ULID) error { - p.InstanceID = id - return p.writeState() -} - -func (p *persistentState) writeState() error { - by, err := yaml.Marshal(p) - if err != nil { - return err - } - - return os.WriteFile(p.configPath, by, 0600) -} - -// loadOrCreatePersistentState attempts to load the persistent state from disk. If it doesn't -// exist, a new persistent state file is created. -func loadOrCreatePersistentState(file string) (*persistentState, error) { - state, err := loadPersistentState(file) - switch { - case errors.Is(err, os.ErrNotExist): - return createNewPersistentState(file) - case err != nil: - return nil, err - default: - return state, nil - } -} - -func loadPersistentState(file string) (*persistentState, error) { - var state *persistentState - - by, err := os.ReadFile(file) - if err != nil { - return nil, err - } - - if err := yaml.Unmarshal(by, &state); err != nil { - return nil, err - } - - state.configPath = file - - return state, nil -} - -func createNewPersistentState(file string) (*persistentState, error) { - id, err := generateNewULID() - if err != nil { - return nil, err - } - - p := &persistentState{ - InstanceID: id, - configPath: file, - } - - return p, p.writeState() -} - -func generateNewULID() (ulid.ULID, error) { - entropy := ulid.Monotonic(rand.Reader, 0) - id, err := ulid.New(ulid.Timestamp(time.Now()), entropy) - if err != nil { - return ulid.ULID{}, err - } - - return id, nil -} diff --git a/cmd/opampsupervisor/supervisor/persistence_test.go b/cmd/opampsupervisor/supervisor/persistence_test.go deleted file mode 100644 index 77e4ba9eb0f9..000000000000 --- a/cmd/opampsupervisor/supervisor/persistence_test.go +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package supervisor - -import ( - "os" - "path/filepath" - "testing" - - "github.com/oklog/ulid/v2" - "github.com/stretchr/testify/require" -) - -func TestCreateOrLoadPersistentState(t *testing.T) { - t.Run("Creates a new state file if it does not exist", func(t *testing.T) { - f := filepath.Join(t.TempDir(), "state.yaml") - state, err := loadOrCreatePersistentState(f) - require.NoError(t, err) - - // instance ID should be populated - require.NotEqual(t, ulid.ULID{}, state.InstanceID) - require.FileExists(t, f) - }) - - t.Run("loads state from file if it exists", func(t *testing.T) { - f := filepath.Join(t.TempDir(), "state.yaml") - - err := os.WriteFile(f, []byte(`instance_id: "01HW3GS9NWD840C5C2BZS3KYPW"`), 0600) - require.NoError(t, err) - - state, err := loadOrCreatePersistentState(f) - require.NoError(t, err) - - // instance ID should be populated with value from file - require.Equal(t, ulid.MustParse("01HW3GS9NWD840C5C2BZS3KYPW"), state.InstanceID) - require.FileExists(t, f) - }) - -} - -func TestPersistentState_SetInstanceID(t *testing.T) { - f := filepath.Join(t.TempDir(), "state.yaml") - state, err := createNewPersistentState(f) - require.NoError(t, err) - - // instance ID should be populated - require.NotEqual(t, ulid.ULID{}, state.InstanceID) - require.FileExists(t, f) - - newULID := ulid.MustParse("01HW3GS9NWD840C5C2BZS3KYPW") - err = state.SetInstanceID(newULID) - require.NoError(t, err) - - require.Equal(t, newULID, state.InstanceID) - - // Test that loading the state after setting the instance ID has the new instance ID - loadedState, err := loadPersistentState(f) - require.NoError(t, err) - - require.Equal(t, newULID, loadedState.InstanceID) -} - -func TestGenerateNewULID(t *testing.T) { - // Test generating a new ULID twice returns 2 different results - id1, err := generateNewULID() - require.NoError(t, err) - - id2, err := generateNewULID() - require.NoError(t, err) - - require.NotEqual(t, id1, id2) -} diff --git a/cmd/opampsupervisor/supervisor/supervisor.go b/cmd/opampsupervisor/supervisor/supervisor.go index 47f58d66d888..096557179951 100644 --- a/cmd/opampsupervisor/supervisor/supervisor.go +++ b/cmd/opampsupervisor/supervisor/supervisor.go @@ -9,6 +9,7 @@ import ( _ "embed" "errors" "fmt" + "math/rand" "net" "net/http" "os" @@ -55,8 +56,6 @@ var ( lastRecvOwnMetricsConfigFile = "last_recv_own_metrics_config.dat" ) -const persistentStateFile = "persistent_state.yaml" - // Supervisor implements supervising of OpenTelemetry Collector and uses OpAMPClient // to work with an OpAMP Server. type Supervisor struct { @@ -76,8 +75,8 @@ type Supervisor struct { agentDescription *protobufs.AgentDescription - // Supervisor's persistent state - persistentState *persistentState + // Agent's instance id. + instanceID ulid.ULID bootstrapTemplate *template.Template extraConfigTemplate *template.Template @@ -108,7 +107,7 @@ type Supervisor struct { // The OpAMP client to connect to the OpAMP Server. opampClient client.OpAMPClient - doneChan chan struct{} + shuttingDown bool supervisorWG sync.WaitGroup agentHasStarted bool @@ -126,7 +125,6 @@ func NewSupervisor(logger *zap.Logger, configFile string) (*Supervisor, error) { agentConfigOwnMetricsSection: &atomic.Value{}, effectiveConfig: &atomic.Value{}, connectedToOpAMPServer: make(chan struct{}), - doneChan: make(chan struct{}), } if err := s.createTemplates(); err != nil { @@ -137,17 +135,13 @@ func NewSupervisor(logger *zap.Logger, configFile string) (*Supervisor, error) { return nil, fmt.Errorf("error loading config: %w", err) } - storageDir := s.config.Storage.DirectoryOrDefault() - if err := os.MkdirAll(storageDir, 0700); err != nil { - return nil, fmt.Errorf("error creating storage dir: %w", err) - } - - var err error - s.persistentState, err = loadOrCreatePersistentState(s.persistentStateFile()) + id, err := s.createInstanceID() if err != nil { return nil, err } + s.instanceID = id + if err = s.getBootstrapInfo(); err != nil { return nil, fmt.Errorf("could not get bootstrap info from the Collector: %w", err) } @@ -161,7 +155,7 @@ func NewSupervisor(logger *zap.Logger, configFile string) (*Supervisor, error) { s.agentHealthCheckEndpoint = fmt.Sprintf("localhost:%d", healthCheckPort) logger.Debug("Supervisor starting", - zap.String("id", s.persistentState.InstanceID.String())) + zap.String("id", s.instanceID.String())) s.loadAgentEffectiveConfig() @@ -239,7 +233,7 @@ func (s *Supervisor) getBootstrapInfo() (err error) { var cfg bytes.Buffer err = s.bootstrapTemplate.Execute(&cfg, map[string]any{ - "InstanceUid": s.persistentState.InstanceID.String(), + "InstanceUid": s.instanceID.String(), "SupervisorPort": supervisorPort, }) if err != nil { @@ -269,11 +263,11 @@ func (s *Supervisor) getBootstrapInfo() (err error) { if attr.Key == semconv.AttributeServiceInstanceID { // TODO: Consider whether to attempt restarting the Collector. // https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/29864 - if attr.Value.GetStringValue() != s.persistentState.InstanceID.String() { + if attr.Value.GetStringValue() != s.instanceID.String() { done <- fmt.Errorf( "the Collector's instance ID (%s) does not match with the instance ID set by the Supervisor (%s)", attr.Value.GetStringValue(), - s.persistentState.InstanceID.String()) + s.instanceID.String()) return } instanceIDSeen = true @@ -381,7 +375,7 @@ func (s *Supervisor) startOpAMP() error { OpAMPServerURL: s.config.Server.Endpoint, Header: s.config.Server.Headers, TLSConfig: tlsConfig, - InstanceUid: s.persistentState.InstanceID.String(), + InstanceUid: s.instanceID.String(), Callbacks: types.CallbacksStruct{ OnConnectFunc: func(_ context.Context) { s.connectedToOpAMPServer <- struct{}{} @@ -564,6 +558,19 @@ func (s *Supervisor) waitForOpAMPConnection() error { } } +// TODO: Persist instance ID. https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/21073 +func (s *Supervisor) createInstanceID() (ulid.ULID, error) { + entropy := ulid.Monotonic(rand.New(rand.NewSource(0)), 0) + id, err := ulid.New(ulid.Timestamp(time.Now()), entropy) + + if err != nil { + return ulid.ULID{}, err + } + + return id, nil + +} + func (s *Supervisor) composeExtraLocalConfig() []byte { var cfg bytes.Buffer resourceAttrs := map[string]string{} @@ -605,9 +612,10 @@ func (s *Supervisor) loadAgentEffectiveConfig() { s.effectiveConfig.Store(string(effectiveConfigBytes)) if s.config.Capabilities != nil && s.config.Capabilities.AcceptsRemoteConfig != nil && - *s.config.Capabilities.AcceptsRemoteConfig { + *s.config.Capabilities.AcceptsRemoteConfig && + s.config.Storage != nil { // Try to load the last received remote config if it exists. - lastRecvRemoteConfig, err = os.ReadFile(filepath.Join(s.config.Storage.DirectoryOrDefault(), lastRecvRemoteConfigFile)) + lastRecvRemoteConfig, err = os.ReadFile(filepath.Join(s.config.Storage.Directory, lastRecvRemoteConfigFile)) if err == nil { config := &protobufs.AgentRemoteConfig{} err = proto.Unmarshal(lastRecvRemoteConfig, config) @@ -624,9 +632,10 @@ func (s *Supervisor) loadAgentEffectiveConfig() { } if s.config.Capabilities != nil && s.config.Capabilities.ReportsOwnMetrics != nil && - *s.config.Capabilities.ReportsOwnMetrics { + *s.config.Capabilities.ReportsOwnMetrics && + s.config.Storage != nil { // Try to load the last received own metrics config if it exists. - lastRecvOwnMetricsConfig, err = os.ReadFile(filepath.Join(s.config.Storage.DirectoryOrDefault(), lastRecvOwnMetricsConfigFile)) + lastRecvOwnMetricsConfig, err = os.ReadFile(filepath.Join(s.config.Storage.Directory, lastRecvOwnMetricsConfigFile)) if err == nil { set := &protobufs.TelemetryConnectionSettings{} err = proto.Unmarshal(lastRecvOwnMetricsConfig, set) @@ -885,7 +894,6 @@ func (s *Supervisor) healthCheck() { func (s *Supervisor) runAgentProcess() { if _, err := os.Stat(s.effectiveConfigFilePath); err == nil { // We have an effective config file saved previously. Use it to start the agent. - s.logger.Debug("Effective config found, starting agent initial time") s.startAgent() } @@ -895,7 +903,6 @@ func (s *Supervisor) runAgentProcess() { for { select { case <-s.hasNewConfig: - s.logger.Debug("Restarting agent due to new config") restartTimer.Stop() s.stopAgentApplyConfig() s.startAgent() @@ -906,6 +913,10 @@ func (s *Supervisor) runAgentProcess() { continue } + if s.shuttingDown { + return + } + s.logger.Debug("Agent process exited unexpectedly. Will restart in a bit...", zap.Int("pid", s.commander.Pid()), zap.Int("exit_code", s.commander.ExitCode())) errMsg := fmt.Sprintf( "Agent process PID=%d exited unexpectedly, exit code=%d. Will restart in a bit...", @@ -930,18 +941,10 @@ func (s *Supervisor) runAgentProcess() { restartTimer.Reset(5 * time.Second) case <-restartTimer.C: - s.logger.Debug("Agent starting after start backoff") s.startAgent() case <-s.healthCheckTicker.C: s.healthCheck() - - case <-s.doneChan: - err := s.commander.Stop(context.Background()) - if err != nil { - s.logger.Error("Could not stop agent process", zap.Error(err)) - } - return } } } @@ -974,7 +977,14 @@ func (s *Supervisor) writeEffectiveConfigToFile(cfg string, filePath string) { func (s *Supervisor) Shutdown() { s.logger.Debug("Supervisor shutting down...") - close(s.doneChan) + s.shuttingDown = true + if s.commander != nil { + err := s.commander.Stop(context.Background()) + + if err != nil { + s.logger.Error("Could not stop agent process", zap.Error(err)) + } + } if s.opampClient != nil { err := s.opampClient.SetHealth( @@ -994,29 +1004,37 @@ func (s *Supervisor) Shutdown() { } } - s.supervisorWG.Wait() - if s.healthCheckTicker != nil { s.healthCheckTicker.Stop() } + + s.supervisorWG.Wait() } func (s *Supervisor) saveLastReceivedConfig(config *protobufs.AgentRemoteConfig) error { + if s.config.Storage == nil { + return nil + } + cfg, err := proto.Marshal(config) if err != nil { return err } - return os.WriteFile(filepath.Join(s.config.Storage.DirectoryOrDefault(), lastRecvRemoteConfigFile), cfg, 0600) + return os.WriteFile(filepath.Join(s.config.Storage.Directory, lastRecvRemoteConfigFile), cfg, 0600) } func (s *Supervisor) saveLastReceivedOwnTelemetrySettings(set *protobufs.TelemetryConnectionSettings, filePath string) error { + if s.config.Storage == nil { + return nil + } + cfg, err := proto.Marshal(set) if err != nil { return err } - return os.WriteFile(filepath.Join(s.config.Storage.DirectoryOrDefault(), filePath), cfg, 0600) + return os.WriteFile(filepath.Join(s.config.Storage.Directory, filePath), cfg, 0600) } func (s *Supervisor) onMessage(ctx context.Context, msg *types.MessageData) { @@ -1064,14 +1082,9 @@ func (s *Supervisor) onMessage(ctx context.Context, msg *types.MessageData) { } s.logger.Debug("Agent identity is changing", - zap.String("old_id", s.persistentState.InstanceID.String()), + zap.String("old_id", s.instanceID.String()), zap.String("new_id", newInstanceID.String())) - - err = s.persistentState.SetInstanceID(newInstanceID) - if err != nil { - s.logger.Error("Failed to persist new instance ID, instance ID will revert on restart.", zap.String("new_id", newInstanceID.String()), zap.Error(err)) - } - + s.instanceID = newInstanceID err = s.opampClient.SetAgentDescription(s.agentDescription) if err != nil { s.logger.Error("Failed to send agent description to OpAMP server") @@ -1095,10 +1108,6 @@ func (s *Supervisor) onMessage(ctx context.Context, msg *types.MessageData) { } } -func (s *Supervisor) persistentStateFile() string { - return filepath.Join(s.config.Storage.DirectoryOrDefault(), persistentStateFile) -} - func (s *Supervisor) findRandomPort() (int, error) { l, err := net.Listen("tcp", "localhost:0") diff --git a/cmd/opampsupervisor/testdata/supervisor/supervisor_accepts_conn.yaml b/cmd/opampsupervisor/testdata/supervisor/supervisor_accepts_conn.yaml index e86ab6cb25f0..0282577b252a 100644 --- a/cmd/opampsupervisor/testdata/supervisor/supervisor_accepts_conn.yaml +++ b/cmd/opampsupervisor/testdata/supervisor/supervisor_accepts_conn.yaml @@ -11,8 +11,5 @@ capabilities: reports_remote_config: true accepts_opamp_connection_settings: true -storage: - directory: "{{.storage_dir}}" - agent: executable: ../../bin/otelcontribcol_{{.goos}}_{{.goarch}}{{.extension}} diff --git a/cmd/opampsupervisor/testdata/supervisor/supervisor_basic.yaml b/cmd/opampsupervisor/testdata/supervisor/supervisor_basic.yaml index 75490189b904..7e4b0a08536c 100644 --- a/cmd/opampsupervisor/testdata/supervisor/supervisor_basic.yaml +++ b/cmd/opampsupervisor/testdata/supervisor/supervisor_basic.yaml @@ -11,8 +11,5 @@ capabilities: reports_remote_config: true accepts_restart_command: true -storage: - directory: "{{.storage_dir}}" - agent: executable: ../../bin/otelcontribcol_{{.goos}}_{{.goarch}}{{.extension}} diff --git a/cmd/opampsupervisor/testdata/supervisor/supervisor_nocap.yaml b/cmd/opampsupervisor/testdata/supervisor/supervisor_nocap.yaml index ca0d9378887d..5b61ad4b1b68 100644 --- a/cmd/opampsupervisor/testdata/supervisor/supervisor_nocap.yaml +++ b/cmd/opampsupervisor/testdata/supervisor/supervisor_nocap.yaml @@ -10,8 +10,5 @@ capabilities: accepts_remote_config: false reports_remote_config: false -storage: - directory: "{{.storage_dir}}" - agent: executable: ../../bin/otelcontribcol_{{.goos}}_{{.goarch}}{{.extension}} diff --git a/cmd/opampsupervisor/testdata/supervisor/supervisor_test.yaml b/cmd/opampsupervisor/testdata/supervisor/supervisor_test.yaml index 68881cd4970c..bdcdc2e72c93 100644 --- a/cmd/opampsupervisor/testdata/supervisor/supervisor_test.yaml +++ b/cmd/opampsupervisor/testdata/supervisor/supervisor_test.yaml @@ -10,8 +10,5 @@ capabilities: accepts_remote_config: true reports_remote_config: true -storage: - directory: "{{.storage_dir}}" - agent: executable: ../../bin/otelcontribcol_darwin_arm64