Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[cmd/opampsupervisor]: Allow configuring agent description #32819

Merged
13 changes: 13 additions & 0 deletions .chloggen/feat_supervisor-agent-description.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: opampsupervisor

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Adds the ability to configure the agent description

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [32644]
91 changes: 91 additions & 0 deletions cmd/opampsupervisor/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,97 @@ func TestSupervisorBootstrapsCollector(t *testing.T) {
}, 5*time.Second, 250*time.Millisecond)
}

func TestSupervisorAgentDescriptionConfigApplies(t *testing.T) {
// Load the Supervisor config so we can get the location of
// the Collector that will be run.
var cfg config.Supervisor
cfgFile := getSupervisorConfig(t, "agent_description", map[string]string{})
k := koanf.New("::")
err := k.Load(file.Provider(cfgFile.Name()), yaml.Parser())
require.NoError(t, err)
err = k.UnmarshalWithConf("", &cfg, koanf.UnmarshalConf{
Tag: "mapstructure",
})
require.NoError(t, err)

host, err := os.Hostname()
require.NoError(t, err)

// Get the binary name and version from the Collector binary
// using the `components` command that prints a YAML-encoded
// map of information about the Collector build. Some of this
// information will be used as defaults for the telemetry
// attributes.
agentPath := cfg.Agent.Executable
componentsInfo, err := exec.Command(agentPath, "components").Output()
require.NoError(t, err)
k = koanf.New("::")
err = k.Load(rawbytes.Provider(componentsInfo), yaml.Parser())
require.NoError(t, err)
buildinfo := k.StringMap("buildinfo")
command := buildinfo["command"]
version := buildinfo["version"]

agentDescMessageChan := make(chan *protobufs.AgentToServer, 1)

server := newOpAMPServer(
t,
defaultConnectingHandler,
server.ConnectionCallbacksStruct{
OnMessageFunc: func(_ context.Context, _ types.Connection, message *protobufs.AgentToServer) *protobufs.ServerToAgent {
if message.AgentDescription != nil {
select {
case agentDescMessageChan <- message:
default:
}
}

return &protobufs.ServerToAgent{}
},
})

s := newSupervisor(t, "agent_description", map[string]string{"url": server.addr})
defer s.Shutdown()

waitForSupervisorConnection(server.supervisorConnected, true)
var ad *protobufs.AgentToServer
select {
case ad = <-agentDescMessageChan:
case <-time.After(5 * time.Second):
t.Fatal("Failed to get agent description after 5 seconds")
}

expectedDescription := &protobufs.AgentDescription{
IdentifyingAttributes: []*protobufs.KeyValue{
stringKeyValue("client.id", "my-client-id"),
stringKeyValue(semconv.AttributeServiceInstanceID, ad.InstanceUid),
stringKeyValue(semconv.AttributeServiceName, command),
stringKeyValue(semconv.AttributeServiceVersion, version),
},
NonIdentifyingAttributes: []*protobufs.KeyValue{
stringKeyValue("env", "prod"),
stringKeyValue(semconv.AttributeHostArch, runtime.GOARCH),
stringKeyValue(semconv.AttributeHostName, host),
stringKeyValue(semconv.AttributeOSType, runtime.GOOS),
},
}

require.Equal(t, expectedDescription, ad.AgentDescription)

time.Sleep(250 * time.Millisecond)
Copy link
Contributor Author

@BinaryFissionGames BinaryFissionGames May 2, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without this sleep, the supervisor gets stuck shutting down.

This is the same race I had to address in #32618 in order to get tests to run properly for that PR. This can be removed once the fix from that PR is merged.

}

// stringKeyValue builds a protobufs.KeyValue with the given key whose value
// is the string val.
func stringKeyValue(key, val string) *protobufs.KeyValue {
	strVal := &protobufs.AnyValue_StringValue{StringValue: val}
	anyVal := &protobufs.AnyValue{Value: strVal}
	return &protobufs.KeyValue{Key: key, Value: anyVal}
}

// Creates a Collector config that reads and writes logs to files and provides
// file descriptors for I/O operations to those files. The files are placed
// in a unique temp directory that is cleaned up after the test's completion.
Expand Down
14 changes: 13 additions & 1 deletion cmd/opampsupervisor/specification/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ storage:
# and %ProgramData%/Otelcol/Supervisor on Windows.
directory: /path/to/dir

collector:
agent:
# Path to Collector executable. Required.
executable: /opt/otelcol/bin/otelcol

Expand All @@ -144,6 +144,18 @@ collector:
deny: [/var/log/secret_logs]
write:
allow: [/var/otelcol]

# Optional key-value pairs to add to either the identifying attributes or
# non-identifying attributes of the agent description sent to the OpAMP server.
# Values here override the values in the agent description retrieved from the collector's
# OpAMP extension.
BinaryFissionGames marked this conversation as resolved.
Show resolved Hide resolved
# The service.instance.id identifying attribute may not be overridden.
description:
identifying_attributes:
client.id: "01HWWSK84BMT7J45663MBJMTPJ"
non_identifying_attributes:
custom.attribute: "custom-value"

```

### Executing Collector
Expand Down
32 changes: 31 additions & 1 deletion cmd/opampsupervisor/supervisor/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
package config

import (
"fmt"
"net/http"

"go.opentelemetry.io/collector/config/configtls"
semconv "go.opentelemetry.io/collector/semconv/v1.21.0"
)

// Supervisor is the Supervisor config file format.
Expand All @@ -17,6 +19,14 @@ type Supervisor struct {
Storage *Storage `mapstructure:"storage"`
}

// Validate checks the Supervisor configuration. Only the agent section is
// validated, and only when it is present.
func (s *Supervisor) Validate() error {
	if s.Agent == nil {
		return nil
	}

	return s.Agent.Validate()
}

type Storage struct {
// Directory is the directory where the Supervisor will store its data.
Directory string `mapstructure:"directory"`
Expand All @@ -40,5 +50,25 @@ type OpAMPServer struct {
}

type Agent struct {
Executable string
Executable string
Description AgentDescription `mapstructure:"description"`
}

// Validate checks the agent configuration by validating its Description.
// It returns whatever error the description's validation returns.
func (a *Agent) Validate() error {
return a.Description.Validate()
}

// AgentDescription holds user-configured key-value pairs for the agent
// description, split into identifying and non-identifying attributes.
type AgentDescription struct {
// IdentifyingAttributes is read from the identifying_attributes config key.
IdentifyingAttributes map[string]string `mapstructure:"identifying_attributes"`
// NonIdentifyingAttributes is read from the non_identifying_attributes config key.
NonIdentifyingAttributes map[string]string `mapstructure:"non_identifying_attributes"`
}

// Validate returns an error if the configured identifying attributes contain
// the service instance ID attribute, which may not be overridden. It returns
// nil otherwise, including when no identifying attributes are configured.
func (a *AgentDescription) Validate() error {
	// Don't allow overriding the instance ID attribute. A direct map lookup
	// is enough; indexing a nil map is safe and reports the key as absent.
	if _, ok := a.IdentifyingAttributes[semconv.AttributeServiceInstanceID]; ok {
		return fmt.Errorf("cannot override identifying attribute %q", semconv.AttributeServiceInstanceID)
	}

	return nil
}
BinaryFissionGames marked this conversation as resolved.
Show resolved Hide resolved
87 changes: 87 additions & 0 deletions cmd/opampsupervisor/supervisor/config/config_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package config

import (
"net/http"
"testing"

"github.com/stretchr/testify/require"
"go.opentelemetry.io/collector/config/configtls"
semconv "go.opentelemetry.io/collector/semconv/v1.21.0"
)

// TestValidate runs Supervisor.Validate against a table of configurations.
// A case with an empty expectedError must validate cleanly; otherwise the
// returned error must contain expectedError.
func TestValidate(t *testing.T) {
	type testCase struct {
		name          string
		config        Supervisor
		expectedError string
	}

	// A config with every section populated and no forbidden attributes.
	filledOut := Supervisor{
		Server: &OpAMPServer{
			Endpoint: "wss://localhost:9090/opamp",
			Headers: http.Header{
				"Header1": []string{"HeaderValue"},
			},
			TLSSetting: configtls.ClientConfig{
				Insecure: true,
			},
		},
		Agent: &Agent{
			Executable: "../../otelcol",
			Description: AgentDescription{
				IdentifyingAttributes: map[string]string{
					"client.id": "some-client-id",
				},
				NonIdentifyingAttributes: map[string]string{
					"env": "dev",
				},
			},
		},
		Capabilities: &Capabilities{
			AcceptsRemoteConfig: asPtr(true),
		},
		Storage: &Storage{
			Directory: "/etc/opamp-supervisor/storage",
		},
	}

	// A config that tries to set the instance ID as an identifying attribute.
	overridesInstanceID := Supervisor{
		Agent: &Agent{
			Executable: "../../otelcol",
			Description: AgentDescription{
				IdentifyingAttributes: map[string]string{
					semconv.AttributeServiceInstanceID: "instance-id",
				},
			},
		},
	}

	cases := []testCase{
		{name: "Empty Config is valid", config: Supervisor{}},
		{name: "Valid filled out config", config: filledOut},
		{
			name:          "Cannot override instance ID",
			config:        overridesInstanceID,
			expectedError: `cannot override identifying attribute "service.instance.id"`,
		},
	}

	for _, tt := range cases {
		t.Run(tt.name, func(t *testing.T) {
			err := tt.config.Validate()

			if tt.expectedError != "" {
				require.ErrorContains(t, err, tt.expectedError)
				return
			}

			require.NoError(t, err)
		})
	}
}

// asPtr returns a pointer to a newly allocated copy of t.
func asPtr[T any](t T) *T {
	p := new(T)
	*p = t
	return p
}
51 changes: 50 additions & 1 deletion cmd/opampsupervisor/supervisor/supervisor.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,10 @@ func (s *Supervisor) loadConfig(configFile string) error {
return fmt.Errorf("cannot parse %v: %w", configFile, err)
}

if err := s.config.Validate(); err != nil {
return fmt.Errorf("invalid config: %w", err)
}

return nil
}

Expand Down Expand Up @@ -256,7 +260,7 @@ func (s *Supervisor) getBootstrapInfo() (err error) {
onMessageFunc: func(_ serverTypes.Connection, message *protobufs.AgentToServer) {
if message.AgentDescription != nil {
instanceIDSeen := false
s.agentDescription = message.AgentDescription
s.setAgentDescription(message.AgentDescription)
identAttr := s.agentDescription.IdentifyingAttributes

for _, attr := range identAttr {
Expand Down Expand Up @@ -431,6 +435,51 @@ func (s *Supervisor) startOpAMP() error {
return nil
}

// setAgentDescription stores ad as the Supervisor's agent description after
// merging in the user-specified attributes from the Supervisor configuration.
// The attribute slices on ad itself are replaced with the merged results.
func (s *Supervisor) setAgentDescription(ad *protobufs.AgentDescription) {
	userDesc := s.config.Agent.Description

	ad.IdentifyingAttributes = applyKeyValueOverrides(userDesc.IdentifyingAttributes, ad.IdentifyingAttributes)
	ad.NonIdentifyingAttributes = applyKeyValueOverrides(userDesc.NonIdentifyingAttributes, ad.NonIdentifyingAttributes)

	s.agentDescription = ad
}

// applyKeyValueOverrides returns the key value pairs of orig with the entries
// of overrides merged in. When a key is present in both, the value from
// overrides wins. Each key appears exactly once in the returned slice, and
// the slice is ordered by key.
func applyKeyValueOverrides(overrides map[string]string, orig []*protobufs.KeyValue) []*protobufs.KeyValue {
	merged := make(map[string]*protobufs.KeyValue, len(orig)+len(overrides))
	for _, pair := range orig {
		merged[pair.Key] = pair
	}

	// Overrides are applied second so they replace any original entry.
	for key, val := range overrides {
		strVal := &protobufs.AnyValue_StringValue{StringValue: val}
		merged[key] = &protobufs.KeyValue{Key: key, Value: &protobufs.AnyValue{Value: strVal}}
	}

	result := make([]*protobufs.KeyValue, 0, len(merged))
	for _, pair := range merged {
		result = append(result, pair)
	}

	// Order by key for stable output, which makes it easier to test. Keys are
	// unique here, so the ordering is fully determined.
	sort.Slice(result, func(i, j int) bool {
		return result[i].Key < result[j].Key
	})

	return result
}

func (s *Supervisor) stopOpAMP() error {
s.logger.Debug("Stopping OpAMP client...")
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
server:
endpoint: ws://{{.url}}/v1/opamp
tls:
insecure: true

capabilities:
reports_effective_config: true
reports_own_metrics: true
reports_health: true
accepts_remote_config: true
reports_remote_config: true
accepts_restart_command: true

agent:
executable: ../../bin/otelcontribcol_{{.goos}}_{{.goarch}}{{.extension}}
description:
identifying_attributes:
client.id: "my-client-id"
non_identifying_attributes:
env: "prod"
Loading