Skip to content

Commit

Permalink
[internal/splunk] Splunk metric names should be validated via regex (o…
Browse files Browse the repository at this point in the history
…pen-telemetry#34291)

**Description:** For the Splunk HEC receiver, add validation criteria
for metric names. As per
https://docs.splunk.com/Documentation/Splunk/9.2.1/Metrics/Overview#What_is_a_metric_data_point.3F,
metric names may consist only of specific characters. This change validates
those requirements so that the receiver is in line with other Splunk receivers.

**Link to tracking Issue:**
[open-telemetry#34275](open-telemetry#34275)

---------

Co-authored-by: Dmitrii Anoshin <anoshindx@gmail.com>
  • Loading branch information
spiffyy99 and dmitryax authored Jul 30, 2024
1 parent ff71b24 commit f01613a
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 4 deletions.
27 changes: 27 additions & 0 deletions .chloggen/splunk_enforce_metrics_naming_regex.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: receiver/splunkhec

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Add a regex to enforce metric naming rules for Splunk event fields, based on the Splunk metrics documentation.

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [34275]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [user]
25 changes: 23 additions & 2 deletions internal/splunk/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package splunk // import "github.com/open-telemetry/opentelemetry-collector-cont

import (
"encoding/json"
"regexp"
"strconv"
"strings"
)
Expand All @@ -31,6 +32,14 @@ const (
DefaultRawPath = "/services/collector/raw"
DefaultHealthPath = "/services/collector/health"
DefaultAckPath = "/services/collector/ack"

// See https://docs.splunk.com/Documentation/Splunk/9.2.1/Metrics/Overview#What_is_a_metric_data_point.3F
// A metric name may contain letters, numbers, underscores, dots, and colons. It cannot start with a number or an underscore, and it cannot contain the string "metric_name".
metricNamePattern = "^metric_name:([A-Za-z\\.:][A-Za-z0-9_\\.:]*)$"
)

// metricNameRegexp is the compiled form of metricNamePattern. It is built
// once, when the package is initialized, so that field names can be matched
// without recompiling the expression. regexp.MustCompile panics if the
// pattern is not a valid regular expression.
var metricNameRegexp = regexp.MustCompile(metricNamePattern)

// AccessTokenPassthroughConfig configures passing through access tokens.
Expand All @@ -55,6 +64,18 @@ func (e *Event) IsMetric() bool {
return e.Event == HecEventMetricType || (e.Event == nil && len(e.GetMetricValues()) > 0)
}

// getMetricNameFromField reports whether fieldName is a valid metric data
// point field and, if it is, returns the metric name carried by that field.
//
// fieldName is matched against metricNameRegexp; the metric name is the first
// capture group of that expression. A name that matches the expression but
// contains the string "metric_name" is rejected as well.
//
// The returned name is non-empty only when the returned bool is true; every
// rejected field yields ("", false).
func getMetricNameFromField(fieldName string) (string, bool) {
	// FindStringSubmatch returns nil when there is no match. On a match,
	// element 0 is the entire matched string and element 1 is the first
	// capture group, i.e. the metric name itself.
	matches := metricNameRegexp.FindStringSubmatch(fieldName)
	if len(matches) < 2 {
		return "", false
	}

	name := matches[1]
	// The regular expression cannot express "does not contain metric_name",
	// so that rule is checked separately.
	if strings.Contains(name, "metric_name") {
		return "", false
	}
	return name, true
}

// GetMetricValues extracts metric key value pairs from a Splunk HEC metric.
func (e *Event) GetMetricValues() map[string]any {
if v, ok := e.Fields["metric_name"]; ok {
Expand All @@ -63,8 +84,8 @@ func (e *Event) GetMetricValues() map[string]any {

values := map[string]any{}
for k, v := range e.Fields {
if strings.HasPrefix(k, "metric_name:") {
values[k[12:]] = v
if metricName, ok := getMetricNameFromField(k); ok {
values[metricName] = v
}
}
return values
Expand Down
15 changes: 13 additions & 2 deletions internal/splunk/common_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,21 @@ func TestGetValues(t *testing.T) {
Fields: map[string]any{},
}
assert.Equal(t, map[string]any{}, metric.GetMetricValues())
metric.Fields["metric_name:x"] = "y"
assert.Equal(t, map[string]any{"x": "y"}, metric.GetMetricValues())
metric.Fields["metric_name:foo"] = "bar"
assert.Equal(t, map[string]any{"foo": "bar"}, metric.GetMetricValues())
assert.Equal(t, map[string]any{"x": "y", "foo": "bar"}, metric.GetMetricValues())
metric.Fields["metric_name:foo2"] = "foobar"
assert.Equal(t, map[string]any{"foo": "bar", "foo2": "foobar"}, metric.GetMetricValues())
assert.Equal(t, map[string]any{"x": "y", "foo": "bar", "foo2": "foobar"}, metric.GetMetricValues())
metric.Fields["metric_name:foo:123_456.Bar"] = "quux"
assert.Equal(t, map[string]any{"x": "y", "foo": "bar", "foo2": "foobar", "foo:123_456.Bar": "quux"}, metric.GetMetricValues())
// fields that aren't allowed
metric.Fields["metric_name:foo bar"] = "baz" // contains space
metric.Fields["metric_name:foo?"] = "baz" // illegal character
metric.Fields["metric_name:1stfoo"] = "baz" // starts with number
metric.Fields["metric_name:_foo"] = "baz" // starts with underscore
metric.Fields["metric_name:foo_metric_name:bar"] = "baz" // name contains "metric_name"
assert.Equal(t, map[string]any{"x": "y", "foo": "bar", "foo2": "foobar", "foo:123_456.Bar": "quux"}, metric.GetMetricValues())
}

func TestSingleValue(t *testing.T) {
Expand Down

0 comments on commit f01613a

Please sign in to comment.