Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

VAULT-17555: Secret sync client metrics #25713

Merged
merged 5 commits into from
Mar 1, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
add metrics for secret sync clients
  • Loading branch information
miagilepner committed Feb 29, 2024
commit 14391588209789acbb8f4b9c19f92c929efca2d3
1 change: 1 addition & 0 deletions helper/metricsutil/wrapped_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ type TelemetryConstConfig struct {
}

type Metrics interface {
SetGauge(key []string, val float32)
SetGaugeWithLabels(key []string, val float32, labels []Label)
IncrCounterWithLabels(key []string, val float32, labels []Label)
AddSampleWithLabels(key []string, val float32, labels []Label)
Expand Down
33 changes: 27 additions & 6 deletions vault/activity_log.go
Original file line number Diff line number Diff line change
Expand Up @@ -2247,7 +2247,6 @@ func (a *ActivityLog) writePrecomputedQuery(ctx context.Context, segmentTime tim

// the byNamespace map also needs to be transformed into a protobuf
pq.Namespaces = a.transformALNamespaceBreakdowns(opts.byNamespace)

err := a.queryStore.Put(ctx, pq)
if err != nil {
a.logger.Warn("failed to store precomputed query", "error", err)
Expand Down Expand Up @@ -2316,11 +2315,27 @@ func (a *ActivityLog) segmentToPrecomputedQuery(ctx context.Context, segmentTime
a.breakdownTokenSegment(token, opts.byNamespace)
}

// write metrics
// handle metrics reporting
a.reportPrecomputedQueryMetrics(ctx, segmentTime, opts)

// convert the maps to the proper format and write them as precomputed queries
return a.writePrecomputedQuery(ctx, segmentTime, opts)
}

func (a *ActivityLog) reportPrecomputedQueryMetrics(ctx context.Context, segmentTime time.Time, opts pqOptions) {
if segmentTime != opts.activePeriodEnd && segmentTime != opts.activePeriodStart {
return
}
// we don't want to introduce any new namespaced metrics. For secret sync
// (and all newer client types) we'll instead keep maps of the total
summedMetricsMonthly := make(map[string]int)
summedMetricsReporting := make(map[string]int)

for nsID, entry := range opts.byNamespace {
// If this is the most recent month, or the start of the reporting period, output
// a metric for each namespace.
if segmentTime == opts.activePeriodEnd {
switch segmentTime {
case opts.activePeriodEnd:
a.metrics.SetGaugeWithLabels(
[]string{"identity", "entity", "active", "monthly"},
float32(entry.Counts.countByType(entityActivityType)),
Expand All @@ -2335,7 +2350,8 @@ func (a *ActivityLog) segmentToPrecomputedQuery(ctx context.Context, segmentTime
{Name: "namespace", Value: a.namespaceToLabel(ctx, nsID)},
},
)
} else if segmentTime == opts.activePeriodStart {
summedMetricsMonthly[secretSyncActivityType] += entry.Counts.countByType(secretSyncActivityType)
case opts.activePeriodStart:
a.metrics.SetGaugeWithLabels(
[]string{"identity", "entity", "active", "reporting_period"},
float32(entry.Counts.countByType(entityActivityType)),
Expand All @@ -2350,11 +2366,16 @@ func (a *ActivityLog) segmentToPrecomputedQuery(ctx context.Context, segmentTime
{Name: "namespace", Value: a.namespaceToLabel(ctx, nsID)},
},
)
summedMetricsReporting[secretSyncActivityType] += entry.Counts.countByType(secretSyncActivityType)
}
}

// convert the maps to the proper format and write them as precomputed queries
return a.writePrecomputedQuery(ctx, segmentTime, opts)
for clientType, count := range summedMetricsMonthly {
a.metrics.SetGauge([]string{"identity", strings.ReplaceAll(clientType, "-", ""), "active", "monthly"}, float32(count))
}
for clientType, count := range summedMetricsReporting {
a.metrics.SetGauge([]string{"identity", strings.ReplaceAll(clientType, "-", ""), "active", "reporting_period"}, float32(count))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you consider substituting _ for rather than removing hyphens? I think our metrics often (though probably not consistentlty) use underscores for multi-word terms -- for example reporting_period in the same metric name here. I have a very gentle preference towards identity.non_entity.active.reporting_period rather than identity.nonentity.active.reporting_period on consistency grounds. But not a big deal if you disagree.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also prefer underscores. We already have the metric nonentity and so I was trying to make secret syncs follow the same pattern, but there's no reason why it needs to. I'll change it.

}
}

// goroutine to process the request in the intent log, creating precomputed queries.
Expand Down
116 changes: 116 additions & 0 deletions vault/activity_log_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"testing"
"time"

"github.com/armon/go-metrics"
"github.com/axiomhq/hyperloglog"
"github.com/go-test/deep"
"github.com/golang/protobuf/proto"
Expand Down Expand Up @@ -4841,3 +4842,118 @@ func TestAddActivityToFragment(t *testing.T) {
})
}
}

// TestActivityLog_reportPrecomputedQueryMetrics creates 3 clients per type and
// calls reportPrecomputedQueryMetrics. The test verifies that the metric sink
// gets metrics reported correctly, based on the segment time matching the
// active period start or end
func TestActivityLog_reportPrecomputedQueryMetrics(t *testing.T) {
core, _, _, metricsSink := TestCoreUnsealedWithMetrics(t)
a := core.activityLog
byMonth := make(summaryByMonth)
byNS := make(summaryByNamespace)
segmentTime := time.Now()

// for each client type, make 3 clients in their own namespaces
for i := 0; i < 3; i++ {
for _, clientType := range []string{secretSyncActivityType, nonEntityTokenActivityType, entityActivityType} {
client := &activity.EntityRecord{
ClientID: fmt.Sprintf("%s-%d", clientType, i),
NamespaceID: fmt.Sprintf("ns-%d", i),
MountAccessor: fmt.Sprintf("mnt-%d", i),
ClientType: clientType,
NonEntity: clientType == nonEntityTokenActivityType,
}
processClientRecord(client, byNS, byMonth, segmentTime)
}
}
endTime := timeutil.EndOfMonth(segmentTime)
opts := pqOptions{
byNamespace: byNS,
byMonth: byMonth,
endTime: endTime,
}

otherTime := segmentTime.Add(time.Hour)

hasNoMetric := func(t *testing.T, gauges map[string]metrics.GaugeValue, name string) {
t.Helper()
for _, metric := range gauges {
if metric.Name == name {
require.Fail(t, "metric found", name)
}
}
}
hasMetric := func(t *testing.T, gauges map[string]metrics.GaugeValue, name string, value float32, namespaceLabel *string) {
t.Helper()
fullMetric := fmt.Sprintf("%s;cluster=test-cluster", name)
if namespaceLabel != nil {
fullMetric = fmt.Sprintf("%s;namespace=%s;cluster=test-cluster", name, *namespaceLabel)
}
require.Contains(t, gauges, fullMetric)
metric := gauges[fullMetric]
require.Equal(t, value, metric.Value)
}

testCases := []struct {
name string
activePeriodEnd time.Time
activePeriodStart time.Time
}{
{
name: "monthly metric",
activePeriodEnd: segmentTime,
activePeriodStart: otherTime,
},
{
name: "reporting period metric",
activePeriodEnd: otherTime,
activePeriodStart: segmentTime,
},
{
name: "no metric",
activePeriodEnd: otherTime,
activePeriodStart: otherTime,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
opts.activePeriodStart = tc.activePeriodStart
opts.activePeriodEnd = tc.activePeriodEnd

a.reportPrecomputedQueryMetrics(context.Background(), segmentTime, opts)

data := metricsSink.Data()
gauges := data[len(data)-1].Gauges

switch segmentTime {
case opts.activePeriodEnd:
// expect the metrics ending with "monthly"
// the namespace was never registered in core, so it'll be
// reported with a "deleted-" prefix
for i := 0; i < 3; i++ {
ns := fmt.Sprintf("deleted-ns-%d", i)
hasMetric(t, gauges, "identity.entity.active.monthly", 1, &ns)
hasMetric(t, gauges, "identity.nonentity.active.monthly", 1, &ns)
}
// secret sync metrics should be the sum of clients across all
// namespaces
hasMetric(t, gauges, "identity.secretsync.active.monthly", 3, nil)
case opts.activePeriodStart:
for i := 0; i < 3; i++ {
ns := fmt.Sprintf("deleted-ns-%d", i)
hasMetric(t, gauges, "identity.entity.active.reporting_period", 1, &ns)
hasMetric(t, gauges, "identity.nonentity.active.reporting_period", 1, &ns)
}
hasMetric(t, gauges, "identity.secretsync.active.reporting_period", 3, nil)
default:
hasNoMetric(t, gauges, "identity.entity.active.monthly")
hasNoMetric(t, gauges, "identity.nonentity.active.monthly")
hasNoMetric(t, gauges, "identity.secretsync.active.monthly")
hasNoMetric(t, gauges, "identity.entity.active.reporting_period")
hasNoMetric(t, gauges, "identity.entity.active.reporting_period")
hasNoMetric(t, gauges, "identity.secretsync.active.reporting_period")
}
})
}
}
Loading