From 2ec9ca89d1f540d813651b0f0c518d50a6d1ebd9 Mon Sep 17 00:00:00 2001 From: gordonhu7 Date: Wed, 12 Jul 2023 08:35:55 -0400 Subject: [PATCH] add lru cache metrics to telemetry package --- pkg/agent/manager/cache/lru_cache.go | 19 ++++------ pkg/agent/manager/cache/lru_cache_test.go | 28 +++++++------- pkg/common/telemetry/config.go | 4 ++ pkg/common/telemetry/lru.go | 46 +++++++++++++++++++++++ pkg/common/telemetry/names.go | 15 ++++++++ 5 files changed, 86 insertions(+), 26 deletions(-) create mode 100644 pkg/common/telemetry/lru.go diff --git a/pkg/agent/manager/cache/lru_cache.go b/pkg/agent/manager/cache/lru_cache.go index 5f2ebdd875f..e276c52a424 100644 --- a/pkg/agent/manager/cache/lru_cache.go +++ b/pkg/agent/manager/cache/lru_cache.go @@ -18,11 +18,6 @@ import ( const ( DefaultSVIDCacheMaxSize = 1000 SVIDSyncInterval = 500 * time.Millisecond - SVIDMapSize = "lru_cache_svid_map_size" - RecordMapSize = "lru_cache_record_map_size" - EntryAdded = "lru_cache_entry_add" - EntryUpdated = "lru_cache_entry_update" - EntryRemoved = "lru_cache_entry_remove" ) // Cache caches each registration entry, bundles, and JWT SVIDs for the agent. @@ -88,7 +83,7 @@ type LRUCache struct { trustDomain spiffeid.TrustDomain clk clock.Clock - metrics telemetry.Metrics + metrics *telemetry.LRUMetrics mu sync.RWMutex @@ -123,7 +118,7 @@ func NewLRUCache(log logrus.FieldLogger, trustDomain spiffeid.TrustDomain, bundl JWTSVIDCache: NewJWTSVIDCache(), log: log, - metrics: metrics, + metrics: telemetry.NewLRUMetrics(&telemetry.LRUConfig{MetricsImpl: metrics}), trustDomain: trustDomain, records: make(map[string]*lruCacheRecord), selectors: make(map[selector]*selectorsMapIndex), @@ -226,7 +221,7 @@ func (c *LRUCache) NewSubscriber(selectors []*common.Selector) Subscriber { // updated through a call to UpdateSVIDs. 
func (c *LRUCache) UpdateEntries(update *UpdateEntries, checkSVID func(*common.RegistrationEntry, *common.RegistrationEntry, *X509SVID) bool) { c.mu.Lock() - defer func() { c.metrics.SetGauge([]string{RecordMapSize}, float32(c.CountRecords())) }() + defer func() { c.metrics.SetEntriesMapSize(c.CountRecords()) }() defer c.mu.Unlock() // Remove bundles that no longer exist. The bundle for the agent trust @@ -298,7 +293,7 @@ func (c *LRUCache) UpdateEntries(update *UpdateEntries, checkSVID func(*common.R delete(c.staleEntries, id) } } - c.metrics.IncrCounter([]string{EntryRemoved}, float32(entriesRemoved)) + c.metrics.IncrementEntriesRemoved(entriesRemoved) outdatedEntries := make(map[string]struct{}) entriesUpdated := 0 @@ -391,8 +386,8 @@ func (c *LRUCache) UpdateEntries(update *UpdateEntries, checkSVID func(*common.R } } } - c.metrics.IncrCounter([]string{EntryUpdated}, float32(entriesUpdated)) - c.metrics.IncrCounter([]string{EntryAdded}, float32(entriesCreated)) + c.metrics.IncrementEntriesAdded(entriesCreated) + c.metrics.IncrementEntriesUpdated(entriesUpdated) // entries with active subscribers which are not cached will be put in staleEntries map; // irrespective of what svid cache size as we cannot deny identity to a subscriber @@ -444,7 +439,7 @@ func (c *LRUCache) UpdateEntries(update *UpdateEntries, checkSVID func(*common.R func (c *LRUCache) UpdateSVIDs(update *UpdateSVIDs) { c.mu.Lock() - defer func() { c.metrics.SetGauge([]string{SVIDMapSize}, float32(c.CountSVIDs())) }() + defer func() { c.metrics.SetSVIDMapSize(c.CountSVIDs()) }() defer c.mu.Unlock() // Allocate a set of selectors that diff --git a/pkg/agent/manager/cache/lru_cache_test.go b/pkg/agent/manager/cache/lru_cache_test.go index d68f202e38c..24ecc8dfc36 100644 --- a/pkg/agent/manager/cache/lru_cache_test.go +++ b/pkg/agent/manager/cache/lru_cache_test.go @@ -889,7 +889,7 @@ func TestSubscribeToLRUCacheChanges(t *testing.T) { func TestMetrics(t *testing.T) { cache := newTestLRUCache(t) 
fakeMetrics := fakemetrics.New() - cache.metrics = fakeMetrics + cache.metrics = telemetry.NewLRUMetrics(&telemetry.LRUConfig{MetricsImpl: fakeMetrics}) foo := makeRegistrationEntry("FOO", "A") bar := makeRegistrationEntry("BAR", "B") @@ -911,19 +911,19 @@ func TestMetrics(t *testing.T) { cache.UpdateEntries(updateEntries, nil) assert.Equal(t, []fakemetrics.MetricItem{ - {Type: fakemetrics.IncrCounterType, Key: []string{EntryRemoved}, Val: 0}, - {Type: fakemetrics.IncrCounterType, Key: []string{EntryUpdated}, Val: 0}, - {Type: fakemetrics.IncrCounterType, Key: []string{EntryAdded}, Val: 2}, - {Type: fakemetrics.SetGaugeType, Key: []string{RecordMapSize}, Val: 2}, - {Type: fakemetrics.SetGaugeType, Key: []string{SVIDMapSize}, Val: 1}, - {Type: fakemetrics.IncrCounterType, Key: []string{EntryRemoved}, Val: 1}, - {Type: fakemetrics.IncrCounterType, Key: []string{EntryUpdated}, Val: 1}, - {Type: fakemetrics.IncrCounterType, Key: []string{EntryAdded}, Val: 0}, - {Type: fakemetrics.SetGaugeType, Key: []string{RecordMapSize}, Val: 1}, - {Type: fakemetrics.IncrCounterType, Key: []string{EntryRemoved}, Val: 0}, - {Type: fakemetrics.IncrCounterType, Key: []string{EntryUpdated}, Val: 1}, - {Type: fakemetrics.IncrCounterType, Key: []string{EntryAdded}, Val: 1}, - {Type: fakemetrics.SetGaugeType, Key: []string{RecordMapSize}, Val: 2}, + {Type: fakemetrics.IncrCounterType, Key: []string{telemetry.EntryRemoved}, Val: 0}, + {Type: fakemetrics.IncrCounterType, Key: []string{telemetry.EntryAdded}, Val: 2}, + {Type: fakemetrics.IncrCounterType, Key: []string{telemetry.EntryUpdated}, Val: 0}, + {Type: fakemetrics.SetGaugeType, Key: []string{telemetry.RecordMapSize}, Val: 2}, + {Type: fakemetrics.SetGaugeType, Key: []string{telemetry.SVIDMapSize}, Val: 1}, + {Type: fakemetrics.IncrCounterType, Key: []string{telemetry.EntryRemoved}, Val: 1}, + {Type: fakemetrics.IncrCounterType, Key: []string{telemetry.EntryAdded}, Val: 0}, + {Type: fakemetrics.IncrCounterType, Key: 
[]string{telemetry.EntryUpdated}, Val: 1}, + {Type: fakemetrics.SetGaugeType, Key: []string{telemetry.RecordMapSize}, Val: 1}, + {Type: fakemetrics.IncrCounterType, Key: []string{telemetry.EntryRemoved}, Val: 0}, + {Type: fakemetrics.IncrCounterType, Key: []string{telemetry.EntryAdded}, Val: 1}, + {Type: fakemetrics.IncrCounterType, Key: []string{telemetry.EntryUpdated}, Val: 1}, + {Type: fakemetrics.SetGaugeType, Key: []string{telemetry.RecordMapSize}, Val: 2}, }, fakeMetrics.AllMetrics()) } diff --git a/pkg/common/telemetry/config.go b/pkg/common/telemetry/config.go index 698915c5229..3912056a17c 100644 --- a/pkg/common/telemetry/config.go +++ b/pkg/common/telemetry/config.go @@ -52,3 +52,7 @@ type M3Config struct { type InMem struct { UnusedKeyPositions map[string][]token.Pos `hcl:",unusedKeyPositions"` } + +type LRUConfig struct { + MetricsImpl Metrics +} diff --git a/pkg/common/telemetry/lru.go b/pkg/common/telemetry/lru.go new file mode 100644 index 00000000000..992a041542b --- /dev/null +++ b/pkg/common/telemetry/lru.go @@ -0,0 +1,46 @@ +package telemetry + +import ( + "sync" +) + +type LRUMetrics struct { + metrics Metrics + mu sync.Mutex +} + +func NewLRUMetrics(c *LRUConfig) *LRUMetrics { + return &LRUMetrics{ + metrics: c.MetricsImpl, + } +} + +func (c *LRUMetrics) IncrementEntriesAdded(entriesAdded int) { + c.mu.Lock() + defer c.mu.Unlock() + c.metrics.IncrCounter([]string{EntryAdded}, float32(entriesAdded)) +} + +func (c *LRUMetrics) IncrementEntriesUpdated(entriesUpdated int) { + c.mu.Lock() + defer c.mu.Unlock() + c.metrics.IncrCounter([]string{EntryUpdated}, float32(entriesUpdated)) +} + +func (c *LRUMetrics) IncrementEntriesRemoved(entriesRemoved int) { + c.mu.Lock() + defer c.mu.Unlock() + c.metrics.IncrCounter([]string{EntryRemoved}, float32(entriesRemoved)) +} + +func (c *LRUMetrics) SetEntriesMapSize(recordMapSize int) { + c.mu.Lock() + defer c.mu.Unlock() + c.metrics.SetGauge([]string{RecordMapSize}, float32(recordMapSize)) +} + +func (c 
*LRUMetrics) SetSVIDMapSize(svidMapSize int) { + c.mu.Lock() + defer c.mu.Unlock() + c.metrics.SetGauge([]string{SVIDMapSize}, float32(svidMapSize)) +} diff --git a/pkg/common/telemetry/names.go b/pkg/common/telemetry/names.go index 715daf71766..c48e8f06413 100644 --- a/pkg/common/telemetry/names.go +++ b/pkg/common/telemetry/names.go @@ -263,6 +263,15 @@ const ( // ElapsedTime tags some duration of time. ElapsedTime = "elapsed_time" + // EntryAdded is the counter key for when an entry is added to the LRU cache + EntryAdded = "lru_cache_entry_add" + + // EntryRemoved is the counter key for when an entry is removed from the LRU cache + EntryRemoved = "lru_cache_entry_remove" + + // EntryUpdated is the counter key for when an LRU cache entry is updated + EntryUpdated = "lru_cache_entry_update" + // EndpointSpiffeID tags endpoint SPIFFE ID EndpointSpiffeID = "endpoint_spiffe_id" @@ -415,6 +424,9 @@ const ( // ReceivedUID is like Received, specific to uid. ReceivedUID = "received_uid" + // RecordMapSize is the gauge key to hold the size of the LRU cache entries map + RecordMapSize = "lru_cache_record_map_size" + // RefreshHint tags a bundle refresh hint RefreshHint = "refresh_hint" @@ -493,6 +505,9 @@ const ( // with other tags to add clarity Subject = "subject" + // SVIDMapSize is the gauge key for the size of the LRU cache SVID map + SVIDMapSize = "lru_cache_svid_map_size" + // SVIDResponseLatency tags latency for SVID response SVIDResponseLatency = "svid_response_latency"