Skip to content

Commit

Permalink
Have x509_svid_cache_max_size as agent config setting (not experime…
Browse files Browse the repository at this point in the history
…ntal) (#5531)

Signed-off-by: Agustín Martínez Fayó <amartinezfayo@gmail.com>
  • Loading branch information
amartinezfayo authored Oct 2, 2024
1 parent 26b2d93 commit 110ec0c
Show file tree
Hide file tree
Showing 14 changed files with 118 additions and 48 deletions.
6 changes: 6 additions & 0 deletions cmd/spire-agent/cli/run/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ type agentConfig struct {
AllowUnauthenticatedVerifiers bool `hcl:"allow_unauthenticated_verifiers"`
AllowedForeignJWTClaims []string `hcl:"allowed_foreign_jwt_claims"`
AvailabilityTarget string `hcl:"availability_target"`
X509SVIDCacheMaxSize int `hcl:"x509_svid_cache_max_size"`

AuthorizedDelegates []string `hcl:"authorized_delegates"`

Expand Down Expand Up @@ -494,6 +495,11 @@ func NewAgentConfig(c *Config, logOptions []log.Option, allowUnknownConfig bool)
ac.LogReopener = log.ReopenOnSignal(logger, reopenableFile)
}

if c.Agent.X509SVIDCacheMaxSize < 0 {
return nil, errors.New("x509_svid_cache_max_size should not be negative")
}
ac.X509SVIDCacheMaxSize = c.Agent.X509SVIDCacheMaxSize

td, err := common_cli.ParseTrustDomain(c.Agent.TrustDomain, logger)
if err != nil {
return nil, err
Expand Down
36 changes: 36 additions & 0 deletions cmd/spire-agent/cli/run/run_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -897,6 +897,42 @@ func TestNewAgentConfig(t *testing.T) {
require.Nil(t, c)
},
},
{
msg: "x509_svid_cache_max_size is set",
input: func(c *Config) {
c.Agent.X509SVIDCacheMaxSize = 100
},
test: func(t *testing.T, c *agent.Config) {
require.EqualValues(t, 100, c.X509SVIDCacheMaxSize)
},
},
{
msg: "x509_svid_cache_max_size is not set",
input: func(c *Config) {
},
test: func(t *testing.T, c *agent.Config) {
require.EqualValues(t, 0, c.X509SVIDCacheMaxSize)
},
},
{
msg: "x509_svid_cache_max_size is zero",
input: func(c *Config) {
c.Agent.X509SVIDCacheMaxSize = 0
},
test: func(t *testing.T, c *agent.Config) {
require.EqualValues(t, 0, c.X509SVIDCacheMaxSize)
},
},
{
msg: "x509_svid_cache_max_size is negative",
expectError: true,
input: func(c *Config) {
c.Agent.X509SVIDCacheMaxSize = -10
},
test: func(t *testing.T, c *agent.Config) {
require.Nil(t, c)
},
},
{
msg: "allowed_foreign_jwt_claims provided",
input: func(c *Config) {
Expand Down
1 change: 1 addition & 0 deletions doc/spire_agent.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ This may be useful for templating configuration files, for example across differ
| `trust_domain` | The trust domain that this agent belongs to (should be no more than 255 characters) | |
| `workload_x509_svid_key_type` | The workload X509 SVID key type &lt;rsa-2048&vert;ec-p256&gt; | ec-p256 |
| `availability_target` | The minimum amount of time desired to gracefully handle SPIRE Server or Agent downtime. This configurable influences how aggressively X509 SVIDs should be rotated. If set, must be at least 24h. See [Availability Target](#availability-target) | |
| `x509_svid_cache_max_size` | Soft limit of max number of SVIDs that would be stored in LRU cache | 1000 |

| experimental | Description | Default |
|:------------------------------|--------------------------------------------------------------------------------------|-------------------------|
Expand Down
1 change: 1 addition & 0 deletions pkg/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ func (a *Agent) newManager(ctx context.Context, sto storage.Storage, cat catalog
Storage: sto,
SyncInterval: a.c.SyncInterval,
UseSyncAuthorizedEntries: a.c.UseSyncAuthorizedEntries,
SVIDCacheMaxSize: a.c.X509SVIDCacheMaxSize,
SVIDStoreCache: cache,
NodeAttestor: na,
RotationStrategy: rotationutil.NewRotationStrategy(a.c.AvailabilityTarget),
Expand Down
2 changes: 1 addition & 1 deletion pkg/agent/api/delegatedidentity/v1/service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -996,7 +996,7 @@ func (m *FakeManager) SubscribeToBundleChanges() *cache.BundleStream {

func newTestCache() *cache.LRUCache {
log, _ := test.NewNullLogger()
return cache.NewLRUCache(log, trustDomain1, bundle1, telemetry.Blackhole{}, clock.New())
return cache.NewLRUCache(log, trustDomain1, bundle1, telemetry.Blackhole{}, cache.DefaultSVIDCacheMaxSize, clock.New())
}

func generateSubscribeToX509SVIDMetrics() []fakemetrics.MetricItem {
Expand Down
3 changes: 3 additions & 0 deletions pkg/agent/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ type Config struct {
// is used to sync entries from the server.
UseSyncAuthorizedEntries bool

// X509SVIDCacheMaxSize is a soft limit of max number of SVIDs that would be stored in cache
X509SVIDCacheMaxSize int

// Trust domain and associated CA bundle
TrustDomain spiffeid.TrustDomain
TrustBundle []*x509.Certificate
Expand Down
22 changes: 15 additions & 7 deletions pkg/agent/manager/cache/lru_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ import (
)

const (
// SVIDCacheMaxSize is the size for the cache
SVIDCacheMaxSize = 1000
// DefaultSVIDCacheMaxSize is set when svidCacheMaxSize is not provided
DefaultSVIDCacheMaxSize = 1000
// SVIDSyncInterval is the interval at which SVIDs are synced with subscribers
SVIDSyncInterval = 500 * time.Millisecond
// Default batch size for processing tainted SVIDs
Expand Down Expand Up @@ -139,14 +139,21 @@ type LRUCache struct {
// svids are stored by entry IDs
svids map[string]*X509SVID

// svidCacheMaxSize is a soft limit of max number of SVIDs that would be stored in cache
svidCacheMaxSize int

subscribeBackoffFn func() backoff.BackOff

processingBatchSize int
// used to debug scheduled batchs for tainted authorities
taintedBatchProcessedCh chan struct{}
}

func NewLRUCache(log logrus.FieldLogger, trustDomain spiffeid.TrustDomain, bundle *Bundle, metrics telemetry.Metrics, clk clock.Clock) *LRUCache {
func NewLRUCache(log logrus.FieldLogger, trustDomain spiffeid.TrustDomain, bundle *Bundle, metrics telemetry.Metrics, svidCacheMaxSize int, clk clock.Clock) *LRUCache {
if svidCacheMaxSize <= 0 {
svidCacheMaxSize = DefaultSVIDCacheMaxSize
}

return &LRUCache{
BundleCache: NewBundleCache(trustDomain, bundle),
JWTSVIDCache: NewJWTSVIDCache(),
Expand All @@ -160,8 +167,9 @@ func NewLRUCache(log logrus.FieldLogger, trustDomain spiffeid.TrustDomain, bundl
bundles: map[spiffeid.TrustDomain]*spiffebundle.Bundle{
trustDomain: bundle,
},
svids: make(map[string]*X509SVID),
clk: clk,
svids: make(map[string]*X509SVID),
svidCacheMaxSize: svidCacheMaxSize,
clk: clk,
subscribeBackoffFn: func() backoff.BackOff {
return backoff.NewBackoff(clk, SVIDSyncInterval)
},
Expand Down Expand Up @@ -433,7 +441,7 @@ func (c *LRUCache) UpdateEntries(update *UpdateEntries, checkSVID func(*common.R
// entries with active subscribers which are not cached will be put in staleEntries map;
// irrespective of what svid cache size as we cannot deny identity to a subscriber
activeSubsByEntryID, recordsWithLastAccessTime := c.syncSVIDsWithSubscribers()
extraSize := len(c.svids) - SVIDCacheMaxSize
extraSize := len(c.svids) - c.svidCacheMaxSize

// delete svids without subscribers and which have not been accessed since svidCacheExpiryTime
if extraSize > 0 {
Expand Down Expand Up @@ -772,7 +780,7 @@ func (c *LRUCache) syncSVIDsWithSubscribers() (map[string]struct{}, []recordAcce
lastAccessTimestamps = append(lastAccessTimestamps, newRecordAccessEvent(record.lastAccessTimestamp, id))
}

remainderSize := SVIDCacheMaxSize - len(c.svids)
remainderSize := c.svidCacheMaxSize - len(c.svids)
// add records which are not cached for remainder of cache size
for id := range c.records {
if len(c.staleEntries) >= remainderSize {
Expand Down
70 changes: 42 additions & 28 deletions pkg/agent/manager/cache/lru_cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,8 @@ func TestLRUCacheSubscriberNotNotifiedOnOverlappingSVIDChanges(t *testing.T) {

func TestLRUCacheSVIDCacheExpiry(t *testing.T) {
clk := clock.NewMock(t)
cache := newTestLRUCacheWithConfig(clk)
svidCacheMaxSize := 10
cache := newTestLRUCacheWithConfig(svidCacheMaxSize, clk)

clk.Add(1 * time.Second)
foo := makeRegistrationEntry("FOO", "A")
Expand Down Expand Up @@ -703,7 +704,7 @@ func TestLRUCacheSVIDCacheExpiry(t *testing.T) {
// Move clk by 2 seconds
clk.Add(2 * time.Second)
// update total of size+2 entries
updateEntries := createUpdateEntries(SVIDCacheMaxSize, makeBundles(bundleV1))
updateEntries := createUpdateEntries(svidCacheMaxSize, makeBundles(bundleV1))
updateEntries.RegistrationEntries[foo.EntryId] = foo
updateEntries.RegistrationEntries[bar.EntryId] = bar

Expand All @@ -720,10 +721,10 @@ func TestLRUCacheSVIDCacheExpiry(t *testing.T) {
sub.Finish()
}
}
assert.Equal(t, SVIDCacheMaxSize+2, cache.CountX509SVIDs())
assert.Equal(t, svidCacheMaxSize+2, cache.CountX509SVIDs())

cache.UpdateEntries(updateEntries, nil)
assert.Equal(t, SVIDCacheMaxSize, cache.CountX509SVIDs())
assert.Equal(t, svidCacheMaxSize, cache.CountX509SVIDs())

// foo SVID should be removed from cache as it does not have active subscriber
assert.False(t, cache.notifySubscriberIfSVIDAvailable(makeSelectors("A"), subA.(*lruCacheSubscriber)))
Expand All @@ -739,24 +740,25 @@ func TestLRUCacheSVIDCacheExpiry(t *testing.T) {
require.Len(t, cache.GetStaleEntries(), 1)
assert.Equal(t, foo, cache.GetStaleEntries()[0].Entry)

assert.Equal(t, SVIDCacheMaxSize, cache.CountX509SVIDs())
assert.Equal(t, svidCacheMaxSize, cache.CountX509SVIDs())
}

func TestLRUCacheMaxSVIDCacheSize(t *testing.T) {
clk := clock.NewMock(t)
cache := newTestLRUCacheWithConfig(clk)
svidCacheMaxSize := 10
cache := newTestLRUCacheWithConfig(svidCacheMaxSize, clk)

// create entries more than maxSvidCacheSize
updateEntries := createUpdateEntries(SVIDCacheMaxSize+2, makeBundles(bundleV1))
// create entries more than svidCacheMaxSize
updateEntries := createUpdateEntries(svidCacheMaxSize+2, makeBundles(bundleV1))
cache.UpdateEntries(updateEntries, nil)

require.Len(t, cache.GetStaleEntries(), SVIDCacheMaxSize)
require.Len(t, cache.GetStaleEntries(), svidCacheMaxSize)

cache.UpdateSVIDs(&UpdateSVIDs{
X509SVIDs: makeX509SVIDsFromStaleEntries(cache.GetStaleEntries()),
})
require.Len(t, cache.GetStaleEntries(), 0)
assert.Equal(t, SVIDCacheMaxSize, cache.CountX509SVIDs())
assert.Equal(t, svidCacheMaxSize, cache.CountX509SVIDs())

// Validate that active subscriber will still get SVID even if SVID count is at maxSvidCacheSize
foo := makeRegistrationEntry("FOO", "A")
Expand All @@ -767,25 +769,26 @@ func TestLRUCacheMaxSVIDCacheSize(t *testing.T) {

cache.UpdateEntries(updateEntries, nil)
require.Len(t, cache.GetStaleEntries(), 1)
assert.Equal(t, SVIDCacheMaxSize, cache.CountX509SVIDs())
assert.Equal(t, svidCacheMaxSize, cache.CountX509SVIDs())

cache.UpdateSVIDs(&UpdateSVIDs{
X509SVIDs: makeX509SVIDs(foo),
})
assert.Equal(t, SVIDCacheMaxSize+1, cache.CountX509SVIDs())
assert.Equal(t, svidCacheMaxSize+1, cache.CountX509SVIDs())
require.Len(t, cache.GetStaleEntries(), 0)
}

func TestSyncSVIDsWithSubscribers(t *testing.T) {
clk := clock.NewMock(t)
cache := newTestLRUCacheWithConfig(clk)
svidCacheMaxSize := 5
cache := newTestLRUCacheWithConfig(svidCacheMaxSize, clk)

updateEntries := createUpdateEntries(SVIDCacheMaxSize, makeBundles(bundleV1))
updateEntries := createUpdateEntries(svidCacheMaxSize, makeBundles(bundleV1))
cache.UpdateEntries(updateEntries, nil)
cache.UpdateSVIDs(&UpdateSVIDs{
X509SVIDs: makeX509SVIDsFromStaleEntries(cache.GetStaleEntries()),
})
assert.Equal(t, SVIDCacheMaxSize, cache.CountX509SVIDs())
assert.Equal(t, svidCacheMaxSize, cache.CountX509SVIDs())

// Update foo but its SVID is not yet cached
foo := makeRegistrationEntry("FOO", "A")
Expand All @@ -803,7 +806,7 @@ func TestSyncSVIDsWithSubscribers(t *testing.T) {
require.Len(t, cache.GetStaleEntries(), 1)
assert.Equal(t, []*StaleEntry{{Entry: cache.records[foo.EntryId].entry}}, cache.GetStaleEntries())

assert.Equal(t, SVIDCacheMaxSize, cache.CountX509SVIDs())
assert.Equal(t, svidCacheMaxSize, cache.CountX509SVIDs())
}

func TestNotifySubscriberWhenSVIDIsAvailable(t *testing.T) {
Expand All @@ -828,12 +831,13 @@ func TestNotifySubscriberWhenSVIDIsAvailable(t *testing.T) {

func TestSubscribeToWorkloadUpdatesLRUNoSelectors(t *testing.T) {
clk := clock.NewMock(t)
cache := newTestLRUCacheWithConfig(clk)
svidCacheMaxSize := 1
cache := newTestLRUCacheWithConfig(svidCacheMaxSize, clk)

// Creating test entries, but this will not affect current test...
foo := makeRegistrationEntry("FOO", "A")
bar := makeRegistrationEntry("BAR", "B")
updateEntries := createUpdateEntries(SVIDCacheMaxSize, makeBundles(bundleV1))
updateEntries := createUpdateEntries(svidCacheMaxSize, makeBundles(bundleV1))
updateEntries.RegistrationEntries[foo.EntryId] = foo
updateEntries.RegistrationEntries[bar.EntryId] = bar
cache.UpdateEntries(updateEntries, nil)
Expand Down Expand Up @@ -874,7 +878,7 @@ func TestSubscribeToWorkloadUpdatesLRUNoSelectors(t *testing.T) {
<-subWaitCh
cache.SyncSVIDsWithSubscribers()

assert.Len(t, cache.GetStaleEntries(), SVIDCacheMaxSize)
assert.Len(t, cache.GetStaleEntries(), svidCacheMaxSize)
cache.UpdateSVIDs(&UpdateSVIDs{
X509SVIDs: makeX509SVIDs(foo, bar),
})
Expand All @@ -890,7 +894,7 @@ func TestSubscribeToWorkloadUpdatesLRUNoSelectors(t *testing.T) {

func TestSubscribeToLRUCacheChanges(t *testing.T) {
clk := clock.NewMock(t)
cache := newTestLRUCacheWithConfig(clk)
cache := newTestLRUCacheWithConfig(1, clk)

foo := makeRegistrationEntry("FOO", "A")
bar := makeRegistrationEntry("BAR", "B")
Expand Down Expand Up @@ -979,7 +983,7 @@ func TestTaintX509SVIDs(t *testing.T) {
batchProcessedCh := make(chan struct{}, 1)

// Initialize cache with configuration
cache := newTestLRUCacheWithConfig(clk)
cache := newTestLRUCacheWithConfig(10, clk)
cache.processingBatchSize = 4
cache.log = log
cache.taintedBatchProcessedCh = batchProcessedCh
Expand Down Expand Up @@ -1130,7 +1134,7 @@ func TestTaintX509SVIDsNoSVIDs(t *testing.T) {
log.Level = logrus.DebugLevel

// Initialize cache with configuration
cache := newTestLRUCacheWithConfig(clk)
cache := newTestLRUCacheWithConfig(10, clk)
cache.log = log

entries := createTestEntries(10)
Expand Down Expand Up @@ -1216,9 +1220,19 @@ func TestMetrics(t *testing.T) {
}

func TestNewLRUCache(t *testing.T) {
// expected cache size
cache := newTestLRUCacheWithConfig(clock.NewMock(t))
require.NotNil(t, cache)
// Negative for value for svidCacheMaxSize should set default value in
// cache.svidCacheMaxSize
cache := newTestLRUCacheWithConfig(-5, clock.NewMock(t))
require.Equal(t, DefaultSVIDCacheMaxSize, cache.svidCacheMaxSize)

// Zero for value for svidCacheMaxSize should set default value in
// cache.svidCacheMaxSize
cache = newTestLRUCacheWithConfig(0, clock.NewMock(t))
require.Equal(t, DefaultSVIDCacheMaxSize, cache.svidCacheMaxSize)

// Custom value for svidCacheMaxSize should propagate properly
cache = newTestLRUCacheWithConfig(55, clock.NewMock(t))
require.Equal(t, 55, cache.svidCacheMaxSize)
}

func BenchmarkLRUCacheGlobalNotification(b *testing.B) {
Expand Down Expand Up @@ -1269,12 +1283,12 @@ func BenchmarkLRUCacheGlobalNotification(b *testing.B) {
func newTestLRUCache(t testing.TB) *LRUCache {
log, _ := test.NewNullLogger()
return NewLRUCache(log, spiffeid.RequireTrustDomainFromString("domain.test"), bundleV1,
telemetry.Blackhole{}, clock.NewMock(t))
telemetry.Blackhole{}, 0, clock.NewMock(t))
}

func newTestLRUCacheWithConfig(clk clock.Clock) *LRUCache {
func newTestLRUCacheWithConfig(svidCacheMaxSize int, clk clock.Clock) *LRUCache {
log, _ := test.NewNullLogger()
return NewLRUCache(log, trustDomain1, bundleV1, telemetry.Blackhole{}, clk)
return NewLRUCache(log, trustDomain1, bundleV1, telemetry.Blackhole{}, svidCacheMaxSize, clk)
}

// numEntries should not be more than 12 digits
Expand Down
2 changes: 1 addition & 1 deletion pkg/agent/manager/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ func newManager(c *Config) *manager {
}

cache := managerCache.NewLRUCache(c.Log.WithField(telemetry.SubsystemName, telemetry.CacheManager), c.TrustDomain, c.Bundle,
c.Metrics, c.Clk)
c.Metrics, c.SVIDCacheMaxSize, c.Clk)

rotCfg := &svid.RotatorConfig{
SVIDKeyManager: keymanager.ForSVID(c.Catalog.GetKeyManager()),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#!/bin/bash

# LRU Cache size is 1000; we expect uid:1001 to receive all 1002 identities,
# and later on disconnect for the cache to be pruned back to 1000
SIZE=1002
# LRU Cache size is 8; we expect uid:1001 to receive all 10 identities,
# and later on disconnect for the cache to be pruned back to 8
SIZE=10

# Create entries for uid 1001
for ((m=1;m<=$SIZE;m++)); do
Expand Down
6 changes: 3 additions & 3 deletions test/integration/suites/fetch-x509-svids/05-fetch-x509-svids
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/bash

ENTRYCOUNT=1002
CACHESIZE=1000
ENTRYCOUNT=10
CACHESIZE=8

X509SVIDCOUNT=$(docker compose exec -u 1001 -T spire-agent \
/opt/spire/bin/spire-agent api fetch x509 \
Expand All @@ -14,4 +14,4 @@ else
fi

# Call agent debug endpoints and check if extra X.509-SVIDs from cache are cleaned up
check-x509-svid-count "spire-agent" 1000
check-x509-svid-count "spire-agent" $CACHESIZE
Loading

0 comments on commit 110ec0c

Please sign in to comment.