Skip to content

Commit

Permalink
Added exponential retry to the domain cache
Browse files Browse the repository at this point in the history
  • Loading branch information
jakobht committed Feb 18, 2025
1 parent 4b30b13 commit ac53f8f
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 3 deletions.
11 changes: 10 additions & 1 deletion common/cache/domainCache.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
"time"

"github.com/uber/cadence/common"
"github.com/uber/cadence/common/backoff"
"github.com/uber/cadence/common/clock"
"github.com/uber/cadence/common/cluster"
"github.com/uber/cadence/common/errors"
Expand Down Expand Up @@ -119,6 +120,8 @@ type (
callbackLock sync.Mutex
prepareCallbacks map[int]PrepareCallbackFn
callbacks map[int]CallbackFn

throttleRetry *backoff.ThrottleRetry
}

// DomainCacheEntries is DomainCacheEntry slice
Expand Down Expand Up @@ -160,6 +163,11 @@ func NewDomainCache(
opts ...DomainCacheOption,
) *DefaultDomainCache {

throttleRetry := backoff.NewThrottleRetry(
backoff.WithRetryPolicy(common.CreateDomainCacheRetryPolicy()),
backoff.WithRetryableError(common.IsServiceTransientError),
)

cache := &DefaultDomainCache{
status: domainCacheInitialized,
shutdownChan: make(chan struct{}),
Expand All @@ -172,6 +180,7 @@ func NewDomainCache(
logger: logger,
prepareCallbacks: make(map[int]PrepareCallbackFn),
callbacks: make(map[int]CallbackFn),
throttleRetry: throttleRetry,
}
cache.cacheNameToID.Store(newDomainCache())
cache.cacheByID.Store(newDomainCache())
Expand Down Expand Up @@ -428,7 +437,7 @@ func (c *DefaultDomainCache) refreshLoop() {
func (c *DefaultDomainCache) refreshDomains() error {
c.refreshLock.Lock()
defer c.refreshLock.Unlock()
return c.refreshDomainsLocked()
return c.throttleRetry.Do(context.Background(), c.refreshDomainsLocked)
}

// this function only refresh the domains in the v2 table
Expand Down
24 changes: 22 additions & 2 deletions common/cache/domainCache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -921,14 +921,34 @@ func (s *domainCacheSuite) Test_refreshDomainsLocked_IntervalTooShort() {
s.NoError(err)
}

func (s *domainCacheSuite) Test_refreshDomainsLocked_ListDomainsError() {
func (s *domainCacheSuite) Test_refreshDomains_ListDomainsNonRetryableError() {
s.metadataMgr.On("GetMetadata", mock.Anything).Return(&persistence.GetMetadataResponse{NotificationVersion: 0}, nil).Once()
s.metadataMgr.On("ListDomains", mock.Anything, mock.Anything).Return(nil, assert.AnError).Once()

err := s.domainCache.refreshDomainsLocked()
err := s.domainCache.refreshDomains()
s.ErrorIs(err, assert.AnError)
}

func (s *domainCacheSuite) Test_refreshDomains_ListDomainsRetryableError() {
retryableError := &types.ServiceBusyError{
Message: "Service is busy",
}

// We expect the metadataMgr to be called twice, once for the initial attempt and once for the retry
s.metadataMgr.On("GetMetadata", mock.Anything).Return(&persistence.GetMetadataResponse{NotificationVersion: 0}, nil).Times(2)

// First time return retryable error
s.metadataMgr.On("ListDomains", mock.Anything, mock.Anything).Return(nil, retryableError).Once()

// Second time return non-retryable error
s.metadataMgr.On("ListDomains", mock.Anything, mock.Anything).Return(nil, assert.AnError).Once()

err := s.domainCache.refreshDomains()

// We expect the error to be the first error
s.ErrorIs(err, retryableError)
}

func (s *domainCacheSuite) TestDomainCacheEntry_Getters() {
gen := testdatagen.New(s.T())

Expand Down
13 changes: 13 additions & 0 deletions common/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ const (
taskCompleterMaxInterval = 10 * time.Second
taskCompleterExpirationInterval = 5 * time.Minute

domainCacheInitialInterval = 1 * time.Second
domainCacheMaxInterval = 20 * time.Second
domainCacheExpirationInterval = 2 * time.Minute

contextExpireThreshold = 10 * time.Millisecond

// FailureReasonCompleteResultExceedsLimit is failureReason for complete result exceeds limit
Expand Down Expand Up @@ -228,6 +232,15 @@ func CreateTaskCompleterRetryPolicy() backoff.RetryPolicy {
return policy
}

// CreateDomainCacheRetryPolicy creates a retry policy to handle domain cache refresh failures
func CreateDomainCacheRetryPolicy() backoff.RetryPolicy {
policy := backoff.NewExponentialRetryPolicy(domainCacheInitialInterval)
policy.SetMaximumInterval(domainCacheMaxInterval)
policy.SetExpirationInterval(domainCacheExpirationInterval)

return policy
}

// IsValidIDLength checks if id is valid according to its length
func IsValidIDLength(
id string,
Expand Down

0 comments on commit ac53f8f

Please sign in to comment.