Skip to content

Commit

Permalink
Add retry logic to UCP GetAWSResourceWithPost handler (#8170)
Browse files Browse the repository at this point in the history
# Description

We've seen flaky functional test failures with AWS S3:
#5963

This PR adds retries to the handler that I think is causing this 404
error.

* Add `pkg/retry` directory for standard retries
* Use `pkg/retry` in UCP `GetAWSResourceWithPost` handler

## Type of change

<!--

Please select **one** of the following options that describes your
change and delete the others. Clearly identifying the type of change you
are making will help us review your PR faster, and is used in authoring
release notes.

If you are making a bug fix or functionality change to Radius and do not
have an associated issue link please create one now.

-->

- This pull request fixes a bug in Radius and has an approved issue
(issue link required).
- This pull request adds or changes features of Radius and has an
approved issue (issue link required).
- This pull request is a minor refactor, code cleanup, test improvement,
or other maintenance task and doesn't change the functionality of Radius
(issue link optional).

<!--

Please update the following to link the associated issue. This is
required for some kinds of changes (see above).

-->

Fixes: #7352

## Contributor checklist
Please verify that the PR meets the following requirements, where
applicable:

- [ ] An overview of proposed schema changes is included in a linked
GitHub issue.
- [ ] A design document PR is created in the [design-notes
repository](https://github.com/radius-project/design-notes/), if new
APIs are being introduced.
- [ ] If applicable, design document has been reviewed and approved by
Radius maintainers/approvers.
- [ ] A PR for the [samples
repository](https://github.com/radius-project/samples) is created, if
existing samples are affected by the changes in this PR.
- [ ] A PR for the [documentation
repository](https://github.com/radius-project/docs) is created, if the
changes in this PR affect the documentation or any user facing updates
are made.
- [ ] A PR for the [recipes
repository](https://github.com/radius-project/recipes) is created, if
existing recipes are affected by the changes in this PR.

---------

Signed-off-by: willdavsmith <willdavsmith@gmail.com>
  • Loading branch information
willdavsmith authored Jan 27, 2025
1 parent a1782fc commit e4db991
Show file tree
Hide file tree
Showing 11 changed files with 324 additions and 22 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ require (
github.com/sagikazarmark/locafero v0.6.0 // indirect
github.com/sagikazarmark/slog-shim v0.1.0 // indirect
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect
github.com/sethvargo/go-retry v0.3.0 // indirect
github.com/skeema/knownhosts v1.3.0 // indirect
github.com/sourcegraph/conc v0.3.0 // indirect
github.com/tidwall/gjson v1.18.0 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1405,6 +1405,8 @@ github.com/sahilm/fuzzy v0.1.1 h1:ceu5RHF8DGgoi+/dR5PsECjCDH1BE3Fnmpo7aVXOdRA=
github.com/sahilm/fuzzy v0.1.1/go.mod h1:VFvziUEIMCrT6A6tw2RFIXPXXmzXbOsSHF0DOI8ZK9Y=
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 h1:n661drycOFuPLCN3Uc8sB6B/s6Z4t2xvBgU1htSHuq8=
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4=
github.com/sethvargo/go-retry v0.3.0 h1:EEt31A35QhrcRZtrYFDTBg91cqZVnFL2navjDrah2SE=
github.com/sethvargo/go-retry v0.3.0/go.mod h1:mNX17F0C/HguQMyMyJxcnU471gOZGxCLyYaFyAZraas=
github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
Expand Down
82 changes: 82 additions & 0 deletions pkg/retry/retry.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package retry

import (
"context"
"time"

"github.com/sethvargo/go-retry"
)

const (
defaultInterval = 1 * time.Second
defaultMaxRetries = 10
defaultMaxDuration = 60 * time.Second
)

// RetryConfig is the configuration for a retry operation.
type RetryConfig struct {
// BackoffStrategy is the backoff strategy to use.
BackoffStrategy retry.Backoff
}

// Retryer is a utility for retrying functions.
type Retryer struct {
config *RetryConfig
}

// NewNoOpRetryer creates a new Retryer that does not retry.
// This is useful for testing.
func NewNoOpRetryer() *Retryer {
b := retry.NewConstant(1 * time.Second)
b = retry.WithMaxRetries(0, b)

return NewRetryer(&RetryConfig{
BackoffStrategy: b,
})
}

// DefaultBackoffStrategy returns the default backoff strategy.
// The default backoff strategy is an exponential backoff with a maximum duration and maximum retries.
func DefaultBackoffStrategy() retry.Backoff {
b := retry.NewExponential(1 * time.Second)
b = retry.WithMaxDuration(defaultMaxDuration, b)
b = retry.WithMaxRetries(defaultMaxRetries, b)

return b
}

// NewDefaultRetryer creates a new Retryer with the default configuration.
// The default configuration is an exponential backoff with a maximum duration and maximum retries.
func NewDefaultRetryer() *Retryer {
return NewRetryer(&RetryConfig{
BackoffStrategy: DefaultBackoffStrategy(),
})
}

// NewRetryer creates a new Retryer with the given configuration.
// If either the config or config.BackoffStrategy are nil,
// the default configuration is used.
// The default configuration is an exponential backoff with a maximum duration and maximum retries.
func NewRetryer(config *RetryConfig) *Retryer {
retryConfig := &RetryConfig{}

if config != nil && config.BackoffStrategy != nil {
retryConfig.BackoffStrategy = config.BackoffStrategy
} else {
retryConfig.BackoffStrategy = DefaultBackoffStrategy()
}

return &Retryer{
config: retryConfig,
}
}

// RetryFunc retries the given function with the backoff strategy.
func (r *Retryer) RetryFunc(ctx context.Context, f func(ctx context.Context) error) error {
return retry.Do(ctx, r.config.BackoffStrategy, f)
}

// RetryableError marks an error as retryable.
func RetryableError(err error) error {
return retry.RetryableError(err)
}
89 changes: 89 additions & 0 deletions pkg/retry/retry_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
package retry

import (
"context"
"errors"
"testing"
"time"

goretry "github.com/sethvargo/go-retry"
"github.com/stretchr/testify/require"
)

func TestNewNoOpRetryer(t *testing.T) {
retryer := NewNoOpRetryer()
require.NotNil(t, retryer)
require.NotNil(t, retryer.config)
require.NotNil(t, retryer.config.BackoffStrategy)

expectedBackoffStrategy := goretry.NewConstant(time.Second * 1)
expectedBackoffStrategy = goretry.WithMaxRetries(0, expectedBackoffStrategy)

require.IsType(t, expectedBackoffStrategy, retryer.config.BackoffStrategy)
}

func TestDefaultBackoffStrategy(t *testing.T) {
backoff := DefaultBackoffStrategy()
require.NotNil(t, backoff)
}

func TestNewDefaultRetryer(t *testing.T) {
retryer := NewDefaultRetryer()
require.NotNil(t, retryer)
require.NotNil(t, retryer.config)
require.NotNil(t, retryer.config.BackoffStrategy)

expectedBackoffStrategy := goretry.NewConstant(time.Second * 1)
expectedBackoffStrategy = goretry.WithMaxRetries(0, expectedBackoffStrategy)

require.IsType(t, expectedBackoffStrategy, retryer.config.BackoffStrategy)
}

func TestNewRetryer(t *testing.T) {
config := &RetryConfig{
BackoffStrategy: goretry.NewConstant(1 * time.Second),
}
retryer := NewRetryer(config)
require.NotNil(t, retryer)
require.NotNil(t, retryer.config)

retryer = NewRetryer(nil)
require.NotNil(t, retryer)
require.NotNil(t, retryer.config)
require.NotNil(t, retryer.config.BackoffStrategy)

config = &RetryConfig{}
retryer = NewRetryer(config)
require.NotNil(t, retryer)
require.NotNil(t, retryer.config)
require.NotNil(t, retryer.config.BackoffStrategy)
}

func TestRetryer_RetryFunc(t *testing.T) {
retryer := NewDefaultRetryer()
ctx := context.Background()

// Test successful function
err := retryer.RetryFunc(ctx, func(ctx context.Context) error {
return nil
})
require.NoError(t, err)

// Test retryable error
retryCount := 0
err = retryer.RetryFunc(ctx, func(ctx context.Context) error {
retryCount++
if retryCount < 3 {
return RetryableError(errors.New("retryable error"))
}
return nil
})
require.NoError(t, err)
require.Equal(t, 3, retryCount)

// Test non-retryable error
err = retryer.RetryFunc(ctx, func(ctx context.Context) error {
return errors.New("non-retryable error")
})
require.Error(t, err)
}
3 changes: 2 additions & 1 deletion pkg/ucp/frontend/aws/routes.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/radius-project/radius/pkg/armrpc/frontend/defaultoperation"
"github.com/radius-project/radius/pkg/armrpc/frontend/server"
aztoken "github.com/radius-project/radius/pkg/azure/tokencredentials"
"github.com/radius-project/radius/pkg/retry"
"github.com/radius-project/radius/pkg/ucp"
"github.com/radius-project/radius/pkg/ucp/api/v20231001preview"
ucp_aws "github.com/radius-project/radius/pkg/ucp/aws"
Expand Down Expand Up @@ -225,7 +226,7 @@ func (m *Module) Initialize(ctx context.Context) (http.Handler, error) {
OperationType: &v1.OperationType{Type: OperationTypeAWSResource, Method: v1.OperationGetImperative},
ResourceType: OperationTypeAWSResource,
ControllerFactory: func(opt controller.Options) (controller.Controller, error) {
return awsproxy_ctrl.NewGetAWSResourceWithPost(opt, m.AWSClients)
return awsproxy_ctrl.NewGetAWSResourceWithPost(opt, m.AWSClients, retry.NewDefaultRetryer())
},
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ func (p *CreateOrUpdateAWSResource) Run(ctx context.Context, w http.ResponseWrit
}

cloudControlOpts := []func(*cloudcontrol.Options){CloudControlRegionOption(region)}
cloudFormationOpts := []func(*cloudformation.Options){CloudFormationWithRegionOption(region)}
cloudFormationOpts := []func(*cloudformation.Options){CloudFormationRegionOption(region)}

// Create and update work differently for AWS - we need to know if the resource
// we're working on exists already.
Expand Down Expand Up @@ -125,7 +125,6 @@ func (p *CreateOrUpdateAWSResource) Run(ctx context.Context, w http.ResponseWrit

if existing {
// Get resource type schema

describeTypeOutput, err := p.awsClients.CloudFormation.DescribeType(ctx, &cloudformation.DescribeTypeInput{
Type: types.RegistryTypeResource,
TypeName: to.Ptr(serviceCtx.ResourceTypeInAWSFormat()),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ func (p *CreateOrUpdateAWSResourceWithPost) Run(ctx context.Context, w http.Resp
}

cloudControlOpts := []func(*cloudcontrol.Options){CloudControlRegionOption(region)}
cloudFormationOpts := []func(*cloudformation.Options){CloudFormationWithRegionOption(region)}
cloudFormationOpts := []func(*cloudformation.Options){CloudFormationRegionOption(region)}

describeTypeOutput, err := p.awsClients.CloudFormation.DescribeType(ctx, &cloudformation.DescribeTypeInput{
Type: types.RegistryTypeResource,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ func (p *DeleteAWSResourceWithPost) Run(ctx context.Context, w http.ResponseWrit
return armrpc_rest.NewBadRequestARMResponse(e), nil
}

cloudFormationOpts := []func(*cloudformation.Options){CloudFormationWithRegionOption(region)}
cloudFormationOpts := []func(*cloudformation.Options){CloudFormationRegionOption(region)}
describeTypeOutput, err := p.awsClients.CloudFormation.DescribeType(ctx, &cloudformation.DescribeTypeInput{
Type: types.RegistryTypeResource,
TypeName: to.Ptr(serviceCtx.ResourceTypeInAWSFormat()),
Expand Down
38 changes: 27 additions & 11 deletions pkg/ucp/frontend/controller/awsproxy/getawsresourcewithpost.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ import (
"github.com/aws/aws-sdk-go-v2/service/cloudcontrol"
"github.com/aws/aws-sdk-go-v2/service/cloudformation"
"github.com/aws/aws-sdk-go-v2/service/cloudformation/types"

"github.com/radius-project/radius/pkg/retry"
)

var _ armrpc_controller.Controller = (*GetAWSResourceWithPost)(nil)
Expand All @@ -44,13 +46,15 @@ var _ armrpc_controller.Controller = (*GetAWSResourceWithPost)(nil)
type GetAWSResourceWithPost struct {
armrpc_controller.Operation[*datamodel.AWSResource, datamodel.AWSResource]
awsClients ucpaws.Clients
retryer *retry.Retryer
}

// NewGetAWSResourceWithPost creates a new GetAWSResourceWithPost controller with the given options and AWS clients.
func NewGetAWSResourceWithPost(opts armrpc_controller.Options, awsClients ucpaws.Clients) (armrpc_controller.Controller, error) {
func NewGetAWSResourceWithPost(opts armrpc_controller.Options, awsClients ucpaws.Clients, retryer *retry.Retryer) (armrpc_controller.Controller, error) {
return &GetAWSResourceWithPost{
Operation: armrpc_controller.NewOperation(opts, armrpc_controller.ResourceOptions[datamodel.AWSResource]{}),
awsClients: awsClients,
retryer: retryer,
}, nil
}

Expand All @@ -77,7 +81,7 @@ func (p *GetAWSResourceWithPost) Run(ctx context.Context, w http.ResponseWriter,
return armrpc_rest.NewBadRequestARMResponse(e), nil
}

cloudFormationOpts := []func(*cloudformation.Options){CloudFormationWithRegionOption(region)}
cloudFormationOpts := []func(*cloudformation.Options){CloudFormationRegionOption(region)}
describeTypeOutput, err := p.awsClients.CloudFormation.DescribeType(ctx, &cloudformation.DescribeTypeInput{
Type: types.RegistryTypeResource,
TypeName: to.Ptr(serviceCtx.ResourceTypeInAWSFormat()),
Expand All @@ -100,15 +104,27 @@ func (p *GetAWSResourceWithPost) Run(ctx context.Context, w http.ResponseWriter,

cloudcontrolOpts := []func(*cloudcontrol.Options){CloudControlRegionOption(region)}
logger.Info("Fetching resource", "resourceType", serviceCtx.ResourceTypeInAWSFormat(), "resourceID", awsResourceIdentifier)
response, err := p.awsClients.CloudControl.GetResource(ctx, &cloudcontrol.GetResourceInput{
TypeName: to.Ptr(serviceCtx.ResourceTypeInAWSFormat()),
Identifier: aws.String(awsResourceIdentifier),
}, cloudcontrolOpts...)

if ucpaws.IsAWSResourceNotFoundError(err) {
return armrpc_rest.NewNotFoundMessageResponse(constructNotFoundResponseMessage(middleware.GetRelativePath(p.Options().PathBase, req.URL.Path), awsResourceIdentifier)), nil
} else if err != nil {
return ucpaws.HandleAWSError(err)

var response *cloudcontrol.GetResourceOutput
if err := p.retryer.RetryFunc(ctx, func(ctx context.Context) error {
response, err = p.awsClients.CloudControl.GetResource(ctx, &cloudcontrol.GetResourceInput{
TypeName: to.Ptr(serviceCtx.ResourceTypeInAWSFormat()),
Identifier: aws.String(awsResourceIdentifier),
}, cloudcontrolOpts...)

// If the resource is not found, retry.
if ucpaws.IsAWSResourceNotFoundError(err) {
return retry.RetryableError(err)
}

// If any other error occurs, return the error.
return err
}); err != nil {
if ucpaws.IsAWSResourceNotFoundError(err) {
return armrpc_rest.NewNotFoundMessageResponse(constructNotFoundResponseMessage(middleware.GetRelativePath(p.Options().PathBase, req.URL.Path), awsResourceIdentifier)), nil
} else {
return ucpaws.HandleAWSError(err)
}
}

resourceProperties := map[string]any{}
Expand Down
Loading

0 comments on commit e4db991

Please sign in to comment.