Skip to content

Commit

Permalink
feat(konnect): handle BadRequest errors and other unrecoverable error…
Browse files Browse the repository at this point in the history
…s to prevent endless reconciliation (#796)

* feat(konnect): handle BadRequest errors and other unrecoverable errors to prevent endless reconciliation

* refactor: add IgnoreUnrecoverableAPIErr to handle unrecoverable errors
  • Loading branch information
pmalek authored Oct 24, 2024
1 parent f284199 commit 170469a
Show file tree
Hide file tree
Showing 3 changed files with 365 additions and 69 deletions.
43 changes: 32 additions & 11 deletions controller/konnect/ops/ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ import (
"fmt"
"time"

sdkkonnecterrs "github.com/Kong/sdk-konnect-go/models/sdkerrors"
"github.com/go-logr/logr"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -102,7 +104,10 @@ func Create[
return nil, fmt.Errorf("unsupported entity type %T", ent)
}

var errRelationsFailed KonnectEntityCreatedButRelationsFailedError
var (
errRelationsFailed KonnectEntityCreatedButRelationsFailedError
errSDK *sdkkonnecterrs.SDKError
)
switch {
case ErrorIsCreateConflict(err):
// If there was a conflict on the create request, we can assume the entity already exists.
Expand Down Expand Up @@ -161,8 +166,12 @@ func Create[
} else {
SetKonnectEntityProgrammedConditionFalse(e, consts.KonnectEntitiesFailedToCreateReason, err.Error())
}

case errors.As(err, &errSDK):
SetKonnectEntityProgrammedConditionFalse(e, consts.KonnectEntitiesFailedToCreateReason, errSDK.Error())
case errors.As(err, &errRelationsFailed):
SetKonnectEntityProgrammedConditionFalse(e, errRelationsFailed.Reason, err.Error())
e.SetKonnectID(errRelationsFailed.KonnectID)
SetKonnectEntityProgrammedConditionFalse(e, errRelationsFailed.Reason, errRelationsFailed.Err.Error())
case err != nil:
SetKonnectEntityProgrammedConditionFalse(e, consts.KonnectEntitiesFailedToCreateReason, err.Error())
default:
Expand All @@ -171,7 +180,7 @@ func Create[

logOpComplete(ctx, start, CreateOp, e, err)

return e, err
return e, IgnoreUnrecoverableAPIErr(err, loggerForEntity(ctx, e, CreateOp))
}

// Delete deletes a Konnect entity.
Expand Down Expand Up @@ -242,7 +251,7 @@ func Delete[
return fmt.Errorf("unsupported entity type %T", ent)
}

logOpComplete[T, TEnt](ctx, start, DeleteOp, ent, err)
logOpComplete(ctx, start, DeleteOp, ent, err)

return err
}
Expand Down Expand Up @@ -360,8 +369,13 @@ func Update[
return ctrl.Result{}, fmt.Errorf("unsupported entity type %T", ent)
}

var errRelationsFailed KonnectEntityCreatedButRelationsFailedError
var (
errRelationsFailed KonnectEntityCreatedButRelationsFailedError
errSDK *sdkkonnecterrs.SDKError
)
switch {
case errors.As(err, &errSDK):
SetKonnectEntityProgrammedConditionFalse(e, consts.KonnectEntitiesFailedToUpdateReason, errSDK.Body)
case errors.As(err, &errRelationsFailed):
e.SetKonnectID(errRelationsFailed.KonnectID)
SetKonnectEntityProgrammedConditionFalse(e, errRelationsFailed.Reason, err.Error())
Expand All @@ -371,26 +385,33 @@ func Update[
SetKonnectEntityProgrammedCondition(e)
}

logOpComplete[T, TEnt](ctx, now, UpdateOp, e, err)
logOpComplete(ctx, now, UpdateOp, e, err)

return ctrl.Result{}, err
return ctrl.Result{}, IgnoreUnrecoverableAPIErr(err, loggerForEntity(ctx, e, UpdateOp))
}

func logOpComplete[
func loggerForEntity[
T constraints.SupportedKonnectEntityType,
TEnt constraints.EntityType[T],
](ctx context.Context, start time.Time, op Op, e TEnt, err error) {
](ctx context.Context, e TEnt, op Op) logr.Logger {
keysAndValues := []interface{}{
"op", op,
"duration", time.Since(start).String(),
}

// Only add the Konnect ID if it exists and it's a create operation.
// Otherwise the Konnect ID is already set in the logger.
if id := e.GetKonnectStatus().GetKonnectID(); id != "" && op == CreateOp {
keysAndValues = append(keysAndValues, "konnect_id", id)
}
logger := ctrllog.FromContext(ctx).WithValues(keysAndValues...)
return ctrllog.FromContext(ctx).WithValues(keysAndValues...)
}

func logOpComplete[
T constraints.SupportedKonnectEntityType,
TEnt constraints.EntityType[T],
](ctx context.Context, start time.Time, op Op, e TEnt, err error) {
logger := loggerForEntity(ctx, e, op).
WithValues("duration", time.Since(start).String())

if err != nil {
// NOTE: We don't want to print stack trace information here so skip 99 frames
Expand Down
157 changes: 99 additions & 58 deletions controller/konnect/ops/ops_errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@ import (
"errors"
"fmt"
"net/http"
"slices"

sdkkonnecterrs "github.com/Kong/sdk-konnect-go/models/sdkerrors"
"github.com/go-logr/logr"
"sigs.k8s.io/controller-runtime/pkg/client"
ctrllog "sigs.k8s.io/controller-runtime/pkg/log"

"github.com/kong/gateway-operator/controller/konnect/constraints"
"github.com/kong/gateway-operator/controller/pkg/log"
)

// ErrNilResponse is an error indicating that a Konnect operation returned an empty response.
Expand Down Expand Up @@ -51,15 +54,17 @@ func (e CantPerformOperationWithoutControlPlaneIDError) Error() string {
)
}

type sdkErrorDetails struct {
TypeAt string `json:"@type"`
Type string `json:"type"`
Field string `json:"field"`
Messages []string `json:"messages"`
}

type sdkErrorBody struct {
Code int `json:"code"`
Message string `json:"message"`
Details []struct {
TypeAt string `json:"@type"`
Type string `json:"type"`
Field string `json:"field"`
Messages []string `json:"messages"`
} `json:"details"`
Code int `json:"code"`
Message string `json:"message"`
Details []sdkErrorDetails `json:"details"`
}

// ParseSDKErrorBody parses the body of an SDK error response.
Expand Down Expand Up @@ -88,6 +93,62 @@ func ParseSDKErrorBody(body string) (sdkErrorBody, error) {
return sdkErr, nil
}

const (
dataConstraintMesasge = "data constraint error"
validationErrorMessage = "validation error"
)

// ErrorIsSDKErrorTypeField returns true if the provided error is a type field error.
// These types of errors are unrecoverable and should be covered by CEL validation
// rules on CRDs but just in case some of those are left unhandled we can handle
// them by reacting to SDKErrors of type ERROR_TYPE_FIELD.
//
// Exemplary body:
//
// {
// "code": 3,
// "message": "validation error",
// "details": [
// {
// "@type": "type.googleapis.com/kong.admin.model.v1.ErrorDetail",
// "type": "ERROR_TYPE_FIELD",
// "field": "tags[0]",
// "messages": [
// "length must be <= 128, but got 138"
// ]
// }
// ]
// }
func ErrorIsSDKErrorTypeField(err error) bool {
var errSDK *sdkkonnecterrs.SDKError
if !errors.As(err, &errSDK) {
return false
}

errSDKBody, err := ParseSDKErrorBody(errSDK.Body)
if err != nil {
return false
}

if errSDKBody.Message != validationErrorMessage {
return false
}

if !slices.ContainsFunc(errSDKBody.Details, func(d sdkErrorDetails) bool {
return d.Type == "ERROR_TYPE_FIELD"
}) {
return false
}

return true
}

// ErrorIsSDKBadRequestError returns true if the provided error is a BadRequestError.
func ErrorIsSDKBadRequestError(err error) bool {
var errSDK *sdkkonnecterrs.BadRequestError
return errors.As(err, &errSDK)
}

// ErrorIsCreateConflict returns true if the provided error is a Konnect conflict error.
//
// NOTE: Konnect APIs specific for Konnect only APIs like Gateway Control Planes
Expand All @@ -114,10 +175,6 @@ func SDKErrorIsConflict(sdkError *sdkkonnecterrs.SDKError) bool {
return false
}

const (
dataConstraintMesasge = "data constraint error"
)

if sdkErrorBody.Message != dataConstraintMesasge {
return false
}
Expand All @@ -129,6 +186,15 @@ func SDKErrorIsConflict(sdkError *sdkkonnecterrs.SDKError) bool {
return false
}

func errIsNotFound(err error) bool {
var (
notFoundError *sdkkonnecterrs.NotFoundError
sdkError *sdkkonnecterrs.SDKError
)
return errors.As(err, &notFoundError) ||
errors.As(err, &sdkError) && sdkError.StatusCode == http.StatusNotFound
}

// handleUpdateError handles errors that occur during an update operation.
// If the entity is not found, then it uses the provided create function to
// recreate the it.
Expand All @@ -141,41 +207,17 @@ func handleUpdateError[
ent TEnt,
createFunc func(ctx context.Context) error,
) error {
var (
sdkError *sdkkonnecterrs.SDKError
id = ent.GetKonnectStatus().GetKonnectID()
)
if errors.As(err, &sdkError) {
switch sdkError.StatusCode {
case http.StatusNotFound:
logEntityNotFoundRecreating(ctx, ent, id)
if err := createFunc(ctx); err != nil {
return FailedKonnectOpError[T]{
Op: UpdateOp,
Err: err,
}
}
return nil
default:
return FailedKonnectOpError[T]{
Op: UpdateOp,
Err: sdkError,
}
}
}

var notFoundError *sdkkonnecterrs.NotFoundError
if errors.As(err, &notFoundError) {
if errIsNotFound(err) {
id := ent.GetKonnectStatus().GetKonnectID()
logEntityNotFoundRecreating(ctx, ent, id)
if err := createFunc(ctx); err != nil {
if createErr := createFunc(ctx); createErr != nil {
return FailedKonnectOpError[T]{
Op: UpdateOp,
Err: err,
Op: CreateOp,
Err: fmt.Errorf("failed to create %s %s: %w", ent.GetTypeName(), id, createErr),
}
}
return nil
}

return FailedKonnectOpError[T]{
Op: UpdateOp,
Err: err,
Expand All @@ -189,34 +231,33 @@ func handleDeleteError[
T constraints.SupportedKonnectEntityType,
TEnt constraints.EntityType[T],
](ctx context.Context, err error, ent TEnt) error {
logDeleteSkipped := func() {
if errIsNotFound(err) {
ctrllog.FromContext(ctx).
Info("entity not found in Konnect, skipping delete",
"op", DeleteOp,
"type", ent.GetTypeName(),
"id", ent.GetKonnectStatus().GetKonnectID(),
)
}

var sdkNotFoundError *sdkkonnecterrs.NotFoundError
if errors.As(err, &sdkNotFoundError) {
logDeleteSkipped()
return nil
}

var sdkError *sdkkonnecterrs.SDKError
if errors.As(err, &sdkError) {
if sdkError.StatusCode == http.StatusNotFound {
logDeleteSkipped()
return nil
}
return FailedKonnectOpError[T]{
Op: DeleteOp,
Err: sdkError,
}
}
return FailedKonnectOpError[T]{
Op: DeleteOp,
Err: err,
}
}

// IgnoreUnrecoverableAPIErr ignores unrecoverable errors that would cause the
// reconciler to endlessly requeue.
func IgnoreUnrecoverableAPIErr(err error, logger logr.Logger) error {
// If the error is a type field error or bad request error, then don't propagate
// it to the caller.
// We cannot recover from this error as this requires user to change object's
// manifest. The entity's status is already updated with the error.
if ErrorIsSDKErrorTypeField(err) || ErrorIsSDKBadRequestError(err) {
log.Debug(logger, "ignoring unrecoverable API error, consult object's status for details", "err", err)
return nil
}

return err
}
Loading

0 comments on commit 170469a

Please sign in to comment.