From 52bbb29401f7996e396f6f761fe07ecfeeda0998 Mon Sep 17 00:00:00 2001 From: Gabe Alford Date: Wed, 1 May 2024 10:21:44 -0600 Subject: [PATCH] chore: use retry on conflict to update the status --- .../admission/falconadmission_controller.go | 32 +++++------ internal/controller/common/utils.go | 27 +++++---- .../falconcontainer_controller.go | 42 ++++++++------ .../controller/falcon_container/image_push.go | 35 +++++++----- .../controller/falcon_container/k8s_utils.go | 55 +++++++++++++------ .../falconnodesensor_controller.go | 38 +++++++++---- 6 files changed, 140 insertions(+), 89 deletions(-) diff --git a/internal/controller/admission/falconadmission_controller.go b/internal/controller/admission/falconadmission_controller.go index edae3d1f..98bba5fc 100644 --- a/internal/controller/admission/falconadmission_controller.go +++ b/internal/controller/admission/falconadmission_controller.go @@ -30,6 +30,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/util/retry" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" @@ -106,37 +107,30 @@ func (r *FalconAdmissionReconciler) Reconcile(ctx context.Context, req ctrl.Requ // Let's just set the status as Unknown when no status is available if falconAdmission.Status.Conditions == nil || len(falconAdmission.Status.Conditions) == 0 { - meta.SetStatusCondition(&falconAdmission.Status.Conditions, metav1.Condition{Type: falconv1alpha1.ConditionPending, Status: metav1.ConditionUnknown, Reason: "Reconciling", Message: "Starting reconciliation"}) - if err = r.Status().Update(ctx, falconAdmission); err != nil { + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + meta.SetStatusCondition(&falconAdmission.Status.Conditions, metav1.Condition{Type: falconv1alpha1.ConditionPending, Status: metav1.ConditionUnknown, Reason: "Reconciling", Message: "Starting reconciliation"}) + return r.Status().Update(ctx, falconAdmission) + }) + if err != nil { log.Error(err, "Failed to update FalconAdmission status") return ctrl.Result{}, err } - - // Let's re-fetch the Custom Resource after update the status - // so that we have the latest state of the resource on the cluster and we will avoid - // raise the issue "the object has been modified, please apply - // your changes to the latest version and try again" which would re-trigger the reconciliation - // if we try to update it again in the following operations - if err := r.Get(ctx, req.NamespacedName, falconAdmission); err != nil { - log.Error(err, "Failed to re-fetch FalconAdmission") - return ctrl.Result{}, err - } } if falconAdmission.Status.Version != version.Get() { falconAdmission.Status.Version = version.Get() + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + err := r.Get(ctx, req.NamespacedName, falconAdmission) + if err != nil { + return err + } - err = r.Status().Update(ctx, falconAdmission) + return r.Status().Update(ctx, falconAdmission) + }) if err != nil { log.Error(err, "Failed to update FalconAdmission status for falconAdmission.Status.Version") return ctrl.Result{}, err } - - err = r.Get(ctx, req.NamespacedName, falconAdmission) - if err != nil { - log.Error(err, "Failed to re-fetch FalconAdmission for status update") - return ctrl.Result{}, err - } } if err := r.reconcileNamespace(ctx, req, log, falconAdmission); err != nil { diff --git a/internal/controller/common/utils.go b/internal/controller/common/utils.go index 9a9782fb..98ccf09e 100644 --- a/internal/controller/common/utils.go +++ b/internal/controller/common/utils.go @@ -14,6 +14,7 @@ import ( "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/util/retry" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -164,19 +165,21 @@ func ConditionsUpdate(r client.Client, ctx context.Context, req ctrl.Request, lo if !meta.IsStatusConditionPresentAndEqual(falconStatus.Conditions, falconCondition.Type, falconCondition.Status) { fgvk := falconObject.GetObjectKind().GroupVersionKind() - // Re-fetch the Custom Resource before update the status - // so that we have the latest state of the resource on the cluster and we will avoid - // raise the issue "the object has been modified, please apply - // your changes to the latest version and try again" which would re-trigger the reconciliation - err := r.Get(ctx, req.NamespacedName, falconObject) - if err != nil { - log.Error(err, fmt.Sprintf("Failed to re-fetch %s for status update", fgvk.Kind)) - return err - } + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + // Re-fetch the Custom Resource before update the status + // so that we have the latest state of the resource on the cluster and we will avoid + // raise the issue "the object has been modified, please apply + // your changes to the latest version and try again" which would re-trigger the reconciliation + err := r.Get(ctx, req.NamespacedName, falconObject) + if err != nil { + log.Error(err, fmt.Sprintf("Failed to re-fetch %s for status update", fgvk.Kind)) + return err + } - // The following implementation will update the status - meta.SetStatusCondition(&falconStatus.Conditions, falconCondition) - err = r.Status().Update(ctx, falconObject) + // The following implementation will update the status + meta.SetStatusCondition(&falconStatus.Conditions, falconCondition) + return r.Status().Update(ctx, falconObject) + }) if err != nil { log.Error(err, fmt.Sprintf("Failed to update %s status", fgvk.Kind)) return err diff --git a/internal/controller/falcon_container/falconcontainer_controller.go b/internal/controller/falcon_container/falconcontainer_controller.go index df8f357f..28d32662 100644 --- a/internal/controller/falcon_container/falconcontainer_controller.go +++ b/internal/controller/falcon_container/falconcontainer_controller.go @@ -21,6 +21,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/rest" + "k8s.io/client-go/util/retry" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" @@ -97,7 +98,14 @@ func (r *FalconContainerReconciler) Reconcile(ctx context.Context, req ctrl.Requ if falconContainer.Status.Version != version.Get() { falconContainer.Status.Version = version.Get() - err := r.Status().Update(ctx, falconContainer) + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + err := r.Get(ctx, req.NamespacedName, falconContainer) + if err != nil { + return err + } + + return r.Status().Update(ctx, falconContainer) + }) if err != nil { log.Error(err, "Failed to update FalconContainer status for falconcontainer.Status.Version") return ctrl.Result{}, err @@ -275,26 +283,26 @@ func (r *FalconContainerReconciler) Reconcile(ctx context.Context, req ctrl.Requ } func (r *FalconContainerReconciler) StatusUpdate(ctx context.Context, req ctrl.Request, log logr.Logger, falconContainer *falconv1alpha1.FalconContainer, condType string, status metav1.ConditionStatus, reason string, message string) error { - meta.SetStatusCondition(&falconContainer.Status.Conditions, metav1.Condition{ - Status: status, - Reason: reason, - Message: message, - Type: condType, - ObservedGeneration: falconContainer.GetGeneration(), + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + err := r.Get(ctx, req.NamespacedName, falconContainer) + if err != nil { + return err + } + + meta.SetStatusCondition(&falconContainer.Status.Conditions, metav1.Condition{ + Status: status, + Reason: reason, + Message: message, + Type: condType, + ObservedGeneration: falconContainer.GetGeneration(), + }) + + return r.Status().Update(ctx, falconContainer) }) - if err := r.Status().Update(ctx, falconContainer); err != nil { + if err != nil { log.Error(err, "Failed to update FalconContainer status") return err } - // Let's re-fetch the memcached Custom Resource after update the status - // so that we have the latest state of the resource on the cluster and we will avoid - // raise the issue "the object has been modified, please apply - // your changes to the latest version and try again" which would re-trigger the reconciliation - // if we try to update it again in the following operations - if err := r.Get(ctx, req.NamespacedName, falconContainer); err != nil { - log.Error(err, "Failed to re-fetch FalconContainer") - return err - } return nil } diff --git a/internal/controller/falcon_container/image_push.go b/internal/controller/falcon_container/image_push.go index 51618101..ae2c21c0 100644 --- a/internal/controller/falcon_container/image_push.go +++ b/internal/controller/falcon_container/image_push.go @@ -7,6 +7,7 @@ import ( "strings" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/util/retry" falconv1alpha1 "github.com/crowdstrike/falcon-operator/api/falcon/v1alpha1" "github.com/crowdstrike/falcon-operator/internal/controller/image" @@ -57,14 +58,18 @@ func (r *FalconContainerReconciler) PushImage(ctx context.Context, log logr.Logg return fmt.Errorf("Cannot identify Falcon Container Image: %v", err) } - meta.SetStatusCondition(&falconContainer.Status.Conditions, metav1.Condition{ - Type: "ImageReady", - Status: metav1.ConditionTrue, - Message: imageUri, - Reason: "Pushed", + err = retry.RetryOnConflict(retry.DefaultRetry, func() error { + meta.SetStatusCondition(&falconContainer.Status.Conditions, metav1.Condition{ + Type: "ImageReady", + Status: metav1.ConditionTrue, + Message: imageUri, + Reason: "Pushed", + }) + + return r.Client.Status().Update(ctx, falconContainer) }) - return r.Client.Status().Update(ctx, falconContainer) + return err } func (r *FalconContainerReconciler) verifyCrowdStrikeRegistry(ctx context.Context, log logr.Logger, falconContainer *falconv1alpha1.FalconContainer) (bool, error) { @@ -83,15 +88,19 @@ func (r *FalconContainerReconciler) verifyCrowdStrikeRegistry(ctx context.Contex return false, nil } - meta.SetStatusCondition(&falconContainer.Status.Conditions, metav1.Condition{ - Status: metav1.ConditionTrue, - Reason: falconv1alpha1.ReasonDiscovered, - Message: imageUri, - Type: falconv1alpha1.ConditionImageReady, - ObservedGeneration: falconContainer.GetGeneration(), + err = retry.RetryOnConflict(retry.DefaultRetry, func() error { + meta.SetStatusCondition(&falconContainer.Status.Conditions, metav1.Condition{ + Status: metav1.ConditionTrue, + Reason: falconv1alpha1.ReasonDiscovered, + Message: imageUri, + Type: falconv1alpha1.ConditionImageReady, + ObservedGeneration: falconContainer.GetGeneration(), + }) + + return r.Client.Status().Update(ctx, falconContainer) }) - return true, r.Client.Status().Update(ctx, falconContainer) + return true, err } func (r *FalconContainerReconciler) registryUri(ctx context.Context, falconContainer *falconv1alpha1.FalconContainer) (string, error) { diff --git a/internal/controller/falcon_container/k8s_utils.go b/internal/controller/falcon_container/k8s_utils.go index 35a8adc8..7b5f099b 100644 --- a/internal/controller/falcon_container/k8s_utils.go +++ b/internal/controller/falcon_container/k8s_utils.go @@ -11,6 +11,7 @@ import ( "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/util/retry" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -29,13 +30,19 @@ func (r *FalconContainerReconciler) Create(ctx context.Context, log logr.Logger, return fmt.Errorf("failed to create %s %s in namespace %s: %v", gvk.Kind, name, namespace, err) } } - meta.SetStatusCondition(&falconContainer.Status.Conditions, metav1.Condition{ - Type: fmt.Sprintf("%sReady", strings.ToUpper(gvk.Kind[:1])+gvk.Kind[1:]), - Status: metav1.ConditionTrue, - Reason: "Created", - Message: fmt.Sprintf("Successfully created %s %s in %s", gvk.Kind, name, namespace), + + err = retry.RetryOnConflict(retry.DefaultRetry, func() error { + meta.SetStatusCondition(&falconContainer.Status.Conditions, metav1.Condition{ + Type: fmt.Sprintf("%sReady", strings.ToUpper(gvk.Kind[:1])+gvk.Kind[1:]), + Status: metav1.ConditionTrue, + Reason: "Created", + Message: fmt.Sprintf("Successfully created %s %s in %s", gvk.Kind, name, namespace), + }) + + return r.Client.Status().Update(ctx, falconContainer) }) - return r.Client.Status().Update(ctx, falconContainer) + + return err default: return fmt.Errorf("Unrecognized kube object type: %T", obj) } @@ -55,13 +62,19 @@ func (r *FalconContainerReconciler) Update(ctx context.Context, log logr.Logger, } return fmt.Errorf("Cannot update object %s %s in namespace %s: %v", gvk.Kind, name, namespace, err) } - meta.SetStatusCondition(&falconContainer.Status.Conditions, metav1.Condition{ - Type: fmt.Sprintf("%sReady", strings.ToUpper(gvk.Kind[:1])+gvk.Kind[1:]), - Status: metav1.ConditionTrue, - Reason: "Updated", - Message: fmt.Sprintf("Successfully updated %s %s in %s", gvk.Kind, name, namespace), + + err = retry.RetryOnConflict(retry.DefaultRetry, func() error { + meta.SetStatusCondition(&falconContainer.Status.Conditions, metav1.Condition{ + Type: fmt.Sprintf("%sReady", strings.ToUpper(gvk.Kind[:1])+gvk.Kind[1:]), + Status: metav1.ConditionTrue, + Reason: "Updated", + Message: fmt.Sprintf("Successfully updated %s %s in %s", gvk.Kind, name, namespace), + }) + + return r.Client.Status().Update(ctx, falconContainer) }) - return r.Client.Status().Update(ctx, falconContainer) + + return err default: return fmt.Errorf("Unrecognized kube object type: %T", obj) } @@ -81,13 +94,19 @@ func (r *FalconContainerReconciler) Delete(ctx context.Context, log logr.Logger, } return fmt.Errorf("Cannot delete object %s %s in namespace %s: %v", gvk.Kind, name, namespace, err) } - meta.SetStatusCondition(&falconContainer.Status.Conditions, metav1.Condition{ - Type: fmt.Sprintf("%sReady", strings.ToUpper(gvk.Kind[:1])+gvk.Kind[1:]), - Status: metav1.ConditionFalse, - Reason: "Deleted", - Message: fmt.Sprintf("Successfully deleted %s %s in %s", gvk.Kind, name, namespace), + + err = retry.RetryOnConflict(retry.DefaultRetry, func() error { + meta.SetStatusCondition(&falconContainer.Status.Conditions, metav1.Condition{ + Type: fmt.Sprintf("%sReady", strings.ToUpper(gvk.Kind[:1])+gvk.Kind[1:]), + Status: metav1.ConditionFalse, + Reason: "Deleted", + Message: fmt.Sprintf("Successfully deleted %s %s in %s", gvk.Kind, name, namespace), + }) + + return r.Client.Status().Update(ctx, falconContainer) }) - return r.Client.Status().Update(ctx, falconContainer) + + return err default: return fmt.Errorf("Unrecognized kube object type: %T", obj) } diff --git a/internal/controller/falcon_node/falconnodesensor_controller.go b/internal/controller/falcon_node/falconnodesensor_controller.go index ab2fa046..f8b55fb1 100644 --- a/internal/controller/falcon_node/falconnodesensor_controller.go +++ b/internal/controller/falcon_node/falconnodesensor_controller.go @@ -23,6 +23,7 @@ import ( "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/util/retry" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -101,7 +102,14 @@ func (r *FalconNodeSensorReconciler) Reconcile(ctx context.Context, req ctrl.Req if nodesensor.Status.Version != version.Get() { nodesensor.Status.Version = version.Get() - err = r.Status().Update(ctx, nodesensor) + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + err := r.Get(ctx, req.NamespacedName, nodesensor) + if err != nil { + return err + } + + return r.Status().Update(ctx, nodesensor) + }) if err != nil { log.Error(err, "Failed to update FalconNodeSensor status for nodesensor.Status.Version") return ctrl.Result{}, err @@ -300,7 +308,14 @@ func (r *FalconNodeSensorReconciler) Reconcile(ctx context.Context, req ctrl.Req imgVer := common.ImageVersion(image) if nodesensor.Status.Sensor != imgVer { nodesensor.Status.Sensor = imgVer - err = r.Status().Update(ctx, nodesensor) + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + err := r.Get(ctx, req.NamespacedName, nodesensor) + if err != nil { + return err + } + + return r.Status().Update(ctx, nodesensor) + }) if err != nil { log.Error(err, "Failed to update FalconNodeSensor status for nodesensor.Status.Sensor") return ctrl.Result{}, err @@ -838,15 +853,18 @@ func (r *FalconNodeSensorReconciler) handleSAAnnotations(ctx context.Context, no // statusUpdate updates the FalconNodeSensor CR conditions func (r *FalconNodeSensorReconciler) conditionsUpdate(condType string, status metav1.ConditionStatus, reason string, message string, ctx context.Context, nodesensor *falconv1alpha1.FalconNodeSensor, logger logr.Logger) error { if !meta.IsStatusConditionPresentAndEqual(nodesensor.Status.Conditions, condType, status) { - meta.SetStatusCondition(&nodesensor.Status.Conditions, metav1.Condition{ - Status: status, - Reason: reason, - Message: message, - Type: condType, - ObservedGeneration: nodesensor.GetGeneration(), + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + meta.SetStatusCondition(&nodesensor.Status.Conditions, metav1.Condition{ + Status: status, + Reason: reason, + Message: message, + Type: condType, + ObservedGeneration: nodesensor.GetGeneration(), + }) + + return r.Status().Update(ctx, nodesensor) }) - - if err := r.Status().Update(ctx, nodesensor); err != nil { + if err != nil { logger.Error(err, "Failed to update FalconNodeSensor status", "Failed to update the Condition at Reasoning", reason) return err }