From abafabaecdaf2315bb94f3462964b8d4d88a9835 Mon Sep 17 00:00:00 2001 From: Lan Date: Wed, 7 Dec 2022 07:17:23 +0800 Subject: [PATCH] Fix ClusterInfo type ResourceExport recreation bug (#4442) After Gateway HA is enabled, the ClusterInfo type of ResourceExport will be recreated when the active Gateway is changed. But there is a case that a new ClusterInfo of ResourceExport creation may fail when the leader controller process is slow and existing ResourceExport is not deleted in time. Fix the issue by checking the existing ResourceExport's DeletionTimestamp and changing to recreate the ResourceExport if the DeletionTimestamp is not zero. Signed-off-by: Lan Luo --- .../multicluster/gateway_controller.go | 17 ++++------ .../multicluster/gateway_controller_test.go | 32 +++++++++++++++++-- 2 files changed, 36 insertions(+), 13 deletions(-) diff --git a/multicluster/controllers/multicluster/gateway_controller.go b/multicluster/controllers/multicluster/gateway_controller.go index 82f5cc5e8f1..048852f9c2f 100644 --- a/multicluster/controllers/multicluster/gateway_controller.go +++ b/multicluster/controllers/multicluster/gateway_controller.go @@ -19,7 +19,6 @@ package multicluster import ( "context" "fmt" - "reflect" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -104,15 +103,18 @@ func (r *GatewayReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct createOrUpdate := func(gwIP string) error { existingResExport := &mcsv1alpha1.ResourceExport{} - if err := commonArea.Get(ctx, resExportNamespacedName, existingResExport); err != nil { - if !apierrors.IsNotFound(err) { - return err - } + err := commonArea.Get(ctx, resExportNamespacedName, existingResExport) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + if apierrors.IsNotFound(err) || !existingResExport.DeletionTimestamp.IsZero() { if err = r.createResourceExport(ctx, req, commonArea, gwIP); err != nil { return err } return nil } + // updateResourceExport will update latest Gateway information with the existing ResourceExport's resourceVersion. + // It will return an error and retry when there is a version conflict. if err = r.updateResourceExport(ctx, req, commonArea, existingResExport, &mcsv1alpha1.GatewayInfo{GatewayIP: gwIP}); err != nil { return err } @@ -150,11 +152,6 @@ func (r *GatewayReconciler) updateResourceExport(ctx context.Context, req ctrl.R PodCIDRs: r.podCIDRs, GatewayInfos: []mcsv1alpha1.GatewayInfo{*gwInfo}, } - if reflect.DeepEqual(existingResExport.Spec, resExportSpec) { - klog.V(2).InfoS("Skip updating ClusterInfo kind of ResourceExport due to no change", "clusterinfo", klog.KObj(existingResExport), - "gateway", req.NamespacedName) - return nil - } klog.V(2).InfoS("Updating ClusterInfo kind of ResourceExport", "clusterinfo", klog.KObj(existingResExport), "gateway", req.NamespacedName) existingResExport.Spec = resExportSpec diff --git a/multicluster/controllers/multicluster/gateway_controller_test.go b/multicluster/controllers/multicluster/gateway_controller_test.go index cb0d0b8a444..4b1d5f8a586 100644 --- a/multicluster/controllers/multicluster/gateway_controller_test.go +++ b/multicluster/controllers/multicluster/gateway_controller_test.go @@ -17,10 +17,12 @@ limitations under the License. package multicluster import ( + "context" "reflect" "testing" "time" + "github.com/stretchr/testify/assert" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" @@ -74,14 +76,15 @@ var ( func TestGatewayReconciler(t *testing.T) { gwNode1New := gwNode1 gwNode1New.GatewayIP = "10.10.10.12" - + staleExistingResExport := existingResExport.DeepCopy() + staleExistingResExport.DeletionTimestamp = &metav1.Time{Time: time.Now()} tests := []struct { name string - te mcsv1alpha1.Gateway namespacedName types.NamespacedName gateway []mcsv1alpha1.Gateway resExport *mcsv1alpha1.ResourceExport expectedInfo []mcsv1alpha1.GatewayInfo + expectedErr string isDelete bool }{ { @@ -99,6 +102,18 @@ func TestGatewayReconciler(t *testing.T) { }, }, }, + { + name: "error creating a ResourceExport when existing ResourceExport is being deleted", + namespacedName: types.NamespacedName{ + Namespace: "default", + Name: "node-1", + }, + gateway: []mcsv1alpha1.Gateway{ + gwNode1, + }, + resExport: staleExistingResExport, + expectedErr: "resourceexports.multicluster.crd.antrea.io \"cluster-a-clusterinfo\" already exists", + }, { name: "update a ResourceExport successfully by updating an existing Gateway", namespacedName: types.NamespacedName{ @@ -145,7 +160,11 @@ func TestGatewayReconciler(t *testing.T) { t.Run(tt.name, func(t *testing.T) { req := ctrl.Request{NamespacedName: tt.namespacedName} if _, err := r.Reconcile(ctx, req); err != nil { - t.Errorf("Gateway Reconciler should handle ResourceExports events successfully but got error = %v", err) + if tt.expectedErr != "" { + assert.Equal(t, tt.expectedErr, err.Error()) + } else { + t.Errorf("Gateway Reconciler should handle ResourceExports events successfully but got error = %v", err) + } } else { ciExport := mcsv1alpha1.ResourceExport{} ciExportName := types.NamespacedName{ @@ -166,3 +185,10 @@ func TestGatewayReconciler(t *testing.T) { }) } } + +func TestGetServiceCIDR(t *testing.T) { + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects().Build() + r := NewGatewayReconciler(fakeClient, scheme, "default", "", []string{"10.200.1.1/16"}, nil) + err := r.getServiceCIDR(context.TODO()) + assert.Contains(t, err.Error(), "expected a specific error but none was returned") +}