Skip to content

Commit 0c1aa96

Browse files
authored
Add retry logic for App CR updates to handle optimistic concurrency conflicts (#1766)
* Add retry logic for App CR updates to handle optimistic concurrency conflicts - Implement updateAppWithRetry() function in PackageInstall reconciler - Add retry logic with up to 5 attempts for App CR updates - Re-fetch App CR after each failed update to get latest resourceVersion - Handle NotFound errors immediately without retries - Add comprehensive unit tests for retry logic covering: - Conflict error handling with retries - NotFound error immediate return - Update function error propagation - Use NewApp function for App transformations in retry logic Fixes optimistic concurrency control conflicts that occur when multiple operations attempt to update the same App CR simultaneously. Signed-off-by: Marin Dzhigarov <m.dzhigarov@gmail.com> * Fix golangci-lint unused parameter warnings in tests - Rename unused 'action' parameters to '_' in test reactors - Rename unused 'app' parameter to '_' in failing update function test - Addresses revive linter warnings for unused parameters Signed-off-by: Marin Dzhigarov <m.dzhigarov@gmail.com> * Fix app update retry logic to only retry on conflict errors - Update updateAppWithRetry to only retry on conflict errors, not on NotFound errors - Change retry condition from !errors.IsNotFound(err) to !errors.IsConflict(err) - Update unit tests to reflect new retry behavior: - Fix Test_UpdateAppWithRetry_HandlesNotFoundError to expect no retries for NotFound - Add Test_UpdateAppWithRetry_HandlesNonConflictError for non-conflict error handling - Update Test_UpdateAppWithRetry_HandlesConflictErrors to use proper errors.NewConflict() - Add k8s.io/apimachinery/pkg/api/errors import for proper error types This ensures that only conflict errors trigger retries, while all other errors (including NotFound) are returned immediately without retries. Signed-off-by: Marin Dzhigarov <m.dzhigarov@gmail.com> --------- Signed-off-by: Marin Dzhigarov <m.dzhigarov@gmail.com>
1 parent e94726f commit 0c1aa96

File tree

3 files changed

+309
-4
lines changed

3 files changed

+309
-4
lines changed

pkg/app/crd_app.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,18 @@ func (a *CRDApp) updateStatusOnce() error {
100100
func (a *CRDApp) updateApp(updateFunc func(*kcv1alpha1.App)) error {
101101
a.log.Info("Updating app")
102102

103+
var lastErr error
104+
for i := 0; i < 5; i++ {
105+
lastErr = a.updateAppOnce(updateFunc)
106+
if lastErr == nil {
107+
return nil
108+
}
109+
}
110+
111+
return lastErr
112+
}
113+
114+
func (a *CRDApp) updateAppOnce(updateFunc func(*kcv1alpha1.App)) error {
103115
existingApp, err := a.appClient.KappctrlV1alpha1().Apps(a.appModel.Namespace).Get(context.Background(), a.appModel.Name, metav1.GetOptions{})
104116
if err != nil {
105117
return fmt.Errorf("Updating app: %s", err)

pkg/packageinstall/packageinstall.go

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -230,8 +230,9 @@ func (pi *PackageInstallCR) reconcileAppWithPackage(existingApp *kcv1alpha1.App,
230230
var appToCheckStatus *kcv1alpha1.App = existingApp
231231

232232
if !equality.Semantic.DeepEqual(desiredApp, existingApp) {
233-
updatedApp, err := pi.kcclient.KappctrlV1alpha1().Apps(desiredApp.Namespace).Update(
234-
context.Background(), desiredApp, metav1.UpdateOptions{})
233+
updatedApp, err := pi.updateAppWithRetry(existingApp, func(app *kcv1alpha1.App) (*kcv1alpha1.App, error) {
234+
return NewApp(app, pi.model, pkgWithPlaceholderSecrets, pi.opts)
235+
})
235236
if err != nil {
236237
return reconcile.Result{Requeue: true}, err
237238
}
@@ -404,14 +405,22 @@ func (pi *PackageInstallCR) reconcileDelete(modelStatus *reconciler.Status) (rec
404405
existingApp.Spec.DefaultNamespace = pi.model.Spec.DefaultNamespace
405406

406407
if !equality.Semantic.DeepEqual(existingApp, unchangeExistingApp) {
407-
existingApp, err = pi.kcclient.KappctrlV1alpha1().Apps(existingApp.Namespace).Update(
408-
context.Background(), existingApp, metav1.UpdateOptions{})
408+
updatedApp, err := pi.updateAppWithRetry(existingApp, func(app *kcv1alpha1.App) (*kcv1alpha1.App, error) {
409+
app.Spec.ServiceAccountName = existingApp.Spec.ServiceAccountName
410+
app.Spec.Cluster = existingApp.Spec.Cluster
411+
app.Spec.NoopDelete = existingApp.Spec.NoopDelete
412+
app.Spec.Paused = existingApp.Spec.Paused
413+
app.Spec.Canceled = existingApp.Spec.Canceled
414+
app.Spec.DefaultNamespace = existingApp.Spec.DefaultNamespace
415+
return app, nil
416+
})
409417
if err != nil {
410418
if errors.IsNotFound(err) {
411419
return reconcile.Result{}, pi.unblockDeletion()
412420
}
413421
return reconcile.Result{Requeue: true}, err
414422
}
423+
existingApp = updatedApp
415424
}
416425

417426
if existingApp.DeletionTimestamp == nil {
@@ -549,3 +558,33 @@ func (pi PackageInstallCR) createSecretForSecretgenController(iteration int) (st
549558
}
550559
return secretName, nil
551560
}
561+
562+
// updateAppWithRetry updates an App CR with retry logic to handle conflicts
563+
func (pi *PackageInstallCR) updateAppWithRetry(app *kcv1alpha1.App, updateFunc func(*kcv1alpha1.App) (*kcv1alpha1.App, error)) (*kcv1alpha1.App, error) {
564+
var lastErr error
565+
for i := 0; i < 5; i++ {
566+
desiredApp, err := updateFunc(app)
567+
if err != nil {
568+
return nil, fmt.Errorf("update function failed: %s", err)
569+
}
570+
571+
updatedApp, err := pi.kcclient.KappctrlV1alpha1().Apps(desiredApp.Namespace).Update(
572+
context.Background(), desiredApp, metav1.UpdateOptions{})
573+
if err == nil {
574+
return updatedApp, nil
575+
}
576+
lastErr = err
577+
578+
if !errors.IsConflict(err) {
579+
return nil, err
580+
}
581+
582+
app, err = pi.kcclient.KappctrlV1alpha1().Apps(app.Namespace).Get(
583+
context.Background(), app.Name, metav1.GetOptions{})
584+
if err != nil {
585+
return nil, err
586+
}
587+
}
588+
589+
return nil, fmt.Errorf("updating app after 5 retries: %s", lastErr)
590+
}

pkg/packageinstall/packageinstall_test.go

Lines changed: 254 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"github.com/stretchr/testify/assert"
2020
"github.com/stretchr/testify/require"
2121
corev1 "k8s.io/api/core/v1"
22+
"k8s.io/apimachinery/pkg/api/errors"
2223
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2324
"k8s.io/apimachinery/pkg/runtime"
2425
"k8s.io/apimachinery/pkg/runtime/schema"
@@ -988,3 +989,256 @@ func generatePackageWithConstraints(name, version, kcConstraint string, k8sConst
988989
},
989990
}
990991
}
992+
993+
func Test_UpdateAppWithRetry_HandlesConflictErrors(t *testing.T) {
994+
log := logf.Log.WithName("kc")
995+
996+
model := &pkgingv1alpha1.PackageInstall{
997+
ObjectMeta: metav1.ObjectMeta{
998+
Name: "instl-pkg",
999+
},
1000+
Spec: pkgingv1alpha1.PackageInstallSpec{
1001+
ServiceAccountName: "default-ns-sa",
1002+
PackageRef: &pkgingv1alpha1.PackageRef{
1003+
RefName: "pkg.test.carvel.dev",
1004+
VersionSelection: &versions.VersionSelectionSemver{
1005+
Constraints: "1.0.0",
1006+
},
1007+
},
1008+
},
1009+
}
1010+
1011+
existingApp := &v1alpha1.App{
1012+
ObjectMeta: metav1.ObjectMeta{
1013+
Name: "instl-pkg",
1014+
Namespace: "default",
1015+
ResourceVersion: "1",
1016+
},
1017+
Spec: v1alpha1.AppSpec{
1018+
ServiceAccountName: "old-sa",
1019+
},
1020+
}
1021+
1022+
fakekctrl := fakekappctrl.NewSimpleClientset(existingApp)
1023+
fakek8s := fake.NewSimpleClientset()
1024+
1025+
updateAttempts := 0
1026+
conflictCount := 3 // Fail first 3 attempts with conflict, succeed on 4th
1027+
1028+
// Add reactor to simulate conflict errors on update
1029+
fakekctrl.PrependReactor("update", "apps", func(action k8stesting.Action) (handled bool, ret runtime.Object, err error) {
1030+
updateAttempts++
1031+
updateAction := action.(k8stesting.UpdateAction)
1032+
app := updateAction.GetObject().(*v1alpha1.App)
1033+
1034+
if updateAttempts <= conflictCount {
1035+
// Simulate conflict error for first few attempts
1036+
return true, nil, errors.NewConflict(schema.GroupResource{Group: "kappctrl.carvel.dev", Resource: "apps"}, "instl-pkg", fmt.Errorf("the object has been modified; please apply your changes to the latest version and try again"))
1037+
}
1038+
1039+
// Succeed on final attempt
1040+
app.ResourceVersion = fmt.Sprintf("%d", updateAttempts)
1041+
return false, app, nil
1042+
})
1043+
1044+
// Add reactor to simulate fresh fetch on Get (after conflict)
1045+
getFreshCount := 0
1046+
fakekctrl.PrependReactor("get", "apps", func(_ k8stesting.Action) (handled bool, ret runtime.Object, err error) {
1047+
getFreshCount++
1048+
freshApp := existingApp.DeepCopy()
1049+
freshApp.ResourceVersion = fmt.Sprintf("fresh-%d", getFreshCount)
1050+
return true, freshApp, nil
1051+
})
1052+
1053+
ip := NewPackageInstallCR(model, log, fakekctrl, nil, fakek8s,
1054+
FakeComponentInfo{KCVersion: semver.MustParse("0.42.31337")}, Opts{},
1055+
metrics.NewMetrics())
1056+
1057+
updatedApp, err := ip.updateAppWithRetry(existingApp, func(app *v1alpha1.App) (*v1alpha1.App, error) {
1058+
app.Spec.ServiceAccountName = "new-sa"
1059+
return app, nil
1060+
})
1061+
1062+
// Verify the retry logic worked
1063+
assert.Nil(t, err, "updateAppWithRetry should succeed after retries")
1064+
assert.NotNil(t, updatedApp, "should return updated app")
1065+
assert.Equal(t, "new-sa", updatedApp.Spec.ServiceAccountName, "should have applied the transformation")
1066+
assert.Equal(t, conflictCount+1, updateAttempts, "should have attempted update 4 times (3 conflicts + 1 success)")
1067+
assert.Equal(t, conflictCount, getFreshCount, "should have fetched fresh app 3 times after conflicts")
1068+
}
1069+
1070+
func Test_UpdateAppWithRetry_HandlesNotFoundError(t *testing.T) {
1071+
log := logf.Log.WithName("kc")
1072+
1073+
// Create a PackageInstall
1074+
model := &pkgingv1alpha1.PackageInstall{
1075+
ObjectMeta: metav1.ObjectMeta{
1076+
Name: "instl-pkg",
1077+
},
1078+
Spec: pkgingv1alpha1.PackageInstallSpec{
1079+
ServiceAccountName: "default-ns-sa",
1080+
PackageRef: &pkgingv1alpha1.PackageRef{
1081+
RefName: "pkg.test.carvel.dev",
1082+
VersionSelection: &versions.VersionSelectionSemver{
1083+
Constraints: "1.0.0",
1084+
},
1085+
},
1086+
},
1087+
}
1088+
1089+
// Create an existing App
1090+
existingApp := &v1alpha1.App{
1091+
ObjectMeta: metav1.ObjectMeta{
1092+
Name: "instl-pkg",
1093+
Namespace: "default",
1094+
ResourceVersion: "1",
1095+
},
1096+
Spec: v1alpha1.AppSpec{
1097+
ServiceAccountName: "old-sa",
1098+
},
1099+
}
1100+
1101+
// Create fake clients
1102+
fakekctrl := fakekappctrl.NewSimpleClientset()
1103+
fakek8s := fake.NewSimpleClientset()
1104+
1105+
// Track update attempts
1106+
updateAttempts := 0
1107+
1108+
// Add reactor to simulate NotFound error on update
1109+
fakekctrl.PrependReactor("update", "apps", func(_ k8stesting.Action) (handled bool, ret runtime.Object, err error) {
1110+
updateAttempts++
1111+
// Simulate NotFound error
1112+
return true, nil, errors.NewNotFound(schema.GroupResource{Group: "kappctrl.carvel.dev", Resource: "apps"}, "instl-pkg")
1113+
})
1114+
1115+
ip := NewPackageInstallCR(model, log, fakekctrl, nil, fakek8s,
1116+
FakeComponentInfo{KCVersion: semver.MustParse("0.42.31337")}, Opts{},
1117+
metrics.NewMetrics())
1118+
1119+
// Test the updateAppWithRetry function
1120+
updatedApp, err := ip.updateAppWithRetry(existingApp, func(app *v1alpha1.App) (*v1alpha1.App, error) {
1121+
// Simple transformation: update service account name
1122+
app.Spec.ServiceAccountName = "new-sa"
1123+
return app, nil
1124+
})
1125+
1126+
// Verify NotFound error is returned immediately (no retries)
1127+
assert.NotNil(t, err, "should return error")
1128+
assert.Contains(t, err.Error(), "not found", "should preserve NotFound error")
1129+
assert.Nil(t, updatedApp, "should not return updated app")
1130+
assert.Equal(t, 1, updateAttempts, "should only attempt update once for NotFound error")
1131+
}
1132+
1133+
func Test_UpdateAppWithRetry_HandlesUpdateFunctionError(t *testing.T) {
1134+
log := logf.Log.WithName("kc")
1135+
1136+
// Create a PackageInstall
1137+
model := &pkgingv1alpha1.PackageInstall{
1138+
ObjectMeta: metav1.ObjectMeta{
1139+
Name: "instl-pkg",
1140+
},
1141+
Spec: pkgingv1alpha1.PackageInstallSpec{
1142+
ServiceAccountName: "default-ns-sa",
1143+
PackageRef: &pkgingv1alpha1.PackageRef{
1144+
RefName: "pkg.test.carvel.dev",
1145+
VersionSelection: &versions.VersionSelectionSemver{
1146+
Constraints: "1.0.0",
1147+
},
1148+
},
1149+
},
1150+
}
1151+
1152+
// Create an existing App
1153+
existingApp := &v1alpha1.App{
1154+
ObjectMeta: metav1.ObjectMeta{
1155+
Name: "instl-pkg",
1156+
Namespace: "default",
1157+
ResourceVersion: "1",
1158+
},
1159+
Spec: v1alpha1.AppSpec{
1160+
ServiceAccountName: "old-sa",
1161+
},
1162+
}
1163+
1164+
// Create fake clients
1165+
fakekctrl := fakekappctrl.NewSimpleClientset(existingApp)
1166+
fakek8s := fake.NewSimpleClientset()
1167+
1168+
ip := NewPackageInstallCR(model, log, fakekctrl, nil, fakek8s,
1169+
FakeComponentInfo{KCVersion: semver.MustParse("0.42.31337")}, Opts{},
1170+
metrics.NewMetrics())
1171+
1172+
// Test the updateAppWithRetry function with failing update function
1173+
updatedApp, err := ip.updateAppWithRetry(existingApp, func(_ *v1alpha1.App) (*v1alpha1.App, error) {
1174+
return nil, fmt.Errorf("update function failed")
1175+
})
1176+
1177+
// Verify update function error is handled properly
1178+
assert.NotNil(t, err, "should return error")
1179+
assert.Contains(t, err.Error(), "update function failed", "should preserve update function error")
1180+
assert.Nil(t, updatedApp, "should not return updated app")
1181+
}
1182+
1183+
func Test_UpdateAppWithRetry_HandlesNonConflictError(t *testing.T) {
1184+
log := logf.Log.WithName("kc")
1185+
1186+
// Create a PackageInstall
1187+
model := &pkgingv1alpha1.PackageInstall{
1188+
ObjectMeta: metav1.ObjectMeta{
1189+
Name: "instl-pkg",
1190+
},
1191+
Spec: pkgingv1alpha1.PackageInstallSpec{
1192+
ServiceAccountName: "default-ns-sa",
1193+
PackageRef: &pkgingv1alpha1.PackageRef{
1194+
RefName: "pkg.test.carvel.dev",
1195+
VersionSelection: &versions.VersionSelectionSemver{
1196+
Constraints: "1.0.0",
1197+
},
1198+
},
1199+
},
1200+
}
1201+
1202+
// Create an existing App
1203+
existingApp := &v1alpha1.App{
1204+
ObjectMeta: metav1.ObjectMeta{
1205+
Name: "instl-pkg",
1206+
Namespace: "default",
1207+
ResourceVersion: "1",
1208+
},
1209+
Spec: v1alpha1.AppSpec{
1210+
ServiceAccountName: "old-sa",
1211+
},
1212+
}
1213+
1214+
// Create fake clients
1215+
fakekctrl := fakekappctrl.NewSimpleClientset(existingApp)
1216+
fakek8s := fake.NewSimpleClientset()
1217+
1218+
// Track update attempts
1219+
updateAttempts := 0
1220+
1221+
// Add reactor to simulate non-conflict error on update (e.g., validation error)
1222+
fakekctrl.PrependReactor("update", "apps", func(_ k8stesting.Action) (handled bool, ret runtime.Object, err error) {
1223+
updateAttempts++
1224+
// Simulate a validation error (non-conflict error)
1225+
return true, nil, fmt.Errorf("validation failed: invalid spec")
1226+
})
1227+
1228+
ip := NewPackageInstallCR(model, log, fakekctrl, nil, fakek8s,
1229+
FakeComponentInfo{KCVersion: semver.MustParse("0.42.31337")}, Opts{},
1230+
metrics.NewMetrics())
1231+
1232+
// Test the updateAppWithRetry function
1233+
updatedApp, err := ip.updateAppWithRetry(existingApp, func(app *v1alpha1.App) (*v1alpha1.App, error) {
1234+
// Simple transformation: update service account name
1235+
app.Spec.ServiceAccountName = "new-sa"
1236+
return app, nil
1237+
})
1238+
1239+
// Verify non-conflict error is returned immediately (no retries)
1240+
assert.NotNil(t, err, "should return error")
1241+
assert.Contains(t, err.Error(), "validation failed", "should preserve non-conflict error")
1242+
assert.Nil(t, updatedApp, "should not return updated app")
1243+
assert.Equal(t, 1, updateAttempts, "should only attempt update once for non-conflict error")
1244+
}

0 commit comments

Comments
 (0)