From 4ead0f3a17ed452d55f2fc0b08b6252495254b31 Mon Sep 17 00:00:00 2001 From: "newton@alisx.com" Date: Thu, 22 Aug 2024 18:45:20 +0300 Subject: [PATCH] fix(spanner): ensure failure of column_metadata table update doesn't cause an apply failure --- internal/spanner/services/services.go | 7 ++-- internal/spanner/services/utils.go | 12 ++++++- internal/utils/retry.go | 44 +++++++++++++++++++++++ internal/utils/retry_test.go | 51 +++++++++++++++++++++++++++ 4 files changed, 111 insertions(+), 3 deletions(-) create mode 100644 internal/utils/retry.go create mode 100644 internal/utils/retry_test.go diff --git a/internal/spanner/services/services.go b/internal/spanner/services/services.go index ddf2557..87f045b 100644 --- a/internal/spanner/services/services.go +++ b/internal/spanner/services/services.go @@ -1127,7 +1127,9 @@ func (s *SpannerService) CreateSpannerTable(ctx context.Context, parent string, } if err := UpdateColumnMetadata(db, tableId, table.Schema.Columns); err != nil { - return nil, status.Errorf(codes.Internal, "Error updating column metadata table: %v", err) + // This is not a fatal error, so we log it and continue + tfLogger.Error(ctx, fmt.Sprintf("Error updating column metadata table: %v", err)) + //return nil, status.Errorf(codes.Internal, "Error updating column metadata table: %v", err) } // Get created table @@ -1726,7 +1728,8 @@ func (s *SpannerService) UpdateSpannerTable(ctx context.Context, table *SpannerT } if err := UpdateColumnMetadata(db, tableId, table.Schema.Columns); err != nil { - return nil, status.Errorf(codes.Internal, "Error updating column metadata table: %v", err) + // This is not a fatal error, so we log it and continue + tfLogger.Error(ctx, fmt.Sprintf("Error updating column metadata table: %v", err)) } return table, nil diff --git a/internal/spanner/services/utils.go b/internal/spanner/services/utils.go index 89d6ff7..d897c23 100644 --- a/internal/spanner/services/utils.go +++ b/internal/spanner/services/utils.go @@ -141,7 +141,17 @@ func GetColumnMetadata(db *gorm.DB, tableName string) ([]*ColumnMetadata, error) } func UpdateColumnMetadata(db *gorm.DB, tableName string, columns []*SpannerTableColumn) error { // Create or Update ColumnMetadata table - if err := db.AutoMigrate(&ColumnMetadata{}); err != nil { + // IMPORTANT: When tables don't depend on each other, terraform will attempt to create them in parallel. + // This can cause the migration to run at the same time for multiple tables, which can lead to a duplicate table error. + // To prevent this, we'll retry the migration a few times. + _, err := utils.Retry(5, 10*time.Second, func() (interface{}, error) { + if err := db.AutoMigrate(&ColumnMetadata{}); err != nil { + return nil, err + } + + return nil, nil + }) + if err != nil { return err } diff --git a/internal/utils/retry.go b/internal/utils/retry.go new file mode 100644 index 0000000..a3852e8 --- /dev/null +++ b/internal/utils/retry.go @@ -0,0 +1,44 @@ +package utils + +import ( + "math/rand" + "time" +) + +// Retry is a utility function to retry a function a number of times with exponential backoff +// and jitter. It will return the result of the function if it succeeds, or the last error if +// it fails. +// +// If the error is a Stop, it will return the original error for later checking. +func Retry[R interface{}](attempts int, initialSleep time.Duration, f func() (R, error)) (R, error) { + if res, err := f(); err != nil { + if s, ok := err.(Stop); ok { + // Return the original error for later checking + return res, s.error + } + + if attempts--; attempts > 0 { + // Calculate exponential backoff + sleep := initialSleep * (1 << uint(attempts)) + + // Add some randomness to prevent creating a Thundering Herd + jitter := time.Duration(rand.Int63n(int64(sleep))) + sleep = sleep + jitter/2 + + time.Sleep(sleep) + return Retry[R](attempts, initialSleep, f) + } + return res, err + } else { + return res, nil + } +} + +type Stop struct { + error +} + +// NonRetryableError is a utility function to return an error that will not be retried +func NonRetryableError(err error) Stop { + return Stop{err} +} diff --git a/internal/utils/retry_test.go b/internal/utils/retry_test.go new file mode 100644 index 0000000..0630e05 --- /dev/null +++ b/internal/utils/retry_test.go @@ -0,0 +1,51 @@ +package utils + +import ( + "fmt" + "reflect" + "testing" + "time" +) + +func TestRetry(t *testing.T) { + type args[R interface{}] struct { + attempts int + initialSleep time.Duration + f func() (R, error) + } + type testCase[R interface{}] struct { + name string + args args[R] + want R + wantErr bool + } + tests := []testCase[interface{}]{ + { + name: "Test Retry", + args: args[interface{}]{ + attempts: 2, + initialSleep: 5 * time.Second, + f: func() (interface{}, error) { + return "test", fmt.Errorf("error") + }, + }, + want: "test", + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tStart := time.Now() + got, err := Retry(tt.args.attempts, tt.args.initialSleep, tt.args.f) + tEnd := time.Now() + fmt.Println("Time taken: ", tEnd.Sub(tStart)) + if (err != nil) != tt.wantErr { + t.Errorf("Retry() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("Retry() got = %v, want %v", got, tt.want) + } + }) + } +}