Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 53 additions & 15 deletions components/operator/internal/handlers/sessions.go
Original file line number Diff line number Diff line change
Expand Up @@ -1234,6 +1234,39 @@ func CleanupExpiredTempContentPods() {

// copySecretToNamespace copies a secret to a target namespace with owner references
func copySecretToNamespace(ctx context.Context, sourceSecret *corev1.Secret, targetNamespace string, ownerObj *unstructured.Unstructured) error {
// Check if secret already exists in target namespace
existingSecret, err := config.K8sClient.CoreV1().Secrets(targetNamespace).Get(ctx, sourceSecret.Name, v1.GetOptions{})
secretExists := err == nil
if err != nil && !errors.IsNotFound(err) {
return fmt.Errorf("error checking for existing secret: %w", err)
}
Comment on lines +1237 to +1242
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Major Issue - Unnecessary Double Fetch

You're fetching the secret twice when it exists:

  1. Line 1238: Initial fetch into existingSecret
  2. Line 1306: Re-fetch in retry loop into currentSecret

The first fetch is only used to check for an existing controller (lines 1250-1256), then immediately discarded when entering the update path.

Recommendation: Remove the initial fetch and move the controller check into the retry loop:

func copySecretToNamespace(ctx context.Context, sourceSecret *corev1.Secret, targetNamespace string, ownerObj *unstructured.Unstructured) error {
    // Create owner reference structure (will set Controller later based on existing state)
    newOwnerRef := v1.OwnerReference{
        APIVersion: ownerObj.GetAPIVersion(),
        Kind:       ownerObj.GetKind(),
        Name:       ownerObj.GetName(),
        UID:        ownerObj.GetUID(),
    }

    // Create a new secret in the target namespace
    newSecret := &corev1.Secret{
        // ... (same as current)
    }

    // Try to create the secret first (optimistic path for new secrets)
    _, err := config.K8sClient.CoreV1().Secrets(targetNamespace).Create(ctx, newSecret, v1.CreateOptions{})
    if err == nil {
        return nil  // Success - secret created
    }
    
    if !errors.IsAlreadyExists(err) {
        return fmt.Errorf("error creating secret: %w", err)
    }

    // Secret exists, update it with retry logic
    log.Printf("Secret %s already exists in namespace %s, checking if update needed", sourceSecret.Name, targetNamespace)
    
    return retry.RetryOnConflict(retry.DefaultRetry, func() error {
        currentSecret, err := config.K8sClient.CoreV1().Secrets(targetNamespace).Get(ctx, sourceSecret.Name, v1.GetOptions{})
        if err != nil {
            return err
        }

        // Check if already has our owner reference
        for _, ownerRef := range currentSecret.OwnerReferences {
            if ownerRef.UID == ownerObj.GetUID() {
                log.Printf("Secret %s already has correct owner reference, skipping", sourceSecret.Name)
                return nil
            }
        }

        // Check for existing controller
        hasController := false
        for _, ownerRef := range currentSecret.OwnerReferences {
            if ownerRef.Controller != nil && *ownerRef.Controller {
                hasController = true
                break
            }
        }

        // Create owner ref based on current state
        ownerRefToAdd := newOwnerRef
        if !hasController {
            ownerRefToAdd.Controller = boolPtr(true)
        }

        // Apply updates
        currentSecret.OwnerReferences = append([]v1.OwnerReference{}, currentSecret.OwnerReferences...)
        currentSecret.OwnerReferences = append(currentSecret.OwnerReferences, ownerRefToAdd)
        currentSecret.Data = sourceSecret.Data
        if currentSecret.Annotations == nil {
            currentSecret.Annotations = make(map[string]string)
        }
        currentSecret.Annotations[types.CopiedFromAnnotation] = fmt.Sprintf("%s/%s", sourceSecret.Namespace, sourceSecret.Name)

        _, err = config.K8sClient.CoreV1().Secrets(targetNamespace).Update(ctx, currentSecret, v1.UpdateOptions{})
        return err
    })
}

Benefits:

  • Eliminates redundant API call (better performance)
  • Simpler control flow (create-first pattern)
  • Follows Kubernetes best practice of optimistic creation
  • Single source of truth for controller detection logic


// Determine if we should set Controller: true
// For shared secrets (like ambient-vertex), don't set Controller: true if secret already exists
// to avoid conflicts when multiple sessions use the same secret
shouldSetController := true
if secretExists {
Comment on lines +1244 to +1248
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Code Quality: Redundant logic - shouldSetController is recalculated

The shouldSetController variable is determined here based on the first fetch of existingSecret, but then the exact same check is performed again inside the retry loop (lines 1312-1318).

This creates two issues:

  1. Redundant code - the same logic is duplicated
  2. Potential inconsistency - the outer check uses existingSecret but the inner check uses currentSecret (refetched). If another process modifies the secret between these checks, they could give different results.

Recommendation: Remove the outer shouldSetController logic and only perform the check inside the retry loop where you have the most up-to-date data. The outer check is unnecessary since:

  • If the secret doesn't exist, the Create path is taken (line 1343)
  • If the secret exists, the retry loop always refetches (line 1306) and checks again (lines 1312-1318)

// Check if existing secret already has a controller reference
for _, ownerRef := range existingSecret.OwnerReferences {
if ownerRef.Controller != nil && *ownerRef.Controller {
shouldSetController = false
log.Printf("Secret %s already has a controller reference, adding non-controller reference instead", sourceSecret.Name)
break
}
}
}
Comment on lines +1244 to +1257
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Major Issue - Logging Without Context

The log message on line 1253 doesn't include the namespace, which makes debugging multi-tenant deployments difficult.

Current:

log.Printf("Secret %s already has a controller reference, adding non-controller reference instead", sourceSecret.Name)

Recommended:

log.Printf("Secret %s in namespace %s already has a controller reference, adding non-controller reference for session %s/%s", 
    sourceSecret.Name, targetNamespace, ownerObj.GetNamespace(), ownerObj.GetName())

This follows the pattern from line 1287 and provides critical debugging context. Per CLAUDE.md operator patterns: "Logging: Structured logs with relevant context (namespace, resource name, etc.)"


// Create owner reference
newOwnerRef := v1.OwnerReference{
APIVersion: ownerObj.GetAPIVersion(),
Kind: ownerObj.GetKind(),
Name: ownerObj.GetName(),
UID: ownerObj.GetUID(),
}
if shouldSetController {
newOwnerRef.Controller = boolPtr(true)
Comment on lines +1259 to +1267
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔵 Code Quality: newOwnerRef modified in two places

The newOwnerRef is created here with conditional Controller field, but then it's potentially modified again inside the retry loop (lines 1321-1324). This is confusing because:

  1. The outer newOwnerRef may have Controller: true set
  2. Then inside the retry loop, we create ownerRefToAdd := newOwnerRef (which copies it)
  3. Then we set ownerRefToAdd.Controller = nil if needed

This works, but creates unnecessary complexity. If you remove the redundant outer shouldSetController logic as suggested above, you can simplify this to just create a basic owner ref without the Controller field, then set it only inside the retry loop where you have the most current data.

}
Comment on lines +1260 to +1268
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 Critical Issue - Controller Set Before Entering Update Path

There's a logical flaw here. You're setting Controller: true on newOwnerRef based on the initial check (line 1266-1268), but this reference is used when creating a new secret (line 1279).

The problem: When creating a brand new secret, it should ALWAYS have Controller: true since it's the first (and only) owner reference. The shouldSetController logic only makes sense for the update path.

Current flow:

  1. Check if secret exists and has controller → set shouldSetController
  2. Create newOwnerRef with or without Controller: true
  3. If secret exists → enter update path (which re-checks controller anyway!)
  4. If secret doesn't exist → create with newOwnerRef (might incorrectly have Controller: false/nil)

The bug: If you somehow get into a race where:

  • Initial check finds no secret (or finds secret without controller)
  • Sets shouldSetController = true
  • Secret is created by another goroutine WITH a controller
  • Your code tries to create, gets AlreadyExists error...
  • But wait, the create path doesn't handle AlreadyExists!

Current code at line 1343-1345:

// Create the secret
_, err = config.K8sClient.CoreV1().Secrets(targetNamespace).Create(ctx, newSecret, v1.CreateOptions{})
return err

If the secret was created between line 1238 and 1344, this will return an error instead of falling back to update logic!

Recommendation: Use the create-first pattern I suggested in my earlier comment. This eliminates all these issues.


// Create a new secret in the target namespace
newSecret := &corev1.Secret{
ObjectMeta: v1.ObjectMeta{
Expand All @@ -1243,22 +1276,13 @@ func copySecretToNamespace(ctx context.Context, sourceSecret *corev1.Secret, tar
Annotations: map[string]string{
types.CopiedFromAnnotation: fmt.Sprintf("%s/%s", sourceSecret.Namespace, sourceSecret.Name),
},
OwnerReferences: []v1.OwnerReference{
{
APIVersion: ownerObj.GetAPIVersion(),
Kind: ownerObj.GetKind(),
Name: ownerObj.GetName(),
UID: ownerObj.GetUID(),
Controller: boolPtr(true),
},
},
OwnerReferences: []v1.OwnerReference{newOwnerRef},
},
Type: sourceSecret.Type,
Data: sourceSecret.Data,
}

// Check if secret already exists in target namespace
if existingSecret, err := config.K8sClient.CoreV1().Secrets(targetNamespace).Get(ctx, sourceSecret.Name, v1.GetOptions{}); err == nil {
if secretExists {
// Secret already exists, check if it needs to be updated
log.Printf("Secret %s already exists in namespace %s, checking if update needed", sourceSecret.Name, targetNamespace)

Expand All @@ -1284,10 +1308,26 @@ func copySecretToNamespace(ctx context.Context, sourceSecret *corev1.Secret, tar
return err
}

// Check again if there's already a controller reference (may have changed since last check)
hasController := false
for _, ownerRef := range currentSecret.OwnerReferences {
if ownerRef.Controller != nil && *ownerRef.Controller {
hasController = true
break
}
}
Comment on lines +1311 to +1318
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 Critical Issue - Race Condition with Controller Detection

There's a Time-of-Check-to-Time-of-Use (TOCTOU) race condition between the initial controller check (lines 1250-1256) and the retry loop check (lines 1312-1318).

Scenario:

  1. Thread A: Checks at line 1250, finds no controller, sets shouldSetController = true
  2. Thread B: Concurrently adds the first controller reference
  3. Thread A: Enters retry loop with newOwnerRef.Controller = true (from line 1267)
  4. Thread A: Re-checks at line 1312, finds controller, sets ownerRefToAdd.Controller = nil
  5. Result: Correct behavior due to the re-check!

However, the current code has a subtle bug:

ownerRefToAdd := newOwnerRef  // This is a struct copy
if hasController {
    ownerRefToAdd.Controller = nil  // Modifying the copy
}

This works correctly, but the logic is confusing because:

  • newOwnerRef.Controller might be true from line 1267
  • You're creating a copy and conditionally modifying it
  • The initial shouldSetController logic becomes redundant

Recommendation: Remove the duplicate controller detection logic and only check in the retry loop (see my previous comment for the simplified version). This eliminates the TOCTOU complexity entirely.


// Create a fresh owner reference based on current state
// If there's already a controller, don't set Controller: true for the new reference
ownerRefToAdd := newOwnerRef
if hasController {
ownerRefToAdd.Controller = nil
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔵 Minor Issue - Inconsistent Nil Pointer Handling

Setting Controller to nil explicitly is correct, but there's a subtle inconsistency:

  • When shouldSetController is false initially (lines 1247-1257), the Controller field is simply not set (zero value = nil)
  • When detected in the retry loop (line 1324), you explicitly set Controller = nil

For consistency and clarity, consider one of these approaches:

Option 1 (Recommended): Always explicit

if shouldSetController {
    newOwnerRef.Controller = boolPtr(true)
} else {
    newOwnerRef.Controller = nil  // Explicit nil for clarity
}

Option 2: Always omit

ownerRefToAdd := v1.OwnerReference{
    APIVersion: newOwnerRef.APIVersion,
    Kind:       newOwnerRef.Kind,
    Name:       newOwnerRef.Name,
    UID:        newOwnerRef.UID,
    // Controller intentionally omitted - will be nil
}
if !hasController {
    ownerRefToAdd.Controller = boolPtr(true)
}

The explicit approach (Option 1) makes the intent clearer to future maintainers.

}

// Apply updates
// Create a new slice to avoid mutating shared/cached data
currentSecret.OwnerReferences = append([]v1.OwnerReference{}, currentSecret.OwnerReferences...)
currentSecret.OwnerReferences = append(currentSecret.OwnerReferences, newSecret.OwnerReferences[0])
currentSecret.OwnerReferences = append(currentSecret.OwnerReferences, ownerRefToAdd)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔵 Minor: Consider checking for duplicate owner references

Before appending ownerRefToAdd, consider checking if an owner reference with the same UID already exists in currentSecret.OwnerReferences. While the outer check at lines 1290-1296 should prevent this in the normal case, race conditions could result in duplicate owner references if multiple processes try to add the same owner simultaneously.

Suggested addition:

// Check if owner reference already exists (race condition protection)
for _, existing := range currentSecret.OwnerReferences {
    if existing.UID == ownerRefToAdd.UID {
        log.Printf("Owner reference already exists for %s, skipping append", ownerRefToAdd.UID)
        return nil
    }
}
currentSecret.OwnerReferences = append(currentSecret.OwnerReferences, ownerRefToAdd)

Comment on lines 1327 to +1330
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Major Issue - Slice Copy Comment Misleading

The comment says "Create a new slice to avoid mutating shared/cached data", but this pattern doesn't fully protect against cache mutation.

Why: You're copying the slice header, but the underlying OwnerReference structs are value types (not pointers), so they're already copied. The real concern is mutating the slice itself (changing length/capacity), not the elements.

The actual risk: If currentSecret came from a cached informer, mutating its slice could cause issues. However, you're calling Get() directly (line 1306), which should return a fresh object, not a cached one.

Recommendations:

Option 1 (Current is actually fine): Keep it as-is, but clarify the comment:

// Create a new slice to avoid mutating the original slice structure
// (the OwnerReference values are already copied since they're structs)
currentSecret.OwnerReferences = append([]v1.OwnerReference{}, currentSecret.OwnerReferences...)

Option 2 (More defensive): Deep copy if you're paranoid:

// Defensive copy - append to empty slice to get fresh slice header
refs := make([]v1.OwnerReference, len(currentSecret.OwnerReferences))
copy(refs, currentSecret.OwnerReferences)
refs = append(refs, ownerRefToAdd)
currentSecret.OwnerReferences = refs

Option 3 (Simplest - recommended): Since you're getting a fresh object from the API, just append directly:

currentSecret.OwnerReferences = append(currentSecret.OwnerReferences, ownerRefToAdd)

The Kubernetes API returns fresh objects, not shared cache references, so the defensive copy is likely unnecessary unless you're using an informer cache elsewhere.

currentSecret.Data = sourceSecret.Data
if currentSecret.Annotations == nil {
currentSecret.Annotations = make(map[string]string)
Expand All @@ -1298,12 +1338,10 @@ func copySecretToNamespace(ctx context.Context, sourceSecret *corev1.Secret, tar
_, err = config.K8sClient.CoreV1().Secrets(targetNamespace).Update(ctx, currentSecret, v1.UpdateOptions{})
return err
})
} else if !errors.IsNotFound(err) {
return fmt.Errorf("error checking for existing secret: %w", err)
}

// Create the secret
_, err := config.K8sClient.CoreV1().Secrets(targetNamespace).Create(ctx, newSecret, v1.CreateOptions{})
_, err = config.K8sClient.CoreV1().Secrets(targetNamespace).Create(ctx, newSecret, v1.CreateOptions{})
return err
}

Expand Down
97 changes: 97 additions & 0 deletions components/operator/internal/handlers/sessions_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,103 @@ func TestCopySecretToNamespace_MultipleOwnerReferences(t *testing.T) {
}
}

// TestCopySecretToNamespace_ExistingController tests adding owner ref when secret already has a controller
func TestCopySecretToNamespace_ExistingController(t *testing.T) {
existingSecret := &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: "ambient-vertex",
Namespace: "target-ns",
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "vteam.ambient-code/v1alpha1",
Kind: "AgenticSession",
Name: "existing-session",
UID: k8stypes.UID("existing-uid-111"),
Controller: boolPtr(true), // Already has a controller
},
},
},
Type: corev1.SecretTypeOpaque,
Data: map[string][]byte{
"key": []byte("value"),
},
}

setupTestClient(existingSecret)

sourceSecret := &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: "ambient-vertex",
Namespace: "source-ns",
},
Type: corev1.SecretTypeOpaque,
Data: map[string][]byte{
"key": []byte("updated-value"),
},
}

ownerObj := &unstructured.Unstructured{}
ownerObj.SetAPIVersion("vteam.ambient-code/v1alpha1")
ownerObj.SetKind("AgenticSession")
ownerObj.SetName("new-session")
ownerObj.SetUID(k8stypes.UID("new-uid-222"))

ctx := context.Background()
err := copySecretToNamespace(ctx, sourceSecret, "target-ns", ownerObj)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔵 Test Enhancement: Consider testing error case

The test only covers the happy path. Consider adding a subtest for when the controller check race condition occurs (where the retry loop finds a different controller than the outer check).

Also consider testing:

  1. What happens when the source secret data is identical to existing secret data
  2. Concurrent calls to copySecretToNamespace with the same owner
  3. Concurrent calls with different owners

if err != nil {
t.Fatalf("copySecretToNamespace failed: %v", err)
}

// Verify secret has both owner references
result, err := config.K8sClient.CoreV1().Secrets("target-ns").Get(ctx, "ambient-vertex", metav1.GetOptions{})
if err != nil {
t.Fatalf("Failed to get secret: %v", err)
}

if len(result.OwnerReferences) != 2 {
t.Fatalf("Expected 2 owner references, got %d", len(result.OwnerReferences))
}

// Verify only one controller reference exists
controllerCount := 0
foundExisting := false
foundNew := false
for _, ref := range result.OwnerReferences {
if ref.Controller != nil && *ref.Controller {
controllerCount++
}
if ref.UID == k8stypes.UID("existing-uid-111") {
foundExisting = true
// Original controller should still be true
if ref.Controller == nil || !*ref.Controller {
t.Error("Existing controller reference should still have Controller: true")
}
}
if ref.UID == k8stypes.UID("new-uid-222") {
foundNew = true
// New reference should NOT have Controller: true
if ref.Controller != nil && *ref.Controller {
t.Error("New owner reference should NOT have Controller: true when secret already has a controller")
}
}
}
Comment on lines +408 to +430
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔵 Minor Issue - Test Could Be More Robust

The test validation logic is good, but could be more explicit about what's being tested:

Current approach: Checks all owner refs in a single loop
Issue: If the test fails, the error messages don't clearly indicate which check failed

Suggested improvement:

// Verify only one controller reference exists
controllerRefs := []v1.OwnerReference{}
var existingRef, newRef *v1.OwnerReference

for i := range result.OwnerReferences {
    ref := &result.OwnerReferences[i]
    if ref.Controller != nil && *ref.Controller {
        controllerRefs = append(controllerRefs, *ref)
    }
    if ref.UID == k8stypes.UID("existing-uid-111") {
        existingRef = ref
    }
    if ref.UID == k8stypes.UID("new-uid-222") {
        newRef = ref
    }
}

// Clear, separated assertions
if len(controllerRefs) != 1 {
    t.Errorf("Expected exactly 1 controller reference, got %d: %+v", len(controllerRefs), controllerRefs)
}

if existingRef == nil {
    t.Fatal("Existing owner reference was lost")
}
if existingRef.Controller == nil || !*existingRef.Controller {
    t.Error("Existing controller reference should still have Controller: true")
}

if newRef == nil {
    t.Fatal("New owner reference was not added")
}
if newRef.Controller != nil && *newRef.Controller {
    t.Error("New owner reference should NOT have Controller: true when secret already has a controller")
}

Benefits:

  • Clearer failure messages (shows the actual controller refs)
  • Separated concerns (easier to debug which assertion failed)
  • Fatal vs Error used appropriately (can't check fields if ref is nil)


if controllerCount != 1 {
t.Errorf("Expected exactly 1 controller reference, got %d", controllerCount)
}
if !foundExisting {
t.Error("Existing owner reference was lost")
}
if !foundNew {
t.Error("New owner reference was not added")
}

// Verify data was updated
if string(result.Data["key"]) != "updated-value" {
t.Errorf("Expected data 'updated-value', got '%s'", string(result.Data["key"]))
}
Comment on lines +442 to +445
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

✅ Good Test Coverage

Excellent test assertion! This correctly verifies that the secret data is updated even when only adding an owner reference. This is important for shared secrets like ambient-vertex where the source secret might have been updated since the target namespace copy was created.

}
Comment on lines +351 to +446
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔵 Minor Issue - Missing Test Coverage

Great test for the controller conflict scenario! However, I notice some missing edge cases:

Missing test cases:

  1. Duplicate owner reference prevention: What happens if you call copySecretToNamespace twice with the same ownerObj? The code checks if ownerRef.UID == ownerObj.GetUID() (line 1292), but there's no test for this.

  2. Race condition during update: What happens if the secret is deleted between the existence check and the retry loop?

  3. Concurrent controller addition: What if two sessions try to add themselves as controller simultaneously (both see no controller initially)?

  4. Annotations preservation: When updating an existing secret, are existing annotations preserved (besides the CopiedFromAnnotation)?

  5. Labels preservation: When updating, are existing labels preserved?

Suggested additional test:

func TestCopySecretToNamespace_IdempotentOwnerRef(t *testing.T) {
    // Test that calling copySecretToNamespace twice with same owner doesn't duplicate refs
    sourceSecret := &corev1.Secret{ /* ... */ }
    ownerObj := &unstructured.Unstructured{ /* ... */ }
    
    // First call
    err := copySecretToNamespace(ctx, sourceSecret, "target-ns", ownerObj)
    if err != nil {
        t.Fatalf("First copy failed: %v", err)
    }
    
    // Second call with same owner
    err = copySecretToNamespace(ctx, sourceSecret, "target-ns", ownerObj)
    if err != nil {
        t.Fatalf("Second copy failed: %v", err)
    }
    
    result, _ := config.K8sClient.CoreV1().Secrets("target-ns").Get(ctx, sourceSecret.Name, metav1.GetOptions{})
    
    // Should still have only one owner reference
    if len(result.OwnerReferences) != 1 {
        t.Errorf("Expected 1 owner reference after duplicate call, got %d", len(result.OwnerReferences))
    }
}


// TestCopySecretToNamespace_NilAnnotations tests updating secret with nil annotations
func TestCopySecretToNamespace_NilAnnotations(t *testing.T) {
existingSecret := &corev1.Secret{
Expand Down
Loading