Skip to content

Commit b15c262

Browse files
(fix) catalog deletion resilience support
Enables installed extensions to continue working when their source catalog becomes unavailable or is deleted. When resolution fails because the catalog is unavailable, the operator now continues reconciling with the currently installed bundle instead of failing.

Changes:
- Resolution falls back to the installed bundle when the catalog is unavailable
- Unpacking is skipped when maintaining the current installed state
- Helm and Boxcutter appliers handle a nil contentFS gracefully
- Version upgrades are properly blocked without catalog access

This ensures workloads remain stable and operational even when the catalog they were installed from is temporarily unavailable or deleted, while appropriately preventing version changes that require catalog access.
1 parent 0cf8c11 commit b15c262

File tree

3 files changed

+95
-11
lines changed

3 files changed

+95
-11
lines changed

internal/operator-controller/applier/boxcutter.go

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -317,22 +317,34 @@ func (bc *Boxcutter) createOrUpdate(ctx context.Context, obj client.Object) erro
317317
}
318318

319319
func (bc *Boxcutter) apply(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExtension, objectLabels, revisionAnnotations map[string]string) (bool, string, error) {
320-
// Generate desired revision
321-
desiredRevision, err := bc.RevisionGenerator.GenerateRevision(ctx, contentFS, ext, objectLabels, revisionAnnotations)
320+
// List all existing revisions
321+
existingRevisions, err := bc.getExistingRevisions(ctx, ext.GetName())
322322
if err != nil {
323323
return false, "", err
324324
}
325325

326-
if err := controllerutil.SetControllerReference(ext, desiredRevision, bc.Scheme); err != nil {
327-
return false, "", fmt.Errorf("set ownerref: %w", err)
326+
// If contentFS is nil, we're maintaining the current state without catalog access.
327+
// In this case, we should use the existing installed revision without generating a new one.
328+
if contentFS == nil {
329+
if len(existingRevisions) == 0 {
330+
return false, "", fmt.Errorf("no bundle content available and no existing revisions found")
331+
}
332+
// Use the most recent revision and just reconcile it (don't create a new one)
333+
// Boxcutter's ClusterExtensionRevision reconciliation will handle maintaining the resources
334+
// Return success since we're maintaining the current state
335+
return true, "", nil
328336
}
329337

330-
// List all existing revisions
331-
existingRevisions, err := bc.getExistingRevisions(ctx, ext.GetName())
338+
// Generate desired revision
339+
desiredRevision, err := bc.RevisionGenerator.GenerateRevision(ctx, contentFS, ext, objectLabels, revisionAnnotations)
332340
if err != nil {
333341
return false, "", err
334342
}
335343

344+
if err := controllerutil.SetControllerReference(ext, desiredRevision, bc.Scheme); err != nil {
345+
return false, "", fmt.Errorf("set ownerref: %w", err)
346+
}
347+
336348
currentRevision := &ocv1.ClusterExtensionRevision{}
337349
state := StateNeedsInstall
338350
// check if we can update the current revision.

internal/operator-controller/applier/helm.go

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,16 @@ func (h *Helm) runPreAuthorizationChecks(ctx context.Context, ext *ocv1.ClusterE
103103
}
104104

105105
func (h *Helm) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExtension, objectLabels map[string]string, storageLabels map[string]string) (bool, string, error) {
106+
// If contentFS is nil, we're maintaining the current state without catalog access.
107+
// In this case, reconcile the existing Helm release if it exists.
108+
if contentFS == nil {
109+
ac, err := h.ActionClientGetter.ActionClientFor(ctx, ext)
110+
if err != nil {
111+
return false, "", err
112+
}
113+
return h.reconcileExistingRelease(ctx, ac, ext)
114+
}
115+
106116
chrt, err := h.buildHelmChart(contentFS, ext)
107117
if err != nil {
108118
return false, "", err
@@ -197,6 +207,40 @@ func (h *Helm) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExte
197207
return true, "", nil
198208
}
199209

210+
// reconcileExistingRelease reconciles an existing Helm release without catalog access.
211+
// This is used when the catalog is unavailable but we need to maintain the current installation.
212+
// It reconciles the release and sets up watchers to ensure resources are maintained.
213+
func (h *Helm) reconcileExistingRelease(ctx context.Context, ac helmclient.ActionInterface, ext *ocv1.ClusterExtension) (bool, string, error) {
214+
rel, err := ac.Get(ext.GetName())
215+
if errors.Is(err, driver.ErrReleaseNotFound) {
216+
return false, "", fmt.Errorf("no bundle content available and no existing release found")
217+
}
218+
if err != nil {
219+
return false, "", fmt.Errorf("getting current release: %w", err)
220+
}
221+
222+
// Reconcile the existing release to ensure resources are maintained
223+
if err := ac.Reconcile(rel); err != nil {
224+
return false, "", err
225+
}
226+
227+
// Watch the release objects to ensure they're maintained
228+
relObjects, err := util.ManifestObjects(strings.NewReader(rel.Manifest), fmt.Sprintf("%s-release-manifest", rel.Name))
229+
if err != nil {
230+
return true, "", err
231+
}
232+
klog.FromContext(ctx).Info("watching managed objects")
233+
cache, err := h.Manager.Get(ctx, ext)
234+
if err != nil {
235+
return true, "", err
236+
}
237+
if err := cache.Watch(ctx, h.Watcher, relObjects...); err != nil {
238+
return true, "", err
239+
}
240+
241+
return true, "", nil
242+
}
243+
200244
func (h *Helm) buildHelmChart(bundleFS fs.FS, ext *ocv1.ClusterExtension) (*chart.Chart, error) {
201245
if h.HelmChartProvider == nil {
202246
return nil, errors.New("HelmChartProvider is nil")

internal/operator-controller/controllers/clusterextension_reconcile_steps.go

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -95,11 +95,7 @@ func ResolveBundle(r resolve.Resolver) ReconcileStepFunc {
9595
}
9696
resolvedBundle, resolvedBundleVersion, resolvedDeprecation, err := r.Resolve(ctx, ext, bm)
9797
if err != nil {
98-
// Note: We don't distinguish between resolution-specific errors and generic errors
99-
setStatusProgressing(ext, err)
100-
setInstalledStatusFromRevisionStates(ext, state.revisionStates)
101-
ensureAllConditionsWithReason(ext, ocv1.ReasonFailed, err.Error())
102-
return nil, err
98+
return handleResolutionError(ctx, state, ext, err)
10399
}
104100

105101
// set deprecation status after _successful_ resolution
@@ -134,9 +130,41 @@ func ResolveBundle(r resolve.Resolver) ReconcileStepFunc {
134130
}
135131
}
136132

133+
// handleResolutionError handles the case when bundle resolution fails.
134+
// If a bundle is already installed, we fall back to using it to maintain the current state.
135+
// This enables workload resilience when the catalog becomes unavailable.
136+
func handleResolutionError(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension, err error) (*ctrl.Result, error) {
137+
l := log.FromContext(ctx)
138+
139+
// If we have an installed bundle, fall back to it to maintain current state
140+
if state.revisionStates.Installed != nil {
141+
l.Info("resolution failed but continuing with installed bundle", "error", err, "installedBundle", state.revisionStates.Installed.BundleMetadata)
142+
setStatusProgressing(ext, err)
143+
setInstalledStatusFromRevisionStates(ext, state.revisionStates)
144+
state.resolvedRevisionMetadata = state.revisionStates.Installed
145+
return nil, nil
146+
}
147+
148+
// No installed bundle and resolution failed - cannot proceed
149+
setStatusProgressing(ext, err)
150+
setInstalledStatusFromRevisionStates(ext, state.revisionStates)
151+
ensureAllConditionsWithReason(ext, ocv1.ReasonFailed, err.Error())
152+
return nil, err
153+
}
154+
137155
func UnpackBundle(i imageutil.Puller, cache imageutil.Cache) ReconcileStepFunc {
138156
return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) {
139157
l := log.FromContext(ctx)
158+
159+
// Skip unpacking if we're using an already-installed bundle
160+
// (e.g., when catalog is unavailable but we're maintaining current state)
161+
if state.revisionStates.Installed != nil &&
162+
state.resolvedRevisionMetadata.BundleMetadata == state.revisionStates.Installed.BundleMetadata {
163+
l.Info("skipping unpack - using installed bundle content")
164+
// imageFS will remain nil - the applier will use the existing installed content
165+
return nil, nil
166+
}
167+
140168
l.Info("unpacking resolved bundle")
141169
imageFS, _, _, err := i.Pull(ctx, ext.GetName(), state.resolvedRevisionMetadata.Image, cache)
142170
if err != nil {

0 commit comments

Comments
 (0)