-
Couldn't load subscription status.
- Fork 68
🌱 ClusterExtensionRevision conditions #2281
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Draft
perdasilva
wants to merge
5
commits into
operator-framework:main
Choose a base branch
from
perdasilva:revision-conditions
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Draft
Changes from 2 commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
5b2d1da
initial thoughts
b43d206
WIP Introduce Progressing condition
pedjak cc490bf
Add ClusterExtensionRevision MarkAs(Progressing|NotProgressing|Availa…
pedjak 8c97bd7
Add ability to play with startup/readines/liveness probes in test-ope…
pedjak 16c6ada
wip: propagate conditions from CER to cluster extension
pedjak File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Some comments aren't visible on the classic Files Changed page.
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -126,36 +126,51 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev | |
| // Reconcile | ||
| // | ||
| if err := c.ensureFinalizer(ctx, rev, clusterExtensionRevisionTeardownFinalizer); err != nil { | ||
| meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ | ||
| Type: ocv1.ClusterExtensionRevisionTypeAvailable, | ||
| Status: metav1.ConditionFalse, | ||
| Reason: ocv1.ClusterExtensionRevisionReasonReconcileFailure, | ||
| Message: err.Error(), | ||
| ObservedGeneration: rev.Generation, | ||
| }) | ||
| return ctrl.Result{}, fmt.Errorf("error ensuring teardown finalizer: %v", err) | ||
| } | ||
|
|
||
| if err := c.establishWatch(ctx, rev, revision); err != nil { | ||
| inRollout := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeAvailable) == nil | ||
| if inRollout { | ||
| if err := c.establishWatch(ctx, rev, revision); err != nil { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should establishWatch just be idempotent and we call it whether were rolling out or not? |
||
| werr := fmt.Errorf("establish watch: %v", err) | ||
| meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ | ||
| Type: ocv1.ClusterExtensionRevisionTypeProgressing, | ||
| Status: metav1.ConditionTrue, | ||
| Reason: ocv1.ClusterExtensionRevisionReasonReconcileFailure, | ||
| Message: werr.Error(), | ||
| ObservedGeneration: rev.Generation, | ||
| }) | ||
| return ctrl.Result{}, werr | ||
| } | ||
| meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ | ||
| Type: ocv1.ClusterExtensionRevisionTypeAvailable, | ||
| Status: metav1.ConditionFalse, | ||
| Reason: ocv1.ClusterExtensionRevisionReasonReconcileFailure, | ||
| Message: err.Error(), | ||
| Type: ocv1.ClusterExtensionRevisionTypeProgressing, | ||
| Status: metav1.ConditionTrue, | ||
| Reason: ocv1.ClusterExtensionRevisionReasonRolloutInProgress, | ||
| Message: "Revision is being rolled out.", | ||
| ObservedGeneration: rev.Generation, | ||
| }) | ||
| return ctrl.Result{}, fmt.Errorf("establish watch: %v", err) | ||
| } | ||
|
|
||
| rres, err := c.RevisionEngine.Reconcile(ctx, *revision, opts...) | ||
| if err != nil { | ||
| meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ | ||
| Type: ocv1.ClusterExtensionRevisionTypeAvailable, | ||
| Status: metav1.ConditionFalse, | ||
| Reason: ocv1.ClusterExtensionRevisionReasonReconcileFailure, | ||
| Message: err.Error(), | ||
| ObservedGeneration: rev.Generation, | ||
| }) | ||
| if inRollout { | ||
| meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ | ||
| Type: ocv1.ClusterExtensionRevisionTypeProgressing, | ||
| Status: metav1.ConditionTrue, | ||
| Reason: ocv1.ClusterExtensionRevisionReasonRolloutError, | ||
| Message: err.Error(), | ||
| ObservedGeneration: rev.Generation, | ||
| }) | ||
| } else { | ||
| // it is a probably transient error, and we do not know if the revision is available or not | ||
| meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ | ||
| Type: ocv1.ClusterExtensionRevisionTypeAvailable, | ||
| Status: metav1.ConditionUnknown, | ||
| Reason: ocv1.ClusterExtensionRevisionReasonReconcileFailure, | ||
| Message: err.Error(), | ||
| ObservedGeneration: rev.Generation, | ||
| }) | ||
| } | ||
| return ctrl.Result{}, fmt.Errorf("revision reconcile: %v", err) | ||
| } | ||
| l.Info("reconcile report", "report", rres.String()) | ||
|
|
@@ -165,27 +180,51 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev | |
| if verr := rres.GetValidationError(); verr != nil { | ||
| l.Info("preflight error, retrying after 10s", "err", verr.String()) | ||
|
|
||
| meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ | ||
| Type: ocv1.ClusterExtensionRevisionTypeAvailable, | ||
| Status: metav1.ConditionFalse, | ||
| Reason: ocv1.ClusterExtensionRevisionReasonRevisionValidationFailure, | ||
| Message: fmt.Sprintf("revision validation error: %s", verr), | ||
| ObservedGeneration: rev.Generation, | ||
| }) | ||
| if inRollout { | ||
| // given that we retry, we are not going to keep Progressing condition True | ||
| meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ | ||
| Type: ocv1.ClusterExtensionRevisionTypeProgressing, | ||
| Status: metav1.ConditionTrue, | ||
| Reason: ocv1.ClusterExtensionRevisionReasonRevisionValidationFailure, | ||
| Message: fmt.Sprintf("revision validation error: %s", verr), | ||
| ObservedGeneration: rev.Generation, | ||
| }) | ||
| } else { | ||
| // it is a probably transient error, and we do not know if the revision is available or not | ||
| meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ | ||
| Type: ocv1.ClusterExtensionRevisionTypeAvailable, | ||
| Status: metav1.ConditionUnknown, | ||
| Reason: ocv1.ClusterExtensionRevisionReasonReconcileFailure, | ||
| Message: fmt.Sprintf("revision validation error: %s", verr), | ||
| ObservedGeneration: rev.Generation, | ||
| }) | ||
| } | ||
| return ctrl.Result{RequeueAfter: 10 * time.Second}, nil | ||
| } | ||
|
|
||
| for i, pres := range rres.GetPhases() { | ||
| if verr := pres.GetValidationError(); verr != nil { | ||
| l.Info("preflight error, retrying after 10s", "err", verr.String()) | ||
|
|
||
| meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ | ||
| Type: ocv1.ClusterExtensionRevisionTypeAvailable, | ||
| Status: metav1.ConditionFalse, | ||
| Reason: ocv1.ClusterExtensionRevisionReasonPhaseValidationError, | ||
| Message: fmt.Sprintf("phase %d validation error: %s", i, verr), | ||
| ObservedGeneration: rev.Generation, | ||
| }) | ||
| if inRollout { | ||
| // given that we retry, we are not going to keep Progressing condition True | ||
| meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ | ||
| Type: ocv1.ClusterExtensionRevisionTypeProgressing, | ||
| Status: metav1.ConditionTrue, | ||
| Reason: ocv1.ClusterExtensionRevisionReasonPhaseValidationError, | ||
| Message: fmt.Sprintf("phase %d validation error: %s", i, verr), | ||
| ObservedGeneration: rev.Generation, | ||
| }) | ||
| } else { | ||
| // it is a probably transient error, and we do not know if the revision is available or not | ||
| meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ | ||
| Type: ocv1.ClusterExtensionRevisionTypeAvailable, | ||
| Status: metav1.ConditionUnknown, | ||
| Reason: ocv1.ClusterExtensionRevisionReasonPhaseValidationError, | ||
| Message: fmt.Sprintf("phase %d validation error: %s", i, verr), | ||
| ObservedGeneration: rev.Generation, | ||
| }) | ||
| } | ||
| return ctrl.Result{RequeueAfter: 10 * time.Second}, nil | ||
| } | ||
|
|
||
|
|
@@ -198,18 +237,34 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev | |
|
|
||
| if len(collidingObjs) > 0 { | ||
| l.Info("object collision error, retrying after 10s", "collisions", collidingObjs) | ||
|
|
||
| meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ | ||
| Type: ocv1.ClusterExtensionRevisionTypeAvailable, | ||
| Status: metav1.ConditionFalse, | ||
| Reason: ocv1.ClusterExtensionRevisionReasonObjectCollisions, | ||
| Message: fmt.Sprintf("revision object collisions in phase %d\n%s", i, strings.Join(collidingObjs, "\n\n")), | ||
| ObservedGeneration: rev.Generation, | ||
| }) | ||
| return ctrl.Result{RequeueAfter: 10 * time.Second}, nil | ||
| // collisions are probably stickier than phase roll out probe failures - so we'd probably want to set | ||
| // Progressing to false here due to the collision | ||
| if inRollout { | ||
| meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ | ||
| Type: ocv1.ClusterExtensionRevisionTypeProgressing, | ||
| Status: metav1.ConditionFalse, | ||
| Reason: ocv1.ClusterExtensionRevisionReasonObjectCollisions, | ||
| Message: fmt.Sprintf("revision object collisions in phase %d\n%s", i, strings.Join(collidingObjs, "\n\n")), | ||
| ObservedGeneration: rev.Generation, | ||
| }) | ||
|
|
||
| // not sure if we want to retry here - collisions are probably not transient? | ||
| return ctrl.Result{RequeueAfter: 10 * time.Second}, nil | ||
| } | ||
| } | ||
| } | ||
|
|
||
| if !rres.InTransistion() { | ||
| // we have rolled out all objects in all phases, not interested in probes here | ||
| meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ | ||
| Type: ocv1.ClusterExtensionRevisionTypeProgressing, | ||
| Status: metav1.ConditionFalse, | ||
| Reason: ocv1.ClusterExtensionRevisionReasonRolledOut, | ||
| Message: "Revision is rolled out.", | ||
| ObservedGeneration: rev.Generation, | ||
| }) | ||
| } | ||
|
|
||
| //nolint:nestif | ||
| if rres.IsComplete() { | ||
| // Archive other revisions. | ||
|
|
@@ -223,37 +278,44 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev | |
| } | ||
| } | ||
|
|
||
| // Report status. | ||
| // It would be good to understand from Nico how we can distinguish between progression and availability probes | ||
| // and how to best check that all Availability probes are passing | ||
| meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ | ||
| Type: ocv1.ClusterExtensionRevisionTypeAvailable, | ||
| Status: metav1.ConditionTrue, | ||
| Reason: ocv1.ClusterExtensionRevisionReasonAvailable, | ||
| Message: "Object is available and passes all probes.", | ||
| Message: "Objects are available and passes all probes.", | ||
| ObservedGeneration: rev.Generation, | ||
| }) | ||
|
|
||
| // We'll probably only want to remove this once we are done updating the ClusterExtension conditions | ||
| // as its one of the interfaces between the revision and the extension. If we still have the Succeeded for now | ||
| // that's fine. | ||
| meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ | ||
| Type: ocv1.ClusterExtensionRevisionTypeSucceeded, | ||
| Status: metav1.ConditionTrue, | ||
| Reason: ocv1.ClusterExtensionRevisionReasonRolloutSuccess, | ||
| Message: "Revision succeeded rolling out.", | ||
| ObservedGeneration: rev.Generation, | ||
| }) | ||
| if !meta.IsStatusConditionTrue(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeSucceeded) { | ||
| meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ | ||
| Type: ocv1.ClusterExtensionRevisionTypeSucceeded, | ||
| Status: metav1.ConditionTrue, | ||
| Reason: ocv1.ClusterExtensionRevisionReasonRolloutSuccess, | ||
| Message: "Revision succeeded rolling out.", | ||
| ObservedGeneration: rev.Generation, | ||
| }) | ||
| } | ||
| } else { | ||
| var probeFailureMsgs []string | ||
| for _, pres := range rres.GetPhases() { | ||
| if pres.IsComplete() { | ||
| continue | ||
| } | ||
| for _, ores := range pres.GetObjects() { | ||
| // we probably want an AvailabilityProbeType and run through all of them independently of whether | ||
| // the revision is complete or not | ||
| pr := ores.Probes()[boxcutter.ProgressProbeType] | ||
| if pr.Success { | ||
| continue | ||
| } | ||
|
|
||
| obj := ores.Object() | ||
| gvk := obj.GetObjectKind().GroupVersionKind() | ||
| // I think these can be pretty large and verbose. We may want to | ||
| // work a little on the formatting...? | ||
| probeFailureMsgs = append(probeFailureMsgs, fmt.Sprintf( | ||
| "Object %s.%s %s/%s: %v", | ||
| gvk.Kind, gvk.GroupVersion().String(), | ||
|
|
@@ -280,17 +342,6 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev | |
| }) | ||
| } | ||
| } | ||
| if rres.InTransistion() { | ||
| meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ | ||
| Type: ocv1.TypeProgressing, | ||
| Status: metav1.ConditionTrue, | ||
| Reason: ocv1.ClusterExtensionRevisionReasonProgressing, | ||
| Message: "Rollout in progress.", | ||
| ObservedGeneration: rev.Generation, | ||
| }) | ||
| } else { | ||
| meta.RemoveStatusCondition(&rev.Status.Conditions, ocv1.TypeProgressing) | ||
| } | ||
|
|
||
| return ctrl.Result{}, nil | ||
| } | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I worry about looking at the status to drive the reconciliation logic here. We should probably look at the state of the objects themselves to make this determination, no? Or, is this just temporary and in lieu of the new boxcutter PR that should surface this state more easily?