Skip to content
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions api/v1/clusterextensionrevision_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@ const (
ClusterExtensionRevisionKind = "ClusterExtensionRevision"

// Condition Types
ClusterExtensionRevisionTypeAvailable = "Available"
ClusterExtensionRevisionTypeSucceeded = "Succeeded"
ClusterExtensionRevisionTypeAvailable = "Available"
ClusterExtensionRevisionTypeSucceeded = "Succeeded"
ClusterExtensionRevisionTypeProgressing = "Progressing"

// Condition Reasons
ClusterExtensionRevisionReasonAvailable = "Available"
Expand All @@ -40,6 +41,9 @@ const (
ClusterExtensionRevisionReasonIncomplete = "Incomplete"
ClusterExtensionRevisionReasonProgressing = "Progressing"
ClusterExtensionRevisionReasonArchived = "Archived"
ClusterExtensionRevisionReasonRolloutInProgress = "RollingOut"
ClusterExtensionRevisionReasonRolloutError = "RolloutError"
ClusterExtensionRevisionReasonRolledOut = "RolledOut"
)

// ClusterExtensionRevisionSpec defines the desired state of ClusterExtensionRevision.
Expand Down Expand Up @@ -150,6 +154,7 @@ type ClusterExtensionRevisionStatus struct {

// ClusterExtensionRevision is the Schema for the clusterextensionrevisions API
// +kubebuilder:printcolumn:name="Available",type=string,JSONPath=`.status.conditions[?(@.type=='Available')].status`
// +kubebuilder:printcolumn:name="Progressing",type=string,JSONPath=`.status.conditions[?(@.type=='Progressing')].status`
// +kubebuilder:printcolumn:name=Age,type=date,JSONPath=`.metadata.creationTimestamp`
type ClusterExtensionRevision struct {
metav1.TypeMeta `json:",inline"`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ spec:
- jsonPath: .status.conditions[?(@.type=='Available')].status
name: Available
type: string
- jsonPath: .status.conditions[?(@.type=='Progressing')].status
name: Progressing
type: string
- jsonPath: .metadata.creationTimestamp
name: Age
type: date
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,36 +126,51 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev
// Reconcile
//
if err := c.ensureFinalizer(ctx, rev, clusterExtensionRevisionTeardownFinalizer); err != nil {
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
Status: metav1.ConditionFalse,
Reason: ocv1.ClusterExtensionRevisionReasonReconcileFailure,
Message: err.Error(),
ObservedGeneration: rev.Generation,
})
return ctrl.Result{}, fmt.Errorf("error ensuring teardown finalizer: %v", err)
}

if err := c.establishWatch(ctx, rev, revision); err != nil {
inRollout := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeAvailable) == nil
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I worry about looking at the status to drive the reconciliation logic here. We should probably look at the state of the objects themselves to make this determination, no? Or, is this just temporary and in lieu of the new boxcutter PR that should surface this state more easily?

if inRollout {
if err := c.establishWatch(ctx, rev, revision); err != nil {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should establishWatch just be idempotent and we call it whether were rolling out or not?

werr := fmt.Errorf("establish watch: %v", err)
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
Status: metav1.ConditionTrue,
Reason: ocv1.ClusterExtensionRevisionReasonReconcileFailure,
Message: werr.Error(),
ObservedGeneration: rev.Generation,
})
return ctrl.Result{}, werr
}
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
Status: metav1.ConditionFalse,
Reason: ocv1.ClusterExtensionRevisionReasonReconcileFailure,
Message: err.Error(),
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
Status: metav1.ConditionTrue,
Reason: ocv1.ClusterExtensionRevisionReasonRolloutInProgress,
Message: "Revision is being rolled out.",
ObservedGeneration: rev.Generation,
})
return ctrl.Result{}, fmt.Errorf("establish watch: %v", err)
}

rres, err := c.RevisionEngine.Reconcile(ctx, *revision, opts...)
if err != nil {
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
Status: metav1.ConditionFalse,
Reason: ocv1.ClusterExtensionRevisionReasonReconcileFailure,
Message: err.Error(),
ObservedGeneration: rev.Generation,
})
if inRollout {
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
Status: metav1.ConditionTrue,
Reason: ocv1.ClusterExtensionRevisionReasonRolloutError,
Message: err.Error(),
ObservedGeneration: rev.Generation,
})
} else {
// it is a probably transient error, and we do not know if the revision is available or not
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
Status: metav1.ConditionUnknown,
Reason: ocv1.ClusterExtensionRevisionReasonReconcileFailure,
Message: err.Error(),
ObservedGeneration: rev.Generation,
})
}
return ctrl.Result{}, fmt.Errorf("revision reconcile: %v", err)
}
l.Info("reconcile report", "report", rres.String())
Expand All @@ -165,27 +180,51 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev
if verr := rres.GetValidationError(); verr != nil {
l.Info("preflight error, retrying after 10s", "err", verr.String())

meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
Status: metav1.ConditionFalse,
Reason: ocv1.ClusterExtensionRevisionReasonRevisionValidationFailure,
Message: fmt.Sprintf("revision validation error: %s", verr),
ObservedGeneration: rev.Generation,
})
if inRollout {
// given that we retry, we are not going to keep Progressing condition True
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
Status: metav1.ConditionTrue,
Reason: ocv1.ClusterExtensionRevisionReasonRevisionValidationFailure,
Message: fmt.Sprintf("revision validation error: %s", verr),
ObservedGeneration: rev.Generation,
})
} else {
// it is a probably transient error, and we do not know if the revision is available or not
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
Status: metav1.ConditionUnknown,
Reason: ocv1.ClusterExtensionRevisionReasonReconcileFailure,
Message: fmt.Sprintf("revision validation error: %s", verr),
ObservedGeneration: rev.Generation,
})
}
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
}

for i, pres := range rres.GetPhases() {
if verr := pres.GetValidationError(); verr != nil {
l.Info("preflight error, retrying after 10s", "err", verr.String())

meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
Status: metav1.ConditionFalse,
Reason: ocv1.ClusterExtensionRevisionReasonPhaseValidationError,
Message: fmt.Sprintf("phase %d validation error: %s", i, verr),
ObservedGeneration: rev.Generation,
})
if inRollout {
// given that we retry, we are not going to keep Progressing condition True
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
Status: metav1.ConditionTrue,
Reason: ocv1.ClusterExtensionRevisionReasonPhaseValidationError,
Message: fmt.Sprintf("phase %d validation error: %s", i, verr),
ObservedGeneration: rev.Generation,
})
} else {
// it is a probably transient error, and we do not know if the revision is available or not
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
Status: metav1.ConditionUnknown,
Reason: ocv1.ClusterExtensionRevisionReasonPhaseValidationError,
Message: fmt.Sprintf("phase %d validation error: %s", i, verr),
ObservedGeneration: rev.Generation,
})
}
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
}

Expand All @@ -198,18 +237,34 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev

if len(collidingObjs) > 0 {
l.Info("object collision error, retrying after 10s", "collisions", collidingObjs)

meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
Status: metav1.ConditionFalse,
Reason: ocv1.ClusterExtensionRevisionReasonObjectCollisions,
Message: fmt.Sprintf("revision object collisions in phase %d\n%s", i, strings.Join(collidingObjs, "\n\n")),
ObservedGeneration: rev.Generation,
})
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
// collisions are probably stickier than phase roll out probe failures - so we'd probably want to set
// Progressing to false here due to the collision
if inRollout {
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
Status: metav1.ConditionFalse,
Reason: ocv1.ClusterExtensionRevisionReasonObjectCollisions,
Message: fmt.Sprintf("revision object collisions in phase %d\n%s", i, strings.Join(collidingObjs, "\n\n")),
ObservedGeneration: rev.Generation,
})

// not sure if we want to retry here - collisions are probably not transient?
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
}
}
}

if !rres.InTransistion() {
// we have rolled out all objects in all phases, not interested in probes here
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
Status: metav1.ConditionFalse,
Reason: ocv1.ClusterExtensionRevisionReasonRolledOut,
Message: "Revision is rolled out.",
ObservedGeneration: rev.Generation,
})
}

//nolint:nestif
if rres.IsComplete() {
// Archive other revisions.
Expand All @@ -223,37 +278,44 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev
}
}

// Report status.
// It would be good to understand from Nico how we can distinguish between progression and availability probes
// and how to best check that all Availability probes are passing
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
Status: metav1.ConditionTrue,
Reason: ocv1.ClusterExtensionRevisionReasonAvailable,
Message: "Object is available and passes all probes.",
Message: "Objects are available and passes all probes.",
ObservedGeneration: rev.Generation,
})

// We'll probably only want to remove this once we are done updating the ClusterExtension conditions
// as its one of the interfaces between the revision and the extension. If we still have the Succeeded for now
// that's fine.
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
Type: ocv1.ClusterExtensionRevisionTypeSucceeded,
Status: metav1.ConditionTrue,
Reason: ocv1.ClusterExtensionRevisionReasonRolloutSuccess,
Message: "Revision succeeded rolling out.",
ObservedGeneration: rev.Generation,
})
if !meta.IsStatusConditionTrue(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeSucceeded) {
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
Type: ocv1.ClusterExtensionRevisionTypeSucceeded,
Status: metav1.ConditionTrue,
Reason: ocv1.ClusterExtensionRevisionReasonRolloutSuccess,
Message: "Revision succeeded rolling out.",
ObservedGeneration: rev.Generation,
})
}
} else {
var probeFailureMsgs []string
for _, pres := range rres.GetPhases() {
if pres.IsComplete() {
continue
}
for _, ores := range pres.GetObjects() {
// we probably want an AvailabilityProbeType and run through all of them independently of whether
// the revision is complete or not
pr := ores.Probes()[boxcutter.ProgressProbeType]
if pr.Success {
continue
}

obj := ores.Object()
gvk := obj.GetObjectKind().GroupVersionKind()
// I think these can be pretty large and verbose. We may want to
// work a little on the formatting...?
probeFailureMsgs = append(probeFailureMsgs, fmt.Sprintf(
"Object %s.%s %s/%s: %v",
gvk.Kind, gvk.GroupVersion().String(),
Expand All @@ -280,17 +342,6 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev
})
}
}
if rres.InTransistion() {
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
Type: ocv1.TypeProgressing,
Status: metav1.ConditionTrue,
Reason: ocv1.ClusterExtensionRevisionReasonProgressing,
Message: "Rollout in progress.",
ObservedGeneration: rev.Generation,
})
} else {
meta.RemoveStatusCondition(&rev.Status.Conditions, ocv1.TypeProgressing)
}

return ctrl.Result{}, nil
}
Expand Down
3 changes: 3 additions & 0 deletions manifests/experimental-e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -610,6 +610,9 @@ spec:
- jsonPath: .status.conditions[?(@.type=='Available')].status
name: Available
type: string
- jsonPath: .status.conditions[?(@.type=='Progressing')].status
name: Progressing
type: string
- jsonPath: .metadata.creationTimestamp
name: Age
type: date
Expand Down
3 changes: 3 additions & 0 deletions manifests/experimental.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -575,6 +575,9 @@ spec:
- jsonPath: .status.conditions[?(@.type=='Available')].status
name: Available
type: string
- jsonPath: .status.conditions[?(@.type=='Progressing')].status
name: Progressing
type: string
- jsonPath: .metadata.creationTimestamp
name: Age
type: date
Expand Down
Loading