Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Propagate VR condition error message to protected pvc conditions #1639

Merged
merged 4 commits into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 62 additions & 29 deletions internal/controller/vrg_volrep.go
Original file line number Diff line number Diff line change
Expand Up @@ -1430,20 +1430,27 @@ func (v *VRGInstance) checkVRStatus(pvc *corev1.PersistentVolumeClaim, volRep *v
//
// We handle 3 cases:
// - Primary deleted VRG: If Validated condition exists and false, the VR will never complete and can be
// deleted safely. Otherwise Completed condition is checked.
// - Primary VRG: Completed condition is checked.
// deleted safely.
// - Primary VRG: Validated condition is checked, and if successful the Completed conditions is also checked.
// - Secondary VRG: Completed, Degraded and Resyncing conditions are checked and ensured healthy.
func (v *VRGInstance) validateVRStatus(pvc *corev1.PersistentVolumeClaim, volRep *volrep.VolumeReplication,
state ramendrv1alpha1.ReplicationState,
) bool {
// Check validated for primary during VRG deletion.
if state == ramendrv1alpha1.Primary && rmnutil.ResourceIsDeleted(v.instance) {
validated, ok := v.validateVRValidatedStatus(volRep)
if !validated && ok {
v.log.Info(fmt.Sprintf("VolumeReplication %s/%s failed validation and can be deleted",
volRep.GetName(), volRep.GetNamespace()))
// If primary, check the validated condition.
if state == ramendrv1alpha1.Primary {
validated, condState := v.validateVRValidatedStatus(pvc, volRep)
if !validated && condState != conditionMissing {
// If the condition is known, this VR will never complete since it failed initial validation.
if condState == conditionKnown {
v.log.Info(fmt.Sprintf("VolumeReplication %s/%s failed validation and can be deleted",
volRep.GetName(), volRep.GetNamespace()))

// If the VRG is deleted the VR has reached the desired state.
return rmnutil.ResourceIsDeleted(v.instance)
}

return true
// The condition is stale or unknown so we need to check again later.
return false
}
}

Expand All @@ -1467,19 +1474,25 @@ func (v *VRGInstance) validateVRStatus(pvc *corev1.PersistentVolumeClaim, volRep
return true
}

// validateVRValidatedStatus validates that VolumeReplicaion resource was validated.
// Return 2 booleans
// validateVRValidatedStatus validates that VolumeReplication resource was validated.
// Returns 2 values:
// - validated: true if the condition is true, otherwise false
// - ok: true if the check was succeesfull, false if the condition is missing, stale, or unknown.
// - state: condition state
func (v *VRGInstance) validateVRValidatedStatus(
pvc *corev1.PersistentVolumeClaim,
volRep *volrep.VolumeReplication,
) (bool, bool) {
conditionMet, errorMsg := isVRConditionMet(volRep, volrep.ConditionValidated, metav1.ConditionTrue)
if errorMsg != "" {
v.log.Info(fmt.Sprintf("%s (VolRep: %s/%s)", errorMsg, volRep.GetName(), volRep.GetNamespace()))
) (bool, conditionState) {
conditionMet, condState, errorMsg := isVRConditionMet(volRep, volrep.ConditionValidated, metav1.ConditionTrue)
if !conditionMet && condState != conditionMissing {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would still log the errorMsg when the condState is conditionMissing

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One issue, since we log the defaultMsg (like we do for other conditions), we will log:

 "VolumeReplication resource not validated"

Which is confusing when using csi-addons < 0.10.0. For example when we Completed=True, which means that everything is good. So I think the best way is to keep the current behavior, log only if the condition exists.

defaultMsg := "VolumeReplication resource not validated"
v.updatePVCDataReadyConditionHelper(pvc.Namespace, pvc.Name, VRGConditionReasonError, errorMsg,
defaultMsg)
v.updatePVCDataProtectedConditionHelper(pvc.Namespace, pvc.Name, VRGConditionReasonError, errorMsg,
defaultMsg)
v.log.Info(fmt.Sprintf("%s (VolRep: %s/%s)", defaultMsg, volRep.GetName(), volRep.GetNamespace()))
}

return conditionMet, errorMsg == ""
return conditionMet, condState
}

// validateVRCompletedStatus validates if the VolumeReplication resource Completed condition is met and update
Expand All @@ -1502,7 +1515,7 @@ func (v *VRGInstance) validateVRCompletedStatus(pvc *corev1.PersistentVolumeClai
action = "demoted"
}

conditionMet, msg := isVRConditionMet(volRep, volrep.ConditionCompleted, metav1.ConditionTrue)
conditionMet, _, msg := isVRConditionMet(volRep, volrep.ConditionCompleted, metav1.ConditionTrue)
if !conditionMet {
defaultMsg := fmt.Sprintf("VolumeReplication resource for pvc not %s to %s", action, stateString)
v.updatePVCDataReadyConditionHelper(pvc.Namespace, pvc.Name, VRGConditionReasonError, msg,
Expand Down Expand Up @@ -1539,12 +1552,12 @@ func (v *VRGInstance) validateAdditionalVRStatusForSecondary(pvc *corev1.Persist
) bool {
v.updatePVCLastSyncCounters(pvc.Namespace, pvc.Name, nil)

conditionMet, _ := isVRConditionMet(volRep, volrep.ConditionResyncing, metav1.ConditionTrue)
conditionMet, _, _ := isVRConditionMet(volRep, volrep.ConditionResyncing, metav1.ConditionTrue)
if !conditionMet {
return v.checkResyncCompletionAsSecondary(pvc, volRep)
}

conditionMet, msg := isVRConditionMet(volRep, volrep.ConditionDegraded, metav1.ConditionTrue)
conditionMet, _, msg := isVRConditionMet(volRep, volrep.ConditionDegraded, metav1.ConditionTrue)
if !conditionMet {
defaultMsg := "VolumeReplication resource for pvc is not in Degraded condition while resyncing"
v.updatePVCDataProtectedConditionHelper(pvc.Namespace, pvc.Name, VRGConditionReasonError, msg,
Expand Down Expand Up @@ -1573,7 +1586,7 @@ func (v *VRGInstance) validateAdditionalVRStatusForSecondary(pvc *corev1.Persist
func (v *VRGInstance) checkResyncCompletionAsSecondary(pvc *corev1.PersistentVolumeClaim,
volRep *volrep.VolumeReplication,
) bool {
conditionMet, msg := isVRConditionMet(volRep, volrep.ConditionResyncing, metav1.ConditionFalse)
conditionMet, _, msg := isVRConditionMet(volRep, volrep.ConditionResyncing, metav1.ConditionFalse)
if !conditionMet {
defaultMsg := "VolumeReplication resource for pvc not syncing as Secondary"
v.updatePVCDataReadyConditionHelper(pvc.Namespace, pvc.Name, VRGConditionReasonError, msg,
Expand All @@ -1587,7 +1600,7 @@ func (v *VRGInstance) checkResyncCompletionAsSecondary(pvc *corev1.PersistentVol
return false
}

conditionMet, msg = isVRConditionMet(volRep, volrep.ConditionDegraded, metav1.ConditionFalse)
conditionMet, _, msg = isVRConditionMet(volRep, volrep.ConditionDegraded, metav1.ConditionFalse)
if !conditionMet {
defaultMsg := "VolumeReplication resource for pvc is not syncing and is degraded as Secondary"
v.updatePVCDataReadyConditionHelper(pvc.Namespace, pvc.Name, VRGConditionReasonError, msg,
Expand All @@ -1611,35 +1624,55 @@ func (v *VRGInstance) checkResyncCompletionAsSecondary(pvc *corev1.PersistentVol
return true
}

// isVRConditionMet returns true if the condition is met, and an error mesage if we could not get the
// condition value.
type conditionState string

const (
conditionMissing = conditionState("missing")
conditionStale = conditionState("stale")
conditionUnknown = conditionState("unknown")
conditionKnown = conditionState("known")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

while reviewing, I got confused mixing conditionKnown and conditionUnknown. I think it is better to rename conditionKnown to conditionExists

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

conditionExists is confusing with conditionMissing, but I can find a term that different from Unknown.

)

// isVRConditionMet check if condition is met.
// Returns 3 values:
// - met: true if the condition status matches the desired status, otherwise false
// - state: one of (conditionMissing, conditionStale, conditionUnknown, conditionKnown)
// generation, and its value is not unknown.
// - errorMsg: error message describing why the condition is not met
func isVRConditionMet(volRep *volrep.VolumeReplication,
conditionType string,
desiredStatus metav1.ConditionStatus,
) (bool, string) {
) (bool, conditionState, string) {
met := true

volRepCondition := findCondition(volRep.Status.Conditions, conditionType)
if volRepCondition == nil {
errorMsg := fmt.Sprintf("Failed to get the %s condition from status of VolumeReplication resource.",
conditionType)

return false, errorMsg
return !met, conditionMissing, errorMsg
}

if volRep.GetGeneration() != volRepCondition.ObservedGeneration {
errorMsg := fmt.Sprintf("Stale generation for condition %s from status of VolumeReplication resource.",
conditionType)

return false, errorMsg
return !met, conditionStale, errorMsg
}

if volRepCondition.Status == metav1.ConditionUnknown {
errorMsg := fmt.Sprintf("Unknown status for condition %s from status of VolumeReplication resource.",
conditionType)

return false, errorMsg
return !met, conditionUnknown, errorMsg
}

if volRepCondition.Status != desiredStatus {
// csi-addons > 0.10.0 returns detailed error message
return !met, conditionKnown, volRepCondition.Message
}

return volRepCondition.Status == desiredStatus, ""
return met, conditionKnown, ""
}

// Disabling unparam linter as currently every invokation of this
Expand Down
52 changes: 49 additions & 3 deletions internal/controller/vrg_volrep_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,19 @@ var _ = Describe("VolumeReplicationGroupVolRepController", func() {
It("simulate VR with failed validation", func() {
vrgDeleteFailedVR.promoteVolRepsWithOptions(promoteOptions{ValidatedFailed: true})
})
It("propagate VR condition message to protected pvc conditions", func() {
vrName := vrgDeleteFailedVR.pvcNames[0]
vr := volrep.VolumeReplication{}
Expect(k8sClient.Get(context.TODO(), vrName, &vr)).To(Succeed())
validated := meta.FindStatusCondition(vr.Status.Conditions, volrep.ConditionValidated)
Expect(validated).NotTo(BeNil())
vrgDeleteFailedVR.waitForProtectedPVCCondition(
vrName,
vrgController.VRGConditionTypeDataReady,
metav1.ConditionFalse,
validated.Message,
)
})
It("VRG can be deleted", func() {
By("deleting the VRG")
vrg := vrgDeleteFailedVR.getVRG()
Expand Down Expand Up @@ -2368,7 +2381,7 @@ func (v *vrgTest) promoteVolRepsAndDo(options promoteOptions, do func(int, int))

if options.ValidatedFailed {
volRepStatus.State = volrep.UnknownState
volRepStatus.Message = "precondition failed ..."
volRepStatus.Message = "failed to meet prerequisite: details..."
}

volRep.Status = volRepStatus
Expand All @@ -2388,6 +2401,9 @@ func (v *vrgTest) promoteVolRepsAndDo(options promoteOptions, do func(int, int))
v.waitForVolRepCondition(volrepKey, volrep.ConditionValidated, metav1.ConditionFalse)
}
} else {
if !options.ValidatedMissing {
v.waitForVolRepCondition(volrepKey, volrep.ConditionValidated, metav1.ConditionTrue)
}
v.waitForVolRepCondition(volrepKey, volrep.ConditionCompleted, metav1.ConditionTrue)
v.waitForProtectedPVCs(volrepKey)
}
Expand All @@ -2404,15 +2420,17 @@ func (v *vrgTest) generateVRConditions(generation int64, options promoteOptions)
if !options.ValidatedMissing {
validated := metav1.Condition{
Type: volrep.ConditionValidated,
Reason: volrep.PrerequisiteNotMet,
Reason: volrep.PrerequisiteMet,
ObservedGeneration: generation,
Status: metav1.ConditionFalse,
Status: metav1.ConditionTrue,
LastTransitionTime: lastTransitionTime,
Message: "volume is validated",
}

if options.ValidatedFailed {
validated.Status = metav1.ConditionFalse
validated.Reason = volrep.PrerequisiteNotMet
validated.Message = "failed to meet prerequisite: details..."
}

conditions = append(conditions, validated)
Expand All @@ -2424,11 +2442,13 @@ func (v *vrgTest) generateVRConditions(generation int64, options promoteOptions)
ObservedGeneration: generation,
Status: metav1.ConditionTrue,
LastTransitionTime: lastTransitionTime,
Message: "volume is completed",
}

if options.ValidatedFailed {
completed.Status = metav1.ConditionFalse
completed.Reason = volrep.FailedToPromote
completed.Message = "failed to promote"
}

degraded := metav1.Condition{
Expand All @@ -2437,13 +2457,15 @@ func (v *vrgTest) generateVRConditions(generation int64, options promoteOptions)
ObservedGeneration: generation,
Status: metav1.ConditionFalse,
LastTransitionTime: lastTransitionTime,
Message: "volume is healthy",
}
resyncing := metav1.Condition{
Type: volrep.ConditionResyncing,
Reason: volrep.NotResyncing,
ObservedGeneration: generation,
Status: metav1.ConditionFalse,
LastTransitionTime: lastTransitionTime,
Message: "volume is not resyncing",
}

return append(conditions, completed, degraded, resyncing)
Expand Down Expand Up @@ -2523,6 +2545,30 @@ func (v *vrgTest) waitForVolRepCondition(
"failed to wait for volRep condition %q to become %q", conditionType, conditionStatus)
}

func (v *vrgTest) waitForProtectedPVCCondition(
key types.NamespacedName,
conditionType string,
conditionStatus metav1.ConditionStatus,
conditionMessage string,
) {
Eventually(func() bool {
vrg := v.getVRG()
protectedPVC := vrgController.FindProtectedPVC(vrg, key.Namespace, key.Name)
if protectedPVC == nil {
return false
}

condition := meta.FindStatusCondition(protectedPVC.Conditions, conditionType)
if condition == nil {
return false
}

return condition.Status == conditionStatus && condition.Message == conditionMessage
}, vrgtimeout, vrginterval).Should(BeTrue(),
"failed to wait for protected pvc condition %q to become %q with message %q",
conditionType, conditionStatus, conditionMessage)
}

func (v *vrgTest) waitForProtectedPVCs(vrNamespacedName types.NamespacedName) {
Eventually(func() bool {
vrg := v.getVRG()
Expand Down