Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: ability to abort an update when exceeding progressDeadlineSeconds #1397

Merged
merged 7 commits into from
Aug 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/features/specification.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ spec:
# Defaults to 600s
progressDeadlineSeconds: 600

# Whether to abort the update when ProgressDeadlineSeconds
# is exceeded if analysis or experiment is not used.
# Optional and default is false.
progressDeadlineAbort: false

# UTC timestamp in which a Rollout should sequentially restart all of
# its pods. Used by the `kubectl argo rollouts restart ROLLOUT` command.
# The controller will ensure all pods have a creationTimestamp greater
Expand Down
2 changes: 2 additions & 0 deletions manifests/crds/rollout-crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ spec:
type: integer
paused:
type: boolean
progressDeadlineAbort:
type: boolean
progressDeadlineSeconds:
format: int32
type: integer
Expand Down
2 changes: 2 additions & 0 deletions manifests/install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9759,6 +9759,8 @@ spec:
type: integer
paused:
type: boolean
progressDeadlineAbort:
type: boolean
progressDeadlineSeconds:
format: int32
type: integer
Expand Down
2 changes: 2 additions & 0 deletions manifests/namespace-install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9759,6 +9759,8 @@ spec:
type: integer
paused:
type: boolean
progressDeadlineAbort:
type: boolean
progressDeadlineSeconds:
format: int32
type: integer
Expand Down
4 changes: 4 additions & 0 deletions pkg/apiclient/rollout/rollout.swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -1185,6 +1185,10 @@
"format": "int32",
"description": "ProgressDeadlineSeconds The maximum time in seconds for a rollout to\nmake progress before it is considered to be failed. Argo Rollouts will\ncontinue to process failed rollouts and a condition with a\nProgressDeadlineExceeded reason will be surfaced in the rollout status.\nNote that progress will not be estimated during the time a rollout is paused.\nDefaults to 600s."
},
"progressDeadlineAbort": {
"type": "boolean",
"title": "ProgressDeadlineAbort is whether to abort the update when ProgressDeadlineSeconds\nis exceeded if analysis is not used. Default is false.\n+optional"
},
"restartAt": {
"$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.Time",
"title": "RestartAt indicates when all the pods of a Rollout should be restarted"
Expand Down
4 changes: 4 additions & 0 deletions pkg/apis/rollouts/v1alpha1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ type RolloutSpec struct {
// Note that progress will not be estimated during the time a rollout is paused.
// Defaults to 600s.
ProgressDeadlineSeconds *int32 `json:"progressDeadlineSeconds,omitempty" protobuf:"varint,8,opt,name=progressDeadlineSeconds"`
// ProgressDeadlineAbort is whether to abort the update when ProgressDeadlineSeconds
// is exceeded if analysis is not used. Default is false.
// +optional
ProgressDeadlineAbort bool `json:"progressDeadlineAbort,omitempty" protobuf:"varint,12,opt,name=progressDeadlineAbort"`
// RestartAt indicates when all the pods of a Rollout should be restarted
RestartAt *metav1.Time `json:"restartAt,omitempty" protobuf:"bytes,9,opt,name=restartAt"`
// Analysis configuration for the analysis runs to retain
Expand Down
61 changes: 61 additions & 0 deletions rollout/bluegreen_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,67 @@ func TestBlueGreenSetPreviewService(t *testing.T) {
f.verifyPatchedService(servicePatch, rsPodHash, "")
}

// TestBlueGreenProgressDeadlineAbort tests aborting an update if it is timeout
func TestBlueGreenProgressDeadlineAbort(t *testing.T) {
// Two cases to be tested:
// 1. the rollout is making progress, but timeout just happens
// 2. the rollout is not making progress due to timeout and the rollout spec
// is changed to set ProgressDeadlineAbort
tests := []bool{true, false}

var runRolloutProgressDeadlineAbort func(isTimeout bool)
runRolloutProgressDeadlineAbort = func(isTimeout bool) {
f := newFixture(t)
defer f.Close()

r := newBlueGreenRollout("foo", 1, nil, "active", "preview")
progressDeadlineSeconds := int32(1)
r.Spec.ProgressDeadlineSeconds = &progressDeadlineSeconds
r.Spec.ProgressDeadlineAbort = true

f.rolloutLister = append(f.rolloutLister, r)
f.objects = append(f.objects, r)

rs := newReplicaSetWithStatus(r, 1, 1)
r.Status.UpdatedReplicas = 1
r.Status.ReadyReplicas = 1
r.Status.AvailableReplicas = 1

rsPodHash := rs.Labels[v1alpha1.DefaultRolloutUniqueLabelKey]

var progressingTimeoutCond *v1alpha1.RolloutCondition
if isTimeout {
msg := fmt.Sprintf("ReplicaSet %q has timed out progressing.", "foo-"+rsPodHash)
progressingTimeoutCond = conditions.NewRolloutCondition(v1alpha1.RolloutProgressing, corev1.ConditionFalse, conditions.TimedOutReason, msg)
} else {
progressingTimeoutCond = conditions.NewRolloutCondition(v1alpha1.RolloutProgressing, corev1.ConditionTrue, conditions.TimedOutReason, conditions.TimedOutReason)
}
conditions.SetRolloutCondition(&r.Status, *progressingTimeoutCond)

r.Status.BlueGreen.ActiveSelector = rsPodHash
r.Status.BlueGreen.PreviewSelector = rsPodHash

previewSvc := newService("preview", 80, nil, r)
selector := map[string]string{v1alpha1.DefaultRolloutUniqueLabelKey: rsPodHash}
activeSvc := newService("active", 80, selector, r)
f.kubeobjects = append(f.kubeobjects, previewSvc, activeSvc)
f.serviceLister = append(f.serviceLister, previewSvc, activeSvc)

f.kubeobjects = append(f.kubeobjects, rs)
f.replicaSetLister = append(f.replicaSetLister, rs)

f.expectPatchServiceAction(previewSvc, rsPodHash)
patchIndex := f.expectPatchRolloutAction(r)
f.run(getKey(r, t))

f.verifyPatchedRolloutAborted(patchIndex, "foo-"+rsPodHash)
}

for _, tc := range tests {
runRolloutProgressDeadlineAbort(tc)
}
}

//TestSetServiceManagedBy ensures the managed by annotation is set in the service is set
func TestSetServiceManagedBy(t *testing.T) {
f := newFixture(t)
Expand Down
15 changes: 15 additions & 0 deletions rollout/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -877,6 +877,21 @@ func (f *fixture) verifyPatchedService(index int, newPodHash string, managedBy s
assert.Equal(f.t, patch, string(patchAction.GetPatch()))
}

func (f *fixture) verifyPatchedRolloutAborted(index int, rsName string) {
action := filterInformerActions(f.kubeclient.Actions())[index]
_, ok := action.(core.PatchAction)
if !ok {
assert.Fail(f.t, "Expected Patch action, not %s", action.GetVerb())
}

ro := f.getPatchedRolloutAsObject(index)
assert.NotNil(f.t, ro)
assert.True(f.t, ro.Status.Abort)
assert.Equal(f.t, v1alpha1.RolloutPhaseDegraded, ro.Status.Phase)
expectedMsg := fmt.Sprintf("ProgressDeadlineExceeded: ReplicaSet %q has timed out progressing.", rsName)
assert.Equal(f.t, expectedMsg, ro.Status.Message)
}

func (f *fixture) verifyPatchedAnalysisRun(index int, ar *v1alpha1.AnalysisRun) bool {
action := filterInformerActions(f.client.Actions())[index]
patchAction, ok := action.(core.PatchAction)
Expand Down
18 changes: 17 additions & 1 deletion rollout/sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -648,8 +648,24 @@ func (c *rolloutContext) calculateRolloutConditions(newStatus v1alpha1.RolloutSt
if c.newRS != nil {
msg = fmt.Sprintf(conditions.ReplicaSetTimeOutMessage, c.newRS.Name)
}

condition := conditions.NewRolloutCondition(v1alpha1.RolloutProgressing, corev1.ConditionFalse, conditions.TimedOutReason, msg)
conditions.SetRolloutCondition(&newStatus, *condition)
condChanged := conditions.SetRolloutCondition(&newStatus, *condition)

// If condition is changed and ProgressDeadlineAbort is set, abort the update
if condChanged {
if c.rollout.Spec.ProgressDeadlineAbort {
c.pauseContext.AddAbort(msg)
c.recorder.Warnf(c.rollout, record.EventOptions{EventReason: conditions.RolloutAbortedReason}, msg)
}
} else {
// Although condition is unchanged, ProgressDeadlineAbort can be set after
// an existing update timeout. In this case if update is not aborted, we need to abort.
if c.rollout.Spec.ProgressDeadlineAbort && c.pauseContext != nil && !c.pauseContext.IsAborted() {
c.pauseContext.AddAbort(msg)
c.recorder.Warnf(c.rollout, record.EventOptions{EventReason: conditions.RolloutAbortedReason}, msg)
}
}
}
}

Expand Down
71 changes: 71 additions & 0 deletions test/e2e/functional_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -889,6 +889,77 @@ spec:
ExpectReplicaCounts(1, 2, 1, 1, 1)
}

// TestBlueGreenAbortExceedProgressDeadline verifies the AbortExceedProgressDeadline feature
func (s *FunctionalSuite) TestBlueGreenExceedProgressDeadlineAbort() {
s.Given().
RolloutObjects(newService("bluegreen-scaledowndelay-active")).
RolloutObjects(`
apiVersion: argoproj.io/v1alpha1
kind: Rollout
metadata:
name: bluegreen-scaledowndelay
spec:
replicas: 1
strategy:
blueGreen:
activeService: bluegreen-scaledowndelay-active
abortScaleDownDelaySeconds: 2
selector:
matchLabels:
app: bluegreen-scaledowndelay
template:
metadata:
labels:
app: bluegreen-scaledowndelay
spec:
containers:
- name: bluegreen-scaledowndelay
image: nginx:1.19-alpine
resources:
requests:
memory: 16Mi
cpu: 1m
`).
When().
ApplyManifests().
WaitForRolloutStatus("Healthy").
PatchSpec(`
spec:
progressDeadlineAbort: false
progressDeadlineSeconds: 1
template:
spec:
containers:
- name: bad2good
image: nginx:1.19-alpine-argo-error
command: null`).
WaitForRolloutStatus("Degraded").
Sleep(3*time.Second).
Then().
ExpectRevisionPodCount("2", 1).
ExpectRollout("Abort=False", func(r *v1alpha1.Rollout) bool {
return r.Status.Abort == false
}).
When().
PatchSpec(`
spec:
progressDeadlineAbort: true
progressDeadlineSeconds: 1
template:
spec:
containers:
- name: bad2good
image: nginx:1.19-alpine-argo-error
command: null`).
WaitForRolloutStatus("Degraded").
Sleep(3*time.Second).
Then().
ExpectRevisionPodCount("2", 0).
ExpectRollout("Abort=True", func(r *v1alpha1.Rollout) bool {
return r.Status.Abort == true && len(r.Status.Conditions) == 3
})
}

// TestBlueGreenScaleDownOnAbort verifies the scaleDownOnAbort feature
func (s *FunctionalSuite) TestBlueGreenScaleDownOnAbort() {
s.Given().
Expand Down
4 changes: 2 additions & 2 deletions utils/conditions/conditions.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,8 @@ func GetRolloutCondition(status v1alpha1.RolloutStatus, condType v1alpha1.Rollou
}

// SetRolloutCondition updates the rollout to include the provided condition. If the condition that
// we are about to add already exists and has the same status and reason then we are not going to update.
// Returns true if the condition was updated
// we are about to add already exists and has the same status and reason, then we are not going to update
// by returning false. Returns true if the condition was updated
func SetRolloutCondition(status *v1alpha1.RolloutStatus, condition v1alpha1.RolloutCondition) bool {
currentCond := GetRolloutCondition(*status, condition.Type)
if currentCond != nil && currentCond.Status == condition.Status && currentCond.Reason == condition.Reason {
Expand Down