From a17b4e4f1a192b1479be0fa1f2b4f2b56e27f93b Mon Sep 17 00:00:00 2001 From: Hui Kang Date: Thu, 5 Aug 2021 16:15:54 -0400 Subject: [PATCH] feat: scaledown rs when update exceeds progressDeadlineSeconds - Add AbortExceedProgressDeadline to rollout spec - when rollout doesn't use analysis, set AbortExceedProgressDeadline to true to scale down the new RS. - e2e test added Signed-off-by: Hui Kang --- docs/features/specification.md | 4 ++ manifests/crds/rollout-crd.yaml | 2 + manifests/install.yaml | 2 + manifests/namespace-install.yaml | 2 + pkg/apiclient/rollout/rollout.swagger.json | 4 ++ pkg/apis/rollouts/v1alpha1/types.go | 4 ++ .../v1alpha1/zz_generated.deepcopy.go | 5 ++ rollout/sync.go | 5 ++ test/e2e/functional_test.go | 65 +++++++++++++++++++ utils/defaults/defaults.go | 9 +++ utils/defaults/defaults_test.go | 13 ++++ 11 files changed, 115 insertions(+) diff --git a/docs/features/specification.md b/docs/features/specification.md index d719d74ca6..6715cd063e 100644 --- a/docs/features/specification.md +++ b/docs/features/specification.md @@ -60,6 +60,10 @@ spec: # Defaults to 600s progressDeadlineSeconds: 600 + # Whether to abort the update when ProgressDeadlineSeconds + # is exceeded if analysis is not used. Optional and default is false. + AbortExceedProgressDeadline: false + # UTC timestamp in which a Rollout should sequentially restart all of # its pods. Used by the `kubectl argo rollouts restart ROLLOUT` command. # The controller will ensure all pods have a creationTimestamp greater diff --git a/manifests/crds/rollout-crd.yaml b/manifests/crds/rollout-crd.yaml index 6c5cb0a944..e3073cb523 100644 --- a/manifests/crds/rollout-crd.yaml +++ b/manifests/crds/rollout-crd.yaml @@ -47,6 +47,8 @@ spec: type: object spec: properties: + abortExceedProgressDeadline: + type: boolean analysis: properties: successfulRunHistoryLimit: diff --git a/manifests/install.yaml b/manifests/install.yaml index 8c6b321ab2..099fc13880 100644 --- a/manifests/install.yaml +++ b/manifests/install.yaml @@ -9745,6 +9745,8 @@ spec: type: object spec: properties: + abortExceedProgressDeadline: + type: boolean analysis: properties: successfulRunHistoryLimit: diff --git a/manifests/namespace-install.yaml b/manifests/namespace-install.yaml index 2fc1caf4b0..2b160d815c 100644 --- a/manifests/namespace-install.yaml +++ b/manifests/namespace-install.yaml @@ -9745,6 +9745,8 @@ spec: type: object spec: properties: + abortExceedProgressDeadline: + type: boolean analysis: properties: successfulRunHistoryLimit: diff --git a/pkg/apiclient/rollout/rollout.swagger.json b/pkg/apiclient/rollout/rollout.swagger.json index 7657b2867c..f61e7703c0 100644 --- a/pkg/apiclient/rollout/rollout.swagger.json +++ b/pkg/apiclient/rollout/rollout.swagger.json @@ -1178,6 +1178,10 @@ "format": "int32", "description": "ProgressDeadlineSeconds The maximum time in seconds for a rollout to\nmake progress before it is considered to be failed. Argo Rollouts will\ncontinue to process failed rollouts and a condition with a\nProgressDeadlineExceeded reason will be surfaced in the rollout status.\nNote that progress will not be estimated during the time a rollout is paused.\nDefaults to 600s." }, + "abortExceedProgressDeadline": { + "type": "boolean", + "title": "AbortExceedProgressDeadline to whether abort the update when ProgressDeadlineSeconds\nis exceeded if analysis is not used. Default is false.\n+optional" + }, "restartAt": { "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.Time", "title": "RestartAt indicates when all the pods of a Rollout should be restarted" diff --git a/pkg/apis/rollouts/v1alpha1/types.go b/pkg/apis/rollouts/v1alpha1/types.go index bfd2475538..53d77aa03a 100644 --- a/pkg/apis/rollouts/v1alpha1/types.go +++ b/pkg/apis/rollouts/v1alpha1/types.go @@ -69,6 +69,10 @@ type RolloutSpec struct { // Note that progress will not be estimated during the time a rollout is paused. // Defaults to 600s. ProgressDeadlineSeconds *int32 `json:"progressDeadlineSeconds,omitempty" protobuf:"varint,8,opt,name=progressDeadlineSeconds"` + // AbortExceedProgressDeadline to whether abort the update when ProgressDeadlineSeconds + // is exceeded if analysis is not used. Default is false. + // +optional + AbortExceedProgressDeadline *bool `json:"abortExceedProgressDeadline,omitempty" protobuf:"varint,12,opt,name=abortExceedProgressDeadline"` // RestartAt indicates when all the pods of a Rollout should be restarted RestartAt *metav1.Time `json:"restartAt,omitempty" protobuf:"bytes,9,opt,name=restartAt"` // Analysis configuration for the analysis runs to retain diff --git a/pkg/apis/rollouts/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/rollouts/v1alpha1/zz_generated.deepcopy.go index aa31789c56..973d0de76f 100644 --- a/pkg/apis/rollouts/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/rollouts/v1alpha1/zz_generated.deepcopy.go @@ -1643,6 +1643,11 @@ func (in *RolloutSpec) DeepCopyInto(out *RolloutSpec) { *out = new(int32) **out = **in } + if in.AbortExceedProgressDeadline != nil { + in, out := &in.AbortExceedProgressDeadline, &out.AbortExceedProgressDeadline + *out = new(bool) + **out = **in + } if in.RestartAt != nil { in, out := &in.RestartAt, &out.RestartAt *out = (*in).DeepCopy() diff --git a/rollout/sync.go b/rollout/sync.go index d541ece498..f25b6d0151 100644 --- a/rollout/sync.go +++ b/rollout/sync.go @@ -617,6 +617,11 @@ func (c *rolloutContext) calculateRolloutConditions(newStatus v1alpha1.RolloutSt // Check for progress. Only do this if the latest rollout hasn't completed yet and it is not aborted if !isCompleteRollout && !isAborted { switch { + case conditions.RolloutTimedOut(c.rollout, &newStatus) && defaults.GetAbortExceedProgressDeadlineOrDefault(c.rollout): + msg := fmt.Sprintf(conditions.RolloutTimeOutMessage, c.rollout.Name) + c.pauseContext.AddAbort(msg) + abortCondition := conditions.NewRolloutCondition(v1alpha1.RolloutPaused, corev1.ConditionTrue, conditions.TimedOutReason, msg) + conditions.SetRolloutCondition(&newStatus, *abortCondition) case conditions.RolloutComplete(c.rollout, &newStatus): // Update the rollout conditions with a message for the new replica set that // was successfully deployed. If the condition already exists, we ignore this update. diff --git a/test/e2e/functional_test.go b/test/e2e/functional_test.go index fc381773f3..16b03f9465 100644 --- a/test/e2e/functional_test.go +++ b/test/e2e/functional_test.go @@ -885,6 +885,71 @@ spec: ExpectReplicaCounts(1, 2, 1, 1, 1) } +// TestBlueGreenAbortExceedProgressDeadline verifies the AbortExceedProgressDeadline feature +func (s *FunctionalSuite) TestBlueGreenAbortExceedProgressDeadline() { + s.Given(). + RolloutObjects(newService("bluegreen-scaledowndelay-active")). + RolloutObjects(` +apiVersion: argoproj.io/v1alpha1 +kind: Rollout +metadata: + name: bluegreen-scaledowndelay +spec: + replicas: 1 + strategy: + blueGreen: + activeService: bluegreen-scaledowndelay-active + abortScaleDownDelaySeconds: 2 + selector: + matchLabels: + app: bluegreen-scaledowndelay + template: + metadata: + labels: + app: bluegreen-scaledowndelay + spec: + containers: + - name: bluegreen-scaledowndelay + image: nginx:1.19-alpine + resources: + requests: + memory: 16Mi + cpu: 1m +`). + When(). + ApplyManifests(). + WaitForRolloutStatus("Healthy"). + PatchSpec(` +spec: + abortExceedProgressDeadline: false + progressDeadlineSeconds: 1 + template: + spec: + containers: + - name: bad2good + image: nginx:1.19-alpine-argo-error + command: null`). + WaitForRolloutStatus("Degraded"). + Sleep(3*time.Second). + Then(). + ExpectRevisionPodCount("2", 1). + When(). + PatchSpec(` +spec: + abortExceedProgressDeadline: true + progressDeadlineSeconds: 1 + template: + spec: + containers: + - name: bad2good + image: nginx:1.19-alpine-argo-error + command: null`). + WaitForRolloutStatus("Degraded"). + Sleep(3*time.Second). + Then(). + ExpectRevisionPodCount("2", 0) +} + // TestBlueGreenScaleDownOnAbort verifies the scaleDownOnAbort feature func (s *FunctionalSuite) TestBlueGreenScaleDownOnAbort() { s.Given(). diff --git a/utils/defaults/defaults.go b/utils/defaults/defaults.go index cd6e864a11..ca84594b82 100644 --- a/utils/defaults/defaults.go +++ b/utils/defaults/defaults.go @@ -26,6 +26,8 @@ const ( DefaultMaxUnavailable = "25" // DefaultProgressDeadlineSeconds default number of seconds for the rollout to be making progress DefaultProgressDeadlineSeconds = int32(600) + // DefaultAbortExceedProgressDeadline + DefaultAbortExceedProgressDeadline = false // DefaultScaleDownDelaySeconds default seconds before scaling down old replicaset after switching services DefaultScaleDownDelaySeconds = int32(30) // DefaultAbortScaleDownDelaySeconds default seconds before scaling down old replicaset after switching services @@ -116,6 +118,13 @@ func GetExperimentProgressDeadlineSecondsOrDefault(e *v1alpha1.Experiment) int32 return DefaultProgressDeadlineSeconds } +func GetAbortExceedProgressDeadlineOrDefault(rollout *v1alpha1.Rollout) bool { + if rollout.Spec.AbortExceedProgressDeadline != nil { + return *rollout.Spec.AbortExceedProgressDeadline + } + return DefaultAbortExceedProgressDeadline +} + func GetExperimentScaleDownDelaySecondsOrDefault(e *v1alpha1.Experiment) int32 { if e.Spec.ScaleDownDelaySeconds != nil { return *e.Spec.ScaleDownDelaySeconds diff --git a/utils/defaults/defaults_test.go b/utils/defaults/defaults_test.go index a3e5322c3e..cbddf4a752 100644 --- a/utils/defaults/defaults_test.go +++ b/utils/defaults/defaults_test.go @@ -153,6 +153,19 @@ func TestGetProgressDeadlineSecondsOrDefault(t *testing.T) { assert.Equal(t, DefaultProgressDeadlineSeconds, GetProgressDeadlineSecondsOrDefault(rolloutDefaultValue)) } +func TestGetAbortExceedProgressDeadlineOrDefault(t *testing.T) { + abort := true + rolloutNonDefaultValue := &v1alpha1.Rollout{ + Spec: v1alpha1.RolloutSpec{ + AbortExceedProgressDeadline: &abort, + }, + } + + assert.Equal(t, abort, GetAbortExceedProgressDeadlineOrDefault(rolloutNonDefaultValue)) + rolloutDefaultValue := &v1alpha1.Rollout{} + assert.Equal(t, DefaultAbortExceedProgressDeadline, GetAbortExceedProgressDeadlineOrDefault(rolloutDefaultValue)) +} + func TestGetScaleDownDelaySecondsOrDefault(t *testing.T) { { scaleDownDelaySeconds := int32(60)