Skip to content

Commit

Permalink
Add get/retry experiment commands. Support experiment retries (argo…
Browse files Browse the repository at this point in the history
  • Loading branch information
jessesuen authored Nov 6, 2019
1 parent 08112e1 commit 55c7875
Show file tree
Hide file tree
Showing 26 changed files with 1,080 additions and 508 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.5.0
0.6.0
52 changes: 52 additions & 0 deletions examples/analysis-templates.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# This file contains AnalysisTemplates referenced by Rollouts and Experiments in the examples.
# Please apply this file first, before running any of the examples.

---
# This AnalysisTemplate will run a Kubernetes Job every 5 seconds; the Job always succeeds.
kind: AnalysisTemplate
apiVersion: argoproj.io/v1alpha1
metadata:
name: pass
spec:
metrics:
- name: pass
interval: 5
maxFailures: 1
provider:
job:
spec:
template:
spec:
containers:
- name: sleep
image: alpine:3.8
command: [sh, -c]
args: [exit 0]
restartPolicy: Never
backoffLimit: 0

---
# This AnalysisTemplate will run a Kubernetes Job every 5 seconds, with a 50% chance of failure.
# When the number of accumulated failures exceeds maxFailures, it will cause the analysis run to
# fail, and subsequently cause the rollout or experiment to abort.
kind: AnalysisTemplate
apiVersion: argoproj.io/v1alpha1
metadata:
name: random-fail
spec:
metrics:
- name: random-fail
interval: 5
maxFailures: 1
provider:
job:
spec:
template:
spec:
containers:
- name: sleep
image: alpine:3.8
command: [sh, -c]
args: [FLIP=$(($(($RANDOM%10))%2)) && exit $FLIP]
restartPolicy: Never
backoffLimit: 0
43 changes: 43 additions & 0 deletions examples/experiment-with-analysis.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# This example demonstrates an experiment which starts two ReplicaSets with different images, and
# additionally starts an AnalysisRun in the background
#
# Prerequisites:
# * kubectl apply -f analysis-templates.yaml
#
apiVersion: argoproj.io/v1alpha1
kind: Experiment
metadata:
name: experiment-with-analysis
spec:
templates:
- name: purple
selector:
matchLabels:
app: rollouts-demo
template:
metadata:
labels:
app: rollouts-demo
spec:
containers:
- name: rollouts-demo
image: argoproj/rollouts-demo:purple
imagePullPolicy: Always
- name: orange
selector:
matchLabels:
app: rollouts-demo
template:
metadata:
labels:
app: rollouts-demo
spec:
containers:
- name: rollouts-demo
image: argoproj/rollouts-demo:orange
imagePullPolicy: Always
analyses:
- name: random-fail
templateName: random-fail
- name: pass
templateName: pass
30 changes: 4 additions & 26 deletions examples/rollout-analysis-step.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
# This example demonstrates a Rollout which starts and finishes analysis at a specific canary step
#
# Prerequisites:
# * kubectl apply -f analysis-templates.yaml
#
apiVersion: argoproj.io/v1alpha1
kind: Rollout
metadata:
Expand Down Expand Up @@ -31,29 +35,3 @@ spec:
- analysis:
name: random-fail
templateName: random-fail

---
# This AnalysisTemplate will run a Kubernetes Job every 5 seconds, with a 50% chance of failure.
# When the number of accumulated failures exceeds maxFailures, it will cause the analysis run to
# fail, and subsequently cause the rollout to abort.
kind: AnalysisTemplate
apiVersion: argoproj.io/v1alpha1
metadata:
name: random-fail
spec:
metrics:
- name: random-fail
interval: 5
maxFailures: 1
provider:
job:
spec:
template:
spec:
containers:
- name: sleep
image: alpine:3.8
command: [sh, -c]
args: [FLIP=$(($(($RANDOM%10))%2)) && exit $FLIP]
restartPolicy: Never
backoffLimit: 0
30 changes: 4 additions & 26 deletions examples/rollout-background-analysis.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
# This example demonstrates a Rollout which performs background analysis while the Rollout is updating.
#
# Prerequisites:
# * kubectl apply -f analysis-templates.yaml
#
apiVersion: argoproj.io/v1alpha1
kind: Rollout
metadata:
Expand Down Expand Up @@ -31,29 +35,3 @@ spec:
steps:
- setWeight: 25
- pause: {}

---
# This AnalysisTemplate will run a Kubernetes Job every 5 seconds, with a 50% chance of failure.
# When the number of accumulated failures exceeds maxFailures, it will cause the analysis run to
# fail, and subsequently cause the rollout to abort.
kind: AnalysisTemplate
apiVersion: argoproj.io/v1alpha1
metadata:
name: random-fail
spec:
metrics:
- name: random-fail
interval: 5
maxFailures: 1
provider:
job:
spec:
template:
spec:
containers:
- name: sleep
image: alpine:3.8
command: [sh, -c]
args: [FLIP=$(($(($RANDOM%10))%2)) && exit $FLIP]
restartPolicy: Never
backoffLimit: 0
32 changes: 5 additions & 27 deletions examples/rollout-experiment-step.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
# This example demonstrates a Rollout which begins an experiment at a specified step.
# The rollout willl not proceed to the next step until the experiment is completed and successful.
# The rollout will not proceed to the next step until the experiment is completed and successful.
# In this example, the experiment itself starts its own AnalysisRun which is tied to the experiment.
# This is useful for when analysis should be done only during the experimentation phase, but not
# during the regular update of the rollout.
#
# Prerequisites:
# * kubectl apply -f analysis-templates.yaml
#
apiVersion: argoproj.io/v1alpha1
kind: Rollout
metadata:
Expand Down Expand Up @@ -38,29 +42,3 @@ spec:
analyses:
- name: random-fail
templateName: random-fail

---
# This AnalysisTemplate will run a Kubernetes Job every 5 seconds, with a 50% chance of failure.
# When the number of accumulated failures exceeds maxFailures, it will cause the analysis run to
# fail, and subsequently cause the rollout to abort.
kind: AnalysisTemplate
apiVersion: argoproj.io/v1alpha1
metadata:
name: random-fail
spec:
metrics:
- name: random-fail
interval: 5
maxFailures: 1
provider:
job:
spec:
template:
spec:
containers:
- name: sleep
image: alpine:3.8
command: [sh, -c]
args: [FLIP=$(($(($RANDOM%10))%2)) && exit $FLIP]
restartPolicy: Never
backoffLimit: 0
23 changes: 4 additions & 19 deletions experiments/experiment.go
Original file line number Diff line number Diff line change
Expand Up @@ -337,31 +337,16 @@ func (ec *experimentContext) reconcileAnalysisRun(analysis v1alpha1.ExperimentAn
newStatus.Message = run.Status.Message
}

// createAnalysisRun creates the analysis run. If an existing runs exists with same name, and is
// semantically equal, returns the existing one, otherwise errors
// createAnalysisRun creates the analysis run. If an existing run exists with the same name, is
// semantically equal, and is not complete, it returns the existing one; otherwise it creates a new
// run with an incremented collision counter.
func (ec *experimentContext) createAnalysisRun(analysis v1alpha1.ExperimentAnalysisTemplateRef) (*v1alpha1.AnalysisRun, error) {
analysisRunIf := ec.argoProjClientset.ArgoprojV1alpha1().AnalysisRuns(ec.ex.Namespace)
run, err := ec.newAnalysisRun(analysis, analysis.Arguments)
if err != nil {
return nil, err
}
newRun, createErr := analysisRunIf.Create(run)
if createErr != nil {
if !k8serrors.IsAlreadyExists(createErr) {
return nil, createErr
}
existingRun, err := analysisRunIf.Get(run.Name, metav1.GetOptions{})
if err != nil {
return nil, err
}
controllerRef := metav1.GetControllerOf(existingRun)
if ec.ex.UID == controllerRef.UID && analysisutil.IsSemanticallyEqual(run.Spec, existingRun.Spec) {
ec.log.Infof("Claimed existing analysisrun %s", existingRun.Name)
return existingRun, nil
}
return nil, createErr
}
return newRun, nil
return analysisutil.CreateWithCollisionCounter(ec.log, analysisRunIf, *run)
}

func (ec *experimentContext) calculateStatus() *v1alpha1.ExperimentStatus {
Expand Down
Loading

0 comments on commit 55c7875

Please sign in to comment.