Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add examples for three existing failure policy actions. #601

60 changes: 60 additions & 0 deletions examples/failure-policy/failjobset-action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Example JobSet: the `FailJobSet` failure-policy action.
#
# Two replicated jobs are created: `leader` and `workers`. The leader pod with
# completion index 0 deliberately exits with status 1 after a 10-step
# countdown, which is the failure the rule below is written to match.
apiVersion: jobset.x-k8s.io/v1alpha2
kind: JobSet
metadata:
  name: failjobset-action-example
spec:
  failurePolicy:
    maxRestarts: 3
    rules:
      # The rule only targets the `leader` replicated job.
      # NOTE(review): per its name, FailJobSet should fail the JobSet outright
      # instead of restarting it, so maxRestarts would not come into play for
      # leader failures — confirm against the JobSet failure-policy docs.
      - action: FailJobSet
        targetReplicatedJobs:
          - leader
  replicatedJobs:
    - name: leader
      replicas: 1
      template:
        spec:
          # Set backoff limit to 0 so job will immediately fail if any pod fails.
          backoffLimit: 0
          completions: 2
          parallelism: 2
          template:
            spec:
              containers:
                # Index 0 counts down from 10 (one second per step) and then
                # exits 1; every other index prints 1..1000, one per second.
                # NOTE(review): JOB_COMPLETION_INDEX is only set for Indexed
                # Jobs and no completionMode is set here, so this relies on a
                # default — confirm.
                - name: leader
                  image: bash:latest
                  command:
                    - bash
                    - -xc
                    - |
                      echo "JOB_COMPLETION_INDEX=$JOB_COMPLETION_INDEX"
                      if [[ "$JOB_COMPLETION_INDEX" == "0" ]]; then
                        for i in $(seq 10 -1 1)
                        do
                          echo "Sleeping in $i"
                          sleep 1
                        done
                        exit 1
                      fi
                      for i in $(seq 1 1000)
                      do
                        echo "$i"
                        sleep 1
                      done
    - name: workers
      replicas: 1
      template:
        spec:
          backoffLimit: 0
          completions: 2
          parallelism: 2
          template:
            spec:
              containers:
                # Workers only sleep for 1000 seconds; they contain no
                # deliberate failure path.
                - name: worker
                  image: bash:latest
                  command:
                    - bash
                    - -xc
                    - |
                      sleep 1000
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Example JobSet: a failure-policy rule filtered by `onJobFailureReasons`,
# combined with a Job-level `podFailurePolicy`.
#
# The leader pod with completion index 0 exits with status 1 after a 10-step
# countdown. The leader Job's podFailurePolicy maps exit code 1 of the `leader`
# container to the FailJob action.
apiVersion: jobset.x-k8s.io/v1alpha2
kind: JobSet
metadata:
  name: onjobfailurereasons-podfailurepolicy-example
spec:
  failurePolicy:
    maxRestarts: 3
    rules:
      # The rule targets the `leader` replicated job and is further limited to
      # the `PodFailurePolicy` Job failure reason.
      # NOTE(review): per its name, this action restarts the JobSet without
      # being bounded by maxRestarts — confirm against the JobSet docs.
      - action: RestartJobSetAndIgnoreMaxRestarts
        targetReplicatedJobs:
          - leader
        onJobFailureReasons:
          - PodFailurePolicy
  replicatedJobs:
    - name: leader
      replicas: 1
      template:
        spec:
          # Set backoff limit to 0 so job will immediately fail if any pod fails.
          backoffLimit: 0
          completions: 2
          parallelism: 2
          template:
            spec:
              # NOTE(review): Kubernetes validation has required `Never` on the
              # pod template whenever the Job sets podFailurePolicy — keep this
              # unless confirmed otherwise.
              restartPolicy: Never
              containers:
                # Index 0 counts down from 10 (one second per step) and then
                # exits 1; every other index prints 1..1000, one per second.
                # NOTE(review): JOB_COMPLETION_INDEX is only set for Indexed
                # Jobs and no completionMode is set here, so this relies on a
                # default — confirm.
                - name: leader
                  image: bash:latest
                  command:
                    - bash
                    - -xc
                    - |
                      echo "JOB_COMPLETION_INDEX=$JOB_COMPLETION_INDEX"
                      if [[ "$JOB_COMPLETION_INDEX" == "0" ]]; then
                        for i in $(seq 10 -1 1)
                        do
                          echo "Sleeping in $i"
                          sleep 1
                        done
                        exit 1
                      fi
                      for i in $(seq 1 1000)
                      do
                        echo "$i"
                        sleep 1
                      done
          # Job-level policy: when the `leader` container exits with code 1,
          # apply FailJob.
          # NOTE(review): this is presumably what yields the `PodFailurePolicy`
          # failure reason matched by the JobSet rule above — confirm.
          podFailurePolicy:
            rules:
              - action: FailJob
                onPodConditions: []
                onExitCodes:
                  containerName: leader
                  operator: In
                  values: [1]
    - name: workers
      replicas: 1
      template:
        spec:
          backoffLimit: 0
          completions: 2
          parallelism: 2
          template:
            spec:
              containers:
                # Workers only sleep for 1000 seconds; they contain no
                # deliberate failure path.
                - name: worker
                  image: bash:latest
                  command:
                    - bash
                    - -xc
                    - |
                      sleep 1000
62 changes: 62 additions & 0 deletions examples/failure-policy/onjobfailurereasons-present.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Example JobSet: a failure-policy rule filtered by `onJobFailureReasons`.
#
# The leader pod with completion index 0 exits with status 1 after a 10-step
# countdown, and the leader Job is configured with `backoffLimit: 0`.
apiVersion: jobset.x-k8s.io/v1alpha2
kind: JobSet
metadata:
  name: onjobfailurereasons-present-example
spec:
  failurePolicy:
    maxRestarts: 3
    rules:
      # The rule targets the `leader` replicated job and is further limited to
      # the `BackoffLimitExceeded` Job failure reason.
      # NOTE(review): with backoffLimit: 0 below, a single leader pod failure
      # presumably produces that reason — confirm.
      # NOTE(review): per its name, this action restarts the JobSet without
      # being bounded by maxRestarts — confirm against the JobSet docs.
      - action: RestartJobSetAndIgnoreMaxRestarts
        targetReplicatedJobs:
          - leader
        onJobFailureReasons:
          - BackoffLimitExceeded
  replicatedJobs:
    - name: leader
      replicas: 1
      template:
        spec:
          # Set backoff limit to 0 so job will immediately fail if any pod fails.
          backoffLimit: 0
          completions: 2
          parallelism: 2
          template:
            spec:
              containers:
                # Index 0 counts down from 10 (one second per step) and then
                # exits 1; every other index prints 1..1000, one per second.
                # NOTE(review): JOB_COMPLETION_INDEX is only set for Indexed
                # Jobs and no completionMode is set here, so this relies on a
                # default — confirm.
                - name: leader
                  image: bash:latest
                  command:
                    - bash
                    - -xc
                    - |
                      echo "JOB_COMPLETION_INDEX=$JOB_COMPLETION_INDEX"
                      if [[ "$JOB_COMPLETION_INDEX" == "0" ]]; then
                        for i in $(seq 10 -1 1)
                        do
                          echo "Sleeping in $i"
                          sleep 1
                        done
                        exit 1
                      fi
                      for i in $(seq 1 1000)
                      do
                        echo "$i"
                        sleep 1
                      done
    - name: workers
      replicas: 1
      template:
        spec:
          backoffLimit: 0
          completions: 2
          parallelism: 2
          template:
            spec:
              containers:
                # Workers only sleep for 1000 seconds; they contain no
                # deliberate failure path.
                - name: worker
                  image: bash:latest
                  command:
                    - bash
                    - -xc
                    - |
                      sleep 1000
60 changes: 60 additions & 0 deletions examples/failure-policy/restartjobset-action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Example JobSet: the `RestartJobSet` failure-policy action.
#
# Two replicated jobs are created: `leader` and `workers`. The leader pod with
# completion index 0 deliberately exits with status 1 after a 10-step
# countdown, which is the failure the rule below is written to match.
apiVersion: jobset.x-k8s.io/v1alpha2
kind: JobSet
metadata:
  name: restartjobset-action-example
spec:
  failurePolicy:
    maxRestarts: 3
    rules:
      # The rule only targets the `leader` replicated job.
      # NOTE(review): RestartJobSet presumably restarts the JobSet and counts
      # each restart against maxRestarts — confirm against the JobSet docs.
      - action: RestartJobSet
        targetReplicatedJobs:
          - leader
  replicatedJobs:
    - name: leader
      replicas: 1
      template:
        spec:
          # Set backoff limit to 0 so job will immediately fail if any pod fails.
          backoffLimit: 0
          completions: 2
          parallelism: 2
          template:
            spec:
              containers:
                # Index 0 counts down from 10 (one second per step) and then
                # exits 1; every other index prints 1..1000, one per second.
                # NOTE(review): JOB_COMPLETION_INDEX is only set for Indexed
                # Jobs and no completionMode is set here, so this relies on a
                # default — confirm.
                - name: leader
                  image: bash:latest
                  command:
                    - bash
                    - -xc
                    - |
                      echo "JOB_COMPLETION_INDEX=$JOB_COMPLETION_INDEX"
                      if [[ "$JOB_COMPLETION_INDEX" == "0" ]]; then
                        for i in $(seq 10 -1 1)
                        do
                          echo "Sleeping in $i"
                          sleep 1
                        done
                        exit 1
                      fi
                      for i in $(seq 1 1000)
                      do
                        echo "$i"
                        sleep 1
                      done
    - name: workers
      replicas: 1
      template:
        spec:
          backoffLimit: 0
          completions: 2
          parallelism: 2
          template:
            spec:
              containers:
                # Workers only sleep for 1000 seconds; they contain no
                # deliberate failure path.
                - name: worker
                  image: bash:latest
                  command:
                    - bash
                    - -xc
                    - |
                      sleep 1000
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Example JobSet: the `RestartJobSetAndIgnoreMaxRestarts` failure-policy action.
#
# Two replicated jobs are created: `leader` and `workers`. The leader pod with
# completion index 0 deliberately exits with status 1 after a 10-step
# countdown, which is the failure the rule below is written to match.
apiVersion: jobset.x-k8s.io/v1alpha2
kind: JobSet
metadata:
  # rjimr stands for "restartjobsetandignoremaxrestarts"
  name: rjimr-action-example
spec:
  failurePolicy:
    maxRestarts: 3
    rules:
      # The rule only targets the `leader` replicated job.
      # NOTE(review): per its name, this action restarts the JobSet without
      # being bounded by maxRestarts — confirm against the JobSet docs.
      - action: RestartJobSetAndIgnoreMaxRestarts
        targetReplicatedJobs:
          - leader
  replicatedJobs:
    - name: leader
      replicas: 1
      template:
        spec:
          # Set backoff limit to 0 so job will immediately fail if any pod fails.
          backoffLimit: 0
          completions: 2
          parallelism: 2
          template:
            spec:
              containers:
                # Index 0 counts down from 10 (one second per step) and then
                # exits 1; every other index prints 1..1000, one per second.
                # NOTE(review): JOB_COMPLETION_INDEX is only set for Indexed
                # Jobs and no completionMode is set here, so this relies on a
                # default — confirm.
                - name: leader
                  image: bash:latest
                  command:
                    - bash
                    - -xc
                    - |
                      echo "JOB_COMPLETION_INDEX=$JOB_COMPLETION_INDEX"
                      if [[ "$JOB_COMPLETION_INDEX" == "0" ]]; then
                        for i in $(seq 10 -1 1)
                        do
                          echo "Sleeping in $i"
                          sleep 1
                        done
                        exit 1
                      fi
                      for i in $(seq 1 1000)
                      do
                        echo "$i"
                        sleep 1
                      done
    - name: workers
      replicas: 1
      template:
        spec:
          backoffLimit: 0
          completions: 2
          parallelism: 2
          template:
            spec:
              containers:
                # Workers only sleep for 1000 seconds; they contain no
                # deliberate failure path.
                - name: worker
                  image: bash:latest
                  command:
                    - bash
                    - -xc
                    - |
                      sleep 1000