Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revamp how Tekton pipelines to run notebooks work. #703

Merged
merged 6 commits into from
Jun 29, 2020
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,29 @@ hydrate:
kustomize build -o $(REPO_DIRS)/kf-ci-v1/namespaces/auto-deploy $(TEKTON_INSTALLS)/auto-deploy
kustomize build -o $(REPO_DIRS)/kf-ci-v1/namespaces/kf-ci $(TEKTON_INSTALLS)/kf-ci

# Build the test worker image with skaffold (profile "testing") and write the
# build result to images/latest_image.json so set-worker-image can read it.
.PHONY: build-worker-image
build-worker-image:
	cd images && skaffold build -p testing --kube-context=kubeflow-testing -v info --file-output=latest_image.json

# Point the Tekton manifests under ./tekton at the most recently built worker
# image: read the tag of the first build from images/latest_image.json with yq
# and pass it to the kpt `test-image` setter.
# The yq path is quoted so the shell never treats `[0]` as a glob pattern.
.PHONY: set-worker-image
set-worker-image:
	kpt cfg set ./tekton test-image $(shell yq r ./images/latest_image.json 'builds[0].tag')

# Build a new worker image and then update the manifests to reference it.
# NOTE(review): this relies on prerequisites running left to right, which holds
# for a serial make but is not guaranteed under `make -j`.
.PHONY: update-worker-image
update-worker-image: build-worker-image set-worker-image

# This is a debug rule providing some sugar to hydrate the manifests, commit
# and push them, wait for the sync, and then submit a run of the pipeline.
# The git remote (jlewi) and the kubectl context (kf-ci-v1) are hard coded and
# may need to be customized for your usage.
# $(MAKE) is used for the recursive call so command-line flags and variables
# are passed on to the sub-make.
.PHONY: debug-push-and-run
debug-push-and-run:
	$(MAKE) hydrate && git add . && git commit -m "Latest" && git push jlewi
	cd ./go/cmd/nomos-wait && go run .
	kubectl --context=kf-ci-v1 create -f ./tekton/runs/nb-test-run.yaml

# This is a debug rule providing some sugar for fast iteration during development.
# It might need to be customized for your usage (the git remote and the kubectl
# context are hard coded).
#   1. `make update-worker-image` builds and sets a new worker image.
#   2. `make hydrate ...` rehydrates and pushes the Tekton resources.
#   3. nomos-wait waits for the latest nomos changes to be synced.
#   4. Finally we submit a run of the pipeline.
# $(MAKE) is used for the recursive calls so command-line flags and variables
# are passed on to the sub-makes.
.PHONY: debug-rebuild-and-run
debug-rebuild-and-run:
	$(MAKE) update-worker-image
	$(MAKE) hydrate && git add . && git commit -m "Latest" && git push jlewi
	cd ./go/cmd/nomos-wait && go run .
	kubectl --context=kf-ci-v1 create -f ./tekton/runs/nb-test-run.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,16 @@ spec:
description: Test targe name, used to group test results in JUNIT.
name: test-target-name
type: string
- description: This should be the bucket that the rendered notebook will be written
to. This should be a GCS path that is accessible from the KF cluster where the
notebook runs. It will be copied to artifacts-gcs
name: notebook-output
type: string
- description: Path to the notebook to run. This should be the relative path relative
to the root of the repository where the notebook lives. Do not include a leading
"/"
name: notebook-path
type: string
- description: GCS bucket and directory artifacts will be uploaded to. Should be
in the form of 'gs://'
name: artifacts-gcs
Expand All @@ -24,36 +34,41 @@ spec:
- description: Location to search for test clusters.
name: testing-cluster-location
type: string
- description: Directory to write outputs to in local FS.
name: output-workspace
type: string
resources:
- name: examples-repo
- name: notebook-repo
type: git
- name: testing-repo
type: git
- name: image
type: image
tasks:
- name: mnist-gcp
- name: build-image
params:
- name: artifacts-gcs
value: $(params.artifacts-gcs)
resources:
inputs:
- name: notebook-repo
resource: notebook-repo
- name: image
resource: image
taskRef:
name: notebook-test-builder
- name: run-notebook
params:
- name: notebook-path
value: kubeflow/examples/mnist/mnist_gcp.ipynb
- name: junit-path
value: $(params.output-workspace)/$(params.junit-path)/junit_mnist-gcp.xml
value: $(params.notebook-path)
- name: test-target-name
value: $(params.test-target-name)
- name: output-workspace
value: $(params.output-workspace)
- name: notebook-output
value: $(params.notebook-output)
- name: artifacts-gcs
value: $(params.artifacts-gcs)
- name: testing-cluster-pattern
value: $(params.testing-cluster-pattern)
- name: testing-cluster-location
value: $(params.testing-cluster-location)
resources:
inputs:
- name: examples-repo
resource: examples-repo
- name: kf-testing-repo
resource: testing-repo
runAfter:
- build-image
taskRef:
name: nb-tests
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ spec:
value: /workspace/kubeconfig
- name: PYTHONPATH
value: /workspace/$(inputs.resources.testing-repo.name)/py
image: gcr.io/kubeflow-ci/test-worker-py3:6f0d932-dirty@sha256:06ebe5412d638e3e51bdd792aecbafdc4ee1e7146ff367a7be346cd726738cbb
image: gcr.io/kubeflow-ci/test-worker-py3:3780b5d-dirty@sha256:4a766d6f5cc6cbcb00dbc96205f7a5b2816bc5f2b6d516fd67124d4a3e6508ea
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does building these images work? Are the git SHAs hard-coded, or are they auto-updated?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The images are currently built using skaffold. We use a CLI option with skaffold to emit the URL of the image to a json file. We then use kpt to change the images to point at the new image. There's a make rule to provide syntactic sugar to string these commands together.

Ideally we would automate this so that on postsubmit new images would be automatically built and a PR opened to update all the images.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool, thanks for the explanation!

name: create-context
- command:
- python
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ spec:
env:
- name: KUBECONFIG
value: /workspace/kubeconfig
image: gcr.io/kubeflow-ci/test-worker-py3:6f0d932-dirty@sha256:06ebe5412d638e3e51bdd792aecbafdc4ee1e7146ff367a7be346cd726738cbb
image: gcr.io/kubeflow-ci/test-worker-py3:3780b5d-dirty@sha256:4a766d6f5cc6cbcb00dbc96205f7a5b2816bc5f2b6d516fd67124d4a3e6508ea
steps:
- command:
- /workspace/$(inputs.resources.blueprint-repo.name)/kubeflow/hack/create_context.sh
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ spec:
be in the form of 'gs://'
name: artifacts-gcs
type: string
- default: gcr.io/kubeflow-ci/test-worker-py3:6f0d932-dirty@sha256:06ebe5412d638e3e51bdd792aecbafdc4ee1e7146ff367a7be346cd726738cbb
- default: gcr.io/kubeflow-ci/test-worker-py3:3780b5d-dirty@sha256:4a766d6f5cc6cbcb00dbc96205f7a5b2816bc5f2b6d516fd67124d4a3e6508ea
description: The docker image to run the tests in
name: test-image
type: string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ spec:
be in the form of 'gs://'
name: artifacts-gcs
type: string
- default: gcr.io/kubeflow-ci/test-worker-py3:6f0d932-dirty@sha256:06ebe5412d638e3e51bdd792aecbafdc4ee1e7146ff367a7be346cd726738cbb
- default: gcr.io/kubeflow-ci/test-worker-py3:3780b5d-dirty@sha256:4a766d6f5cc6cbcb00dbc96205f7a5b2816bc5f2b6d516fd67124d4a3e6508ea
description: The docker image to run the tests in
name: test-image
type: string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ spec:
be in the form of 'gs://'
name: artifacts-gcs
type: string
- default: gcr.io/kubeflow-ci/test-worker-py3:6f0d932-dirty@sha256:06ebe5412d638e3e51bdd792aecbafdc4ee1e7146ff367a7be346cd726738cbb
- default: gcr.io/kubeflow-ci/test-worker-py3:3780b5d-dirty@sha256:4a766d6f5cc6cbcb00dbc96205f7a5b2816bc5f2b6d516fd67124d4a3e6508ea
description: The docker image to run the tests in. Should contain a version
of kubeflow/testing/py in /srcCache that we want to use.
name: test-image
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ metadata:
spec:
inputs:
params:
- description: Testing notebook location. Should be in the form of {REPO_OWNER}/{REPO}/path/to/notebook.ipynb
- description: Path to the notebook to run. This should be the relative path relative
to the root of the repository where the notebook lives.
name: notebook-path
type: string
- description: Cluster pattern to run the notebook test. Default to be from master
Expand All @@ -18,32 +19,27 @@ spec:
- description: Location to search for test clusters e.g. us-central1 or us-central1-f
name: testing-cluster-location
type: string
- description: This should be the bucket that the rendered notebook will be written
to. This should be a GCS path that is accessible from the KF cluster where
the notebook runs. It will be copied to artifacts-gcs
name: notebook-output
type: string
- description: GCS bucket and directory artifacts will be uploaded to. Should
be in the form of 'gs://'
name: artifacts-gcs
type: string
- description: Relative path to the GCS artifacts will be uploaded to. Base path
is artifacts-gcs so the actual GCS blob will be artifacts-gcs/junit-path
name: junit-path
type: string
- default: manual-testing
description: Test targe name, used to group test results in JUNIT.
name: test-target-name
type: string
- description: Directory to write outputs to in local FS.
name: output-workspace
type: string
- default: default-profile
description: The namespace to run the notebook in
name: nb-namespace
type: string
resources:
- name: examples-repo
targetPath: src/kubeflow/examples
type: git
- name: kf-testing-repo
targetPath: src/kubeflow/testing
type: git
- default: gcr.io/kubeflow-ci/test-worker-py3:3780b5d-dirty@sha256:4a766d6f5cc6cbcb00dbc96205f7a5b2816bc5f2b6d516fd67124d4a3e6508ea
description: The docker image to run the tests in
name: test-image
type: string
steps:
- args:
- -m
Expand All @@ -52,62 +48,49 @@ spec:
- --location=$(inputs.params.testing-cluster-location)
- get-credentials
command:
- python3
- python
env:
- name: PYTHONPATH
value: /workspace/src/kubeflow/examples/py:/workspace/src/kubeflow/testing/py
- name: GOOGLE_APPLICATION_CREDENTIALS
value: /secret/gcp-credentials/key.json
image: gcr.io/kubeflow-ci/test-worker:latest
value: /srcCache/kubeflow/testing/py
image: $(inputs.params.test-image)
name: get-credential
volumeMounts:
- mountPath: /secret/gcp-credentials
name: gcp-credentials
readOnly: true
- env:
- name: PYTHONPATH
value: /workspace/src/kubeflow/examples/py:/workspace/src/kubeflow/testing/py
- name: GOOGLE_APPLICATION_CREDENTIALS
value: /secret/gcp-credentials/key.json
image: gcr.io/kubeflow-ci/test-worker:latest
value: /srcCache/kubeflow/testing/py
image: $(inputs.params.test-image)
name: run-notebook
script: |
#!/usr/bin/env bash
set -x
mkdir -p /workspace/artifacts
pytest run_notebook_test.py \
--log-cli-level=info \
--log-cli-format='%(levelname)s|%(asctime)s|%(pathname)s|%(lineno)d| %(message)s' \
--timeout=1800 \
--junitxml=$(inputs.params.junit-path) \
--notebook_path=$(inputs.params.notebook-path) \
--junitxml=/workspace/artifacts/junit_notebook.xml \
--notebook_path=/src/notebook-repo/$(inputs.params.notebook-path) \
--test-target-name=$(inputs.params.test-target-name) \
--artifacts-gcs=$(inputs.params.artifacts-gcs) \
--artifacts-gcs=$(inputs.params.notebook-output) \
--image_file=$(inputs.params.artifacts-gcs)/image.yaml
--namespace=$(inputs.params.nb-namespace)
|| echo test finished.
volumeMounts:
- mountPath: /secret/gcp-credentials
name: gcp-credentials
readOnly: true
workingDir: /workspace/src/kubeflow/examples/py/kubeflow/examples/notebook_tests
echo test finished
workingDir: /srcCache/kubeflow/testing/py/kubeflow/testing/notebook_tests
# Copy the rendered notebook from the notebook-output location to artifacts-gcs.
# The source must be the declared `notebook-output` param; the step previously
# referenced `inputs.params.output`, which is not declared among the params
# shown for this task.
- image: $(inputs.params.test-image)
  name: copy-buckets
  script: |
    #!/usr/bin/env bash
    set -x
    gsutil cp -r $(inputs.params.notebook-output)/ $(inputs.params.artifacts-gcs)
- args:
- -m
- kubeflow.testing.tekton_client
- junit_parse_and_upload
- --artifacts_dir=$(inputs.params.output-workspace)
- --artifacts_dir=/workspace/artifacts
- --output_gcs=$(inputs.params.artifacts-gcs)
command:
- python
env:
- name: PYTHONPATH
value: /workspace/src/kubeflow/examples/py:/workspace/src/kubeflow/testing/py
- name: GOOGLE_APPLICATION_CREDENTIALS
value: /secret/gcp-credentials/key.json
image: gcr.io/kubeflow-ci/test-worker:latest
value: /src/kubeflow/testing/py
image: $(inputs.params.test-image)
name: copy-artifacts
volumeMounts:
- mountPath: /secret/gcp-credentials
name: gcp-credentials
readOnly: true
volumes:
- name: gcp-credentials
secret:
secretName: gcp-credentials
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Tekton Task that stages a Docker build context from the notebook repository
# and the kubeflow/testing sources cached in the test worker image, builds and
# pushes the notebook runner image with kaniko, and then invokes
# kubeflow.testing.tekton_client to produce an image.yaml under artifacts-gcs.
apiVersion: tekton.dev/v1alpha1
kind: Task
metadata:
  annotations:
    # Opt pods created for this task out of Istio sidecar injection.
    sidecar.istio.io/inject: "false"
  name: notebook-test-builder
  namespace: auto-deploy
spec:
  inputs:
    params:
    # Image used by the setup and create-image-file steps; it is expected to
    # provide /srcCache/kubeflow/testing, which those steps read from.
    - default: gcr.io/kubeflow-ci/test-worker-py3:3780b5d-dirty@sha256:4a766d6f5cc6cbcb00dbc96205f7a5b2816bc5f2b6d516fd67124d4a3e6508ea
      description: The docker image to run the tests in
      name: test-image
      type: string
    # Base location under which image.yaml is written by the last step.
    - description: GCS bucket and directory artifacts will be uploaded to. Should
        be in the form of 'gs://'
      name: artifacts-gcs
      type: string
    resources:
    # Git repository containing the notebook; copied into the build context.
    - name: notebook-repo
      type: git
    # Image resource whose URL is the push destination for the kaniko build.
    - name: image
      type: image
  steps:
  # Step 1: assemble the build context in /workspace/build and create the
  # /workspace/artifacts directory used by the following steps.
  - image: $(inputs.params.test-image)
    name: setup
    script: |
      #!/usr/bin/env bash
      set -x
      mkdir -p /workspace/build
      cd /workspace/build
      # Copy the source code
      cp -r /workspace/$(inputs.resources.notebook-repo.name) .
      cp -r /srcCache/kubeflow/testing/notebook_testing/Dockerfile.notebook_runner ./Dockerfile.notebook_runner
      mkdir -p kubeflow/
      # Copy over the kubeflow/testing directory because we need it to run the
      # notebooks; note that the copy is coming from the worker test image.
      cp -r /srcCache/kubeflow/testing ./kubeflow/testing
      # Create the artifacts directory
      mkdir -p /workspace/artifacts
  # Step 2: build the image from the staged context with kaniko, push it to
  # the image resource URL, and write the resulting digest to
  # /workspace/artifacts/image-digest.
  - command:
    - /kaniko/executor
    - --dockerfile=/workspace/build/Dockerfile.notebook_runner
    - --destination=$(inputs.resources.image.url)
    - --context=/workspace/build
    - --digest-file=/workspace/artifacts/image-digest
    image: gcr.io/kaniko-project/executor:v0.23.0
    name: build-push
    resources:
      requests:
        cpu: 7
        memory: 16Gi
  # Step 3: run `python -m kubeflow.testing.tekton_client create-image-file`
  # with the image URL, the digest file from step 2, and an output path of
  # <artifacts-gcs>/image.yaml.
  # NOTE(review): presumably this records the fully qualified image reference
  # for the notebook test task to consume; confirm against tekton_client.
  - args:
    - -m
    - kubeflow.testing.tekton_client
    - create-image-file
    - --image-name=$(inputs.resources.image.url)
    - --digest-file=/workspace/artifacts/image-digest
    - --output=$(inputs.params.artifacts-gcs)/image.yaml
    command:
    - python
    env:
    # Makes the kubeflow.testing package from the worker image importable.
    - name: PYTHONPATH
      value: /srcCache/kubeflow/testing/py
    image: $(inputs.params.test-image)
    name: create-image-file
Loading