-
Notifications
You must be signed in to change notification settings - Fork 1.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Switching test to kubeflow deployment #351
Changes from 9 commits
1c7f2c9
213795b
47d5457
0ea4e69
e2f5ea1
bf05e06
b83c4c1
21f88d0
8c3f83f
b78dc1b
1323ab4
ab93fe8
39d1712
f694670
1974bd3
61a8ef4
33cad5b
97de358
08a54ef
6afcc66
fa492e7
7bd127a
8b37fa2
1d55b6f
0776bf6
fae10e9
f198ccb
ed04d33
bafc268
2ef2610
41979ba
f449107
1b86ad0
7c19bc9
84bbc39
1beaae7
bb6420d
2fde6f4
4a98037
ab647bf
d2e073d
ed6b9e8
da34b8d
75c89c9
63c9832
5c73e01
25a49d9
a5a14dc
2701495
94edfa8
2fdb5c7
776034f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
#!/bin/bash
#
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Polls the Argo workflow named in $ARGO_WORKFLOW until it completes, copies
# its test results into the Prow artifact directory, and exits non-zero on
# timeout or workflow failure.
#
# Required environment (set by the sourcing deploy script):
#   ARGO_WORKFLOW, PULL_ARGO_WORKFLOW_STATUS_MAX_ATTEMPT,
#   WORKFLOW_COMPLETE_KEYWORD, WORKFLOW_FAILED_KEYWORD,
#   TEST_RESULTS_GCS_DIR, ARTIFACT_DIR
# Optional: TEST_RESULT_FOLDER (results are only copied when non-empty).

echo "check status of argo workflow ${ARGO_WORKFLOW}...."
# Probe the argo workflow status until it completes. Each failed attempt
# sleeps 20s, so the overall timeout is MAX_ATTEMPT * 20 seconds.
s=1  # pre-set failure status so the timeout check below is defined even if the loop runs zero times
for i in $(seq 1 "${PULL_ARGO_WORKFLOW_STATUS_MAX_ATTEMPT}"); do
  WORKFLOW_STATUS=$(kubectl get workflow "${ARGO_WORKFLOW}" --show-labels)
  # grep (without -q) deliberately echoes the matching status line to the log.
  if echo "${WORKFLOW_STATUS}" | grep "${WORKFLOW_COMPLETE_KEYWORD}"; then
    s=0
    break
  else
    s=$?
    printf "Workflow %s is not finished.\n%s\nSleep for 20 seconds...\n" \
        "${ARGO_WORKFLOW}" "${WORKFLOW_STATUS}"
    sleep 20
  fi
done

# Check whether the argo workflow finished or not and exit if not.
if [[ "${s}" != 0 ]]; then
  echo "Prow job Failed: Argo workflow timeout.."
  argo logs -w "${ARGO_WORKFLOW}"
  exit "${s}"
fi

echo "Argo workflow finished."

# Copy test results to where Prow's artifact uploader picks them up.
if [[ -n "${TEST_RESULT_FOLDER}" ]]; then
  echo "Copy test result"
  mkdir -p "${ARTIFACT_DIR}"
  # Best-effort: a workflow may legitimately produce no result files.
  gsutil cp -r "${TEST_RESULTS_GCS_DIR}"/* "${ARTIFACT_DIR}" || true
fi

ARGO_WORKFLOW_DETAILS=$(argo get "${ARGO_WORKFLOW}")
ARGO_WORKFLOW_LOGS=$(argo logs -w "${ARGO_WORKFLOW}")

if [[ "${WORKFLOW_STATUS}" == *"${WORKFLOW_FAILED_KEYWORD}"* ]]; then
  printf "The argo workflow failed.\n =========Argo Workflow=========\n%s\n==================\n" \
      "${ARGO_WORKFLOW_DETAILS}"
  printf "=========Argo Workflow Logs=========\n%s\n==================\n" \
      "${ARGO_WORKFLOW_LOGS}"
  exit 1
else
  # Pass details as a printf ARGUMENT, not the format string: the original
  # `printf ${ARGO_WORKFLOW_DETAILS}` word-split the output and mangled any
  # '%' characters in it.
  printf '%s\n' "${ARGO_WORKFLOW_DETAILS}"
  exit 0
fi
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
#!/bin/bash
#
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Presubmit entry point: deploys Kubeflow (with the pipeline component images
# built from this PR) onto a fresh GKE cluster via kfctl, submits the given
# Argo test workflow, and delegates status polling to check-argo-status.sh.
#
# Required environment (provided by Prow):
#   PULL_PULL_SHA, WORKSPACE, GOOGLE_APPLICATION_CREDENTIALS
# NOTE(review): CLUSTER_TYPE is passed to `argo submit` below but never
# assigned in this script — presumably inherited from the environment; confirm.

set -xe

usage()
{
    echo "usage: deploy.sh
    [--workflow_file the file name of the argo workflow to run]
    [--test_result_bucket the gcs bucket that argo workflow store the result to. Default is ml-pipeline-test]
    [--test_result_folder the gcs folder that argo workflow store the result to. Always a relative directory to gs://<gs_bucket>/[PULL_SHA]]
    [--timeout timeout of the tests in seconds. Default is 1800 seconds. ]
    [-h help]"
}

# Defaults; overridable via the flags parsed below.
PROJECT=ml-pipeline-test
TEST_RESULT_BUCKET=ml-pipeline-test
GCR_IMAGE_BASE_DIR=gcr.io/ml-pipeline-test/${PULL_PULL_SHA}
TIMEOUT_SECONDS=1800

while [ "$1" != "" ]; do
    case "$1" in
             --workflow_file )        shift
                                      WORKFLOW_FILE=$1
                                      ;;
             --test_result_bucket )   shift
                                      TEST_RESULT_BUCKET=$1
                                      ;;
             --test_result_folder )   shift
                                      TEST_RESULT_FOLDER=$1
                                      ;;
             --timeout )              shift
                                      TIMEOUT_SECONDS=$1
                                      ;;
             -h | --help )            usage
                                      exit
                                      ;;
             * )                      usage
                                      exit 1
    esac
    shift
done

TEST_RESULTS_GCS_DIR=gs://${TEST_RESULT_BUCKET}/${PULL_PULL_SHA}/${TEST_RESULT_FOLDER}
ARTIFACT_DIR=$WORKSPACE/_artifacts
WORKFLOW_COMPLETE_KEYWORD="completed=true"
WORKFLOW_FAILED_KEYWORD="phase=Failed"
# One status probe every 20 seconds (see check-argo-status.sh); use shell
# arithmetic instead of spawning `expr`.
PULL_ARGO_WORKFLOW_STATUS_MAX_ATTEMPT=$(( TIMEOUT_SECONDS / 20 ))

echo "presubmit test starts"

# Activate the service account used by all gcloud/gsutil/kubectl calls.
gcloud auth activate-service-account --key-file="${GOOGLE_APPLICATION_CREDENTIALS}"
# Zone is hard-coded because test quota is currently only granted there.
gcloud config set compute/zone us-central1-a

# Install ksonnet; kfctl's generated ks_app is configured with it below.
KS_VERSION="0.11.0"
curl -LO "https://github.com/ksonnet/ksonnet/releases/download/v${KS_VERSION}/ks_${KS_VERSION}_linux_amd64.tar.gz"
tar -xzf "ks_${KS_VERSION}_linux_amd64.tar.gz"
chmod +x "./ks_${KS_VERSION}_linux_amd64/ks"
mv "./ks_${KS_VERSION}_linux_amd64/ks" /usr/local/bin/

# Clone kubeflow master: its pipeline/argo packages are overlaid onto the
# released source below so this PR's component definitions get tested.
KUBEFLOW_MASTER=$(pwd)/kubeflow_master
git clone https://github.com/kubeflow/kubeflow.git "${KUBEFLOW_MASTER}"

## Download latest release source code
KUBEFLOW_SRC=$(pwd)/kubeflow_latest_release
mkdir "${KUBEFLOW_SRC}"
cd "${KUBEFLOW_SRC}"
export KUBEFLOW_TAG=v0.3.1
curl "https://raw.githubusercontent.com/kubeflow/kubeflow/${KUBEFLOW_TAG}/scripts/download.sh" | bash

## Override the pipeline config with code from master
cp -r "${KUBEFLOW_MASTER}/kubeflow/pipeline" "${KUBEFLOW_SRC}/kubeflow/pipeline"
cp -r "${KUBEFLOW_MASTER}/kubeflow/argo" "${KUBEFLOW_SRC}/kubeflow/argo"

# Derive a unique cluster name from the workflow file, the PR SHA and a random
# suffix so concurrent presubmit runs do not collide.
TEST_CLUSTER_PREFIX=${WORKFLOW_FILE%.*}
TEST_CLUSTER=$(echo "${TEST_CLUSTER_PREFIX}" | cut -d _ -f 1)-${PULL_PULL_SHA:0:7}-${RANDOM}

function delete_cluster {
  echo "Delete cluster..."
  gcloud container clusters delete "${TEST_CLUSTER}" --async
}
# trap delete_cluster EXIT  # intentionally disabled; cleanup handled via kfctl

export CLIENT_ID=${RANDOM}
export CLIENT_SECRET=${RANDOM}
# Must be an absolute path: kfctl.sh delete fails with a relative KFAPP.
KFAPP=$(pwd)/${TEST_CLUSTER}

function clean_up {
  echo "Clean up..."
  cd "${KFAPP}"
  "${KUBEFLOW_SRC}/scripts/kfctl.sh" delete all
}
# trap clean_up EXIT  # intentionally disabled (original comment said delete_cluster by mistake)

"${KUBEFLOW_SRC}/scripts/kfctl.sh" init "${KFAPP}" --platform gcp --project "${PROJECT}"
cd "${KFAPP}"
"${KUBEFLOW_SRC}/scripts/kfctl.sh" generate platform
"${KUBEFLOW_SRC}/scripts/kfctl.sh" apply platform
"${KUBEFLOW_SRC}/scripts/kfctl.sh" generate k8s

## Point the pipeline components at the images built from this PR.
pushd ks_app
ks param set pipeline apiImage "${GCR_IMAGE_BASE_DIR}/api:${PULL_PULL_SHA}"
ks param set pipeline persistenceAgentImage "${GCR_IMAGE_BASE_DIR}/persistenceagent:${PULL_PULL_SHA}"
ks param set pipeline scheduledWorkflowImage "${GCR_IMAGE_BASE_DIR}/scheduledworkflow:${PULL_PULL_SHA}"
ks param set pipeline uiImage "${GCR_IMAGE_BASE_DIR}/frontend:${PULL_PULL_SHA}"
popd

"${KUBEFLOW_SRC}/scripts/kfctl.sh" apply k8s

gcloud container clusters get-credentials "${TEST_CLUSTER}"

echo "submitting argo workflow for commit ${PULL_PULL_SHA}..."
ARGO_WORKFLOW=$(argo submit "$(dirname "$0")/${WORKFLOW_FILE}" \
  -p commit-sha="${PULL_PULL_SHA}" \
  -p test-results-gcs-dir="${TEST_RESULTS_GCS_DIR}" \
  -p cluster-type="${CLUSTER_TYPE}" \
  -o name
)
echo argo workflow submitted successfully

# Resolve this script's directory so check-argo-status.sh is found regardless
# of the caller's working directory; `source` shares the env vars set above.
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null && pwd)"
source "${DIR}/check-argo-status.sh"
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,6 +27,7 @@ usage() | |
[-h help]" | ||
} | ||
|
||
PROJECT=ml-pipeline-test | ||
TEST_RESULT_BUCKET=ml-pipeline-test | ||
GCR_IMAGE_BASE_DIR=gcr.io/ml-pipeline-test/${PULL_PULL_SHA} | ||
CLUSTER_TYPE=create-gke | ||
|
@@ -58,7 +59,6 @@ while [ "$1" != "" ]; do | |
shift | ||
done | ||
|
||
ZONE=us-west1-a | ||
TEST_RESULTS_GCS_DIR=gs://${TEST_RESULT_BUCKET}/${PULL_PULL_SHA}/${TEST_RESULT_FOLDER} | ||
ARTIFACT_DIR=$WORKSPACE/_artifacts | ||
WORKFLOW_COMPLETE_KEYWORD="completed=true" | ||
|
@@ -69,8 +69,8 @@ echo "presubmit test starts" | |
|
||
# activating the service account | ||
gcloud auth activate-service-account --key-file="${GOOGLE_APPLICATION_CREDENTIALS}" | ||
gcloud config set compute/zone us-central1-a | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are we hard-coding the zone? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. currently we only requested quota for this zone. |
||
|
||
#Creating a new GKE cluster if needed | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. reverted |
||
if [ "$CLUSTER_TYPE" == "create-gke" ]; then | ||
echo "create test cluster" | ||
TEST_CLUSTER_PREFIX=${WORKFLOW_FILE%.*} | ||
|
@@ -97,7 +97,6 @@ if [ "$CLUSTER_TYPE" == "create-gke" ]; then | |
fi | ||
|
||
kubectl config set-context $(kubectl config current-context) --namespace=default | ||
|
||
echo "Add necessary cluster role bindings" | ||
ACCOUNT=$(gcloud info --format='value(config.account)') | ||
kubectl create clusterrolebinding PROW_BINDING --clusterrole=cluster-admin --user=$ACCOUNT | ||
|
@@ -109,17 +108,14 @@ mkdir -p ~/bin/ | |
export PATH=~/bin/:$PATH | ||
curl -sSL -o ~/bin/argo https://github.com/argoproj/argo/releases/download/$ARGO_VERSION/argo-linux-amd64 | ||
chmod +x ~/bin/argo | ||
|
||
kubectl create ns argo | ||
kubectl apply -n argo -f https://raw.githubusercontent.com/argoproj/argo/$ARGO_VERSION/manifests/install.yaml | ||
|
||
|
||
echo "submitting argo workflow for commit ${PULL_PULL_SHA}..." | ||
ARGO_WORKFLOW=`argo submit $(dirname $0)/${WORKFLOW_FILE} \ | ||
-p commit-sha="${PULL_PULL_SHA}" \ | ||
-p test-results-gcs-dir="${TEST_RESULTS_GCS_DIR}" \ | ||
-p cluster-type="${CLUSTER_TYPE}" \ | ||
-p bootstrapper-image="${GCR_IMAGE_BASE_DIR}/bootstrapper" \ | ||
-p api-image="${GCR_IMAGE_BASE_DIR}/api" \ | ||
-p frontend-image="${GCR_IMAGE_BASE_DIR}/frontend" \ | ||
-p scheduledworkflow-image="${GCR_IMAGE_BASE_DIR}/scheduledworkflow" \ | ||
|
@@ -128,38 +124,5 @@ ARGO_WORKFLOW=`argo submit $(dirname $0)/${WORKFLOW_FILE} \ | |
` | ||
echo argo workflow submitted successfully | ||
|
||
echo "check status of argo workflow $ARGO_WORKFLOW...." | ||
# probing the argo workflow status until it completed. Timeout after 30 minutes | ||
for i in $(seq 1 ${PULL_ARGO_WORKFLOW_STATUS_MAX_ATTEMPT}) | ||
do | ||
WORKFLOW_STATUS=`kubectl get workflow $ARGO_WORKFLOW --show-labels` | ||
echo $WORKFLOW_STATUS | grep ${WORKFLOW_COMPLETE_KEYWORD} && s=0 && break || s=$? && printf "Workflow ${ARGO_WORKFLOW} is not finished.\n${WORKFLOW_STATUS}\nSleep for 20 seconds...\n" && sleep 20 | ||
done | ||
|
||
# Check whether the argo workflow finished or not and exit if not. | ||
if [[ $s != 0 ]]; then | ||
echo "Prow job Failed: Argo workflow timeout.." | ||
argo logs -w ${ARGO_WORKFLOW} | ||
exit $s | ||
fi | ||
|
||
echo "Argo workflow finished." | ||
|
||
if [[ ! -z "$TEST_RESULT_FOLDER" ]] | ||
then | ||
echo "Copy test result" | ||
mkdir -p $ARTIFACT_DIR | ||
gsutil cp -r "${TEST_RESULTS_GCS_DIR}"/* "${ARTIFACT_DIR}" || true | ||
fi | ||
|
||
ARGO_WORKFLOW_DETAILS=`argo get ${ARGO_WORKFLOW}` | ||
ARGO_WORKFLOW_LOGS=`argo logs -w ${ARGO_WORKFLOW}` | ||
|
||
if [[ $WORKFLOW_STATUS = *"${WORKFLOW_FAILED_KEYWORD}"* ]]; then | ||
printf "The argo workflow failed.\n =========Argo Workflow=========\n${ARGO_WORKFLOW_DETAILS}\n==================\n" | ||
printf "=========Argo Workflow Logs=========\n${ARGO_WORKFLOW_LOGS}\n==================\n" | ||
exit 1 | ||
else | ||
printf ${ARGO_WORKFLOW_DETAILS} | ||
exit 0 | ||
fi | ||
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null && pwd)" | ||
source "${DIR}/check-argo-status.sh" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This fails since
$KFAPP
is relative path.