Fix sample test - Add gcp permission #454

Merged: 15 commits, Dec 5, 2018
1 change: 1 addition & 0 deletions components/dataproc/xgboost/common/_utils.py
@@ -134,6 +134,7 @@ def copy_resources_to_gcs(file_paths, gcs_path):
dest_files = []
for file_name in file_paths:
dest_file = os.path.join(gcs_path, tmpdir, os.path.basename(file_name))
subprocess.call(['gcloud', 'auth', 'activate-service-account', '--key-file', os.environ['GOOGLE_APPLICATION_CREDENTIALS']])
subprocess.call(['gsutil', 'cp', file_name, dest_file])
dest_files.append(dest_file)
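
A note on the added line: the gcloud-bundled gsutil reads gcloud's credential store rather than GOOGLE_APPLICATION_CREDENTIALS, so the key file mounted by use_gcp_secret has to be activated through gcloud before gsutil calls will run as that account. A minimal sketch of the same pattern in isolation (file names are hypothetical):

    import os
    import subprocess

    # Activate the mounted service-account key so that subsequent gcloud and
    # gsutil calls in this environment run as that account.
    subprocess.call(['gcloud', 'auth', 'activate-service-account',
                     '--key-file', os.environ['GOOGLE_APPLICATION_CREDENTIALS']])
    subprocess.call(['gsutil', 'cp', 'local_file.txt',
                     'gs://my-bucket/staging/local_file.txt'])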

27 changes: 6 additions & 21 deletions components/kubeflow/deployer/src/deploy.sh
@@ -26,16 +26,6 @@ while (($#)); do
MODEL_PATH="$1"
shift
;;
"--project")
shift
PROJECT="$1"
shift
;;
"--zone")
shift
ZONE="$1"
shift
;;
"--cluster-name")
shift
CLUSTER_NAME="$1"
@@ -65,27 +55,22 @@ fi

echo "Deploying the model '${MODEL_PATH}'"

if [ -z "${PROJECT}" ]; then
PROJECT=$(wget -q -O- --header="Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/project/project-id)
fi

if [ -z "${CLUSTER_NAME}" ]; then
CLUSTER_NAME=$(wget -q -O- --header="Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/instance/attributes/cluster-name)
fi

if [ -z "${ZONE}" ]; then
ZONE=$(wget -q -O- --header="Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/instance/zone | cut -d '/' -f 4)
fi

# Ensure the server name is not more than 63 characters.
SERVER_NAME="${SERVER_NAME:0:63}"
# Trim any trailing hyphens from the server name.
while [[ "${SERVER_NAME:(-1)}" == "-" ]]; do SERVER_NAME="${SERVER_NAME::-1}"; done

echo "Deploying ${SERVER_NAME} to the cluster ${CLUSTER_NAME} in the project ${PROJECT} and the zone ${ZONE}..."
echo "Deploying ${SERVER_NAME} to the cluster ${CLUSTER_NAME}"

# Connect kubectl to the cluster
gcloud --project "${PROJECT}" container clusters get-credentials "${CLUSTER_NAME}" --zone "${ZONE}"
# Connect kubectl to the local cluster
kubectl config set-cluster "${CLUSTER_NAME}" --server=https://kubernetes.default --certificate-authority=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
kubectl config set-credentials pipeline --token "$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)"
kubectl config set-context kubeflow --cluster "${CLUSTER_NAME}" --user pipeline
kubectl config use-context kubeflow

# Configure and deploy the TF serving app
cd /src/github.com/kubeflow/kubeflow
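
For comparison, the in-cluster authentication that the rewritten deploy.sh performs with kubectl config can be done from Python with the official kubernetes client, which reads the same serviceaccount token and CA cert mounts. A sketch (the namespace is hypothetical):

    from kubernetes import client, config

    # load_incluster_config() reads /var/run/secrets/kubernetes.io/serviceaccount/
    # (token + ca.crt), the same files deploy.sh wires into kubectl above.
    config.load_incluster_config()
    v1 = client.CoreV1Api()
    for svc in v1.list_namespaced_service(namespace='kubeflow').items:
        print(svc.metadata.name)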
1 change: 1 addition & 0 deletions pipeline/pipeline/pipeline-apiserver.libsonnet
@@ -255,6 +255,7 @@
resources: [
"pods",
"pods/exec",
"pods/log",
"services",
],
verbs: [
9 changes: 5 additions & 4 deletions samples/kubeflow-tf/kubeflow-training-classification.py
@@ -15,6 +15,7 @@


import kfp.dsl as dsl
import kfp.gcp as gcp
import datetime

def dataflow_tf_transform_op(train_data: 'GcsUri', evaluation_data: 'GcsUri', schema: 'GcsUri[text/json]', project: 'GcpProject', preprocess_mode, preprocess_module: 'GcsUri[text/code/python]', transform_output: 'GcsUri[Directory]', step_name='preprocess'):
@@ -31,7 +32,7 @@ def dataflow_tf_transform_op(train_data: 'GcsUri', evaluation_data: 'GcsUri', sc
'--output', transform_output,
],
file_outputs = {'transformed': '/output.txt'}
)
).apply(gcp.use_gcp_secret('user-gcp-sa'))


def kubeflow_tf_training_op(transformed_data_dir, schema: 'GcsUri[text/json]', learning_rate: float, hidden_layer_size: int, steps: int, target, preprocess_module: 'GcsUri[text/code/python]', training_output: 'GcsUri[Directory]', step_name='training'):
@@ -49,7 +50,7 @@ def kubeflow_tf_training_op(transformed_data_dir, schema: 'GcsUri[text/json]', l
'--job-dir', training_output,
],
file_outputs = {'train': '/output.txt'}
)
).apply(gcp.use_gcp_secret('user-gcp-sa'))

def dataflow_tf_predict_op(evaluation_data: 'GcsUri', schema: 'GcsUri[text/json]', target: str, model: 'TensorFlow model', predict_mode, project: 'GcpProject', prediction_output: 'GcsUri', step_name='prediction'):
return dsl.ContainerOp(
@@ -65,7 +66,7 @@ def dataflow_tf_predict_op(evaluation_data: 'GcsUri', schema: 'GcsUri[text/json]
'--output', prediction_output,
],
file_outputs = {'prediction': '/output.txt'}
)
).apply(gcp.use_gcp_secret('user-gcp-sa'))

def confusion_matrix_op(predictions, output, step_name='confusionmatrix'):
return dsl.ContainerOp(
@@ -75,7 +76,7 @@ def confusion_matrix_op(predictions, output, step_name='confusionmatrix'):
'--predictions', predictions,
'--output', output,
]
)
).apply(gcp.use_gcp_secret('user-gcp-sa'))

@dsl.pipeline(
name='Pipeline TFJob',
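
The edits to this sample (and to the tfx and xgboost-spark samples below) all follow one pattern: each ContainerOp opts in to the GCP credentials explicitly via .apply(). A minimal sketch, with a hypothetical image and arguments:

    import kfp.dsl as dsl
    import kfp.gcp as gcp

    def my_op(output: 'GcsUri', step_name='my-step'):
        # .apply() hands the op to use_gcp_secret, which mounts the user-gcp-sa
        # secret and sets GOOGLE_APPLICATION_CREDENTIALS before returning the op.
        return dsl.ContainerOp(
            name=step_name,
            image='gcr.io/my-project/my-image:latest',  # hypothetical image
            arguments=['--output', output],
        ).apply(gcp.use_gcp_secret('user-gcp-sa'))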
11 changes: 6 additions & 5 deletions samples/tfx/taxi-cab-classification-pipeline.py
@@ -15,6 +15,7 @@


import kfp.dsl as dsl
import kfp.gcp as gcp
import datetime

def dataflow_tf_data_validation_op(inference_data: 'GcsUri', validation_data: 'GcsUri', column_names: 'GcsUri[text/json]', key_columns, project: 'GcpProject', mode, validation_output: 'GcsUri[Directory]', step_name='validation'):
@@ -34,7 +35,7 @@ def dataflow_tf_data_validation_op(inference_data: 'GcsUri', validation_data: 'G
'output': '/output.txt',
'schema': '/output_schema.json',
}
)
).apply(gcp.use_gcp_secret('user-gcp-sa'))

def dataflow_tf_transform_op(train_data: 'GcsUri', evaluation_data: 'GcsUri', schema: 'GcsUri[text/json]', project: 'GcpProject', preprocess_mode, preprocess_module: 'GcsUri[text/code/python]', transform_output: 'GcsUri[Directory]', step_name='preprocess'):
return dsl.ContainerOp(
@@ -50,7 +51,7 @@ def dataflow_tf_transform_op(train_data: 'GcsUri', evaluation_data: 'GcsUri', sc
'--output', transform_output,
],
file_outputs = {'transformed': '/output.txt'}
)
).apply(gcp.use_gcp_secret('user-gcp-sa'))


def tf_train_op(transformed_data_dir, schema: 'GcsUri[text/json]', learning_rate: float, hidden_layer_size: int, steps: int, target: str, preprocess_module: 'GcsUri[text/code/python]', training_output: 'GcsUri[Directory]', step_name='training'):
@@ -68,7 +69,7 @@ def tf_train_op(transformed_data_dir, schema: 'GcsUri[text/json]', learning_rate
'--job-dir', training_output,
],
file_outputs = {'train': '/output.txt'}
)
).apply(gcp.use_gcp_secret('user-gcp-sa'))

def dataflow_tf_model_analyze_op(model: 'TensorFlow model', evaluation_data: 'GcsUri', schema: 'GcsUri[text/json]', project: 'GcpProject', analyze_mode, analyze_slice_column, analysis_output: 'GcsUri', step_name='analysis'):
return dsl.ContainerOp(
@@ -84,7 +85,7 @@ def dataflow_tf_model_analyze_op(model: 'TensorFlow model', evaluation_data: 'Gc
'--output', analysis_output,
],
file_outputs = {'analysis': '/output.txt'}
)
).apply(gcp.use_gcp_secret('user-gcp-sa'))


def dataflow_tf_predict_op(evaluation_data: 'GcsUri', schema: 'GcsUri[text/json]', target: str, model: 'TensorFlow model', predict_mode, project: 'GcpProject', prediction_output: 'GcsUri', step_name='prediction'):
@@ -101,7 +102,7 @@ def dataflow_tf_predict_op(evaluation_data: 'GcsUri', schema: 'GcsUri[text/json]
'--output', prediction_output,
],
file_outputs = {'prediction': '/output.txt'}
)
).apply(gcp.use_gcp_secret('user-gcp-sa'))

def kubeflow_deploy_op(model: 'TensorFlow model', tf_server_name, step_name='deploy'):
return dsl.ContainerOp(
17 changes: 9 additions & 8 deletions samples/xgboost-spark/xgboost-training-cm.py
@@ -15,6 +15,7 @@


import kfp.dsl as dsl
import kfp.gcp as gcp


# ================================================================
@@ -177,28 +178,28 @@ def xgb_train_pipeline(
workers=dsl.PipelineParam('workers', value=2),
true_label=dsl.PipelineParam('true-label', value='ACTION'),
):
delete_cluster_op = DeleteClusterOp('delete-cluster', project, region)
delete_cluster_op = DeleteClusterOp('delete-cluster', project, region).apply(gcp.use_gcp_secret('user-gcp-sa'))
with dsl.ExitHandler(exit_op=delete_cluster_op):
create_cluster_op = CreateClusterOp('create-cluster', project, region, output)
create_cluster_op = CreateClusterOp('create-cluster', project, region, output).apply(gcp.use_gcp_secret('user-gcp-sa'))

analyze_op = AnalyzeOp('analyze', project, region, create_cluster_op.output, schema,
train_data, '%s/{{workflow.name}}/analysis' % output)
train_data, '%s/{{workflow.name}}/analysis' % output).apply(gcp.use_gcp_secret('user-gcp-sa'))

transform_op = TransformOp('transform', project, region, create_cluster_op.output,
train_data, eval_data, target, analyze_op.output,
'%s/{{workflow.name}}/transform' % output)
'%s/{{workflow.name}}/transform' % output).apply(gcp.use_gcp_secret('user-gcp-sa'))

train_op = TrainerOp('train', project, region, create_cluster_op.output, transform_op.outputs['train'],
transform_op.outputs['eval'], target, analyze_op.output, workers,
rounds, '%s/{{workflow.name}}/model' % output)
rounds, '%s/{{workflow.name}}/model' % output).apply(gcp.use_gcp_secret('user-gcp-sa'))

predict_op = PredictOp('predict', project, region, create_cluster_op.output, transform_op.outputs['eval'],
train_op.output, target, analyze_op.output, '%s/{{workflow.name}}/predict' % output)
train_op.output, target, analyze_op.output, '%s/{{workflow.name}}/predict' % output).apply(gcp.use_gcp_secret('user-gcp-sa'))

confusion_matrix_op = ConfusionMatrixOp('confusion-matrix', predict_op.output,
'%s/{{workflow.name}}/confusionmatrix' % output)
'%s/{{workflow.name}}/confusionmatrix' % output).apply(gcp.use_gcp_secret('user-gcp-sa'))

roc_op = RocOp('roc', predict_op.output, true_label, '%s/{{workflow.name}}/roc' % output)
roc_op = RocOp('roc', predict_op.output, true_label, '%s/{{workflow.name}}/roc' % output).apply(gcp.use_gcp_secret('user-gcp-sa'))

if __name__ == '__main__':
import kfp.compiler as compiler
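
The tail of this sample is elided above; a sketch of what compiling a pipeline function looks like in this version of the SDK (the archive name is hypothetical):

    import kfp.compiler as compiler

    # Compile the pipeline function into a workflow archive that can be
    # uploaded through the pipeline UI.
    compiler.Compiler().compile(xgb_train_pipeline, 'xgboost-training-cm.tar.gz')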
1 change: 0 additions & 1 deletion sdk/python/kfp/dsl/__init__.py
@@ -18,4 +18,3 @@
from ._container_op import ContainerOp
from ._ops_group import OpsGroup, ExitHandler, Condition
from ._component import python_component
from ._default_gcp_op import default_gcp_op
15 changes: 15 additions & 0 deletions sdk/python/kfp/dsl/_container_op.py
@@ -88,6 +88,21 @@ def __init__(self, name: str, image: str, command: str=None, arguments: str=None
if len(self.outputs) == 1:
self.output = list(self.outputs.values())[0]

def apply(self, mod_func):
"""Applies a modifier function to self. The function should return the passed object.
This is needed to chain "extention methods" to this class.

Example:
from kfp.gcp import use_gcp_secret
task = (
train_op(...)
.set_memory_request('1GB')
.apply(use_gcp_secret('user-gcp-sa'))
.set_memory_limit('2GB')
)
"""
return mod_func(self)

def after(self, op):
"""Specify explicit dependency on another op."""
self.dependent_op_names.append(op.name)
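
A short sketch combining the two modifiers on this class, apply() from this diff and the pre-existing after(), using hypothetical op factories:

    from kfp.gcp import use_gcp_secret

    # Both steps mount the GCP secret; train additionally waits for preprocess
    # to finish (preprocess_op/train_op are hypothetical ContainerOp factories).
    preprocess = preprocess_op().apply(use_gcp_secret('user-gcp-sa'))
    train = train_op().apply(use_gcp_secret('user-gcp-sa'))
    train.after(preprocess)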
69 changes: 0 additions & 69 deletions sdk/python/kfp/dsl/_default_gcp_op.py

This file was deleted.

58 changes: 58 additions & 0 deletions sdk/python/kfp/gcp.py
@@ -0,0 +1,58 @@
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

def use_gcp_secret(secret_name='user-gcp-sa', secret_file_path_in_volume='/user-gcp-sa.json', volume_name='gcp-credentials', secret_volume_mount_path='/secret/gcp-credentials'):
"""An operator that configures the container to use GCP service account.

The user-gcp-sa secret is created as part of the kubeflow deployment that
stores the access token for kubeflow user service account.

With this service account, the container has a range of GCP APIs to
access to. This service account is automatically created as part of the
kubeflow deployment.

For the list of the GCP APIs this service account can access to, check
https://github.com/kubeflow/kubeflow/blob/7b0db0d92d65c0746ac52b000cbc290dac7c62b1/deployment/gke/deployment_manager_configs/iam_bindings_template.yaml#L18

If you want to call the GCP APIs in a different project, grant the kf-user
service account access permission.
"""

def _use_gcp_secret(task):
from kubernetes import client as k8s_client
return (
task
.add_volume(
k8s_client.V1Volume(
name=volume_name,
secret=k8s_client.V1SecretVolumeSource(
secret_name=secret_name,
)
)
)
.add_volume_mount(
k8s_client.V1VolumeMount(
name=volume_name,
mount_path=secret_volume_mount_path,
)
)
.add_env_variable(
k8s_client.V1EnvVar(
name='GOOGLE_APPLICATION_CREDENTIALS',
value=secret_volume_mount_path + secret_file_path_in_volume,
)
)
)

return _use_gcp_secret
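
A usage sketch with non-default arguments, in case the secret is named or mounted differently in a given deployment (all values below are hypothetical):

    task = my_op().apply(
        use_gcp_secret(
            secret_name='my-gcp-sa',
            secret_file_path_in_volume='/my-gcp-sa.json',
            volume_name='my-credentials',
            secret_volume_mount_path='/secret/my-credentials',
        )
    )
    # GOOGLE_APPLICATION_CREDENTIALS then resolves to
    # /secret/my-credentials/my-gcp-sa.json inside the container.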