Bump TFX dependency (kubeflow#3121)
* bump py sample

* fix notebook per comments during bug bash

* clean up
Jiaxiao Zheng authored and Jeffwan committed Dec 9, 2020
1 parent b4c73b5 commit 0e7b5cc
Showing 4 changed files with 27 additions and 108 deletions.
2 changes: 1 addition & 1 deletion backend/Dockerfile
@@ -28,7 +28,7 @@ FROM python:3.5 as compiler
RUN apt-get update -y && \
apt-get install --no-install-recommends -y -q default-jdk python3-setuptools python3-dev
RUN wget https://bootstrap.pypa.io/get-pip.py && python3 get-pip.py
-RUN python3 -m pip install tfx==0.21.0rc0
+RUN python3 -m pip install tfx==0.21.0

WORKDIR /go/src/github.com/kubeflow/pipelines
COPY sdk sdk
22 changes: 4 additions & 18 deletions samples/core/parameterized_tfx_oss/parameterized_tfx_oss.py
@@ -18,7 +18,6 @@
from typing import Text

import kfp
-from kfp import dsl
from tfx.components.evaluator.component import Evaluator
from tfx.components.example_gen.csv_example_gen.component import CsvExampleGen
from tfx.components.example_validator.component import ExampleValidator
@@ -58,7 +57,7 @@
)


-def _create_test_pipeline(
+def _create_pipeline(
pipeline_root: Text, csv_input_location: data_types.RuntimeParameter,
taxi_module_file: data_types.RuntimeParameter, enable_cache: bool
):
@@ -113,18 +112,13 @@ def _create_test_pipeline(
examples=example_gen.outputs['examples'], model=trainer.outputs['model']
)

-# Hack: ensuring push_destination can be correctly parameterized and interpreted.
-# pipeline root will be specified as a dsl.PipelineParam with the name
-# pipeline-root, see:
-# https://github.com/tensorflow/tfx/blob/1c670e92143c7856f67a866f721b8a9368ede385/tfx/orchestration/kubeflow/kubeflow_dag_runner.py#L226
-_pipeline_root_param = dsl.PipelineParam(name='pipeline-root')
pusher = Pusher(
model=trainer.outputs['model'],
model_blessing=model_validator.outputs['blessing'],
push_destination=pusher_pb2.PushDestination(
filesystem=pusher_pb2.PushDestination.Filesystem(
base_directory=os.path.
-join(str(_pipeline_root_param), 'model_serving')
+join(str(pipeline.ROOT_PARAMETER), 'model_serving')
)
),
)
@@ -141,9 +135,8 @@ def _create_test_pipeline(


if __name__ == '__main__':

enable_cache = True
-pipeline = _create_test_pipeline(
+pipeline = _create_pipeline(
pipeline_root,
_data_root_param,
_taxi_module_file_param,
@@ -154,14 +147,7 @@ def _create_test_pipeline(
config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
kubeflow_metadata_config=kubeflow_dag_runner.
get_default_kubeflow_metadata_config(),
-# TODO: remove this override when KubeflowDagRunnerConfig doesn't default to use_gcp_secret op.
-pipeline_operator_funcs=list(
-filter(
-lambda operator: operator.__name__.find('gcp_secret') == -1,
-kubeflow_dag_runner.get_default_pipeline_operator_funcs()
-)
-),
-tfx_image='tensorflow/tfx:0.21.0rc0',
+tfx_image='tensorflow/tfx:0.21.0',
)
kfp_runner = kubeflow_dag_runner.KubeflowDagRunner(
output_filename=__file__ + '.yaml', config=config
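Note: the substantive change in this sample is dropping the hand-built dsl.PipelineParam(name='pipeline-root') in favor of pipeline.ROOT_PARAMETER, which TFX 0.21.0's KubeflowDagRunner resolves to the actual pipeline root at run time. A minimal sketch of the new pattern, assuming trainer and model_validator components are defined upstream as in the sample:

import os

from tfx.components.pusher.component import Pusher
from tfx.orchestration import pipeline
from tfx.proto import pusher_pb2

# pipeline.ROOT_PARAMETER is substituted with the pipeline root by
# KubeflowDagRunner, so no hand-rolled dsl.PipelineParam is needed.
pusher = Pusher(
    model=trainer.outputs['model'],                      # upstream Trainer
    model_blessing=model_validator.outputs['blessing'],  # upstream ModelValidator
    push_destination=pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(
            base_directory=os.path.join(
                str(pipeline.ROOT_PARAMETER), 'model_serving'
            )
        )
    ),
)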
109 changes: 21 additions & 88 deletions samples/core/parameterized_tfx_oss/taxi_pipeline_notebook.ipynb
@@ -17,7 +17,7 @@
"## Permission\n",
"\n",
"This pipeline requires Google Cloud Storage permission to run. \n",
"If KFP was deployed through K8S marketplace, please make sure full-scope access is checked when creating the cluster.\n",
"If KFP was deployed through K8S marketplace, please make sure full-scope access is checked when creating the cluster.\n",
"Otherwise, follow instructions in [the guideline](https://github.com/kubeflow/pipelines/blob/master/manifests/gcp_marketplace/guide.md#gcp-service-account-credentials) to guarantee at least, that the service account has `storage.admin` role."
]
},
@@ -29,7 +29,7 @@
"source": [
"!python3 -m pip install pip --upgrade --quiet --user\n",
"!python3 -m pip install kfp --upgrade --quiet --user\n",
"!python3 -m pip install tfx==0.21.0rc0 --quiet --user"
"!python3 -m pip install tfx==0.21.0 --quiet --user"
]
},
{
@@ -49,10 +49,9 @@
"outputs": [],
"source": [
"import os\n",
"from typing import Optional, Text\n",
"from typing import Text\n",
"\n",
"import kfp\n",
"from kfp import dsl\n",
"\n",
"from tfx.components import Evaluator\n",
"from tfx.components import CsvExampleGen\n",
@@ -143,59 +142,24 @@
"source": [
"# The input data location is parameterized by _data_root_param\n",
"examples = external_input(data_root_param)\n",
"example_gen = CsvExampleGen(input=examples)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"example_gen = CsvExampleGen(input=examples)\n",
"\n",
"statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])\n",
"\n",
"infer_schema = SchemaGen(\n",
" statistics=statistics_gen.outputs['statistics'], infer_feature_shape=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
" statistics=statistics_gen.outputs['statistics'], infer_feature_shape=False)\n",
"\n",
"validate_stats = ExampleValidator(\n",
" statistics=statistics_gen.outputs['statistics'],\n",
" schema=infer_schema.outputs['schema'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
" schema=infer_schema.outputs['schema'])\n",
"\n",
"# The module file used in Transform and Trainer component is paramterized by\n",
"# _taxi_module_file_param.\n",
"transform = Transform(\n",
" examples=example_gen.outputs['examples'],\n",
" schema=infer_schema.outputs['schema'],\n",
" module_file=taxi_module_file_param)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
" module_file=taxi_module_file_param)\n",
"\n",
"# The numbers of steps in train_args are specified as RuntimeParameter with\n",
"# name 'train-steps' and 'eval-steps', respectively.\n",
"trainer = Trainer(\n",
@@ -204,52 +168,26 @@
" schema=infer_schema.outputs['schema'],\n",
" transform_graph=transform.outputs['transform_graph'],\n",
" train_args={'num_steps': train_steps},\n",
" eval_args={'num_steps': eval_steps})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
" eval_args={'num_steps': eval_steps})\n",
"\n",
"# The name of slicing column is specified as a RuntimeParameter.\n",
"model_analyzer = Evaluator(\n",
" examples=example_gen.outputs['examples'],\n",
" model=trainer.outputs['model'],\n",
" feature_slicing_spec=dict(specs=[{\n",
" 'column_for_slicing': [slicing_column]\n",
" }]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
" }]))\n",
"\n",
"model_validator = ModelValidator(\n",
" examples=example_gen.outputs['examples'], model=trainer.outputs['model'])\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hack: ensuring push_destination can be correctly parameterized and interpreted.\n",
"# pipeline root will be specified as a dsl.PipelineParam with the name\n",
"# pipeline-root, see:\n",
"# https://github.com/tensorflow/tfx/blob/1c670e92143c7856f67a866f721b8a9368ede385/tfx/orchestration/kubeflow/kubeflow_dag_runner.py#L226\n",
"_pipeline_root_param = dsl.PipelineParam(name='pipeline-root')\n",
" examples=example_gen.outputs['examples'], model=trainer.outputs['model'])\n",
"\n",
"pusher = Pusher(\n",
" model=trainer.outputs['model'],\n",
" model_blessing=model_validator.outputs['blessing'],\n",
" push_destination=pusher_pb2.PushDestination(\n",
" filesystem=pusher_pb2.PushDestination.Filesystem(\n",
" base_directory=os.path.join(\n",
" str(_pipeline_root_param), 'model_serving'))))\n"
" str(pipeline.ROOT_PARAMETER), 'model_serving'))))"
]
},
{
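Note: the merged cell above references runtime parameters (data_root_param, taxi_module_file_param, train_steps, eval_steps, slicing_column) whose declarations live in notebook cells elided from this diff. For reference, a sketch of how one of them is typically declared with TFX 0.21's data_types.RuntimeParameter; the name 'train-steps' is taken from the comment in the diff, while the default value here is purely illustrative:

from tfx.orchestration import data_types

# Illustrative declaration; the real default lives in an elided cell.
train_steps = data_types.RuntimeParameter(
    name='train-steps',  # matches the name referenced in the Trainer comment
    default=10,          # placeholder default, not from the source
    ptype=int,
)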
@@ -281,15 +219,10 @@
"source": [
"# Specify a TFX docker image. For the full list of tags please see:\n",
"# https://hub.docker.com/r/tensorflow/tfx/tags\n",
"tfx_image = 'tensorflow/tfx:0.21.0rc0'\n",
"tfx_image = 'tensorflow/tfx:0.21.0'\n",
"config = kubeflow_dag_runner.KubeflowDagRunnerConfig(\n",
" kubeflow_metadata_config=kubeflow_dag_runner\n",
" .get_default_kubeflow_metadata_config(),\n",
" # Switch to use GCP service account by deleting the next line,\n",
" # if KFP is operating workload identity, or with GCP full scope permission.\n",
" pipeline_operator_funcs=list(filter(\n",
" lambda operator: operator.__name__.find('gcp_secret') == -1,\n",
" kubeflow_dag_runner.get_default_pipeline_operator_funcs())),\n",
" tfx_image=tfx_image)\n",
"kfp_runner = kubeflow_dag_runner.KubeflowDagRunner(config=config)\n",
"# KubeflowDagRunner compiles the DSL pipeline object into KFP pipeline package.\n",
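Note: the deleted pipeline_operator_funcs override existed only to filter the use_gcp_secret operator out of the 0.21.0rc0 defaults; its removal here suggests the 0.21.0 defaults no longer attach that operator. For clusters that still authenticate through the conventional user-gcp-sa secret rather than Workload Identity or full-scope access, a sketch of opting back in explicitly, using KFP's kfp.gcp.use_gcp_secret helper (adjust the secret name to your deployment):

from kfp import gcp
from tfx.orchestration.kubeflow import kubeflow_dag_runner

# Start from TFX's default operator funcs and explicitly append the
# GCP-secret mutator instead of relying on an implicit default.
config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
    kubeflow_metadata_config=(
        kubeflow_dag_runner.get_default_kubeflow_metadata_config()
    ),
    pipeline_operator_funcs=(
        kubeflow_dag_runner.get_default_pipeline_operator_funcs()
        + [gcp.use_gcp_secret('user-gcp-sa')]
    ),
    tfx_image='tensorflow/tfx:0.21.0',
)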
2 changes: 1 addition & 1 deletion test/sample-test/Dockerfile
@@ -19,7 +19,7 @@ RUN pip3 install google-api-python-client==1.7.0
RUN pip3 install google-cloud-storage==1.17.0
RUN pip3 install fire==0.2.1
RUN pip3 install yamale==2.0
-RUN pip3 install tfx==0.21.0rc0
+RUN pip3 install tfx==0.21.0

# Install python client, including DSL compiler.
COPY ./sdk/python /sdk/python
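Note: with this commit all three images pin the same final release. A small, hypothetical sanity check to run inside any of the built images to confirm the pin took effect (plain assert message because the backend image is Python 3.5, which lacks f-strings):

# Confirm the installed TFX matches the 0.21.0 pin used across images.
from tfx import version

assert version.__version__ == '0.21.0', version.__version__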
