Bump TFX dependency (kubeflow#3121)
* bump py sample

* fix notebook per comments during bug bash

* clean up
Jiaxiao Zheng authored and Jeffwan committed Dec 9, 2020
1 parent b4c73b5 commit 0e7b5cc
Showing 4 changed files with 27 additions and 108 deletions.
2 changes: 1 addition & 1 deletion backend/Dockerfile
@@ -28,7 +28,7 @@ FROM python:3.5 as compiler
RUN apt-get update -y && \
apt-get install --no-install-recommends -y -q default-jdk python3-setuptools python3-dev
RUN wget https://bootstrap.pypa.io/get-pip.py && python3 get-pip.py
-RUN python3 -m pip install tfx==0.21.0rc0
+RUN python3 -m pip install tfx==0.21.0

WORKDIR /go/src/github.com/kubeflow/pipelines
COPY sdk sdk
22 changes: 4 additions & 18 deletions samples/core/parameterized_tfx_oss/parameterized_tfx_oss.py
@@ -18,7 +18,6 @@
from typing import Text

import kfp
-from kfp import dsl
from tfx.components.evaluator.component import Evaluator
from tfx.components.example_gen.csv_example_gen.component import CsvExampleGen
from tfx.components.example_validator.component import ExampleValidator
@@ -58,7 +57,7 @@
)


-def _create_test_pipeline(
+def _create_pipeline(
pipeline_root: Text, csv_input_location: data_types.RuntimeParameter,
taxi_module_file: data_types.RuntimeParameter, enable_cache: bool
):
@@ -113,18 +112,13 @@ def _create_test_pipeline(
examples=example_gen.outputs['examples'], model=trainer.outputs['model']
)

-# Hack: ensuring push_destination can be correctly parameterized and interpreted.
-# pipeline root will be specified as a dsl.PipelineParam with the name
-# pipeline-root, see:
-# https://github.com/tensorflow/tfx/blob/1c670e92143c7856f67a866f721b8a9368ede385/tfx/orchestration/kubeflow/kubeflow_dag_runner.py#L226
-_pipeline_root_param = dsl.PipelineParam(name='pipeline-root')
pusher = Pusher(
model=trainer.outputs['model'],
model_blessing=model_validator.outputs['blessing'],
push_destination=pusher_pb2.PushDestination(
filesystem=pusher_pb2.PushDestination.Filesystem(
base_directory=os.path.
-join(str(_pipeline_root_param), 'model_serving')
+join(str(pipeline.ROOT_PARAMETER), 'model_serving')
)
),
)
@@ -141,9 +135,8 @@ def _create_test_pipeline(


if __name__ == '__main__':

enable_cache = True
-pipeline = _create_test_pipeline(
+pipeline = _create_pipeline(
pipeline_root,
_data_root_param,
_taxi_module_file_param,
@@ -154,14 +147,7 @@ def _create_test_pipeline(
config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
kubeflow_metadata_config=kubeflow_dag_runner.
get_default_kubeflow_metadata_config(),
-# TODO: remove this override when KubeflowDagRunnerConfig doesn't default to use_gcp_secret op.
-pipeline_operator_funcs=list(
-filter(
-lambda operator: operator.__name__.find('gcp_secret') == -1,
-kubeflow_dag_runner.get_default_pipeline_operator_funcs()
-)
-),
-tfx_image='tensorflow/tfx:0.21.0rc0',
+tfx_image='tensorflow/tfx:0.21.0',
)
kfp_runner = kubeflow_dag_runner.KubeflowDagRunner(
output_filename=__file__ + '.yaml', config=config
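Note: the substantive change in this sample is dropping the hand-built dsl.PipelineParam(name='pipeline-root') in favor of pipeline.ROOT_PARAMETER, which TFX 0.21.0's KubeflowDagRunner resolves to the actual pipeline root at run time. A minimal sketch of the new pattern, assuming trainer and model_validator components are defined upstream as in the sample:

import os

from tfx.components.pusher.component import Pusher
from tfx.orchestration import pipeline
from tfx.proto import pusher_pb2

# pipeline.ROOT_PARAMETER is substituted with the pipeline root by
# KubeflowDagRunner, so no hand-rolled dsl.PipelineParam is needed.
pusher = Pusher(
    model=trainer.outputs['model'],                      # upstream Trainer
    model_blessing=model_validator.outputs['blessing'],  # upstream ModelValidator
    push_destination=pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(
            base_directory=os.path.join(
                str(pipeline.ROOT_PARAMETER), 'model_serving'
            )
        )
    ),
)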
109 changes: 21 additions & 88 deletions samples/core/parameterized_tfx_oss/taxi_pipeline_notebook.ipynb
@@ -17,7 +17,7 @@
"## Permission\n",
"\n",
"This pipeline requires Google Cloud Storage permission to run. \n",
"If KFP was deployed through K8S marketplace, please make sure full-scope access is checked when creating the cluster.\n",
"If KFP was deployed through K8S marketplace, please make sure full-scope access is checked when creating the cluster.\n",
"Otherwise, follow instructions in [the guideline](https://github.com/kubeflow/pipelines/blob/master/manifests/gcp_marketplace/guide.md#gcp-service-account-credentials) to guarantee at least, that the service account has `storage.admin` role."
]
},
@@ -29,7 +29,7 @@
"source": [
"!python3 -m pip install pip --upgrade --quiet --user\n",
"!python3 -m pip install kfp --upgrade --quiet --user\n",
"!python3 -m pip install tfx==0.21.0rc0 --quiet --user"
"!python3 -m pip install tfx==0.21.0 --quiet --user"
]
},
{
@@ -49,10 +49,9 @@
"outputs": [],
"source": [
"import os\n",
"from typing import Optional, Text\n",
"from typing import Text\n",
"\n",
"import kfp\n",
"from kfp import dsl\n",
"\n",
"from tfx.components import Evaluator\n",
"from tfx.components import CsvExampleGen\n",
@@ -143,59 +142,24 @@
"source": [
"# The input data location is parameterized by _data_root_param\n",
"examples = external_input(data_root_param)\n",
"example_gen = CsvExampleGen(input=examples)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"example_gen = CsvExampleGen(input=examples)\n",
"\n",
"statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])\n",
"\n",
"infer_schema = SchemaGen(\n",
" statistics=statistics_gen.outputs['statistics'], infer_feature_shape=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
" statistics=statistics_gen.outputs['statistics'], infer_feature_shape=False)\n",
"\n",
"validate_stats = ExampleValidator(\n",
" statistics=statistics_gen.outputs['statistics'],\n",
" schema=infer_schema.outputs['schema'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
" schema=infer_schema.outputs['schema'])\n",
"\n",
"# The module file used in Transform and Trainer component is paramterized by\n",
"# _taxi_module_file_param.\n",
"transform = Transform(\n",
" examples=example_gen.outputs['examples'],\n",
" schema=infer_schema.outputs['schema'],\n",
" module_file=taxi_module_file_param)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
" module_file=taxi_module_file_param)\n",
"\n",
"# The numbers of steps in train_args are specified as RuntimeParameter with\n",
"# name 'train-steps' and 'eval-steps', respectively.\n",
"trainer = Trainer(\n",
@@ -204,52 +168,26 @@
" schema=infer_schema.outputs['schema'],\n",
" transform_graph=transform.outputs['transform_graph'],\n",
" train_args={'num_steps': train_steps},\n",
" eval_args={'num_steps': eval_steps})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
" eval_args={'num_steps': eval_steps})\n",
"\n",
"# The name of slicing column is specified as a RuntimeParameter.\n",
"model_analyzer = Evaluator(\n",
" examples=example_gen.outputs['examples'],\n",
" model=trainer.outputs['model'],\n",
" feature_slicing_spec=dict(specs=[{\n",
" 'column_for_slicing': [slicing_column]\n",
" }]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
" }]))\n",
"\n",
"model_validator = ModelValidator(\n",
" examples=example_gen.outputs['examples'], model=trainer.outputs['model'])\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hack: ensuring push_destination can be correctly parameterized and interpreted.\n",
"# pipeline root will be specified as a dsl.PipelineParam with the name\n",
"# pipeline-root, see:\n",
"# https://github.com/tensorflow/tfx/blob/1c670e92143c7856f67a866f721b8a9368ede385/tfx/orchestration/kubeflow/kubeflow_dag_runner.py#L226\n",
"_pipeline_root_param = dsl.PipelineParam(name='pipeline-root')\n",
" examples=example_gen.outputs['examples'], model=trainer.outputs['model'])\n",
"\n",
"pusher = Pusher(\n",
" model=trainer.outputs['model'],\n",
" model_blessing=model_validator.outputs['blessing'],\n",
" push_destination=pusher_pb2.PushDestination(\n",
" filesystem=pusher_pb2.PushDestination.Filesystem(\n",
" base_directory=os.path.join(\n",
" str(_pipeline_root_param), 'model_serving'))))\n"
" str(pipeline.ROOT_PARAMETER), 'model_serving'))))"
]
},
{
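Note: the merged cell above references runtime parameters (data_root_param, taxi_module_file_param, train_steps, eval_steps, slicing_column) whose declarations live in notebook cells elided from this diff. For reference, a sketch of how one of them is typically declared with TFX 0.21's data_types.RuntimeParameter; the name 'train-steps' is taken from the comment in the diff, while the default value here is purely illustrative:

from tfx.orchestration import data_types

# Illustrative declaration; the real default lives in an elided cell.
train_steps = data_types.RuntimeParameter(
    name='train-steps',  # matches the name referenced in the Trainer comment
    default=10,          # placeholder default, not from the source
    ptype=int,
)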
@@ -281,15 +219,10 @@
"source": [
"# Specify a TFX docker image. For the full list of tags please see:\n",
"# https://hub.docker.com/r/tensorflow/tfx/tags\n",
"tfx_image = 'tensorflow/tfx:0.21.0rc0'\n",
"tfx_image = 'tensorflow/tfx:0.21.0'\n",
"config = kubeflow_dag_runner.KubeflowDagRunnerConfig(\n",
" kubeflow_metadata_config=kubeflow_dag_runner\n",
" .get_default_kubeflow_metadata_config(),\n",
" # Switch to use GCP service account by deleting the next line,\n",
" # if KFP is operating workload identity, or with GCP full scope permission.\n",
" pipeline_operator_funcs=list(filter(\n",
" lambda operator: operator.__name__.find('gcp_secret') == -1,\n",
" kubeflow_dag_runner.get_default_pipeline_operator_funcs())),\n",
" tfx_image=tfx_image)\n",
"kfp_runner = kubeflow_dag_runner.KubeflowDagRunner(config=config)\n",
"# KubeflowDagRunner compiles the DSL pipeline object into KFP pipeline package.\n",
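Note: the deleted pipeline_operator_funcs override existed only to filter the use_gcp_secret operator out of the 0.21.0rc0 defaults; its removal here suggests the 0.21.0 defaults no longer attach that operator. For clusters that still authenticate through the conventional user-gcp-sa secret rather than Workload Identity or full-scope access, a sketch of opting back in explicitly, using KFP's kfp.gcp.use_gcp_secret helper (adjust the secret name to your deployment):

from kfp import gcp
from tfx.orchestration.kubeflow import kubeflow_dag_runner

# Start from TFX's default operator funcs and explicitly append the
# GCP-secret mutator instead of relying on an implicit default.
config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
    kubeflow_metadata_config=(
        kubeflow_dag_runner.get_default_kubeflow_metadata_config()
    ),
    pipeline_operator_funcs=(
        kubeflow_dag_runner.get_default_pipeline_operator_funcs()
        + [gcp.use_gcp_secret('user-gcp-sa')]
    ),
    tfx_image='tensorflow/tfx:0.21.0',
)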
2 changes: 1 addition & 1 deletion test/sample-test/Dockerfile
@@ -19,7 +19,7 @@ RUN pip3 install google-api-python-client==1.7.0
RUN pip3 install google-cloud-storage==1.17.0
RUN pip3 install fire==0.2.1
RUN pip3 install yamale==2.0
-RUN pip3 install tfx==0.21.0rc0
+RUN pip3 install tfx==0.21.0

# Install python client, including DSL compiler.
COPY ./sdk/python /sdk/python
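Note: with this commit all three images pin the same final release. A small, hypothetical sanity check to run inside any of the built images to confirm the pin took effect (plain assert message because the backend image is Python 3.5, which lacks f-strings):

# Confirm the installed TFX matches the 0.21.0 pin used across images.
from tfx import version

assert version.__version__ == '0.21.0', version.__version__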
