From f92024f8ebc78cc42b3489576217c7a7f9b7efde Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Wed, 17 Apr 2019 09:22:05 -0700 Subject: [PATCH 01/43] SDK/Client - Stopped extracting pipeline file to disk during submission (#1178) --- sdk/python/kfp/_client.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sdk/python/kfp/_client.py b/sdk/python/kfp/_client.py index 4e0f44fe74e..a5517fd6984 100644 --- a/sdk/python/kfp/_client.py +++ b/sdk/python/kfp/_client.py @@ -189,8 +189,7 @@ def _extract_pipeline_yaml(self, package_file): if len(all_yaml_files) > 1: raise ValueError('Invalid package. Multiple yaml files in the package.') - filename = zip.extract(all_yaml_files[0]) - with open(filename, 'r') as f: + with zip.open(all_yaml_files[0]) as f: return yaml.load(f) elif package_file.endswith('.yaml') or package_file.endswith('.yml'): with open(package_file, 'r') as f: From d673a1f954ff4f5a54336cb6f9e8748a9ca5502d Mon Sep 17 00:00:00 2001 From: Animesh Singh Date: Wed, 17 Apr 2019 13:32:19 -0700 Subject: [PATCH 02/43] adding myself as reviewer for samples (#1174) have been reviewing quite a few, and should be able to handle general review for sample pipleines --- samples/OWNERS | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/OWNERS b/samples/OWNERS index b166df22bcc..06faac73528 100644 --- a/samples/OWNERS +++ b/samples/OWNERS @@ -8,3 +8,4 @@ reviewers: - Ark-kun - gaoning777 - hongye-sun + - animeshsingh From 1115fa582339c7667dab4d080a180475a305a6dd Mon Sep 17 00:00:00 2001 From: hongye-sun <43763191+hongye-sun@users.noreply.github.com> Date: Thu, 18 Apr 2019 11:22:00 -0700 Subject: [PATCH 03/43] Apply latest doc review changes to github docs (#1128) * Apply latest doc review changes to github docs * merge changes from tech writer * adding missing dataproc components --- components/gcp/bigquery/query/README.md | 105 ++++++++----- components/gcp/bigquery/query/sample.ipynb | 113 ++++++++----- .../gcp/dataflow/launch_python/README.md | 112 +++++++------ .../gcp/dataflow/launch_python/sample.ipynb | 115 ++++++++------ .../gcp/dataflow/launch_template/README.md | 100 +++++++----- .../gcp/dataflow/launch_template/sample.ipynb | 102 +++++++----- .../gcp/dataproc/create_cluster/README.md | 94 ++++++----- .../gcp/dataproc/create_cluster/sample.ipynb | 110 +++++++------ .../gcp/dataproc/delete_cluster/README.md | 72 +++++---- .../gcp/dataproc/delete_cluster/sample.ipynb | 94 ++++++----- .../gcp/dataproc/submit_hadoop_job/README.md | 109 +++++++------ .../dataproc/submit_hadoop_job/sample.ipynb | 123 +++++++++------ .../gcp/dataproc/submit_hive_job/README.md | 85 +++++----- .../gcp/dataproc/submit_hive_job/sample.ipynb | 101 +++++++----- .../gcp/dataproc/submit_pig_job/README.md | 89 ++++++----- .../gcp/dataproc/submit_pig_job/sample.ipynb | 105 +++++++------ .../gcp/dataproc/submit_pyspark_job/README.md | 87 +++++----- .../dataproc/submit_pyspark_job/sample.ipynb | 101 +++++++----- .../gcp/dataproc/submit_spark_job/README.md | 104 +++++++----- .../dataproc/submit_spark_job/sample.ipynb | 123 +++++++++------ .../dataproc/submit_sparksql_job/README.md | 71 +++++---- .../dataproc/submit_sparksql_job/sample.ipynb | 86 +++++----- .../gcp/ml_engine/batch_predict/README.md | 104 +++++++----- .../gcp/ml_engine/batch_predict/sample.ipynb | 111 ++++++++----- components/gcp/ml_engine/deploy/README.md | 141 +++++++++++------ components/gcp/ml_engine/deploy/sample.ipynb | 148 ++++++++++++------ components/gcp/ml_engine/train/README.md | 120 ++++++++------ 
components/gcp/ml_engine/train/sample.ipynb | 125 +++++++++------ 28 files changed, 1767 insertions(+), 1183 deletions(-) diff --git a/components/gcp/bigquery/query/README.md b/components/gcp/bigquery/query/README.md index ea6b36faf19..f42dff1e85e 100644 --- a/components/gcp/bigquery/query/README.md +++ b/components/gcp/bigquery/query/README.md @@ -1,49 +1,78 @@ -# Submitting a query using BigQuery -A Kubeflow Pipeline component to submit a query to Google Cloud Bigquery service and dump outputs to a Google Cloud Storage blob. +# Name -## Intended Use -The component is intended to export query data from BiqQuery service to Cloud Storage. +Gather training data by querying BigQuery -## Runtime arguments -Name | Description | Data type | Optional | Default -:--- | :---------- | :-------- | :------- | :------ -query | The query used by Bigquery service to fetch the results. | String | No | -project_id | The project to execute the query job. | GCPProjectID | No | -dataset_id | The ID of the persistent dataset to keep the results of the query. If the dataset does not exist, the operation will create a new one. | String | Yes | ` ` -table_id | The ID of the table to keep the results of the query. If absent, the operation will generate a random id for the table. | String | Yes | ` ` -output_gcs_path | The path to the Cloud Storage bucket to store the query output. | GCSPath | Yes | ` ` -dataset_location | The location to create the dataset. Defaults to `US`. | String | Yes | `US` -job_config | The full config spec for the query job. See [QueryJobConfig](https://googleapis.github.io/google-cloud-python/latest/bigquery/generated/google.cloud.bigquery.job.QueryJobConfig.html#google.cloud.bigquery.job.QueryJobConfig) for details. | Dict | Yes | ` ` +# Labels + +GCP, BigQuery, Kubeflow, Pipeline + + +# Summary + +A Kubeflow Pipeline component to submit a query to BigQuery and store the result in a Cloud Storage bucket. + + +# Details + + +## Intended use + +Use this Kubeflow component to: +* Select training data by submitting a query to BigQuery. +* Output the training data into a Cloud Storage bucket as CSV files. + + +## Runtime arguments: + + +| Argument | Description | Optional | Data type | Accepted values | Default | +|----------|-------------|----------|-----------|-----------------|---------| +| query | The query used by BigQuery to fetch the results. | No | String | | | +| project_id | The project ID of the Google Cloud Platform (GCP) project to use to execute the query. | No | GCPProjectID | | | +| dataset_id | The ID of the persistent BigQuery dataset to store the results of the query. If the dataset does not exist, the operation will create a new one. | Yes | String | | None | +| table_id | The ID of the BigQuery table to store the results of the query. If the table ID is absent, the operation will generate a random ID for the table. | Yes | String | | None | +| output_gcs_path | The path to the Cloud Storage bucket to store the query output. | Yes | GCSPath | | None | +| dataset_location | The location where the dataset is created. Defaults to US. | Yes | String | | US | +| job_config | The full configuration specification for the query job. See [QueryJobConfig](https://googleapis.github.io/google-cloud-python/latest/bigquery/generated/google.cloud.bigquery.job.QueryJobConfig.html#google.cloud.bigquery.job.QueryJobConfig) for details. 
| Yes | Dict | A JSONobject which has the same structure as [QueryJobConfig](https://googleapis.github.io/google-cloud-python/latest/bigquery/generated/google.cloud.bigquery.job.QueryJobConfig.html#google.cloud.bigquery.job.QueryJobConfig) | None | +## Input data schema + +The input data is a BigQuery job containing a query that pulls data f rom various sources. + + +## Output: -## Outputs Name | Description | Type :--- | :---------- | :--- output_gcs_path | The path to the Cloud Storage bucket containing the query output in CSV format. | GCSPath -## Cautions and requirements +## Cautions & requirements + To use the component, the following requirements must be met: -* BigQuery API is enabled -* The component is running under a secret of [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example: -```python -bigquery_query_op(...).apply(gcp.use_gcp_secret('user-gcp-sa')) +* The BigQuery API is enabled. +* The component is running under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow Pipeline cluster. For example: -``` + ``` + bigquery_query_op(...).apply(gcp.use_gcp_secret('user-gcp-sa')) + ``` +* The Kubeflow user service account is a member of the `roles/bigquery.admin` role of the project. +* The Kubeflow user service account is a member of the `roles/storage.objectCreator `role of the Cloud Storage output bucket. -* The Kubeflow user service account is a member of `roles/bigquery.admin` role of the project. -* The Kubeflow user service account is also a member of `roles/storage.objectCreator` role of the Cloud Storage output bucket. +## Detailed description +This Kubeflow Pipeline component is used to: +* Submit a query to BigQuery. + * The query results are persisted in a dataset table in BigQuery. + * An extract job is created in BigQuery to extract the data from the dataset table and output it to a Cloud Storage bucket as CSV files. -## Detailed Description -The component does several things: -1. Creates persistent dataset and table if they do not exist. -1. Submits a query to BigQuery service and persists the result to the table. -1. Creates an extraction job to output the table data to a Cloud Storage bucket in CSV format. + Use the code below as an example of how to run your BigQuery job. -Here are the steps to use the component in a pipeline: -1. Install KFP SDK +### Sample +Note: The following sample code works in an IPython notebook or directly in Python code. + +#### Set sample parameters ```python @@ -64,13 +93,6 @@ bigquery_query_op = comp.load_component_from_url( help(bigquery_query_op) ``` -For more information about the component, please checkout: -* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/bigquery/_query.py) -* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile) -* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/bigquery/query/sample.ipynb) -* [BigQuery query REST API](https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query) - - ### Sample Note: The following sample code works in IPython notebook or directly in Python code. 
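As a hedged, minimal sketch (not taken from the committed sample), the loaded `bigquery_query_op` might be wired into a one-step pipeline roughly as follows; the query, project ID, and output path are placeholders, and the secret usage follows the requirement noted above.

```python
import kfp.dsl as dsl
import kfp.gcp as gcp

@dsl.pipeline(
    name='BigQuery query sketch',
    description='Minimal illustration of wiring bigquery_query_op into a pipeline.'
)
def bigquery_query_sketch(
    query='SELECT name FROM `bigquery-public-data.usa_names.usa_1910_current` LIMIT 10',  # placeholder query
    project_id='my-gcp-project',                                 # placeholder GCP project ID
    output_gcs_path='gs://my-bucket/bigquery/query/output.csv'   # placeholder output location
):
    # Run the query and export the result to Cloud Storage, applying the
    # Kubeflow user service account secret as required above.
    bigquery_query_op(
        query=query,
        project_id=project_id,
        output_gcs_path=output_gcs_path,
        dataset_location='US',
        job_config='').apply(gcp.use_gcp_secret('user-gcp-sa'))
```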
@@ -161,3 +183,12 @@ run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arg ```python !gsutil cat OUTPUT_PATH ``` + +## References +* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/bigquery/_query.py) +* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile) +* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/bigquery/query/sample.ipynb) +* [BigQuery query REST API](https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query) + +## License +By deploying or using this software you agree to comply with the [AI Hub Terms of Service](https://aihub.cloud.google.com/u/0/aihub-tos) and the [Google APIs Terms of Service](https://developers.google.com/terms/). To the extent of a direct conflict of terms, the AI Hub Terms of Service will control. diff --git a/components/gcp/bigquery/query/sample.ipynb b/components/gcp/bigquery/query/sample.ipynb index ee1945c637c..9da2362ef87 100644 --- a/components/gcp/bigquery/query/sample.ipynb +++ b/components/gcp/bigquery/query/sample.ipynb @@ -4,50 +4,80 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Submitting a query using BigQuery \n", - "A Kubeflow Pipeline component to submit a query to Google Cloud Bigquery service and dump outputs to a Google Cloud Storage blob. \n", + "# Name\n", "\n", - "## Intended Use\n", - "The component is intended to export query data from BiqQuery service to Cloud Storage. \n", + "Gather training data by querying BigQuery \n", "\n", - "## Runtime arguments\n", - "Name | Description | Data type | Optional | Default\n", - ":--- | :---------- | :-------- | :------- | :------\n", - "query | The query used by Bigquery service to fetch the results. | String | No |\n", - "project_id | The project to execute the query job. | GCPProjectID | No |\n", - "dataset_id | The ID of the persistent dataset to keep the results of the query. If the dataset does not exist, the operation will create a new one. | String | Yes | ` `\n", - "table_id | The ID of the table to keep the results of the query. If absent, the operation will generate a random id for the table. | String | Yes | ` `\n", - "output_gcs_path | The path to the Cloud Storage bucket to store the query output. | GCSPath | Yes | ` `\n", - "dataset_location | The location to create the dataset. Defaults to `US`. | String | Yes | `US`\n", - "job_config | The full config spec for the query job. See [QueryJobConfig](https://googleapis.github.io/google-cloud-python/latest/bigquery/generated/google.cloud.bigquery.job.QueryJobConfig.html#google.cloud.bigquery.job.QueryJobConfig) for details. | Dict | Yes | ` `\n", "\n", + "# Labels\n", + "\n", + "GCP, BigQuery, Kubeflow, Pipeline\n", + "\n", + "\n", + "# Summary\n", + "\n", + "A Kubeflow Pipeline component to submit a query to BigQuery and store the result in a Cloud Storage bucket.\n", + "\n", + "\n", + "# Details\n", + "\n", + "\n", + "## Intended use\n", + "\n", + "Use this Kubeflow component to:\n", + "* Select training data by submitting a query to BigQuery.\n", + "* Output the training data into a Cloud Storage bucket as CSV files.\n", + "\n", + "\n", + "## Runtime arguments:\n", + "\n", + "\n", + "| Argument | Description | Optional | Data type | Accepted values | Default |\n", + "|----------|-------------|----------|-----------|-----------------|---------|\n", + "| query | The query used by BigQuery to fetch the results. 
| No | String | | |\n", + "| project_id | The project ID of the Google Cloud Platform (GCP) project to use to execute the query. | No | GCPProjectID | | |\n", + "| dataset_id | The ID of the persistent BigQuery dataset to store the results of the query. If the dataset does not exist, the operation will create a new one. | Yes | String | | None |\n", + "| table_id | The ID of the BigQuery table to store the results of the query. If the table ID is absent, the operation will generate a random ID for the table. | Yes | String | | None |\n", + "| output_gcs_path | The path to the Cloud Storage bucket to store the query output. | Yes | GCSPath | | None |\n", + "| dataset_location | The location where the dataset is created. Defaults to US. | Yes | String | | US |\n", + "| job_config | The full configuration specification for the query job. See [QueryJobConfig](https://googleapis.github.io/google-cloud-python/latest/bigquery/generated/google.cloud.bigquery.job.QueryJobConfig.html#google.cloud.bigquery.job.QueryJobConfig) for details. | Yes | Dict | A JSONobject which has the same structure as [QueryJobConfig](https://googleapis.github.io/google-cloud-python/latest/bigquery/generated/google.cloud.bigquery.job.QueryJobConfig.html#google.cloud.bigquery.job.QueryJobConfig) | None |\n", + "## Input data schema\n", + "\n", + "The input data is a BigQuery job containing a query that pulls data f rom various sources. \n", + "\n", + "\n", + "## Output:\n", "\n", - "## Outputs\n", "Name | Description | Type\n", ":--- | :---------- | :---\n", "output_gcs_path | The path to the Cloud Storage bucket containing the query output in CSV format. | GCSPath\n", "\n", - "## Cautions and requirements\n", + "## Cautions & requirements\n", + "\n", "To use the component, the following requirements must be met:\n", - "* BigQuery API is enabled\n", - "* The component is running under a secret of [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example:\n", "\n", - "```python\n", - "bigquery_query_op(...).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", + "* The BigQuery API is enabled.\n", + "* The component is running under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow Pipeline cluster. For example:\n", + "\n", + " ```\n", + " bigquery_query_op(...).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", + " ```\n", + "* The Kubeflow user service account is a member of the `roles/bigquery.admin` role of the project.\n", + "* The Kubeflow user service account is a member of the `roles/storage.objectCreator `role of the Cloud Storage output bucket.\n", + "\n", + "## Detailed description\n", + "This Kubeflow Pipeline component is used to:\n", + "* Submit a query to BigQuery.\n", + " * The query results are persisted in a dataset table in BigQuery.\n", + " * An extract job is created in BigQuery to extract the data from the dataset table and output it to a Cloud Storage bucket as CSV files.\n", "\n", - "```\n", + " Use the code below as an example of how to run your BigQuery job.\n", "\n", - "* The Kubeflow user service account is a member of `roles/bigquery.admin` role of the project.\n", - "* The Kubeflow user service account is also a member of `roles/storage.objectCreator` role of the Cloud Storage output bucket.\n", + "### Sample\n", "\n", - "## Detailed Description\n", - "The component does several things:\n", - "1. 
Creates persistent dataset and table if they do not exist.\n", - "1. Submits a query to BigQuery service and persists the result to the table.\n", - "1. Creates an extraction job to output the table data to a Cloud Storage bucket in CSV format.\n", + "Note: The following sample code works in an IPython notebook or directly in Python code.\n", "\n", - "Here are the steps to use the component in a pipeline:\n", - "1. Install KFP SDK\n" + "#### Set sample parameters" ] }, { @@ -86,13 +116,6 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "For more information about the component, please checkout:\n", - "* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/bigquery/_query.py)\n", - "* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile)\n", - "* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/bigquery/query/sample.ipynb)\n", - "* [BigQuery query REST API](https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query)\n", - "\n", - "\n", "### Sample\n", "\n", "Note: The following sample code works in IPython notebook or directly in Python code.\n", @@ -241,6 +264,20 @@ "source": [ "!gsutil cat OUTPUT_PATH" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## References\n", + "* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/bigquery/_query.py)\n", + "* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile)\n", + "* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/bigquery/query/sample.ipynb)\n", + "* [BigQuery query REST API](https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query)\n", + "\n", + "## License\n", + "By deploying or using this software you agree to comply with the [AI Hub Terms of Service](https://aihub.cloud.google.com/u/0/aihub-tos) and the [Google APIs Terms of Service](https://developers.google.com/terms/). To the extent of a direct conflict of terms, the AI Hub Terms of Service will control." + ] } ], "metadata": { @@ -259,7 +296,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.6.4" } }, "nbformat": 4, diff --git a/components/gcp/dataflow/launch_python/README.md b/components/gcp/dataflow/launch_python/README.md index 9d6490db9c7..514609a8a39 100644 --- a/components/gcp/dataflow/launch_python/README.md +++ b/components/gcp/dataflow/launch_python/README.md @@ -1,54 +1,65 @@ -# Executing an Apache Beam Python job in Cloud Dataflow -A Kubeflow Pipeline component that submits an Apache Beam job (authored in Python) to Cloud Dataflow for execution. The Python Beam code is run with the Cloud Dataflow Runner. +# Name +Data preparation by executing an Apache Beam job in Cloud Dataflow -## Intended Use -Use this component to run a Python Beam code to submit a Dataflow job as a step of a KFP pipeline. The component will wait until the job finishes. +# Labels +GCP, Cloud Dataflow, Apache Beam, Python, Kubeflow + +# Summary +A Kubeflow Pipeline component that prepares data by submitting an Apache Beam job (authored in Python) to Cloud Dataflow for execution. The Python Beam code is run with Cloud Dataflow Runner. + +# Details +## Intended use + +Use this component to run a Python Beam code to submit a Cloud Dataflow job as a step of a Kubeflow pipeline. 
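As a rough sketch (assuming `dataflow_python_op` has already been loaded with `kfp.components.load_component_from_url` as shown in the sample later in this document), a pipeline step that submits a Beam Python file might look like the following; every path below is a placeholder.

```python
import kfp.dsl as dsl
import kfp.gcp as gcp

@dsl.pipeline(
    name='Dataflow launch Python sketch',
    description='Minimal illustration of submitting a Beam Python job to Cloud Dataflow.'
)
def dataflow_python_sketch(
    python_file_path='gs://my-bucket/beam/wordcount.py',            # placeholder Beam code file
    project_id='my-gcp-project',                                    # placeholder GCP project ID
    staging_dir='gs://my-bucket/dataflow/staging',                  # placeholder staging directory
    requirements_file_path='gs://my-bucket/beam/requirements.txt'   # placeholder requirements file
):
    # Submit the Beam job and wait for it to finish; the staging directory
    # also lets the step resume the job if it is retried after a failure.
    dataflow_python_op(
        python_file_path=python_file_path,
        project_id=project_id,
        staging_dir=staging_dir,
        requirements_file_path=requirements_file_path,
        args=['--output', 'gs://my-bucket/dataflow/wordcount.out'],  # placeholder Beam arguments
        wait_interval=30
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))
```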
## Runtime arguments -Name | Description | Type | Optional | Default -:--- | :---------- | :--- | :------- | :------ -python_file_path | The Cloud Storage or the local path to the python file being run. | String | No | -project_id | The ID of the parent project of the Dataflow job. | GCPProjectID | No | -staging_dir | The Cloud Storage directory for keeping staging files. A random subdirectory will be created under the directory to keep job info for resuming the job in case of failure and it will be passed as `staging_location` and `temp_location` command line args of the beam code. | GCSPath | Yes | ` ` -requirements_file_path | The Cloud Storageor the local path to the pip requirements file. | String | Yes | ` ` -args | The list of arguments to pass to the python file. | List | Yes | `[]` -wait_interval | The seconds to wait between calls to get the job status. | Integer | Yes | `30` - -## Output: -Name | Description | Type -:--- | :---------- | :--- -job_id | The id of the created dataflow job. | String - -## Cautions and requirements +Name | Description | Optional | Data type| Accepted values | Default | +:--- | :----------| :----------| :----------| :----------| :---------- | +python_file_path | The path to the Cloud Storage bucket or local directory containing the Python file to be run. | | GCSPath | | | +project_id | The ID of the Google Cloud Platform (GCP) project containing the Cloud Dataflow job.| | GCPProjectID | | | +staging_dir | The path to the Cloud Storage directory where the staging files are stored. A random subdirectory will be created under the staging directory to keep the job information.This is done so that you can resume the job in case of failure. `staging_dir` is passed as the command line arguments (`staging_location` and `temp_location`) of the Beam code. | Yes | GCPPath | | None | +requirements_file_path | The path to the Cloud Storage bucket or local directory containing the pip requirements file. | Yes | GCSPath | | None | +args | The list of arguments to pass to the Python file. | No | List | A list of string arguments | None | +wait_interval | The number of seconds to wait between calls to get the status of the job. | Yes | Integer | | 30 | + +## Input data schema + +Before you use the component, the following files must be ready in a Cloud Storage bucket: +- A Beam Python code file. +- A `requirements.txt` file which includes a list of dependent packages. + +The Beam Python code should follow the [Beam programming guide](https://beam.apache.org/documentation/programming-guide/) as well as the following additional requirements to be compatible with this component: +- It accepts the command line arguments `--project`, `--temp_location`, `--staging_location`, which are [standard Dataflow Runner options](https://cloud.google.com/dataflow/docs/guides/specifying-exec-params#setting-other-cloud-pipeline-options). +- It enables `info logging` before the start of a Cloud Dataflow job in the Python code. This is important to allow the component to track the status and ID of the job that is created. For example, calling `logging.getLogger().setLevel(logging.INFO)` before any other code. + + +## Output +Name | Description +:--- | :---------- +job_id | The id of the Cloud Dataflow job that is created. + +## Cautions & requirements To use the components, the following requirements must be met: -* Dataflow API is enabled. 
-* The component is running under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a KFP cluster. For example: +- Cloud Dataflow API is enabled. +- The component is running under a secret Kubeflow user service account in a Kubeflow Pipeline cluster. For example: ``` component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa')) ``` -* The Kubeflow user service account is a member of `roles/dataflow.developer` role of the project. -* The Kubeflow user service account is a member of `roles/storage.objectViewer` role of the Cloud Storage Objects `python_file_path` and `requirements_file_path`. -* The Kubeflow user service account is a member of `roles/storage.objectCreator` role of the Cloud Storage Object `staging_dir`. +The Kubeflow user service account is a member of: +- `roles/dataflow.developer` role of the project. +- `roles/storage.objectViewer` role of the Cloud Storage Objects `python_file_path` and `requirements_file_path`. +- `roles/storage.objectCreator` role of the Cloud Storage Object `staging_dir`. ## Detailed description -Before using the component, make sure the following files are prepared in a Cloud Storage bucket. -* A Beam Python code file. -* A `requirements.txt` file which includes a list of dependent packages. - -The Beam Python code should follow [Beam programing model](https://beam.apache.org/documentation/programming-guide/) and the following additional requirements to be compatible with this component: -* It accepts command line arguments: `--project`, `--temp_location`, `--staging_location`, which are [standard Dataflow Runner options](https://cloud.google.com/dataflow/docs/guides/specifying-exec-params#setting-other-cloud-pipeline-options). -* Enable info logging before the start of a Dataflow job in the Python code. This is important to allow the component to track the status and ID of create job. For example: calling `logging.getLogger().setLevel(logging.INFO)` before any other code. - The component does several things during the execution: -* Download `python_file_path` and `requirements_file_path` to local files. -* Start a subprocess to launch the Python program. -* Monitor the logs produced from the subprocess to extract Dataflow job information. -* Store Dataflow job information in `staging_dir` so the job can be resumed in case of failure. -* Wait for the job to finish. - -Here are the steps to use the component in a pipeline: -1. Install KFP SDK +- Downloads `python_file_path` and `requirements_file_path` to local files. +- Starts a subprocess to launch the Python program. +- Monitors the logs produced from the subprocess to extract the Cloud Dataflow job information. +- Stores the Cloud Dataflow job information in `staging_dir` so the job can be resumed in case of failure. +- Waits for the job to finish. +The steps to use the component in a pipeline are: +1. 
Install the Kubeflow Pipelines SDK: @@ -70,17 +81,9 @@ dataflow_python_op = comp.load_component_from_url( help(dataflow_python_op) ``` -For more information about the component, please checkout: -* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataflow/_launch_python.py) -* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile) -* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataflow/launch_python/sample.ipynb) -* [Dataflow Python Quickstart](https://cloud.google.com/dataflow/docs/quickstarts/quickstart-python) - ### Sample - -Note: the sample code below works in both IPython notebook or python code directly. - -In this sample, we run a wordcount sample code in a KFP pipeline. The output will be stored in a Cloud Storage bucket. Here is the sample code: +Note: The following sample code works in an IPython notebook or directly in Python code. See the sample code below to learn how to execute the template. +In this sample, we run a wordcount sample code in a Kubeflow Pipeline. The output will be stored in a Cloud Storage bucket. Here is the sample code: ```python @@ -292,3 +295,12 @@ run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arg ```python !gsutil cat $OUTPUT_FILE ``` + +## References +* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataflow/_launch_python.py) +* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile) +* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataflow/launch_python/sample.ipynb) +* [Dataflow Python Quickstart](https://cloud.google.com/dataflow/docs/quickstarts/quickstart-python) + +## License +By deploying or using this software you agree to comply with the [AI Hub Terms of Service](https://aihub.cloud.google.com/u/0/aihub-tos) and the [Google APIs Terms of Service](https://developers.google.com/terms/). To the extent of a direct conflict of terms, the AI Hub Terms of Service will control. diff --git a/components/gcp/dataflow/launch_python/sample.ipynb b/components/gcp/dataflow/launch_python/sample.ipynb index 93113512c4d..61a663439ec 100644 --- a/components/gcp/dataflow/launch_python/sample.ipynb +++ b/components/gcp/dataflow/launch_python/sample.ipynb @@ -4,56 +4,67 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Executing an Apache Beam Python job in Cloud Dataflow\n", - "A Kubeflow Pipeline component that submits an Apache Beam job (authored in Python) to Cloud Dataflow for execution. The Python Beam code is run with the Cloud Dataflow Runner.\n", + "# Name\n", + "Data preparation by executing an Apache Beam job in Cloud Dataflow\n", "\n", - "## Intended Use\n", - "Use this component to run a Python Beam code to submit a Dataflow job as a step of a KFP pipeline. The component will wait until the job finishes.\n", + "# Labels\n", + "GCP, Cloud Dataflow, Apache Beam, Python, Kubeflow\n", + "\n", + "# Summary\n", + "A Kubeflow Pipeline component that prepares data by submitting an Apache Beam job (authored in Python) to Cloud Dataflow for execution. The Python Beam code is run with Cloud Dataflow Runner.\n", + "\n", + "# Details\n", + "## Intended use\n", + "\n", + "Use this component to run a Python Beam code to submit a Cloud Dataflow job as a step of a Kubeflow pipeline. 
\n", "\n", "## Runtime arguments\n", - "Name | Description | Type | Optional | Default\n", - ":--- | :---------- | :--- | :------- | :------\n", - "python_file_path | The Cloud Storage or the local path to the python file being run. | String | No |\n", - "project_id | The ID of the parent project of the Dataflow job. | GCPProjectID | No |\n", - "staging_dir | The Cloud Storage directory for keeping staging files. A random subdirectory will be created under the directory to keep job info for resuming the job in case of failure and it will be passed as `staging_location` and `temp_location` command line args of the beam code. | GCSPath | Yes | ` `\n", - "requirements_file_path | The Cloud Storageor the local path to the pip requirements file. | String | Yes | ` `\n", - "args | The list of arguments to pass to the python file. | List | Yes | `[]`\n", - "wait_interval | The seconds to wait between calls to get the job status. | Integer | Yes | `30`\n", + "Name | Description | Optional | Data type| Accepted values | Default |\n", + ":--- | :----------| :----------| :----------| :----------| :---------- |\n", + "python_file_path | The path to the Cloud Storage bucket or local directory containing the Python file to be run. | | GCSPath | | |\n", + "project_id | The ID of the Google Cloud Platform (GCP) project containing the Cloud Dataflow job.| | GCPProjectID | | |\n", + "staging_dir | The path to the Cloud Storage directory where the staging files are stored. A random subdirectory will be created under the staging directory to keep the job information.This is done so that you can resume the job in case of failure. `staging_dir` is passed as the command line arguments (`staging_location` and `temp_location`) of the Beam code. | Yes | GCPPath | | None |\n", + "requirements_file_path | The path to the Cloud Storage bucket or local directory containing the pip requirements file. | Yes | GCSPath | | None |\n", + "args | The list of arguments to pass to the Python file. | No | List | A list of string arguments | None |\n", + "wait_interval | The number of seconds to wait between calls to get the status of the job. | Yes | Integer | | 30 |\n", + "\n", + "## Input data schema\n", + "\n", + "Before you use the component, the following files must be ready in a Cloud Storage bucket:\n", + "- A Beam Python code file.\n", + "- A `requirements.txt` file which includes a list of dependent packages.\n", + "\n", + "The Beam Python code should follow the [Beam programming guide](https://beam.apache.org/documentation/programming-guide/) as well as the following additional requirements to be compatible with this component:\n", + "- It accepts the command line arguments `--project`, `--temp_location`, `--staging_location`, which are [standard Dataflow Runner options](https://cloud.google.com/dataflow/docs/guides/specifying-exec-params#setting-other-cloud-pipeline-options).\n", + "- It enables `info logging` before the start of a Cloud Dataflow job in the Python code. This is important to allow the component to track the status and ID of the job that is created. For example, calling `logging.getLogger().setLevel(logging.INFO)` before any other code.\n", + "\n", "\n", - "## Output:\n", - "Name | Description | Type\n", - ":--- | :---------- | :---\n", - "job_id | The id of the created dataflow job. 
| String\n", + "## Output\n", + "Name | Description\n", + ":--- | :----------\n", + "job_id | The id of the Cloud Dataflow job that is created.\n", "\n", - "## Cautions and requirements\n", + "## Cautions & requirements\n", "To use the components, the following requirements must be met:\n", - "* Dataflow API is enabled.\n", - "* The component is running under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a KFP cluster. For example:\n", + "- Cloud Dataflow API is enabled.\n", + "- The component is running under a secret Kubeflow user service account in a Kubeflow Pipeline cluster. For example:\n", "```\n", "component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", "```\n", - "* The Kubeflow user service account is a member of `roles/dataflow.developer` role of the project.\n", - "* The Kubeflow user service account is a member of `roles/storage.objectViewer` role of the Cloud Storage Objects `python_file_path` and `requirements_file_path`.\n", - "* The Kubeflow user service account is a member of `roles/storage.objectCreator` role of the Cloud Storage Object `staging_dir`.\n", + "The Kubeflow user service account is a member of:\n", + "- `roles/dataflow.developer` role of the project.\n", + "- `roles/storage.objectViewer` role of the Cloud Storage Objects `python_file_path` and `requirements_file_path`.\n", + "- `roles/storage.objectCreator` role of the Cloud Storage Object `staging_dir`. \n", "\n", "## Detailed description\n", - "Before using the component, make sure the following files are prepared in a Cloud Storage bucket.\n", - "* A Beam Python code file.\n", - "* A `requirements.txt` file which includes a list of dependent packages.\n", - "\n", - "The Beam Python code should follow [Beam programing model](https://beam.apache.org/documentation/programming-guide/) and the following additional requirements to be compatible with this component:\n", - "* It accepts command line arguments: `--project`, `--temp_location`, `--staging_location`, which are [standard Dataflow Runner options](https://cloud.google.com/dataflow/docs/guides/specifying-exec-params#setting-other-cloud-pipeline-options).\n", - "* Enable info logging before the start of a Dataflow job in the Python code. This is important to allow the component to track the status and ID of create job. For example: calling `logging.getLogger().setLevel(logging.INFO)` before any other code.\n", - "\n", "The component does several things during the execution:\n", - "* Download `python_file_path` and `requirements_file_path` to local files.\n", - "* Start a subprocess to launch the Python program.\n", - "* Monitor the logs produced from the subprocess to extract Dataflow job information.\n", - "* Store Dataflow job information in `staging_dir` so the job can be resumed in case of failure.\n", - "* Wait for the job to finish.\n", - "\n", - "Here are the steps to use the component in a pipeline:\n", - "1. Install KFP SDK\n" + "- Downloads `python_file_path` and `requirements_file_path` to local files.\n", + "- Starts a subprocess to launch the Python program.\n", + "- Monitors the logs produced from the subprocess to extract the Cloud Dataflow job information.\n", + "- Stores the Cloud Dataflow job information in `staging_dir` so the job can be resumed in case of failure.\n", + "- Waits for the job to finish.\n", + "The steps to use the component in a pipeline are:\n", + "1. 
Install the Kubeflow Pipelines SDK:\n" ] }, { @@ -92,17 +103,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "For more information about the component, please checkout:\n", - "* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataflow/_launch_python.py)\n", - "* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile)\n", - "* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataflow/launch_python/sample.ipynb)\n", - "* [Dataflow Python Quickstart](https://cloud.google.com/dataflow/docs/quickstarts/quickstart-python)\n", - "\n", "### Sample\n", - "\n", - "Note: the sample code below works in both IPython notebook or python code directly.\n", - "\n", - "In this sample, we run a wordcount sample code in a KFP pipeline. The output will be stored in a Cloud Storage bucket. Here is the sample code:" + "Note: The following sample code works in an IPython notebook or directly in Python code. See the sample code below to learn how to execute the template.\n", + "In this sample, we run a wordcount sample code in a Kubeflow Pipeline. The output will be stored in a Cloud Storage bucket. Here is the sample code:" ] }, { @@ -377,6 +380,20 @@ "source": [ "!gsutil cat $OUTPUT_FILE" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## References\n", + "* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataflow/_launch_python.py)\n", + "* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile)\n", + "* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataflow/launch_python/sample.ipynb)\n", + "* [Dataflow Python Quickstart](https://cloud.google.com/dataflow/docs/quickstarts/quickstart-python)\n", + "\n", + "## License\n", + "By deploying or using this software you agree to comply with the [AI Hub Terms of Service](https://aihub.cloud.google.com/u/0/aihub-tos) and the [Google APIs Terms of Service](https://developers.google.com/terms/). To the extent of a direct conflict of terms, the AI Hub Terms of Service will control." + ] } ], "metadata": { @@ -395,7 +412,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.6.4" } }, "nbformat": 4, diff --git a/components/gcp/dataflow/launch_template/README.md b/components/gcp/dataflow/launch_template/README.md index cf5240af1f2..d04adad6363 100644 --- a/components/gcp/dataflow/launch_template/README.md +++ b/components/gcp/dataflow/launch_template/README.md @@ -1,43 +1,55 @@ -# Submitting a job to Cloud Dataflow service using a template -A Kubeflow Pipeline component to submit a job from a dataflow template to Cloud Dataflow service. +# Name +Data preparation by using a template to submit a job to Cloud Dataflow -## Intended Use +# Labels +GCP, Cloud Dataflow, Kubeflow, Pipeline -A Kubeflow Pipeline component to submit a job from a dataflow template to Google Cloud Dataflow service. +# Summary +A Kubeflow Pipeline component to prepare data by using a template to submit a job to Cloud Dataflow. + +# Details + +## Intended use +Use this component when you have a pre-built Cloud Dataflow template and want to launch it as a step in a Kubeflow Pipeline. 
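As a hedged sketch (assuming `dataflow_template_op` has been loaded with `kfp.components.load_component_from_url` as shown in the sample below), launching the Google-provided Word_Count template might look like the following; the bucket paths and the parameter names inside `parameters` are placeholders that must match the template you actually use.

```python
import json
import kfp.dsl as dsl
import kfp.gcp as gcp

@dsl.pipeline(
    name='Dataflow launch template sketch',
    description='Minimal illustration of launching a Cloud Dataflow template.'
)
def dataflow_template_sketch(
    project_id='my-gcp-project',                           # placeholder GCP project ID
    gcs_path='gs://dataflow-templates/latest/Word_Count',  # Google-provided word count template
    staging_dir='gs://my-bucket/dataflow/staging'          # placeholder staging directory
):
    # The launch parameters follow the LaunchTemplateParameters schema;
    # which keys appear under 'parameters' depends on the chosen template.
    launch_parameters = json.dumps({
        'parameters': {
            'inputFile': 'gs://my-bucket/input/shakespeare.txt',   # placeholder input file
            'output': 'gs://my-bucket/dataflow/wordcount/out'      # placeholder output prefix
        }
    })
    dataflow_template_op(
        project_id=project_id,
        gcs_path=gcs_path,
        launch_parameters=launch_parameters,
        staging_dir=staging_dir,
        wait_interval=30
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))
```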
## Runtime arguments -Name | Description | Type | Optional | Default -:--- | :---------- | :--- | :------- | :------ -project_id | The ID of the Cloud Platform project to which the job belongs. | GCPProjectID | No | -gcs_path | A Cloud Storage path to the job creation template. It must be a valid Cloud Storage URL beginning with `gs://`. | GCSPath | No | -launch_parameters | The parameters that are required for the template being launched. The Schema is defined in [LaunchTemplateParameters Parameters](https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters). | Dict | Yes | `{}` -location | The regional endpoint to which the job request is directed. | GCPRegion | Yes | `` -validate_only | If true, the request is validated but not actually executed. | Bool | Yes | `False` -staging_dir | The Cloud Storage path for keeping staging files. A random subdirectory will be created under the directory to keep job info for resuming the job in case of failure. | GCSPath | Yes | `` -wait_interval | The seconds to wait between calls to get the job status. | Integer | Yes |`30` - -## Output: -Name | Description | Type -:--- | :---------- | :--- -job_id | The id of the created dataflow job. | String - -## Cautions and requirements -To use the components, the following requirements must be met: -* Dataflow API is enabled. -* The component is running under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a KFP cluster. For example: -``` -component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa')) -``` -* The Kubeflow user service account is a member of `roles/dataflow.developer` role of the project. -* The Kubeflow user service account is a member of `roles/storage.objectViewer` role of the Cloud Storage Object `gcs_path`. -* The Kubeflow user service account is a member of `roles/storage.objectCreator` role of the Cloud Storage Object `staging_dir`. +Argument | Description | Optional | Data type | Accepted values | Default | +:--- | :---------- | :----------| :----------| :---------- | :----------| +project_id | The ID of the Google Cloud Platform (GCP) project to which the job belongs. | No | GCPProjectID | | | +gcs_path | The path to a Cloud Storage bucket containing the job creation template. It must be a valid Cloud Storage URL beginning with 'gs://'. | No | GCSPath | | | +launch_parameters | The parameters that are required to launch the template. The schema is defined in [LaunchTemplateParameters](https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters). The parameter `jobName` is replaced by a generated name. | Yes | Dict | A JSON object which has the same structure as [LaunchTemplateParameters](https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters) | None | +location | The regional endpoint to which the job request is directed.| Yes | GCPRegion | | None | +staging_dir | The path to the Cloud Storage directory where the staging files are stored. A random subdirectory will be created under the staging directory to keep the job information. This is done so that you can resume the job in case of failure.| Yes | GCSPath | | None | +validate_only | If True, the request is validated but not executed. | Yes | Boolean | | False | +wait_interval | The number of seconds to wait between calls to get the status of the job. | Yes | Integer | | 30 | + +## Input data schema + +The input `gcs_path` must contain a valid Cloud Dataflow template. 
The template can be created by following the instructions in [Creating Templates](https://cloud.google.com/dataflow/docs/guides/templates/creating-templates). You can also use [Google-provided templates](https://cloud.google.com/dataflow/docs/guides/templates/provided-templates). + +## Output +Name | Description +:--- | :---------- +job_id | The id of the Cloud Dataflow job that is created. + +## Caution & requirements + +To use the component, the following requirements must be met: +- Cloud Dataflow API is enabled. +- The component is running under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow Pipeline cluster. For example: + ``` + component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa')) + ``` +* The Kubeflow user service account is a member of: + - `roles/dataflow.developer` role of the project. + - `roles/storage.objectViewer` role of the Cloud Storage Object `gcs_path.` + - `roles/storage.objectCreator` role of the Cloud Storage Object `staging_dir.` ## Detailed description -The input `gcs_path` must contain a valid Dataflow template. The template can be created by following the guide [Creating Templates](https://cloud.google.com/dataflow/docs/guides/templates/creating-templates). Or, you can use [Google-provided templates](https://cloud.google.com/dataflow/docs/guides/templates/provided-templates). - -Here are the steps to use the component in a pipeline: -1. Install KFP SDK +You can execute the template locally by following the instructions in [Executing Templates](https://cloud.google.com/dataflow/docs/guides/templates/executing-templates). See the sample code below to learn how to execute the template. +Follow these steps to use the component in a pipeline: +1. Install the Kubeflow Pipeline SDK: @@ -59,17 +71,10 @@ dataflow_template_op = comp.load_component_from_url( help(dataflow_template_op) ``` -For more information about the component, please checkout: -* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataflow/_launch_template.py) -* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile) -* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataflow/launch_template/sample.ipynb) -* [Cloud Dataflow Templates overview](https://cloud.google.com/dataflow/docs/guides/templates/overview) - ### Sample -Note: the sample code below works in both IPython notebook or python code directly. - -In this sample, we run a Google provided word count template from `gs://dataflow-templates/latest/Word_Count`. The template takes a text file as input and output word counts to a Cloud Storage bucket. Here is the sample input: +Note: The following sample code works in an IPython notebook or directly in Python code. +In this sample, we run a Google-provided word count template from `gs://dataflow-templates/latest/Word_Count`. The template takes a text file as input and outputs word counts to a Cloud Storage bucket. 
Here is the sample input: ```python @@ -159,3 +164,14 @@ run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arg ```python !gsutil cat $OUTPUT_PATH* ``` + +## References + +* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataflow/_launch_template.py) +* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile) +* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataflow/launch_template/sample.ipynb) +* [Cloud Dataflow Templates overview](https://cloud.google.com/dataflow/docs/guides/templates/overview) + +## License +By deploying or using this software you agree to comply with the [AI Hub Terms of Service](https://aihub.cloud.google.com/u/0/aihub-tos) and the [Google APIs Terms of Service](https://developers.google.com/terms/). To the extent of a direct conflict of terms, the AI Hub Terms of Service will control. + diff --git a/components/gcp/dataflow/launch_template/sample.ipynb b/components/gcp/dataflow/launch_template/sample.ipynb index 706d69549a6..ec313804895 100644 --- a/components/gcp/dataflow/launch_template/sample.ipynb +++ b/components/gcp/dataflow/launch_template/sample.ipynb @@ -4,45 +4,57 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Submitting a job to Cloud Dataflow service using a template\n", - "A Kubeflow Pipeline component to submit a job from a dataflow template to Cloud Dataflow service.\n", + "# Name\n", + "Data preparation by using a template to submit a job to Cloud Dataflow\n", "\n", - "## Intended Use\n", + "# Labels\n", + "GCP, Cloud Dataflow, Kubeflow, Pipeline\n", "\n", - "A Kubeflow Pipeline component to submit a job from a dataflow template to Google Cloud Dataflow service.\n", + "# Summary\n", + "A Kubeflow Pipeline component to prepare data by using a template to submit a job to Cloud Dataflow.\n", + "\n", + "# Details\n", + "\n", + "## Intended use\n", + "Use this component when you have a pre-built Cloud Dataflow template and want to launch it as a step in a Kubeflow Pipeline.\n", "\n", "## Runtime arguments\n", - "Name | Description | Type | Optional | Default\n", - ":--- | :---------- | :--- | :------- | :------\n", - "project_id | The ID of the Cloud Platform project to which the job belongs. | GCPProjectID | No |\n", - "gcs_path | A Cloud Storage path to the job creation template. It must be a valid Cloud Storage URL beginning with `gs://`. | GCSPath | No |\n", - "launch_parameters | The parameters that are required for the template being launched. The Schema is defined in [LaunchTemplateParameters Parameters](https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters). | Dict | Yes | `{}`\n", - "location | The regional endpoint to which the job request is directed. | GCPRegion | Yes | ``\n", - "validate_only | If true, the request is validated but not actually executed. | Bool | Yes | `False`\n", - "staging_dir | The Cloud Storage path for keeping staging files. A random subdirectory will be created under the directory to keep job info for resuming the job in case of failure. | GCSPath | Yes | ``\n", - "wait_interval | The seconds to wait between calls to get the job status. 
| Integer | Yes |`30`\n", + "Argument | Description | Optional | Data type | Accepted values | Default |\n", + ":--- | :---------- | :----------| :----------| :---------- | :----------|\n", + "project_id | The ID of the Google Cloud Platform (GCP) project to which the job belongs. | No | GCPProjectID | | |\n", + "gcs_path | The path to a Cloud Storage bucket containing the job creation template. It must be a valid Cloud Storage URL beginning with 'gs://'. | No | GCSPath | | |\n", + "launch_parameters | The parameters that are required to launch the template. The schema is defined in [LaunchTemplateParameters](https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters). The parameter `jobName` is replaced by a generated name. | Yes | Dict | A JSON object which has the same structure as [LaunchTemplateParameters](https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters) | None |\n", + "location | The regional endpoint to which the job request is directed.| Yes | GCPRegion | | None |\n", + "staging_dir | The path to the Cloud Storage directory where the staging files are stored. A random subdirectory will be created under the staging directory to keep the job information. This is done so that you can resume the job in case of failure.| Yes | GCSPath | | None |\n", + "validate_only | If True, the request is validated but not executed. | Yes | Boolean | | False |\n", + "wait_interval | The number of seconds to wait between calls to get the status of the job. | Yes | Integer | | 30 |\n", "\n", - "## Output:\n", - "Name | Description | Type\n", - ":--- | :---------- | :---\n", - "job_id | The id of the created dataflow job. | String\n", + "## Input data schema\n", "\n", - "## Cautions and requirements\n", - "To use the components, the following requirements must be met:\n", - "* Dataflow API is enabled.\n", - "* The component is running under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a KFP cluster. For example:\n", - "```\n", - "component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", - "```\n", - "* The Kubeflow user service account is a member of `roles/dataflow.developer` role of the project.\n", - "* The Kubeflow user service account is a member of `roles/storage.objectViewer` role of the Cloud Storage Object `gcs_path`.\n", - "* The Kubeflow user service account is a member of `roles/storage.objectCreator` role of the Cloud Storage Object `staging_dir`.\n", + "The input `gcs_path` must contain a valid Cloud Dataflow template. The template can be created by following the instructions in [Creating Templates](https://cloud.google.com/dataflow/docs/guides/templates/creating-templates). You can also use [Google-provided templates](https://cloud.google.com/dataflow/docs/guides/templates/provided-templates).\n", "\n", - "## Detailed description\n", - "The input `gcs_path` must contain a valid Dataflow template. The template can be created by following the guide [Creating Templates](https://cloud.google.com/dataflow/docs/guides/templates/creating-templates). Or, you can use [Google-provided templates](https://cloud.google.com/dataflow/docs/guides/templates/provided-templates).\n", + "## Output\n", + "Name | Description\n", + ":--- | :----------\n", + "job_id | The id of the Cloud Dataflow job that is created.\n", "\n", - "Here are the steps to use the component in a pipeline:\n", - "1. 
Install KFP SDK\n" + "## Caution & requirements\n", + "\n", + "To use the component, the following requirements must be met:\n", + "- Cloud Dataflow API is enabled.\n", + "- The component is running under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow Pipeline cluster. For example:\n", + " ```\n", + " component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", + " ```\n", + "* The Kubeflow user service account is a member of:\n", + " - `roles/dataflow.developer` role of the project.\n", + " - `roles/storage.objectViewer` role of the Cloud Storage Object `gcs_path.`\n", + " - `roles/storage.objectCreator` role of the Cloud Storage Object `staging_dir.` \n", + "\n", + "## Detailed description\n", + "You can execute the template locally by following the instructions in [Executing Templates](https://cloud.google.com/dataflow/docs/guides/templates/executing-templates). See the sample code below to learn how to execute the template.\n", + "Follow these steps to use the component in a pipeline:\n", + "1. Install the Kubeflow Pipeline SDK:\n" ] }, { @@ -81,17 +93,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "For more information about the component, please checkout:\n", - "* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataflow/_launch_template.py)\n", - "* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile)\n", - "* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataflow/launch_template/sample.ipynb)\n", - "* [Cloud Dataflow Templates overview](https://cloud.google.com/dataflow/docs/guides/templates/overview)\n", - "\n", "### Sample\n", "\n", - "Note: the sample code below works in both IPython notebook or python code directly.\n", - "\n", - "In this sample, we run a Google provided word count template from `gs://dataflow-templates/latest/Word_Count`. The template takes a text file as input and output word counts to a Cloud Storage bucket. Here is the sample input:" + "Note: The following sample code works in an IPython notebook or directly in Python code.\n", + "In this sample, we run a Google-provided word count template from `gs://dataflow-templates/latest/Word_Count`. The template takes a text file as input and outputs word counts to a Cloud Storage bucket. Here is the sample input:" ] }, { @@ -239,6 +244,21 @@ "source": [ "!gsutil cat $OUTPUT_PATH*" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## References\n", + "\n", + "* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataflow/_launch_template.py)\n", + "* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile)\n", + "* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataflow/launch_template/sample.ipynb)\n", + "* [Cloud Dataflow Templates overview](https://cloud.google.com/dataflow/docs/guides/templates/overview)\n", + "\n", + "## License\n", + "By deploying or using this software you agree to comply with the [AI Hub Terms of Service](https://aihub.cloud.google.com/u/0/aihub-tos) and the [Google APIs Terms of Service](https://developers.google.com/terms/). 
To the extent of a direct conflict of terms, the AI Hub Terms of Service will control.\n" + ] } ], "metadata": { @@ -257,7 +277,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.6.4" } }, "nbformat": 4, diff --git a/components/gcp/dataproc/create_cluster/README.md b/components/gcp/dataproc/create_cluster/README.md index 1945c524085..2ffedc57163 100644 --- a/components/gcp/dataproc/create_cluster/README.md +++ b/components/gcp/dataproc/create_cluster/README.md @@ -1,44 +1,62 @@ -# Creating a Cluster with Cloud Dataproc -A Kubeflow Pipeline component to create a cluster in Cloud Dataproc service. +# Name +Data processing by creating a cluster in Cloud Dataproc -## Intended Use -This component can be used at the start of a KFP pipeline to create a temporary Dataproc cluster to run Dataproc jobs as subsequent steps in the pipeline. The cluster can be later recycled by the [Dataproc delete cluster component](https://github.com/kubeflow/pipelines/tree/master/components/gcp/dataproc/delete_cluster). +# Label +Cloud Dataproc, cluster, GCP, Cloud Storage, KubeFlow, Pipeline + + +# Summary +A Kubeflow Pipeline component to create a cluster in Cloud Dataproc. + +# Details +## Intended use + +Use this component at the start of a Kubeflow Pipeline to create a temporary Cloud Dataproc cluster to run Cloud Dataproc jobs as steps in the pipeline. ## Runtime arguments -Name | Description | Type | Optional | Default -:--- | :---------- | :--- | :------- | :------ -project_id | The Google Cloud Platform (GCP) project ID that the cluster belongs to. | GCPProjectID | No | -region | The Cloud Dataproc region runs the newly created cluster. | GCPRegion | No | -name | The name of the newly created cluster. Cluster names within a project must be unique. Names of deleted clusters can be reused. | String | Yes | ` ` -name_prefix | The prefix of the cluster name. | String | Yes | ` ` -initialization_actions | List of Cloud Storage URIs of executables to execute on each node after the configuration is completed. By default, executables are run on the master and all the worker nodes. | List | Yes | `[]` -config_bucket | A Cloud Storage bucket used to stage the job dependencies, the configuration files, and the job driver console’s output. | GCSPath | Yes | ` ` -image_version | The version of the software inside the cluster. | String | Yes | ` ` -cluster | The full [cluster config] (https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters#Cluster). | Dict | Yes | `{}` -wait_interval | The number of seconds to pause between polling the operation done status. | Integer | Yes | `30` + +| Argument | Description | Optional | Data type | Accepted values | Default | +|----------|-------------|----------|-----------|-----------------|---------| +| project_id | The Google Cloud Platform (GCP) project ID that the cluster belongs to. | No | GCPProjectID | | | +| region | The Cloud Dataproc region to create the cluster in. | No | GCPRegion | | | +| name | The name of the cluster. Cluster names within a project must be unique. You can reuse the names of deleted clusters. | Yes | String | | None | +| name_prefix | The prefix of the cluster name. | Yes | String | | None | +| initialization_actions | A list of Cloud Storage URIs identifying executables to execute on each node after the configuration is completed. By default, executables are run on the master and all the worker nodes. 
| Yes | List | | None | +| config_bucket | The Cloud Storage bucket to use to stage the job dependencies, the configuration files, and the job driver console’s output. | Yes | GCSPath | | None | +| image_version | The version of the software inside the cluster. | Yes | String | | None | +| cluster | The full [cluster configuration](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters#Cluster). | Yes | Dict | | None | +| wait_interval | The number of seconds to pause before polling the operation. | Yes | Integer | | 30 | ## Output Name | Description | Type :--- | :---------- | :--- -cluster_name | The cluster name of the created cluster. | String +cluster_name | The name of the cluster. | String + +Note: You can recycle the cluster by using the [Dataproc delete cluster component](https://github.com/kubeflow/pipelines/tree/master/components/gcp/dataproc/delete_cluster). + ## Cautions & requirements -To use the component, you must: -* Setup project by following the [guide](https://cloud.google.com/dataproc/docs/guides/setup-project). -* The component is running under a secret of [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example: -``` -component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa')) -``` -* Grant Kubeflow user service account the read access to the Cloud Storage buckets which contains initialization action files. -* Grant Kubeflow user service account the `roles/dataproc.editor` role on the project. -## Detailed Description -This component creates a new Dataproc cluster by using [Dataproc create cluster REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters/create). +To use the component, you must: +* Set up the GCP project by following these [steps](https://cloud.google.com/dataproc/docs/guides/setup-project). +* Run the component under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example: -Here are the steps to use the component in a pipeline: -1. Install KFP SDK + ``` + component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa')) + ``` +* Grant the following types of access to the Kubeflow user service account: + * Read access to the Cloud Storage buckets which contains initialization action files. + * The role, `roles/dataproc.editor` on the project. + +## Detailed description + +This component creates a new Dataproc cluster by using the [Dataproc create cluster REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters/create). + +Follow these steps to use the component in a pipeline: + +1. 
Install the Kubeflow Pipeline SDK: @@ -60,16 +78,8 @@ dataproc_create_cluster_op = comp.load_component_from_url( help(dataproc_create_cluster_op) ``` -For more information about the component, please checkout: -* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataproc/_create_cluster.py) -* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile) -* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataproc/create_cluster/sample.ipynb) -* [Dataproc create cluster REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters/create) - - ### Sample - -Note: the sample code below works in both IPython notebook or python code directly. +Note: The following sample code works in an IPython notebook or directly in Python code. See the sample code below to learn how to execute the template. #### Set sample parameters @@ -142,3 +152,13 @@ experiment = client.create_experiment(EXPERIMENT_NAME) run_name = pipeline_func.__name__ + ' run' run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arguments) ``` + +## References +* [Kubernetes Engine for Kubeflow](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) +* [Component Python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataproc/_create_cluster.py) +* [Component Docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile) +* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataproc/create_cluster/sample.ipynb) +* [Dataproc create cluster REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters/create) + +## License +By deploying or using this software you agree to comply with the [AI Hub Terms of Service](https://aihub.cloud.google.com/u/0/aihub-tos) and the [Google APIs Terms of Service](https://developers.google.com/terms/). To the extent of a direct conflict of terms, the AI Hub Terms of Service will control. diff --git a/components/gcp/dataproc/create_cluster/sample.ipynb b/components/gcp/dataproc/create_cluster/sample.ipynb index 1c9a000406d..16a7dd8c60b 100644 --- a/components/gcp/dataproc/create_cluster/sample.ipynb +++ b/components/gcp/dataproc/create_cluster/sample.ipynb @@ -4,46 +4,64 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Creating a Cluster with Cloud Dataproc\n", - "A Kubeflow Pipeline component to create a cluster in Cloud Dataproc service.\n", + "# Name\n", + "Data processing by creating a cluster in Cloud Dataproc\n", "\n", - "## Intended Use\n", - "This component can be used at the start of a KFP pipeline to create a temporary Dataproc cluster to run Dataproc jobs as subsequent steps in the pipeline. 
The cluster can be later recycled by the [Dataproc delete cluster component](https://github.com/kubeflow/pipelines/tree/master/components/gcp/dataproc/delete_cluster).\n", "\n", + "# Label\n", + "Cloud Dataproc, cluster, GCP, Cloud Storage, KubeFlow, Pipeline\n", + "\n", + "\n", + "# Summary\n", + "A Kubeflow Pipeline component to create a cluster in Cloud Dataproc.\n", + "\n", + "# Details\n", + "## Intended use\n", + "\n", + "Use this component at the start of a Kubeflow Pipeline to create a temporary Cloud Dataproc cluster to run Cloud Dataproc jobs as steps in the pipeline.\n", "\n", "## Runtime arguments\n", - "Name | Description | Type | Optional | Default\n", - ":--- | :---------- | :--- | :------- | :------\n", - "project_id | The Google Cloud Platform (GCP) project ID that the cluster belongs to. | GCPProjectID | No |\n", - "region | The Cloud Dataproc region runs the newly created cluster. | GCPRegion | No |\n", - "name | The name of the newly created cluster. Cluster names within a project must be unique. Names of deleted clusters can be reused. | String | Yes | ` `\n", - "name_prefix | The prefix of the cluster name. | String | Yes | ` `\n", - "initialization_actions | List of Cloud Storage URIs of executables to execute on each node after the configuration is completed. By default, executables are run on the master and all the worker nodes. | List | Yes | `[]`\n", - "config_bucket | A Cloud Storage bucket used to stage the job dependencies, the configuration files, and the job driver console’s output. | GCSPath | Yes | ` `\n", - "image_version | The version of the software inside the cluster. | String | Yes | ` `\n", - "cluster | The full [cluster config] (https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters#Cluster). | Dict | Yes | `{}`\n", - "wait_interval | The number of seconds to pause between polling the operation done status. | Integer | Yes | `30`\n", + "\n", + "| Argument | Description | Optional | Data type | Accepted values | Default |\n", + "|----------|-------------|----------|-----------|-----------------|---------|\n", + "| project_id | The Google Cloud Platform (GCP) project ID that the cluster belongs to. | No | GCPProjectID | | |\n", + "| region | The Cloud Dataproc region to create the cluster in. | No | GCPRegion | | |\n", + "| name | The name of the cluster. Cluster names within a project must be unique. You can reuse the names of deleted clusters. | Yes | String | | None |\n", + "| name_prefix | The prefix of the cluster name. | Yes | String | | None |\n", + "| initialization_actions | A list of Cloud Storage URIs identifying executables to execute on each node after the configuration is completed. By default, executables are run on the master and all the worker nodes. | Yes | List | | None |\n", + "| config_bucket | The Cloud Storage bucket to use to stage the job dependencies, the configuration files, and the job driver console’s output. | Yes | GCSPath | | None |\n", + "| image_version | The version of the software inside the cluster. | Yes | String | | None |\n", + "| cluster | The full [cluster configuration](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters#Cluster). | Yes | Dict | | None |\n", + "| wait_interval | The number of seconds to pause before polling the operation. | Yes | Integer | | 30 |\n", "\n", "## Output\n", "Name | Description | Type\n", ":--- | :---------- | :---\n", - "cluster_name | The cluster name of the created cluster. 
| String\n", + "cluster_name | The name of the cluster. | String\n", + "\n", + "Note: You can recycle the cluster by using the [Dataproc delete cluster component](https://github.com/kubeflow/pipelines/tree/master/components/gcp/dataproc/delete_cluster).\n", + "\n", "\n", "## Cautions & requirements\n", - "To use the component, you must:\n", - "* Setup project by following the [guide](https://cloud.google.com/dataproc/docs/guides/setup-project).\n", - "* The component is running under a secret of [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example:\n", - "```\n", - "component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", - "```\n", - "* Grant Kubeflow user service account the read access to the Cloud Storage buckets which contains initialization action files.\n", - "* Grant Kubeflow user service account the `roles/dataproc.editor` role on the project.\n", - "\n", - "## Detailed Description\n", - "This component creates a new Dataproc cluster by using [Dataproc create cluster REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters/create).\n", - "\n", - "Here are the steps to use the component in a pipeline:\n", - "1. Install KFP SDK\n" + "\n", + "To use the component, you must:\n", + "* Set up the GCP project by following these [steps](https://cloud.google.com/dataproc/docs/guides/setup-project).\n", + "* Run the component under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example:\n", + "\n", + " ```\n", + " component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", + " ```\n", + "* Grant the following types of access to the Kubeflow user service account:\n", + " * Read access to the Cloud Storage buckets which contains initialization action files.\n", + " * The role, `roles/dataproc.editor` on the project.\n", + "\n", + "## Detailed description\n", + "\n", + "This component creates a new Dataproc cluster by using the [Dataproc create cluster REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters/create). \n", + "\n", + "Follow these steps to use the component in a pipeline:\n", + "\n", + "1. Install the Kubeflow Pipeline SDK:\n" ] }, { @@ -82,22 +100,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "For more information about the component, please checkout:\n", - "* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataproc/_create_cluster.py)\n", - "* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile)\n", - "* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataproc/create_cluster/sample.ipynb)\n", - "* [Dataproc create cluster REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters/create)\n", - "\n", - "\n", "### Sample\n", + "Note: The following sample code works in an IPython notebook or directly in Python code. See the sample code below to learn how to execute the template.\n", "\n", - "Note: the sample code below works in both IPython notebook or python code directly." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ "#### Set sample parameters" ] }, @@ -205,6 +210,21 @@ "run_name = pipeline_func.__name__ + ' run'\n", "run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arguments)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## References\n", + "* [Kubernetes Engine for Kubeflow](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts)\n", + "* [Component Python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataproc/_create_cluster.py)\n", + "* [Component Docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile)\n", + "* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataproc/create_cluster/sample.ipynb)\n", + "* [Dataproc create cluster REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters/create)\n", + "\n", + "## License\n", + "By deploying or using this software you agree to comply with the [AI Hub Terms of Service](https://aihub.cloud.google.com/u/0/aihub-tos) and the [Google APIs Terms of Service](https://developers.google.com/terms/). To the extent of a direct conflict of terms, the AI Hub Terms of Service will control." + ] } ], "metadata": { @@ -223,7 +243,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.6.4" } }, "nbformat": 4, diff --git a/components/gcp/dataproc/delete_cluster/README.md b/components/gcp/dataproc/delete_cluster/README.md index fb2be0b9722..5cb238c607f 100644 --- a/components/gcp/dataproc/delete_cluster/README.md +++ b/components/gcp/dataproc/delete_cluster/README.md @@ -1,33 +1,43 @@ -# Deleting a Cluster with Cloud Dataproc -A Kubeflow Pipeline component to delete a cluster in Cloud Dataproc service. +# Name + +Data preparation by deleting a cluster in Cloud Dataproc + +# Label +Cloud Dataproc, cluster, GCP, Cloud Storage, Kubeflow, Pipeline + + +# Summary +A Kubeflow Pipeline component to delete a cluster in Cloud Dataproc. + +## Intended use +Use this component at the start of a Kubeflow Pipeline to delete a temporary Cloud Dataproc cluster to run Cloud Dataproc jobs as steps in the pipeline. This component is usually used with an [exit handler](https://github.com/kubeflow/pipelines/blob/master/samples/basic/exit_handler.py) to run at the end of a pipeline. -## Intended Use -Use the component to recycle a Dataproc cluster as one of the step in a KFP pipeline. This component is usually used with an [exit handler](https://github.com/kubeflow/pipelines/blob/master/samples/basic/exit_handler.py) to run at the end of a pipeline. ## Runtime arguments -Name | Description | Type | Optional | Default -:--- | :---------- | :--- | :------- | :------ -project_id | The Google Cloud Platform (GCP) project ID that the cluster belongs to. | GCPProjectID | No | -region | The Cloud Dataproc region runs the cluster to delete. | GCPRegion | No | -name | The cluster name to delete. | String | No | -wait_interval | The number of seconds to pause between polling the delete operation done status. | Integer | Yes | `30` +| Argument | Description | Optional | Data type | Accepted values | Default | +|----------|-------------|----------|-----------|-----------------|---------| +| project_id | The Google Cloud Platform (GCP) project ID that the cluster belongs to. 
| No | GCPProjectID | | | +| region | The Cloud Dataproc region in which to handle the request. | No | GCPRegion | | | +| name | The name of the cluster to delete. | No | String | | | +| wait_interval | The number of seconds to pause between polling the operation. | Yes | Integer | | 30 | + ## Cautions & requirements To use the component, you must: -* Setup project by following the [guide](https://cloud.google.com/dataproc/docs/guides/setup-project). -* The component is running under a secret of [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example: -``` -component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa')) -``` -* Grant Kubeflow user service account the `roles/dataproc.editor` role on the project. +* Set up a GCP project by following this [guide](https://cloud.google.com/dataproc/docs/guides/setup-project). +* Run the component under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example: -## Detailed Description -This component deletes a Dataproc cluster by using [Dataproc delete cluster REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters/delete). + ``` + component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa')) + ``` +* Grant the Kubeflow user service account the role `roles/dataproc.editor` on the project. -Here are the steps to use the component in a pipeline: -1. Install KFP SDK +## Detailed description +This component deletes a Dataproc cluster by using [Dataproc delete cluster REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters/delete). +Follow these steps to use the component in a pipeline: +1. Install the Kubeflow Pipeline SDK: ```python @@ -48,20 +58,13 @@ dataproc_delete_cluster_op = comp.load_component_from_url( help(dataproc_delete_cluster_op) ``` -For more information about the component, please checkout: -* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataproc/_delete_cluster.py) -* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile) -* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataproc/delete_cluster/sample.ipynb) -* [Dataproc delete cluster REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters/delete) - - ### Sample -Note: the sample code below works in both IPython notebook or python code directly. +Note: The following sample code works in an IPython notebook or directly in Python code. See the sample code below to learn how to execute the template. #### Prerequisites -Before running the sample code, you need to [create a Dataproc cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster). +[Create a Dataproc cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster) before running the sample code. 
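As noted in the intended-use section above, the delete operation is usually attached to a pipeline as an exit handler so that a temporary cluster is recycled even if an upstream step fails. The snippet below is a minimal sketch of that pattern and is not part of the original sample; it assumes the component has already been loaded as `dataproc_delete_cluster_op` (see the previous step) and uses placeholder project, region, and cluster names.

```python
import kfp.dsl as dsl
import kfp.gcp as gcp

@dsl.pipeline(
    name='Dataproc delete cluster via exit handler (sketch)',
    description='Illustrative sketch only; all values are placeholders.'
)
def delete_cluster_sketch(
        project_id='my-project',          # placeholder
        region='us-central1',             # placeholder
        name='my-temporary-cluster'):     # placeholder
    # Build the delete op first so it can be registered as the exit op.
    delete_op = dataproc_delete_cluster_op(
        project_id=project_id,
        region=region,
        name=name).apply(gcp.use_gcp_secret('user-gcp-sa'))

    with dsl.ExitHandler(exit_op=delete_op):
        # Steps that create the cluster and submit jobs to it would go here;
        # the delete op runs when the pipeline exits, whether it succeeds or fails.
        pass
```

Using an exit handler in this way ensures the cluster is cleaned up at the end of the pipeline run.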
#### Set sample parameters @@ -122,3 +125,14 @@ experiment = client.create_experiment(EXPERIMENT_NAME) run_name = pipeline_func.__name__ + ' run' run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arguments) ``` + +## References + +* [Component Python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataproc/_delete_cluster.py) +* [Component Docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile) +* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataproc/delete_cluster/sample.ipynb) +* [Dataproc delete cluster REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters/delete) + + +## License +By deploying or using this software you agree to comply with the [AI Hub Terms of Service](https://aihub.cloud.google.com/u/0/aihub-tos) and the [Google APIs Terms of Service](https://developers.google.com/terms/). To the extent of a direct conflict of terms, the AI Hub Terms of Service will control. diff --git a/components/gcp/dataproc/delete_cluster/sample.ipynb b/components/gcp/dataproc/delete_cluster/sample.ipynb index 15ad51550e9..d0de6367956 100644 --- a/components/gcp/dataproc/delete_cluster/sample.ipynb +++ b/components/gcp/dataproc/delete_cluster/sample.ipynb @@ -4,34 +4,45 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Deleting a Cluster with Cloud Dataproc\n", - "A Kubeflow Pipeline component to delete a cluster in Cloud Dataproc service.\n", + "# Name\n", + "\n", + "Data preparation by deleting a cluster in Cloud Dataproc\n", + "\n", + "# Label\n", + "Cloud Dataproc, cluster, GCP, Cloud Storage, Kubeflow, Pipeline\n", + "\n", + "\n", + "# Summary\n", + "A Kubeflow Pipeline component to delete a cluster in Cloud Dataproc.\n", + "\n", + "## Intended use\n", + "Use this component at the start of a Kubeflow Pipeline to delete a temporary Cloud Dataproc cluster to run Cloud Dataproc jobs as steps in the pipeline. This component is usually used with an [exit handler](https://github.com/kubeflow/pipelines/blob/master/samples/basic/exit_handler.py) to run at the end of a pipeline.\n", "\n", - "## Intended Use\n", - "Use the component to recycle a Dataproc cluster as one of the step in a KFP pipeline. This component is usually used with an [exit handler](https://github.com/kubeflow/pipelines/blob/master/samples/basic/exit_handler.py) to run at the end of a pipeline.\n", "\n", "## Runtime arguments\n", - "Name | Description | Type | Optional | Default\n", - ":--- | :---------- | :--- | :------- | :------\n", - "project_id | The Google Cloud Platform (GCP) project ID that the cluster belongs to. | GCPProjectID | No |\n", - "region | The Cloud Dataproc region runs the cluster to delete. | GCPRegion | No |\n", - "name | The cluster name to delete. | String | No |\n", - "wait_interval | The number of seconds to pause between polling the delete operation done status. | Integer | Yes | `30`\n", + "| Argument | Description | Optional | Data type | Accepted values | Default |\n", + "|----------|-------------|----------|-----------|-----------------|---------|\n", + "| project_id | The Google Cloud Platform (GCP) project ID that the cluster belongs to. | No | GCPProjectID | | |\n", + "| region | The Cloud Dataproc region in which to handle the request. | No | GCPRegion | | |\n", + "| name | The name of the cluster to delete. 
| No | String | | |\n", + "| wait_interval | The number of seconds to pause between polling the operation. | Yes | Integer | | 30 |\n", + "\n", "\n", "## Cautions & requirements\n", "To use the component, you must:\n", - "* Setup project by following the [guide](https://cloud.google.com/dataproc/docs/guides/setup-project).\n", - "* The component is running under a secret of [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example:\n", - "```\n", - "component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", - "```\n", - "* Grant Kubeflow user service account the `roles/dataproc.editor` role on the project.\n", - "\n", - "## Detailed Description\n", + "* Set up a GCP project by following this [guide](https://cloud.google.com/dataproc/docs/guides/setup-project).\n", + "* Run the component under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example:\n", + "\n", + " ```\n", + " component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", + " ```\n", + "* Grant the Kubeflow user service account the role `roles/dataproc.editor` on the project.\n", + "\n", + "## Detailed description\n", "This component deletes a Dataproc cluster by using [Dataproc delete cluster REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters/delete).\n", "\n", - "Here are the steps to use the component in a pipeline:\n", - "1. Install KFP SDK\n" + "Follow these steps to use the component in a pipeline:\n", + "1. Install the Kubeflow Pipeline SDK:" ] }, { @@ -70,31 +81,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "For more information about the component, please checkout:\n", - "* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataproc/_delete_cluster.py)\n", - "* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile)\n", - "* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataproc/delete_cluster/sample.ipynb)\n", - "* [Dataproc delete cluster REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters/delete)\n", - "\n", - "\n", "### Sample\n", "\n", - "Note: the sample code below works in both IPython notebook or python code directly." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ + "Note: The following sample code works in an IPython notebook or directly in Python code. See the sample code below to learn how to execute the template.\n", + "\n", "#### Prerequisites\n", "\n", - "Before running the sample code, you need to [create a Dataproc cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster)." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ + "[Create a Dataproc cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster) before running the sample code.\n", + "\n", "#### Set sample parameters" ] }, @@ -190,6 +184,22 @@ "run_name = pipeline_func.__name__ + ' run'\n", "run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arguments)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## References\n", + "\n", + "* [Component Python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataproc/_delete_cluster.py)\n", + "* [Component Docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile)\n", + "* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataproc/delete_cluster/sample.ipynb)\n", + "* [Dataproc delete cluster REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters/delete)\n", + "\n", + "\n", + "## License\n", + "By deploying or using this software you agree to comply with the [AI Hub Terms of Service](https://aihub.cloud.google.com/u/0/aihub-tos) and the [Google APIs Terms of Service](https://developers.google.com/terms/). To the extent of a direct conflict of terms, the AI Hub Terms of Service will control." + ] } ], "metadata": { @@ -208,7 +218,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.6.4" } }, "nbformat": 4, diff --git a/components/gcp/dataproc/submit_hadoop_job/README.md b/components/gcp/dataproc/submit_hadoop_job/README.md index 1d5bf42ff88..d1ae5d3c975 100644 --- a/components/gcp/dataproc/submit_hadoop_job/README.md +++ b/components/gcp/dataproc/submit_hadoop_job/README.md @@ -1,22 +1,36 @@ -# Submitting a Hadoop Job to Cloud Dataproc -A Kubeflow Pipeline component to submit an Apache Hadoop MapReduce job on Apache Hadoop YARN in Google Cloud Dataproc service. +# Name +Data preparation using Hadoop MapReduce on YARN with Cloud Dataproc -## Intended Use -Use the component to run an Apache Hadoop MapReduce job as one preprocessing step in a KFP pipeline. +# Label +Cloud Dataproc, GCP, Cloud Storage, Hadoop, YARN, Apache, MapReduce + + +# Summary +A Kubeflow Pipeline component to prepare data by submitting an Apache Hadoop MapReduce job on Apache Hadoop YARN to Cloud Dataproc. + +# Details +## Intended use +Use the component to run an Apache Hadoop MapReduce job as one preprocessing step in a Kubeflow Pipeline. ## Runtime arguments -Name | Description | Type | Optional | Default -:--- | :---------- | :--- | :------- | :------ -project_id | The Google Cloud Platform (GCP) project ID that the cluster belongs to. | GCPProjectID | No | -region | The Dataproc region that handles the request. | GCPRegion | No | -cluster_name | The name of the cluster that runs the job. | String | No | -main_jar_file_uri | The Hadoop Compatible Filesystem (HCFS) URI of the JAR file containing the main class to execute. Examples: `gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar` `hdfs:/tmp/test-samples/custom-wordcount.jar` `file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar` | GCSPath | No | -main_class | The name of the driver's main class. The JARfile that contains the class must be in the default CLASSPATH or specified in `hadoop_job.jarFileUris`. | String | No | -args | The arguments to pass to the driver. 
Do not include arguments, such as -libjars or -Dfoo=bar, that can be set as job properties, since a collision may occur that causes an incorrect job submission. | Yes | List | `[]` -hadoop_job | The payload of a [HadoopJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/HadoopJob). | Dict | Yes | `{}` -job | The payload of a [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs). | Dict | Yes | `{}` -wait_interval | The number of seconds to pause between polling the operation. | Integer | Yes | `30` +| Argument | Description | Optional | Data type | Accepted values | Default | +|----------|-------------|----------|-----------|-----------------|---------| +| project_id | The Google Cloud Platform (GCP) project ID that the cluster belongs to. | No | GCPProjectID | | | +| region | The Dataproc region to handle the request. | No | GCPRegion | | | +| cluster_name | The name of the cluster to run the job. | No | String | | | +| main_jar_file_uri | The Hadoop Compatible Filesystem (HCFS) URI of the JAR file containing the main class to execute. | No | List | | | +| main_class | The name of the driver's main class. The JAR file that contains the class must be either in the default CLASSPATH or specified in `hadoop_job.jarFileUris`. | No | String | | | +| args | The arguments to pass to the driver. Do not include arguments, such as -libjars or -Dfoo=bar, that can be set as job properties, since a collision may occur that causes an incorrect job submission. | Yes | List | | None | +| hadoop_job | The payload of a [HadoopJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/HadoopJob). | Yes | Dict | | None | +| job | The payload of a [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs). | Yes | Dict | | None | +| wait_interval | The number of seconds to pause between polling the operation. | Yes | Integer | | 30 | + +Note: +`main_jar_file_uri`: The examples for the files are : +- `gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar` +- `hdfs:/tmp/test-samples/custom-wordcount.jarfile:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar` + ## Output Name | Description | Type @@ -25,19 +39,22 @@ job_id | The ID of the created job. | String ## Cautions & requirements To use the component, you must: -* Setup project by following the [guide](https://cloud.google.com/dataproc/docs/guides/setup-project). -* [Create a new cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster). -* The component is running under a secret of [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example: -``` -component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa')) -``` -* Grant Kubeflow user service account the `roles/dataproc.editor` role on the project. +* Set up a GCP project by following this [guide](https://cloud.google.com/dataproc/docs/guides/setup-project). +* [Create a new cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster). +* Run the component under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example: + + ```python + component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa')) + ``` +* Grant the Kubeflow user service account the role `roles/dataproc.editor` on the project. 
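For example, assuming a hypothetical project ID and service account email (replace both with the values for your own deployment), the role can be granted once per project with the `gcloud` CLI from a notebook cell:

```python
# Hypothetical values: substitute your project ID and the email of the
# service account behind the 'user-gcp-sa' secret before running.
!gcloud projects add-iam-policy-binding my-project --member='serviceAccount:user-gcp-sa@my-project.iam.gserviceaccount.com' --role='roles/dataproc.editor'
```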
+ +## Detailed description -## Detailed Description This component creates a Hadoop job from [Dataproc submit job REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs/submit). -Here are the steps to use the component in a pipeline: -1. Install KFP SDK +Follow these steps to use the component in a pipeline: + +1. Install the Kubeflow Pipeline SDK: @@ -59,28 +76,23 @@ dataproc_submit_hadoop_job_op = comp.load_component_from_url( help(dataproc_submit_hadoop_job_op) ``` -For more information about the component, please checkout: -* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataproc/_submit_hadoop_job.py) -* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile) -* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataproc/submit_hadoop_job/sample.ipynb) -* [Dataproc HadoopJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/HadoopJob) +## Sample +Note: The following sample code works in an IPython notebook or directly in Python code. See the sample code below to learn how to execute the template. -### Sample -Note: the sample code below works in both IPython notebook or python code directly. - -#### Setup a Dataproc cluster +### Setup a Dataproc cluster [Create a new Dataproc cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster) (or reuse an existing one) before running the sample code. -#### Prepare Hadoop job -Upload your Hadoop jar file to a Cloud Storage (GCS) bucket. In the sample, we will use a jar file that is pre-installed in the main cluster, so there is no need to provide the `main_jar_file_uri`. We only set `main_class` to be `org.apache.hadoop.examples.WordCount`. +### Prepare a Hadoop job +Upload your Hadoop JAR file to a Cloud Storage bucket. In the sample, we will use a JAR file that is preinstalled in the main cluster, so there is no need to provide `main_jar_file_uri`. Here is the [WordCount example source code](https://github.com/apache/hadoop/blob/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordCount.java). -To package a self-contained Hadoop MapReduceapplication from the source code, follow the [MapReduce Tutorial](https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html). +To package a self-contained Hadoop MapReduce application from the source code, follow the [MapReduce Tutorial](https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html). + -#### Set sample parameters +### Set sample parameters ```python @@ -101,12 +113,10 @@ The input file is a simple text file: !gsutil cat $INTPUT_GCS_PATH ``` -#### Clean up existing output files (Optional) +### Clean up the existing output files (optional) +This is needed because the sample code requires the output folder to be a clean folder. To continue to run the sample, make sure that the service account of the notebook server has access to the `OUTPUT_GCS_PATH`. -This is needed because the sample code requires the output folder to be a clean folder. -To continue to run the sample, make sure that the service account of the notebook server has access to the `OUTPUT_GCS_PATH`. - -**CAUTION**: This will remove all blob files under `OUTPUT_GCS_PATH`. +CAUTION: This will remove all blob files under `OUTPUT_GCS_PATH`. 
```python @@ -177,10 +187,19 @@ run_name = pipeline_func.__name__ + ' run' run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arguments) ``` -#### Inspect the outputs -The sample in the notebook will count the words in the input text and save them in sharded files. Here is the command to inspect them: +### Inspect the output +The sample in the notebook will count the words in the input text and save them in sharded files. The command to inspect the output is: ```python !gsutil cat $OUTPUT_GCS_PATH/* ``` + +## References +* [Component Python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataproc/_submit_hadoop_job.py) +* [Component Docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile) +* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataproc/submit_hadoop_job/sample.ipynb) +* [Dataproc HadoopJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/HadoopJob) + +## License +By deploying or using this software you agree to comply with the [AI Hub Terms of Service](https://aihub.cloud.google.com/u/0/aihub-tos) and the [Google APIs Terms of Service](https://developers.google.com/terms/). To the extent of a direct conflict of terms, the AI Hub Terms of Service will control. diff --git a/components/gcp/dataproc/submit_hadoop_job/sample.ipynb b/components/gcp/dataproc/submit_hadoop_job/sample.ipynb index 6fa0f822be7..dc4b1230ebe 100644 --- a/components/gcp/dataproc/submit_hadoop_job/sample.ipynb +++ b/components/gcp/dataproc/submit_hadoop_job/sample.ipynb @@ -4,24 +4,38 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Submitting a Hadoop Job to Cloud Dataproc\n", - "A Kubeflow Pipeline component to submit an Apache Hadoop MapReduce job on Apache Hadoop YARN in Google Cloud Dataproc service.\n", + "# Name\n", + "Data preparation using Hadoop MapReduce on YARN with Cloud Dataproc\n", "\n", - "## Intended Use\n", - "Use the component to run an Apache Hadoop MapReduce job as one preprocessing step in a KFP pipeline. \n", + "# Label\n", + "Cloud Dataproc, GCP, Cloud Storage, Hadoop, YARN, Apache, MapReduce\n", + "\n", + "\n", + "# Summary\n", + "A Kubeflow Pipeline component to prepare data by submitting an Apache Hadoop MapReduce job on Apache Hadoop YARN to Cloud Dataproc.\n", + "\n", + "# Details\n", + "## Intended use\n", + "Use the component to run an Apache Hadoop MapReduce job as one preprocessing step in a Kubeflow Pipeline. \n", "\n", "## Runtime arguments\n", - "Name | Description | Type | Optional | Default\n", - ":--- | :---------- | :--- | :------- | :------\n", - "project_id | The Google Cloud Platform (GCP) project ID that the cluster belongs to. | GCPProjectID | No |\n", - "region | The Dataproc region that handles the request. | GCPRegion | No |\n", - "cluster_name | The name of the cluster that runs the job. | String | No |\n", - "main_jar_file_uri | The Hadoop Compatible Filesystem (HCFS) URI of the JAR file containing the main class to execute. Examples: `gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar` `hdfs:/tmp/test-samples/custom-wordcount.jar` `file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar` | GCSPath | No |\n", - "main_class | The name of the driver's main class. The JARfile that contains the class must be in the default CLASSPATH or specified in `hadoop_job.jarFileUris`. | String | No |\n", - "args | The arguments to pass to the driver. 
Do not include arguments, such as -libjars or -Dfoo=bar, that can be set as job properties, since a collision may occur that causes an incorrect job submission. | Yes | List | `[]`\n", - "hadoop_job | The payload of a [HadoopJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/HadoopJob). | Dict | Yes | `{}`\n", - "job | The payload of a [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs). | Dict | Yes | `{}`\n", - "wait_interval | The number of seconds to pause between polling the operation. | Integer | Yes | `30`\n", + "| Argument | Description | Optional | Data type | Accepted values | Default |\n", + "|----------|-------------|----------|-----------|-----------------|---------|\n", + "| project_id | The Google Cloud Platform (GCP) project ID that the cluster belongs to. | No | GCPProjectID | | |\n", + "| region | The Dataproc region to handle the request. | No | GCPRegion | | |\n", + "| cluster_name | The name of the cluster to run the job. | No | String | | |\n", + "| main_jar_file_uri | The Hadoop Compatible Filesystem (HCFS) URI of the JAR file containing the main class to execute. | No | List | | |\n", + "| main_class | The name of the driver's main class. The JAR file that contains the class must be either in the default CLASSPATH or specified in `hadoop_job.jarFileUris`. | No | String | | |\n", + "| args | The arguments to pass to the driver. Do not include arguments, such as -libjars or -Dfoo=bar, that can be set as job properties, since a collision may occur that causes an incorrect job submission. | Yes | List | | None |\n", + "| hadoop_job | The payload of a [HadoopJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/HadoopJob). | Yes | Dict | | None |\n", + "| job | The payload of a [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs). | Yes | Dict | | None |\n", + "| wait_interval | The number of seconds to pause between polling the operation. | Yes | Integer | | 30 |\n", + "\n", + "Note: \n", + "`main_jar_file_uri`: The examples for the files are : \n", + "- `gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar` \n", + "- `hdfs:/tmp/test-samples/custom-wordcount.jarfile:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar`\n", + "\n", "\n", "## Output\n", "Name | Description | Type\n", @@ -30,19 +44,22 @@ "\n", "## Cautions & requirements\n", "To use the component, you must:\n", - "* Setup project by following the [guide](https://cloud.google.com/dataproc/docs/guides/setup-project).\n", - "* [Create a new cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster).\n", - "* The component is running under a secret of [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example:\n", - "```\n", - "component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", - "```\n", - "* Grant Kubeflow user service account the `roles/dataproc.editor` role on the project.\n", + "* Set up a GCP project by following this [guide](https://cloud.google.com/dataproc/docs/guides/setup-project).\n", + "* [Create a new cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster).\n", + "* Run the component under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. 
For example:\n", + "\n", + " ```python\n", + " component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", + " ```\n", + "* Grant the Kubeflow user service account the role `roles/dataproc.editor` on the project.\n", + "\n", + "## Detailed description\n", "\n", - "## Detailed Description\n", "This component creates a Hadoop job from [Dataproc submit job REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs/submit).\n", "\n", - "Here are the steps to use the component in a pipeline:\n", - "1. Install KFP SDK\n" + "Follow these steps to use the component in a pipeline:\n", + "\n", + "1. Install the Kubeflow Pipeline SDK:\n" ] }, { @@ -81,33 +98,23 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "For more information about the component, please checkout:\n", - "* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataproc/_submit_hadoop_job.py)\n", - "* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile)\n", - "* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataproc/submit_hadoop_job/sample.ipynb)\n", - "* [Dataproc HadoopJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/HadoopJob)\n", - "\n", - "### Sample\n", + "## Sample\n", + "Note: The following sample code works in an IPython notebook or directly in Python code. See the sample code below to learn how to execute the template.\n", "\n", - "Note: the sample code below works in both IPython notebook or python code directly.\n", "\n", - "#### Setup a Dataproc cluster\n", + "### Setup a Dataproc cluster\n", "[Create a new Dataproc cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster) (or reuse an existing one) before running the sample code.\n", "\n", "\n", - "#### Prepare Hadoop job\n", - "Upload your Hadoop jar file to a Cloud Storage (GCS) bucket. In the sample, we will use a jar file that is pre-installed in the main cluster, so there is no need to provide the `main_jar_file_uri`. We only set `main_class` to be `org.apache.hadoop.examples.WordCount`.\n", + "### Prepare a Hadoop job\n", + "Upload your Hadoop JAR file to a Cloud Storage bucket. In the sample, we will use a JAR file that is preinstalled in the main cluster, so there is no need to provide `main_jar_file_uri`. \n", "\n", "Here is the [WordCount example source code](https://github.com/apache/hadoop/blob/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordCount.java).\n", "\n", - "To package a self-contained Hadoop MapReduceapplication from the source code, follow the [MapReduce Tutorial](https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html)." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Set sample parameters" + "To package a self-contained Hadoop MapReduce application from the source code, follow the [MapReduce Tutorial](https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html).\n", + "\n", + "\n", + "### Set sample parameters" ] }, { @@ -150,12 +157,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Clean up existing output files (Optional)\n", - "\n", - "This is needed because the sample code requires the output folder to be a clean folder.\n", - "To continue to run the sample, make sure that the service account of the notebook server has access to the `OUTPUT_GCS_PATH`.\n", + "### Clean up the existing output files (optional)\n", + "This is needed because the sample code requires the output folder to be a clean folder. To continue to run the sample, make sure that the service account of the notebook server has access to the `OUTPUT_GCS_PATH`.\n", "\n", - "**CAUTION**: This will remove all blob files under `OUTPUT_GCS_PATH`." + "CAUTION: This will remove all blob files under `OUTPUT_GCS_PATH`." ] }, { @@ -262,8 +267,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Inspect the outputs\n", - "The sample in the notebook will count the words in the input text and save them in sharded files. Here is the command to inspect them:" + "### Inspect the output\n", + "The sample in the notebook will count the words in the input text and save them in sharded files. The command to inspect the output is:" ] }, { @@ -274,6 +279,20 @@ "source": [ "!gsutil cat $OUTPUT_GCS_PATH/*" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## References\n", + "* [Component Python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataproc/_submit_hadoop_job.py)\n", + "* [Component Docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile)\n", + "* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataproc/submit_hadoop_job/sample.ipynb)\n", + "* [Dataproc HadoopJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/HadoopJob)\n", + "\n", + "## License\n", + "By deploying or using this software you agree to comply with the [AI Hub Terms of Service](https://aihub.cloud.google.com/u/0/aihub-tos) and the [Google APIs Terms of Service](https://developers.google.com/terms/). To the extent of a direct conflict of terms, the AI Hub Terms of Service will control." + ] } ], "metadata": { @@ -292,7 +311,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.6.4" } }, "nbformat": 4, diff --git a/components/gcp/dataproc/submit_hive_job/README.md b/components/gcp/dataproc/submit_hive_job/README.md index 8cd1d0b01c9..f73bc257f1c 100644 --- a/components/gcp/dataproc/submit_hive_job/README.md +++ b/components/gcp/dataproc/submit_hive_job/README.md @@ -1,22 +1,29 @@ -# Submitting a Hive Job to Cloud Dataproc -A Kubeflow Pipeline component to submit a Hive job to Google Cloud Dataproc service. +# Name +Data preparation using Apache Hive on YARN with Cloud Dataproc -## Intended Use -Use the component to run an Apache Hive job as one preprocessing step in a KFP pipeline. 
+# Label +Cloud Dataproc, GCP, Cloud Storage, YARN, Hive, Apache + +# Summary +A Kubeflow Pipeline component to prepare data by submitting an Apache Hive job on YARN to Cloud Dataproc. + +# Details +## Intended use +Use the component to run an Apache Hive job as one preprocessing step in a Kubeflow Pipeline. ## Runtime arguments -Name | Description | Type | Optional | Default -:--- | :---------- | :--- | :------- | :------ -project_id | The Google Cloud Platform (GCP) project ID that the cluster belongs to. | GCPProjectID | No | -region | The Dataproc region that handles the request. | GCPRegion | No | -cluster_name | The name of the cluster that runs the job. | String | No | -queries | The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. | List | Yes | `[]` -query_file_uri | The Hadoop Compatible Filesystem (HCFS) URI of the script that contains Hive queries. | GCSPath | Yes | ` ` -script_variables | Mapping of query variable names to values (equivalent to the Hive command: SET name="value";). | List | Yes | `[]` -hive_job | The payload of a [HiveJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/HiveJob). | Dict | Yes | `{}` -job | The payload of a [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs). | Dict | Yes | `{}` -wait_interval | The number of seconds to pause between polling the operation. | Integer | Yes | `30` +| Argument | Description | Optional | Data type | Accepted values | Default | +|----------|-------------|----------|-----------|-----------------|---------| +| project_id | The Google Cloud Platform (GCP) project ID that the cluster belongs to. | No | GCPProjectId | | | +| region | The Cloud Dataproc region to handle the request. | No | GCPRegion | | | +| cluster_name | The name of the cluster to run the job. | No | String | | | +| queries | The queries to execute the Hive job. Specify multiple queries in one string by separating them with semicolons. You do not need to terminate queries with semicolons. | Yes | List | | None | +| query_file_uri | The HCFS URI of the script that contains the Hive queries. | Yes | GCPPath | | None | +| script_variables | Mapping of the query’s variable names to their values (equivalent to the Hive command: SET name="value";). | Yes | Dict | | None | +| hive_job | The payload of a [HiveJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/HiveJob) | Yes | Dict | | None | +| job | The payload of a [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs). | Yes | Dict | | None | +| wait_interval | The number of seconds to pause between polling the operation. | Yes | Integer | | 30 | ## Output Name | Description | Type @@ -25,19 +32,20 @@ job_id | The ID of the created job. | String ## Cautions & requirements To use the component, you must: -* Setup project by following the [guide](https://cloud.google.com/dataproc/docs/guides/setup-project). -* [Create a new cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster). -* The component is running under a secret of [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example: -``` -component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa')) -``` -* Grant Kubeflow user service account the `roles/dataproc.editor` role on the project. 
+* Set up a GCP project by following this [guide](https://cloud.google.com/dataproc/docs/guides/setup-project). +* [Create a new cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster). +* Run the component under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example: + + ``` + component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa')) + ``` +* Grant the Kubeflow user service account the role `roles/dataproc.editor` on the project. -## Detailed Description +## Detailed description This component creates a Hive job from [Dataproc submit job REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs/submit). -Here are the steps to use the component in a pipeline: -1. Install KFP SDK +Follow these steps to use the component in a pipeline: +1. Install the Kubeflow Pipeline SDK: @@ -59,23 +67,21 @@ dataproc_submit_hive_job_op = comp.load_component_from_url( help(dataproc_submit_hive_job_op) ``` -For more information about the component, please checkout: -* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataproc/_submit_hive_job.py) -* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile) -* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataproc/submit_hive_job/sample.ipynb) -* [Dataproc HiveJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/HiveJob) - ### Sample -Note: the sample code below works in both IPython notebook or python code directly. +Note: The following sample code works in an IPython notebook or directly in Python code. See the sample code below to learn how to execute the template. + #### Setup a Dataproc cluster + [Create a new Dataproc cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster) (or reuse an existing one) before running the sample code. -#### Prepare Hive query -Directly put your Hive queries in the `queries` list or upload your Hive queries into a file to a Cloud Storage (GCS) bucket and place the path in `query_file_uri`. In this sample, we will use a hard coded query in the `queries` list to select data from a public CSV file from GCS. +#### Prepare a Hive query + +Put your Hive queries in the queries list, or upload your Hive queries into a file saved in a Cloud Storage bucket and then enter the Cloud Storage bucket’s path in `query_file_uri.` In this sample, we will use a hard coded query in the queries list to select data from a public CSV file from Cloud Storage. 
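As a rough illustration only (this is not the exact query used later in this sample), a hard-coded entry in the `queries` list that reads a CSV file from a Cloud Storage location might look like the following; the table name and bucket path are placeholders:

```python
# Placeholder query for illustration; the sample's own query is defined in the
# "Set sample parameters" section below.
QUERY = '''
CREATE EXTERNAL TABLE IF NOT EXISTS example_csv (
    id BIGINT,
    value FLOAT
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION 'gs://my-bucket/path/to/csv/';
SELECT id, value FROM example_csv LIMIT 10
'''
queries = [QUERY]
```

Alternatively, the queries can live in a script stored in Cloud Storage and be referenced through `query_file_uri`, as described in the runtime arguments above.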
+ +For more details, see the [Hive language manual.](https://cwiki.apache.org/confluence/display/Hive/LanguageManual) -For more details, please checkout [Hive language manual](https://cwiki.apache.org/confluence/display/Hive/LanguageManual) #### Set sample parameters @@ -166,3 +172,12 @@ experiment = client.create_experiment(EXPERIMENT_NAME) run_name = pipeline_func.__name__ + ' run' run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arguments) ``` + +## References +* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataproc/_submit_hive_job.py) +* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile) +* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataproc/submit_hive_job/sample.ipynb) +* [Dataproc HiveJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/HiveJob) + +## License +By deploying or using this software you agree to comply with the [AI Hub Terms of Service](https://aihub.cloud.google.com/u/0/aihub-tos) and the [Google APIs Terms of Service](https://developers.google.com/terms/). To the extent of a direct conflict of terms, the AI Hub Terms of Service will control. diff --git a/components/gcp/dataproc/submit_hive_job/sample.ipynb b/components/gcp/dataproc/submit_hive_job/sample.ipynb index a6081328ebe..bfd32c6558a 100644 --- a/components/gcp/dataproc/submit_hive_job/sample.ipynb +++ b/components/gcp/dataproc/submit_hive_job/sample.ipynb @@ -4,24 +4,31 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Submitting a Hive Job to Cloud Dataproc\n", - "A Kubeflow Pipeline component to submit a Hive job to Google Cloud Dataproc service. \n", + "# Name\n", + "Data preparation using Apache Hive on YARN with Cloud Dataproc\n", "\n", - "## Intended Use\n", - "Use the component to run an Apache Hive job as one preprocessing step in a KFP pipeline. \n", + "# Label\n", + "Cloud Dataproc, GCP, Cloud Storage, YARN, Hive, Apache\n", + "\n", + "# Summary\n", + "A Kubeflow Pipeline component to prepare data by submitting an Apache Hive job on YARN to Cloud Dataproc.\n", + "\n", + "# Details\n", + "## Intended use\n", + "Use the component to run an Apache Hive job as one preprocessing step in a Kubeflow Pipeline.\n", "\n", "## Runtime arguments\n", - "Name | Description | Type | Optional | Default\n", - ":--- | :---------- | :--- | :------- | :------\n", - "project_id | The Google Cloud Platform (GCP) project ID that the cluster belongs to. | GCPProjectID | No |\n", - "region | The Dataproc region that handles the request. | GCPRegion | No |\n", - "cluster_name | The name of the cluster that runs the job. | String | No |\n", - "queries | The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. | List | Yes | `[]`\n", - "query_file_uri | The Hadoop Compatible Filesystem (HCFS) URI of the script that contains Hive queries. | GCSPath | Yes | ` `\n", - "script_variables | Mapping of query variable names to values (equivalent to the Hive command: SET name=\"value\";). | List | Yes | `[]`\n", - "hive_job | The payload of a [HiveJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/HiveJob). | Dict | Yes | `{}`\n", - "job | The payload of a [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs). 
| Dict | Yes | `{}`\n", - "wait_interval | The number of seconds to pause between polling the operation. | Integer | Yes | `30`\n", + "| Argument | Description | Optional | Data type | Accepted values | Default |\n", + "|----------|-------------|----------|-----------|-----------------|---------|\n", + "| project_id | The Google Cloud Platform (GCP) project ID that the cluster belongs to. | No | GCPProjectId | | |\n", + "| region | The Cloud Dataproc region to handle the request. | No | GCPRegion | | |\n", + "| cluster_name | The name of the cluster to run the job. | No | String | | |\n", + "| queries | The queries to execute the Hive job. Specify multiple queries in one string by separating them with semicolons. You do not need to terminate queries with semicolons. | Yes | List | | None |\n", + "| query_file_uri | The HCFS URI of the script that contains the Hive queries. | Yes | GCPPath | | None |\n", + "| script_variables | Mapping of the query’s variable names to their values (equivalent to the Hive command: SET name=\"value\";). | Yes | Dict | | None |\n", + "| hive_job | The payload of a [HiveJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/HiveJob) | Yes | Dict | | None |\n", + "| job | The payload of a [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs). | Yes | Dict | | None |\n", + "| wait_interval | The number of seconds to pause between polling the operation. | Yes | Integer | | 30 |\n", "\n", "## Output\n", "Name | Description | Type\n", @@ -30,19 +37,20 @@ "\n", "## Cautions & requirements\n", "To use the component, you must:\n", - "* Setup project by following the [guide](https://cloud.google.com/dataproc/docs/guides/setup-project).\n", - "* [Create a new cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster).\n", - "* The component is running under a secret of [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example:\n", - "```\n", - "component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", - "```\n", - "* Grant Kubeflow user service account the `roles/dataproc.editor` role on the project.\n", - "\n", - "## Detailed Description\n", + "* Set up a GCP project by following this [guide](https://cloud.google.com/dataproc/docs/guides/setup-project).\n", + "* [Create a new cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster).\n", + "* Run the component under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example:\n", + "\n", + " ```\n", + " component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", + " ```\n", + "* Grant the Kubeflow user service account the role `roles/dataproc.editor` on the project.\n", + "\n", + "## Detailed description\n", "This component creates a Hive job from [Dataproc submit job REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs/submit).\n", "\n", - "Here are the steps to use the component in a pipeline:\n", - "1. Install KFP SDK\n" + "Follow these steps to use the component in a pipeline:\n", + "1. 
Install the Kubeflow Pipeline SDK:\n" ] }, { @@ -81,29 +89,22 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "For more information about the component, please checkout:\n", - "* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataproc/_submit_hive_job.py)\n", - "* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile)\n", - "* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataproc/submit_hive_job/sample.ipynb)\n", - "* [Dataproc HiveJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/HiveJob)\n", - "\n", "### Sample\n", "\n", - "Note: the sample code below works in both IPython notebook or python code directly.\n", + "Note: The following sample code works in an IPython notebook or directly in Python code. See the sample code below to learn how to execute the template.\n", + "\n", "\n", "#### Setup a Dataproc cluster\n", + "\n", "[Create a new Dataproc cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster) (or reuse an existing one) before running the sample code.\n", "\n", - "#### Prepare Hive query\n", - "Directly put your Hive queries in the `queries` list or upload your Hive queries into a file to a Cloud Storage (GCS) bucket and place the path in `query_file_uri`. In this sample, we will use a hard coded query in the `queries` list to select data from a public CSV file from GCS.\n", + "#### Prepare a Hive query\n", + "\n", + "Put your Hive queries in the queries list, or upload your Hive queries into a file saved in a Cloud Storage bucket and then enter the Cloud Storage bucket’s path in `query_file_uri.` In this sample, we will use a hard coded query in the queries list to select data from a public CSV file from Cloud Storage.\n", + "\n", + "For more details, see the [Hive language manual.](https://cwiki.apache.org/confluence/display/Hive/LanguageManual)\n", + "\n", "\n", - "For more details, please checkout [Hive language manual](https://cwiki.apache.org/confluence/display/Hive/LanguageManual)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ "#### Set sample parameters" ] }, @@ -229,6 +230,20 @@ "run_name = pipeline_func.__name__ + ' run'\n", "run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arguments)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## References\n", + "* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataproc/_submit_hive_job.py)\n", + "* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile)\n", + "* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataproc/submit_hive_job/sample.ipynb)\n", + "* [Dataproc HiveJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/HiveJob)\n", + "\n", + "## License\n", + "By deploying or using this software you agree to comply with the [AI Hub Terms of Service](https://aihub.cloud.google.com/u/0/aihub-tos) and the [Google APIs Terms of Service](https://developers.google.com/terms/). To the extent of a direct conflict of terms, the AI Hub Terms of Service will control." 
+ ] } ], "metadata": { @@ -247,7 +262,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.6.4" } }, "nbformat": 4, diff --git a/components/gcp/dataproc/submit_pig_job/README.md b/components/gcp/dataproc/submit_pig_job/README.md index 252b0cad638..70ead813b0e 100644 --- a/components/gcp/dataproc/submit_pig_job/README.md +++ b/components/gcp/dataproc/submit_pig_job/README.md @@ -1,22 +1,31 @@ -# Submitting a Pig Job to Cloud Dataproc -A Kubeflow Pipeline component to submit a Pig job to Google Cloud Dataproc service. +# Name +Data preparation using Apache Pig on YARN with Cloud Dataproc -## Intended Use -Use the component to run an Apache Pig job as one preprocessing step in a KFP pipeline. +# Label +Cloud Dataproc, GCP, Cloud Storage, YARN, Pig, Apache, Kubeflow, pipelines, components + + +# Summary +A Kubeflow Pipeline component to prepare data by submitting an Apache Pig job on YARN to Cloud Dataproc. + + +# Details +## Intended use +Use the component to run an Apache Pig job as one preprocessing step in a Kubeflow Pipeline. ## Runtime arguments -Name | Description | Type | Optional | Default -:--- | :---------- | :--- | :------- | :------ -project_id | The Google Cloud Platform (GCP) project ID that the cluster belongs to. | GCPProjectID | No | -region | The Dataproc region that handles the request. | GCPRegion | No | -cluster_name | The name of the cluster that runs the job. | String | No | -queries | The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. | List | Yes | `[]` -query_file_uri | The Hadoop Compatible Filesystem (HCFS) URI of the script that contains Pig queries.| GCSPath | Yes | ` ` -script_variables | Optional. Mapping of query variable names to values (equivalent to the Pig command: SET name="value";).| List | Yes | `[]` -pig_job | The payload of a [PigJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/PigJob). | Dict | Yes | `{}` -job | The payload of a [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs).| Dict | Yes | `{}` -wait_interval | The number of seconds to pause between polling the operation. | Integer | Yes | `30` +| Argument | Description | Optional | Data type | Accepted values | Default | +|----------|-------------|----------|-----------|-----------------|---------| +| project_id | The ID of the Google Cloud Platform (GCP) project that the cluster belongs to. | No | GCPProjectID | | | +| region | The Cloud Dataproc region to handle the request. | No | GCPRegion | | | +| cluster_name | The name of the cluster to run the job. | No | String | | | +| queries | The queries to execute the Pig job. Specify multiple queries in one string by separating them with semicolons. You do not need to terminate queries with semicolons. | Yes | List | | None | +| query_file_uri | The HCFS URI of the script that contains the Pig queries. | Yes | GCSPath | | None | +| script_variables | Mapping of the query’s variable names to their values (equivalent to the Pig command: SET name="value";). | Yes | Dict | | None | +| pig_job | The payload of a [PigJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/PigJob). | Yes | Dict | | None | +| job | The payload of a [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs). 
| Yes | Dict | | None | +| wait_interval | The number of seconds to pause between polling the operation. | Yes | Integer | | 30 | ## Output Name | Description | Type @@ -24,20 +33,22 @@ Name | Description | Type job_id | The ID of the created job. | String ## Cautions & requirements + To use the component, you must: -* Setup project by following the [guide](https://cloud.google.com/dataproc/docs/guides/setup-project). -* [Create a new cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster). -* The component is running under a secret of [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example: -``` -component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa')) -``` -* Grant Kubeflow user service account the `roles/dataproc.editor` role on the project. +* Set up a GCP project by following this [guide](https://cloud.google.com/dataproc/docs/guides/setup-project). +* [Create a new cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster). +* Run the component under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example: + + ``` + component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa')) + ``` +* Grant the Kubeflow user service account the role `roles/dataproc.editor` on the project. -## Detailed Description +## Detailed description This component creates a Pig job from [Dataproc submit job REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs/submit). -Here are the steps to use the component in a pipeline: -1. Install KFP SDK +Follow these steps to use the component in a pipeline: +1. Install the Kubeflow Pipeline SDK: @@ -59,23 +70,21 @@ dataproc_submit_pig_job_op = comp.load_component_from_url( help(dataproc_submit_pig_job_op) ``` -For more information about the component, please checkout: -* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataproc/_submit_pig_job.py) -* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile) -* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataproc/submit_pig_job/sample.ipynb) -* [Dataproc PigJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/PigJob) - ### Sample -Note: the sample code below works in both IPython notebook or python code directly. +Note: The following sample code works in an IPython notebook or directly in Python code. See the sample code below to learn how to execute the template. + #### Setup a Dataproc cluster + [Create a new Dataproc cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster) (or reuse an existing one) before running the sample code. -#### Prepare Pig query -Directly put your Pig queries in the `queries` list or upload your Pig queries into a file to a Google Cloud Storage (GCS) bucket and place the path in `query_file_uri`. In this sample, we will use a hard coded query in the `queries` list to select data from a local `passwd` file. -For more details, please checkout [Pig documentation](http://pig.apache.org/docs/latest/) +#### Prepare a Pig query + +Either put your Pig queries in the `queries` list, or upload your Pig queries into a file to a Cloud Storage bucket and then enter the Cloud Storage bucket’s path in `query_file_uri`. 
In this sample, we will use a hard coded query in the `queries` list to select data from a local `passwd` file. + +For more details on Apache Pig, see the [Pig documentation.](http://pig.apache.org/docs/latest/) #### Set sample parameters @@ -154,7 +163,11 @@ run_name = pipeline_func.__name__ + ' run' run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arguments) ``` +## References +* [Create a new Dataproc cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster) +* [Pig documentation](http://pig.apache.org/docs/latest/) +* [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs) +* [PigJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/PigJob) -```python - -``` +## License +By deploying or using this software you agree to comply with the [AI Hub Terms of Service](https://aihub.cloud.google.com/u/0/aihub-tos) and the [Google APIs Terms of Service](https://developers.google.com/terms/). To the extent of a direct conflict of terms, the AI Hub Terms of Service will control. diff --git a/components/gcp/dataproc/submit_pig_job/sample.ipynb b/components/gcp/dataproc/submit_pig_job/sample.ipynb index 9da409b8e1d..b695b2eadaa 100644 --- a/components/gcp/dataproc/submit_pig_job/sample.ipynb +++ b/components/gcp/dataproc/submit_pig_job/sample.ipynb @@ -4,24 +4,33 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Submitting a Pig Job to Cloud Dataproc\n", - "A Kubeflow Pipeline component to submit a Pig job to Google Cloud Dataproc service. \n", + "# Name\n", + "Data preparation using Apache Pig on YARN with Cloud Dataproc\n", "\n", - "## Intended Use\n", - "Use the component to run an Apache Pig job as one preprocessing step in a KFP pipeline. \n", + "# Label\n", + "Cloud Dataproc, GCP, Cloud Storage, YARN, Pig, Apache, Kubeflow, pipelines, components\n", + "\n", + "\n", + "# Summary\n", + "A Kubeflow Pipeline component to prepare data by submitting an Apache Pig job on YARN to Cloud Dataproc.\n", + "\n", + "\n", + "# Details\n", + "## Intended use\n", + "Use the component to run an Apache Pig job as one preprocessing step in a Kubeflow Pipeline.\n", "\n", "## Runtime arguments\n", - "Name | Description | Type | Optional | Default\n", - ":--- | :---------- | :--- | :------- | :------\n", - "project_id | The Google Cloud Platform (GCP) project ID that the cluster belongs to. | GCPProjectID | No |\n", - "region | The Dataproc region that handles the request. | GCPRegion | No |\n", - "cluster_name | The name of the cluster that runs the job. | String | No |\n", - "queries | The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. | List | Yes | `[]`\n", - "query_file_uri | The Hadoop Compatible Filesystem (HCFS) URI of the script that contains Pig queries.| GCSPath | Yes | ` `\n", - "script_variables | Optional. Mapping of query variable names to values (equivalent to the Pig command: SET name=\"value\";).| List | Yes | `[]`\n", - "pig_job | The payload of a [PigJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/PigJob). | Dict | Yes | `{}`\n", - "job | The payload of a [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs).| Dict | Yes | `{}`\n", - "wait_interval | The number of seconds to pause between polling the operation. 
| Integer | Yes | `30`\n", + "| Argument | Description | Optional | Data type | Accepted values | Default |\n", + "|----------|-------------|----------|-----------|-----------------|---------|\n", + "| project_id | The ID of the Google Cloud Platform (GCP) project that the cluster belongs to. | No | GCPProjectID | | |\n", + "| region | The Cloud Dataproc region to handle the request. | No | GCPRegion | | |\n", + "| cluster_name | The name of the cluster to run the job. | No | String | | |\n", + "| queries | The queries to execute the Pig job. Specify multiple queries in one string by separating them with semicolons. You do not need to terminate queries with semicolons. | Yes | List | | None |\n", + "| query_file_uri | The HCFS URI of the script that contains the Pig queries. | Yes | GCSPath | | None |\n", + "| script_variables | Mapping of the query’s variable names to their values (equivalent to the Pig command: SET name=\"value\";). | Yes | Dict | | None |\n", + "| pig_job | The payload of a [PigJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/PigJob). | Yes | Dict | | None |\n", + "| job | The payload of a [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs). | Yes | Dict | | None |\n", + "| wait_interval | The number of seconds to pause between polling the operation. | Yes | Integer | | 30 |\n", "\n", "## Output\n", "Name | Description | Type\n", @@ -29,20 +38,22 @@ "job_id | The ID of the created job. | String\n", "\n", "## Cautions & requirements\n", + "\n", "To use the component, you must:\n", - "* Setup project by following the [guide](https://cloud.google.com/dataproc/docs/guides/setup-project).\n", - "* [Create a new cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster).\n", - "* The component is running under a secret of [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example:\n", - "```\n", - "component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", - "```\n", - "* Grant Kubeflow user service account the `roles/dataproc.editor` role on the project.\n", - "\n", - "## Detailed Description\n", + "* Set up a GCP project by following this [guide](https://cloud.google.com/dataproc/docs/guides/setup-project).\n", + "* [Create a new cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster).\n", + "* Run the component under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example:\n", + "\n", + " ```\n", + " component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", + " ```\n", + "* Grant the Kubeflow user service account the role `roles/dataproc.editor` on the project.\n", + "\n", + "## Detailed description\n", "This component creates a Pig job from [Dataproc submit job REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs/submit).\n", "\n", - "Here are the steps to use the component in a pipeline:\n", - "1. Install KFP SDK\n" + "Follow these steps to use the component in a pipeline:\n", + "1. 
Install the Kubeflow Pipeline SDK:\n" ] }, { @@ -81,29 +92,22 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "For more information about the component, please checkout:\n", - "* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataproc/_submit_pig_job.py)\n", - "* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile)\n", - "* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataproc/submit_pig_job/sample.ipynb)\n", - "* [Dataproc PigJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/PigJob)\n", - "\n", "### Sample\n", "\n", - "Note: the sample code below works in both IPython notebook or python code directly.\n", + "Note: The following sample code works in an IPython notebook or directly in Python code. See the sample code below to learn how to execute the template.\n", + "\n", "\n", "#### Setup a Dataproc cluster\n", + "\n", "[Create a new Dataproc cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster) (or reuse an existing one) before running the sample code.\n", "\n", - "#### Prepare Pig query\n", - "Directly put your Pig queries in the `queries` list or upload your Pig queries into a file to a Google Cloud Storage (GCS) bucket and place the path in `query_file_uri`. In this sample, we will use a hard coded query in the `queries` list to select data from a local `passwd` file.\n", "\n", - "For more details, please checkout [Pig documentation](http://pig.apache.org/docs/latest/)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ + "#### Prepare a Pig query\n", + "\n", + "Either put your Pig queries in the `queries` list, or upload your Pig queries into a file to a Cloud Storage bucket and then enter the Cloud Storage bucket’s path in `query_file_uri`. In this sample, we will use a hard coded query in the `queries` list to select data from a local `passwd` file.\n", + "\n", + "For more details on Apache Pig, see the [Pig documentation.](http://pig.apache.org/docs/latest/)\n", + "\n", "#### Set sample parameters" ] }, @@ -218,11 +222,18 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], - "source": [] + "source": [ + "## References\n", + "* [Create a new Dataproc cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster) \n", + "* [Pig documentation](http://pig.apache.org/docs/latest/)\n", + "* [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs)\n", + "* [PigJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/PigJob)\n", + "\n", + "## License\n", + "By deploying or using this software you agree to comply with the [AI Hub Terms of Service](https://aihub.cloud.google.com/u/0/aihub-tos) and the [Google APIs Terms of Service](https://developers.google.com/terms/). To the extent of a direct conflict of terms, the AI Hub Terms of Service will control." 
+ ] } ], "metadata": { @@ -241,7 +252,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.6.4" } }, "nbformat": 4, diff --git a/components/gcp/dataproc/submit_pyspark_job/README.md b/components/gcp/dataproc/submit_pyspark_job/README.md index 3a5f6db5f89..7ba0533cb3e 100644 --- a/components/gcp/dataproc/submit_pyspark_job/README.md +++ b/components/gcp/dataproc/submit_pyspark_job/README.md @@ -1,21 +1,31 @@ -# Submitting a PySpark Job to Cloud Dataproc -A Kubeflow Pipeline component to submit a PySpark job to Google Cloud Dataproc service. +# Name +Data preparation using PySpark on Cloud Dataproc + + +# Label +Cloud Dataproc, GCP, Cloud Storage,PySpark, Kubeflow, pipelines, components + + +# Summary +A Kubeflow Pipeline component to prepare data by submitting a PySpark job to Cloud Dataproc. + + +# Details +## Intended use +Use the component to run an Apache PySpark job as one preprocessing step in a Kubeflow Pipeline. -## Intended Use -Use the component to run an Apache PySpark job as one preprocessing step in a KFP pipeline. ## Runtime arguments -Name | Description | Type | Optional | Default -:--- | :---------- | :--- | :------- | :------ -project_id | The Google Cloud Platform (GCP) project ID that the cluster belongs to. | GCPProjectID | No | -region | The Dataproc region that handles the request. | GCPRegion | No | -cluster_name | The name of the cluster that runs the job. | String | No | -main_python_file_uri | The Hadoop Compatible Filesystem (HCFS) URI of the main Python file to use as the driver. Must be a .py file. | GCSPath | No | -args | The arguments to pass to the driver. Do not include arguments, such as --conf, that can be set as job properties, since a collision may occur that causes an incorrect job submission. | List | Yes | `[]` -pyspark_job | The payload of a [PySparkJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/PySparkJob). | Dict | Yes | `{}` -job | The payload of a [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs). | Dict | Yes | `{}` -wait_interval | The number of seconds to pause between polling the operation. | Integer | Yes | `30` +| Argument | Description | Optional | Data type | Accepted values | Default | +|----------------------|------------|----------|--------------|-----------------|---------| +| project_id | The ID of the Google Cloud Platform (GCP) project that the cluster belongs to. | No | GCPProjectID | | | +| region | The Cloud Dataproc region to handle the request. | No | GCPRegion | | | +| cluster_name | The name of the cluster to run the job. | No | String | | | +| main_python_file_uri | The HCFS URI of the Python file to use as the driver. This must be a .py file. | No | GCSPath | | | +| args | The arguments to pass to the driver. Do not include arguments, such as --conf, that can be set as job properties, since a collision may occur that causes an incorrect job submission. | Yes | List | | None | +| pyspark_job | The payload of a [PySparkJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/PySparkJob). | Yes | Dict | | None | +| job | The payload of a [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs). | Yes | Dict | | None | ## Output Name | Description | Type @@ -23,21 +33,24 @@ Name | Description | Type job_id | The ID of the created job. 
| String ## Cautions & requirements + To use the component, you must: -* Setup project by following the [guide](https://cloud.google.com/dataproc/docs/guides/setup-project). -* [Create a new cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster). -* The component is running under a secret of [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example: -``` -component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa')) -``` -* Grant Kubeflow user service account the `roles/dataproc.editor` role on the project. +* Set up a GCP project by following this [guide](https://cloud.google.com/dataproc/docs/guides/setup-project). +* [Create a new cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster). +* Run the component under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example: -## Detailed Description -This component creates a PySpark job from [Dataproc submit job REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs/submit). + ``` + component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa')) + ``` +* Grant the Kubeflow user service account the role `roles/dataproc.editor` on the project. -Here are the steps to use the component in a pipeline: -1. Install KFP SDK +## Detailed description +This component creates a PySpark job from the [Dataproc submit job REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs/submit). + +Follow these steps to use the component in a pipeline: + +1. Install the Kubeflow Pipeline SDK: ```python @@ -58,21 +71,19 @@ dataproc_submit_pyspark_job_op = comp.load_component_from_url( help(dataproc_submit_pyspark_job_op) ``` -For more information about the component, please checkout: -* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataproc/_submit_pyspark_job.py) -* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile) -* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataproc/submit_pyspark_job/sample.ipynb) -* [Dataproc PySparkJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/PySparkJob) - ### Sample -Note: the sample code below works in both IPython notebook or python code directly. +Note: The following sample code works in an IPython notebook or directly in Python code. See the sample code below to learn how to execute the template. + #### Setup a Dataproc cluster + [Create a new Dataproc cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster) (or reuse an existing one) before running the sample code. -#### Prepare PySpark job -Upload your PySpark code file to a Cloud Storage bucket. For example, thisis a publicly accessible hello-world.py in Cloud Storage: + +#### Prepare a PySpark job + +Upload your PySpark code file to a Cloud Storage bucket. 
For example, this is a publicly accessible `hello-world.py` in Cloud Storage: ```python @@ -151,7 +162,11 @@ run_name = pipeline_func.__name__ + ' run' run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arguments) ``` +## References -```python +* [Create a new Dataproc cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster) +* [PySparkJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/PySparkJob) +* [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs) -``` +## License +By deploying or using this software you agree to comply with the [AI Hub Terms of Service](https://aihub.cloud.google.com/u/0/aihub-tos) and the [Google APIs Terms of Service](https://developers.google.com/terms/). To the extent of a direct conflict of terms, the AI Hub Terms of Service will control. diff --git a/components/gcp/dataproc/submit_pyspark_job/sample.ipynb b/components/gcp/dataproc/submit_pyspark_job/sample.ipynb index 6fac3c069c3..f9f8bc09245 100644 --- a/components/gcp/dataproc/submit_pyspark_job/sample.ipynb +++ b/components/gcp/dataproc/submit_pyspark_job/sample.ipynb @@ -4,23 +4,33 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Submitting a PySpark Job to Cloud Dataproc\n", - "A Kubeflow Pipeline component to submit a PySpark job to Google Cloud Dataproc service. \n", + "# Name\n", + "Data preparation using PySpark on Cloud Dataproc\n", + "\n", + "\n", + "# Label\n", + "Cloud Dataproc, GCP, Cloud Storage,PySpark, Kubeflow, pipelines, components\n", + "\n", + "\n", + "# Summary\n", + "A Kubeflow Pipeline component to prepare data by submitting a PySpark job to Cloud Dataproc.\n", + "\n", + "\n", + "# Details\n", + "## Intended use\n", + "Use the component to run an Apache PySpark job as one preprocessing step in a Kubeflow Pipeline.\n", "\n", - "## Intended Use\n", - "Use the component to run an Apache PySpark job as one preprocessing step in a KFP pipeline. \n", "\n", "## Runtime arguments\n", - "Name | Description | Type | Optional | Default\n", - ":--- | :---------- | :--- | :------- | :------\n", - "project_id | The Google Cloud Platform (GCP) project ID that the cluster belongs to. | GCPProjectID | No |\n", - "region | The Dataproc region that handles the request. | GCPRegion | No |\n", - "cluster_name | The name of the cluster that runs the job. | String | No |\n", - "main_python_file_uri | The Hadoop Compatible Filesystem (HCFS) URI of the main Python file to use as the driver. Must be a .py file. | GCSPath | No |\n", - "args | The arguments to pass to the driver. Do not include arguments, such as --conf, that can be set as job properties, since a collision may occur that causes an incorrect job submission. | List | Yes | `[]`\n", - "pyspark_job | The payload of a [PySparkJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/PySparkJob). | Dict | Yes | `{}`\n", - "job | The payload of a [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs). | Dict | Yes | `{}`\n", - "wait_interval | The number of seconds to pause between polling the operation. | Integer | Yes | `30`\n", + "| Argument | Description | Optional | Data type | Accepted values | Default |\n", + "|----------------------|------------|----------|--------------|-----------------|---------|\n", + "| project_id | The ID of the Google Cloud Platform (GCP) project that the cluster belongs to. | No | GCPProjectID | | |\n", + "| region | The Cloud Dataproc region to handle the request. 
| No | GCPRegion | | |\n", + "| cluster_name | The name of the cluster to run the job. | No | String | | |\n", + "| main_python_file_uri | The HCFS URI of the Python file to use as the driver. This must be a .py file. | No | GCSPath | | |\n", + "| args | The arguments to pass to the driver. Do not include arguments, such as --conf, that can be set as job properties, since a collision may occur that causes an incorrect job submission. | Yes | List | | None |\n", + "| pyspark_job | The payload of a [PySparkJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/PySparkJob). | Yes | Dict | | None |\n", + "| job | The payload of a [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs). | Yes | Dict | | None |\n", "\n", "## Output\n", "Name | Description | Type\n", @@ -28,20 +38,24 @@ "job_id | The ID of the created job. | String\n", "\n", "## Cautions & requirements\n", + "\n", "To use the component, you must:\n", - "* Setup project by following the [guide](https://cloud.google.com/dataproc/docs/guides/setup-project).\n", - "* [Create a new cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster).\n", - "* The component is running under a secret of [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example:\n", - "```\n", - "component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", - "```\n", - "* Grant Kubeflow user service account the `roles/dataproc.editor` role on the project.\n", - "\n", - "## Detailed Description\n", - "This component creates a PySpark job from [Dataproc submit job REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs/submit).\n", - "\n", - "Here are the steps to use the component in a pipeline:\n", - "1. Install KFP SDK\n" + "* Set up a GCP project by following this [guide](https://cloud.google.com/dataproc/docs/guides/setup-project).\n", + "* [Create a new cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster).\n", + "* Run the component under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example:\n", + "\n", + " ```\n", + " component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", + " ```\n", + "* Grant the Kubeflow user service account the role `roles/dataproc.editor` on the project.\n", + "\n", + "## Detailed description\n", + "\n", + "This component creates a PySpark job from the [Dataproc submit job REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs/submit).\n", + "\n", + "Follow these steps to use the component in a pipeline:\n", + "\n", + "1. 
Install the Kubeflow Pipeline SDK:" ] }, { @@ -80,21 +94,19 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "For more information about the component, please checkout:\n", - "* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataproc/_submit_pyspark_job.py)\n", - "* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile)\n", - "* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataproc/submit_pyspark_job/sample.ipynb)\n", - "* [Dataproc PySparkJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/PySparkJob)\n", - "\n", "### Sample\n", "\n", - "Note: the sample code below works in both IPython notebook or python code directly.\n", + "Note: The following sample code works in an IPython notebook or directly in Python code. See the sample code below to learn how to execute the template.\n", + "\n", "\n", "#### Setup a Dataproc cluster\n", + "\n", "[Create a new Dataproc cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster) (or reuse an existing one) before running the sample code.\n", "\n", - "#### Prepare PySpark job\n", - "Upload your PySpark code file to a Cloud Storage bucket. For example, thisis a publicly accessible hello-world.py in Cloud Storage:" + "\n", + "#### Prepare a PySpark job\n", + "\n", + "Upload your PySpark code file to a Cloud Storage bucket. For example, this is a publicly accessible `hello-world.py` in Cloud Storage:" ] }, { @@ -219,11 +231,18 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], - "source": [] + "source": [ + "## References\n", + "\n", + "* [Create a new Dataproc cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster) \n", + "* [PySparkJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/PySparkJob)\n", + "* [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs)\n", + "\n", + "## License\n", + "By deploying or using this software you agree to comply with the [AI Hub Terms of Service](https://aihub.cloud.google.com/u/0/aihub-tos) and the [Google APIs Terms of Service](https://developers.google.com/terms/). To the extent of a direct conflict of terms, the AI Hub Terms of Service will control." + ] } ], "metadata": { @@ -242,7 +261,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.6.4" } }, "nbformat": 4, diff --git a/components/gcp/dataproc/submit_spark_job/README.md b/components/gcp/dataproc/submit_spark_job/README.md index 4c7ad7fcda8..5cad85794b5 100644 --- a/components/gcp/dataproc/submit_spark_job/README.md +++ b/components/gcp/dataproc/submit_spark_job/README.md @@ -1,22 +1,36 @@ -# Submitting a Spark Job to Cloud Dataproc -A Kubeflow Pipeline component to submit a Spark job to Google Cloud Dataproc service. +# Name -## Intended Use -Use the component to run an Apache Spark job as one preprocessing step in a KFP pipeline. +Data preparation using Spark on YARN with Cloud Dataproc + + +# Label + +Cloud Dataproc, GCP, Cloud Storage, Spark, Kubeflow, pipelines, components, YARN + + +# Summary + +A Kubeflow Pipeline component to prepare data by submitting a Spark job on YARN to Cloud Dataproc. + +# Details + +## Intended use + +Use the component to run an Apache Spark job as one preprocessing step in a Kubeflow Pipeline. 
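As a rough orientation, a one-step pipeline built around this component might be wired up as in the sketch below. The component URL is a placeholder, the argument names are assumed to mirror the runtime arguments listed in the next section, and the JAR location is hypothetical.

```python
import kfp.dsl as dsl
import kfp.gcp as gcp
import kfp.components as comp

# Placeholder URL -- substitute the component.yaml URL given in the detailed description below.
dataproc_submit_spark_job_op = comp.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/<COMMIT>/components/gcp/dataproc/submit_spark_job/component.yaml')

@dsl.pipeline(
    name='Spark preprocessing sketch',
    description='Runs one Spark job as a preprocessing step.')
def spark_preprocess(project_id, region, cluster_name):
    # Argument names are assumed to follow the runtime arguments table below.
    dataproc_submit_spark_job_op(
        project_id=project_id,
        region=region,
        cluster_name=cluster_name,
        main_jar_file_uri='gs://your-bucket/your-spark-app.jar'  # hypothetical JAR location
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))
```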
## Runtime arguments -Name | Description | Type | Optional | Default -:--- | :---------- | :--- | :------- | :------ -project_id | The Google Cloud Platform (GCP) project ID that the cluster belongs to. | GCPProjectID | No | -region | The Dataproc region that handles the request. | GCPRegion | No | -cluster_name | The name of the cluster that runs the job. | String | No | -main_jar_file_uri | The Hadoop Compatible Filesystem (HCFS) URI of the jar file that contains the main class. | GCSPath | No | -main_class | The name of the driver's main class. The jar file that contains the class must be in the default CLASSPATH or specified in `spark_job.jarFileUris`. | String | No | -args | The arguments to pass to the driver. Do not include arguments, such as --conf, that can be set as job properties, since a collision may occur that causes an incorrect job submission. | List | Yes | `[]` -spark_job | The payload of a [SparkJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkJob). | Dict | Yes | `{}` -job | The payload of a [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs). | Dict | Yes | `{}` -wait_interval | The number of seconds to pause between polling the operation. | Integer | Yes | `30` +Argument | Description | Optional | Data type | Accepted values | Default | +:--- | :---------- | :--- | :------- | :------| :------| +project_id | The ID of the Google Cloud Platform (GCP) project that the cluster belongs to.|No | GCPProjectID | | | +region | The Cloud Dataproc region to handle the request. | No | GCPRegion | | | +cluster_name | The name of the cluster to run the job. | No | String | | | +main_jar_file_uri | The Hadoop Compatible Filesystem (HCFS) URI of the JAR file that contains the main class. | No | GCSPath | | | +main_class | The name of the driver's main class. The JAR file that contains the class must be either in the default CLASSPATH or specified in `spark_job.jarFileUris`.| No | | | | +args | The arguments to pass to the driver. Do not include arguments, such as --conf, that can be set as job properties, since a collision may occur that causes an incorrect job submission.| Yes | | | | +spark_job | The payload of a [SparkJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkJob).| Yes | | | | +job | The payload of a [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs). | Yes | | | | +wait_interval | The number of seconds to wait between polling the operation. | Yes | | | 30 | ## Output Name | Description | Type @@ -24,22 +38,33 @@ Name | Description | Type job_id | The ID of the created job. | String ## Cautions & requirements + To use the component, you must: -* Setup project by following the [guide](https://cloud.google.com/dataproc/docs/guides/setup-project). -* [Create a new cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster). -* The component is running under a secret of [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example: -``` -component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa')) -``` -* Grant Kubeflow user service account the `roles/dataproc.editor` role on the project. -## Detailed Description + + +* Set up a GCP project by following this [guide](https://cloud.google.com/dataproc/docs/guides/setup-project). +* [Create a new cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster). 
+* Run the component under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example: + + ``` + component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa')) + ``` + + +* Grant the Kubeflow user service account the role `roles/dataproc.editor` on the project. + + +## Detailed description + This component creates a Spark job from [Dataproc submit job REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs/submit). -Here are the steps to use the component in a pipeline: -1. Install KFP SDK +Follow these steps to use the component in a pipeline: + +1. Install the Kubeflow Pipeline SDK: + ```python %%capture --no-stderr @@ -59,25 +84,21 @@ dataproc_submit_spark_job_op = comp.load_component_from_url( help(dataproc_submit_spark_job_op) ``` -For more information about the component, please checkout: -* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataproc/_submit_spark_job.py) -* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile) -* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataproc/submit_spark_job/sample.ipynb) -* [Dataproc SparkJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkJob) - ### Sample +Note: The following sample code works in an IPython notebook or directly in Python code. -Note: the sample code below works in both IPython notebook or python code directly. -#### Setup a Dataproc cluster +#### Set up a Dataproc cluster [Create a new Dataproc cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster) (or reuse an existing one) before running the sample code. -#### Prepare Spark job -Upload your Spark jar file to a Cloud Storage (GCS) bucket. In the sample, we will use a jar file that is pre-installed in the main cluster `file:///usr/lib/spark/examples/jars/spark-examples.jar`. -Here is the [Pi example source code](https://github.com/apache/spark/blob/master/examples/src/main/java/org/apache/spark/examples/JavaSparkPi.java). +#### Prepare a Spark job +Upload your Spark JAR file to a Cloud Storage bucket. In the sample, we use a JAR file that is preinstalled in the main cluster: `file:///usr/lib/spark/examples/jars/spark-examples.jar`. + +Here is the [source code of the sample](https://github.com/apache/spark/blob/master/examples/src/main/java/org/apache/spark/examples/JavaSparkPi.java). + +To package a self-contained Spark application, follow these [instructions](https://spark.apache.org/docs/latest/quick-start.html#self-contained-applications). -To package a self-contained spark application, follow the [instructions](https://spark.apache.org/docs/latest/quick-start.html#self-contained-applications). 
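For the preinstalled example above, the job payload might be expressed as in the sketch below before it is handed to the component. The driver class name and the JSON encoding of the payload are assumptions, not values copied from this sample.

```python
import json

# Sketch only: SparkPi is assumed to be the driver class inside spark-examples.jar.
MAIN_CLASS = 'org.apache.spark.examples.SparkPi'
SPARK_JOB = json.dumps({
    'jarFileUris': ['file:///usr/lib/spark/examples/jars/spark-examples.jar']
})
ARGS = json.dumps(['1000'])  # hypothetical number of partitions for the Pi estimate
```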
#### Set sample parameters @@ -154,7 +175,12 @@ run_name = pipeline_func.__name__ + ' run' run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arguments) ``` +## References -```python +* [Component Python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataproc/_submit_spark_job.py) +* [Component Docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile) +* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataproc/submit_spark_job/sample.ipynb) +* [Dataproc SparkJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkJob) -``` +## License +By deploying or using this software you agree to comply with the [AI Hub Terms of Service](https://aihub.cloud.google.com/u/0/aihub-tos) and the [Google APIs Terms of Service](https://developers.google.com/terms/). To the extent of a direct conflict of terms, the AI Hub Terms of Service will control. diff --git a/components/gcp/dataproc/submit_spark_job/sample.ipynb b/components/gcp/dataproc/submit_spark_job/sample.ipynb index 0681629ce31..3d2b79cdc42 100644 --- a/components/gcp/dataproc/submit_spark_job/sample.ipynb +++ b/components/gcp/dataproc/submit_spark_job/sample.ipynb @@ -4,24 +4,38 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Submitting a Spark Job to Cloud Dataproc\n", - "A Kubeflow Pipeline component to submit a Spark job to Google Cloud Dataproc service. \n", + "# Name\n", "\n", - "## Intended Use\n", - "Use the component to run an Apache Spark job as one preprocessing step in a KFP pipeline. \n", + "Data preparation using Spark on YARN with Cloud Dataproc\n", + "\n", + "\n", + "# Label\n", + "\n", + "Cloud Dataproc, GCP, Cloud Storage, Spark, Kubeflow, pipelines, components, YARN\n", + "\n", + "\n", + "# Summary\n", + "\n", + "A Kubeflow Pipeline component to prepare data by submitting a Spark job on YARN to Cloud Dataproc.\n", + "\n", + "# Details\n", + "\n", + "## Intended use\n", + "\n", + "Use the component to run an Apache Spark job as one preprocessing step in a Kubeflow Pipeline.\n", "\n", "## Runtime arguments\n", - "Name | Description | Type | Optional | Default\n", - ":--- | :---------- | :--- | :------- | :------\n", - "project_id | The Google Cloud Platform (GCP) project ID that the cluster belongs to. | GCPProjectID | No |\n", - "region | The Dataproc region that handles the request. | GCPRegion | No |\n", - "cluster_name | The name of the cluster that runs the job. | String | No |\n", - "main_jar_file_uri | The Hadoop Compatible Filesystem (HCFS) URI of the jar file that contains the main class. | GCSPath | No |\n", - "main_class | The name of the driver's main class. The jar file that contains the class must be in the default CLASSPATH or specified in `spark_job.jarFileUris`. | String | No |\n", - "args | The arguments to pass to the driver. Do not include arguments, such as --conf, that can be set as job properties, since a collision may occur that causes an incorrect job submission. | List | Yes | `[]`\n", - "spark_job | The payload of a [SparkJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkJob). | Dict | Yes | `{}`\n", - "job | The payload of a [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs). | Dict | Yes | `{}`\n", - "wait_interval | The number of seconds to pause between polling the operation. 
| Integer | Yes | `30`\n", + "Argument | Description | Optional | Data type | Accepted values | Default |\n", + ":--- | :---------- | :--- | :------- | :------| :------| \n", + "project_id | The ID of the Google Cloud Platform (GCP) project that the cluster belongs to.|No | GCPProjectID | | |\n", + "region | The Cloud Dataproc region to handle the request. | No | GCPRegion | | | \n", + "cluster_name | The name of the cluster to run the job. | No | String | | |\n", + "main_jar_file_uri | The Hadoop Compatible Filesystem (HCFS) URI of the JAR file that contains the main class. | No | GCSPath | | |\n", + "main_class | The name of the driver's main class. The JAR file that contains the class must be either in the default CLASSPATH or specified in `spark_job.jarFileUris`.| No | | | | \n", + "args | The arguments to pass to the driver. Do not include arguments, such as --conf, that can be set as job properties, since a collision may occur that causes an incorrect job submission.| Yes | | | |\n", + "spark_job | The payload of a [SparkJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkJob).| Yes | | | |\n", + "job | The payload of a [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs). | Yes | | | |\n", + "wait_interval | The number of seconds to wait between polling the operation. | Yes | | | 30 |\n", "\n", "## Output\n", "Name | Description | Type\n", @@ -29,20 +43,32 @@ "job_id | The ID of the created job. | String\n", "\n", "## Cautions & requirements\n", + "\n", "To use the component, you must:\n", - "* Setup project by following the [guide](https://cloud.google.com/dataproc/docs/guides/setup-project).\n", - "* [Create a new cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster).\n", - "* The component is running under a secret of [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example:\n", - "```\n", - "component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", - "```\n", - "* Grant Kubeflow user service account the `roles/dataproc.editor` role on the project.\n", - "\n", - "## Detailed Description\n", + "\n", + "\n", + "\n", + "* Set up a GCP project by following this [guide](https://cloud.google.com/dataproc/docs/guides/setup-project).\n", + "* [Create a new cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster).\n", + "* Run the component under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example:\n", + "\n", + " ```\n", + " component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", + " ```\n", + "\n", + "\n", + "* Grant the Kubeflow user service account the role `roles/dataproc.editor` on the project.\n", + "\n", + "\n", + "## Detailed description\n", + "\n", "This component creates a Spark job from [Dataproc submit job REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs/submit).\n", "\n", - "Here are the steps to use the component in a pipeline:\n", - "1. Install KFP SDK\n" + "Follow these steps to use the component in a pipeline:\n", + "\n", + "\n", + "\n", + "1. 
Install the Kubeflow Pipeline SDK:" ] }, { @@ -81,31 +107,22 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "For more information about the component, please checkout:\n", - "* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataproc/_submit_spark_job.py)\n", - "* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile)\n", - "* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataproc/submit_spark_job/sample.ipynb)\n", - "* [Dataproc SparkJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkJob)\n", - "\n", "### Sample\n", + "Note: The following sample code works in an IPython notebook or directly in Python code.\n", "\n", - "Note: the sample code below works in both IPython notebook or python code directly.\n", "\n", - "#### Setup a Dataproc cluster\n", + "#### Set up a Dataproc cluster\n", "[Create a new Dataproc cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster) (or reuse an existing one) before running the sample code.\n", "\n", - "#### Prepare Spark job\n", - "Upload your Spark jar file to a Cloud Storage (GCS) bucket. In the sample, we will use a jar file that is pre-installed in the main cluster `file:///usr/lib/spark/examples/jars/spark-examples.jar`. \n", "\n", - "Here is the [Pi example source code](https://github.com/apache/spark/blob/master/examples/src/main/java/org/apache/spark/examples/JavaSparkPi.java).\n", + "#### Prepare a Spark job\n", + "Upload your Spark JAR file to a Cloud Storage bucket. In the sample, we use a JAR file that is preinstalled in the main cluster: `file:///usr/lib/spark/examples/jars/spark-examples.jar`.\n", + "\n", + "Here is the [source code of the sample](https://github.com/apache/spark/blob/master/examples/src/main/java/org/apache/spark/examples/JavaSparkPi.java).\n", + "\n", + "To package a self-contained Spark application, follow these [instructions](https://spark.apache.org/docs/latest/quick-start.html#self-contained-applications).\n", + "\n", "\n", - "To package a self-contained spark application, follow the [instructions](https://spark.apache.org/docs/latest/quick-start.html#self-contained-applications)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ "#### Set sample parameters" ] }, @@ -218,11 +235,19 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], - "source": [] + "source": [ + "## References\n", + "\n", + "* [Component Python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataproc/_submit_spark_job.py)\n", + "* [Component Docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile)\n", + "* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataproc/submit_spark_job/sample.ipynb)\n", + "* [Dataproc SparkJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkJob)\n", + "\n", + "## License\n", + "By deploying or using this software you agree to comply with the [AI Hub Terms of Service](https://aihub.cloud.google.com/u/0/aihub-tos) and the [Google APIs Terms of Service](https://developers.google.com/terms/). To the extent of a direct conflict of terms, the AI Hub Terms of Service will control." 
+ ] } ], "metadata": { @@ -241,7 +266,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.6.4" } }, "nbformat": 4, diff --git a/components/gcp/dataproc/submit_sparksql_job/README.md b/components/gcp/dataproc/submit_sparksql_job/README.md index 841e582a06a..4b743859ad8 100644 --- a/components/gcp/dataproc/submit_sparksql_job/README.md +++ b/components/gcp/dataproc/submit_sparksql_job/README.md @@ -1,22 +1,30 @@ -# Submitting a SparkSql Job to Cloud Dataproc -A Kubeflow Pipeline component to submit a SparkSql job to Google Cloud Dataproc service. +# Name +Data preparation using SparkSQL on YARN with Cloud Dataproc -## Intended Use -Use the component to run an Apache SparkSql job as one preprocessing step in a KFP pipeline. +# Label +Cloud Dataproc, GCP, Cloud Storage, YARN, SparkSQL, Kubeflow, pipelines, components + +# Summary +A Kubeflow Pipeline component to prepare data by submitting a SparkSql job on YARN to Cloud Dataproc. + +# Details + +## Intended use +Use the component to run an Apache SparkSql job as one preprocessing step in a Kubeflow Pipeline. ## Runtime arguments -Name | Description | Type | Optional | Default -:--- | :---------- | :--- | :------- | :------ -project_id | The Google Cloud Platform (GCP) project ID that the cluster belongs to. | GCPProjectID | No | -region | The Dataproc region that handles the request. | GCPRegion | No | -cluster_name | The name of the cluster that runs the job. | String | No | -queries | The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. | List | Yes | `[]` -query_file_uri | The Hadoop Compatible Filesystem (HCFS) URI of the script that contains SQL queries.| GCSPath | Yes | ` ` -script_variables | Mapping of query variable names to values (equivalent to the Spark SQL command: SET name="value";). | List | Yes | `[]` -sparksql_job | The payload of a [SparkSqlJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkSqlJob). | Dict | Yes | `{}` -job | The payload of a [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs). | Dict | Yes | `{}` -wait_interval | The number of seconds to pause between polling the operation. | Integer | Yes | `30` +Argument| Description | Optional | Data type| Accepted values| Default | +:--- | :---------- | :--- | :------- | :------ | :------ +project_id | The ID of the Google Cloud Platform (GCP) project that the cluster belongs to. | No| GCPProjectID | | | +region | The Cloud Dataproc region to handle the request. | No | GCPRegion| +cluster_name | The name of the cluster to run the job. | No | String| | | +queries | The queries to execute the SparkSQL job. Specify multiple queries in one string by separating them with semicolons. You do not need to terminate queries with semicolons. | Yes | List | | None | +query_file_uri | The HCFS URI of the script that contains the SparkSQL queries.| Yes | GCSPath | | None | +script_variables | Mapping of the query’s variable names to their values (equivalent to the SparkSQL command: SET name="value";).| Yes| Dict | | None | +sparksql_job | The payload of a [SparkSqlJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkSqlJob). | Yes | Dict | | None | +job | The payload of a [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs). 
 | Yes | Dict | | None |
+wait_interval | The number of seconds to pause between polling the operation. | Yes | Integer | | 30 |
 
 ## Output
 Name | Description | Type
@@ -25,20 +33,19 @@ job_id | The ID of the created job. | String
 
 ## Cautions & requirements
 To use the component, you must:
-* Setup project by following the [guide](https://cloud.google.com/dataproc/docs/guides/setup-project).
+* Set up a GCP project by following this [guide](https://cloud.google.com/dataproc/docs/guides/setup-project).
 * [Create a new cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster).
-* The component is running under a secret of [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example:
+* Run the component under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example:
```
component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa'))
```
-* Grant Kubeflow user service account the `roles/dataproc.editor` role on the project.
+* Grant the Kubeflow user service account the role `roles/dataproc.editor` on the project.
 
 ## Detailed Description
 This component creates a SparkSql job from [Dataproc submit job REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs/submit).
 
-Here are the steps to use the component in a pipeline:
-1. Install KFP SDK
-
+Follow these steps to use the component in a pipeline:
+1. Install the Kubeflow Pipeline SDK:
 


```python
@@ -59,23 +66,17 @@ dataproc_submit_sparksql_job_op = comp.load_component_from_url(
 help(dataproc_submit_sparksql_job_op)
 ```
 
-For more information about the component, please checkout:
-* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataproc/_submit_sparksql_job.py)
-* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile)
-* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataproc/submit_sparksql_job/sample.ipynb)
-* [Dataproc SparkSqlJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkSqlJob)
-
 ### Sample
-Note: the sample code below works in both IPython notebook or python code directly.
+Note: The following sample code works in an IPython notebook or directly in Python code. See the sample code below to learn how to use the component.
 
 
 #### Setup a Dataproc cluster
 [Create a new Dataproc cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster) (or reuse an existing one) before running the sample code.
 
-#### Prepare SparkSQL job
-Directly put your SparkSQL queries in the `queires` list or upload your SparkSQL queries into a file to a Google Cloud Storage (GCS) bucket and place the path in `query_file_uri`. In this sample, we will use a hard coded query in the `queries` list to select data from a public CSV file from GCS.
+#### Prepare a SparkSQL job
+Either put your SparkSQL queries in the `queries` list, or upload your SparkSQL queries into a file to a Cloud Storage bucket and then enter the Cloud Storage bucket’s path in `query_file_uri`. In this sample, we will use a hard-coded query in the `queries` list to select data from a public CSV file from Cloud Storage.
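As a minimal sketch (not part of the shipped sample), the `queries` argument can be wired into a one-step pipeline as shown below. The component URL, project ID, region, cluster name, and query are placeholders you must supply yourself, and the list-typed `queries` argument is assumed to be passed as a JSON-encoded string:

```python
import json

import kfp.components as comp
import kfp.dsl as dsl
import kfp.gcp as gcp

# Placeholder: the raw URL of the component.yaml loaded in the step above.
COMPONENT_SPEC_URI = '<component.yaml URL from the step above>'
dataproc_submit_sparksql_job_op = comp.load_component_from_url(COMPONENT_SPEC_URI)

# Placeholder values -- replace with your own project, region, and cluster.
PROJECT_ID = 'your-project-id'
REGION = 'us-central1'
CLUSTER_NAME = 'your-cluster-name'
QUERY = 'SHOW DATABASES'  # any SparkSQL statement; the list may hold several queries

@dsl.pipeline(
    name='Dataproc submit SparkSQL job sketch',
    description='Submits a list of SparkSQL queries to an existing Dataproc cluster.'
)
def sparksql_sketch_pipeline():
    # List-typed component inputs are passed as JSON-encoded strings.
    dataproc_submit_sparksql_job_op(
        project_id=PROJECT_ID,
        region=REGION,
        cluster_name=CLUSTER_NAME,
        queries=json.dumps([QUERY])
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))
```

Compile and submit this sketch the same way as the full sample that follows.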
-For more details about Spark SQL, please checkout the [programming guide](https://spark.apache.org/docs/latest/sql-programming-guide.html) +For more details about Spark SQL, see [Spark SQL, DataFrames and Datasets Guide](https://spark.apache.org/docs/latest/sql-programming-guide.html) #### Set sample parameters @@ -167,7 +168,11 @@ run_name = pipeline_func.__name__ + ' run' run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arguments) ``` +## References +* [Spark SQL, DataFrames and Datasets Guide](https://spark.apache.org/docs/latest/sql-programming-guide.html) +* [SparkSqlJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkSqlJob) +* [Cloud Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs) -```python -``` +## License +By deploying or using this software you agree to comply with the [AI Hub Terms of Service](https://aihub.cloud.google.com/u/0/aihub-tos) and the [Google APIs Terms of Service](https://developers.google.com/terms/). To the extent of a direct conflict of terms, the AI Hub Terms of Service will control. diff --git a/components/gcp/dataproc/submit_sparksql_job/sample.ipynb b/components/gcp/dataproc/submit_sparksql_job/sample.ipynb index 7d8709fa8c7..7e1ec4b84e8 100644 --- a/components/gcp/dataproc/submit_sparksql_job/sample.ipynb +++ b/components/gcp/dataproc/submit_sparksql_job/sample.ipynb @@ -4,24 +4,32 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Submitting a SparkSql Job to Cloud Dataproc\n", - "A Kubeflow Pipeline component to submit a SparkSql job to Google Cloud Dataproc service. \n", + "# Name\n", + "Data preparation using SparkSQL on YARN with Cloud Dataproc\n", "\n", - "## Intended Use\n", - "Use the component to run an Apache SparkSql job as one preprocessing step in a KFP pipeline. \n", + "# Label\n", + "Cloud Dataproc, GCP, Cloud Storage, YARN, SparkSQL, Kubeflow, pipelines, components \n", + "\n", + "# Summary\n", + "A Kubeflow Pipeline component to prepare data by submitting a SparkSql job on YARN to Cloud Dataproc.\n", + "\n", + "# Details\n", + "\n", + "## Intended use\n", + "Use the component to run an Apache SparkSql job as one preprocessing step in a Kubeflow Pipeline.\n", "\n", "## Runtime arguments\n", - "Name | Description | Type | Optional | Default\n", - ":--- | :---------- | :--- | :------- | :------\n", - "project_id | The Google Cloud Platform (GCP) project ID that the cluster belongs to. | GCPProjectID | No |\n", - "region | The Dataproc region that handles the request. | GCPRegion | No |\n", - "cluster_name | The name of the cluster that runs the job. | String | No |\n", - "queries | The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. | List | Yes | `[]`\n", - "query_file_uri | The Hadoop Compatible Filesystem (HCFS) URI of the script that contains SQL queries.| GCSPath | Yes | ` `\n", - "script_variables | Mapping of query variable names to values (equivalent to the Spark SQL command: SET name=\"value\";). | List | Yes | `[]`\n", - "sparksql_job | The payload of a [SparkSqlJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkSqlJob). | Dict | Yes | `{}`\n", - "job | The payload of a [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs). | Dict | Yes | `{}`\n", - "wait_interval | The number of seconds to pause between polling the operation. 
| Integer | Yes | `30`\n", + "Argument| Description | Optional | Data type| Accepted values| Default |\n", + ":--- | :---------- | :--- | :------- | :------ | :------\n", + "project_id | The ID of the Google Cloud Platform (GCP) project that the cluster belongs to. | No| GCPProjectID | | |\n", + "region | The Cloud Dataproc region to handle the request. | No | GCPRegion|\n", + "cluster_name | The name of the cluster to run the job. | No | String| | |\n", + "queries | The queries to execute the SparkSQL job. Specify multiple queries in one string by separating them with semicolons. You do not need to terminate queries with semicolons. | Yes | List | | None | \n", + "query_file_uri | The HCFS URI of the script that contains the SparkSQL queries.| Yes | GCSPath | | None |\n", + "script_variables | Mapping of the query’s variable names to their values (equivalent to the SparkSQL command: SET name=\"value\";).| Yes| Dict | | None |\n", + "sparksql_job | The payload of a [SparkSqlJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkSqlJob). | Yes | Dict | | None |\n", + "job | The payload of a [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs). | Yes | Dict | | None |\n", + "wait_interval | The number of seconds to pause between polling the operation. | Yes |Integer | | 30 |\n", "\n", "## Output\n", "Name | Description | Type\n", @@ -30,19 +38,19 @@ "\n", "## Cautions & requirements\n", "To use the component, you must:\n", - "* Setup project by following the [guide](https://cloud.google.com/dataproc/docs/guides/setup-project).\n", + "* Set up a GCP project by following this [guide](https://cloud.google.com/dataproc/docs/guides/setup-project).\n", "* [Create a new cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster).\n", - "* The component is running under a secret of [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example:\n", + "* Run the component under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example:\n", "```\n", "component_op(...).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", "```\n", - "* Grant Kubeflow user service account the `roles/dataproc.editor` role on the project.\n", + "* Grant the Kubeflow user service account the role `roles/dataproc.editor` on the project.\n", "\n", "## Detailed Description\n", "This component creates a Pig job from [Dataproc submit job REST API](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs/submit).\n", "\n", - "Here are the steps to use the component in a pipeline:\n", - "1. Install KFP SDK\n" + "Follow these steps to use the component in a pipeline:\n", + "1. 
Install the Kubeflow Pipeline SDK:" ] }, { @@ -81,29 +89,18 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "For more information about the component, please checkout:\n", - "* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/dataproc/_submit_sparksql_job.py)\n", - "* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile)\n", - "* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/dataproc/submit_sparksql_job/sample.ipynb)\n", - "* [Dataproc SparkSqlJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkSqlJob)\n", - "\n", "### Sample\n", "\n", - "Note: the sample code below works in both IPython notebook or python code directly.\n", + "Note: The following sample code works in an IPython notebook or directly in Python code. See the sample code below to learn how to execute the template.\n", "\n", "#### Setup a Dataproc cluster\n", "[Create a new Dataproc cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster) (or reuse an existing one) before running the sample code.\n", "\n", - "#### Prepare SparkSQL job\n", - "Directly put your SparkSQL queries in the `queires` list or upload your SparkSQL queries into a file to a Google Cloud Storage (GCS) bucket and place the path in `query_file_uri`. In this sample, we will use a hard coded query in the `queries` list to select data from a public CSV file from GCS.\n", + "#### Prepare a SparkSQL job\n", + "Either put your SparkSQL queries in the `queires` list, or upload your SparkSQL queries into a file to a Cloud Storage bucket and then enter the Cloud Storage bucket’s path in `query_file_uri`. In this sample, we will use a hard coded query in the `queries` list to select data from a public CSV file from Cloud Storage.\n", + "\n", + "For more details about Spark SQL, see [Spark SQL, DataFrames and Datasets Guide](https://spark.apache.org/docs/latest/sql-programming-guide.html)\n", "\n", - "For more details about Spark SQL, please checkout the [programming guide](https://spark.apache.org/docs/latest/sql-programming-guide.html)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ "#### Set sample parameters" ] }, @@ -231,11 +228,18 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], - "source": [] + "source": [ + "## References\n", + "* [Spark SQL, DataFrames and Datasets Guide](https://spark.apache.org/docs/latest/sql-programming-guide.html)\n", + "* [SparkSqlJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkSqlJob)\n", + "* [Cloud Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs)\n", + "\n", + "\n", + "## License\n", + "By deploying or using this software you agree to comply with the [AI Hub Terms of Service](https://aihub.cloud.google.com/u/0/aihub-tos) and the [Google APIs Terms of Service](https://developers.google.com/terms/). To the extent of a direct conflict of terms, the AI Hub Terms of Service will control." 
+ ] } ], "metadata": { @@ -254,7 +258,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.6.4" } }, "nbformat": 4, diff --git a/components/gcp/ml_engine/batch_predict/README.md b/components/gcp/ml_engine/batch_predict/README.md index 1e38885b54f..c6674458606 100644 --- a/components/gcp/ml_engine/batch_predict/README.md +++ b/components/gcp/ml_engine/batch_predict/README.md @@ -1,23 +1,49 @@ -# Batch predicting using Cloud Machine Learning Engine -A Kubeflow Pipeline component to submit a batch prediction job against a trained model to Cloud ML Engine service. +# Name + +Batch prediction using Cloud Machine Learning Engine + + +# Label + +Cloud Storage, Cloud ML Engine, Kubeflow, Pipeline, Component + + +# Summary + +A Kubeflow Pipeline component to submit a batch prediction job against a deployed model on Cloud ML Engine. + + +# Details + ## Intended use -Use the component to run a batch prediction job against a deployed model in Cloud Machine Learning Engine. The prediction output will be stored in a Cloud Storage bucket. + +Use the component to run a batch prediction job against a deployed model on Cloud ML Engine. The prediction output is stored in a Cloud Storage bucket. + ## Runtime arguments -Name | Description | Type | Optional | Default -:--- | :---------- | :--- | :------- | :------ -project_id | The ID of the parent project of the job. | GCPProjectID | No | -model_path | Required. The path to the model. It can be one of the following paths: | String | No | -input_paths | The Cloud Storage location of the input data files. May contain wildcards. For example: `gs://foo/*.csv` | List | No | -input_data_format | The format of the input data files. See [DataFormat](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#DataFormat). | String | No | -output_path | The Cloud Storage location for the output data. | GCSPath | No | -region | The region in Compute Engine where the prediction job is run. | GCPRegion | No | -output_data_format | The format of the output data files. See [DataFormat](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#DataFormat). | String | Yes | `JSON` -prediction_input | The JSON input parameters to create a prediction job. See [PredictionInput](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#PredictionInput) to know more. | Dict | Yes | ` ` -job_id_prefix | The prefix of the generated job id. | String | Yes | ` ` -wait_interval | A time-interval to wait for in case the operation has a long run time. | Integer | Yes | `30` + +| Argument | Description | Optional | Data type | Accepted values | Default | +|--------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|--------------|-----------------|---------| +| project_id | The ID of the Google Cloud Platform (GCP) project of the job. | No | GCPProjectID | | | +| model_path | The path to the model. It can be one of the following:
| No | GCSPath | | | +| input_paths | The path to the Cloud Storage location containing the input data files. It can contain wildcards, for example, `gs://foo/*.csv` | No | List | GCSPath | | +| input_data_format | The format of the input data files. See [REST Resource: projects.jobs](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#DataFormat) for more details. | No | String | DataFormat | | +| output_path | The path to the Cloud Storage location for the output data. | No | GCSPath | | | +| region | The Compute Engine region where the prediction job is run. | No | GCPRegion | | | +| output_data_format | The format of the output data files. See [REST Resource: projects.jobs](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#DataFormat) for more details. | Yes | String | DataFormat | JSON | +| prediction_input | The JSON input parameters to create a prediction job. See [PredictionInput](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#PredictionInput) for more information. | Yes | Dict | | None | +| job_id_prefix | The prefix of the generated job id. | Yes | String | | None | +| wait_interval | The number of seconds to wait in case the operation has a long run time. | Yes | | | 30 | + + +## Input data schema + +The component accepts the following as input: + +* A trained model: It can be a model file in Cloud Storage, a deployed model, or a version in Cloud ML Engine. Specify the path to the model in the `model_path `runtime argument. +* Input data: The data used to make predictions against the trained model. The data can be in [multiple formats](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#DataFormat). The data path is specified by `input_paths` and the format is specified by `input_data_format`. ## Output Name | Description | Type @@ -29,25 +55,28 @@ output_path | The output path of the batch prediction job | GCSPath ## Cautions & requirements To use the component, you must: -* Setup cloud environment by following the [guide](https://cloud.google.com/ml-engine/docs/tensorflow/getting-started-training-prediction#setup). -* The component is running under a secret of [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example: -```python -mlengine_predict_op(...).apply(gcp.use_gcp_secret('user-gcp-sa')) +* Set up a cloud environment by following this [guide](https://cloud.google.com/ml-engine/docs/tensorflow/getting-started-training-prediction#setup). +* Run the component under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example: -``` -* Grant Kubeflow user service account the read access to the Cloud Storage buckets which contains the input data. -* Grant Kubeflow user service account the write access to the Cloud Storage bucket of the output directory. + ```python + mlengine_predict_op(...).apply(gcp.use_gcp_secret('user-gcp-sa')) + ``` + + +* Grant the following types of access to the Kubeflow user service account: + * Read access to the Cloud Storage buckets which contains the input data. + * Write access to the Cloud Storage bucket of the output directory. 
+ + +## Detailed description +Follow these steps to use the component in a pipeline: -## Detailed Description -The component accepts following input data: -* A trained model: it can be a model file in Cloud Storage, or a deployed model or version in Cloud Machine Learning Engine. The path to the model is specified by the `model_path` parameter. -* Input data: the data will be used to make predictions against the input trained model. The data can be in [multiple formats](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#DataFormat). The path of the data is specified by `input_paths` parameter and the format is specified by `input_data_format` parameter. -Here are the steps to use the component in a pipeline: -1. Install KFP SDK +1. Install the Kubeflow Pipeline SDK: + @@ -69,17 +98,11 @@ mlengine_batch_predict_op = comp.load_component_from_url( help(mlengine_batch_predict_op) ``` -For more information about the component, please checkout: -* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/ml_engine/_batch_predict.py) -* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile) -* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/ml_engine/batch_predict/sample.ipynb) -* [Cloud Machine Learning Engine job REST API](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs) ### Sample Code +Note: The following sample code works in an IPython notebook or directly in Python code. -Note: the sample code below works in both IPython notebook or python code directly. - -In this sample, we batch predict against a pre-built trained model from `gs://ml-pipeline-playground/samples/ml_engine/census/trained_model/` and use the test data from `gs://ml-pipeline-playground/samples/ml_engine/census/test.json`. +In this sample, you batch predict against a pre-built trained model from `gs://ml-pipeline-playground/samples/ml_engine/census/trained_model/` and use the test data from `gs://ml-pipeline-playground/samples/ml_engine/census/test.json`. #### Inspect the test data @@ -175,3 +198,12 @@ run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arg OUTPUT_FILES_PATTERN = OUTPUT_GCS_PATH + '*' !gsutil cat OUTPUT_FILES_PATTERN ``` + +## References +* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/ml_engine/_batch_predict.py) +* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile) +* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/ml_engine/batch_predict/sample.ipynb) +* [Cloud Machine Learning Engine job REST API](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs) + +## License +By deploying or using this software you agree to comply with the [AI Hub Terms of Service](https://aihub.cloud.google.com/u/0/aihub-tos) and the [Google APIs Terms of Service](https://developers.google.com/terms/). To the extent of a direct conflict of terms, the AI Hub Terms of Service will control. 
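As a compact illustration of the runtime arguments documented above, the following is a rough, single-step pipeline sketch (not part of the shipped sample). The component URL, project ID, region, and output bucket are placeholders, the model and test data paths are the pre-built census artifacts referenced in this document, and the list-typed `input_paths` argument is assumed to be passed as a JSON-encoded string:

```python
import kfp.components as comp
import kfp.dsl as dsl
import kfp.gcp as gcp

# Placeholder: the raw URL of the component.yaml loaded in the detailed description above.
COMPONENT_SPEC_URI = '<component.yaml URL from the step above>'
mlengine_batch_predict_op = comp.load_component_from_url(COMPONENT_SPEC_URI)

# Placeholder values -- replace with your own project, region, and output bucket.
PROJECT_ID = 'your-project-id'
REGION = 'us-central1'
OUTPUT_GCS_PATH = 'gs://your-bucket/batch_predict/output/'

@dsl.pipeline(
    name='CloudML batch predict sketch',
    description='Runs one batch prediction job against the pre-built census model.'
)
def batch_predict_sketch_pipeline():
    mlengine_batch_predict_op(
        project_id=PROJECT_ID,
        model_path='gs://ml-pipeline-playground/samples/ml_engine/census/trained_model/',
        input_paths='["gs://ml-pipeline-playground/samples/ml_engine/census/test.json"]',
        input_data_format='JSON',
        output_path=OUTPUT_GCS_PATH,
        region=REGION,
        output_data_format='JSON'
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))
```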
diff --git a/components/gcp/ml_engine/batch_predict/sample.ipynb b/components/gcp/ml_engine/batch_predict/sample.ipynb index 4a88302f70b..92985e1b112 100644 --- a/components/gcp/ml_engine/batch_predict/sample.ipynb +++ b/components/gcp/ml_engine/batch_predict/sample.ipynb @@ -4,25 +4,51 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Batch predicting using Cloud Machine Learning Engine\n", - "A Kubeflow Pipeline component to submit a batch prediction job against a trained model to Cloud ML Engine service.\n", + "# Name\n", + "\n", + "Batch prediction using Cloud Machine Learning Engine\n", + "\n", + "\n", + "# Label\n", + "\n", + "Cloud Storage, Cloud ML Engine, Kubeflow, Pipeline, Component\n", + "\n", + "\n", + "# Summary\n", + "\n", + "A Kubeflow Pipeline component to submit a batch prediction job against a deployed model on Cloud ML Engine.\n", + "\n", + "\n", + "# Details\n", + "\n", "\n", "## Intended use\n", - "Use the component to run a batch prediction job against a deployed model in Cloud Machine Learning Engine. The prediction output will be stored in a Cloud Storage bucket.\n", + "\n", + "Use the component to run a batch prediction job against a deployed model on Cloud ML Engine. The prediction output is stored in a Cloud Storage bucket.\n", + "\n", "\n", "## Runtime arguments\n", - "Name | Description | Type | Optional | Default\n", - ":--- | :---------- | :--- | :------- | :------\n", - "project_id | The ID of the parent project of the job. | GCPProjectID | No |\n", - "model_path | Required. The path to the model. It can be one of the following paths: | String | No |\n", - "input_paths | The Cloud Storage location of the input data files. May contain wildcards. For example: `gs://foo/*.csv` | List | No |\n", - "input_data_format | The format of the input data files. See [DataFormat](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#DataFormat). | String | No |\n", - "output_path | The Cloud Storage location for the output data. | GCSPath | No |\n", - "region | The region in Compute Engine where the prediction job is run. | GCPRegion | No |\n", - "output_data_format | The format of the output data files. See [DataFormat](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#DataFormat). | String | Yes | `JSON`\n", - "prediction_input | The JSON input parameters to create a prediction job. See [PredictionInput](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#PredictionInput) to know more. | Dict | Yes | ` `\n", - "job_id_prefix | The prefix of the generated job id. | String | Yes | ` `\n", - "wait_interval | A time-interval to wait for in case the operation has a long run time. | Integer | Yes | `30`\n", + "\n", + "| Argument | Description | Optional | Data type | Accepted values | Default |\n", + "|--------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|--------------|-----------------|---------|\n", + "| project_id | The ID of the Google Cloud Platform (GCP) project of the job. | No | GCPProjectID | | |\n", + "| model_path | The path to the model. It can be one of the following:
| No | GCSPath | | |\n", + "| input_paths | The path to the Cloud Storage location containing the input data files. It can contain wildcards, for example, `gs://foo/*.csv` | No | List | GCSPath | |\n", + "| input_data_format | The format of the input data files. See [REST Resource: projects.jobs](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#DataFormat) for more details. | No | String | DataFormat | |\n", + "| output_path | The path to the Cloud Storage location for the output data. | No | GCSPath | | |\n", + "| region | The Compute Engine region where the prediction job is run. | No | GCPRegion | | |\n", + "| output_data_format | The format of the output data files. See [REST Resource: projects.jobs](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#DataFormat) for more details. | Yes | String | DataFormat | JSON |\n", + "| prediction_input | The JSON input parameters to create a prediction job. See [PredictionInput](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#PredictionInput) for more information. | Yes | Dict | | None |\n", + "| job_id_prefix | The prefix of the generated job id. | Yes | String | | None |\n", + "| wait_interval | The number of seconds to wait in case the operation has a long run time. | Yes | | | 30 |\n", + "\n", + "\n", + "## Input data schema\n", + "\n", + "The component accepts the following as input:\n", + "\n", + "* A trained model: It can be a model file in Cloud Storage, a deployed model, or a version in Cloud ML Engine. Specify the path to the model in the `model_path `runtime argument.\n", + "* Input data: The data used to make predictions against the trained model. The data can be in [multiple formats](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#DataFormat). The data path is specified by `input_paths` and the format is specified by `input_data_format`.\n", "\n", "## Output\n", "Name | Description | Type\n", @@ -34,25 +60,28 @@ "## Cautions & requirements\n", "\n", "To use the component, you must:\n", - "* Setup cloud environment by following the [guide](https://cloud.google.com/ml-engine/docs/tensorflow/getting-started-training-prediction#setup).\n", - "* The component is running under a secret of [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example:\n", "\n", - "```python\n", - "mlengine_predict_op(...).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", + "* Set up a cloud environment by following this [guide](https://cloud.google.com/ml-engine/docs/tensorflow/getting-started-training-prediction#setup).\n", + "* Run the component under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. 
For example:\n", + "\n", + " ```python\n", + " mlengine_predict_op(...).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", + " ```\n", + "\n", "\n", - "```\n", - "* Grant Kubeflow user service account the read access to the Cloud Storage buckets which contains the input data.\n", - "* Grant Kubeflow user service account the write access to the Cloud Storage bucket of the output directory.\n", + "* Grant the following types of access to the Kubeflow user service account:\n", + " * Read access to the Cloud Storage buckets which contains the input data.\n", + " * Write access to the Cloud Storage bucket of the output directory.\n", "\n", "\n", - "## Detailed Description\n", + "## Detailed description\n", "\n", - "The component accepts following input data:\n", - "* A trained model: it can be a model file in Cloud Storage, or a deployed model or version in Cloud Machine Learning Engine. The path to the model is specified by the `model_path` parameter.\n", - "* Input data: the data will be used to make predictions against the input trained model. The data can be in [multiple formats](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#DataFormat). The path of the data is specified by `input_paths` parameter and the format is specified by `input_data_format` parameter.\n", + "Follow these steps to use the component in a pipeline:\n", "\n", - "Here are the steps to use the component in a pipeline:\n", - "1. Install KFP SDK\n" + "\n", + "\n", + "1. Install the Kubeflow Pipeline SDK:\n", + "\n" ] }, { @@ -91,17 +120,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "For more information about the component, please checkout:\n", - "* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/ml_engine/_batch_predict.py)\n", - "* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile)\n", - "* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/ml_engine/batch_predict/sample.ipynb)\n", - "* [Cloud Machine Learning Engine job REST API](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs)\n", "\n", "### Sample Code\n", + "Note: The following sample code works in an IPython notebook or directly in Python code. \n", "\n", - "Note: the sample code below works in both IPython notebook or python code directly.\n", - "\n", - "In this sample, we batch predict against a pre-built trained model from `gs://ml-pipeline-playground/samples/ml_engine/census/trained_model/` and use the test data from `gs://ml-pipeline-playground/samples/ml_engine/census/test.json`. 
\n", + "In this sample, you batch predict against a pre-built trained model from `gs://ml-pipeline-playground/samples/ml_engine/census/trained_model/` and use the test data from `gs://ml-pipeline-playground/samples/ml_engine/census/test.json`.\n", "\n", "#### Inspect the test data" ] @@ -255,6 +278,20 @@ "OUTPUT_FILES_PATTERN = OUTPUT_GCS_PATH + '*'\n", "!gsutil cat OUTPUT_FILES_PATTERN" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## References\n", + "* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/ml_engine/_batch_predict.py)\n", + "* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile)\n", + "* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/ml_engine/batch_predict/sample.ipynb)\n", + "* [Cloud Machine Learning Engine job REST API](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs)\n", + "\n", + "## License\n", + "By deploying or using this software you agree to comply with the [AI Hub Terms of Service](https://aihub.cloud.google.com/u/0/aihub-tos) and the [Google APIs Terms of Service](https://developers.google.com/terms/). To the extent of a direct conflict of terms, the AI Hub Terms of Service will control." + ] } ], "metadata": { @@ -273,7 +310,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.6.4" } }, "nbformat": 4, diff --git a/components/gcp/ml_engine/deploy/README.md b/components/gcp/ml_engine/deploy/README.md index ea88337ebbb..de191af2c78 100644 --- a/components/gcp/ml_engine/deploy/README.md +++ b/components/gcp/ml_engine/deploy/README.md @@ -1,55 +1,98 @@ -# Deploying a trained model to Cloud Machine Learning Engine -A Kubeflow Pipeline component to deploy a trained model from a Cloud Storage path to a Cloud Machine Learning Engine service. +# Name + +Deploying a trained model to Cloud Machine Learning Engine + + +# Label + +Cloud Storage, Cloud ML Engine, Kubeflow, Pipeline + + +# Summary + +A Kubeflow Pipeline component to deploy a trained model from a Cloud Storage location to Cloud ML Engine. + + +# Details + ## Intended use -Use the component to deploy a trained model to Cloud Machine Learning Engine service. The deployed model can serve online or batch predictions in a KFP pipeline. - -## Runtime arguments: -Name | Description | Type | Optional | Default -:--- | :---------- | :--- | :------- | :------ -model_uri | The Cloud Storage URI which contains a model file. Commonly used TF model search paths (export/exporter) will be used. | GCSPath | No | -project_id | The ID of the parent project of the serving model. | GCPProjectID | No | -model_id | The user-specified name of the model. If it is not provided, the operation uses a random name. | String | Yes | ` ` -version_id | The user-specified name of the version. If it is not provided, the operation uses a random name. | String | Yes | ` ` -runtime_version | The [Cloud ML Engine runtime version](https://cloud.google.com/ml-engine/docs/tensorflow/runtime-version-list) to use for this deployment. If it is not set, the Cloud ML Engine uses the default stable version, 1.0. | String | Yes | ` ` -python_version | The version of Python used in the prediction. If it is not set, the default version is `2.7`. Python `3.5` is available when the runtime_version is set to `1.4` and above. Python `2.7` works with all supported runtime versions. 
| String | Yes | ` ` -version | The JSON payload of the new [Version](https://cloud.google.com/ml-engine/reference/rest/v1/projects.models.versions). | Dict | Yes | ` ` -replace_existing_version | A Boolean flag that indicates whether to replace existing version in case of conflict. | Bool | Yes | False -set_default | A Boolean flag that indicates whether to set the new version as default version in the model. | Bool | Yes | False -wait_interval | A time-interval to wait for in case the operation has a long run time. | Integer | Yes | 30 - -## Output: -Name | Description | Type -:--- | :---------- | :--- -model_uri | The Cloud Storage URI of the trained model. | GCSPath -model_name | The name of the serving model. | String -version_name | The name of the deployed version of the model. | String + +Use the component to deploy a trained model to Cloud ML Engine. The deployed model can serve online or batch predictions in a Kubeflow Pipeline. + + +## Runtime arguments + +| Argument | Description | Optional | Data type | Accepted values | Default | +|--------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|--------------|-----------------|---------| +| model_uri | The URI of a Cloud Storage directory that contains a trained model file.
Or
An [Estimator export base directory](https://www.tensorflow.org/guide/saved_model#perform_the_export) that contains a list of subdirectories named by timestamp. The directory with the latest timestamp is used to load the trained model file. | No | GCSPath | | | +| project_id | The ID of the Google Cloud Platform (GCP) project of the serving model. | No | GCPProjectID | | | +| model_id | The name of the trained model. | Yes | String | | None | +| version_id | The name of the version of the model. If it is not provided, the operation uses a random name. | Yes | String | | None | +| runtime_version | The Cloud ML Engine runtime version to use for this deployment. If it is not provided, the default stable version, 1.0, is used. | Yes | String | | None | +| python_version | The version of Python used in the prediction. If it is not provided, version 2.7 is used. You can use Python 3.5 if runtime_version is set to 1.4 or above. Python 2.7 works with all supported runtime versions. | Yes | String | | 2.7 | +| model | The JSON payload of the new [model](https://cloud.google.com/ml-engine/reference/rest/v1/projects.models). | Yes | Dict | | None | +| version | The new [version](https://cloud.google.com/ml-engine/reference/rest/v1/projects.models.versions) of the trained model. | Yes | Dict | | None | +| replace_existing_version | Indicates whether to replace the existing version in case of a conflict (if the same version number is found.) | Yes | Boolean | | FALSE | +| set_default | Indicates whether to set the new version as the default version in the model. | Yes | Boolean | | FALSE | +| wait_interval | The number of seconds to wait in case the operation has a long run time. | Yes | Integer | | 30 | + + + +## Input data schema + +The component looks for a trained model in the location specified by the `model_uri` runtime argument. The accepted trained models are: + + +* [Tensorflow SavedModel](https://cloud.google.com/ml-engine/docs/tensorflow/exporting-for-prediction) +* [Scikit-learn & XGBoost model](https://cloud.google.com/ml-engine/docs/scikit/exporting-for-prediction) + +The accepted file formats are: + +* *.pb +* *.pbtext +* model.bst +* model.joblib +* model.pkl + +`model_uri` can also be an [Estimator export base directory, ](https://www.tensorflow.org/guide/saved_model#perform_the_export)which contains a list of subdirectories named by timestamp. The directory with the latest timestamp is used to load the trained model file. + +## Output +| Name | Description | Type | +|:------- |:---- | :--- | +| job_id | The ID of the created job. | String | +| job_dir | The Cloud Storage path that contains the trained model output files. | GCSPath | + ## Cautions & requirements To use the component, you must: -* Setup cloud environment by following the [guide](https://cloud.google.com/ml-engine/docs/tensorflow/getting-started-training-prediction#setup). -* The component is running under a secret of [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example: -```python -mlengine_deploy_op(...).apply(gcp.use_gcp_secret('user-gcp-sa')) +* [Set up the cloud environment](https://cloud.google.com/ml-engine/docs/tensorflow/getting-started-training-prediction#setup). +* Run the component under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. 
For example: -``` -* Grant Kubeflow user service account the read access to the Cloud Storage buckets which contains the trained model. + ``` + ```python + mlengine_deploy_op(...).apply(gcp.use_gcp_secret('user-gcp-sa')) + + ``` +* Grant read access to the Cloud Storage bucket that contains the trained model to the Kubeflow user service account. -## Detailed Description +## Detailed description -The component does: -* Search for the trained model from the user provided Cloud Storage path. -* Create a new model if user provided model doesn’t exist. -* Delete the existing model version if `replace_existing_version` is enabled. -* Create a new model version from the trained model. -* Set the new version as the default version of the model if ‘set_default’ is enabled. +Use the component to: +* Locate the trained model at the Cloud Storage location you specify. +* Create a new model if a model provided by you doesn’t exist. +* Delete the existing model version if `replace_existing_version` is enabled. +* Create a new version of the model from the trained model. +* Set the new version as the default version of the model if `set_default` is enabled. + +Follow these steps to use the component in a pipeline: + +1. Install the Kubeflow Pipeline SDK: -Here are the steps to use the component in a pipeline: -1. Install KFP SDK @@ -71,18 +114,10 @@ mlengine_deploy_op = comp.load_component_from_url( help(mlengine_deploy_op) ``` -For more information about the component, please checkout: -* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/ml_engine/_deploy.py) -* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile) -* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/ml_engine/deploy/sample.ipynb) -* [Cloud Machine Learning Engine Model REST API](https://cloud.google.com/ml-engine/reference/rest/v1/projects.models) -* [Cloud Machine Learning Engine Version REST API](https://cloud.google.com/ml-engine/reference/rest/v1/projects.versions) - - ### Sample Note: The following sample code works in IPython notebook or directly in Python code. -In this sample, we will deploy a pre-built trained model from `gs://ml-pipeline-playground/samples/ml_engine/census/trained_model/` to Cloud Machine Learning Engine service. The deployed model is named `kfp_sample_model`. A new version will be created every time when the sample is run, and the latest version will be set as the default version of the deployed model. +In this sample, you deploy a pre-built trained model from `gs://ml-pipeline-playground/samples/ml_engine/census/trained_model/` to Cloud ML Engine. The deployed model is `kfp_sample_model`. A new version is created every time the sample is run, and the latest version is set as the default version of the deployed model. 
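As a quick sketch before the step-by-step sample parameters below (not part of the shipped sample), the deployment step itself can be written as follows. The component URL and project ID are placeholders, the model URI and model name come from this sample, the runtime version shown is only an example value, and the flags mirror the behavior described in the detailed description above:

```python
import kfp.components as comp
import kfp.dsl as dsl
import kfp.gcp as gcp

# Placeholder: the raw URL of the component.yaml loaded in the step above.
COMPONENT_SPEC_URI = '<component.yaml URL from the step above>'
mlengine_deploy_op = comp.load_component_from_url(COMPONENT_SPEC_URI)

# Placeholder value -- replace with your own project.
PROJECT_ID = 'your-project-id'
TRAINED_MODEL_PATH = 'gs://ml-pipeline-playground/samples/ml_engine/census/trained_model/'

@dsl.pipeline(
    name='CloudML deploy sketch',
    description='Deploys the pre-built census model and makes the new version the default.'
)
def deploy_sketch_pipeline():
    mlengine_deploy_op(
        model_uri=TRAINED_MODEL_PATH,
        project_id=PROJECT_ID,
        model_id='kfp_sample_model',       # created on the first run if it does not already exist
        runtime_version='1.10',            # example value; use a supported Cloud ML Engine runtime
        replace_existing_version='False',  # fail rather than overwrite a version with the same name
        set_default='True'                 # make the newly created version the model's default
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))
```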
#### Set sample parameters @@ -157,3 +192,13 @@ experiment = client.create_experiment(EXPERIMENT_NAME) run_name = pipeline_func.__name__ + ' run' run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arguments) ``` + +## References +* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/ml_engine/_deploy.py) +* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile) +* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/ml_engine/deploy/sample.ipynb) +* [Cloud Machine Learning Engine Model REST API](https://cloud.google.com/ml-engine/reference/rest/v1/projects.models) +* [Cloud Machine Learning Engine Version REST API](https://cloud.google.com/ml-engine/reference/rest/v1/projects.versions) + +## License +By deploying or using this software you agree to comply with the [AI Hub Terms of Service](https://aihub.cloud.google.com/u/0/aihub-tos) and the [Google APIs Terms of Service](https://developers.google.com/terms/). To the extent of a direct conflict of terms, the AI Hub Terms of Service will control. diff --git a/components/gcp/ml_engine/deploy/sample.ipynb b/components/gcp/ml_engine/deploy/sample.ipynb index e7c0fff2039..1d3926a83ce 100644 --- a/components/gcp/ml_engine/deploy/sample.ipynb +++ b/components/gcp/ml_engine/deploy/sample.ipynb @@ -4,57 +4,100 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Deploying a trained model to Cloud Machine Learning Engine\n", - "A Kubeflow Pipeline component to deploy a trained model from a Cloud Storage path to a Cloud Machine Learning Engine service.\n", + "# Name\n", + "\n", + "Deploying a trained model to Cloud Machine Learning Engine \n", + "\n", + "\n", + "# Label\n", + "\n", + "Cloud Storage, Cloud ML Engine, Kubeflow, Pipeline\n", + "\n", + "\n", + "# Summary\n", + "\n", + "A Kubeflow Pipeline component to deploy a trained model from a Cloud Storage location to Cloud ML Engine.\n", + "\n", + "\n", + "# Details\n", + "\n", "\n", "## Intended use\n", - "Use the component to deploy a trained model to Cloud Machine Learning Engine service. The deployed model can serve online or batch predictions in a KFP pipeline.\n", - "\n", - "## Runtime arguments:\n", - "Name | Description | Type | Optional | Default\n", - ":--- | :---------- | :--- | :------- | :------\n", - "model_uri | The Cloud Storage URI which contains a model file. Commonly used TF model search paths (export/exporter) will be used. | GCSPath | No |\n", - "project_id | The ID of the parent project of the serving model. | GCPProjectID | No | \n", - "model_id | The user-specified name of the model. If it is not provided, the operation uses a random name. | String | Yes | ` `\n", - "version_id | The user-specified name of the version. If it is not provided, the operation uses a random name. | String | Yes | ` `\n", - "runtime_version | The [Cloud ML Engine runtime version](https://cloud.google.com/ml-engine/docs/tensorflow/runtime-version-list) to use for this deployment. If it is not set, the Cloud ML Engine uses the default stable version, 1.0. | String | Yes | ` ` \n", - "python_version | The version of Python used in the prediction. If it is not set, the default version is `2.7`. Python `3.5` is available when the runtime_version is set to `1.4` and above. Python `2.7` works with all supported runtime versions. 
| String | Yes | ` `\n", - "version | The JSON payload of the new [Version](https://cloud.google.com/ml-engine/reference/rest/v1/projects.models.versions). | Dict | Yes | ` `\n", - "replace_existing_version | A Boolean flag that indicates whether to replace existing version in case of conflict. | Bool | Yes | False\n", - "set_default | A Boolean flag that indicates whether to set the new version as default version in the model. | Bool | Yes | False\n", - "wait_interval | A time-interval to wait for in case the operation has a long run time. | Integer | Yes | 30\n", - "\n", - "## Output:\n", - "Name | Description | Type\n", - ":--- | :---------- | :---\n", - "model_uri | The Cloud Storage URI of the trained model. | GCSPath\n", - "model_name | The name of the serving model. | String\n", - "version_name | The name of the deployed version of the model. | String\n", + "\n", + "Use the component to deploy a trained model to Cloud ML Engine. The deployed model can serve online or batch predictions in a Kubeflow Pipeline.\n", + "\n", + "\n", + "## Runtime arguments\n", + "\n", + "| Argument | Description | Optional | Data type | Accepted values | Default |\n", + "|--------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|--------------|-----------------|---------|\n", + "| model_uri | The URI of a Cloud Storage directory that contains a trained model file.
Or
An [Estimator export base directory](https://www.tensorflow.org/guide/saved_model#perform_the_export) that contains a list of subdirectories named by timestamp. The directory with the latest timestamp is used to load the trained model file. | No | GCSPath | | |\n", + "| project_id | The ID of the Google Cloud Platform (GCP) project of the serving model. | No | GCPProjectID | | |\n", + "| model_id | The name of the trained model. | Yes | String | | None |\n", + "| version_id | The name of the version of the model. If it is not provided, the operation uses a random name. | Yes | String | | None |\n", + "| runtime_version | The Cloud ML Engine runtime version to use for this deployment. If it is not provided, the default stable version, 1.0, is used. | Yes | String | | None |\n", + "| python_version | The version of Python used in the prediction. If it is not provided, version 2.7 is used. You can use Python 3.5 if runtime_version is set to 1.4 or above. Python 2.7 works with all supported runtime versions. | Yes | String | | 2.7 |\n", + "| model | The JSON payload of the new [model](https://cloud.google.com/ml-engine/reference/rest/v1/projects.models). | Yes | Dict | | None |\n", + "| version | The new [version](https://cloud.google.com/ml-engine/reference/rest/v1/projects.models.versions) of the trained model. | Yes | Dict | | None |\n", + "| replace_existing_version | Indicates whether to replace the existing version in case of a conflict (if the same version number is found.) | Yes | Boolean | | FALSE |\n", + "| set_default | Indicates whether to set the new version as the default version in the model. | Yes | Boolean | | FALSE |\n", + "| wait_interval | The number of seconds to wait in case the operation has a long run time. | Yes | Integer | | 30 |\n", + "\n", + "\n", + "\n", + "## Input data schema\n", + "\n", + "The component looks for a trained model in the location specified by the `model_uri` runtime argument. The accepted trained models are:\n", + "\n", + "\n", + "* [Tensorflow SavedModel](https://cloud.google.com/ml-engine/docs/tensorflow/exporting-for-prediction) \n", + "* [Scikit-learn & XGBoost model](https://cloud.google.com/ml-engine/docs/scikit/exporting-for-prediction)\n", + "\n", + "The accepted file formats are:\n", + "\n", + "* *.pb\n", + "* *.pbtext\n", + "* model.bst\n", + "* model.joblib\n", + "* model.pkl\n", + "\n", + "`model_uri` can also be an [Estimator export base directory, ](https://www.tensorflow.org/guide/saved_model#perform_the_export)which contains a list of subdirectories named by timestamp. The directory with the latest timestamp is used to load the trained model file.\n", + "\n", + "## Output\n", + "| Name | Description | Type |\n", + "|:------- |:---- | :--- |\n", + "| job_id | The ID of the created job. | String |\n", + "| job_dir | The Cloud Storage path that contains the trained model output files. | GCSPath |\n", + "\n", "\n", "## Cautions & requirements\n", "\n", "To use the component, you must:\n", - "* Setup cloud environment by following the [guide](https://cloud.google.com/ml-engine/docs/tensorflow/getting-started-training-prediction#setup).\n", - "* The component is running under a secret of [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. 
For example:\n", "\n", - "```python\n", - "mlengine_deploy_op(...).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", + "* [Set up the cloud environment](https://cloud.google.com/ml-engine/docs/tensorflow/getting-started-training-prediction#setup).\n", + "* Run the component under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example:\n", + "\n", + " ```\n", + " ```python\n", + " mlengine_deploy_op(...).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", "\n", - "```\n", - "* Grant Kubeflow user service account the read access to the Cloud Storage buckets which contains the trained model.\n", + " ```\n", "\n", + "* Grant read access to the Cloud Storage bucket that contains the trained model to the Kubeflow user service account.\n", "\n", - "## Detailed Description\n", + "## Detailed description\n", "\n", - "The component does:\n", - "* Search for the trained model from the user provided Cloud Storage path.\n", - "* Create a new model if user provided model doesn’t exist.\n", - "* Delete the existing model version if `replace_existing_version` is enabled.\n", - "* Create a new model version from the trained model.\n", - "* Set the new version as the default version of the model if ‘set_default’ is enabled.\n", + "Use the component to: \n", + "* Locate the trained model at the Cloud Storage location you specify.\n", + "* Create a new model if a model provided by you doesn’t exist.\n", + "* Delete the existing model version if `replace_existing_version` is enabled.\n", + "* Create a new version of the model from the trained model.\n", + "* Set the new version as the default version of the model if `set_default` is enabled.\n", "\n", - "Here are the steps to use the component in a pipeline:\n", - "1. Install KFP SDK\n" + "Follow these steps to use the component in a pipeline:\n", + "\n", + "1. Install the Kubeflow Pipeline SDK:\n", + "\n" ] }, { @@ -93,18 +136,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "For more information about the component, please checkout:\n", - "* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/ml_engine/_deploy.py)\n", - "* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile)\n", - "* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/ml_engine/deploy/sample.ipynb)\n", - "* [Cloud Machine Learning Engine Model REST API](https://cloud.google.com/ml-engine/reference/rest/v1/projects.models)\n", - "* [Cloud Machine Learning Engine Version REST API](https://cloud.google.com/ml-engine/reference/rest/v1/projects.versions)\n", - "\n", - "\n", "### Sample\n", "Note: The following sample code works in IPython notebook or directly in Python code.\n", "\n", - "In this sample, we will deploy a pre-built trained model from `gs://ml-pipeline-playground/samples/ml_engine/census/trained_model/` to Cloud Machine Learning Engine service. The deployed model is named `kfp_sample_model`. A new version will be created every time when the sample is run, and the latest version will be set as the default version of the deployed model.\n", + "In this sample, you deploy a pre-built trained model from `gs://ml-pipeline-playground/samples/ml_engine/census/trained_model/` to Cloud ML Engine. The deployed model is `kfp_sample_model`. 
A new version is created every time the sample is run, and the latest version is set as the default version of the deployed model.\n", "\n", "#### Set sample parameters" ] @@ -215,6 +250,21 @@ "run_name = pipeline_func.__name__ + ' run'\n", "run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arguments)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## References\n", + "* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/ml_engine/_deploy.py)\n", + "* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile)\n", + "* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/ml_engine/deploy/sample.ipynb)\n", + "* [Cloud Machine Learning Engine Model REST API](https://cloud.google.com/ml-engine/reference/rest/v1/projects.models)\n", + "* [Cloud Machine Learning Engine Version REST API](https://cloud.google.com/ml-engine/reference/rest/v1/projects.versions)\n", + "\n", + "## License\n", + "By deploying or using this software you agree to comply with the [AI Hub Terms of Service](https://aihub.cloud.google.com/u/0/aihub-tos) and the [Google APIs Terms of Service](https://developers.google.com/terms/). To the extent of a direct conflict of terms, the AI Hub Terms of Service will control." + ] } ], "metadata": { @@ -233,7 +283,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.6.4" } }, "nbformat": 4, diff --git a/components/gcp/ml_engine/train/README.md b/components/gcp/ml_engine/train/README.md index cc3e8b7ae18..0322cfc0a83 100644 --- a/components/gcp/ml_engine/train/README.md +++ b/components/gcp/ml_engine/train/README.md @@ -1,57 +1,74 @@ -# Submitting a Cloud ML training job as a pipeline step -A Kubeflow Pipeline component to submit a Cloud Machine Learning (Cloud ML) Engine training job as a step in a pipeline. +# Name +Submitting a Cloud Machine Learning Engine training job as a pipeline step -## Intended Use -This component is intended to submit a training job to Cloud Machine Learning (ML) Engine from a Kubeflow Pipelines workflow. +# Label +GCP, Cloud ML Engine, Machine Learning, pipeline, component, Kubeflow, Kubeflow Pipeline + +# Summary +A Kubeflow Pipeline component to submit a Cloud ML Engine training job as a step in a pipeline. + +# Details +## Intended use +Use this component to submit a training job to Cloud ML Engine from a Kubeflow Pipeline. ## Runtime arguments -Name | Description | Type | Optional | Default -:--- | :---------- | :--- | :------- | :------ -project_id | The ID of the parent project of the job. | GCPProjectID | No | -python_module | The Python module name to run after installing the packages. | String | Yes | `` -package_uris | The Cloud Storage location of the packages (that contain the training program and any additional dependencies). The maximum number of package URIs is 100. | List | Yes | `` -region | The Compute Engine region in which the training job is run. | GCPRegion | Yes | `` -args | The command line arguments to pass to the program. | List | Yes | `` -job_dir | The list of arguments to pass to the Python file. | GCSPath | Yes | `` -python_version | A Cloud Storage path in which to store the training outputs and other data needed for training. This path is passed to your TensorFlow program as the `job-dir` command-line argument. 
The benefit of specifying this field is that Cloud ML validates the path for use in training. | String | Yes | `` -runtime_version | The Cloud ML Engine runtime version to use for training. If not set, Cloud ML Engine uses the default stable version, 1.0. | String | Yes | `` -master_image_uri | The Docker image to run on the master replica. This image must be in Container Registry. | GCRPath | Yes | `` -worker_image_uri | The Docker image to run on the worker replica. This image must be in Container Registry. | GCRPath | Yes | `` -training_input | The input parameters to create a training job. It is the JSON payload of a [TrainingInput](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#TrainingInput) | Dict | Yes | `` -job_id_prefix | The prefix of the generated job id. | String | Yes | `` -wait_interval | A time-interval to wait for between calls to get the job status. | Integer | Yes | `30` - -## Outputs -Name | Description | Type -:--- | :---------- | :--- -job_id | The ID of the created job. | String -job_dir | The output path in Cloud Storage of the trainning job, which contains the trained model files. | GCSPath +| Argument | Description | Optional | Data type | Accepted values | Default | +|:------------------|:------------------|:----------|:--------------|:-----------------|:-------------| +| project_id | The ID of the Google Cloud Platform (GCP) project of the job. | No | GCPProjectID | | | +| python_module | The name of the Python module to run after installing the training program. | Yes | String | | None | +| package_uris | The Cloud Storage location of the packages that contain the training program and any additional dependencies. The maximum number of package URIs is 100. | Yes | List | | None | +| region | The Compute Engine region in which the training job is run. | Yes | GCPRegion | | us-central1 | +| args | The command line arguments to pass to the training program. | Yes | List | | None | +| job_dir | A Cloud Storage path in which to store the training outputs and other data needed for training. This path is passed to your TensorFlow program as the `job-dir` command-line argument. The benefit of specifying this field is that Cloud ML validates the path for use in training. | Yes | GCSPath | | None | +| python_version | The version of Python used in training. If it is not set, the default version is 2.7. Python 3.5 is available when the runtime version is set to 1.4 and above. | Yes | String | | None | +| runtime_version | The runtime version of Cloud ML Engine to use for training. If it is not set, Cloud ML Engine uses the default. | Yes | String | | 1 | +| master_image_uri | The Docker image to run on the master replica. This image must be in Container Registry. | Yes | GCRPath | | None | +| worker_image_uri | The Docker image to run on the worker replica. This image must be in Container Registry. | Yes | GCRPath | | None | +| training_input | The input parameters to create a training job. | Yes | Dict | [TrainingInput](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#TrainingInput) | None | +| job_id_prefix | The prefix of the job ID that is generated. | Yes | String | | None | +| wait_interval | The number of seconds to wait between API calls to get the status of the job. | Yes | Integer | | 30 | + + + +## Input data schema + +The component accepts two types of inputs: +* A list of Python packages from Cloud Storage. 
+ * You can manually build a Python package and upload it to Cloud Storage by following this [guide](https://cloud.google.com/ml-engine/docs/tensorflow/packaging-trainer#manual-build). +* A Docker container from Container Registry. + * Follow this [guide](https://cloud.google.com/ml-engine/docs/using-containers) to publish and use a Docker container with this component. + +## Output +| Name | Description | Type | +|:------- |:---- | :--- | +| job_id | The ID of the created job. | String | +| job_dir | The Cloud Storage path that contains the trained model output files. | GCSPath | + ## Cautions & requirements To use the component, you must: -* Setup cloud environment by following the [guide](https://cloud.google.com/ml-engine/docs/tensorflow/getting-started-training-prediction#setup). -* The component is running under a secret of [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example: -```python -mlengine_train_op(...).apply(gcp.use_gcp_secret('user-gcp-sa')) +* Set up a cloud environment by following this [guide](https://cloud.google.com/ml-engine/docs/tensorflow/getting-started-training-prediction#setup). +* Run the component under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example: -``` -* Grant Kubeflow user service account the read access to the Cloud Storage buckets which contains the input data, packages or docker images. -* Grant Kubeflow user service account the write access to the Cloud Storage bucket of the output directory. + ``` + mlengine_train_op(...).apply(gcp.use_gcp_secret('user-gcp-sa')) + ``` +* Grant the following access to the Kubeflow user service account: + * Read access to the Cloud Storage buckets which contain the input data, packages, or Docker images. + * Write access to the Cloud Storage bucket of the output directory. -## Detailed Description +## Detailed description -The component accepts one of the two types of executable inputs: -* A list of Python packages from Cloud Storage. You may manually build a Python package by following the [guide](https://cloud.google.com/ml-engine/docs/tensorflow/packaging-trainer#manual-build) and [upload it to Cloud Storage](https://cloud.google.com/ml-engine/docs/tensorflow/packaging-trainer#uploading_packages_manually). -* Docker container from Google Container Registry (GCR). Follow the [guide](https://cloud.google.com/ml-engine/docs/using-containers) to publish and use a Docker container with this component. +The component builds the [TrainingInput](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#TrainingInput) payload and submits a job via the [Cloud ML Engine REST API](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs). -The component builds the payload of a [TrainingInput](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#TrainingInput) and submit a job by Cloud Machine Learning Engine REST API. +The steps to use the component in a pipeline are: -Here are the steps to use the component in a pipeline: -1. Install KFP SDK + +1. 
Install the Kubeflow Pipeline SDK: @@ -73,18 +90,12 @@ mlengine_train_op = comp.load_component_from_url( help(mlengine_train_op) ``` -For more information about the component, please checkout: -* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/ml_engine/_train.py) -* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile) -* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/ml_engine/train/sample.ipynb) -* [Cloud Machine Learning Engine job REST API](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs) - - ### Sample -Note: The following sample code works in IPython notebook or directly in Python code. +Note: The following sample code works in an IPython notebook or directly in Python code. -In this sample, we use the code from [census estimator sample](https://github.com/GoogleCloudPlatform/cloudml-samples/tree/master/census/estimator) to train a model in Cloud Machine Learning Engine service. In order to pass the code to the service, we need to package the python code and upload it in a Cloud Storage bucket. Make sure that you have read and write permissions on the bucket that you use as the working directory. +In this sample, you use the code from the [census estimator sample](https://github.com/GoogleCloudPlatform/cloudml-samples/tree/master/census/estimator) to train a model in Cloud ML Engine. To upload the code to Cloud ML Engine, package the Python code and upload it to a Cloud Storage bucket. +Note: You must have read and write permissions on the bucket that you use as the working directory. #### Set sample parameters @@ -208,13 +219,18 @@ run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arg #### Inspect the results -Follow the `Run` link to open the KFP UI. In the step logs, you should be able to click on the links to: -* Job dashboard -* And realtime logs on Stackdriver - Use the following command to inspect the contents in the output directory: ```python !gsutil ls $OUTPUT_GCS_PATH ``` + +## References +* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/ml_engine/_train.py) +* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile) +* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/ml_engine/train/sample.ipynb) +* [Cloud Machine Learning Engine job REST API](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs) + +## License +By deploying or using this software you agree to comply with the [AI Hub Terms of Service](https://aihub.cloud.google.com/u/0/aihub-tos) and the [Google APIs Terms of Service](https://developers.google.com/terms/). To the extent of a direct conflict of terms, the AI Hub Terms of Service will control. 
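The README above describes loading the train component and running it under the `user-gcp-sa` secret. As a rough sketch only (not part of this patch), the pieces might be wired together as follows; the component URL and the argument values are illustrative placeholders, and the argument names follow the runtime-arguments table above.

```python
import kfp
from kfp import components, dsl, gcp

# Placeholder: substitute the component.yaml URL for the Cloud ML Engine train component.
mlengine_train_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/gcp/ml_engine/train/component.yaml')


@dsl.pipeline(
    name='CMLE train sketch',
    description='Illustrative wiring of the Cloud ML Engine train component.'
)
def train_sketch_pipeline(
    project_id='my-project',                              # illustrative value
    python_module='trainer.task',                         # illustrative value
    package_uris='["gs://my-bucket/trainer-0.1.tar.gz"]', # illustrative value
    region='us-central1',
    job_dir='gs://my-bucket/output'                       # illustrative value
):
    # Run under the Kubeflow user service account secret, as the README requires.
    mlengine_train_op(
        project_id=project_id,
        python_module=python_module,
        package_uris=package_uris,
        region=region,
        job_dir=job_dir,
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))


if __name__ == '__main__':
    kfp.compiler.Compiler().compile(train_sketch_pipeline, __file__ + '.zip')
```

For the container-based path, the table above suggests supplying `master_image_uri`/`worker_image_uri` instead of a Python package.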
diff --git a/components/gcp/ml_engine/train/sample.ipynb b/components/gcp/ml_engine/train/sample.ipynb index d8f5e58d0aa..718c73dccbd 100644 --- a/components/gcp/ml_engine/train/sample.ipynb +++ b/components/gcp/ml_engine/train/sample.ipynb @@ -4,59 +4,76 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Submitting a Cloud ML training job as a pipeline step\n", - "A Kubeflow Pipeline component to submit a Cloud Machine Learning (Cloud ML) Engine training job as a step in a pipeline.\n", + "# Name\n", + "Submitting a Cloud Machine Learning Engine training job as a pipeline step\n", "\n", - "## Intended Use\n", - "This component is intended to submit a training job to Cloud Machine Learning (ML) Engine from a Kubeflow Pipelines workflow.\n", + "# Label\n", + "GCP, Cloud ML Engine, Machine Learning, pipeline, component, Kubeflow, Kubeflow Pipeline\n", + "\n", + "# Summary\n", + "A Kubeflow Pipeline component to submit a Cloud ML Engine training job as a step in a pipeline.\n", + "\n", + "# Details\n", + "## Intended use\n", + "Use this component to submit a training job to Cloud ML Engine from a Kubeflow Pipeline. \n", "\n", "## Runtime arguments\n", - "Name | Description | Type | Optional | Default\n", - ":--- | :---------- | :--- | :------- | :------\n", - "project_id | The ID of the parent project of the job. | GCPProjectID | No |\n", - "python_module | The Python module name to run after installing the packages. | String | Yes | ``\n", - "package_uris | The Cloud Storage location of the packages (that contain the training program and any additional dependencies). The maximum number of package URIs is 100. | List | Yes | ``\n", - "region | The Compute Engine region in which the training job is run. | GCPRegion | Yes | ``\n", - "args | The command line arguments to pass to the program. | List | Yes | ``\n", - "job_dir | The list of arguments to pass to the Python file. | GCSPath | Yes | ``\n", - "python_version | A Cloud Storage path in which to store the training outputs and other data needed for training. This path is passed to your TensorFlow program as the `job-dir` command-line argument. The benefit of specifying this field is that Cloud ML validates the path for use in training. | String | Yes | ``\n", - "runtime_version | The Cloud ML Engine runtime version to use for training. If not set, Cloud ML Engine uses the default stable version, 1.0. | String | Yes | ``\n", - "master_image_uri | The Docker image to run on the master replica. This image must be in Container Registry. | GCRPath | Yes | ``\n", - "worker_image_uri | The Docker image to run on the worker replica. This image must be in Container Registry. | GCRPath | Yes | ``\n", - "training_input | The input parameters to create a training job. It is the JSON payload of a [TrainingInput](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#TrainingInput) | Dict | Yes | ``\n", - "job_id_prefix | The prefix of the generated job id. | String | Yes | ``\n", - "wait_interval | A time-interval to wait for between calls to get the job status. | Integer | Yes | `30`\n", + "| Argument | Description | Optional | Data type | Accepted values | Default |\n", + "|:------------------|:------------------|:----------|:--------------|:-----------------|:-------------|\n", + "| project_id | The ID of the Google Cloud Platform (GCP) project of the job. | No | GCPProjectID | | |\n", + "| python_module | The name of the Python module to run after installing the training program. 
| Yes | String | | None |\n", + "| package_uris | The Cloud Storage location of the packages that contain the training program and any additional dependencies. The maximum number of package URIs is 100. | Yes | List | | None |\n", + "| region | The Compute Engine region in which the training job is run. | Yes | GCPRegion | | us-central1 |\n", + "| args | The command line arguments to pass to the training program. | Yes | List | | None |\n", + "| job_dir | A Cloud Storage path in which to store the training outputs and other data needed for training. This path is passed to your TensorFlow program as the `job-dir` command-line argument. The benefit of specifying this field is that Cloud ML validates the path for use in training. | Yes | GCSPath | | None |\n", + "| python_version | The version of Python used in training. If it is not set, the default version is 2.7. Python 3.5 is available when the runtime version is set to 1.4 and above. | Yes | String | | None |\n", + "| runtime_version | The runtime version of Cloud ML Engine to use for training. If it is not set, Cloud ML Engine uses the default. | Yes | String | | 1 |\n", + "| master_image_uri | The Docker image to run on the master replica. This image must be in Container Registry. | Yes | GCRPath | | None |\n", + "| worker_image_uri | The Docker image to run on the worker replica. This image must be in Container Registry. | Yes | GCRPath | | None |\n", + "| training_input | The input parameters to create a training job. | Yes | Dict | [TrainingInput](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#TrainingInput) | None |\n", + "| job_id_prefix | The prefix of the job ID that is generated. | Yes | String | | None |\n", + "| wait_interval | The number of seconds to wait between API calls to get the status of the job. | Yes | Integer | | 30 |\n", + "\n", + "\n", + "\n", + "## Input data schema\n", + "\n", + "The component accepts two types of inputs:\n", + "* A list of Python packages from Cloud Storage.\n", + " * You can manually build a Python package and upload it to Cloud Storage by following this [guide](https://cloud.google.com/ml-engine/docs/tensorflow/packaging-trainer#manual-build).\n", + "* A Docker container from Container Registry. \n", + " * Follow this [guide](https://cloud.google.com/ml-engine/docs/using-containers) to publish and use a Docker container with this component.\n", + "\n", + "## Output\n", + "| Name | Description | Type |\n", + "|:------- |:---- | :--- |\n", + "| job_id | The ID of the created job. | String |\n", + "| job_dir | The Cloud Storage path that contains the trained model output files. | GCSPath |\n", "\n", - "## Outputs\n", - "Name | Description | Type\n", - ":--- | :---------- | :---\n", - "job_id | The ID of the created job. | String\n", - "job_dir | The output path in Cloud Storage of the trainning job, which contains the trained model files. | GCSPath\n", "\n", "## Cautions & requirements\n", "\n", "To use the component, you must:\n", - "* Setup cloud environment by following the [guide](https://cloud.google.com/ml-engine/docs/tensorflow/getting-started-training-prediction#setup).\n", - "* The component is running under a secret of [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. 
For example:\n", "\n", - "```python\n", - "mlengine_train_op(...).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", + "* Set up a cloud environment by following this [guide](https://cloud.google.com/ml-engine/docs/tensorflow/getting-started-training-prediction#setup).\n", + "* Run the component under a secret [Kubeflow user service account](https://www.kubeflow.org/docs/started/getting-started-gke/#gcp-service-accounts) in a Kubeflow cluster. For example:\n", "\n", - "```\n", - "* Grant Kubeflow user service account the read access to the Cloud Storage buckets which contains the input data, packages or docker images.\n", - "* Grant Kubeflow user service account the write access to the Cloud Storage bucket of the output directory.\n", + " ```\n", + " mlengine_train_op(...).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", + " ```\n", "\n", + "* Grant the following access to the Kubeflow user service account: \n", + " * Read access to the Cloud Storage buckets which contain the input data, packages, or Docker images.\n", + " * Write access to the Cloud Storage bucket of the output directory.\n", "\n", - "## Detailed Description\n", + "## Detailed description\n", "\n", - "The component accepts one of the two types of executable inputs:\n", - "* A list of Python packages from Cloud Storage. You may manually build a Python package by following the [guide](https://cloud.google.com/ml-engine/docs/tensorflow/packaging-trainer#manual-build) and [upload it to Cloud Storage](https://cloud.google.com/ml-engine/docs/tensorflow/packaging-trainer#uploading_packages_manually).\n", - "* Docker container from Google Container Registry (GCR). Follow the [guide](https://cloud.google.com/ml-engine/docs/using-containers) to publish and use a Docker container with this component. \n", + "The component builds the [TrainingInput](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#TrainingInput) payload and submits a job via the [Cloud ML Engine REST API](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs).\n", "\n", - "The component builds the payload of a [TrainingInput](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#TrainingInput) and submit a job by Cloud Machine Learning Engine REST API.\n", + "The steps to use the component in a pipeline are:\n", "\n", - "Here are the steps to use the component in a pipeline:\n", - "1. Install KFP SDK\n" + "\n", + "1. Install the Kubeflow Pipeline SDK:\n" ] }, { @@ -95,18 +112,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "For more information about the component, please checkout:\n", - "* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/ml_engine/_train.py)\n", - "* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile)\n", - "* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/ml_engine/train/sample.ipynb)\n", - "* [Cloud Machine Learning Engine job REST API](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs)\n", - "\n", - "\n", "### Sample\n", - "Note: The following sample code works in IPython notebook or directly in Python code.\n", + "Note: The following sample code works in an IPython notebook or directly in Python code.\n", "\n", - "In this sample, we use the code from [census estimator sample](https://github.com/GoogleCloudPlatform/cloudml-samples/tree/master/census/estimator) to train a model in Cloud Machine Learning Engine service. 
In order to pass the code to the service, we need to package the python code and upload it in a Cloud Storage bucket. Make sure that you have read and write permissions on the bucket that you use as the working directory. \n", + "In this sample, you use the code from the [census estimator sample](https://github.com/GoogleCloudPlatform/cloudml-samples/tree/master/census/estimator) to train a model in Cloud ML Engine. To upload the code to Cloud ML Engine, package the Python code and upload it to a Cloud Storage bucket. \n", "\n", + "Note: You must have read and write permissions on the bucket that you use as the working directory.\n", "#### Set sample parameters" ] }, @@ -301,10 +312,6 @@ "source": [ "#### Inspect the results\n", "\n", - "Follow the `Run` link to open the KFP UI. In the step logs, you should be able to click on the links to:\n", - "* Job dashboard\n", - "* And realtime logs on Stackdriver\n", - "\n", "Use the following command to inspect the contents in the output directory:" ] }, @@ -316,6 +323,20 @@ "source": [ "!gsutil ls $OUTPUT_GCS_PATH" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## References\n", + "* [Component python code](https://github.com/kubeflow/pipelines/blob/master/component_sdk/python/kfp_component/google/ml_engine/_train.py)\n", + "* [Component docker file](https://github.com/kubeflow/pipelines/blob/master/components/gcp/container/Dockerfile)\n", + "* [Sample notebook](https://github.com/kubeflow/pipelines/blob/master/components/gcp/ml_engine/train/sample.ipynb)\n", + "* [Cloud Machine Learning Engine job REST API](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs)\n", + "\n", + "## License\n", + "By deploying or using this software you agree to comply with the [AI Hub Terms of Service](https://aihub.cloud.google.com/u/0/aihub-tos) and the [Google APIs Terms of Service](https://developers.google.com/terms/). To the extent of a direct conflict of terms, the AI Hub Terms of Service will control." 
+ ] } ], "metadata": { @@ -334,7 +355,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.6.4" } }, "nbformat": 4, From 69da4e559d63b973ffb71297cb3979d978fef411 Mon Sep 17 00:00:00 2001 From: Ning Date: Thu, 18 Apr 2019 12:59:26 -0700 Subject: [PATCH 04/43] update changelog (#1184) --- CHANGELOG.md | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 80 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a814dfce06..366175f0811 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,22 +1,99 @@ # Change Log -## [0.1.15](https://github.com/kubeflow/pipelines/tree/0.1.15) (2019-04-04) -[Full Changelog](https://github.com/kubeflow/pipelines/compare/0.1.14...0.1.15) +## [Unreleased](https://github.com/kubeflow/pipelines/tree/HEAD) + +[Full Changelog](https://github.com/kubeflow/pipelines/compare/0.1.17...HEAD) + +**Merged pull requests:** + +- SDK/Client - Stopped extracting pipeline file to disk during submission [\#1178](https://github.com/kubeflow/pipelines/pull/1178) ([Ark-kun](https://github.com/Ark-kun)) +- adding myself as reviewer for samples [\#1174](https://github.com/kubeflow/pipelines/pull/1174) ([animeshsingh](https://github.com/animeshsingh)) + +## [0.1.17](https://github.com/kubeflow/pipelines/tree/0.1.17) (2019-04-17) +[Full Changelog](https://github.com/kubeflow/pipelines/compare/0.1.16...0.1.17) + +**Closed issues:** + +- Runs leaked into archived page [\#1150](https://github.com/kubeflow/pipelines/issues/1150) +- Pagination broken in UI [\#1149](https://github.com/kubeflow/pipelines/issues/1149) +- The number of members in the policy \(1,503\) is larger than the maximum allowed size 1,500 [\#1146](https://github.com/kubeflow/pipelines/issues/1146) +- Backend Docker build fails with python error in resnet-train-pipeline.py [\#1142](https://github.com/kubeflow/pipelines/issues/1142) +- `func\_to\_container\_op` converting underscores to dashes [\#1135](https://github.com/kubeflow/pipelines/issues/1135) +- Use range instead of pin for python dependencies for kfp [\#1134](https://github.com/kubeflow/pipelines/issues/1134) +- Deploy a TFX pipeline from the command line [\#1127](https://github.com/kubeflow/pipelines/issues/1127) +- CloudBuild is failing since the CMLE deployer component was deleted [\#1123](https://github.com/kubeflow/pipelines/issues/1123) +- Failed to load the trained model using kubeflow deployer component [\#1102](https://github.com/kubeflow/pipelines/issues/1102) +- Pipeline parameters should be accessible to pipeline logic [\#1099](https://github.com/kubeflow/pipelines/issues/1099) +- Retrieve the experiment during list run [\#1084](https://github.com/kubeflow/pipelines/issues/1084) +- Connect local notebook to remote Pipeline cluster [\#1079](https://github.com/kubeflow/pipelines/issues/1079) +- How to support continuous online learning? 
[\#1053](https://github.com/kubeflow/pipelines/issues/1053) +- When notebook submits a run and clicks the link they get "Error: failed to retrieve run:" [\#1017](https://github.com/kubeflow/pipelines/issues/1017) +- Add static type checking sample test [\#974](https://github.com/kubeflow/pipelines/issues/974) +- Test coverage for runs without experiments [\#685](https://github.com/kubeflow/pipelines/issues/685) + +**Merged pull requests:** + +- release 0.1.17: update yaml tag in samples [\#1176](https://github.com/kubeflow/pipelines/pull/1176) ([gaoning777](https://github.com/gaoning777)) +- Component releasing for 0.1.17 [\#1171](https://github.com/kubeflow/pipelines/pull/1171) ([gaoning777](https://github.com/gaoning777)) +- SDK - Simplified the @component decorator [\#1166](https://github.com/kubeflow/pipelines/pull/1166) ([Ark-kun](https://github.com/Ark-kun)) +- delete the resnet image items [\#1165](https://github.com/kubeflow/pipelines/pull/1165) ([gaoning777](https://github.com/gaoning777)) +- Adding myself as a reviewer for components [\#1161](https://github.com/kubeflow/pipelines/pull/1161) ([animeshsingh](https://github.com/animeshsingh)) +- new kubernetes packages contain breaking change, thus fixing the version in the sample test image [\#1159](https://github.com/kubeflow/pipelines/pull/1159) ([gaoning777](https://github.com/gaoning777)) +- bug fix in resnet sample [\#1154](https://github.com/kubeflow/pipelines/pull/1154) ([gaoning777](https://github.com/gaoning777)) +- Add filter to next page token so it applies to subsequently requested pages [\#1153](https://github.com/kubeflow/pipelines/pull/1153) ([neuromage](https://github.com/neuromage)) +- fix missing filter for list call [\#1151](https://github.com/kubeflow/pipelines/pull/1151) ([IronPan](https://github.com/IronPan)) +- using comp yaml for ffdl pipeline [\#1148](https://github.com/kubeflow/pipelines/pull/1148) ([animeshsingh](https://github.com/animeshsingh)) +- component readmes [\#1147](https://github.com/kubeflow/pipelines/pull/1147) ([animeshsingh](https://github.com/animeshsingh)) +- Removes unnecessary API calls [\#1144](https://github.com/kubeflow/pipelines/pull/1144) ([rileyjbauer](https://github.com/rileyjbauer)) +- use kubeflow/pipelines branch for deployment in test [\#1143](https://github.com/kubeflow/pipelines/pull/1143) ([IronPan](https://github.com/IronPan)) +- Set run model default timestamp to 0 [\#1140](https://github.com/kubeflow/pipelines/pull/1140) ([IronPan](https://github.com/IronPan)) +- Allow adding pipeline with name and description. [\#1139](https://github.com/kubeflow/pipelines/pull/1139) ([neuromage](https://github.com/neuromage)) +- Allow later versions of dependencies to be used with Python KFP package. 
[\#1137](https://github.com/kubeflow/pipelines/pull/1137) ([neuromage](https://github.com/neuromage)) +- Expose service-type as parameter [\#1136](https://github.com/kubeflow/pipelines/pull/1136) ([vincent-pli](https://github.com/vincent-pli)) +- Add helper to easily add aws secrets [\#1133](https://github.com/kubeflow/pipelines/pull/1133) ([Jeffwan](https://github.com/Jeffwan)) +- add type checking sample to sample tests [\#1129](https://github.com/kubeflow/pipelines/pull/1129) ([gaoning777](https://github.com/gaoning777)) +- Updated descriptions of pre-installed samples [\#1126](https://github.com/kubeflow/pipelines/pull/1126) ([Ark-kun](https://github.com/Ark-kun)) +- Fixed CloudBuild failure [\#1124](https://github.com/kubeflow/pipelines/pull/1124) ([Ark-kun](https://github.com/Ark-kun)) +- Surface workflow finished time in list run API [\#1122](https://github.com/kubeflow/pipelines/pull/1122) ([IronPan](https://github.com/IronPan)) +- SDK - Simplified the @pipeline decorator [\#1120](https://github.com/kubeflow/pipelines/pull/1120) ([Ark-kun](https://github.com/Ark-kun)) +- update katib laucher [\#1118](https://github.com/kubeflow/pipelines/pull/1118) ([hougangliu](https://github.com/hougangliu)) +- Updated the package version in the notebooks [\#1117](https://github.com/kubeflow/pipelines/pull/1117) ([Ark-kun](https://github.com/Ark-kun)) +- Updated the "Basic - Retry" sample [\#1111](https://github.com/kubeflow/pipelines/pull/1111) ([Ark-kun](https://github.com/Ark-kun)) +- Updated the "Basic - Exit handler" sample [\#1109](https://github.com/kubeflow/pipelines/pull/1109) ([Ark-kun](https://github.com/Ark-kun)) +- incremented app version [\#1107](https://github.com/kubeflow/pipelines/pull/1107) ([kevinbache](https://github.com/kevinbache)) +- uprade tf-serving in deployer component [\#1103](https://github.com/kubeflow/pipelines/pull/1103) ([jinchihe](https://github.com/jinchihe)) +- Stabilized the artifact ordering during the compilation [\#1097](https://github.com/kubeflow/pipelines/pull/1097) ([Ark-kun](https://github.com/Ark-kun)) +- Creates a default experiment at API server set up time [\#1089](https://github.com/kubeflow/pipelines/pull/1089) ([rileyjbauer](https://github.com/rileyjbauer)) +- Passing the annotations and labels to the ContainerOp [\#1077](https://github.com/kubeflow/pipelines/pull/1077) ([Ark-kun](https://github.com/Ark-kun)) +- Build Pipeline leveraging Arena [\#1058](https://github.com/kubeflow/pipelines/pull/1058) ([cheyang](https://github.com/cheyang)) +- Rewrite ResNet sample by GCP components [\#1018](https://github.com/kubeflow/pipelines/pull/1018) ([hongye-sun](https://github.com/hongye-sun)) +- Add a ResNet example from NVIDIA [\#964](https://github.com/kubeflow/pipelines/pull/964) ([khoa-ho](https://github.com/khoa-ho)) + +## [0.1.16](https://github.com/kubeflow/pipelines/tree/0.1.16) (2019-04-06) +[Full Changelog](https://github.com/kubeflow/pipelines/compare/0.1.14...0.1.16) **Closed issues:** - usage reporting link broken [\#1073](https://github.com/kubeflow/pipelines/issues/1073) - Exit handler task does not have proper name in the UX [\#1051](https://github.com/kubeflow/pipelines/issues/1051) +- Remove link to JupyterHub from pipelines UI [\#1029](https://github.com/kubeflow/pipelines/issues/1029) - Invalid memory address or nil pointer dereference in metadata\_store.go [\#1024](https://github.com/kubeflow/pipelines/issues/1024) - KFP SDK: suggest retry if pipeline launch returns 500 error \('warmup' apparently required\) 
[\#1007](https://github.com/kubeflow/pipelines/issues/1007) - Refactor the test infra code [\#875](https://github.com/kubeflow/pipelines/issues/875) -- Add image substitution feature to either DSL compiler or KFP client [\#487](https://github.com/kubeflow/pipelines/issues/487) - Ability to stop runs [\#413](https://github.com/kubeflow/pipelines/issues/413) - Surface the sample test results to github [\#361](https://github.com/kubeflow/pipelines/issues/361) - feature ask: support for loops [\#303](https://github.com/kubeflow/pipelines/issues/303) **Merged pull requests:** +- Fixed Kubeflow sample test [\#1096](https://github.com/kubeflow/pipelines/pull/1096) ([Ark-kun](https://github.com/Ark-kun)) +- Rolling back TFMA image [\#1095](https://github.com/kubeflow/pipelines/pull/1095) ([Ark-kun](https://github.com/Ark-kun)) +- Fixed bug in TFMA component code [\#1094](https://github.com/kubeflow/pipelines/pull/1094) ([Ark-kun](https://github.com/Ark-kun)) +- Fixed bug in ROC component definition [\#1093](https://github.com/kubeflow/pipelines/pull/1093) ([Ark-kun](https://github.com/Ark-kun)) +- Added "Target lambda" parameter to "Confusion matrix" component [\#1091](https://github.com/kubeflow/pipelines/pull/1091) ([Ark-kun](https://github.com/Ark-kun)) +- SDK - Fix obj name in K8s -\> json conversion util [\#1088](https://github.com/kubeflow/pipelines/pull/1088) ([NathanDeMaria](https://github.com/NathanDeMaria)) +- optimize UX for loading pipeline pages [\#1085](https://github.com/kubeflow/pipelines/pull/1085) ([IronPan](https://github.com/IronPan)) +- Update CHANGELOG for release 0.1.15 [\#1083](https://github.com/kubeflow/pipelines/pull/1083) ([neuromage](https://github.com/neuromage)) - Release component image version a277f87ea1d4707bf860d080d06639b7caf9a1cf [\#1082](https://github.com/kubeflow/pipelines/pull/1082) ([neuromage](https://github.com/neuromage)) - Fix wrong indentation in extract\_pipelineparams\_from\_any\(\) [\#1076](https://github.com/kubeflow/pipelines/pull/1076) ([elikatsis](https://github.com/elikatsis)) - Removes link to Kubeflow privacy links as KFP no longer runs Spartakus [\#1074](https://github.com/kubeflow/pipelines/pull/1074) ([rileyjbauer](https://github.com/rileyjbauer)) From 0b40672f96e40e28ccd4415fdd686529aa819ada Mon Sep 17 00:00:00 2001 From: John Paton Date: Fri, 19 Apr 2019 02:23:54 +0200 Subject: [PATCH 05/43] Make Python Client robust to existing IPython installations (#1186) * Make Client robust to IPython installations * Return false directly instead of raising Co-Authored-By: JohnPaton --- sdk/python/kfp/_client.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sdk/python/kfp/_client.py b/sdk/python/kfp/_client.py index a5517fd6984..784ca427f21 100644 --- a/sdk/python/kfp/_client.py +++ b/sdk/python/kfp/_client.py @@ -84,6 +84,9 @@ def _is_ipython(self): """Returns whether we are running in notebook.""" try: import IPython + ipy = IPython.get_ipython() + if ipy is None: + return False except ImportError: return False From ee119ec6270cdbaeb7d585b722fd80c3c9bd9a35 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Thu, 18 Apr 2019 18:19:54 -0700 Subject: [PATCH 06/43] SDK - Got rid of the global variable collecting all created pipelines (#1167) * SDK - Got rid of the global variable collecting all created pipelines This list was only used by the command-line compiler. The command-line compiler can still collect the created pipelines by registering a handler function in `_pipeline_decorator_handlers`. * Replaced handler stack with a single handler. 
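To make the new hook concrete, here is a minimal sketch (not part of this commit) of how a caller could collect pipeline functions by registering a handler, mirroring the PipelineCollectorContext added to kfp/compiler/main.py below; the module name passed in is hypothetical.

```python
import kfp.dsl as dsl

def collect_pipelines_from_module(module_name):
    """Import a module and return every function it decorated with @dsl.pipeline.

    Mirrors the PipelineCollectorContext added to kfp/compiler/main.py in this patch.
    """
    pipeline_funcs = []

    def add_pipeline(func):
        pipeline_funcs.append(func)
        return func

    old_handler = dsl._pipeline._pipeline_decorator_handler
    dsl._pipeline._pipeline_decorator_handler = add_pipeline
    try:
        __import__(module_name)  # e.g. 'my_pipelines' -- a hypothetical module defining pipelines
    finally:
        dsl._pipeline._pipeline_decorator_handler = old_handler
    return pipeline_funcs
```

Each collected function still carries the `_pipeline_name` and `_pipeline_description` attributes set by the decorator, so it can be matched by name afterwards.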
--- sdk/python/kfp/compiler/main.py | 28 +++++++++++++++++++++------- sdk/python/kfp/dsl/_pipeline.py | 27 ++++++++++----------------- 2 files changed, 31 insertions(+), 24 deletions(-) diff --git a/sdk/python/kfp/compiler/main.py b/sdk/python/kfp/compiler/main.py index edcbb703b10..47f48362ec6 100644 --- a/sdk/python/kfp/compiler/main.py +++ b/sdk/python/kfp/compiler/main.py @@ -51,9 +51,7 @@ def parse_arguments(): return args -def _compile_pipeline_function(function_name, output_path, type_check): - - pipeline_funcs = dsl.Pipeline.get_pipeline_functions() +def _compile_pipeline_function(pipeline_funcs, function_name, output_path, type_check): if len(pipeline_funcs) == 0: raise ValueError('A function with @dsl.pipeline decorator is required in the py file.') @@ -72,13 +70,28 @@ def _compile_pipeline_function(function_name, output_path, type_check): kfp.compiler.Compiler().compile(pipeline_func, output_path, type_check) +class PipelineCollectorContext(): + def __enter__(self): + pipeline_funcs = [] + def add_pipeline(func): + pipeline_funcs.append(func) + return func + self.old_handler = dsl._pipeline._pipeline_decorator_handler + dsl._pipeline._pipeline_decorator_handler = add_pipeline + return pipeline_funcs + + def __exit__(self, *args): + dsl._pipeline._pipeline_decorator_handler = self.old_handler + + def compile_package(package_path, namespace, function_name, output_path, type_check): tmpdir = tempfile.mkdtemp() sys.path.insert(0, tmpdir) try: subprocess.check_call(['python3', '-m', 'pip', 'install', package_path, '-t', tmpdir]) - __import__(namespace) - _compile_pipeline_function(function_name, output_path, type_check) + with PipelineCollectorContext() as pipeline_funcs: + __import__(namespace) + _compile_pipeline_function(pipeline_funcs, function_name, output_path, type_check) finally: del sys.path[0] shutil.rmtree(tmpdir) @@ -88,8 +101,9 @@ def compile_pyfile(pyfile, function_name, output_path, type_check): sys.path.insert(0, os.path.dirname(pyfile)) try: filename = os.path.basename(pyfile) - __import__(os.path.splitext(filename)[0]) - _compile_pipeline_function(function_name, output_path, type_check) + with PipelineCollectorContext() as pipeline_funcs: + __import__(os.path.splitext(filename)[0]) + _compile_pipeline_function(pipeline_funcs, function_name, output_path, type_check) finally: del sys.path[0] diff --git a/sdk/python/kfp/dsl/_pipeline.py b/sdk/python/kfp/dsl/_pipeline.py index c60796d4938..f172a3c24ad 100644 --- a/sdk/python/kfp/dsl/_pipeline.py +++ b/sdk/python/kfp/dsl/_pipeline.py @@ -19,6 +19,11 @@ import sys +# This handler is called whenever the @pipeline decorator is applied. +# It can be used by command-line DSL compiler to inject code that runs for every pipeline definition. +_pipeline_decorator_handler = None + + def pipeline(name, description): """Decorator of pipeline functions. @@ -35,8 +40,11 @@ def my_pipeline(a: PipelineParam, b: PipelineParam): def _pipeline(func): func._pipeline_name = name func._pipeline_description = description - Pipeline._add_pipeline_to_global_list(func) - return func + + if _pipeline_decorator_handler: + return _pipeline_decorator_handler(func) or func + else: + return func return _pipeline @@ -82,31 +90,16 @@ class Pipeline(): # _default_pipeline is set when it (usually a compiler) runs "with Pipeline()" _default_pipeline = None - # All pipeline functions with @pipeline decorator that are imported. - # Each key is a pipeline function. Each value is a (name, description). 
- _pipeline_functions = [] - @staticmethod def get_default_pipeline(): """Get default pipeline. """ return Pipeline._default_pipeline - @staticmethod - def get_pipeline_functions(): - """Get all imported pipeline functions (decorated with @pipeline).""" - return Pipeline._pipeline_functions - - @staticmethod - def _add_pipeline_to_global_list(func): - """Add a pipeline function (decorated with @pipeline).""" - Pipeline._pipeline_functions.append(func) - @staticmethod def add_pipeline(name, description, func): """Add a pipeline function with the specified name and description.""" # Applying the @pipeline decorator to the pipeline function func = pipeline(name=name, description=description)(func) - Pipeline._add_pipeline_to_global_list(pipeline_meta, func) def __init__(self, name: str): """Create a new instance of Pipeline. From 866cc81dd9a75d3a024cf8a71d995e0268454512 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Thu, 18 Apr 2019 19:17:54 -0700 Subject: [PATCH 07/43] Updated the "Basic - Conditional" sample (#1108) * Updated the "Basic - Conditional" sample Modernized the sample pipeline code. * Arguments need to be strings. --- samples/basic/condition.py | 96 ++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 50 deletions(-) diff --git a/samples/basic/condition.py b/samples/basic/condition.py index 76d02336e77..62125c20f37 100755 --- a/samples/basic/condition.py +++ b/samples/basic/condition.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright 2018 Google LLC +# Copyright 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,66 +14,62 @@ # limitations under the License. -import kfp.dsl as dsl +import kfp +from kfp import dsl -class RandomNumOp(dsl.ContainerOp): - """Generate a random number between low and high.""" +def random_num_op(low, high): + """Generate a random number between low and high.""" + return dsl.ContainerOp( + name='Generate random number', + image='python:alpine3.6', + command=['sh', '-c'], + arguments=['python -c "import random; print(random.randint($0, $1))" | tee $2', str(low), str(high), '/tmp/output'], + file_outputs={'output': '/tmp/output'} + ) - def __init__(self, low, high): - super(RandomNumOp, self).__init__( - name='Random number', - image='python:alpine3.6', - command=['sh', '-c'], - arguments=['python -c "import random; print(random.randint(%s,%s))" | tee /tmp/output' % (low, high)], - file_outputs={'output': '/tmp/output'}) +def flip_coin_op(): + """Flip a coin and output heads or tails randomly.""" + return dsl.ContainerOp( + name='Flip coin', + image='python:alpine3.6', + command=['sh', '-c'], + arguments=['python -c "import random; result = \'heads\' if random.randint(0,1) == 0 ' + 'else \'tails\'; print(result)" | tee /tmp/output'], + file_outputs={'output': '/tmp/output'} + ) -class FlipCoinOp(dsl.ContainerOp): - """Flip a coin and output heads or tails randomly.""" - def __init__(self): - super(FlipCoinOp, self).__init__( - name='Flip', - image='python:alpine3.6', - command=['sh', '-c'], - arguments=['python -c "import random; result = \'heads\' if random.randint(0,1) == 0 ' - 'else \'tails\'; print(result)" | tee /tmp/output'], - file_outputs={'output': '/tmp/output'}) - - -class PrintOp(dsl.ContainerOp): - """Print a message.""" - - def __init__(self, msg): - super(PrintOp, self).__init__( - name='Print', - image='alpine:3.6', - command=['echo', msg], - ) +def print_op(msg): + """Print a message.""" + return 
dsl.ContainerOp( + name='Print', + image='alpine:3.6', + command=['echo', msg], + ) @dsl.pipeline( - name='pipeline flip coin', - description='shows how to use dsl.Condition.' + name='Conditional execution pipeline', + description='Shows how to use dsl.Condition().' ) -def flipcoin(): - flip = FlipCoinOp() - with dsl.Condition(flip.output == 'heads'): - random_num_head = RandomNumOp(0, 9) - with dsl.Condition(random_num_head.output > 5): - PrintOp('heads and %s > 5!' % random_num_head.output) - with dsl.Condition(random_num_head.output <= 5): - PrintOp('heads and %s <= 5!' % random_num_head.output) +def flipcoin_pipeline(): + flip = flip_coin_op() + with dsl.Condition(flip.output == 'heads'): + random_num_head = random_num_op(0, 9) + with dsl.Condition(random_num_head.output > 5): + print_op('heads and %s > 5!' % random_num_head.output) + with dsl.Condition(random_num_head.output <= 5): + print_op('heads and %s <= 5!' % random_num_head.output) - with dsl.Condition(flip.output == 'tails'): - random_num_tail = RandomNumOp(10, 19) - with dsl.Condition(random_num_tail.output > 15): - PrintOp('tails and %s > 15!' % random_num_tail.output) - with dsl.Condition(random_num_tail.output <= 15): - PrintOp('tails and %s <= 15!' % random_num_tail.output) + with dsl.Condition(flip.output == 'tails'): + random_num_tail = random_num_op(10, 19) + with dsl.Condition(random_num_tail.output > 15): + print_op('tails and %s > 15!' % random_num_tail.output) + with dsl.Condition(random_num_tail.output <= 15): + print_op('tails and %s <= 15!' % random_num_tail.output) if __name__ == '__main__': - import kfp.compiler as compiler - compiler.Compiler().compile(flipcoin, __file__ + '.zip') + kfp.compiler.Compiler().compile(flipcoin_pipeline, __file__ + '.zip') From 6e7e7b3877d2ed0ea11386c4cd9a6014410a28ec Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Thu, 18 Apr 2019 20:15:55 -0700 Subject: [PATCH 08/43] Updated the "Basic - Parallel execution" sample (#1110) Modernized the sample pipeline code. --- samples/basic/parallel_join.py | 64 +++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/samples/basic/parallel_join.py b/samples/basic/parallel_join.py index 6b14a821f0e..5f90a683c49 100755 --- a/samples/basic/parallel_join.py +++ b/samples/basic/parallel_join.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright 2018 Google LLC +# Copyright 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,38 +14,44 @@ # limitations under the License. -import kfp.dsl as dsl +import kfp +from kfp import dsl + +def gcs_download_op(url): + return dsl.ContainerOp( + name='GCS - Download', + image='google/cloud-sdk:216.0.0', + command=['sh', '-c'], + arguments=['gsutil cat $0 | tee $1', url, '/tmp/results.txt'], + file_outputs={ + 'data': '/tmp/results.txt', + } + ) + + +def echo2_op(text1, text2): + return dsl.ContainerOp( + name='echo', + image='library/bash:4.4.23', + command=['sh', '-c'], + arguments=['echo "Text 1: $0"; echo "Text 2: $1"', text1, text2] + ) @dsl.pipeline( - name='Parallel_and_Join', - description='Download two messages in parallel and print the concatenated result.' + name='Parallel pipeline', + description='Download two messages in parallel and prints the concatenated result.' 
) def download_and_join( - url1='gs://ml-pipeline-playground/shakespeare1.txt', - url2='gs://ml-pipeline-playground/shakespeare2.txt'): - """A three-step pipeline with first two running in parallel.""" - - download1 = dsl.ContainerOp( - name='download1', - image='google/cloud-sdk:216.0.0', - command=['sh', '-c'], - arguments=['gsutil cat %s | tee /tmp/results.txt' % url1], - file_outputs={'downloaded': '/tmp/results.txt'}) - - download2 = dsl.ContainerOp( - name='download2', - image='google/cloud-sdk:216.0.0', - command=['sh', '-c'], - arguments=['gsutil cat %s | tee /tmp/results.txt' % url2], - file_outputs={'downloaded': '/tmp/results.txt'}) - - echo = dsl.ContainerOp( - name='echo', - image='library/bash:4.4.23', - command=['sh', '-c'], - arguments=['echo %s %s' % (download1.output, download2.output)]) + url1='gs://ml-pipeline-playground/shakespeare1.txt', + url2='gs://ml-pipeline-playground/shakespeare2.txt' +): + """A three-step pipeline with first two running in parallel.""" + + download1_task = gcs_download_op(url1) + download2_task = gcs_download_op(url2) + + echo_task = echo2_op(download1_task.output, download2_task.output) if __name__ == '__main__': - import kfp.compiler as compiler - compiler.Compiler().compile(download_and_join, __file__ + '.zip') + kfp.compiler.Compiler().compile(download_and_join, __file__ + '.zip') From 1e57cd3ce0b2e432642982838a534cc504d970c8 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Thu, 18 Apr 2019 21:07:53 -0700 Subject: [PATCH 09/43] Updated the "Basic - Sequential execution" sample (#1112) Modernized the sample pipeline code. --- samples/basic/sequential.py | 49 +++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/samples/basic/sequential.py b/samples/basic/sequential.py index 511becfc722..a7dc807f28e 100755 --- a/samples/basic/sequential.py +++ b/samples/basic/sequential.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright 2018 Google LLC +# Copyright 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,28 +14,39 @@ # limitations under the License. -import kfp.dsl as dsl +import kfp +from kfp import dsl +def gcs_download_op(url): + return dsl.ContainerOp( + name='GCS - Download', + image='google/cloud-sdk:216.0.0', + command=['sh', '-c'], + arguments=['gsutil cat $0 | tee $1', url, '/tmp/results.txt'], + file_outputs={ + 'data': '/tmp/results.txt', + } + ) + + +def echo_op(text): + return dsl.ContainerOp( + name='echo', + image='library/bash:4.4.23', + command=['sh', '-c'], + arguments=['echo "$0"', text] + ) + @dsl.pipeline( - name='Sequential', - description='A pipeline with two sequential steps.' + name='Sequential pipeline', + description='A pipeline with two sequential steps.' 
) def sequential_pipeline(url='gs://ml-pipeline-playground/shakespeare1.txt'): - """A pipeline with two sequential steps.""" - - op1 = dsl.ContainerOp( - name='download', - image='google/cloud-sdk:216.0.0', - command=['sh', '-c'], - arguments=['gsutil cat %s | tee /tmp/results.txt' % url], - file_outputs={'downloaded': '/tmp/results.txt'}) - op2 = dsl.ContainerOp( - name='echo', - image='library/bash:4.4.23', - command=['sh', '-c'], - arguments=['echo "%s"' % op1.output]) + """A pipeline with two sequential steps.""" + + download_task = gcs_download_op(url) + echo_task = echo_op(download_task.output) if __name__ == '__main__': - import kfp.compiler as compiler - compiler.Compiler().compile(sequential_pipeline, __file__ + '.zip') + kfp.compiler.Compiler().compile(sequential_pipeline, __file__ + '.zip') From 07faa08b8622144e656d45cf015a665bb9a2a722 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Thu, 18 Apr 2019 22:01:55 -0700 Subject: [PATCH 10/43] Updated the "Basic - Recursive loop" sample (#1113) Modernized the sample pipeline code. --- samples/basic/recursion.py | 63 ++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/samples/basic/recursion.py b/samples/basic/recursion.py index 5bd6c652049..a5f219dac3f 100644 --- a/samples/basic/recursion.py +++ b/samples/basic/recursion.py @@ -13,54 +13,57 @@ # See the License for the specific language governing permissions and # limitations under the License. -import kfp.dsl as dsl -class FlipCoinOp(dsl.ContainerOp): - """Flip a coin and output heads or tails randomly.""" +import kfp +from kfp import dsl - def __init__(self): - super(FlipCoinOp, self).__init__( - name='Flip', + +def flip_coin_op(): + """Flip a coin and output heads or tails randomly.""" + return dsl.ContainerOp( + name='Flip coin', image='python:alpine3.6', command=['sh', '-c'], arguments=['python -c "import random; result = \'heads\' if random.randint(0,1) == 0 ' - 'else \'tails\'; print(result)" | tee /tmp/output'], - file_outputs={'output': '/tmp/output'}) + 'else \'tails\'; print(result)" | tee /tmp/output'], + file_outputs={'output': '/tmp/output'} + ) -class PrintOp(dsl.ContainerOp): - """Print a message.""" - def __init__(self, msg): - super(PrintOp, self).__init__( +def print_op(msg): + """Print a message.""" + return dsl.ContainerOp( name='Print', image='alpine:3.6', command=['echo', msg], ) -# Use the dsl.graph_component to decorate functions that are + +# Use the dsl.graph_component to decorate pipeline functions that can be # recursively called. @dsl.graph_component def flip_component(flip_result): - print_flip = PrintOp(flip_result) - flipA = FlipCoinOp().after(print_flip) - with dsl.Condition(flipA.output == 'heads'): - # When the flip_component is called recursively, the flipA.output - # from inside the graph component will be passed to the next flip_component - # as the input whereas the flip_result in the current graph component - # comes from the flipA.output in the flipcoin function. - flip_component(flipA.output) + print_flip = print_op(flip_result) + flipA = flip_coin_op().after(print_flip) + with dsl.Condition(flipA.output == 'heads'): + # When the flip_component is called recursively, the flipA.output + # from inside the graph component will be passed to the next flip_component + # as the input whereas the flip_result in the current graph component + # comes from the flipA.output in the flipcoin function. 
+ flip_component(flipA.output) + @dsl.pipeline( - name='pipeline flip coin', - description='shows how to use dsl.Condition.' + name='Recursive loop pipeline', + description='Shows how to create recursive loops.' ) def flipcoin(): - flipA = FlipCoinOp() - flip_loop = flip_component(flipA.output) - # flip_loop is a graph_component with the outputs field - # filled with the returned dictionary. - PrintOp('cool, it is over. %s' % flipA.output).after(flip_loop) + first_flip = flip_coin_op() + flip_loop = flip_component(first_flip.output) + # flip_loop is a graph_component with the outputs field + # filled with the returned dictionary. + print_op('cool, it is over.').after(flip_loop) + if __name__ == '__main__': - import kfp.compiler as compiler - compiler.Compiler().compile(flipcoin, __file__ + '.tar.gz') + kfp.compiler.Compiler().compile(flipcoin, __file__ + '.tar.gz') From abfdd295ef03e0b0695e366370993f23cdfef301 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Thu, 18 Apr 2019 22:53:53 -0700 Subject: [PATCH 11/43] Updated the "TFX Taxi Cab Classification Pipeline" sample (#1115) Modernized the sample pipeline code. --- .../tfx/taxi-cab-classification-pipeline.py | 240 +++++++----------- 1 file changed, 94 insertions(+), 146 deletions(-) diff --git a/samples/tfx/taxi-cab-classification-pipeline.py b/samples/tfx/taxi-cab-classification-pipeline.py index c5a1b0dffb5..2771cc51062 100755 --- a/samples/tfx/taxi-cab-classification-pipeline.py +++ b/samples/tfx/taxi-cab-classification-pipeline.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright 2018 Google LLC +# Copyright 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,128 +14,22 @@ # limitations under the License. 
-import kfp.dsl as dsl -import kfp.gcp as gcp -import datetime - -def dataflow_tf_data_validation_op(inference_data: 'GcsUri', validation_data: 'GcsUri', column_names: 'GcsUri[text/json]', key_columns, project: 'GcpProject', mode, validation_output: 'GcsUri[Directory]', step_name='validation'): - return dsl.ContainerOp( - name = step_name, - image = 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tfdv:e20fad3e161e88226c83437271adb063221459b9', - arguments = [ - '--csv-data-for-inference', inference_data, - '--csv-data-to-validate', validation_data, - '--column-names', column_names, - '--key-columns', key_columns, - '--project', project, - '--mode', mode, - '--output', '%s/{{workflow.name}}/validation' % validation_output, - ], - file_outputs = { - 'schema': '/schema.txt', - 'validation': '/output_validation_result.txt', - } - ) - -def dataflow_tf_transform_op(train_data: 'GcsUri', evaluation_data: 'GcsUri', schema: 'GcsUri[text/json]', project: 'GcpProject', preprocess_mode, preprocess_module: 'GcsUri[text/code/python]', transform_output: 'GcsUri[Directory]', step_name='preprocess'): - return dsl.ContainerOp( - name = step_name, - image = 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tft:e20fad3e161e88226c83437271adb063221459b9', - arguments = [ - '--train', train_data, - '--eval', evaluation_data, - '--schema', schema, - '--project', project, - '--mode', preprocess_mode, - '--preprocessing-module', preprocess_module, - '--output', '%s/{{workflow.name}}/transformed' % transform_output, - ], - file_outputs = {'transformed': '/output.txt'} - ) - - -def tf_train_op(transformed_data_dir, schema: 'GcsUri[text/json]', learning_rate: float, hidden_layer_size: int, steps: int, target: str, preprocess_module: 'GcsUri[text/code/python]', training_output: 'GcsUri[Directory]', step_name='training'): - return dsl.ContainerOp( - name = step_name, - image = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:e20fad3e161e88226c83437271adb063221459b9', - arguments = [ - '--transformed-data-dir', transformed_data_dir, - '--schema', schema, - '--learning-rate', learning_rate, - '--hidden-layer-size', hidden_layer_size, - '--steps', steps, - '--target', target, - '--preprocessing-module', preprocess_module, - '--job-dir', '%s/{{workflow.name}}/train' % training_output, - ], - file_outputs = {'train': '/output.txt'} - ) - -def dataflow_tf_model_analyze_op(model: 'TensorFlow model', evaluation_data: 'GcsUri', schema: 'GcsUri[text/json]', project: 'GcpProject', analyze_mode, analyze_slice_column, analysis_output: 'GcsUri', step_name='analysis'): - return dsl.ContainerOp( - name = step_name, - image = 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tfma:e20fad3e161e88226c83437271adb063221459b9', - arguments = [ - '--model', model, - '--eval', evaluation_data, - '--schema', schema, - '--project', project, - '--mode', analyze_mode, - '--slice-columns', analyze_slice_column, - '--output', '%s/{{workflow.name}}/analysis' % analysis_output, - ], - file_outputs = {'analysis': '/output.txt'} - ) - - -def dataflow_tf_predict_op(evaluation_data: 'GcsUri', schema: 'GcsUri[text/json]', target: str, model: 'TensorFlow model', predict_mode, project: 'GcpProject', prediction_output: 'GcsUri', step_name='prediction'): - return dsl.ContainerOp( - name = step_name, - image = 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tf-predict:e20fad3e161e88226c83437271adb063221459b9', - arguments = [ - '--data', evaluation_data, - '--schema', schema, - '--target', target, - '--model', model, - '--mode', predict_mode, - '--project', project, - '--output', 
'%s/{{workflow.name}}/predict' % prediction_output, - ], - file_outputs = {'prediction': '/output.txt'} - ) - - -def confusion_matrix_op(predictions: 'GcsUri', output: 'GcsUri', step_name='confusion_matrix'): - return dsl.ContainerOp( - name=step_name, - image='gcr.io/ml-pipeline/ml-pipeline-local-confusion-matrix:e20fad3e161e88226c83437271adb063221459b9', - arguments=[ - '--output', '%s/{{workflow.name}}/confusionmatrix' % output, - '--predictions', predictions, - '--target_lambda', """lambda x: (x['target'] > x['fare'] * 0.2)""", - ]) - - -def roc_op(predictions: 'GcsUri', output: 'GcsUri', step_name='roc'): - return dsl.ContainerOp( - name=step_name, - image='gcr.io/ml-pipeline/ml-pipeline-local-roc:e20fad3e161e88226c83437271adb063221459b9', - arguments=[ - '--output', '%s/{{workflow.name}}/roc' % output, - '--predictions', predictions, - '--target_lambda', """lambda x: 1 if (x['target'] > x['fare'] * 0.2) else 0""", - ]) - - -def kubeflow_deploy_op(model: 'TensorFlow model', tf_server_name, step_name='deploy'): - return dsl.ContainerOp( - name = step_name, - image = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-deployer:e20fad3e161e88226c83437271adb063221459b9', - arguments = [ - '--model-export-path', '%s/export/export' % model, - '--server-name', tf_server_name - ] - ) +import kfp +from kfp import components +from kfp import dsl +from kfp import gcp + + +dataflow_tf_data_validation_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/dataflow/tfdv/component.yaml') +dataflow_tf_transform_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/dataflow/tft/component.yaml') +tf_train_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/kubeflow/dnntrainer/component.yaml') +dataflow_tf_model_analyze_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/dataflow/tfma/component.yaml') +dataflow_tf_predict_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/dataflow/predict/component.yaml') + +confusion_matrix_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/local/confusion_matrix/component.yaml') +roc_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/local/roc/component.yaml') + +kubeflow_deploy_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/kubeflow/deployer/component.yaml') @dsl.pipeline( @@ -154,28 +48,82 @@ def taxi_cab_classification( learning_rate=0.1, hidden_layer_size='1500', steps=3000, - analyze_slice_column='trip_start_hour'): - - tf_server_name = 'taxi-cab-classification-model-{{workflow.uid}}' - validation = dataflow_tf_data_validation_op(train, evaluation, column_names, - key_columns, project, mode, output - ).apply(gcp.use_gcp_secret('user-gcp-sa')) - preprocess = dataflow_tf_transform_op(train, evaluation, validation.outputs['schema'], - project, mode, preprocess_module, output - ).apply(gcp.use_gcp_secret('user-gcp-sa')) - training = 
tf_train_op(preprocess.output, validation.outputs['schema'], learning_rate, - hidden_layer_size, steps, 'tips', preprocess_module, output - ).apply(gcp.use_gcp_secret('user-gcp-sa')) - analysis = dataflow_tf_model_analyze_op(training.output, evaluation, - validation.outputs['schema'], project, mode, analyze_slice_column, output - ).apply(gcp.use_gcp_secret('user-gcp-sa')) - prediction = dataflow_tf_predict_op(evaluation, validation.outputs['schema'], 'tips', - training.output, mode, project, output - ).apply(gcp.use_gcp_secret('user-gcp-sa')) - cm = confusion_matrix_op(prediction.output, output).apply(gcp.use_gcp_secret('user-gcp-sa')) - roc = roc_op(prediction.output, output).apply(gcp.use_gcp_secret('user-gcp-sa')) - deploy = kubeflow_deploy_op(training.output, tf_server_name).apply(gcp.use_gcp_secret('user-gcp-sa')) + analyze_slice_column='trip_start_hour' +): + output_template = str(output) + '/{{workflow.uid}}/{{pod.name}}/data' + target_lambda = """lambda x: (x['target'] > x['fare'] * 0.2)""" + target_class_lambda = """lambda x: 1 if (x['target'] > x['fare'] * 0.2) else 0""" + + tf_server_name = 'taxi-cab-classification-model-{{workflow.uid}}' + + validation = dataflow_tf_data_validation_op( + inference_data=train, + validation_data=evaluation, + column_names=column_names, + key_columns=key_columns, + gcp_project=project, + run_mode=mode, + validation_output=output_template + ).apply(gcp.use_gcp_secret('user-gcp-sa')) + + preprocess = dataflow_tf_transform_op( + training_data_file_pattern=train, + evaluation_data_file_pattern=evaluation, + schema=validation.outputs['schema'], + gcp_project=project, + run_mode=mode, + preprocessing_module=preprocess_module, + transformed_data_dir=output_template + ).apply(gcp.use_gcp_secret('user-gcp-sa')) + + training = tf_train_op( + transformed_data_dir=preprocess.output, + schema=validation.outputs['schema'], + learning_rate=learning_rate, + hidden_layer_size=hidden_layer_size, + steps=steps, + target='tips', + preprocessing_module=preprocess_module, + training_output_dir=output_template + ).apply(gcp.use_gcp_secret('user-gcp-sa')) + + analysis = dataflow_tf_model_analyze_op( + model=training.output, + evaluation_data=evaluation, + schema=validation.outputs['schema'], + gcp_project=project, + run_mode=mode, + slice_columns=analyze_slice_column, + analysis_results_dir=output_template + ).apply(gcp.use_gcp_secret('user-gcp-sa')) + + prediction = dataflow_tf_predict_op( + data_file_pattern=evaluation, + schema=validation.outputs['schema'], + target_column='tips', + model=training.output, + run_mode=mode, + gcp_project=project, + predictions_dir=output_template + ).apply(gcp.use_gcp_secret('user-gcp-sa')) + + cm = confusion_matrix_op( + predictions=prediction.output, + target_lambda=target_lambda, + output_dir=output_template + ).apply(gcp.use_gcp_secret('user-gcp-sa')) + + roc = roc_op( + predictions_dir=prediction.output, + target_lambda=target_class_lambda, + output_dir=output_template + ).apply(gcp.use_gcp_secret('user-gcp-sa')) + + deploy = kubeflow_deploy_op( + model_dir=str(training.output) + '/export/export', + server_name=tf_server_name + ).apply(gcp.use_gcp_secret('user-gcp-sa')) + if __name__ == '__main__': - import kfp.compiler as compiler - compiler.Compiler().compile(taxi_cab_classification, __file__ + '.zip') + kfp.compiler.Compiler().compile(taxi_cab_classification, __file__ + '.zip') From 2eddf0e3954f2e2c9c41269a6d37c1e76c482d70 Mon Sep 17 00:00:00 2001 From: cheyang Date: Fri, 19 Apr 2019 14:39:54 +0800 Subject: [PATCH 12/43] Update 
arena component with git support (#1179) * update sample * fix git sync * make downloading docker image automatically * make downloading docker image automatically * make downloading docker image automatically * fix typo * use extend to replace append --- components/arena/docker/arena_launcher.py | 37 +++++++++++++------ .../arena/python/arena/_arena_mpi_op.py | 6 ++- .../python/arena/_arena_standalone_op.py | 4 +- .../standalonejob/standalone_pipeline.py | 29 ++++++++------- 4 files changed, 48 insertions(+), 28 deletions(-) diff --git a/components/arena/docker/arena_launcher.py b/components/arena/docker/arena_launcher.py index 7fa528224a5..15d45d9f701 100644 --- a/components/arena/docker/arena_launcher.py +++ b/components/arena/docker/arena_launcher.py @@ -155,6 +155,7 @@ def generate_job_command(args): tensorboard_image = args.tensorboard_image tensorboard = str2bool(args.tensorboard) log_dir = args.log_dir + sync_source = args.sync_source commandArray = [ 'arena', 'submit', 'tfjob', @@ -163,22 +164,22 @@ def generate_job_command(args): ] if gpus > 0: - commandArray.append("--gpus={0}".format(gpus)) + commandArray.extend(['--gpus', str(gpus)]) if cpu > 0: - commandArray.append("--cpu={0}".format(cpu)) + commandArray.extend(['--cpu', str(cpu)]) if memory >0: - commandArray.append("--memory={0}".format(memory)) + commandArray.extend(['--memory', str(memory)]) if tensorboard_image != "tensorflow/tensorflow:1.12.0": - commandArray.append("--tensorboardImage={0}".format(tensorboard_image)) + commandArray.extend(['--tensorboardImage', tensorboard_image]) if tensorboard: commandArray.append("--tensorboard") if os.path.isdir(args.log_dir): - commandArray.append("--logdir={0}".format(args.log_dir)) + commandArray.append(['--logdir', args.log_dir]) else: logging.info("skip log dir :{0}".format(args.log_dir)) @@ -190,6 +191,12 @@ def generate_job_command(args): for e in env: commandArray.append("--env={0}".format(e)) + if len(sync_source) > 0: + if not sync_source.endswith(".git"): + raise ValueError("sync_source must be an http git url") + commandArray.extend(['--sync-mode','git']) + commandArray.extend(['--sync-source',sync_source]) + return commandArray, "tfjob" # Generate mpi job @@ -208,6 +215,7 @@ def generate_mpjob_command(args): tensorboard = str2bool(args.tensorboard) rdma = str2bool(args.rdma) log_dir = args.log_dir + sync_source = args.sync_source commandArray = [ 'arena', 'submit', 'mpijob', @@ -216,17 +224,17 @@ def generate_mpjob_command(args): '--image={0}'.format(image), ] - if gpus > 0: - commandArray.append("--gpus={0}".format(gpus)) + if gpus > 0: + commandArray.extend(['--gpus', str(gpus)]) if cpu > 0: - commandArray.append("--cpu={0}".format(cpu)) + commandArray.extend(['--cpu', str(cpu)]) if memory >0: - commandArray.append("--memory={0}".format(memory)) + commandArray.extend(['--memory', str(memory)]) if tensorboard_image != "tensorflow/tensorflow:1.12.0": - commandArray.append("--tensorboardImage={0}".format(tensorboard_image)) + commandArray.extend(['--tensorboardImage', tensorboard_image]) if tensorboard: commandArray.append("--tensorboard") @@ -235,7 +243,7 @@ def generate_mpjob_command(args): commandArray.append("--rdma") if os.path.isdir(args.log_dir): - commandArray.append("--logdir={0}".format(args.log_dir)) + commandArray.append(['--logdir', args.log_dir]) else: logging.info("skip log dir :{0}".format(args.log_dir)) @@ -247,6 +255,12 @@ def generate_mpjob_command(args): for e in env: commandArray.append("--env={0}".format(e)) + if len(sync_source) > 0: + if not 
sync_source.endswith(".git"): + raise ValueError("sync_source must be an http git url") + commandArray.extend(['--sync-mode','git']) + commandArray.extend(['--sync-source',sync_source]) + return commandArray, "mpijob" def str2bool(v): @@ -281,6 +295,7 @@ def main(argv=None): parser.add_argument('--env', action='append', type=str, default=[]) parser.add_argument('--data', action='append', type=str, default=[]) parser.add_argument('--metric', action='append', type=str, default=[]) + parser.add_argument('--sync-source', type=str, default='') subparsers = parser.add_subparsers(help='arena sub-command help') diff --git a/components/arena/python/arena/_arena_mpi_op.py b/components/arena/python/arena/_arena_mpi_op.py index 3cd8dbaada6..e9fb661100d 100644 --- a/components/arena/python/arena/_arena_mpi_op.py +++ b/components/arena/python/arena/_arena_mpi_op.py @@ -63,7 +63,7 @@ def mpi_job_op(name, image, command, workers=1, gpus=0, cpu=0, memory=0, env=[], options.append('--tensorboard-image') options.append(str(tensorboard_image)) - return dsl.ContainerOp( + op = dsl.ContainerOp( name=name, image=arenaImage, command=['python','arena_launcher.py'], @@ -81,4 +81,6 @@ def mpi_job_op(name, image, command, workers=1, gpus=0, cpu=0, memory=0, env=[], "mpijob", "--", str(command)], file_outputs={'train': '/output.txt'} - ) \ No newline at end of file + ) + op.set_image_pull_policy('Always') + return op \ No newline at end of file diff --git a/components/arena/python/arena/_arena_standalone_op.py b/components/arena/python/arena/_arena_standalone_op.py index e71f9e00b7c..e54ded5e319 100644 --- a/components/arena/python/arena/_arena_standalone_op.py +++ b/components/arena/python/arena/_arena_standalone_op.py @@ -64,7 +64,7 @@ def standalone_job_op(name, image, command, gpus=0, cpu=0, memory=0, env=[], options.append('--tensorboard-image') options.append(str(tensorboard_image)) - return dsl.ContainerOp( + op = dsl.ContainerOp( name=name, image=arena_image, command=['python','arena_launcher.py'], @@ -81,3 +81,5 @@ def standalone_job_op(name, image, command, gpus=0, cpu=0, memory=0, env=[], "--", str(command)], file_outputs={'train': '/output.txt'} ) + op.set_image_pull_policy('Always') + return op diff --git a/samples/arena-samples/standalonejob/standalone_pipeline.py b/samples/arena-samples/standalonejob/standalone_pipeline.py index 749507a4573..22c5eb4ae80 100644 --- a/samples/arena-samples/standalonejob/standalone_pipeline.py +++ b/samples/arena-samples/standalonejob/standalone_pipeline.py @@ -11,7 +11,8 @@ ) def sample_pipeline(learning_rate='0.01', dropout='0.9', - model_version='1'): + model_version='1', + commit='f097575656f927d86d99dd64931042e1a9003cb2'): """A pipeline for end to end machine learning workflow.""" data=["user-susan:/training"] gpus=1 @@ -27,29 +28,25 @@ def sample_pipeline(learning_rate='0.01', curl -O https://code.aliyun.com/xiaozhou/tensorflow-sample-code/raw/master/data/t10k-labels-idx1-ubyte.gz && \ curl -O https://code.aliyun.com/xiaozhou/tensorflow-sample-code/raw/master/data/train-images-idx3-ubyte.gz && \ curl -O https://code.aliyun.com/xiaozhou/tensorflow-sample-code/raw/master/data/train-labels-idx1-ubyte.gz") - # 2. prepare source code - prepare_code = arena.standalone_job_op( - name="source-code", - image="alpine/git", - data=data, - command="mkdir -p /training/models/ && \ - cd /training/models/ && \ - if [ ! -d /training/models/tensorflow-sample-code ]; then git clone https://code.aliyun.com/xiaozhou/tensorflow-sample-code.git; else echo no need download;fi") - # 3. 
train the models + # 2. download source code and train the models train = arena.standalone_job_op( name="train", image="tensorflow/tensorflow:1.11.0-gpu-py3", + sync_source="https://code.aliyun.com/xiaozhou/tensorflow-sample-code.git", + env=["GIT_SYNC_REV=%s" % (commit)], gpus=gpus, data=data, - command="echo %s;echo %s;python /training/models/tensorflow-sample-code/tfjob/docker/mnist/main.py --max_steps 500 --data_dir /training/dataset/mnist --log_dir /training/output/mnist --learning_rate %s --dropout %s" % (prepare_data.output, prepare_code.output, learning_rate, dropout), + command="echo %s;python code/tensorflow-sample-code/tfjob/docker/mnist/main.py --max_steps 500 --data_dir /training/dataset/mnist --log_dir /training/output/mnist --learning_rate %s --dropout %s" % (prepare_data.output, learning_rate, dropout), metrics=["Train-accuracy:PERCENTAGE"]) - # 4. export the model + # 3. export the model export_model = arena.standalone_job_op( name="export-model", image="tensorflow/tensorflow:1.11.0-py3", + sync_source="https://code.aliyun.com/xiaozhou/tensorflow-sample-code.git", + env=["GIT_SYNC_REV=%s" % (commit)], data=data, - command="echo %s;python /training/models/tensorflow-sample-code/tfjob/docker/mnist/export_model.py --model_version=%s --checkpoint_path=/training/output/mnist /training/output/models" % (train.output, model_version)) + command="echo %s;python code/tensorflow-sample-code/tfjob/docker/mnist/export_model.py --model_version=%s --checkpoint_path=/training/output/mnist /training/output/models" % (train.output, model_version)) if __name__ == '__main__': parser = argparse.ArgumentParser() @@ -60,11 +57,14 @@ def sample_pipeline(learning_rate='0.01', help='Keep probability for training dropout.') parser.add_argument('--learning_rate', type=str, default="0.001", help='Initial learning rate.') + parser.add_argument('--commit', type=str, default="f097575656f927d86d99dd64931042e1a9003cb2", + help='commit id.') FLAGS, unparsed = parser.parse_known_args() model_version = FLAGS.model_version dropout = FLAGS.dropout learning_rate = FLAGS.learning_rate + commit = FLAGS.commit EXPERIMENT_NAME="mnist" RUN_ID="run" @@ -79,4 +79,5 @@ def sample_pipeline(learning_rate='0.01', run = client.run_pipeline(experiment_id, RUN_ID, __file__ + '.tar.gz', params={'learning_rate':learning_rate, 'dropout':dropout, - 'model_version':model_version}) \ No newline at end of file + 'model_version':model_version, + 'commit':commit}) \ No newline at end of file From cf06dedc488aca44a49fdabf4e5d3bc86b83502b Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Fri, 19 Apr 2019 03:51:54 -0700 Subject: [PATCH 13/43] Updated the "XGBoost Trainer'" sample (#1116) * Updated the "XGBoost Trainer'" sample Modernized the sample pipeline code. * Added a note about enabling Dataproc API --- samples/xgboost-spark/xgboost-training-cm.py | 373 ++++++++++++------- 1 file changed, 233 insertions(+), 140 deletions(-) diff --git a/samples/xgboost-spark/xgboost-training-cm.py b/samples/xgboost-spark/xgboost-training-cm.py index d24e3d1330f..636a4c62531 100755 --- a/samples/xgboost-spark/xgboost-training-cm.py +++ b/samples/xgboost-spark/xgboost-training-cm.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright 2018 Google LLC +# Copyright 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,136 +14,188 @@ # limitations under the License. 
-import kfp.dsl as dsl -import kfp.gcp as gcp +import kfp from kfp import components +from kfp import dsl +from kfp import gcp confusion_matrix_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/local/confusion_matrix/component.yaml') roc_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/local/roc/component.yaml') +# ! Please do not forget to enable the Dataproc API in your cluster https://console.developers.google.com/apis/api/dataproc.googleapis.com/overview + # ================================================================ # The following classes should be provided by components provider. -class CreateClusterOp(dsl.ContainerOp): - - def __init__(self, name, project, region, staging): - super(CreateClusterOp, self).__init__( - name=name, - image='gcr.io/ml-pipeline/ml-pipeline-dataproc-create-cluster:e20fad3e161e88226c83437271adb063221459b9', - arguments=[ - '--project', project, - '--region', region, - '--name', 'xgb-{{workflow.name}}', - '--staging', staging - ], - file_outputs={'output': '/output.txt'}) - - -class DeleteClusterOp(dsl.ContainerOp): - - def __init__(self, name, project, region): - super(DeleteClusterOp, self).__init__( - name=name, - image='gcr.io/ml-pipeline/ml-pipeline-dataproc-delete-cluster:e20fad3e161e88226c83437271adb063221459b9', - arguments=[ - '--project', project, - '--region', region, - '--name', 'xgb-{{workflow.name}}', - ], - is_exit_handler=True) - - -class AnalyzeOp(dsl.ContainerOp): - - def __init__(self, name, project, region, cluster_name, schema, train_data, output): - super(AnalyzeOp, self).__init__( - name=name, - image='gcr.io/ml-pipeline/ml-pipeline-dataproc-analyze:e20fad3e161e88226c83437271adb063221459b9', - arguments=[ - '--project', project, - '--region', region, - '--cluster', cluster_name, - '--schema', schema, - '--train', train_data, - '--output', output, - ], - file_outputs={'output': '/output.txt'}) - - -class TransformOp(dsl.ContainerOp): - - def __init__(self, name, project, region, cluster_name, train_data, eval_data, - target, analysis, output): - super(TransformOp, self).__init__( - name=name, - image='gcr.io/ml-pipeline/ml-pipeline-dataproc-transform:e20fad3e161e88226c83437271adb063221459b9', - arguments=[ - '--project', project, - '--region', region, - '--cluster', cluster_name, - '--train', train_data, - '--eval', eval_data, - '--analysis', analysis, - '--target', target, - '--output', output, - ], - file_outputs={'train': '/output_train.txt', 'eval': '/output_eval.txt'}) - - -class TrainerOp(dsl.ContainerOp): - - def __init__(self, name, project, region, cluster_name, train_data, eval_data, - target, analysis, workers, rounds, output, is_classification=True): +def dataproc_create_cluster_op( + project, + region, + staging, + cluster_name='xgb-{{workflow.name}}' +): + return dsl.ContainerOp( + name='Dataproc - Create cluster', + image='gcr.io/ml-pipeline/ml-pipeline-dataproc-create-cluster:e20fad3e161e88226c83437271adb063221459b9', + arguments=[ + '--project', project, + '--region', region, + '--name', cluster_name, + '--staging', staging, + ], + file_outputs={ + 'output': '/output.txt', + } + ) + + +def dataproc_delete_cluster_op( + project, + region, + cluster_name='xgb-{{workflow.name}}' +): + return dsl.ContainerOp( + name='Dataproc - Delete cluster', + 
image='gcr.io/ml-pipeline/ml-pipeline-dataproc-delete-cluster:e20fad3e161e88226c83437271adb063221459b9', + arguments=[ + '--project', project, + '--region', region, + '--name', cluster_name, + ], + is_exit_handler=True + ) + + +def dataproc_analyze_op( + project, + region, + cluster_name, + schema, + train_data, + output +): + return dsl.ContainerOp( + name='Dataproc - Analyze', + image='gcr.io/ml-pipeline/ml-pipeline-dataproc-analyze:e20fad3e161e88226c83437271adb063221459b9', + arguments=[ + '--project', project, + '--region', region, + '--cluster', cluster_name, + '--schema', schema, + '--train', train_data, + '--output', output, + ], + file_outputs={ + 'output': '/output.txt', + } + ) + + +def dataproc_transform_op( + project, + region, + cluster_name, + train_data, + eval_data, + target, + analysis, + output +): + return dsl.ContainerOp( + name='Dataproc - Transform', + image='gcr.io/ml-pipeline/ml-pipeline-dataproc-transform:e20fad3e161e88226c83437271adb063221459b9', + arguments=[ + '--project', project, + '--region', region, + '--cluster', cluster_name, + '--train', train_data, + '--eval', eval_data, + '--analysis', analysis, + '--target', target, + '--output', output, + ], + file_outputs={ + 'train': '/output_train.txt', + 'eval': '/output_eval.txt', + } + ) + + +def dataproc_train_op( + project, + region, + cluster_name, + train_data, + eval_data, + target, + analysis, + workers, + rounds, + output, + is_classification=True +): if is_classification: config='gs://ml-pipeline-playground/trainconfcla.json' else: config='gs://ml-pipeline-playground/trainconfreg.json' - super(TrainerOp, self).__init__( - name=name, - image='gcr.io/ml-pipeline/ml-pipeline-dataproc-train:e20fad3e161e88226c83437271adb063221459b9', - arguments=[ - '--project', project, - '--region', region, - '--cluster', cluster_name, - '--train', train_data, - '--eval', eval_data, - '--analysis', analysis, - '--target', target, - '--package', 'gs://ml-pipeline-playground/xgboost4j-example-0.8-SNAPSHOT-jar-with-dependencies.jar', - '--workers', workers, - '--rounds', rounds, - '--conf', config, - '--output', output, - ], - file_outputs={'output': '/output.txt'}) - - -class PredictOp(dsl.ContainerOp): - - def __init__(self, name, project, region, cluster_name, data, model, target, analysis, output): - super(PredictOp, self).__init__( - name=name, - image='gcr.io/ml-pipeline/ml-pipeline-dataproc-predict:e20fad3e161e88226c83437271adb063221459b9', - arguments=[ - '--project', project, - '--region', region, - '--cluster', cluster_name, - '--predict', data, - '--analysis', analysis, - '--target', target, - '--package', 'gs://ml-pipeline-playground/xgboost4j-example-0.8-SNAPSHOT-jar-with-dependencies.jar', - '--model', model, - '--output', output, - ], - file_outputs={'output': '/output.txt'}) + return dsl.ContainerOp( + name='Dataproc - Train XGBoost model', + image='gcr.io/ml-pipeline/ml-pipeline-dataproc-train:e20fad3e161e88226c83437271adb063221459b9', + arguments=[ + '--project', project, + '--region', region, + '--cluster', cluster_name, + '--train', train_data, + '--eval', eval_data, + '--analysis', analysis, + '--target', target, + '--package', 'gs://ml-pipeline-playground/xgboost4j-example-0.8-SNAPSHOT-jar-with-dependencies.jar', + '--workers', workers, + '--rounds', rounds, + '--conf', config, + '--output', output, + ], + file_outputs={ + 'output': '/output.txt', + } + ) + + +def dataproc_predict_op( + project, + region, + cluster_name, + data, + model, + target, + analysis, + output +): + return dsl.ContainerOp( + 
name='Dataproc - Predict with XGBoost model', + image='gcr.io/ml-pipeline/ml-pipeline-dataproc-predict:e20fad3e161e88226c83437271adb063221459b9', + arguments=[ + '--project', project, + '--region', region, + '--cluster', cluster_name, + '--predict', data, + '--analysis', analysis, + '--target', target, + '--package', 'gs://ml-pipeline-playground/xgboost4j-example-0.8-SNAPSHOT-jar-with-dependencies.jar', + '--model', model, + '--output', output, + ], + file_outputs={ + 'output': '/output.txt', + } + ) # ======================================================================= @dsl.pipeline( - name='XGBoost Trainer', - description='A trainer that does end-to-end distributed training for XGBoost models.' + name='XGBoost Trainer', + description='A trainer that does end-to-end distributed training for XGBoost models.' ) def xgb_train_pipeline( output, @@ -157,34 +209,75 @@ def xgb_train_pipeline( workers=2, true_label='ACTION', ): - delete_cluster_op = DeleteClusterOp('delete-cluster', project, region).apply(gcp.use_gcp_secret('user-gcp-sa')) - with dsl.ExitHandler(exit_op=delete_cluster_op): - create_cluster_op = CreateClusterOp('create-cluster', project, region, output).apply(gcp.use_gcp_secret('user-gcp-sa')) - - analyze_op = AnalyzeOp('analyze', project, region, create_cluster_op.output, schema, - train_data, '%s/{{workflow.name}}/analysis' % output).apply(gcp.use_gcp_secret('user-gcp-sa')) + output_template = str(output) + '/{{workflow.uid}}/{{pod.name}}/data' - transform_op = TransformOp('transform', project, region, create_cluster_op.output, - train_data, eval_data, target, analyze_op.output, - '%s/{{workflow.name}}/transform' % output).apply(gcp.use_gcp_secret('user-gcp-sa')) - - train_op = TrainerOp('train', project, region, create_cluster_op.output, transform_op.outputs['train'], - transform_op.outputs['eval'], target, analyze_op.output, workers, - rounds, '%s/{{workflow.name}}/model' % output).apply(gcp.use_gcp_secret('user-gcp-sa')) - - predict_op = PredictOp('predict', project, region, create_cluster_op.output, transform_op.outputs['eval'], - train_op.output, target, analyze_op.output, '%s/{{workflow.name}}/predict' % output).apply(gcp.use_gcp_secret('user-gcp-sa')) - - confusion_matrix_task = confusion_matrix_op(predict_op.output, - '%s/{{workflow.name}}/confusionmatrix' % output).apply(gcp.use_gcp_secret('user-gcp-sa')) - - roc_task = roc_op( - predictions_dir=predict_op.output, - true_class=true_label, - true_score_column=true_label, - output_dir='%s/{{workflow.name}}/roc' % output + delete_cluster_op = dataproc_delete_cluster_op( + project, + region ).apply(gcp.use_gcp_secret('user-gcp-sa')) + with dsl.ExitHandler(exit_op=delete_cluster_op): + create_cluster_op = dataproc_create_cluster_op( + project, + region, + output + ).apply(gcp.use_gcp_secret('user-gcp-sa')) + + analyze_op = dataproc_analyze_op( + project, + region, + create_cluster_op.output, + schema, + train_data, + output_template + ).apply(gcp.use_gcp_secret('user-gcp-sa')) + + transform_op = dataproc_transform_op( + project, + region, + create_cluster_op.output, + train_data, + eval_data, + target, + analyze_op.output, + output_template + ).apply(gcp.use_gcp_secret('user-gcp-sa')) + + train_op = dataproc_train_op( + project, + region, + create_cluster_op.output, + transform_op.outputs['train'], + transform_op.outputs['eval'], + target, + analyze_op.output, + workers, + rounds, + output_template + ).apply(gcp.use_gcp_secret('user-gcp-sa')) + + predict_op = dataproc_predict_op( + project, + region, + 
create_cluster_op.output, + transform_op.outputs['eval'], + train_op.output, + target, + analyze_op.output, + output_template + ).apply(gcp.use_gcp_secret('user-gcp-sa')) + + confusion_matrix_task = confusion_matrix_op( + predict_op.output, + output_template + ).apply(gcp.use_gcp_secret('user-gcp-sa')) + + roc_task = roc_op( + predictions_dir=predict_op.output, + true_class=true_label, + true_score_column=true_label, + output_dir=output_template + ).apply(gcp.use_gcp_secret('user-gcp-sa')) + if __name__ == '__main__': - import kfp.compiler as compiler - compiler.Compiler().compile(xgb_train_pipeline, __file__ + '.zip') + kfp.compiler.Compiler().compile(xgb_train_pipeline, __file__ + '.zip') From fbb9824ea11527616287ba11de14b333de825208 Mon Sep 17 00:00:00 2001 From: Ajay Gopinathan Date: Fri, 19 Apr 2019 11:01:53 -0700 Subject: [PATCH 14/43] Ensure API server does not crash if ml-metadata serialized format does (#1192) not match expected format. Previously we assume the fields 'artifact_type' and 'artifact' always exist. This change ensures we guard against the case when one or both of these required fields aren't present. --- .../src/apiserver/metadata/metadata_store.go | 8 ++++ .../apiserver/metadata/metadata_store_test.go | 39 +++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/backend/src/apiserver/metadata/metadata_store.go b/backend/src/apiserver/metadata/metadata_store.go index 39d8b39410b..086bfa4634f 100644 --- a/backend/src/apiserver/metadata/metadata_store.go +++ b/backend/src/apiserver/metadata/metadata_store.go @@ -101,6 +101,14 @@ func (a *artifactStruct) UnmarshalJSON(b []byte) error { return errorF(err) } + if _, ok := jsonMap["artifact_type"]; !ok { + return util.NewInvalidInputError("JSON Unmarshal failure: missing 'artifact_type' field") + } + + if _, ok := jsonMap["artifact"]; !ok { + return util.NewInvalidInputError("JSON Unmarshal failure: missing 'artifact_type' field") + } + a.ArtifactType = &mlpb.ArtifactType{} a.Artifact = &mlpb.Artifact{} diff --git a/backend/src/apiserver/metadata/metadata_store_test.go b/backend/src/apiserver/metadata/metadata_store_test.go index 93791e8b600..5ca9691be1a 100644 --- a/backend/src/apiserver/metadata/metadata_store_test.go +++ b/backend/src/apiserver/metadata/metadata_store_test.go @@ -124,6 +124,45 @@ func TestParseValidTFXMetadata(t *testing.T) { } } +func TestParseInvalidTFXMetadata(t *testing.T) { + tests := []struct { + desc string + input string + }{ + { + "no artifact type", + `[{ + "artifact": { + "uri": "/location", + "properties": { + "state": {"stringValue": "complete"}, + "span": {"intValue": 10} } + } + }]`, + }, + { + "no artifact", + `[{ + "artifact_type": { + "name": "Artifact", + "properties": {"state": "STRING", "span": "INT" } }, + }]`, + }, + { + "empty string", + "", + }, + } + + for _, test := range tests { + _, err := parseTFXMetadata(test.input) + if err == nil { + t.Errorf("Test: %q", test.desc) + t.Errorf("parseTFXMetadata(%q)\nGot non-nil error. Want error.", test.input) + } + } +} + func fakeMLMDStore(t *testing.T) *mlmetadata.Store { cfg := &mlpb.ConnectionConfig{ Config: &mlpb.ConnectionConfig_FakeDatabase{&mlpb.FakeDatabaseConfig{}}, From 00c39dafea82b2f52824db5259b7a283fa55ca7c Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Fri, 19 Apr 2019 12:57:54 -0700 Subject: [PATCH 15/43] Updated the "Kubeflow training and classification" sample (#1114) Modernized the sample pipeline code. 
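[Editor's sketch, not part of any patch] The "modernized" layout that this patch (and patches 11 and 13 above) applies is easier to see outside the diff context: instead of declaring local ContainerOp subclasses, the samples load reusable components from pinned URLs, build per-run output paths from Argo placeholders, and compile with kfp.compiler. The sketch below is illustrative only; the pipeline name and bucket paths are hypothetical placeholders, while load_component_from_url, use_gcp_secret, the confusion-matrix component URL, and the {{workflow.uid}}/{{pod.name}} template are taken directly from the diffs.

# Minimal sketch of the modernized sample structure (assumed paths/names are placeholders).
import kfp
from kfp import components
from kfp import dsl
from kfp import gcp

# Reusable component loaded from a pinned commit URL, as in the updated samples.
confusion_matrix_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/local/confusion_matrix/component.yaml')

@dsl.pipeline(
    name='Component reuse sketch',
    description='Illustrates the modernized sample layout; not an actual sample in this repo.'
)
def sketch_pipeline(
    output='gs://my-bucket/output',            # placeholder bucket
    predictions='gs://my-bucket/predictions',  # placeholder predictions path
):
    # Per-run output directory built from Argo workflow placeholders, as in the samples.
    output_template = str(output) + '/{{workflow.uid}}/{{pod.name}}/data'

    confusion_matrix_op(
        predictions=predictions,
        output_dir=output_template,
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))  # attach the GCP service-account secret

if __name__ == '__main__':
    kfp.compiler.Compiler().compile(sketch_pipeline, __file__ + '.zip')

The diffstat and diff for this patch follow below.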
--- .../kubeflow-training-classification.py | 115 +++++++++--------- 1 file changed, 58 insertions(+), 57 deletions(-) diff --git a/samples/kubeflow-tf/kubeflow-training-classification.py b/samples/kubeflow-tf/kubeflow-training-classification.py index 522443cb3cb..dea5957bd8e 100755 --- a/samples/kubeflow-tf/kubeflow-training-classification.py +++ b/samples/kubeflow-tf/kubeflow-training-classification.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright 2018 Google LLC +# Copyright 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,10 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import kfp.dsl as dsl -import kfp.gcp as gcp +import kfp from kfp import components +from kfp import dsl +from kfp import gcp dataflow_tf_transform_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/dataflow/tft/component.yaml') kubeflow_tf_training_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/kubeflow/dnntrainer/component.yaml') @@ -24,67 +25,67 @@ confusion_matrix_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/local/confusion_matrix/component.yaml') @dsl.pipeline( - name='Pipeline TFJob', - description='Demonstrate the DSL for TFJob' + name='TF training and prediction pipeline', + description='' ) def kubeflow_training(output, project, - evaluation='gs://ml-pipeline-playground/flower/eval100.csv', - train='gs://ml-pipeline-playground/flower/train200.csv', - schema='gs://ml-pipeline-playground/flower/schema.json', - learning_rate=0.1, - hidden_layer_size='100,50', - steps=2000, - target='label', - workers=0, - pss=0, - preprocess_mode='local', - predict_mode='local'): - # TODO: use the argo job name as the workflow - workflow = '{{workflow.name}}' - # set the flag to use GPU trainer - use_gpu = False + evaluation='gs://ml-pipeline-playground/flower/eval100.csv', + train='gs://ml-pipeline-playground/flower/train200.csv', + schema='gs://ml-pipeline-playground/flower/schema.json', + learning_rate=0.1, + hidden_layer_size='100,50', + steps=2000, + target='label', + workers=0, + pss=0, + preprocess_mode='local', + predict_mode='local', +): + output_template = str(output) + '/{{workflow.uid}}/{{pod.name}}/data' - preprocess = dataflow_tf_transform_op( - training_data_file_pattern=train, - evaluation_data_file_pattern=evaluation, - schema=schema, - gcp_project=project, - run_mode=preprocess_mode, - preprocessing_module='', - transformed_data_dir='%s/%s/transformed' % (output, workflow) - ).apply(gcp.use_gcp_secret('user-gcp-sa')) + # set the flag to use GPU trainer + use_gpu = False - training = kubeflow_tf_training_op( - transformed_data_dir=preprocess.output, - schema=schema, - learning_rate=learning_rate, - hidden_layer_size=hidden_layer_size, - steps=steps, - target=target, - preprocessing_module='', - training_output_dir='%s/%s/train' % (output, workflow) - ).apply(gcp.use_gcp_secret('user-gcp-sa')) + preprocess = dataflow_tf_transform_op( + training_data_file_pattern=train, + evaluation_data_file_pattern=evaluation, + schema=schema, + gcp_project=project, + run_mode=preprocess_mode, + preprocessing_module='', + transformed_data_dir=output_template + 
).apply(gcp.use_gcp_secret('user-gcp-sa')) - if use_gpu: - training.image = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer-gpu:e20fad3e161e88226c83437271adb063221459b9', - training.set_gpu_limit(1) + training = kubeflow_tf_training_op( + transformed_data_dir=preprocess.output, + schema=schema, + learning_rate=learning_rate, + hidden_layer_size=hidden_layer_size, + steps=steps, + target=target, + preprocessing_module='', + training_output_dir=output_template + ).apply(gcp.use_gcp_secret('user-gcp-sa')) - prediction = dataflow_tf_predict_op( - data_file_pattern=evaluation, - schema=schema, - target_column=target, - model=training.output, - run_mode=predict_mode, - gcp_project=project, - predictions_dir='%s/%s/predict' % (output, workflow) - ).apply(gcp.use_gcp_secret('user-gcp-sa')) + if use_gpu: + training.image = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer-gpu:e20fad3e161e88226c83437271adb063221459b9', + training.set_gpu_limit(1) - confusion_matrix = confusion_matrix_op( - predictions=prediction.output, - output_dir='%s/%s/confusionmatrix' % (output, workflow) - ).apply(gcp.use_gcp_secret('user-gcp-sa')) + prediction = dataflow_tf_predict_op( + data_file_pattern=evaluation, + schema=schema, + target_column=target, + model=training.output, + run_mode=predict_mode, + gcp_project=project, + predictions_dir=output_template + ).apply(gcp.use_gcp_secret('user-gcp-sa')) + + confusion_matrix = confusion_matrix_op( + predictions=prediction.output, + output_dir=output_template + ).apply(gcp.use_gcp_secret('user-gcp-sa')) if __name__ == '__main__': - import kfp.compiler as compiler - compiler.Compiler().compile(kubeflow_training, __file__ + '.zip') + kfp.compiler.Compiler().compile(kubeflow_training, __file__ + '.zip') From 9fc1212d33c8aad0661486bfca0f8af2247f6a83 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Fri, 19 Apr 2019 22:17:40 -0700 Subject: [PATCH 16/43] Updated vulnerable package (#1193) See https://nvd.nist.gov/vuln/detail/CVE-2019-10906 --- .../openvino/ovms-deployer/containers/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/components/openvino/ovms-deployer/containers/requirements.txt b/contrib/components/openvino/ovms-deployer/containers/requirements.txt index 0501d65fe0f..0122d02ef4e 100644 --- a/contrib/components/openvino/ovms-deployer/containers/requirements.txt +++ b/contrib/components/openvino/ovms-deployer/containers/requirements.txt @@ -1,3 +1,3 @@ -jinja2==2.10 +jinja2==2.10.1 futures==3.1.1 -tensorflow-serving-api==1.13.0 \ No newline at end of file +tensorflow-serving-api==1.13.0 From 72b091486842d8ed8d39be7d2a75e653e85fe3fd Mon Sep 17 00:00:00 2001 From: Riley Bauer <34456002+rileyjbauer@users.noreply.github.com> Date: Mon, 22 Apr 2019 10:45:30 -0700 Subject: [PATCH 17/43] Fixes deletion of recurring runs (#1185) * Fixes deletion of recurring runs and redirects after deleting pipeline from pipeline details page * Add comment for deletion on recurring run and pipeline details pages * Remove refreshOnComplete flag from buttons --- frontend/src/lib/Buttons.ts | 12 +++++++----- frontend/src/pages/PipelineDetails.tsx | 17 +++++++++++++++-- frontend/src/pages/PipelineList.tsx | 2 +- frontend/src/pages/RecurringRunDetails.tsx | 2 +- 4 files changed, 24 insertions(+), 9 deletions(-) diff --git a/frontend/src/lib/Buttons.ts b/frontend/src/lib/Buttons.ts index 6fe4aabff52..9e2d256191b 100644 --- a/frontend/src/lib/Buttons.ts +++ b/frontend/src/lib/Buttons.ts @@ -83,7 +83,7 @@ export default class Buttons { callback: 
(selectedIds: string[], success: boolean) => void, useCurrentResource: boolean): ToolbarActionConfig { return { action: () => resourceName === 'pipeline' ? - this._deletePipeline(getSelectedIds(), callback, useCurrentResource) : + this._deletePipeline(getSelectedIds(), useCurrentResource, callback) : this._deleteRecurringRun(getSelectedIds()[0], useCurrentResource, callback), disabled: !useCurrentResource, disabledTitle: useCurrentResource ? undefined : `Select at least one ${resourceName} to delete`, @@ -252,8 +252,8 @@ export default class Buttons { ); } - private _deletePipeline(selectedIds: string[], callback: (selectedIds: string[], success: boolean) => void, - useCurrentResource: boolean): void { + private _deletePipeline(selectedIds: string[], useCurrentResource: boolean, + callback: (selectedIds: string[], success: boolean) => void): void { this._dialogActionHandler( selectedIds, 'Do you want to delete this Pipeline? This action cannot be undone.', @@ -271,7 +271,7 @@ export default class Buttons { [id], 'Do you want to delete this recurring run config? This action cannot be undone.', useCurrentResource, - Apis.jobServiceApi.deleteJob, + jobId => Apis.jobServiceApi.deleteJob(jobId), callback, 'Delete', 'recurring run config', @@ -335,7 +335,9 @@ export default class Buttons { message: `${actionName} succeeded for ${useCurrentResource ? 'this' : successfulOps} ${resourceName}${useCurrentResource ? '' : s(successfulOps)}`, open: true, }); - this._refresh(); + if (!useCurrentResource) { + this._refresh(); + } } if (unsuccessfulIds.length > 0) { diff --git a/frontend/src/pages/PipelineDetails.tsx b/frontend/src/pages/PipelineDetails.tsx index 746781e8dd6..b4dfffef0ba 100644 --- a/frontend/src/pages/PipelineDetails.tsx +++ b/frontend/src/pages/PipelineDetails.tsx @@ -131,8 +131,12 @@ class PipelineDetails extends Page<{}, PipelineDetailsState> { // Add buttons for creating experiment and deleting pipeline actions = actions.concat([ buttons.newExperiment(() => this.state.pipeline ? this.state.pipeline.id! : ''), - buttons.delete(() => this.state.pipeline ? [this.state.pipeline.id!] : [], - 'pipeline', () => null, true), + buttons.delete( + () => this.state.pipeline ? [this.state.pipeline.id!] : [], + 'pipeline', + this._deleteCallback.bind(this), + true, /* useCurrentResource */ + ), ]); return { actions, @@ -345,6 +349,15 @@ class PipelineDetails extends Page<{}, PipelineDetailsState> { templateString, }); } + + private _deleteCallback(_: string[], success: boolean): void { + if (success) { + const breadcrumbs = this.props.toolbarProps.breadcrumbs; + const previousPage = breadcrumbs.length ? 
+ breadcrumbs[breadcrumbs.length - 1].href : RoutePage.PIPELINES; + this.props.history.push(previousPage); + } + } } export default PipelineDetails; diff --git a/frontend/src/pages/PipelineList.tsx b/frontend/src/pages/PipelineList.tsx index 132be35363e..2463d8a7eb5 100644 --- a/frontend/src/pages/PipelineList.tsx +++ b/frontend/src/pages/PipelineList.tsx @@ -58,7 +58,7 @@ class PipelineList extends Page<{}, PipelineListState> { () => this.state.selectedIds, 'pipeline', ids => this._selectionChanged(ids), - false, + false, /* useCurrentResource */ ), ], breadcrumbs: [], diff --git a/frontend/src/pages/RecurringRunDetails.tsx b/frontend/src/pages/RecurringRunDetails.tsx index b3de4c58cce..184e6e58c7b 100644 --- a/frontend/src/pages/RecurringRunDetails.tsx +++ b/frontend/src/pages/RecurringRunDetails.tsx @@ -54,7 +54,7 @@ class RecurringRunDetails extends Page<{}, RecurringRunConfigState> { () => this.state.run ? [this.state.run!.id!] : [], 'recurring run config', this._deleteCallback.bind(this), - true, + true, /* useCurrentResource */ ), ], breadcrumbs: [], From b29266351ed3c639356c4fa578950a71baa54662 Mon Sep 17 00:00:00 2001 From: Riley Bauer <34456002+rileyjbauer@users.noreply.github.com> Date: Mon, 22 Apr 2019 11:59:45 -0700 Subject: [PATCH 18/43] Allow creating runs without experiments (#1175) * Adds 'Create run' button to experiment list / all runs page * Add run without experiment and filtering to FE integration test * Update snapshots * Add refresh and wait to integration test * Adjust * Adjust * Don't exit integration test early if npm test fails * PR comments * TEMP - take screenshots to debug integration test * Store screenshots * Remove create run without experiment integration test for now as it fails due to the default experiment being deleted at the end of the API initialization and integration test suites --- frontend/src/components/CustomTable.tsx | 2 +- .../__snapshots__/CustomTable.test.tsx.snap | 19 +++++++ frontend/src/lib/Buttons.ts | 2 +- frontend/src/pages/AllRunsList.tsx | 9 ++-- frontend/src/pages/ExperimentList.test.tsx | 4 +- frontend/src/pages/ExperimentList.tsx | 7 +-- frontend/src/pages/PipelineDetails.test.tsx | 4 +- .../__snapshots__/AllRunsList.test.tsx.snap | 11 +++- .../pages/__snapshots__/RunList.test.tsx.snap | 15 ++++++ .../helloworld.spec.js | 52 +++++++++++++++++++ test/frontend-integration-test/run_test.sh | 3 ++ 11 files changed, 115 insertions(+), 13 deletions(-) diff --git a/frontend/src/components/CustomTable.tsx b/frontend/src/components/CustomTable.tsx index cd7633f7b0d..35b2ceb2d5e 100644 --- a/frontend/src/components/CustomTable.tsx +++ b/frontend/src/components/CustomTable.tsx @@ -290,7 +290,7 @@ export default class CustomTable extends React.Component - { const buttons = new Buttons(this.props, this.refresh.bind(this)); return { actions: [ + buttons.newRun(), buttons.newExperiment(), buttons.compareRuns(() => this.state.selectedIds), buttons.cloneRun(() => this.state.selectedIds, false), @@ -76,12 +77,14 @@ class AllRunsList extends Page<{}, AllRunsListState> { private _selectionChanged(selectedIds: string[]): void { const toolbarActions = [...this.props.toolbarProps.actions]; + // TODO: keeping track of indices in the toolbarActions array is not ideal. This should be + // refactored so that individual buttons can be referenced with something other than indices. 
// Compare runs button - toolbarActions[1].disabled = selectedIds.length <= 1 || selectedIds.length > 10; + toolbarActions[2].disabled = selectedIds.length <= 1 || selectedIds.length > 10; // Clone run button - toolbarActions[2].disabled = selectedIds.length !== 1; + toolbarActions[3].disabled = selectedIds.length !== 1; // Archive run button - toolbarActions[3].disabled = !selectedIds.length; + toolbarActions[4].disabled = !selectedIds.length; this.props.updateToolbar({ breadcrumbs: this.props.toolbarProps.breadcrumbs, actions: toolbarActions }); this.setState({ selectedIds }); } diff --git a/frontend/src/pages/ExperimentList.test.tsx b/frontend/src/pages/ExperimentList.test.tsx index 0a9c71fbf32..41cd218e1fe 100644 --- a/frontend/src/pages/ExperimentList.test.tsx +++ b/frontend/src/pages/ExperimentList.test.tsx @@ -239,7 +239,7 @@ describe('ExperimentList', () => { it('navigates to new experiment page when Create experiment button is clicked', async () => { tree = TestUtils.mountWithRouter(); const createBtn = (tree.instance() as ExperimentList) - .getInitialToolbarState().actions.find(b => b.title === 'Create an experiment'); + .getInitialToolbarState().actions.find(b => b.title === 'Create experiment'); await createBtn!.action(); expect(historyPushSpy).toHaveBeenLastCalledWith(RoutePage.NEW_EXPERIMENT); }); @@ -247,7 +247,7 @@ describe('ExperimentList', () => { it('always has new experiment button enabled', async () => { await mountWithNExperiments(1, 1); const calls = updateToolbarSpy.mock.calls[0]; - expect(calls[0].actions.find((b: any) => b.title === 'Create an experiment')).not.toHaveProperty('disabled'); + expect(calls[0].actions.find((b: any) => b.title === 'Create experiment')).not.toHaveProperty('disabled'); }); it('enables clone button when one run is selected', async () => { diff --git a/frontend/src/pages/ExperimentList.tsx b/frontend/src/pages/ExperimentList.tsx index 4b830820868..01dd2249f94 100644 --- a/frontend/src/pages/ExperimentList.tsx +++ b/frontend/src/pages/ExperimentList.tsx @@ -61,6 +61,7 @@ class ExperimentList extends Page<{}, ExperimentListState> { const buttons = new Buttons(this.props, this.refresh.bind(this)); return { actions: [ + buttons.newRun(), buttons.newExperiment(), buttons.compareRuns(() => this.state.selectedIds), buttons.cloneRun(() => this.state.selectedIds, false), @@ -187,11 +188,11 @@ class ExperimentList extends Page<{}, ExperimentListState> { private _selectionChanged(selectedIds: string[]): void { const actions = produce(this.props.toolbarProps.actions, draft => { // Enable/Disable Run compare button - draft[1].disabled = selectedIds.length <= 1 || selectedIds.length > 10; + draft[2].disabled = selectedIds.length <= 1 || selectedIds.length > 10; // Enable/Disable Clone button - draft[2].disabled = selectedIds.length !== 1; + draft[3].disabled = selectedIds.length !== 1; // Archive run button - draft[3].disabled = !selectedIds.length; + draft[4].disabled = !selectedIds.length; }); this.props.updateToolbar({ actions }); this.setState({ selectedIds }); diff --git a/frontend/src/pages/PipelineDetails.test.tsx b/frontend/src/pages/PipelineDetails.test.tsx index 64c3970c78f..9652264c2be 100644 --- a/frontend/src/pages/PipelineDetails.test.tsx +++ b/frontend/src/pages/PipelineDetails.test.tsx @@ -325,7 +325,7 @@ describe('PipelineDetails', () => { await TestUtils.flushPromises(); const instance = tree.instance() as PipelineDetails; const newExperimentBtn = instance.getInitialToolbarState().actions.find( - b => b.title === 'Create an 
experiment'); + b => b.title === 'Create experiment'); expect(newExperimentBtn).toBeDefined(); }); @@ -379,7 +379,7 @@ describe('PipelineDetails', () => { await TestUtils.flushPromises(); const instance = tree.instance() as PipelineDetails; const newExperimentBtn = instance.getInitialToolbarState().actions.find( - b => b.title === 'Create an experiment'); + b => b.title === 'Create experiment'); await newExperimentBtn!.action(); expect(historyPushSpy).toHaveBeenCalledTimes(1); expect(historyPushSpy).toHaveBeenLastCalledWith( diff --git a/frontend/src/pages/__snapshots__/AllRunsList.test.tsx.snap b/frontend/src/pages/__snapshots__/AllRunsList.test.tsx.snap index 1e300552bac..8ec3c9211a3 100644 --- a/frontend/src/pages/__snapshots__/AllRunsList.test.tsx.snap +++ b/frontend/src/pages/__snapshots__/AllRunsList.test.tsx.snap @@ -19,12 +19,21 @@ exports[`AllRunsList renders all runs 1`] = ` toolbarProps={ Object { "actions": Array [ + Object { + "action": [Function], + "icon": [Function], + "id": "createNewRunBtn", + "outlined": true, + "primary": true, + "title": "Create run", + "tooltip": "Create a new run", + }, Object { "action": [Function], "icon": [Function], "id": "newExperimentBtn", "outlined": true, - "title": "Create an experiment", + "title": "Create experiment", "tooltip": "Create a new experiment", }, Object { diff --git a/frontend/src/pages/__snapshots__/RunList.test.tsx.snap b/frontend/src/pages/__snapshots__/RunList.test.tsx.snap index 4dd44a80aee..ee96341c88e 100644 --- a/frontend/src/pages/__snapshots__/RunList.test.tsx.snap +++ b/frontend/src/pages/__snapshots__/RunList.test.tsx.snap @@ -708,6 +708,7 @@ exports[`RunList reloads the run when refresh is called 1`] = ` } className="filterBox" height={48} + id="tableFilterBox" label="Filter runs" maxWidth="100%" onChange={[Function]} @@ -743,6 +744,7 @@ exports[`RunList reloads the run when refresh is called 1`] = ` } } className="filterBox" + id="tableFilterBox" label="Filter runs" onChange={[Function]} required={false} @@ -814,6 +816,7 @@ exports[`RunList reloads the run when refresh is called 1`] = ` "root": "noMargin", } } + htmlFor="tableFilterBox" > Filter runs @@ -944,6 +953,7 @@ exports[`RunList reloads the run when refresh is called 1`] = ` "root": "noLeftPadding", } } + id="tableFilterBox" labelWidth={0} onChange={[Function]} startAdornment={ @@ -980,6 +990,7 @@ exports[`RunList reloads the run when refresh is called 1`] = ` "root": "MuiOutlinedInput-root-23 noLeftPadding", } } + id="tableFilterBox" labelWidth={0} onChange={[Function]} startAdornment={ @@ -1017,6 +1028,7 @@ exports[`RunList reloads the run when refresh is called 1`] = ` } } fullWidth={false} + id="tableFilterBox" inputComponent="input" multiline={false} onChange={[Function]} @@ -1061,6 +1073,7 @@ exports[`RunList reloads the run when refresh is called 1`] = ` } } fullWidth={false} + id="tableFilterBox" inputComponent="input" multiline={false} onChange={[Function]} @@ -1105,6 +1118,7 @@ exports[`RunList reloads the run when refresh is called 1`] = ` } } fullWidth={false} + id="tableFilterBox" inputComponent="input" muiFormControl={ Object { @@ -1652,6 +1666,7 @@ exports[`RunList reloads the run when refresh is called 1`] = ` aria-invalid={false} className="MuiInputBase-input-46 MuiOutlinedInput-input-31 MuiInputBase-inputAdornedStart-51 MuiOutlinedInput-inputAdornedStart-34" disabled={false} + id="tableFilterBox" onBlur={[Function]} onChange={[Function]} onFocus={[Function]} diff --git a/test/frontend-integration-test/helloworld.spec.js 
b/test/frontend-integration-test/helloworld.spec.js index 391b1323550..4e57d57a1f8 100644 --- a/test/frontend-integration-test/helloworld.spec.js +++ b/test/frontend-integration-test/helloworld.spec.js @@ -17,6 +17,8 @@ const URL = require('url').URL; const experimentName = 'helloworld-experiment-' + Date.now(); const experimentDescription = 'hello world experiment description'; +const secondExperimentName = 'different-experiment-name-' + Date.now(); +const secondExperimentNameDescription = 'second experiment description'; const pipelineName = 'helloworld-pipeline-' + Date.now(); const runName = 'helloworld-' + Date.now(); const runDescription = 'test run description ' + runName; @@ -182,6 +184,56 @@ describe('deploy helloworld sample run', () => { return logs.indexOf(outputParameterValue + ' from node: ') > -1; }, waitTimeout); }); + + it('navigates back to the experiment list', () => { + $('button=Experiments').click(); + browser.waitUntil(() => { + return new URL(browser.getUrl()).hash.startsWith('#/experiments'); + }, waitTimeout); + }); + + it('creates a new experiment', () => { + $('#newExperimentBtn').click(); + browser.waitUntil(() => { + return new URL(browser.getUrl()).hash.startsWith('#/experiments/new'); + }, waitTimeout); + + $('#experimentName').setValue(secondExperimentName); + $('#experimentDescription').setValue(secondExperimentNameDescription); + + $('#createExperimentBtn').click(); + }); + + it('navigates back to the experiment list', () => { + $('button=Experiments').click(); + browser.waitUntil(() => { + return new URL(browser.getUrl()).hash.startsWith('#/experiments'); + }, waitTimeout); + }); + + it('displays both experiments in the list', () => { + $('.tableRow').waitForVisible(); + const rows = $$('.tableRow').length; + assert(rows === 2, 'there should now be two experiments in the table, instead there are: ' + rows); + }); + + it('filters the experiment list', () => { + // Enter "hello" into filter bar + browser.click('#tableFilterBox'); + browser.keys(experimentName.substring(0, 5)); + // Wait for the list to refresh + browser.pause(2000); + + $('.tableRow').waitForVisible(); + const rows = $$('.tableRow').length; + assert(rows === 1, 'there should now be one experiment in the table, instead there are: ' + rows); + }); + + // TODO: Add test for creating a run without an experiment. This will require changing the API + // initialization and integration tests to stop deleting the default experiment at the end of the + // suites. Otherwise, run creation here will fail with: + // 'Failed to store resource references to table for run [ID] : ResourceNotFoundError: [Default Experiment ID]' + //TODO: enable this after we change the pipeline to a unique name such that deleting this // pipeline will not jeopardize the concurrent basic e2e tests. // it('deletes the uploaded pipeline', () => { diff --git a/test/frontend-integration-test/run_test.sh b/test/frontend-integration-test/run_test.sh index 7754adfa500..bda8002496e 100755 --- a/test/frontend-integration-test/run_test.sh +++ b/test/frontend-integration-test/run_test.sh @@ -74,8 +74,11 @@ POD=`/src/tools/google-cloud-sdk/bin/kubectl get pods -n ${NAMESPACE} -l app=ml- ./node_modules/.bin/wait-port 127.0.0.1:3000 -t 20000 export PIPELINE_OUTPUT=${RESULTS_GCS_DIR}/pipeline_output +# Don't exit early if 'npm test' fails +set +e npm test TEST_EXIT_CODE=$? 
+set -e JUNIT_TEST_RESULT=junit_FrontendIntegrationTestOutput.xml From 99c40a22cff0df7103cd5125952e9d1b659cc780 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Mon, 22 Apr 2019 17:52:06 -0700 Subject: [PATCH 19/43] Fix package version conflict (#1201) * Fix package version conflict * Fixing component_sdk requirements.txt --- component_sdk/python/requirements.txt | 1 + sdk/python/setup.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/component_sdk/python/requirements.txt b/component_sdk/python/requirements.txt index b9f65adba24..2dfb6df00ab 100644 --- a/component_sdk/python/requirements.txt +++ b/component_sdk/python/requirements.txt @@ -1,4 +1,5 @@ kubernetes == 8.0.1 +urllib3>=1.15,<1.25 #Fixing the version conflict with the "requests" package fire == 0.1.3 google-api-python-client == 1.7.8 google-cloud-storage == 1.14.0 diff --git a/sdk/python/setup.py b/sdk/python/setup.py index b3760142cd1..cff12e11a8d 100644 --- a/sdk/python/setup.py +++ b/sdk/python/setup.py @@ -18,7 +18,7 @@ VERSION = '0.1.16' REQUIRES = [ - 'urllib3>=1.15', + 'urllib3>=1.15,<1.25', #Fixing the version conflict with the "requests" package 'six >= 1.10', 'certifi', 'python-dateutil', From fe042540e845009c22377c037d89513504129802 Mon Sep 17 00:00:00 2001 From: Ning Date: Tue, 23 Apr 2019 12:44:06 -0700 Subject: [PATCH 20/43] update types to defined core type names (#1206) --- components/dataflow/predict/component.yaml | 12 +- components/dataflow/tfdv/component.yaml | 12 +- components/dataflow/tfma/component.yaml | 12 +- components/dataflow/tft/component.yaml | 14 +- .../gcp/dataflow/launch_python/README.md | 250 +++++++++--------- .../gcp/dataflow/launch_python/sample.ipynb | 2 +- .../gcp/dataproc/submit_hive_job/README.md | 2 +- .../gcp/dataproc/submit_hive_job/sample.ipynb | 2 +- components/kubeflow/deployer/component.yaml | 2 +- components/kubeflow/dnntrainer/component.yaml | 10 +- .../kubeflow/katib-launcher/component.yaml | 2 +- .../local/confusion_matrix/component.yaml | 4 +- components/local/roc/component.yaml | 4 +- 13 files changed, 164 insertions(+), 164 deletions(-) diff --git a/components/dataflow/predict/component.yaml b/components/dataflow/predict/component.yaml index ddcaabe5e42..50bd9770738 100644 --- a/components/dataflow/predict/component.yaml +++ b/components/dataflow/predict/component.yaml @@ -3,16 +3,16 @@ description: | Runs TensorFlow prediction on Google Cloud Dataflow Input and output data is in GCS inputs: - - {name: Data file pattern, type: GCPPath, description: 'GCS or local path of test file patterns.'} # type: {GCSPath: {data_type: CSV}} - - {name: Schema, type: GCPPath, description: 'GCS json schema file path.'} # type: {GCSPath: {data_type: TFDV schema JSON}} + - {name: Data file pattern, type: GCSPath, description: 'GCS or local path of test file patterns.'} # type: {GCSPath: {data_type: CSV}} + - {name: Schema, type: GCSPath, description: 'GCS json schema file path.'} # type: {GCSPath: {data_type: TFDV schema JSON}} - {name: Target column, type: String, description: 'Name of the column for prediction target.'} - - {name: Model, type: GCPPath, description: 'GCS or local path of model trained with tft preprocessed data.'} # Models trained with estimator are exported to base/export/export/123456781 directory. # Our trainer export only one model. 
#TODO: Output single model from trainer # type: {GCSPath: {path_type: Directory, data_type: Exported TensorFlow models dir}} + - {name: Model, type: GCSPath, description: 'GCS or local path of model trained with tft preprocessed data.'} # Models trained with estimator are exported to base/export/export/123456781 directory. # Our trainer export only one model. #TODO: Output single model from trainer # type: {GCSPath: {path_type: Directory, data_type: Exported TensorFlow models dir}} - {name: Batch size, type: Integer, default: '32', description: 'Batch size used in prediction.'} - {name: Run mode, type: String, default: local, description: 'Whether to run the job locally or in Cloud Dataflow. Valid values are "local" and "cloud".'} - - {name: GCP project, type: GcpProject, description: 'The GCP project to run the dataflow job.'} - - {name: Predictions dir, type: GCPPath, description: 'GCS or local directory.'} #Will contain prediction_results-* and schema.json files; TODO: Split outputs and replace dir with single file # type: {GCSPath: {path_type: Directory}} + - {name: GCP project, type: GCPProjectID, description: 'The GCP project to run the dataflow job.'} + - {name: Predictions dir, type: GCSPath, description: 'GCS or local directory.'} #Will contain prediction_results-* and schema.json files; TODO: Split outputs and replace dir with single file # type: {GCSPath: {path_type: Directory}} outputs: - - {name: Predictions dir, type: GCPPath, description: 'GCS or local directory.'} #Will contain prediction_results-* and schema.json files; TODO: Split outputs and replace dir with single file # type: {GCSPath: {path_type: Directory}} + - {name: Predictions dir, type: GCSPath, description: 'GCS or local directory.'} #Will contain prediction_results-* and schema.json files; TODO: Split outputs and replace dir with single file # type: {GCSPath: {path_type: Directory}} implementation: container: image: gcr.io/ml-pipeline/ml-pipeline-dataflow-tf-predict:e20fad3e161e88226c83437271adb063221459b9 diff --git a/components/dataflow/tfdv/component.yaml b/components/dataflow/tfdv/component.yaml index b28c8fd920f..a07fb31287c 100644 --- a/components/dataflow/tfdv/component.yaml +++ b/components/dataflow/tfdv/component.yaml @@ -6,15 +6,15 @@ description: | * infer a schema, * detect data anomalies. 
inputs: -- {name: Inference data, type: GCPPath, description: GCS path of the CSV file from which to infer the schema.} # type: {GCSPath: {data_type: CSV}} -- {name: Validation data, type: GCPPath, description: GCS path of the CSV file whose contents should be validated.} # type: {GCSPath: {data_type: CSV}} -- {name: Column names, type: GCPPath, description: GCS json file containing a list of column names.} # type: {GCSPath: {data_type: JSON}} +- {name: Inference data, type: GCSPath, description: GCS path of the CSV file from which to infer the schema.} # type: {GCSPath: {data_type: CSV}} +- {name: Validation data, type: GCSPath, description: GCS path of the CSV file whose contents should be validated.} # type: {GCSPath: {data_type: CSV}} +- {name: Column names, type: GCSPath, description: GCS json file containing a list of column names.} # type: {GCSPath: {data_type: JSON}} - {name: Key columns, type: String, description: Comma separated list of columns to treat as keys.} -- {name: GCP project, type: GcpProject, default: '', description: The GCP project to run the dataflow job.} +- {name: GCP project, type: GCPProjectID, default: '', description: The GCP project to run the dataflow job.} - {name: Run mode, type: String, default: local, description: Whether to run the job locally or in Cloud Dataflow. Valid values are "local" and "cloud". } -- {name: Validation output, type: GCPPath, description: GCS or local directory.} # type: {GCSPath: {path_type: Directory}} +- {name: Validation output, type: GCSPath, description: GCS or local directory.} # type: {GCSPath: {path_type: Directory}} outputs: -- {name: Schema, type: GCPPath, description: GCS path of the inferred schema JSON.} # type: {GCSPath: {data_type: TFDV schema JSON}} +- {name: Schema, type: GCSPath, description: GCS path of the inferred schema JSON.} # type: {GCSPath: {data_type: TFDV schema JSON}} - {name: Validation result, type: String, description: Indicates whether anomalies were detected or not.} implementation: container: diff --git a/components/dataflow/tfma/component.yaml b/components/dataflow/tfma/component.yaml index 1efcd8d1a15..1eead992608 100644 --- a/components/dataflow/tfma/component.yaml +++ b/components/dataflow/tfma/component.yaml @@ -6,15 +6,15 @@ description: | * tracking metrics over time * model quality performance on different feature slices inputs: -- {name: Model, type: GCPPath, description: GCS path to the model which will be evaluated.} # type: {GCSPath: {path_type: Directory, data_type: Exported TensorFlow models dir}} -- {name: Evaluation data, type: GCPPath, description: GCS path of eval files.} # type: {GCSPath: {data_type: CSV}} -- {name: Schema, type: GCPPath, description: GCS json schema file path.} # type: {GCSPath: {data_type: TFDV schema JSON}} +- {name: Model, type: GCSPath, description: GCS path to the model which will be evaluated.} # type: {GCSPath: {path_type: Directory, data_type: Exported TensorFlow models dir}} +- {name: Evaluation data, type: GCSPath, description: GCS path of eval files.} # type: {GCSPath: {data_type: CSV}} +- {name: Schema, type: GCSPath, description: GCS json schema file path.} # type: {GCSPath: {data_type: TFDV schema JSON}} - {name: Run mode, type: String, default: local, description: whether to run the job locally or in Cloud Dataflow.} -- {name: GCP project, type: GcpProject, default: '', description: 'The GCP project to run the dataflow job, if running in the `cloud` mode.'} +- {name: GCP project, type: GCPProjectID, default: '', description: 'The GCP project to 
run the dataflow job, if running in the `cloud` mode.'} - {name: Slice columns, type: String, description: Comma-separated list of columns on which to slice for analysis.} -- {name: Analysis results dir, type: GCPPath, description: GCS or local directory where the analysis results should be written.} # type: {GCSPath: {path_type: Directory}} +- {name: Analysis results dir, type: GCSPath, description: GCS or local directory where the analysis results should be written.} # type: {GCSPath: {path_type: Directory}} outputs: -- {name: Analysis results dir, type: GCPPath, description: GCS or local directory where the analysis results should were written.} # type: {GCSPath: {path_type: Directory}} +- {name: Analysis results dir, type: GCSPath, description: GCS or local directory where the analysis results should were written.} # type: {GCSPath: {path_type: Directory}} implementation: container: image: gcr.io/ml-pipeline/ml-pipeline-dataflow-tfma:e20fad3e161e88226c83437271adb063221459b9 diff --git a/components/dataflow/tft/component.yaml b/components/dataflow/tft/component.yaml index 30f38755600..de356d4506d 100644 --- a/components/dataflow/tft/component.yaml +++ b/components/dataflow/tft/component.yaml @@ -1,15 +1,15 @@ name: Transform using TF on Dataflow description: Runs TensorFlow Transform on Google Cloud Dataflow inputs: - - {name: Training data file pattern, type: GCPPath, description: 'GCS path of train file patterns.'} #Also supports local CSV # type: {GCSPath: {data_type: CSV}} - - {name: Evaluation data file pattern, type: GCPPath, description: 'GCS path of eval file patterns.'} #Also supports local CSV # type: {GCSPath: {data_type: CSV}} - - {name: Schema, type: GCPPath, description: 'GCS json schema file path.'} # type: {GCSPath: {data_type: JSON}} - - {name: GCP project, type: GcpProject, description: 'The GCP project to run the dataflow job.'} + - {name: Training data file pattern, type: GCSPath, description: 'GCS path of train file patterns.'} #Also supports local CSV # type: {GCSPath: {data_type: CSV}} + - {name: Evaluation data file pattern, type: GCSPath, description: 'GCS path of eval file patterns.'} #Also supports local CSV # type: {GCSPath: {data_type: CSV}} + - {name: Schema, type: GCSPath, description: 'GCS json schema file path.'} # type: {GCSPath: {data_type: JSON}} + - {name: GCP project, type: GCPProjectID, description: 'The GCP project to run the dataflow job.'} - {name: Run mode, type: String, default: local, description: 'Whether to run the job locally or in Cloud Dataflow. Valid values are "local" and "cloud".' 
} - - {name: Preprocessing module, type: GCPPath, default: '', description: 'GCS path to a python file defining "preprocess" and "get_feature_columns" functions.'} # type: {GCSPath: {data_type: Python}} - - {name: Transformed data dir, type: GCPPath, description: 'GCS or local directory'} #Also supports local paths # type: {GCSPath: {path_type: Directory}} + - {name: Preprocessing module, type: GCSPath, default: '', description: 'GCS path to a python file defining "preprocess" and "get_feature_columns" functions.'} # type: {GCSPath: {data_type: Python}} + - {name: Transformed data dir, type: GCSPath, description: 'GCS or local directory'} #Also supports local paths # type: {GCSPath: {path_type: Directory}} outputs: - - {name: Transformed data dir, type: GCPPath} # type: {GCSPath: {path_type: Directory}} + - {name: Transformed data dir, type: GCSPath} # type: {GCSPath: {path_type: Directory}} implementation: container: image: gcr.io/ml-pipeline/ml-pipeline-dataflow-tft:e20fad3e161e88226c83437271adb063221459b9 diff --git a/components/gcp/dataflow/launch_python/README.md b/components/gcp/dataflow/launch_python/README.md index 514609a8a39..c3e9e28fd06 100644 --- a/components/gcp/dataflow/launch_python/README.md +++ b/components/gcp/dataflow/launch_python/README.md @@ -18,7 +18,7 @@ Name | Description | Optional | Data type| Accepted values | Default | :--- | :----------| :----------| :----------| :----------| :---------- | python_file_path | The path to the Cloud Storage bucket or local directory containing the Python file to be run. | | GCSPath | | | project_id | The ID of the Google Cloud Platform (GCP) project containing the Cloud Dataflow job.| | GCPProjectID | | | -staging_dir | The path to the Cloud Storage directory where the staging files are stored. A random subdirectory will be created under the staging directory to keep the job information.This is done so that you can resume the job in case of failure. `staging_dir` is passed as the command line arguments (`staging_location` and `temp_location`) of the Beam code. | Yes | GCPPath | | None | +staging_dir | The path to the Cloud Storage directory where the staging files are stored. A random subdirectory will be created under the staging directory to keep the job information.This is done so that you can resume the job in case of failure. `staging_dir` is passed as the command line arguments (`staging_location` and `temp_location`) of the Beam code. | Yes | GCSPath | | None | requirements_file_path | The path to the Cloud Storage bucket or local directory containing the pip requirements file. | Yes | GCSPath | | None | args | The list of arguments to pass to the Python file. | No | List | A list of string arguments | None | wait_interval | The number of seconds to wait between calls to get the status of the job. | Yes | Integer | | 30 | @@ -90,130 +90,130 @@ In this sample, we run a wordcount sample code in a Kubeflow Pipeline. The outpu !gsutil cat gs://ml-pipeline-playground/samples/dataflow/wc/wc.py ``` - # - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. 
You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - # - - """A minimalist word-counting workflow that counts words in Shakespeare. - - This is the first in a series of successively more detailed 'word count' - examples. - - Next, see the wordcount pipeline, then the wordcount_debugging pipeline, for - more detailed examples that introduce additional concepts. - - Concepts: - - 1. Reading data from text files - 2. Specifying 'inline' transforms - 3. Counting a PCollection - 4. Writing data to Cloud Storage as text files - - To execute this pipeline locally, first edit the code to specify the output - location. Output location could be a local file path or an output prefix - on GCS. (Only update the output location marked with the first CHANGE comment.) - - To execute this pipeline remotely, first edit the code to set your project ID, - runner type, the staging location, the temp location, and the output location. - The specified GCS bucket(s) must already exist. (Update all the places marked - with a CHANGE comment.) - - Then, run the pipeline as described in the README. It will be deployed and run - using the Google Cloud Dataflow Service. No args are required to run the - pipeline. You can see the results in your output bucket in the GCS browser. - """ - - from __future__ import absolute_import - - import argparse - import logging - import re - - from past.builtins import unicode - - import apache_beam as beam - from apache_beam.io import ReadFromText - from apache_beam.io import WriteToText - from apache_beam.options.pipeline_options import PipelineOptions - from apache_beam.options.pipeline_options import SetupOptions - - - def run(argv=None): - """Main entry point; defines and runs the wordcount pipeline.""" - - parser = argparse.ArgumentParser() - parser.add_argument('--input', - dest='input', - default='gs://dataflow-samples/shakespeare/kinglear.txt', - help='Input file to process.') - parser.add_argument('--output', - dest='output', - # CHANGE 1/5: The Google Cloud Storage path is required - # for outputting the results. - default='gs://YOUR_OUTPUT_BUCKET/AND_OUTPUT_PREFIX', - help='Output file to write results to.') - known_args, pipeline_args = parser.parse_known_args(argv) - # pipeline_args.extend([ - # # CHANGE 2/5: (OPTIONAL) Change this to DataflowRunner to - # # run your pipeline on the Google Cloud Dataflow Service. - # '--runner=DirectRunner', - # # CHANGE 3/5: Your project ID is required in order to run your pipeline on - # # the Google Cloud Dataflow Service. - # '--project=SET_YOUR_PROJECT_ID_HERE', - # # CHANGE 4/5: Your Google Cloud Storage path is required for staging local - # # files. - # '--staging_location=gs://YOUR_BUCKET_NAME/AND_STAGING_DIRECTORY', - # # CHANGE 5/5: Your Google Cloud Storage path is required for temporary - # # files. - # '--temp_location=gs://YOUR_BUCKET_NAME/AND_TEMP_DIRECTORY', - # '--job_name=your-wordcount-job', - # ]) - - # We use the save_main_session option because one or more DoFn's in this - # workflow rely on global context (e.g., a module imported at module level). 
- pipeline_options = PipelineOptions(pipeline_args) - pipeline_options.view_as(SetupOptions).save_main_session = True - with beam.Pipeline(options=pipeline_options) as p: - - # Read the text file[pattern] into a PCollection. - lines = p | ReadFromText(known_args.input) - - # Count the occurrences of each word. - counts = ( - lines - | 'Split' >> (beam.FlatMap(lambda x: re.findall(r'[A-Za-z\']+', x)) - .with_output_types(unicode)) - | 'PairWithOne' >> beam.Map(lambda x: (x, 1)) - | 'GroupAndSum' >> beam.CombinePerKey(sum)) - - # Format the counts into a PCollection of strings. - def format_result(word_count): - (word, count) = word_count - return '%s: %s' % (word, count) - - output = counts | 'Format' >> beam.Map(format_result) - - # Write the output using a "Write" transform that has side effects. - # pylint: disable=expression-not-assigned - output | WriteToText(known_args.output) - - - if __name__ == '__main__': - logging.getLogger().setLevel(logging.INFO) - run() + # + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + # + + """A minimalist word-counting workflow that counts words in Shakespeare. + + This is the first in a series of successively more detailed 'word count' + examples. + + Next, see the wordcount pipeline, then the wordcount_debugging pipeline, for + more detailed examples that introduce additional concepts. + + Concepts: + + 1. Reading data from text files + 2. Specifying 'inline' transforms + 3. Counting a PCollection + 4. Writing data to Cloud Storage as text files + + To execute this pipeline locally, first edit the code to specify the output + location. Output location could be a local file path or an output prefix + on GCS. (Only update the output location marked with the first CHANGE comment.) + + To execute this pipeline remotely, first edit the code to set your project ID, + runner type, the staging location, the temp location, and the output location. + The specified GCS bucket(s) must already exist. (Update all the places marked + with a CHANGE comment.) + + Then, run the pipeline as described in the README. It will be deployed and run + using the Google Cloud Dataflow Service. No args are required to run the + pipeline. You can see the results in your output bucket in the GCS browser. 
+ """ + + from __future__ import absolute_import + + import argparse + import logging + import re + + from past.builtins import unicode + + import apache_beam as beam + from apache_beam.io import ReadFromText + from apache_beam.io import WriteToText + from apache_beam.options.pipeline_options import PipelineOptions + from apache_beam.options.pipeline_options import SetupOptions + + + def run(argv=None): + """Main entry point; defines and runs the wordcount pipeline.""" + + parser = argparse.ArgumentParser() + parser.add_argument('--input', + dest='input', + default='gs://dataflow-samples/shakespeare/kinglear.txt', + help='Input file to process.') + parser.add_argument('--output', + dest='output', + # CHANGE 1/5: The Google Cloud Storage path is required + # for outputting the results. + default='gs://YOUR_OUTPUT_BUCKET/AND_OUTPUT_PREFIX', + help='Output file to write results to.') + known_args, pipeline_args = parser.parse_known_args(argv) + # pipeline_args.extend([ + # # CHANGE 2/5: (OPTIONAL) Change this to DataflowRunner to + # # run your pipeline on the Google Cloud Dataflow Service. + # '--runner=DirectRunner', + # # CHANGE 3/5: Your project ID is required in order to run your pipeline on + # # the Google Cloud Dataflow Service. + # '--project=SET_YOUR_PROJECT_ID_HERE', + # # CHANGE 4/5: Your Google Cloud Storage path is required for staging local + # # files. + # '--staging_location=gs://YOUR_BUCKET_NAME/AND_STAGING_DIRECTORY', + # # CHANGE 5/5: Your Google Cloud Storage path is required for temporary + # # files. + # '--temp_location=gs://YOUR_BUCKET_NAME/AND_TEMP_DIRECTORY', + # '--job_name=your-wordcount-job', + # ]) + + # We use the save_main_session option because one or more DoFn's in this + # workflow rely on global context (e.g., a module imported at module level). + pipeline_options = PipelineOptions(pipeline_args) + pipeline_options.view_as(SetupOptions).save_main_session = True + with beam.Pipeline(options=pipeline_options) as p: + + # Read the text file[pattern] into a PCollection. + lines = p | ReadFromText(known_args.input) + + # Count the occurrences of each word. + counts = ( + lines + | 'Split' >> (beam.FlatMap(lambda x: re.findall(r'[A-Za-z\']+', x)) + .with_output_types(unicode)) + | 'PairWithOne' >> beam.Map(lambda x: (x, 1)) + | 'GroupAndSum' >> beam.CombinePerKey(sum)) + + # Format the counts into a PCollection of strings. + def format_result(word_count): + (word, count) = word_count + return '%s: %s' % (word, count) + + output = counts | 'Format' >> beam.Map(format_result) + + # Write the output using a "Write" transform that has side effects. + # pylint: disable=expression-not-assigned + output | WriteToText(known_args.output) + + + if __name__ == '__main__': + logging.getLogger().setLevel(logging.INFO) + run() #### Set sample parameters diff --git a/components/gcp/dataflow/launch_python/sample.ipynb b/components/gcp/dataflow/launch_python/sample.ipynb index 61a663439ec..1b65e434305 100644 --- a/components/gcp/dataflow/launch_python/sample.ipynb +++ b/components/gcp/dataflow/launch_python/sample.ipynb @@ -23,7 +23,7 @@ ":--- | :----------| :----------| :----------| :----------| :---------- |\n", "python_file_path | The path to the Cloud Storage bucket or local directory containing the Python file to be run. | | GCSPath | | |\n", "project_id | The ID of the Google Cloud Platform (GCP) project containing the Cloud Dataflow job.| | GCPProjectID | | |\n", - "staging_dir | The path to the Cloud Storage directory where the staging files are stored. 
A random subdirectory will be created under the staging directory to keep the job information.This is done so that you can resume the job in case of failure. `staging_dir` is passed as the command line arguments (`staging_location` and `temp_location`) of the Beam code. | Yes | GCPPath | | None |\n", + "staging_dir | The path to the Cloud Storage directory where the staging files are stored. A random subdirectory will be created under the staging directory to keep the job information.This is done so that you can resume the job in case of failure. `staging_dir` is passed as the command line arguments (`staging_location` and `temp_location`) of the Beam code. | Yes | GCSPath | | None |\n", "requirements_file_path | The path to the Cloud Storage bucket or local directory containing the pip requirements file. | Yes | GCSPath | | None |\n", "args | The list of arguments to pass to the Python file. | No | List | A list of string arguments | None |\n", "wait_interval | The number of seconds to wait between calls to get the status of the job. | Yes | Integer | | 30 |\n", diff --git a/components/gcp/dataproc/submit_hive_job/README.md b/components/gcp/dataproc/submit_hive_job/README.md index f73bc257f1c..86408569c5b 100644 --- a/components/gcp/dataproc/submit_hive_job/README.md +++ b/components/gcp/dataproc/submit_hive_job/README.md @@ -19,7 +19,7 @@ Use the component to run an Apache Hive job as one preprocessing step in a Kubef | region | The Cloud Dataproc region to handle the request. | No | GCPRegion | | | | cluster_name | The name of the cluster to run the job. | No | String | | | | queries | The queries to execute the Hive job. Specify multiple queries in one string by separating them with semicolons. You do not need to terminate queries with semicolons. | Yes | List | | None | -| query_file_uri | The HCFS URI of the script that contains the Hive queries. | Yes | GCPPath | | None | +| query_file_uri | The HCFS URI of the script that contains the Hive queries. | Yes | GCSPath | | None | | script_variables | Mapping of the query’s variable names to their values (equivalent to the Hive command: SET name="value";). | Yes | Dict | | None | | hive_job | The payload of a [HiveJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/HiveJob) | Yes | Dict | | None | | job | The payload of a [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs). | Yes | Dict | | None | diff --git a/components/gcp/dataproc/submit_hive_job/sample.ipynb b/components/gcp/dataproc/submit_hive_job/sample.ipynb index bfd32c6558a..a03cd64d82a 100644 --- a/components/gcp/dataproc/submit_hive_job/sample.ipynb +++ b/components/gcp/dataproc/submit_hive_job/sample.ipynb @@ -24,7 +24,7 @@ "| region | The Cloud Dataproc region to handle the request. | No | GCPRegion | | |\n", "| cluster_name | The name of the cluster to run the job. | No | String | | |\n", "| queries | The queries to execute the Hive job. Specify multiple queries in one string by separating them with semicolons. You do not need to terminate queries with semicolons. | Yes | List | | None |\n", - "| query_file_uri | The HCFS URI of the script that contains the Hive queries. | Yes | GCPPath | | None |\n", + "| query_file_uri | The HCFS URI of the script that contains the Hive queries. | Yes | GCSPath | | None |\n", "| script_variables | Mapping of the query’s variable names to their values (equivalent to the Hive command: SET name=\"value\";). 
| Yes | Dict | | None |\n", "| hive_job | The payload of a [HiveJob](https://cloud.google.com/dataproc/docs/reference/rest/v1/HiveJob) | Yes | Dict | | None |\n", "| job | The payload of a [Dataproc job](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs). | Yes | Dict | | None |\n", diff --git a/components/kubeflow/deployer/component.yaml b/components/kubeflow/deployer/component.yaml index 07a217fd407..2b24ac2f21f 100644 --- a/components/kubeflow/deployer/component.yaml +++ b/components/kubeflow/deployer/component.yaml @@ -1,7 +1,7 @@ name: Kubeflow - Serve TF model description: Serve TensorFlow model using Kubeflow TF-serving inputs: - - {name: Model dir, type: GCPPath, description: 'Path of GCS directory containing exported Tensorflow model.'} # type: {GCSPath: {path_type: Directory}} + - {name: Model dir, type: GCSPath, description: 'Path of GCS directory containing exported Tensorflow model.'} # type: {GCSPath: {path_type: Directory}} - {name: Cluster name, type: String, default: '', description: 'Kubernetes cluster name where the TS-serving service should be deployed. Uses the current cluster by default.'} - {name: Namespace, type: String, default: 'kubeflow', description: 'Kubernetes namespace where the TS-serving service should be deployed.'} - {name: Server name, type: String, default: 'model-server', description: 'TF-serving server name to use when deploying.'} diff --git a/components/kubeflow/dnntrainer/component.yaml b/components/kubeflow/dnntrainer/component.yaml index 7fc97450d81..ba84c326f85 100644 --- a/components/kubeflow/dnntrainer/component.yaml +++ b/components/kubeflow/dnntrainer/component.yaml @@ -1,18 +1,18 @@ name: Train FC DNN using TF description: Trains fully-connected neural network using Tensorflow inputs: - - {name: Transformed data dir, type: GCPPath, description: 'GCS path containing tf-transformed training and eval data.'} # type: {GCSPath: {path_type: Directory}} - - {name: Schema, type: GCPPath, description: 'GCS json schema file path.'} # type: {GCSPath: {data_type: JSON}} + - {name: Transformed data dir, type: GCSPath, description: 'GCS path containing tf-transformed training and eval data.'} # type: {GCSPath: {path_type: Directory}} + - {name: Schema, type: GCSPath, description: 'GCS json schema file path.'} # type: {GCSPath: {data_type: JSON}} - {name: Learning rate, type: Float, default: '0.1', description: 'Learning rate for training.'} - {name: Optimizer, type: String, default: 'Adagrad', description: 'Optimizer for training. Valid values are: Adam, SGD, Adagrad. If not provided, tf.estimator default will be used.'} - {name: Hidden layer size, type: String, default: '100', description: 'Comma-separated hidden layer sizes. For example "200,100,50".'} - {name: Steps, type: Integer, description: 'Maximum number of training steps to perform. If unspecified, will honor epochs.'} #- {name: Epochs, type: Integer, default: '', description: 'Maximum number of training data epochs on which to train. 
If both "steps" and "epochs" are specified, the training job will run for "steps" or "epochs", whichever occurs first.'} - {name: Target, type: String, description: 'Name of the column for prediction target.'} - - {name: Preprocessing module, type: GCPPath, default: '', description: 'GCS path to a python file defining "preprocess" and "get_feature_columns" functions.'} # type: {GCSPath: {data_type: Python}} - - {name: Training output dir, type: GCPPath, description: 'GCS or local directory.'} # type: {GCSPath: {path_type: Directory}} + - {name: Preprocessing module, type: GCSPath, default: '', description: 'GCS path to a python file defining "preprocess" and "get_feature_columns" functions.'} # type: {GCSPath: {data_type: Python}} + - {name: Training output dir, type: GCSPath, description: 'GCS or local directory.'} # type: {GCSPath: {path_type: Directory}} outputs: - - {name: Training output dir, type: GCPPath, description: 'GCS or local directory.'} # type: {GCSPath: {path_type: Directory}} + - {name: Training output dir, type: GCSPath, description: 'GCS or local directory.'} # type: {GCSPath: {path_type: Directory}} implementation: container: image: gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:e20fad3e161e88226c83437271adb063221459b9 diff --git a/components/kubeflow/katib-launcher/component.yaml b/components/kubeflow/katib-launcher/component.yaml index 49c847f5921..c458daed395 100644 --- a/components/kubeflow/katib-launcher/component.yaml +++ b/components/kubeflow/katib-launcher/component.yaml @@ -14,7 +14,7 @@ inputs: - {name: Metrics collector template path, type: String, default: '', description: 'Metrics collector spec.'} - {name: Suggestion spec, type: YAML, default: '', description: 'Suggestion spec (YAML/JSON format).'} - {name: StudyJob timeout minutes, type: Integer, default: '10', description: 'Time in minutes to wait for the StudyJob to complete.'} -- {name: Delete finished job, type: Boolean, default: 'True', description: 'Whether to delete the job after it is finished.'} +- {name: Delete finished job, type: Bool, default: 'True', description: 'Whether to delete the job after it is finished.'} outputs: - {name: Best parameter set, type: JSON, description: 'The parameter set of the best StudyJob trial.'} implementation: diff --git a/components/local/confusion_matrix/component.yaml b/components/local/confusion_matrix/component.yaml index 114a4d8d7db..324d977d33a 100644 --- a/components/local/confusion_matrix/component.yaml +++ b/components/local/confusion_matrix/component.yaml @@ -1,9 +1,9 @@ name: Confusion matrix description: Calculates confusion matrix inputs: - - {name: Predictions, type: GCPPath, description: 'GCS path of prediction file pattern.'} # type: {GCSPath: {data_type: CSV}} + - {name: Predictions, type: GCSPath, description: 'GCS path of prediction file pattern.'} # type: {GCSPath: {data_type: CSV}} - {name: Target lambda, type: String, default: '', description: 'Text of Python lambda function which computes target value. For example, "lambda x: x[''a''] + x[''b'']". 
If not set, the input must include a "target" column.'} - - {name: Output dir, type: GCPPath, description: 'GCS path of the output directory.'} # type: {GCSPath: {path_type: Directory}} + - {name: Output dir, type: GCSPath, description: 'GCS path of the output directory.'} # type: {GCSPath: {path_type: Directory}} #outputs: # - {name: UI metadata, type: UI metadata} # - {name: Metrics, type: Metrics} diff --git a/components/local/roc/component.yaml b/components/local/roc/component.yaml index 07f338d5561..dc889d36ead 100644 --- a/components/local/roc/component.yaml +++ b/components/local/roc/component.yaml @@ -1,11 +1,11 @@ name: ROC curve description: Calculates Receiver Operating Characteristic curve. See https://en.wikipedia.org/wiki/Receiver_operating_characteristic inputs: - - {name: Predictions dir, type: GCPPath, description: 'GCS path of prediction file pattern.'} #TODO: Replace dir data + schema files # type: {GCSPath: {path_type: Directory}} + - {name: Predictions dir, type: GCSPath, description: 'GCS path of prediction file pattern.'} #TODO: Replace dir data + schema files # type: {GCSPath: {path_type: Directory}} - {name: True class, type: String, default: 'true', description: 'The true class label for the sample. Default is "true".'} - {name: True score column, type: String, default: 'true', description: 'The name of the column for positive probability.'} - {name: Target lambda, type: String, default: '', description: 'Text of Python lambda function which returns boolean value indicating whether the classification result is correct.\nFor example, "lambda x: x[''a''] and x[''b'']". If missing, input must have a "target" column.'} - - {name: Output dir, type: GCPPath, description: 'GCS path of the output directory.'} #TODO: Replace dir with single file # type: {GCSPath: {path_type: Directory}} + - {name: Output dir, type: GCSPath, description: 'GCS path of the output directory.'} #TODO: Replace dir with single file # type: {GCSPath: {path_type: Directory}} #outputs: # - {name: UI metadata, type: UI metadata} # - {name: Metrics, type: Metrics} From c777401bf1aceb1e5dca0cb69de04100301f01c1 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Tue, 23 Apr 2019 13:42:01 -0700 Subject: [PATCH 21/43] SDK - Decoupling ContainerOp from compiler (#1168) * SDK - Decoupling ContainerOp from compiler Currently, some code in DSL module depends on some classes that belong to the DSL-compiler. Ideally, the dependency should go the the other way - the DSL-compiler should depend on DSL, but not the other way around. This commit fixes that issue for the ContainerOp class. * Switched from a list of handlers to a single handler --- sdk/python/kfp/dsl/_container_op.py | 20 ++++++++++++++------ sdk/python/kfp/dsl/_pipeline.py | 7 +++++++ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/sdk/python/kfp/dsl/_container_op.py b/sdk/python/kfp/dsl/_container_op.py index 6634e3dd533..8d2cdece9fb 100644 --- a/sdk/python/kfp/dsl/_container_op.py +++ b/sdk/python/kfp/dsl/_container_op.py @@ -20,7 +20,6 @@ V1ResourceRequirements, V1VolumeDevice, V1VolumeMount, V1ContainerPort, V1Lifecycle) -from . import _pipeline from . import _pipeline_param from ._metadata import ComponentMeta @@ -623,6 +622,15 @@ def inputs(self): return _pipeline_param.extract_pipelineparams_from_any(self) +def _make_hash_based_id_for_container_op(container_op): + # Generating a unique ID for ContainerOp. For class instances, the hash is the object's memory address which is unique. 
+ return container_op.human_name + ' ' + hex(2**63 + hash(container_op))[2:] + + +# Pointer to a function that generates a unique ID for the ContainerOp instance (Possibly by registering the ContainerOp instance in some system). +_register_container_op_handler = _make_hash_based_id_for_container_op + + class ContainerOp(object): """ Represents an op implemented by a container image. @@ -693,8 +701,6 @@ def __init__(self, one way for outside world to receive outputs of the container. is_exit_handler: Whether it is used as an exit handler. """ - if not _pipeline.Pipeline.get_default_pipeline(): - raise ValueError('Default pipeline not defined.') valid_name_regex = r'^[A-Za-z][A-Za-z0-9\s_-]*$' if not re.match(valid_name_regex, name): @@ -708,9 +714,6 @@ def __init__(self, # human_name must exist to construct containerOps name self.human_name = name - # actual name for argo workflow - self.name = _pipeline.Pipeline.get_default_pipeline().add_op( - self, is_exit_handler) # `container` prop in `io.argoproj.workflow.v1alpha1.Template` container_kwargs = container_kwargs or {} @@ -761,6 +764,11 @@ def _decorated(*args, **kwargs): self.is_exit_handler = is_exit_handler self._metadata = None + # ID of the current ContainerOp. Ideally, it should be generated by the compiler that sees the bigger context. + # However, the ID is used in the task output references (PipelineParams) which can be serialized to strings. + # Because of this we must obtain a unique ID right now. + self.name = _register_container_op_handler(self) + self.outputs = {} if file_outputs: self.outputs = { diff --git a/sdk/python/kfp/dsl/_pipeline.py b/sdk/python/kfp/dsl/_pipeline.py index f172a3c24ad..ce22aa741fa 100644 --- a/sdk/python/kfp/dsl/_pipeline.py +++ b/sdk/python/kfp/dsl/_pipeline.py @@ -120,10 +120,17 @@ def __enter__(self): raise Exception('Nested pipelines are not allowed.') Pipeline._default_pipeline = self + + def register_op_and_generate_id(op): + return self.add_op(op, op.is_exit_handler) + + self._old__register_container_op_handler = _container_op._register_container_op_handler + _container_op._register_container_op_handler = register_op_and_generate_id return self def __exit__(self, *args): Pipeline._default_pipeline = None + _container_op._register_container_op_handler = self._old__register_container_op_handler def add_op(self, op: _container_op.ContainerOp, define_only: bool): """Add a new operator. 
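For readers skimming the diff above: the substance of this patch is that `ContainerOp` no longer reaches into `Pipeline` directly; instead it calls a module-level handler, and the active `Pipeline` context temporarily swaps that handler in and restores it on exit. Below is a minimal, self-contained sketch of that handler-swap pattern. The names here (`Op`, `Pipeline`, `_register_op_handler`, `_default_id_handler`) are simplified stand-ins for illustration, not the actual kfp classes or functions.

```python
# Sketch of the handler-swap pattern introduced by the patch above.
# Simplified stand-in classes; not the real kfp SDK code.

def _default_id_handler(op):
    # Fallback: derive a unique ID from object identity, as the patch does.
    return op.human_name + ' ' + hex(2**63 + hash(op))[2:]

# Module-level pointer that op construction calls to obtain an ID.
_register_op_handler = _default_id_handler


class Op:
    def __init__(self, name):
        self.human_name = name
        # Ask whichever handler is currently registered to assign the ID.
        self.name = _register_op_handler(self)


class Pipeline:
    def __init__(self):
        self.ops = []

    def __enter__(self):
        # Take over op registration while this pipeline context is active.
        global _register_op_handler
        self._saved_handler = _register_op_handler
        _register_op_handler = self._add_op
        return self

    def __exit__(self, *exc):
        # Restore the previous handler so ops created outside a pipeline
        # context still get hash-based IDs.
        global _register_op_handler
        _register_op_handler = self._saved_handler

    def _add_op(self, op):
        self.ops.append(op)
        return '%s-%d' % (op.human_name, len(self.ops))


if __name__ == '__main__':
    standalone = Op('echo')      # constructing an op no longer requires a pipeline
    with Pipeline() as p:
        trained = Op('train')    # registered with the active pipeline
    print(standalone.name, trained.name, len(p.ops))
```

Swapping a single module-level callable (rather than maintaining a list of handlers, as the commit message notes) keeps the override and restore logic in `__enter__`/`__exit__` trivial, and it is what lets the `Default pipeline not defined` check be removed from the constructor.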
From b588ba087ac7dfd231a4c0e8e444bab53b894073 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Tue, 23 Apr 2019 14:34:01 -0700 Subject: [PATCH 22/43] SDK/Tests - Properly closing tar files opened for writing (#1169) --- .../tests/compiler/component_builder_test.py | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/sdk/python/tests/compiler/component_builder_test.py b/sdk/python/tests/compiler/component_builder_test.py index 54d159a424f..a14ff236182 100644 --- a/sdk/python/tests/compiler/component_builder_test.py +++ b/sdk/python/tests/compiler/component_builder_test.py @@ -131,11 +131,11 @@ def test_wrap_files_in_tarball(self): docker_helper = DockerfileHelper(arc_dockerfile_name='') docker_helper._wrap_files_in_tarball(temp_tarball, {'dockerfile':temp_file_one, 'main.py':temp_file_two}) self.assertTrue(os.path.exists(temp_tarball)) - temp_tarball_handle = tarfile.open(temp_tarball) - temp_files = temp_tarball_handle.getmembers() - self.assertTrue(len(temp_files) == 2) - for temp_file in temp_files: - self.assertTrue(temp_file.name in ['dockerfile', 'main.py']) + with tarfile.open(temp_tarball) as temp_tarball_handle: + temp_files = temp_tarball_handle.getmembers() + self.assertTrue(len(temp_files) == 2) + for temp_file in temp_files: + self.assertTrue(temp_file.name in ['dockerfile', 'main.py']) # clean up os.remove(temp_file_one) @@ -206,11 +206,11 @@ def test_prepare_docker_with_py(self): docker_helper.prepare_docker_tarball_with_py(arc_python_filename='main.py', python_filepath=python_filepath, base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.8.0', local_tarball_path=local_tarball_path, python_version='python3') - temp_tarball_handle = tarfile.open(local_tarball_path) - temp_files = temp_tarball_handle.getmembers() - self.assertTrue(len(temp_files) == 2) - for temp_file in temp_files: - self.assertTrue(temp_file.name in ['dockerfile', 'main.py']) + with tarfile.open(local_tarball_path) as temp_tarball_handle: + temp_files = temp_tarball_handle.getmembers() + self.assertTrue(len(temp_files) == 2) + for temp_file in temp_files: + self.assertTrue(temp_file.name in ['dockerfile', 'main.py']) # clean up os.remove(local_tarball_path) @@ -233,13 +233,13 @@ def test_prepare_docker_with_py_and_dependency(self): base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.8.0', local_tarball_path=local_tarball_path, python_version='python3', dependency=dependencies) - temp_tarball_handle = tarfile.open(local_tarball_path) - temp_files = temp_tarball_handle.getmembers() - self.assertTrue(len(temp_files) == 3) - for temp_file in temp_files: - self.assertTrue(temp_file.name in ['dockerfile', 'main.py', 'requirements.txt']) + with tarfile.open(local_tarball_path) as temp_tarball_handle: + temp_files = temp_tarball_handle.getmembers() + self.assertTrue(len(temp_files) == 3) + for temp_file in temp_files: + self.assertTrue(temp_file.name in ['dockerfile', 'main.py', 'requirements.txt']) - # clean up + # clean up os.remove(local_tarball_path) def test_prepare_docker_tarball(self): @@ -254,11 +254,11 @@ def test_prepare_docker_tarball(self): # check docker_helper = DockerfileHelper(arc_dockerfile_name='dockerfile') docker_helper.prepare_docker_tarball(dockerfile_path=dockerfile_path, local_tarball_path=local_tarball_path) - temp_tarball_handle = tarfile.open(local_tarball_path) - temp_files = temp_tarball_handle.getmembers() - self.assertTrue(len(temp_files) == 1) - for temp_file in temp_files: - self.assertTrue(temp_file.name in ['dockerfile']) + with 
tarfile.open(local_tarball_path) as temp_tarball_handle: + temp_files = temp_tarball_handle.getmembers() + self.assertTrue(len(temp_files) == 1) + for temp_file in temp_files: + self.assertTrue(temp_file.name in ['dockerfile']) # clean up os.remove(local_tarball_path) From 848d4fb99c9dc3e37012705a3468b27fe66193e1 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Tue, 23 Apr 2019 15:26:00 -0700 Subject: [PATCH 23/43] SDK - Replaced insecure yaml.load with yaml.safe_load (#1170) This improves security and gets rid of security warnings. See https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation --- .../ibm-components/ffdl/train/src/train.py | 2 +- .../katib-launcher/src/launch_study_job.py | 4 ++-- .../kubeflow/launcher/src/launch_tf_job.py | 2 +- .../kubeflow/launcher/test/test_launcher.py | 4 ++-- sdk/python/kfp/_client.py | 6 +++--- sdk/python/kfp/components/_yaml_utils.py | 2 +- sdk/python/tests/compiler/compiler_tests.py | 18 +++++++++--------- .../tests/compiler/component_builder_test.py | 2 +- 8 files changed, 20 insertions(+), 20 deletions(-) diff --git a/components/ibm-components/ffdl/train/src/train.py b/components/ibm-components/ffdl/train/src/train.py index ee86f080555..f43ad5dfeb8 100644 --- a/components/ibm-components/ffdl/train/src/train.py +++ b/components/ibm-components/ffdl/train/src/train.py @@ -78,7 +78,7 @@ ''' Update FfDL manifest with the corresponding object storage credentials ''' f = open('manifest.yml', 'r') - manifest = yaml.load(f.read()) + manifest = yaml.safe_load(f.read()) f.close() manifest['data_stores'][0]['connection']['auth_url'] = s3_url diff --git a/components/kubeflow/katib-launcher/src/launch_study_job.py b/components/kubeflow/katib-launcher/src/launch_study_job.py index 8f901a00d82..54e614abd4d 100644 --- a/components/kubeflow/katib-launcher/src/launch_study_job.py +++ b/components/kubeflow/katib-launcher/src/launch_study_job.py @@ -35,7 +35,7 @@ def yamlOrJsonStr(str): try: return json.loads(str) except: - return yaml.load(str) + return yaml.safe_load(str) def strToList(str): return str.split(",") @@ -50,7 +50,7 @@ def _generate_studyjob_yaml(src_filename, name, namespace, optimizationtype, obj metricsnames, parameterconfigs, nasConfig, workertemplatepath, mcollectortemplatepath, suggestionspec): """_generate_studyjob_yaml generates studyjob yaml file based on hp.template.yaml""" with open(src_filename, 'r') as f: - content = yaml.load(f) + content = yaml.safe_load(f) content['metadata']['name'] = name content['metadata']['namespace'] = namespace diff --git a/components/kubeflow/launcher/src/launch_tf_job.py b/components/kubeflow/launcher/src/launch_tf_job.py index a728ae76df9..414c15fd7c5 100644 --- a/components/kubeflow/launcher/src/launch_tf_job.py +++ b/components/kubeflow/launcher/src/launch_tf_job.py @@ -49,7 +49,7 @@ def _generate_train_yaml(src_filename, tfjob_ns, workers, pss, trainer_image, command): """_generate_train_yaml generates train yaml files based on train.template.yaml""" with open(src_filename, 'r') as f: - content = yaml.load(f) + content = yaml.safe_load(f) content['metadata']['generateName'] = 'trainer-' content['metadata']['namespace'] = tfjob_ns diff --git a/components/kubeflow/launcher/test/test_launcher.py b/components/kubeflow/launcher/test/test_launcher.py index c9447f980a8..4b1f0ab5029 100644 --- a/components/kubeflow/launcher/test/test_launcher.py +++ b/components/kubeflow/launcher/test/test_launcher.py @@ -38,7 +38,7 @@ def test_yaml_generation_basic(self): args_list.append('--learning-rate=0.1') 
generated_yaml = train._generate_train_yaml(train_template_file, tfjob_ns, worker, pss, args_list) with open(os.path.join(test_data_dir, 'train_basic.yaml'), 'r') as f: - golden = yaml.load(f) + golden = yaml.safe_load(f) self.assertEqual(golden, generated_yaml) def test_yaml_generation_advanced(self): @@ -53,7 +53,7 @@ def test_yaml_generation_advanced(self): args_list.append('--learning-rate=0.1') generated_yaml = train._generate_train_yaml(train_template_file, tfjob_ns, worker, pss, args_list) with open(os.path.join(test_data_dir, 'train_zero_worker.yaml'), 'r') as f: - golden = yaml.load(f) + golden = yaml.safe_load(f) self.assertEqual(golden, generated_yaml) if __name__ == '__main__': diff --git a/sdk/python/kfp/_client.py b/sdk/python/kfp/_client.py index 784ca427f21..c70d946ca4e 100644 --- a/sdk/python/kfp/_client.py +++ b/sdk/python/kfp/_client.py @@ -181,7 +181,7 @@ def _extract_pipeline_yaml(self, package_file): raise ValueError('Invalid package. Multiple yaml files in the package.') with tar.extractfile(all_yaml_files[0]) as f: - return yaml.load(f) + return yaml.safe_load(f) elif package_file.endswith('.zip'): with zipfile.ZipFile(package_file, 'r') as zip: all_yaml_files = [m for m in zip.namelist() if @@ -193,10 +193,10 @@ def _extract_pipeline_yaml(self, package_file): raise ValueError('Invalid package. Multiple yaml files in the package.') with zip.open(all_yaml_files[0]) as f: - return yaml.load(f) + return yaml.safe_load(f) elif package_file.endswith('.yaml') or package_file.endswith('.yml'): with open(package_file, 'r') as f: - return yaml.load(f) + return yaml.safe_load(f) else: raise ValueError('The package_file '+ package_file + ' should ends with one of the following formats: [.tar.gz, .tgz, .zip, .yaml, .yml]') diff --git a/sdk/python/kfp/components/_yaml_utils.py b/sdk/python/kfp/components/_yaml_utils.py index 71edad6b454..22be0899998 100644 --- a/sdk/python/kfp/components/_yaml_utils.py +++ b/sdk/python/kfp/components/_yaml_utils.py @@ -18,7 +18,7 @@ def load_yaml(stream): #!!! Yaml should only be loaded using this function. 
Otherwise the dict ordering may be broken in Python versions prior to 3.6 #See https://stackoverflow.com/questions/5121931/in-python-how-can-you-load-yaml-mappings-as-ordereddicts/21912744#21912744 - def ordered_load(stream, Loader=yaml.Loader, object_pairs_hook=OrderedDict): + def ordered_load(stream, Loader=yaml.SafeLoader, object_pairs_hook=OrderedDict): class OrderedLoader(Loader): pass def construct_mapping(loader, node): diff --git a/sdk/python/tests/compiler/compiler_tests.py b/sdk/python/tests/compiler/compiler_tests.py index 7f2f588f684..93f6839635c 100644 --- a/sdk/python/tests/compiler/compiler_tests.py +++ b/sdk/python/tests/compiler/compiler_tests.py @@ -122,11 +122,11 @@ def test_operator_to_template(self): def _get_yaml_from_zip(self, zip_file): with zipfile.ZipFile(zip_file, 'r') as zip: with open(zip.extract(zip.namelist()[0]), 'r') as yaml_file: - return yaml.load(yaml_file) + return yaml.safe_load(yaml_file) def _get_yaml_from_tar(self, tar_file): with tarfile.open(tar_file, 'r:gz') as tar: - return yaml.load(tar.extractfile(tar.getmembers()[0])) + return yaml.safe_load(tar.extractfile(tar.getmembers()[0])) def test_basic_workflow(self): """Test compiling a basic workflow.""" @@ -139,7 +139,7 @@ def test_basic_workflow(self): try: compiler.Compiler().compile(basic.save_most_frequent_word, package_path) with open(os.path.join(test_data_dir, 'basic.yaml'), 'r') as f: - golden = yaml.load(f) + golden = yaml.safe_load(f) compiled = self._get_yaml_from_zip(package_path) self.maxDiff = None @@ -166,7 +166,7 @@ def test_composing_workflow(self): compose_package_path = os.path.join(tmpdir, 'compose.zip') compiler.Compiler().compile(compose.download_save_most_frequent_word, compose_package_path) with open(os.path.join(test_data_dir, 'compose.yaml'), 'r') as f: - golden = yaml.load(f) + golden = yaml.safe_load(f) compiled = self._get_yaml_from_zip(compose_package_path) self.maxDiff = None @@ -193,7 +193,7 @@ def test_package_compile(self): 'dsl-compile', '--package', package_path, '--namespace', 'mypipeline', '--output', target_zip, '--function', 'download_save_most_frequent_word']) with open(os.path.join(test_data_dir, 'compose.yaml'), 'r') as f: - golden = yaml.load(f) + golden = yaml.safe_load(f) compiled = self._get_yaml_from_zip(target_zip) self.maxDiff = None @@ -211,7 +211,7 @@ def _test_py_compile_zip(self, file_base_name): subprocess.check_call([ 'dsl-compile', '--py', py_file, '--output', target_zip]) with open(os.path.join(test_data_dir, file_base_name + '.yaml'), 'r') as f: - golden = yaml.load(f) + golden = yaml.safe_load(f) compiled = self._get_yaml_from_zip(target_zip) self.maxDiff = None @@ -228,7 +228,7 @@ def _test_py_compile_targz(self, file_base_name): subprocess.check_call([ 'dsl-compile', '--py', py_file, '--output', target_tar]) with open(os.path.join(test_data_dir, file_base_name + '.yaml'), 'r') as f: - golden = yaml.load(f) + golden = yaml.safe_load(f) compiled = self._get_yaml_from_tar(target_tar) self.maxDiff = None self.assertEqual(golden, compiled) @@ -244,10 +244,10 @@ def _test_py_compile_yaml(self, file_base_name): subprocess.check_call([ 'dsl-compile', '--py', py_file, '--output', target_yaml]) with open(os.path.join(test_data_dir, file_base_name + '.yaml'), 'r') as f: - golden = yaml.load(f) + golden = yaml.safe_load(f) with open(os.path.join(test_data_dir, target_yaml), 'r') as f: - compiled = yaml.load(f) + compiled = yaml.safe_load(f) self.maxDiff = None self.assertEqual(golden, compiled) diff --git 
a/sdk/python/tests/compiler/component_builder_test.py b/sdk/python/tests/compiler/component_builder_test.py index a14ff236182..f45f09db645 100644 --- a/sdk/python/tests/compiler/component_builder_test.py +++ b/sdk/python/tests/compiler/component_builder_test.py @@ -313,7 +313,7 @@ def test_generate_kaniko_yaml(self): generated_yaml = builder._generate_kaniko_spec(namespace='default', arc_dockerfile_name='dockerfile', gcs_path='gs://mlpipeline/kaniko_build.tar.gz', target_image='gcr.io/mlpipeline/kaniko_image:latest') with open(os.path.join(test_data_dir, 'kaniko.basic.yaml'), 'r') as f: - golden = yaml.load(f) + golden = yaml.safe_load(f) self.assertEqual(golden, generated_yaml) From 173ecbda4ccd4c0183aafdfdcddd5f959b7db297 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Tue, 23 Apr 2019 16:12:00 -0700 Subject: [PATCH 24/43] Marked all scripts as executable (#1177) --- components/sample/keras/train_classifier/build_image.sh | 0 frontend/src/apis/filter/git_push.sh | 0 test/check-argo-status.sh | 0 test/install-argo.sh | 0 test/test-prep.sh | 0 5 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 components/sample/keras/train_classifier/build_image.sh mode change 100644 => 100755 frontend/src/apis/filter/git_push.sh mode change 100644 => 100755 test/check-argo-status.sh mode change 100644 => 100755 test/install-argo.sh mode change 100644 => 100755 test/test-prep.sh diff --git a/components/sample/keras/train_classifier/build_image.sh b/components/sample/keras/train_classifier/build_image.sh old mode 100644 new mode 100755 diff --git a/frontend/src/apis/filter/git_push.sh b/frontend/src/apis/filter/git_push.sh old mode 100644 new mode 100755 diff --git a/test/check-argo-status.sh b/test/check-argo-status.sh old mode 100644 new mode 100755 diff --git a/test/install-argo.sh b/test/install-argo.sh old mode 100644 new mode 100755 diff --git a/test/test-prep.sh b/test/test-prep.sh old mode 100644 new mode 100755 From 4de20179c6231724ecd51011304bf9ffa4ca85b5 Mon Sep 17 00:00:00 2001 From: Riley Bauer <34456002+rileyjbauer@users.noreply.github.com> Date: Wed, 24 Apr 2019 10:24:10 -0700 Subject: [PATCH 25/43] Update to version 3.0.2 of npm package 'extend' (#1211) * Update to version 3.0.2 of npm package 'extend' * Use 'new' with Storage --- frontend/server/package-lock.json | 807 +++++++++-------- frontend/server/server.ts | 2 +- .../package-lock.json | 842 ++++++++++-------- test/frontend-integration-test/package.json | 2 +- 4 files changed, 884 insertions(+), 769 deletions(-) diff --git a/frontend/server/package-lock.json b/frontend/server/package-lock.json index 7abf6977b20..353ea09d08b 100644 --- a/frontend/server/package-lock.json +++ b/frontend/server/package-lock.json @@ -79,14 +79,14 @@ } }, "@types/base-64": { - "version": "0.1.2", - "resolved": "https://registry.npmjs.org/@types/base-64/-/base-64-0.1.2.tgz", - "integrity": "sha1-Y6wxgwLNq7XwToripW5U1IMhB+I=" + "version": "0.1.3", + "resolved": "https://registry.npmjs.org/@types/base-64/-/base-64-0.1.3.tgz", + "integrity": "sha512-DJpw7RKNMXygZ0j2xe6ROBqiJUy7JWEItkzOPBzrT35HUWS7VLYyW9XJX8yCCvE2xg8QD7wesvVyXFg8AVHTMA==" }, "@types/bluebird": { - "version": "3.5.24", - "resolved": "https://registry.npmjs.org/@types/bluebird/-/bluebird-3.5.24.tgz", - "integrity": "sha512-YeQoDpq4Lm8ppSBqAnAeF/xy1cYp/dMTif2JFcvmAbETMRlvKHT2iLcWu+WyYiJO3b3Ivokwo7EQca/xfLVJmg==" + "version": "3.5.26", + "resolved": "https://registry.npmjs.org/@types/bluebird/-/bluebird-3.5.26.tgz", + "integrity": 
"sha512-aj2mrBLn5ky0GmAg6IPXrQjnN0iB/ulozuJ+oZdrHRAzRbXjGmu4UXsNCjFvPbSaaPZmniocdOzsM392qLOlmQ==" }, "@types/body-parser": { "version": "1.17.0", @@ -99,9 +99,9 @@ } }, "@types/caseless": { - "version": "0.12.1", - "resolved": "https://registry.npmjs.org/@types/caseless/-/caseless-0.12.1.tgz", - "integrity": "sha512-FhlMa34NHp9K5MY1Uz8yb+ZvuX0pnvn3jScRSNAb75KHGB8d3rEU6hqMs3Z2vjuytcMfRg6c5CHMc3wtYyD2/A==" + "version": "0.12.2", + "resolved": "https://registry.npmjs.org/@types/caseless/-/caseless-0.12.2.tgz", + "integrity": "sha512-6ckxMjBBD8URvjB6J3NcnuAn5Pkl7t3TizAg+xdlzzQGSPSmBcXf8KoIH0ua/i+tio+ZRUHEXp0HEmvaR4kt0w==" }, "@types/connect": { "version": "3.4.32", @@ -113,14 +113,14 @@ } }, "@types/events": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/@types/events/-/events-1.2.0.tgz", - "integrity": "sha512-KEIlhXnIutzKwRbQkGWb/I4HFqBuUykAdHgDED6xqwXJfONCjF5VoE0cXEiurh3XauygxzeDzgtXUqvLkxFzzA==" + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/@types/events/-/events-3.0.0.tgz", + "integrity": "sha512-EaObqwIvayI5a8dCzhFrjKzVwKLxjoG9T6Ppd5CEo07LRKfQ8Yokw54r5+Wq7FaBQ+yXRvQAYPrHwya1/UFt9g==" }, "@types/express": { - "version": "4.16.0", - "resolved": "https://registry.npmjs.org/@types/express/-/express-4.16.0.tgz", - "integrity": "sha512-TtPEYumsmSTtTetAPXlJVf3kEqb6wZK0bZojpJQrnD/djV4q1oB6QQ8aKvKqwNPACoe02GNiy5zDzcYivR5Z2w==", + "version": "4.16.1", + "resolved": "https://registry.npmjs.org/@types/express/-/express-4.16.1.tgz", + "integrity": "sha512-V0clmJow23WeyblmACoxbHBu2JKlE5TiIme6Lem14FnPW9gsttyHtk6wq7njcdIWH1njAaFgR8gW09lgY98gQg==", "dev": true, "requires": { "@types/body-parser": "*", @@ -129,12 +129,11 @@ } }, "@types/express-serve-static-core": { - "version": "4.16.0", - "resolved": "https://registry.npmjs.org/@types/express-serve-static-core/-/express-serve-static-core-4.16.0.tgz", - "integrity": "sha512-lTeoCu5NxJU4OD9moCgm0ESZzweAx0YqsAcab6OB0EB3+As1OaHtKnaGJvcngQxYsi9UNv0abn4/DRavrRxt4w==", + "version": "4.16.3", + "resolved": "https://registry.npmjs.org/@types/express-serve-static-core/-/express-serve-static-core-4.16.3.tgz", + "integrity": "sha512-HFgBmRDTvdnrRFXqBr2NM2NUCu6fIpzJsUTlRVENF8lxvstof7cl9Fxfwq5S0kJbO/FsPVcjlxpOM3ZxIkn7Rw==", "dev": true, "requires": { - "@types/events": "*", "@types/node": "*", "@types/range-parser": "*" } @@ -148,23 +147,24 @@ } }, "@types/google-cloud__storage": { - "version": "1.1.7", - "resolved": "https://registry.npmjs.org/@types/google-cloud__storage/-/google-cloud__storage-1.1.7.tgz", - "integrity": "sha512-010Llp+5ze+XWWmZuLDxs0pZgFjOgtJQVt9icJ0Ed67ZFLq7PnXkYx8x/k9nwDojR5/X4XoLPNqB1F627TScdQ==", + "version": "1.7.2", + "resolved": "https://registry.npmjs.org/@types/google-cloud__storage/-/google-cloud__storage-1.7.2.tgz", + "integrity": "sha512-RaQJ7+Ht20MRYJu7mgKBpbVNZIPneztKIl/DUKacRC6A8mXRsJfgDdPA7indHmJGIgm+hzUTj44+A3RyuuYZhg==", "dev": true, "requires": { - "@types/node": "*" + "@types/node": "*", + "@types/request": "*" } }, "@types/js-yaml": { - "version": "3.11.2", - "resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-3.11.2.tgz", - "integrity": "sha512-JRDtMPEqXrzfuYAdqbxLot1GvAr/QvicIZAnOAigZaj8xVMhuSJTg/xsv9E1TvyL+wujYhRLx9ZsQ0oFOSmwyA==" + "version": "3.12.1", + "resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-3.12.1.tgz", + "integrity": "sha512-SGGAhXLHDx+PK4YLNcNGa6goPf9XRWQNAUUbffkwVGGXIxmDKWyGGL4inzq2sPmExu431Ekb9aEMn9BkPqEYFA==" }, "@types/mime": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/@types/mime/-/mime-2.0.0.tgz", - "integrity": 
"sha512-A2TAGbTFdBw9azHbpVd+/FkdW2T6msN1uct1O9bH3vTerEHKZhTXJUQXy+hNq1B0RagfU8U+KBdqiZpxjhOUQA==", + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/@types/mime/-/mime-2.0.1.tgz", + "integrity": "sha512-FwI9gX75FgVBJ7ywgnq/P7tw+/o1GUbtP0KzbtusLigAOgIgNISRK0ZPl4qertvXSIE8YbsVJueQ90cDt9YYyw==", "dev": true }, "@types/minio": { @@ -177,23 +177,23 @@ } }, "@types/node": { - "version": "8.10.20", - "resolved": "https://registry.npmjs.org/@types/node/-/node-8.10.20.tgz", - "integrity": "sha512-M7x8+5D1k/CuA6jhiwuSCmE8sbUWJF0wYsjcig9WrXvwUI5ArEoUBdOXpV4JcEMrLp02/QbDjw+kI+vQeKyQgg==" + "version": "8.10.46", + "resolved": "https://registry.npmjs.org/@types/node/-/node-8.10.46.tgz", + "integrity": "sha512-PfnRbk836fFs9T9QnZh0G1k9oC6YXCqIK3LX6vU/6oiXtEBSFCiJFj6UnLZtqIIHTsgMn8Dojq3yhmpwY7QWcw==" }, "@types/node-fetch": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.1.2.tgz", - "integrity": "sha512-XroxUzLpKuL+CVkQqXlffRkEPi4Gh3Oui/mWyS7ztKiyqVxiU+h3imCW5I2NQmde5jK+3q++36/Q96cyRWsweg==", + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.3.2.tgz", + "integrity": "sha512-yW0EOebSsQme9yKu09XbdDfle4/SmWZMK4dfteWcSLCYNQQcF+YOv0kIrvm+9pO11/ghA4E6A+RNQqvYj4Nr3A==", "dev": true, "requires": { "@types/node": "*" } }, "@types/range-parser": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/@types/range-parser/-/range-parser-1.2.2.tgz", - "integrity": "sha512-HtKGu+qG1NPvYe1z7ezLsyIaXYyi8SoAVqWDZgDQ8dLrsZvSzUNCwZyfX33uhWxL/SU0ZDQZ3nwZ0nimt507Kw==", + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/@types/range-parser/-/range-parser-1.2.3.tgz", + "integrity": "sha512-ewFXqrQHlFsgc09MK5jP5iR7vumV/BYayNC6PgJO2LPe8vrnNFyjQjSppfEngITi0qvfKtzFvgKymGheFM9UOA==", "dev": true }, "@types/request": { @@ -218,18 +218,18 @@ } }, "@types/tough-cookie": { - "version": "2.3.4", - "resolved": "https://registry.npmjs.org/@types/tough-cookie/-/tough-cookie-2.3.4.tgz", - "integrity": "sha512-Set5ZdrAaKI/qHdFlVMgm/GsAv/wkXhSTuZFkJ+JI7HK+wIkIlOaUXSXieIvJ0+OvGIqtREFoE+NHJtEq0gtEw==" + "version": "2.3.5", + "resolved": "https://registry.npmjs.org/@types/tough-cookie/-/tough-cookie-2.3.5.tgz", + "integrity": "sha512-SCcK7mvGi3+ZNz833RRjFIxrn4gI1PPR3NtuIS+6vMkvmsGjosqTJwRt5bAEFLRz+wtJMWv8+uOnZf2hi2QXTg==" }, "@types/underscore": { - "version": "1.8.9", - "resolved": "https://registry.npmjs.org/@types/underscore/-/underscore-1.8.9.tgz", - "integrity": "sha512-vfzZGgZKRFy7KEWcBGfIFk+h6B+thDCLfkD1exMBMRlUsx2icA+J6y4kAbZs/TjSTeY1duw89QUU133TSzr60Q==" + "version": "1.8.14", + "resolved": "https://registry.npmjs.org/@types/underscore/-/underscore-1.8.14.tgz", + "integrity": "sha512-xbzi6UaATVKupInG3D65/EPQ3qkJCvG2ZAzmlIYt6x93ACOEX2Y0fHW4/e8TF3G7q5KB2l7wTZgzfNjyYDMuZw==" }, "@types/websocket": { "version": "0.0.38", - "resolved": "http://registry.npmjs.org/@types/websocket/-/websocket-0.0.38.tgz", + "resolved": "https://registry.npmjs.org/@types/websocket/-/websocket-0.0.38.tgz", "integrity": "sha512-Z7dRTAiMoIjz9HBa/xb3k+2mx2uJx2sbnbkRRIvM+l/srNLfthHFBW/jD59thOcEa1/ZooKd30G0D+KGH9wU7Q==", "requires": { "@types/events": "*", @@ -241,6 +241,14 @@ "resolved": "https://registry.npmjs.org/JSONSelect/-/JSONSelect-0.4.0.tgz", "integrity": "sha1-oI7cxn6z/L6Z7WMIVTRKDPKCu40=" }, + "abort-controller": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz", + "integrity": 
"sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==", + "requires": { + "event-target-shim": "^5.0.0" + } + }, "accepts": { "version": "1.3.5", "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.5.tgz", @@ -250,15 +258,23 @@ "negotiator": "0.6.1" } }, + "agent-base": { + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-4.2.1.tgz", + "integrity": "sha512-JVwXMr9nHYTUXsBFKUqhJwvlcYU/blreOEUkhNR2eXZIvwd+c+o5V4MgDPKWnMS/56awN3TRzIP+KoPn+roQtg==", + "requires": { + "es6-promisify": "^5.0.0" + } + }, "ajv": { - "version": "5.5.2", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-5.5.2.tgz", - "integrity": "sha1-c7Xuyj+rZT49P5Qis0GtQiBdyWU=", + "version": "6.10.0", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.10.0.tgz", + "integrity": "sha512-nffhOpkymDECQyR0mnsUtoCE8RlX38G0rYP+wgLWFyZuUyuuojSSvi/+euOiQBIn63whYwYVIIH1TvE3tu4OEg==", "requires": { - "co": "^4.6.0", - "fast-deep-equal": "^1.0.0", + "fast-deep-equal": "^2.0.1", "fast-json-stable-stringify": "^2.0.0", - "json-schema-traverse": "^0.3.0" + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" } }, "argparse": { @@ -305,9 +321,12 @@ "integrity": "sha1-iYUI2iIm84DfkEcoRWhJwVAaSw0=" }, "asn1": { - "version": "0.2.3", - "resolved": "https://registry.npmjs.org/asn1/-/asn1-0.2.3.tgz", - "integrity": "sha1-2sh4dxPJlmhJ/IGAd36+nB3fO4Y=" + "version": "0.2.4", + "resolved": "https://registry.npmjs.org/asn1/-/asn1-0.2.4.tgz", + "integrity": "sha512-jxwzQpLQjSmWXgwaCZE9Nz+glAG01yF1QnWgbhGwHI5A6FRIEY6IVqtHhIepHqI7/kyEyQEagBC5mBEFlIYvdg==", + "requires": { + "safer-buffer": "~2.1.0" + } }, "assert-plus": { "version": "1.0.0", @@ -320,11 +339,11 @@ "integrity": "sha1-WWZ/QfrdTyDMvCu5a41Pf3jsA2c=" }, "async": { - "version": "2.6.1", - "resolved": "https://registry.npmjs.org/async/-/async-2.6.1.tgz", - "integrity": "sha512-fNEiL2+AZt6AlAw/29Cr0UDe4sRAHCpEHh54WMz+Bb7QfNcFw4h3loofyJpLeQs4Yx7yuqu/2dLgM5hKOs6HlQ==", + "version": "2.6.2", + "resolved": "https://registry.npmjs.org/async/-/async-2.6.2.tgz", + "integrity": "sha512-H1qVYh1MYhEEFLsP97cVKqCGo7KfCyTt6uEWqsTBr9SO84oK9Uwbyd/yCW+6rKJLHksBNUVWZDAjfS+Ccx0Bbg==", "requires": { - "lodash": "^4.17.10" + "lodash": "^4.17.11" } }, "asynckit": { @@ -333,9 +352,9 @@ "integrity": "sha1-x57Zf380y48robyXkLzDZkdLS3k=" }, "atob": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/atob/-/atob-2.1.1.tgz", - "integrity": "sha1-ri1acpR38onWDdf5amMUoi3Wwio=" + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/atob/-/atob-2.1.2.tgz", + "integrity": "sha512-Wm6ukoaOGJi/73p/cl2GvLjTI5JM1k/O14isD73YML8StrH/7/lRFgmg8nICZgD3bZZvjwCGxtMOD3wWNAu8cg==" }, "aws-sign2": { "version": "0.7.0", @@ -343,9 +362,9 @@ "integrity": "sha1-tG6JCTSpWR8tL2+G1+ap8bP+dqg=" }, "aws4": { - "version": "1.7.0", - "resolved": "https://registry.npmjs.org/aws4/-/aws4-1.7.0.tgz", - "integrity": "sha512-32NDda82rhwD9/JBCCkB+MRYDp0oSvlo2IL6rQWA10PQi7tDUM3eqMSltXmY+Oyl/7N3P3qNtAlv7X0d9bI28w==" + "version": "1.8.0", + "resolved": "https://registry.npmjs.org/aws4/-/aws4-1.8.0.tgz", + "integrity": "sha512-ReZxvNHIOv88FlT7rxcXIIC0fPt4KZqZbOlivyWtXLt8ESx84zd3kMC6iK5jVeS2qt+g7ftS7ye4fi06X5rtRQ==" }, "axios": { "version": "0.18.0", @@ -417,10 +436,9 @@ "integrity": "sha1-eAqZyE59YAJgNhURxId2E78k9rs=" }, "bcrypt-pbkdf": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.1.tgz", - "integrity": "sha1-Y7xdy2EzG5K8Bf1SiVPDNGKgb40=", - "optional": true, + "version": "1.0.2", + "resolved": 
"https://registry.npmjs.org/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.2.tgz", + "integrity": "sha1-pDAdOJtqQ/m2f/PKEaP2Y342Dp4=", "requires": { "tweetnacl": "^0.14.3" } @@ -436,25 +454,25 @@ } }, "bluebird": { - "version": "3.5.3", - "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.5.3.tgz", - "integrity": "sha512-/qKPUQlaW1OyR51WeCPBvRnAlnZFUJkCSG5HzGnuIqhgyJtF+T94lFnn33eiazjRm2LAHVy2guNnaq48X9SJuw==" + "version": "3.5.4", + "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.5.4.tgz", + "integrity": "sha512-FG+nFEZChJrbQ9tIccIfZJBz3J7mLrAhxakAbnrJWn8d7aKOC+LWifa0G+p4ZqKp4y13T7juYvdhq9NzKdsrjw==" }, "body-parser": { - "version": "1.18.2", - "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.18.2.tgz", - "integrity": "sha1-h2eKGdhLR9hZuDGZvVm84iKxBFQ=", + "version": "1.18.3", + "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.18.3.tgz", + "integrity": "sha1-WykhmP/dVTs6DyDe0FkrlWlVyLQ=", "requires": { "bytes": "3.0.0", "content-type": "~1.0.4", "debug": "2.6.9", - "depd": "~1.1.1", - "http-errors": "~1.6.2", - "iconv-lite": "0.4.19", + "depd": "~1.1.2", + "http-errors": "~1.6.3", + "iconv-lite": "0.4.23", "on-finished": "~2.3.0", - "qs": "6.5.1", - "raw-body": "2.3.2", - "type-is": "~1.6.15" + "qs": "6.5.2", + "raw-body": "2.3.3", + "type-is": "~1.6.16" }, "dependencies": { "debug": { @@ -465,10 +483,10 @@ "ms": "2.0.0" } }, - "qs": { - "version": "6.5.1", - "resolved": "https://registry.npmjs.org/qs/-/qs-6.5.1.tgz", - "integrity": "sha512-eRzhrN1WSINYCDCbrz796z37LOe3m5tmW7RQf6oBntukAG1nmovJvhnwHHRMAfeoItc1m2Hk02WER2aQ/iqs+A==" + "ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=" } } }, @@ -514,9 +532,9 @@ "integrity": "sha1-+OcRMvf/5uAaXJaXpMbz5I1cyBk=" }, "buffer-from": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.0.tgz", - "integrity": "sha512-c5mRlguI/Pe2dSZmpER62rSCu0ryKmWddzRYsuXc50U2/g8jMOulc31VZMa4mYx31U5xsmSOpDCgH88Vl9cDGQ==" + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.1.tgz", + "integrity": "sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A==" }, "byline": { "version": "5.0.0", @@ -545,9 +563,9 @@ } }, "capture-stack-trace": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/capture-stack-trace/-/capture-stack-trace-1.0.0.tgz", - "integrity": "sha1-Sm+gc5nCa7pH8LJJa00PtAjFVQ0=" + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/capture-stack-trace/-/capture-stack-trace-1.0.1.tgz", + "integrity": "sha512-mYQLZnx5Qt1JgB1WEiMCf2647plpGeQ2NMR/5L0HNZzGQo4fuSPnK+wjfPnKZV0aiJDgzmWqqkV/g7JD+DW0qw==" }, "caseless": { "version": "0.12.0", @@ -555,9 +573,9 @@ "integrity": "sha1-G2gcIf+EAzyCZUMJBolCDRhxUdw=" }, "chownr": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.0.1.tgz", - "integrity": "sha1-4qdQQqlVGQi+vSW4Uj1fl2nXkYE=" + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.1.tgz", + "integrity": "sha512-j38EvO5+LHX84jlo6h4UzmOwi0UgW61WRyPtJz4qaadK5eY3BTS5TY/S1Stc3Uk2lIM6TPevAlULiEJwie860g==" }, "cjson": { "version": "0.2.1", @@ -585,11 +603,6 @@ } } }, - "co": { - "version": "4.6.0", - "resolved": "https://registry.npmjs.org/co/-/co-4.6.0.tgz", - "integrity": "sha1-bqa989hTrlTMuOR7+gvz+QMfsYQ=" - }, "collection-visit": { "version": "1.0.0", "resolved": 
"https://registry.npmjs.org/collection-visit/-/collection-visit-1.0.0.tgz", @@ -601,35 +614,28 @@ }, "colors": { "version": "0.5.1", - "resolved": "http://registry.npmjs.org/colors/-/colors-0.5.1.tgz", + "resolved": "https://registry.npmjs.org/colors/-/colors-0.5.1.tgz", "integrity": "sha1-fQAj6usVTo7p/Oddy5I9DtFmd3Q=" }, "combined-stream": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.6.tgz", - "integrity": "sha1-cj599ugBrFYTETp+RFqbactjKBg=", + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.7.tgz", + "integrity": "sha512-brWl9y6vOB1xYPZcpZde3N9zDByXTosAeMDo4p1wzo6UMOX4vumB+TP1RZ76sfE6Md68Q0NJSrE/gbezd4Ul+w==", "requires": { "delayed-stream": "~1.0.0" } }, "component-emitter": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/component-emitter/-/component-emitter-1.2.1.tgz", - "integrity": "sha1-E3kY1teCg/ffemt8WmPhQOaUJeY=" + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/component-emitter/-/component-emitter-1.3.0.tgz", + "integrity": "sha512-Rd3se6QB+sO1TwqZjscQrurpEPIfO0/yYnSin6Q/rD3mOutHvUrCAhJub3r90uNb+SESBuE0QYoB90YdfatsRg==" }, "compressible": { - "version": "2.0.14", - "resolved": "https://registry.npmjs.org/compressible/-/compressible-2.0.14.tgz", - "integrity": "sha1-MmxfUH+7BV9UEWeCuWmoG2einac=", + "version": "2.0.16", + "resolved": "https://registry.npmjs.org/compressible/-/compressible-2.0.16.tgz", + "integrity": "sha512-JQfEOdnI7dASwCuSPWIeVYwc/zMsu/+tRhoUvEfXz2gxOA2DNjmG5vhtFdBlhWPPGo+RdT9S3tgc/uH5qgDiiA==", "requires": { - "mime-db": ">= 1.34.0 < 2" - }, - "dependencies": { - "mime-db": { - "version": "1.34.0", - "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.34.0.tgz", - "integrity": "sha1-RS0Oz/XDA0am3B5kseruDTcZ/5o=" - } + "mime-db": ">= 1.38.0 < 2" } }, "concat-map": { @@ -713,11 +719,11 @@ } }, "debug": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/debug/-/debug-3.1.0.tgz", - "integrity": "sha512-OX8XqP7/1a9cqkxYw2yXss15f26NKWBpDXQd0/uK/KPqdQhxbPa994hnzjcE2VqQpDslf55723cKPUOGSmMY3g==", + "version": "3.2.6", + "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.6.tgz", + "integrity": "sha512-mel+jf7nrtEl5Pn1Qx46zARXKDpBbvzezse7p7LqINmdoIk8PYP5SySaxEmYv6TZ0JyEKA1hsCId6DIhgITtWQ==", "requires": { - "ms": "2.0.0" + "ms": "^2.1.1" } }, "decode-uri-component": { @@ -796,9 +802,9 @@ } }, "duplexify": { - "version": "3.6.0", - "resolved": "https://registry.npmjs.org/duplexify/-/duplexify-3.6.0.tgz", - "integrity": "sha512-fO3Di4tBKJpYTFHAxTU00BcfWMY9w24r/x21a6rZRbsD/ToUgGxsMbiGRmB7uVAXeGKXD9MwiLZa5E97EVgIRQ==", + "version": "3.7.1", + "resolved": "https://registry.npmjs.org/duplexify/-/duplexify-3.7.1.tgz", + "integrity": "sha512-07z8uv2wMyS51kKhD1KsdXJg5WQ6t93RneqRxUHnskXVtlYYkLqM0gqStQZ3pj073g687jPCHrqNfCzawLYh5g==", "requires": { "end-of-stream": "^1.0.0", "inherits": "^2.0.1", @@ -812,18 +818,18 @@ "integrity": "sha1-zR9rpHfFY4xAyX7ZtXLbW6tdgzE=" }, "ecc-jsbn": { - "version": "0.1.1", - "resolved": "https://registry.npmjs.org/ecc-jsbn/-/ecc-jsbn-0.1.1.tgz", - "integrity": "sha1-D8c6ntXw1Tw4GTOYUj735UN3dQU=", - "optional": true, + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/ecc-jsbn/-/ecc-jsbn-0.1.2.tgz", + "integrity": "sha1-OoOpBOVDUyh4dMVkt1SThoSamMk=", "requires": { - "jsbn": "~0.1.0" + "jsbn": "~0.1.0", + "safer-buffer": "^2.1.0" } }, "ecdsa-sig-formatter": { - "version": "1.0.10", - "resolved": 
"https://registry.npmjs.org/ecdsa-sig-formatter/-/ecdsa-sig-formatter-1.0.10.tgz", - "integrity": "sha1-HFlQAPBKiJffuFAAiSoPTDOvhsM=", + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/ecdsa-sig-formatter/-/ecdsa-sig-formatter-1.0.11.tgz", + "integrity": "sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==", "requires": { "safe-buffer": "^5.0.1" } @@ -856,6 +862,19 @@ "resolved": "https://registry.npmjs.org/es6-error/-/es6-error-2.1.1.tgz", "integrity": "sha1-kThDAexe0cmnJH0RKCRyFvA1R80=" }, + "es6-promise": { + "version": "4.2.6", + "resolved": "https://registry.npmjs.org/es6-promise/-/es6-promise-4.2.6.tgz", + "integrity": "sha512-aRVgGdnmW2OiySVPUC9e6m+plolMAJKjZnQlCwNSuK5yQ0JN61DZSO1X1Ufd1foqWRAlig0rhduTCHe7sVtK5Q==" + }, + "es6-promisify": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/es6-promisify/-/es6-promisify-5.0.0.tgz", + "integrity": "sha1-UQnWLz5W6pZ8S2NQWu8IKRyKUgM=", + "requires": { + "es6-promise": "^4.0.3" + } + }, "escape-html": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", @@ -898,6 +917,11 @@ "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz", "integrity": "sha1-Qa4u62XvpiJorr/qg6x9eSmbCIc=" }, + "event-target-shim": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz", + "integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==" + }, "eventemitter3": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-3.1.0.tgz", @@ -940,17 +964,22 @@ "requires": { "is-extendable": "^0.1.0" } + }, + "ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=" } } }, "express": { - "version": "4.16.3", - "resolved": "https://registry.npmjs.org/express/-/express-4.16.3.tgz", - "integrity": "sha1-avilAjUNsyRuzEvs9rWjTSL37VM=", + "version": "4.16.4", + "resolved": "https://registry.npmjs.org/express/-/express-4.16.4.tgz", + "integrity": "sha512-j12Uuyb4FMrd/qQAm6uCHAkPtO8FDTRJZBDd5D2KOL2eLaz1yUNdUB/NOIyq0iU4q4cFarsUCrnFDPBcnksuOg==", "requires": { "accepts": "~1.3.5", "array-flatten": "1.1.1", - "body-parser": "1.18.2", + "body-parser": "1.18.3", "content-disposition": "0.5.2", "content-type": "~1.0.4", "cookie": "0.3.1", @@ -967,10 +996,10 @@ "on-finished": "~2.3.0", "parseurl": "~1.3.2", "path-to-regexp": "0.1.7", - "proxy-addr": "~2.0.3", - "qs": "6.5.1", + "proxy-addr": "~2.0.4", + "qs": "6.5.2", "range-parser": "~1.2.0", - "safe-buffer": "5.1.1", + "safe-buffer": "5.1.2", "send": "0.16.2", "serve-static": "1.13.2", "setprototypeof": "1.1.0", @@ -988,22 +1017,17 @@ "ms": "2.0.0" } }, - "qs": { - "version": "6.5.1", - "resolved": "https://registry.npmjs.org/qs/-/qs-6.5.1.tgz", - "integrity": "sha512-eRzhrN1WSINYCDCbrz796z37LOe3m5tmW7RQf6oBntukAG1nmovJvhnwHHRMAfeoItc1m2Hk02WER2aQ/iqs+A==" - }, - "safe-buffer": { - "version": "5.1.1", - "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.1.tgz", - "integrity": "sha512-kKvNJn6Mm93gAczWVJg7wH+wGYWNrDHdWvpUmHyEsgCtIwwo3bqPtV4tR5tuPaUhTOo/kvhVwd8XwwOllGYkbg==" + "ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=" } } }, "extend": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.1.tgz", - "integrity": 
"sha1-p1Xqe8Gt/MWjHOfnYtuq3F5jZEQ=" + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", + "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==" }, "extend-shallow": { "version": "3.0.2", @@ -1089,9 +1113,9 @@ "integrity": "sha1-lpGEQOMEGnpBT4xS48V06zw+HgU=" }, "fast-deep-equal": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-1.1.0.tgz", - "integrity": "sha1-wFNHeBfIa1HaqFPIHgWbcz0CNhQ=" + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-2.0.1.tgz", + "integrity": "sha1-ewUhjd+WZ79/Nwv3/bLLFf3Qqkk=" }, "fast-json-stable-stringify": { "version": "2.0.0", @@ -1145,15 +1169,20 @@ "requires": { "ms": "2.0.0" } + }, + "ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=" } } }, "follow-redirects": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.5.0.tgz", - "integrity": "sha512-fdrt472/9qQ6Kgjvb935ig6vJCuofpBUD14f9Vb+SLlm7xIe4Qva5gey8EKtv8lp7ahE1wilg3xL1znpVGtZIA==", + "version": "1.7.0", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.7.0.tgz", + "integrity": "sha512-m/pZQy4Gj287eNy94nivy5wchN3Kp+Q5WgUPNy5lJSZ3sgkVKSYV/ZChMAQVIgx1SqfZ2zBZtPA2YlXIWxxJOQ==", "requires": { - "debug": "^3.1.0" + "debug": "^3.2.6" } }, "for-in": { @@ -1167,12 +1196,12 @@ "integrity": "sha1-+8cfDEGt6zf5bFd60e1C2P2sypE=" }, "form-data": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/form-data/-/form-data-2.3.2.tgz", - "integrity": "sha1-SXBJi+YEwgwAXU9cI67NIda0kJk=", + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-2.3.3.tgz", + "integrity": "sha512-1lLKB2Mu3aGP1Q/2eCOx0fNbRMe7XdwktwOruhfqqd0rIJWwN4Dh+E3hrPSlDCXnSR7UtZ1N38rVXm+6+MEhJQ==", "requires": { "asynckit": "^0.4.0", - "combined-stream": "1.0.6", + "combined-stream": "^1.0.6", "mime-types": "^2.1.12" } }, @@ -1207,6 +1236,17 @@ "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", "integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8=" }, + "gaxios": { + "version": "1.8.4", + "resolved": "https://registry.npmjs.org/gaxios/-/gaxios-1.8.4.tgz", + "integrity": "sha512-BoENMnu1Gav18HcpV9IleMPZ9exM+AvUjrAOV4Mzs/vfz2Lu/ABv451iEXByKiMPn2M140uul1txXCg83sAENw==", + "requires": { + "abort-controller": "^3.0.0", + "extend": "^3.0.2", + "https-proxy-agent": "^2.2.1", + "node-fetch": "^2.3.0" + } + }, "gcp-metadata": { "version": "0.6.3", "resolved": "https://registry.npmjs.org/gcp-metadata/-/gcp-metadata-0.6.3.tgz", @@ -1281,29 +1321,29 @@ } }, "google-p12-pem": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/google-p12-pem/-/google-p12-pem-1.0.2.tgz", - "integrity": "sha512-+EuKr4CLlGsnXx4XIJIVkcKYrsa2xkAmCvxRhX2HsazJzUBAJ35wARGeApHUn4nNfPD03Vl057FskNr20VaCyg==", + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/google-p12-pem/-/google-p12-pem-1.0.4.tgz", + "integrity": "sha512-SwLAUJqUfTB2iS+wFfSS/G9p7bt4eWcc2LyfvmUXe7cWp6p3mpxDo6LLI29MXdU6wvPcQ/up298X7GMC5ylAlA==", "requires": { - "node-forge": "^0.7.4", - "pify": "^3.0.0" + "node-forge": "^0.8.0", + "pify": "^4.0.0" } }, "graceful-fs": { - "version": "4.1.11", - "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.1.11.tgz", - "integrity": "sha1-Dovf5NHduIVNZOBOp8AOKgJuVlg=" + "version": "4.1.15", + "resolved": 
"https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.1.15.tgz", + "integrity": "sha512-6uHUhOPEBgQ24HM+r6b/QwWfZq+yiFcipKFrOFiBEnWdy5sdzYoi+pJeQaPI5qOLRFqWmAXUPQNsielzdLoecA==" }, "gtoken": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/gtoken/-/gtoken-2.3.0.tgz", - "integrity": "sha512-Jc9/8mV630cZE9FC5tIlJCZNdUjwunvlwOtCz6IDlaiB4Sz68ki29a1+q97sWTnTYroiuF9B135rod9zrQdHLw==", + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/gtoken/-/gtoken-2.3.3.tgz", + "integrity": "sha512-EaB49bu/TCoNeQjhCYKI/CurooBKkGxIqFHsWABW0b25fobBYVTMe84A8EBVVZhl8emiUdNypil9huMOTmyAnw==", "requires": { - "axios": "^0.18.0", + "gaxios": "^1.0.4", "google-p12-pem": "^1.0.0", - "jws": "^3.1.4", + "jws": "^3.1.5", "mime": "^2.2.0", - "pify": "^3.0.0" + "pify": "^4.0.0" } }, "har-schema": { @@ -1312,11 +1352,11 @@ "integrity": "sha1-qUwiJOvKwEeCoNkDVSHyRzW37JI=" }, "har-validator": { - "version": "5.0.3", - "resolved": "https://registry.npmjs.org/har-validator/-/har-validator-5.0.3.tgz", - "integrity": "sha1-ukAsJmGU8VlW7xXg/PJCmT9qff0=", + "version": "5.1.3", + "resolved": "https://registry.npmjs.org/har-validator/-/har-validator-5.1.3.tgz", + "integrity": "sha512-sNvOCzEQNr/qrvJgc3UG/kD4QtlHycrzwS+6mfTrrSq97BvaYcPZZI1ZSqGSPR73Cxn4LKTD4PttRwfU7jWq5g==", "requires": { - "ajv": "^5.1.0", + "ajv": "^6.5.5", "har-schema": "^2.0.0" } }, @@ -1399,10 +1439,22 @@ "sshpk": "^1.7.0" } }, + "https-proxy-agent": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-2.2.1.tgz", + "integrity": "sha512-HPCTS1LW51bcyMYbxUIOO4HEOlQ1/1qRaFWcyxvwaqUS9TY88aoEuHUY33kuAh1YhVVaDQhLZsnPd+XNARWZlQ==", + "requires": { + "agent-base": "^4.1.0", + "debug": "^3.1.0" + } + }, "iconv-lite": { - "version": "0.4.19", - "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.19.tgz", - "integrity": "sha512-oTZqweIP51xaGPI4uPa56/Pri/480R+mo7SeU+YETByQNhDG55ycFyNLIgta9vXhILrxXDmF7ZGhqZIcuN0gJQ==" + "version": "0.4.23", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.23.tgz", + "integrity": "sha512-neyTUVFtahjf0mB3dZT77u+8O0QB89jFdnBkd5P1JgYPbPaia3gXXOVL2fq8VyU2gMMD7SaN7QukTB/pmXYvDA==", + "requires": { + "safer-buffer": ">= 2.1.2 < 3" + } }, "imurmurhash": { "version": "0.1.4", @@ -1424,19 +1476,19 @@ "integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=" }, "interpret": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/interpret/-/interpret-1.1.0.tgz", - "integrity": "sha1-ftGxQQxqDg94z5XTuEQMY/eLhhQ=" + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/interpret/-/interpret-1.2.0.tgz", + "integrity": "sha512-mT34yGKMNceBQUoVn7iCDKDntA7SC6gycMAWzGx1z/CMCTV7b2AAtXlo3nRyHZ1FelRkQbQjprHSYGwzLtkVbw==" }, "ipaddr.js": { - "version": "1.6.0", - "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.6.0.tgz", - "integrity": "sha1-4/o1e3c9phnybpXwSdBVxyeW+Gs=" + "version": "1.9.0", + "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.0.tgz", + "integrity": "sha512-M4Sjn6N/+O6/IXSJseKqHoFc+5FdGJ22sXqnjTpdZweHK64MzEPAyQZyEU3R/KRv2GLoa7nNtg/C2Ev6m7z+eA==" }, "is": { - "version": "3.2.1", - "resolved": "https://registry.npmjs.org/is/-/is-3.2.1.tgz", - "integrity": "sha1-0Kwq1V63sL7JJqUmb2xmKqqD3KU=" + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/is/-/is-3.3.0.tgz", + "integrity": "sha512-nW24QBoPcFGGHJGUwnfpI7Yc5CdqWNdsyHQszVE/z2pKHXzh7FZ5GWhJqSyaQ9wMkQnsTx+kAI8bHlCX4tKdbg==" }, "is-accessor-descriptor": { "version": "0.1.6", @@ -1507,9 +1559,9 @@ "integrity": 
"sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=" }, "is-glob": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.0.tgz", - "integrity": "sha1-lSHHaEXMJhCoUgPd8ICpWML/q8A=", + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.1.tgz", + "integrity": "sha512-5G0tKtBTFImOqDnLB2hG6Bp2qcKEFduo4tZu9MT/H6NQv/ghhy30o55ufafxJ/LdH79LLs2Kfrn85TLKyA7BUg==", "requires": { "is-extglob": "^2.1.1" } @@ -1537,21 +1589,6 @@ "resolved": "https://registry.npmjs.org/is-obj/-/is-obj-1.0.1.tgz", "integrity": "sha1-PkcprB9f3gJc19g6iW2rn09n2w8=" }, - "is-odd": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/is-odd/-/is-odd-2.0.0.tgz", - "integrity": "sha512-OTiixgpZAT1M4NHgS5IguFp/Vz2VI3U7Goh4/HA1adtwyLtSBrxYlcSYkhpAE07s4fKEcjrFxyvtQBND4vFQyQ==", - "requires": { - "is-number": "^4.0.0" - }, - "dependencies": { - "is-number": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-4.0.0.tgz", - "integrity": "sha512-rSklcAIlf1OmFdyAqbnWTLVelsQ58uvZ66S/ZyawjWqIviTWCjg2PzVGw8WUA+nNuPTqb4wgA+NszrJ+08LlgQ==" - } - } - }, "is-plain-object": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-2.0.4.tgz", @@ -1622,9 +1659,9 @@ } }, "js-yaml": { - "version": "3.12.0", - "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.12.0.tgz", - "integrity": "sha512-PIt2cnwmPfL4hKNwqeiuz4bKfnzHTBv6HyVgjahA6mPLwPDzjDWrplJBMjHUFxku/N3FlmrbyPclad+I+4mJ3A==", + "version": "3.13.1", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.13.1.tgz", + "integrity": "sha512-YfbcO7jXDdyj0DGxYVSlSeQNHbD7XPWvrVWeVUujrQEoZzWJIRrCPoyk6kL6IAjAG2IolMK4T0hNUe0HOUs5Jw==", "requires": { "argparse": "^1.0.7", "esprima": "^4.0.0" @@ -1633,8 +1670,7 @@ "jsbn": { "version": "0.1.1", "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz", - "integrity": "sha1-peZUwuWi3rXyAdls77yoDA7y9RM=", - "optional": true + "integrity": "sha1-peZUwuWi3rXyAdls77yoDA7y9RM=" }, "json-schema": { "version": "0.2.3", @@ -1642,9 +1678,9 @@ "integrity": "sha1-tIDIkuWaLwWVTOcnvT8qTogvnhM=" }, "json-schema-traverse": { - "version": "0.3.1", - "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.3.1.tgz", - "integrity": "sha1-NJptRMU6Ud6JtAgFxdXlm0F9M0A=" + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==" }, "json-stream": { "version": "1.0.0", @@ -1657,13 +1693,13 @@ "integrity": "sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus=" }, "jsonpath": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/jsonpath/-/jsonpath-1.0.0.tgz", - "integrity": "sha1-Rc2dTE0NaCXZC9fkD4PxGCsT3Qc=", + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/jsonpath/-/jsonpath-1.0.1.tgz", + "integrity": "sha512-HY5kSg82LHIs0r0h9gYBwpNc1w1qGY0qJ7al01W1bJltsN2lp+mjjA/a79gXWuvD6Xf8oPkD2d5uKMZQXTGzqA==", "requires": { "esprima": "1.2.2", "jison": "0.4.13", - "static-eval": "2.0.0", + "static-eval": "2.0.2", "underscore": "1.7.0" }, "dependencies": { @@ -1691,21 +1727,21 @@ } }, "jwa": { - "version": "1.1.6", - "resolved": "https://registry.npmjs.org/jwa/-/jwa-1.1.6.tgz", - "integrity": "sha512-tBO/cf++BUsJkYql/kBbJroKOgHWEigTKBAjjBEmrMGYd1QMBC74Hr4Wo2zCZw6ZrVhlJPvoMrkcOnlWR/DJfw==", + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/jwa/-/jwa-1.4.1.tgz", + "integrity": 
"sha512-qiLX/xhEEFKUAJ6FiBMbes3w9ATzyk5W7Hvzpa/SLYdxNtng+gcurvrI7TbACjIXlsJyr05/S1oUhZrc63evQA==", "requires": { "buffer-equal-constant-time": "1.0.1", - "ecdsa-sig-formatter": "1.0.10", + "ecdsa-sig-formatter": "1.0.11", "safe-buffer": "^5.0.1" } }, "jws": { - "version": "3.1.5", - "resolved": "https://registry.npmjs.org/jws/-/jws-3.1.5.tgz", - "integrity": "sha512-GsCSexFADNQUr8T5HPJvayTjvPIfoyJPtLQBwn5a4WZQchcrPMPMAWcC1AzJVRDKyD6ZPROPAxgv6rfHViO4uQ==", + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/jws/-/jws-3.2.2.tgz", + "integrity": "sha512-YHlZCB6lMTllWDtSPHz/ZXTsi8S00usEV6v1tjq8tOUZzw7DpSDWVXjXDre6ed1w/pd495ODpHZYSdkRTsa0HA==", "requires": { - "jwa": "^1.1.5", + "jwa": "^1.4.1", "safe-buffer": "^5.0.1" } }, @@ -1744,9 +1780,9 @@ "integrity": "sha512-U7KCmLdqsGHBLeWqYlFA0V0Sl6P08EE1ZrmA9cxjUE0WVqT9qnyVDPz1kzpFEP0jdJuFnasWIfSd7fsaNXkpbg==" }, "lru-cache": { - "version": "4.1.3", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-4.1.3.tgz", - "integrity": "sha512-fFEhvcgzuIoJVUF8fYr5KR0YqxD238zgObTps31YdADwPPAp82a4M8TrckkWyx7ekNlf9aBcVn81cFwwXngrJA==", + "version": "4.1.5", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-4.1.5.tgz", + "integrity": "sha512-sWZlbEP2OsHNkXrMl5GYk/jKk70MBng6UU4YI/qGDYbgf6YbP4EvmqISbXCoJiRKs+1bSpFHVgQxvJ17F2li5g==", "requires": { "pseudomap": "^1.0.2", "yallist": "^2.1.2" @@ -1758,6 +1794,13 @@ "integrity": "sha512-2w31R7SJtieJJnQtGc7RVL2StM2vGYVfqUOvUDxH6bC6aJTxPxTF0GnIgCyu7tjockiUWAYQRbxa7vKn34s5sQ==", "requires": { "pify": "^3.0.0" + }, + "dependencies": { + "pify": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pify/-/pify-3.0.0.tgz", + "integrity": "sha1-5aSs0sEB/fPZpNB/DbxNtJ3SgXY=" + } } }, "map-cache": { @@ -1814,21 +1857,21 @@ } }, "mime": { - "version": "2.3.1", - "resolved": "https://registry.npmjs.org/mime/-/mime-2.3.1.tgz", - "integrity": "sha512-OEUllcVoydBHGN1z84yfQDimn58pZNNNXgZlHXSboxMlFvgI6MXSWpWKpFRra7H1HxpVhHTkrghfRW49k6yjeg==" + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/mime/-/mime-2.4.2.tgz", + "integrity": "sha512-zJBfZDkwRu+j3Pdd2aHsR5GfH2jIWhmL1ZzBoc+X+3JEti2hbArWcyJ+1laC1D2/U/W1a/+Cegj0/OnEU2ybjg==" }, "mime-db": { - "version": "1.33.0", - "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.33.0.tgz", - "integrity": "sha512-BHJ/EKruNIqJf/QahvxwQZXKygOQ256myeN/Ew+THcAa5q+PjyTTMMeNQC4DZw5AwfvelsUrA6B67NKMqXDbzQ==" + "version": "1.40.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.40.0.tgz", + "integrity": "sha512-jYdeOMPy9vnxEqFRRo6ZvTZ8d9oPb+k18PKoYNYUe2stVEBPPwsln/qWzdbmaIvnhZ9v2P+CuecK+fpUfsV2mA==" }, "mime-types": { - "version": "2.1.18", - "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.18.tgz", - "integrity": "sha512-lc/aahn+t4/SWV/qcmumYjymLsWfN3ELhpmVuUFjgsORruuZPVSwAQryq+HHGvO/SI2KVX26bx+En+zhM8g8hQ==", + "version": "2.1.24", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.24.tgz", + "integrity": "sha512-WaFHS3MCl5fapm3oLxU4eYDw77IQM2ACcxQ9RIxfaC3ooc6PFuBMGZZsYpvoXS5D5QTWPieo1jjLdAm3TBP3cQ==", "requires": { - "mime-db": "~1.33.0" + "mime-db": "1.40.0" } }, "minimatch": { @@ -1845,9 +1888,9 @@ "integrity": "sha1-hX/Kv8M5fSYluCKCYuhqp6ARsF0=" }, "minio": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/minio/-/minio-7.0.0.tgz", - "integrity": "sha512-UpOs2+vpHC2ppicw9x3VVRJcJGhcYZwraLJeYXbvc2HQkSoY9Bws1+yEt/5s0dfMk8/gVyWVaTXIJwrcLpzB0g==", + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/minio/-/minio-7.0.6.tgz", + "integrity": 
"sha512-rGVY6wB17SJD9VP95eD+zk3Lu7+zyg6zhgou2MbO+c8pg4amciH/hdx0ysPvny5byG4WeF0y+VyeZVvzxxyuHA==", "requires": { "async": "^1.5.2", "block-stream2": "^1.0.0", @@ -1902,25 +1945,25 @@ } }, "minipass": { - "version": "2.3.4", - "resolved": "https://registry.npmjs.org/minipass/-/minipass-2.3.4.tgz", - "integrity": "sha512-mlouk1OHlaUE8Odt1drMtG1bAJA4ZA6B/ehysgV0LUIrDHdKgo1KorZq3pK0b/7Z7LJIQ12MNM6aC+Tn6lUZ5w==", + "version": "2.3.5", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-2.3.5.tgz", + "integrity": "sha512-Gi1W4k059gyRbyVUZQ4mEqLm0YIUiGYfvxhF6SIlk3ui1WVxMTGfGdQ2SInh3PDrRTVvPKgULkpJtT4RH10+VA==", "requires": { "safe-buffer": "^5.1.2", "yallist": "^3.0.0" }, "dependencies": { "yallist": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.0.2.tgz", - "integrity": "sha1-hFK0u36Dx8GI2AQcGoN8dz1ti7k=" + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.0.3.tgz", + "integrity": "sha512-S+Zk8DEWE6oKpV+vI3qWkaK+jSbIK86pCwe2IF/xwIpQ8jEuxpw9NyaGjmp9+BoJv5FV2piqCDcoCtStppiq2A==" } } }, "minizlib": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-1.1.0.tgz", - "integrity": "sha512-4T6Ur/GctZ27nHfpt9THOdRZNgyJ9FZchYO1ceg5S8Q3DNLCKYy44nCZzgCJgcvx2UM8czmqak5BCxJMrq37lA==", + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-1.2.1.tgz", + "integrity": "sha512-7+4oTUOWKg7AuL3vloEWekXY2/D20cevzsrNT2kGWm+39J9hGTCBv8VI5Pm5lXZ/o3/mdR4f8rflAPhnQb8mPA==", "requires": { "minipass": "^2.2.1" } @@ -1958,26 +2001,25 @@ "integrity": "sha512-9DITV2YEMcw7XojdfvGl3gDD8J9QjZTJ7ZOUuSAkP+F3T6rDbzMJuPktxptsdHYEvZcmXrCD3LMOhdSAEq6zKA==" }, "ms": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", - "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=" + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz", + "integrity": "sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg==" }, "nan": { - "version": "2.11.1", - "resolved": "https://registry.npmjs.org/nan/-/nan-2.11.1.tgz", - "integrity": "sha512-iji6k87OSXa0CcrLl9z+ZiYSuR2o+c0bGuNmXdrhTQTakxytAFsC56SArGYoiHlJlFoHSnvmhpceZJaXkVuOtA==" + "version": "2.13.2", + "resolved": "https://registry.npmjs.org/nan/-/nan-2.13.2.tgz", + "integrity": "sha512-TghvYc72wlMGMVMluVo9WRJc0mB8KxxF/gZ4YYFy7V2ZQX9l7rgbPg7vjS9mt6U5HXODVFVI2bOduCzwOMv/lw==" }, "nanomatch": { - "version": "1.2.9", - "resolved": "https://registry.npmjs.org/nanomatch/-/nanomatch-1.2.9.tgz", - "integrity": "sha512-n8R9bS8yQ6eSXaV6jHUpKzD8gLsin02w1HSFiegwrs9E098Ylhw5jdyKPaYqvHknHaSCKTPp7C8dGCQ0q9koXA==", + "version": "1.2.13", + "resolved": "https://registry.npmjs.org/nanomatch/-/nanomatch-1.2.13.tgz", + "integrity": "sha512-fpoe2T0RbHwBTBUOftAfBPaDEi06ufaUai0mE6Yn1kacc3SnTErfb/h+X94VXzI64rKFHYImXSvdwGGCmwOqCA==", "requires": { "arr-diff": "^4.0.0", "array-unique": "^0.3.2", "define-property": "^2.0.2", "extend-shallow": "^3.0.2", "fragment-cache": "^0.2.1", - "is-odd": "^2.0.0", "is-windows": "^1.0.2", "kind-of": "^6.0.2", "object.pick": "^1.3.0", @@ -1992,14 +2034,14 @@ "integrity": "sha1-KzJxhOiZIQEXeyhWP7XnECrNDKk=" }, "node-fetch": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.1.2.tgz", - "integrity": "sha1-q4hOjn5X44qUR1POxwb3iNF2i7U=" + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.3.0.tgz", + "integrity": 
"sha512-MOd8pV3fxENbryESLgVIeaGKrdl+uaYhCSSVkjeOb/31/njTpcis5aWfdqgNlHIrKOLRbMnfPINPOML2CIFeXA==" }, "node-forge": { - "version": "0.7.5", - "resolved": "https://registry.npmjs.org/node-forge/-/node-forge-0.7.5.tgz", - "integrity": "sha512-MmbQJ2MTESTjt3Gi/3yG1wGpIMhUfcIypUCGtTizFR9IiccFwxSpfp0vtIZlkFclEqERemxfnSdZEMR9VqqEFQ==" + "version": "0.8.2", + "resolved": "https://registry.npmjs.org/node-forge/-/node-forge-0.8.2.tgz", + "integrity": "sha512-mXQ9GBq1N3uDCyV1pdSzgIguwgtVpM7f5/5J4ipz12PKWElmPpVWLDuWl8iXmhysr21+WmX/OJ5UKx82wjomgg==" }, "nomnom": { "version": "1.5.2", @@ -2018,9 +2060,9 @@ } }, "oauth-sign": { - "version": "0.8.2", - "resolved": "https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.8.2.tgz", - "integrity": "sha1-Rqarfwrq2N6unsBWV4C31O/rnUM=" + "version": "0.9.0", + "resolved": "https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.9.0.tgz", + "integrity": "sha512-fexhUFFPTGV8ybAtSIGbV6gOkSv8UtRbDBnAyLQw4QPKkgNlsH2ByPGtMUqdWkos6YCRmAqViwgZrJc/mRDzZQ==" }, "object-copy": { "version": "0.1.0", @@ -2096,9 +2138,9 @@ } }, "parseurl": { - "version": "1.3.2", - "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.2.tgz", - "integrity": "sha1-/CidTtiZMRlGDBViUyYs3I3mW/M=" + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", + "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==" }, "pascalcase": { "version": "0.1.1", @@ -2107,7 +2149,7 @@ }, "path-is-absolute": { "version": "1.0.1", - "resolved": "http://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", "integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=" }, "path-parse": { @@ -2126,9 +2168,9 @@ "integrity": "sha1-Ywn04OX6kT7BxpMHrjZLSzd8nns=" }, "pify": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/pify/-/pify-3.0.0.tgz", - "integrity": "sha1-5aSs0sEB/fPZpNB/DbxNtJ3SgXY=" + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/pify/-/pify-4.0.1.tgz", + "integrity": "sha512-uB80kBFb/tfd68bVleG9T5GGsGPjJrLAUpR5PZIrhBnIaRTQRjqdJSsIKkOP6OAIFbj7GOrcudc5pNjZ+geV2g==" }, "posix-character-classes": { "version": "0.1.1", @@ -2146,12 +2188,12 @@ "integrity": "sha512-MtEC1TqN0EU5nephaJ4rAtThHtC86dNN9qCuEhtshvpVBkAW5ZO7BASN9REnF9eoXGcRub+pFuKEpOHE+HbEMw==" }, "proxy-addr": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.3.tgz", - "integrity": "sha512-jQTChiCJteusULxjBp8+jftSQE5Obdl3k4cnmLA6WXtK6XFuWRnvVL7aCiBqaLPM8c4ph0S4tKna8XvmIwEnXQ==", + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.5.tgz", + "integrity": "sha512-t/7RxHXPH6cJtP0pRG6smSr9QJidhB+3kXu0KgXnbGYMgzEnUxRQ4/LDdfOwZEMyIh3/xHb8PX3t+lfL9z+YVQ==", "requires": { "forwarded": "~0.1.2", - "ipaddr.js": "1.6.0" + "ipaddr.js": "1.9.0" } }, "pseudomap": { @@ -2159,6 +2201,11 @@ "resolved": "https://registry.npmjs.org/pseudomap/-/pseudomap-1.0.2.tgz", "integrity": "sha1-8FKijacOYYkX7wqKw0wa5aaChrM=" }, + "psl": { + "version": "1.1.31", + "resolved": "https://registry.npmjs.org/psl/-/psl-1.1.31.tgz", + "integrity": "sha512-/6pt4+C+T+wZUieKR620OpzN/LlnNKuWjy1iFLQ/UG35JqHlR/89MP1d96dUfkf6Dne3TuLQzOYEYshJ+Hx8mw==" + }, "pump": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/pump/-/pump-2.0.1.tgz", @@ -2179,9 +2226,9 @@ } }, "punycode": { - "version": "1.4.1", - "resolved": "https://registry.npmjs.org/punycode/-/punycode-1.4.1.tgz", - "integrity": 
"sha1-wNWmOycYgArY4esPpSachN1BhF4=" + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.1.1.tgz", + "integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==" }, "qs": { "version": "6.5.2", @@ -2199,37 +2246,14 @@ "integrity": "sha1-9JvmtIeJTdxA3MlKMi9hEJLgDV4=" }, "raw-body": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.3.2.tgz", - "integrity": "sha1-vNYMd9Prk83gBQKVw/N5OJvIj4k=", + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.3.3.tgz", + "integrity": "sha512-9esiElv1BrZoI3rCDuOuKCBRbuApGGaDPQfjSflGxdy4oyzqghxu6klEkkVIvBje+FF0BX9coEv8KqW6X/7njw==", "requires": { "bytes": "3.0.0", - "http-errors": "1.6.2", - "iconv-lite": "0.4.19", + "http-errors": "1.6.3", + "iconv-lite": "0.4.23", "unpipe": "1.0.0" - }, - "dependencies": { - "depd": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/depd/-/depd-1.1.1.tgz", - "integrity": "sha1-V4O04cRZ8G+lyif5kfPQbnoxA1k=" - }, - "http-errors": { - "version": "1.6.2", - "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-1.6.2.tgz", - "integrity": "sha1-CgAsyFcHGSp+eUbO7cERVfYOxzY=", - "requires": { - "depd": "1.1.1", - "inherits": "2.0.3", - "setprototypeof": "1.0.3", - "statuses": ">= 1.3.1 < 2" - } - }, - "setprototypeof": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.0.3.tgz", - "integrity": "sha1-ZlZ+NwQ+608E2RvWWMDL77VbjgQ=" - } } }, "readable-stream": { @@ -2264,9 +2288,9 @@ } }, "repeat-element": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/repeat-element/-/repeat-element-1.1.2.tgz", - "integrity": "sha1-7wiaF40Ug7quTZPrmLT55OEdmQo=" + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/repeat-element/-/repeat-element-1.1.3.tgz", + "integrity": "sha512-ahGq0ZnV5m5XtZLMb+vP76kcAM5nkLqk0lpqAuojSKGgQtn4eRi4ZZGm2olo2zKFH+sMsWaqOCW1dqAnOru72g==" }, "repeat-string": { "version": "1.6.1", @@ -2274,30 +2298,30 @@ "integrity": "sha1-jcrkcOHIirwtYA//Sndihtp15jc=" }, "request": { - "version": "2.87.0", - "resolved": "https://registry.npmjs.org/request/-/request-2.87.0.tgz", - "integrity": "sha512-fcogkm7Az5bsS6Sl0sibkbhcKsnyon/jV1kF3ajGmF0c8HrttdKTPRT9hieOaQHA5HEq6r8OyWOo/o781C1tNw==", + "version": "2.88.0", + "resolved": "https://registry.npmjs.org/request/-/request-2.88.0.tgz", + "integrity": "sha512-NAqBSrijGLZdM0WZNsInLJpkJokL72XYjUpnB0iwsRgxh7dB6COrHnTBNwN0E+lHDAJzu7kLAkDeY08z2/A0hg==", "requires": { "aws-sign2": "~0.7.0", - "aws4": "^1.6.0", + "aws4": "^1.8.0", "caseless": "~0.12.0", - "combined-stream": "~1.0.5", - "extend": "~3.0.1", + "combined-stream": "~1.0.6", + "extend": "~3.0.2", "forever-agent": "~0.6.1", - "form-data": "~2.3.1", - "har-validator": "~5.0.3", + "form-data": "~2.3.2", + "har-validator": "~5.1.0", "http-signature": "~1.2.0", "is-typedarray": "~1.0.0", "isstream": "~0.1.2", "json-stringify-safe": "~5.0.1", - "mime-types": "~2.1.17", - "oauth-sign": "~0.8.2", + "mime-types": "~2.1.19", + "oauth-sign": "~0.9.0", "performance-now": "^2.1.0", - "qs": "~6.5.1", - "safe-buffer": "^5.1.1", - "tough-cookie": "~2.3.3", + "qs": "~6.5.2", + "safe-buffer": "^5.1.2", + "tough-cookie": "~2.4.3", "tunnel-agent": "^0.6.0", - "uuid": "^3.1.0" + "uuid": "^3.3.2" } }, "requires-port": { @@ -2306,11 +2330,11 @@ "integrity": "sha1-kl0mAdOaxIXgkc8NpcbmlNw9yv8=" }, "resolve": { - "version": "1.8.1", - "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.8.1.tgz", - 
"integrity": "sha512-AicPrAC7Qu1JxPCZ9ZgCZlY35QgFnNqc+0LtbRNxnVw4TXvjQ72wnuL9JQcEBgXkI9JM8MsT9kaQoHcpCRJOYA==", + "version": "1.10.0", + "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.10.0.tgz", + "integrity": "sha512-3sUr9aq5OfSg2S9pNtPA9hL1FVEAjvfOC4leW0SNf/mpnaakz2a9femSd6LqAww2RaFctwyf1lCqnTHuF1rxDg==", "requires": { - "path-parse": "^1.0.5" + "path-parse": "^1.0.6" } }, "resolve-url": { @@ -2392,6 +2416,11 @@ "version": "1.4.1", "resolved": "https://registry.npmjs.org/mime/-/mime-1.4.1.tgz", "integrity": "sha512-KI1+qOZu5DcW6wayYHSzR/tXKCDC5Om4s1z2QJjDULzLcmf3DvzS7oluY4HCTrc+9FiKmWUgeNLg7W3uIQvxtQ==" + }, + "ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=" } } }, @@ -2491,6 +2520,11 @@ "is-extendable": "^0.1.0" } }, + "ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=" + }, "source-map": { "version": "0.5.7", "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.5.7.tgz", @@ -2608,9 +2642,9 @@ "integrity": "sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw=" }, "sshpk": { - "version": "1.14.2", - "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.14.2.tgz", - "integrity": "sha1-xvxhZIo9nE52T9P8306hBeSSupg=", + "version": "1.16.1", + "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.16.1.tgz", + "integrity": "sha512-HXXqVUq7+pcKeLqqZj6mHFUMvXtOJt1uoUx09pFW6011inTMxqI8BA8PM95myrIyyKwdnzjdFjLiE6KBPVtJIg==", "requires": { "asn1": "~0.2.3", "assert-plus": "^1.0.0", @@ -2624,17 +2658,17 @@ } }, "static-eval": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/static-eval/-/static-eval-2.0.0.tgz", - "integrity": "sha512-6flshd3F1Gwm+Ksxq463LtFd1liC77N/PX1FVVc3OzL3hAmo2fwHFbuArkcfi7s9rTNsLEhcRmXGFZhlgy40uw==", + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/static-eval/-/static-eval-2.0.2.tgz", + "integrity": "sha512-N/D219Hcr2bPjLxPiV+TQE++Tsmrady7TqAJugLy7Xk1EumfDWS/f5dtBbkRCGE7wKKXuYockQoj8Rm2/pVKyg==", "requires": { "escodegen": "^1.8.1" }, "dependencies": { "escodegen": { - "version": "1.11.0", - "resolved": "https://registry.npmjs.org/escodegen/-/escodegen-1.11.0.tgz", - "integrity": "sha512-IeMV45ReixHS53K/OmfKAIztN/igDHzTJUhZM3k1jMhIZWjk45SMwAtBsEXiJp3vSPmTcu6CXn7mDvFHRN66fw==", + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/escodegen/-/escodegen-1.11.1.tgz", + "integrity": "sha512-JwiqFD9KdGVVpeuRa68yU3zZnBEOcPs0nKW7wZzXky8Z7tffdYUHbe11bPCV5jYlK6DVdKLWLm0f5I/QlL0Kmw==", "requires": { "esprima": "^3.1.3", "estraverse": "^4.2.0", @@ -2686,9 +2720,9 @@ "integrity": "sha512-zhSCtt8v2NDrRlPQpCNtw/heZLtfUDqxBM1udqikb/Hbk52LK4nQSwr10u77iopCW5LsyHpuXS0GnEc48mLeew==" }, "stream-events": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/stream-events/-/stream-events-1.0.4.tgz", - "integrity": "sha512-D243NJaYs/xBN2QnoiMDY7IesJFIK7gEhnvAYqJa5JvDdnh2dC4qDBwlCf0ohPpX2QRlA/4gnbnPd3rs3KxVcA==", + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/stream-events/-/stream-events-1.0.5.tgz", + "integrity": "sha512-E1GUzBSgvct8Jsb3v2X15pjzN1tYebtbLaMg+eBOUOAxgbLoSbT2NS91ckc5lJD1KfLjId+jXJRgo0qnV5Nerg==", "requires": { "stubs": "^3.0.0" } @@ -2717,32 +2751,32 @@ "integrity": "sha1-6NK6H6nJBXAwPAMLaQD31fiavls=" }, "tar": { - "version": "4.4.6", - "resolved": "https://registry.npmjs.org/tar/-/tar-4.4.6.tgz", - "integrity": "sha512-tMkTnh9EdzxyfW+6GK6fCahagXsnYk6kE6S9Gr9pjVdys769+laCTbodXDhPAjzVtEBazRgP0gYqOjnk9dQzLg==", + "version": 
"4.4.8", + "resolved": "https://registry.npmjs.org/tar/-/tar-4.4.8.tgz", + "integrity": "sha512-LzHF64s5chPQQS0IYBn9IN5h3i98c12bo4NCO7e0sGM2llXQ3p2FGC5sdENN4cTW48O915Sh+x+EXx7XW96xYQ==", "requires": { - "chownr": "^1.0.1", + "chownr": "^1.1.1", "fs-minipass": "^1.2.5", - "minipass": "^2.3.3", - "minizlib": "^1.1.0", + "minipass": "^2.3.4", + "minizlib": "^1.1.1", "mkdirp": "^0.5.0", "safe-buffer": "^5.1.2", "yallist": "^3.0.2" }, "dependencies": { "yallist": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.0.2.tgz", - "integrity": "sha1-hFK0u36Dx8GI2AQcGoN8dz1ti7k=" + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.0.3.tgz", + "integrity": "sha512-S+Zk8DEWE6oKpV+vI3qWkaK+jSbIK86pCwe2IF/xwIpQ8jEuxpw9NyaGjmp9+BoJv5FV2piqCDcoCtStppiq2A==" } } }, "through2": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/through2/-/through2-2.0.3.tgz", - "integrity": "sha1-AARWmzfHx0ujnEPzzteNGtlBQL4=", + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/through2/-/through2-2.0.5.tgz", + "integrity": "sha512-/mrRod8xqpA+IHSLyGCQ2s8SPHiCDEeQJSep1jqLYeEUClOFG2Qsh+4FU6G9VeqpZnGW/Su8LQGc4YKni5rYSQ==", "requires": { - "readable-stream": "^2.1.5", + "readable-stream": "~2.3.6", "xtend": "~4.0.1" } }, @@ -2785,11 +2819,19 @@ } }, "tough-cookie": { - "version": "2.3.4", - "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.3.4.tgz", - "integrity": "sha512-TZ6TTfI5NtZnuyy/Kecv+CnoROnyXn2DN97LontgQpCwsX2XyLYCC0ENhYkehSOwAp8rTQKc/NUIF7BkQ5rKLA==", + "version": "2.4.3", + "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.4.3.tgz", + "integrity": "sha512-Q5srk/4vDM54WJsJio3XNn6K2sCG+CQ8G5Wz6bZhRZoAe/+TxjWB/GlFAnYEbkYVlON9FMk/fE3h2RLpPXo4lQ==", "requires": { + "psl": "^1.1.24", "punycode": "^1.4.1" + }, + "dependencies": { + "punycode": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-1.4.1.tgz", + "integrity": "sha1-wNWmOycYgArY4esPpSachN1BhF4=" + } } }, "tunnel-agent": { @@ -2803,8 +2845,7 @@ "tweetnacl": { "version": "0.14.5", "resolved": "https://registry.npmjs.org/tweetnacl/-/tweetnacl-0.14.5.tgz", - "integrity": "sha1-WuaBd/GS1EViadEIr6k/+HQ/T2Q=", - "optional": true + "integrity": "sha1-WuaBd/GS1EViadEIr6k/+HQ/T2Q=" }, "type-check": { "version": "0.3.2", @@ -2928,18 +2969,23 @@ } } }, + "uri-js": { + "version": "4.2.2", + "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.2.2.tgz", + "integrity": "sha512-KY9Frmirql91X2Qgjry0Wd4Y+YTdrdZheS8TFwvkbLWf/G5KNJDCh6pKL5OZctEW4+0Baa5idK2ZQuELRwPznQ==", + "requires": { + "punycode": "^2.1.0" + } + }, "urix": { "version": "0.1.0", "resolved": "https://registry.npmjs.org/urix/-/urix-0.1.0.tgz", "integrity": "sha1-2pN/emLiH+wf0Y1Js1wpNQZ6bHI=" }, "use": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/use/-/use-3.1.0.tgz", - "integrity": "sha512-6UJEQM/L+mzC3ZJNM56Q4DFGLX/evKGRg15UJHGB9X5j5Z3AFbgZvjUh2yq/UJUY4U5dh7Fal++XbNg1uzpRAw==", - "requires": { - "kind-of": "^6.0.2" - } + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/use/-/use-3.1.1.tgz", + "integrity": "sha512-cwESVXlO3url9YWlFW/TA9cshCEhtu7IKJ/p5soJ/gGpj7vbvFrAY/eIioQ6Dw23KjZhYgiIo8HOs1nQ2vr/oQ==" }, "util-deprecate": { "version": "1.0.2", @@ -2952,9 +2998,9 @@ "integrity": "sha1-n5VxD1CiZ5R7LMwSR0HBAoQn5xM=" }, "uuid": { - "version": "3.2.1", - "resolved": "https://registry.npmjs.org/uuid/-/uuid-3.2.1.tgz", - "integrity": 
"sha512-jZnMwlb9Iku/O3smGWvZhauCf6cvvpKi4BKRiliS3cxnI+Gz9j5MEpTz2UFuXiKPJocb7gnsLHwiS05ige5BEA==" + "version": "3.3.2", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-3.3.2.tgz", + "integrity": "sha512-yXJmeNaw3DnnKAOKJE51sL/ZaYfWJRl1pK9dr19YFCu0ObS231AB1/LbqTKRAQ5kw8A90rA6fr4riOUpTZvQZA==" }, "vary": { "version": "1.1.2", @@ -2989,6 +3035,11 @@ "requires": { "ms": "2.0.0" } + }, + "ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=" } } }, @@ -3003,9 +3054,9 @@ "integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=" }, "write-file-atomic": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/write-file-atomic/-/write-file-atomic-2.3.0.tgz", - "integrity": "sha512-xuPeK4OdjWqtfi59ylvVL0Yn35SF3zgcAcv7rBPFHVaEapaDr4GdGgm3j7ckTwH9wHL7fGmgfAnb0+THrHb8tA==", + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/write-file-atomic/-/write-file-atomic-2.4.2.tgz", + "integrity": "sha512-s0b6vB3xIVRLWywa6X9TOMA7k9zio0TMOsl9ZnDkliA/cfJlpHXAscj0gbHVJiTdIuAYpIyqS5GW91fqm6gG5g==", "requires": { "graceful-fs": "^4.1.11", "imurmurhash": "^0.1.4", diff --git a/frontend/server/server.ts b/frontend/server/server.ts index 941267268ce..7f68538a4fa 100644 --- a/frontend/server/server.ts +++ b/frontend/server/server.ts @@ -115,7 +115,7 @@ const artifactsHandler = async (req, res) => { // of the pattern until the first wildcard, then we create a regular // expression out of the pattern, escaping all non-wildcard characters, // and we use it to match all enumerated paths. - const storage = Storage(); + const storage = new Storage(); const prefix = key.indexOf('*') > -1 ? key.substr(0, key.indexOf('*')) : key; const files = await storage.bucket(bucket).getFiles({ prefix }); const matchingFiles = files[0].filter((f) => { diff --git a/test/frontend-integration-test/package-lock.json b/test/frontend-integration-test/package-lock.json index 30599158a3b..7a15ec31114 100644 --- a/test/frontend-integration-test/package-lock.json +++ b/test/frontend-integration-test/package-lock.json @@ -5,25 +5,20 @@ "requires": true, "dependencies": { "ajv": { - "version": "5.5.2", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-5.5.2.tgz", - "integrity": "sha1-c7Xuyj+rZT49P5Qis0GtQiBdyWU=", + "version": "6.10.0", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.10.0.tgz", + "integrity": "sha512-nffhOpkymDECQyR0mnsUtoCE8RlX38G0rYP+wgLWFyZuUyuuojSSvi/+euOiQBIn63whYwYVIIH1TvE3tu4OEg==", "requires": { - "co": "^4.6.0", - "fast-deep-equal": "^1.0.0", + "fast-deep-equal": "^2.0.1", "fast-json-stable-stringify": "^2.0.0", - "json-schema-traverse": "^0.3.0" + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" } }, - "amdefine": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/amdefine/-/amdefine-1.0.1.tgz", - "integrity": "sha1-SlKCrBZHKek2Gbz9OtFR+BfOkfU=" - }, "ansi-escapes": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-3.1.0.tgz", - "integrity": "sha512-UgAb8H9D41AQnu/PbWlCofQVcnV4Gs2bBJi9eZPxfU/hgglFh3SMDMENRIqdr7H6XFnXdoknctFByVsCOotTVw==" + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-3.2.0.tgz", + "integrity": "sha512-cBhpre4ma+U0T1oM5fXg7Dy1Jw7zzwv7lt/GoCpr+hDQJoYnKVPLL4dCvSEFMmQurOQvSrwT7SL/DAlhBI97RQ==" }, "ansi-regex": { "version": "2.1.1", @@ -50,12 +45,41 @@ "zip-stream": "^1.2.0" }, "dependencies": { - "async": { - "version": "2.6.1", - "resolved": 
"https://registry.npmjs.org/async/-/async-2.6.1.tgz", - "integrity": "sha512-fNEiL2+AZt6AlAw/29Cr0UDe4sRAHCpEHh54WMz+Bb7QfNcFw4h3loofyJpLeQs4Yx7yuqu/2dLgM5hKOs6HlQ==", + "bl": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/bl/-/bl-1.2.2.tgz", + "integrity": "sha512-e8tQYnZodmebYDWGH7KMRvtzKXaJHx3BbilrgZCfvyLUYdKpK1t5PSPmpkny/SgiTSCnjfLW7v5rlONXVFkQEA==", + "requires": { + "readable-stream": "^2.3.5", + "safe-buffer": "^5.1.1" + } + }, + "readable-stream": { + "version": "2.3.6", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.6.tgz", + "integrity": "sha512-tQtKA9WIAhBF3+VLAseyMqZeBjW0AHJoxOtYqSUZNJxauErmLbVm2FW1y+J/YA9dUrAC39ITejlZWhVIwawkKw==", "requires": { - "lodash": "^4.17.10" + "core-util-is": "~1.0.0", + "inherits": "~2.0.3", + "isarray": "~1.0.0", + "process-nextick-args": "~2.0.0", + "safe-buffer": "~5.1.1", + "string_decoder": "~1.1.1", + "util-deprecate": "~1.0.1" + } + }, + "tar-stream": { + "version": "1.6.2", + "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-1.6.2.tgz", + "integrity": "sha512-rzS0heiNf8Xn7/mpdSVVSMAWAoy9bfb1WOTYC78Z0UQKeKa/CWS8FOq0lKGNa8DWKAn9gxjCvMLYc5PGXYlK2A==", + "requires": { + "bl": "^1.0.0", + "buffer-alloc": "^1.2.0", + "end-of-stream": "^1.0.0", + "fs-constants": "^1.0.0", + "readable-stream": "^2.3.0", + "to-buffer": "^1.1.1", + "xtend": "^4.0.0" } } } @@ -71,27 +95,54 @@ "lodash": "^4.8.0", "normalize-path": "^2.0.0", "readable-stream": "^2.0.0" + }, + "dependencies": { + "readable-stream": { + "version": "2.3.6", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.6.tgz", + "integrity": "sha512-tQtKA9WIAhBF3+VLAseyMqZeBjW0AHJoxOtYqSUZNJxauErmLbVm2FW1y+J/YA9dUrAC39ITejlZWhVIwawkKw==", + "requires": { + "core-util-is": "~1.0.0", + "inherits": "~2.0.3", + "isarray": "~1.0.0", + "process-nextick-args": "~2.0.0", + "safe-buffer": "~5.1.1", + "string_decoder": "~1.1.1", + "util-deprecate": "~1.0.1" + } + } } }, "asn1": { - "version": "0.2.3", - "resolved": "https://registry.npmjs.org/asn1/-/asn1-0.2.3.tgz", - "integrity": "sha1-2sh4dxPJlmhJ/IGAd36+nB3fO4Y=" + "version": "0.2.4", + "resolved": "https://registry.npmjs.org/asn1/-/asn1-0.2.4.tgz", + "integrity": "sha512-jxwzQpLQjSmWXgwaCZE9Nz+glAG01yF1QnWgbhGwHI5A6FRIEY6IVqtHhIepHqI7/kyEyQEagBC5mBEFlIYvdg==", + "requires": { + "safer-buffer": "~2.1.0" + } }, "assert-plus": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/assert-plus/-/assert-plus-1.0.0.tgz", "integrity": "sha1-8S4PPF13sLHN2RRpQuTpbB5N1SU=" }, + "async": { + "version": "2.6.2", + "resolved": "https://registry.npmjs.org/async/-/async-2.6.2.tgz", + "integrity": "sha512-H1qVYh1MYhEEFLsP97cVKqCGo7KfCyTt6uEWqsTBr9SO84oK9Uwbyd/yCW+6rKJLHksBNUVWZDAjfS+Ccx0Bbg==", + "requires": { + "lodash": "^4.17.11" + } + }, "asynckit": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", "integrity": "sha1-x57Zf380y48robyXkLzDZkdLS3k=" }, "atob": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/atob/-/atob-2.1.1.tgz", - "integrity": "sha1-ri1acpR38onWDdf5amMUoi3Wwio=" + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/atob/-/atob-2.1.2.tgz", + "integrity": "sha512-Wm6ukoaOGJi/73p/cl2GvLjTI5JM1k/O14isD73YML8StrH/7/lRFgmg8nICZgD3bZZvjwCGxtMOD3wWNAu8cg==" }, "aws-sign2": { "version": "0.7.0", @@ -99,9 +150,9 @@ "integrity": "sha1-tG6JCTSpWR8tL2+G1+ap8bP+dqg=" }, "aws4": { - "version": "1.7.0", - "resolved": "https://registry.npmjs.org/aws4/-/aws4-1.7.0.tgz", - "integrity": 
"sha512-32NDda82rhwD9/JBCCkB+MRYDp0oSvlo2IL6rQWA10PQi7tDUM3eqMSltXmY+Oyl/7N3P3qNtAlv7X0d9bI28w==" + "version": "1.8.0", + "resolved": "https://registry.npmjs.org/aws4/-/aws4-1.8.0.tgz", + "integrity": "sha512-ReZxvNHIOv88FlT7rxcXIIC0fPt4KZqZbOlivyWtXLt8ESx84zd3kMC6iK5jVeS2qt+g7ftS7ye4fi06X5rtRQ==" }, "babel-runtime": { "version": "6.26.0", @@ -126,18 +177,33 @@ "version": "1.0.2", "resolved": "https://registry.npmjs.org/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.2.tgz", "integrity": "sha1-pDAdOJtqQ/m2f/PKEaP2Y342Dp4=", - "optional": true, "requires": { "tweetnacl": "^0.14.3" } }, "bl": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/bl/-/bl-1.2.2.tgz", - "integrity": "sha512-e8tQYnZodmebYDWGH7KMRvtzKXaJHx3BbilrgZCfvyLUYdKpK1t5PSPmpkny/SgiTSCnjfLW7v5rlONXVFkQEA==", + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/bl/-/bl-2.2.0.tgz", + "integrity": "sha512-wbgvOpqopSr7uq6fJrLH8EsvYMJf9gzfo2jCsL2eTy75qXPukA4pCgHamOQkZtY5vmfVtjB+P3LNlMHW5CEZXA==", "requires": { "readable-stream": "^2.3.5", "safe-buffer": "^5.1.1" + }, + "dependencies": { + "readable-stream": { + "version": "2.3.6", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.6.tgz", + "integrity": "sha512-tQtKA9WIAhBF3+VLAseyMqZeBjW0AHJoxOtYqSUZNJxauErmLbVm2FW1y+J/YA9dUrAC39ITejlZWhVIwawkKw==", + "requires": { + "core-util-is": "~1.0.0", + "inherits": "~2.0.3", + "isarray": "~1.0.0", + "process-nextick-args": "~2.0.0", + "safe-buffer": "~5.1.1", + "string_decoder": "~1.1.1", + "util-deprecate": "~1.0.1" + } + } } }, "brace-expansion": { @@ -155,9 +221,9 @@ "integrity": "sha512-qhAVI1+Av2X7qelOfAIYwXONood6XlZE/fXaBSmW/T5SzLAmCgzi+eiWE7fUvbHaeNBQH13UftjpXxsfLkMpgw==" }, "buffer": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/buffer/-/buffer-5.1.0.tgz", - "integrity": "sha512-YkIRgwsZwJWTnyQrsBTWefizHh+8GYj3kbL1BTiAQ/9pwpino0G7B2gp5tx/FUBqUlvtxV85KNR3mwfAtv15Yw==", + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/buffer/-/buffer-5.2.1.tgz", + "integrity": "sha512-c+Ko0loDaFfuPWiL02ls9Xd3GO3cPVmUobQ6t3rXNUk304u6hGq+8N/kFi+QEIKhzK3uwolVhLzszmfLmMLnqg==", "requires": { "base64-js": "^1.0.2", "ieee754": "^1.1.4" @@ -202,6 +268,13 @@ "has-ansi": "^2.0.0", "strip-ansi": "^3.0.0", "supports-color": "^2.0.0" + }, + "dependencies": { + "supports-color": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-2.0.0.tgz", + "integrity": "sha1-U10EXOa2Nj+kARcIRimZXp3zJMc=" + } } }, "chardet": { @@ -222,36 +295,31 @@ "resolved": "https://registry.npmjs.org/cli-width/-/cli-width-2.2.0.tgz", "integrity": "sha1-/xnt6Kml5XkyQUewwR8PvLq+1jk=" }, - "co": { - "version": "4.6.0", - "resolved": "https://registry.npmjs.org/co/-/co-4.6.0.tgz", - "integrity": "sha1-bqa989hTrlTMuOR7+gvz+QMfsYQ=" - }, "color-convert": { - "version": "1.9.2", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.2.tgz", - "integrity": "sha512-3NUJZdhMhcdPn8vJ9v2UQJoH0qqoGUkYTgFEPZaPjEtwmmKUfNV46zZmgB2M5M4DCEQHMaCfWHCxiBflLm04Tg==", + "version": "1.9.3", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", + "integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", "requires": { - "color-name": "1.1.1" + "color-name": "1.1.3" } }, "color-name": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.1.tgz", - "integrity": "sha1-SxQVMEz1ACjqgWQ2Q72C6gWANok=" + "version": "1.1.3", + "resolved": 
"https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", + "integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU=" }, "combined-stream": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.6.tgz", - "integrity": "sha1-cj599ugBrFYTETp+RFqbactjKBg=", + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.7.tgz", + "integrity": "sha512-brWl9y6vOB1xYPZcpZde3N9zDByXTosAeMDo4p1wzo6UMOX4vumB+TP1RZ76sfE6Md68Q0NJSrE/gbezd4Ul+w==", "requires": { "delayed-stream": "~1.0.0" } }, "commander": { - "version": "2.16.0", - "resolved": "https://registry.npmjs.org/commander/-/commander-2.16.0.tgz", - "integrity": "sha512-sVXqklSaotK9at437sFlFpyOcJonxe0yST/AG9DkQKUdIE6IqGIMv4SfAQSKaJbSdVEJYItASCrBiVQHq1HQew==" + "version": "2.15.1", + "resolved": "https://registry.npmjs.org/commander/-/commander-2.15.1.tgz", + "integrity": "sha512-VlfT9F3V0v+jr4yxPc5gg9s62/fIVWsd2Bk2iD435um1NlGMYdVCq+MjcXnhYq2icNOizHr1kK+5TI6H0Hy0ag==" }, "compress-commons": { "version": "1.2.2", @@ -262,6 +330,22 @@ "crc32-stream": "^2.0.0", "normalize-path": "^2.0.0", "readable-stream": "^2.0.0" + }, + "dependencies": { + "readable-stream": { + "version": "2.3.6", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.6.tgz", + "integrity": "sha512-tQtKA9WIAhBF3+VLAseyMqZeBjW0AHJoxOtYqSUZNJxauErmLbVm2FW1y+J/YA9dUrAC39ITejlZWhVIwawkKw==", + "requires": { + "core-util-is": "~1.0.0", + "inherits": "~2.0.3", + "isarray": "~1.0.0", + "process-nextick-args": "~2.0.0", + "safe-buffer": "~5.1.1", + "string_decoder": "~1.1.1", + "util-deprecate": "~1.0.1" + } + } } }, "concat-map": { @@ -270,9 +354,9 @@ "integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=" }, "core-js": { - "version": "2.5.7", - "resolved": "https://registry.npmjs.org/core-js/-/core-js-2.5.7.tgz", - "integrity": "sha512-RszJCAxg/PP6uzXVXL6BsxSXx/B05oJAQ2vkJRjyjrEcNVycaqOmNb5OTxZPE3xa5gwZduqza6L9JOCenh/Ecw==" + "version": "2.6.5", + "resolved": "https://registry.npmjs.org/core-js/-/core-js-2.6.5.tgz", + "integrity": "sha512-klh/kDpwX8hryYL14M9w/xei6vrv6sE8gTHDG7/T/+SEovB/G4ejwcfE/CBzO6Edsu+OETZMZ3wcX/EjUkrl5A==" }, "core-util-is": { "version": "1.0.2", @@ -280,9 +364,9 @@ "integrity": "sha1-tf1UIgqivFq1eqtxQMlAdUUDwac=" }, "crc": { - "version": "3.7.0", - "resolved": "https://registry.npmjs.org/crc/-/crc-3.7.0.tgz", - "integrity": "sha512-ZwmUex488OBjSVOMxnR/dIa1yxisBMJNEi+UxzXpKhax8MPsQtoRQtl5Qgo+W7pcSVkRXa3BEVjaniaWKtvKvw==", + "version": "3.8.0", + "resolved": "https://registry.npmjs.org/crc/-/crc-3.8.0.tgz", + "integrity": "sha512-iX3mfgcTMIq3ZKLIsVFAbv7+Mc10kxabAGQb8HvjA1o3T1PIYprbakQ65d3I+2HGHt6nSKkM9PYjgoJO2KcFBQ==", "requires": { "buffer": "^5.1.0" } @@ -294,16 +378,44 @@ "requires": { "crc": "^3.4.4", "readable-stream": "^2.0.0" + }, + "dependencies": { + "readable-stream": { + "version": "2.3.6", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.6.tgz", + "integrity": "sha512-tQtKA9WIAhBF3+VLAseyMqZeBjW0AHJoxOtYqSUZNJxauErmLbVm2FW1y+J/YA9dUrAC39ITejlZWhVIwawkKw==", + "requires": { + "core-util-is": "~1.0.0", + "inherits": "~2.0.3", + "isarray": "~1.0.0", + "process-nextick-args": "~2.0.0", + "safe-buffer": "~5.1.1", + "string_decoder": "~1.1.1", + "util-deprecate": "~1.0.1" + } + } + } + }, + "cross-spawn": { + "version": "6.0.5", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-6.0.5.tgz", + "integrity": "sha512-eTVLrBSt7fjbDygz805pMnstIs2VTBNkRm0qxZd+M7A5XDdxVRWO5MxGBXZhjY4cqLYLdtrGqRf8mBPmzwSpWQ==", + 
"requires": { + "nice-try": "^1.0.4", + "path-key": "^2.0.1", + "semver": "^5.5.0", + "shebang-command": "^1.2.0", + "which": "^1.2.9" } }, "css": { - "version": "2.2.3", - "resolved": "https://registry.npmjs.org/css/-/css-2.2.3.tgz", - "integrity": "sha512-0W171WccAjQGGTKLhw4m2nnl0zPHUlTO/I8td4XzJgIB8Hg3ZZx71qT4G4eX8OVsSiaAKiUMy73E3nsbPlg2DQ==", + "version": "2.2.4", + "resolved": "https://registry.npmjs.org/css/-/css-2.2.4.tgz", + "integrity": "sha512-oUnjmWpy0niI3x/mPL8dVEI1l7MnG3+HHyRPHf+YFSbK+svOhXpmSOcDURUh2aOCgl2grzrOPt1nHLuCVFULLw==", "requires": { - "inherits": "^2.0.1", - "source-map": "^0.1.38", - "source-map-resolve": "^0.5.1", + "inherits": "^2.0.3", + "source-map": "^0.6.1", + "source-map-resolve": "^0.5.2", "urix": "^0.1.0" } }, @@ -334,9 +446,9 @@ "integrity": "sha1-+v1Ej3IRXvHitzkVWukvK+bCjdE=" }, "debug": { - "version": "2.6.9", - "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", - "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/debug/-/debug-3.1.0.tgz", + "integrity": "sha512-OX8XqP7/1a9cqkxYw2yXss15f26NKWBpDXQd0/uK/KPqdQhxbPa994hnzjcE2VqQpDslf55723cKPUOGSmMY3g==", "requires": { "ms": "2.0.0" } @@ -346,13 +458,17 @@ "resolved": "https://registry.npmjs.org/decode-uri-component/-/decode-uri-component-0.2.0.tgz", "integrity": "sha1-6zkTMzRYd1y4TNGh+uBiEGu4dUU=" }, + "deepmerge": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-2.0.1.tgz", + "integrity": "sha512-VIPwiMJqJ13ZQfaCsIFnp5Me9tnjURiaIFxfz7EH0Ci0dTSQpZtSLrqOicXqEd/z2r+z+Klk9GzmnRsgpgbOsQ==" + }, "define-properties": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.1.2.tgz", - "integrity": "sha1-g6c/L+pWmJj7c3GTyPhzyvbUXJQ=", + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.1.3.tgz", + "integrity": "sha512-3MqfYKj2lLzdMSf8ZIZE/V+Zuy+BgD6f164e8K2w7dgnpKArBDerGYpM46IYYcjnkdPNMjPk9A6VFB8+3SKlXQ==", "requires": { - "foreach": "^2.0.5", - "object-keys": "^1.0.8" + "object-keys": "^1.0.12" } }, "delayed-stream": { @@ -360,18 +476,23 @@ "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", "integrity": "sha1-3zrhmayt+31ECqrgsp4icrJOxhk=" }, + "detect-libc": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-1.0.3.tgz", + "integrity": "sha1-+hN8S9aY7fVc1c0CrFWfkaTEups=" + }, "diff": { "version": "3.5.0", "resolved": "https://registry.npmjs.org/diff/-/diff-3.5.0.tgz", "integrity": "sha512-A46qtFgd+g7pDZinpnwiRJtxbC1hpgf0uzP3iG89scHk0AUC7A1TGxf5OiiOUv/JMZR8GOt8hL900hV0bOy5xA==" }, "ecc-jsbn": { - "version": "0.1.1", - "resolved": "https://registry.npmjs.org/ecc-jsbn/-/ecc-jsbn-0.1.1.tgz", - "integrity": "sha1-D8c6ntXw1Tw4GTOYUj735UN3dQU=", - "optional": true, + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/ecc-jsbn/-/ecc-jsbn-0.1.2.tgz", + "integrity": "sha1-OoOpBOVDUyh4dMVkt1SThoSamMk=", "requires": { - "jsbn": "~0.1.0" + "jsbn": "~0.1.0", + "safer-buffer": "^2.1.0" } }, "ejs": { @@ -393,9 +514,9 @@ "integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=" }, "extend": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.1.tgz", - "integrity": "sha1-p1Xqe8Gt/MWjHOfnYtuq3F5jZEQ=" + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", + "integrity": 
"sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==" }, "external-editor": { "version": "2.2.0", @@ -413,9 +534,9 @@ "integrity": "sha1-lpGEQOMEGnpBT4xS48V06zw+HgU=" }, "fast-deep-equal": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-1.1.0.tgz", - "integrity": "sha1-wFNHeBfIa1HaqFPIHgWbcz0CNhQ=" + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-2.0.1.tgz", + "integrity": "sha1-ewUhjd+WZ79/Nwv3/bLLFf3Qqkk=" }, "fast-json-stable-stringify": { "version": "2.0.0", @@ -431,9 +552,12 @@ } }, "fibers": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/fibers/-/fibers-2.0.2.tgz", - "integrity": "sha512-HfVRxhYG7C8Jl9FqtrlElMR2z/8YiLQVDKf67MLY25Ic+ILx3ecmklfT1v3u+7P5/4vEFjuxaAFXhr2/Afwk5g==" + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/fibers/-/fibers-3.1.1.tgz", + "integrity": "sha512-dl3Ukt08rHVQfY8xGD0ODwyjwrRALtaghuqGH2jByYX1wpY+nAnRQjJ6Dbqq0DnVgNVQ9yibObzbF4IlPyiwPw==", + "requires": { + "detect-libc": "^1.0.3" + } }, "figures": { "version": "2.0.0", @@ -443,23 +567,18 @@ "escape-string-regexp": "^1.0.5" } }, - "foreach": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/foreach/-/foreach-2.0.5.tgz", - "integrity": "sha1-C+4AUBiusmDQo6865ljdATbsG5k=" - }, "forever-agent": { "version": "0.6.1", "resolved": "https://registry.npmjs.org/forever-agent/-/forever-agent-0.6.1.tgz", "integrity": "sha1-+8cfDEGt6zf5bFd60e1C2P2sypE=" }, "form-data": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/form-data/-/form-data-2.3.2.tgz", - "integrity": "sha1-SXBJi+YEwgwAXU9cI67NIda0kJk=", + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-2.3.3.tgz", + "integrity": "sha512-1lLKB2Mu3aGP1Q/2eCOx0fNbRMe7XdwktwOruhfqqd0rIJWwN4Dh+E3hrPSlDCXnSR7UtZ1N38rVXm+6+MEhJQ==", "requires": { "asynckit": "^0.4.0", - "combined-stream": "1.0.6", + "combined-stream": "^1.0.6", "mime-types": "^2.1.12" } }, @@ -468,6 +587,18 @@ "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz", "integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==" }, + "fs-extra": { + "version": "0.30.0", + "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-0.30.0.tgz", + "integrity": "sha1-8jP/zAjU2n1DLapEl3aYnbHfk/A=", + "requires": { + "graceful-fs": "^4.1.2", + "jsonfile": "^2.1.0", + "klaw": "^1.0.0", + "path-is-absolute": "^1.0.0", + "rimraf": "^2.2.8" + } + }, "fs.realpath": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", @@ -518,9 +649,14 @@ } }, "graceful-fs": { - "version": "4.1.11", - "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.1.11.tgz", - "integrity": "sha1-Dovf5NHduIVNZOBOp8AOKgJuVlg=" + "version": "4.1.15", + "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.1.15.tgz", + "integrity": "sha512-6uHUhOPEBgQ24HM+r6b/QwWfZq+yiFcipKFrOFiBEnWdy5sdzYoi+pJeQaPI5qOLRFqWmAXUPQNsielzdLoecA==" + }, + "grapheme-splitter": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/grapheme-splitter/-/grapheme-splitter-1.0.4.tgz", + "integrity": "sha512-bzh50DW9kTPM00T8y4o8vQg89Di9oLJVLW/KaOGIXJWP/iqCN6WKYkbNOF04vFLJhwcpYUh9ydh/+5vpOqV4YQ==" }, "growl": { "version": "1.10.5", @@ -533,11 +669,11 @@ "integrity": "sha1-qUwiJOvKwEeCoNkDVSHyRzW37JI=" }, "har-validator": { - "version": "5.0.3", - "resolved": 
"https://registry.npmjs.org/har-validator/-/har-validator-5.0.3.tgz", - "integrity": "sha1-ukAsJmGU8VlW7xXg/PJCmT9qff0=", + "version": "5.1.3", + "resolved": "https://registry.npmjs.org/har-validator/-/har-validator-5.1.3.tgz", + "integrity": "sha512-sNvOCzEQNr/qrvJgc3UG/kD4QtlHycrzwS+6mfTrrSq97BvaYcPZZI1ZSqGSPR73Cxn4LKTD4PttRwfU7jWq5g==", "requires": { - "ajv": "^5.1.0", + "ajv": "^6.5.5", "har-schema": "^2.0.0" } }, @@ -575,17 +711,17 @@ } }, "iconv-lite": { - "version": "0.4.23", - "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.23.tgz", - "integrity": "sha512-neyTUVFtahjf0mB3dZT77u+8O0QB89jFdnBkd5P1JgYPbPaia3gXXOVL2fq8VyU2gMMD7SaN7QukTB/pmXYvDA==", + "version": "0.4.24", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", + "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==", "requires": { "safer-buffer": ">= 2.1.2 < 3" } }, "ieee754": { - "version": "1.1.12", - "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.1.12.tgz", - "integrity": "sha512-GguP+DRY+pJ3soyIiGPTvdiVXjZ+DbXOxGpXn3eMvNW4x4irjqXm4wHKscC+TfxSJ0yw/S1F24tqdMNsMZTiLA==" + "version": "1.1.13", + "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.1.13.tgz", + "integrity": "sha512-4vf7I2LYV/HaWerSo3XmlMkp5eZ83i+/CDluXi/IGTs/O1sejBNhTtnxzmRZfvOUqj7lZjqHkeTvpgSFDlWZTg==" }, "inflight": { "version": "1.0.6", @@ -636,9 +772,9 @@ } }, "chalk": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.1.tgz", - "integrity": "sha512-ObN6h1v2fTJSmUXoS3nMQ92LbDK9be4TV+6G+omQlGJFdcUX5heKi1LZ1YnRMIgwTLEj3E24bT6tYni50rlCfQ==", + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz", + "integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==", "requires": { "ansi-styles": "^3.2.1", "escape-string-regexp": "^1.0.5", @@ -652,14 +788,6 @@ "requires": { "ansi-regex": "^3.0.0" } - }, - "supports-color": { - "version": "5.4.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.4.0.tgz", - "integrity": "sha512-zjaXglF5nnWpsq470jSv6P9DwPvgLkuapYmfDm3JWOm0vkNTVF2tI4UrN2r6jH1qM/uc/WtxYY1hYoA2dOKj5w==", - "requires": { - "has-flag": "^3.0.0" - } } } }, @@ -696,8 +824,7 @@ "jsbn": { "version": "0.1.1", "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz", - "integrity": "sha1-peZUwuWi3rXyAdls77yoDA7y9RM=", - "optional": true + "integrity": "sha1-peZUwuWi3rXyAdls77yoDA7y9RM=" }, "json-schema": { "version": "0.2.3", @@ -705,15 +832,23 @@ "integrity": "sha1-tIDIkuWaLwWVTOcnvT8qTogvnhM=" }, "json-schema-traverse": { - "version": "0.3.1", - "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.3.1.tgz", - "integrity": "sha1-NJptRMU6Ud6JtAgFxdXlm0F9M0A=" + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==" }, "json-stringify-safe": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz", "integrity": "sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus=" }, + "jsonfile": { + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-2.4.0.tgz", + "integrity": "sha1-NzaitCi4e72gzIO1P6PWM6NcKug=", + "requires": { + "graceful-fs": "^4.1.6" + } + }, "jsprim": { "version": "1.4.1", "resolved": 
"https://registry.npmjs.org/jsprim/-/jsprim-1.4.1.tgz", @@ -726,9 +861,9 @@ } }, "junit-report-builder": { - "version": "1.3.1", - "resolved": "https://registry.npmjs.org/junit-report-builder/-/junit-report-builder-1.3.1.tgz", - "integrity": "sha512-KTueBpPsmjfiyrAxxhKlEMwXb3aRmDHG5tRYwtRF3ujLQ7/e/5MH3b2p9ND2P84rU8z5dQq40vWJv6TtEdS16Q==", + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/junit-report-builder/-/junit-report-builder-1.3.2.tgz", + "integrity": "sha512-TPpe1hWatrBnBxiRT1M8ss6nCaaoEzZ0fFEdRkv45jVwrpZm9HAqNz1vBVfsrN4Z2PLwhIxpxPAoWfW/b5Kzpw==", "requires": { "date-format": "0.0.2", "lodash": "^4.17.10", @@ -750,6 +885,22 @@ "integrity": "sha1-9plf4PggOS9hOWvolGJAe7dxaOQ=", "requires": { "readable-stream": "^2.0.5" + }, + "dependencies": { + "readable-stream": { + "version": "2.3.6", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.6.tgz", + "integrity": "sha512-tQtKA9WIAhBF3+VLAseyMqZeBjW0AHJoxOtYqSUZNJxauErmLbVm2FW1y+J/YA9dUrAC39ITejlZWhVIwawkKw==", + "requires": { + "core-util-is": "~1.0.0", + "inherits": "~2.0.3", + "isarray": "~1.0.0", + "process-nextick-args": "~2.0.0", + "safe-buffer": "~5.1.1", + "string_decoder": "~1.1.1", + "util-deprecate": "~1.0.1" + } + } } }, "lodash": { @@ -763,16 +914,16 @@ "integrity": "sha1-LRd/ZS+jHpObRDjVNBSZ36OCXpk=" }, "mime-db": { - "version": "1.33.0", - "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.33.0.tgz", - "integrity": "sha512-BHJ/EKruNIqJf/QahvxwQZXKygOQ256myeN/Ew+THcAa5q+PjyTTMMeNQC4DZw5AwfvelsUrA6B67NKMqXDbzQ==" + "version": "1.40.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.40.0.tgz", + "integrity": "sha512-jYdeOMPy9vnxEqFRRo6ZvTZ8d9oPb+k18PKoYNYUe2stVEBPPwsln/qWzdbmaIvnhZ9v2P+CuecK+fpUfsV2mA==" }, "mime-types": { - "version": "2.1.18", - "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.18.tgz", - "integrity": "sha512-lc/aahn+t4/SWV/qcmumYjymLsWfN3ELhpmVuUFjgsORruuZPVSwAQryq+HHGvO/SI2KVX26bx+En+zhM8g8hQ==", + "version": "2.1.24", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.24.tgz", + "integrity": "sha512-WaFHS3MCl5fapm3oLxU4eYDw77IQM2ACcxQ9RIxfaC3ooc6PFuBMGZZsYpvoXS5D5QTWPieo1jjLdAm3TBP3cQ==", "requires": { - "mime-db": "~1.33.0" + "mime-db": "1.40.0" } }, "mimic-fn": { @@ -817,29 +968,6 @@ "minimatch": "3.0.4", "mkdirp": "0.5.1", "supports-color": "5.4.0" - }, - "dependencies": { - "commander": { - "version": "2.15.1", - "resolved": "https://registry.npmjs.org/commander/-/commander-2.15.1.tgz", - "integrity": "sha512-VlfT9F3V0v+jr4yxPc5gg9s62/fIVWsd2Bk2iD435um1NlGMYdVCq+MjcXnhYq2icNOizHr1kK+5TI6H0Hy0ag==" - }, - "debug": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/debug/-/debug-3.1.0.tgz", - "integrity": "sha512-OX8XqP7/1a9cqkxYw2yXss15f26NKWBpDXQd0/uK/KPqdQhxbPa994hnzjcE2VqQpDslf55723cKPUOGSmMY3g==", - "requires": { - "ms": "2.0.0" - } - }, - "supports-color": { - "version": "5.4.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.4.0.tgz", - "integrity": "sha512-zjaXglF5nnWpsq470jSv6P9DwPvgLkuapYmfDm3JWOm0vkNTVF2tI4UrN2r6jH1qM/uc/WtxYY1hYoA2dOKj5w==", - "requires": { - "has-flag": "^3.0.0" - } - } } }, "ms": { @@ -871,14 +999,14 @@ "integrity": "sha1-1+/jz816sAYUuJbqUxGdyaslkSU=" }, "oauth-sign": { - "version": "0.8.2", - "resolved": "https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.8.2.tgz", - "integrity": "sha1-Rqarfwrq2N6unsBWV4C31O/rnUM=" + "version": "0.9.0", + "resolved": "https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.9.0.tgz", + 
"integrity": "sha512-fexhUFFPTGV8ybAtSIGbV6gOkSv8UtRbDBnAyLQw4QPKkgNlsH2ByPGtMUqdWkos6YCRmAqViwgZrJc/mRDzZQ==" }, "object-keys": { - "version": "1.0.12", - "resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.0.12.tgz", - "integrity": "sha512-FTMyFUm2wBcGHnH2eXmz7tC6IwlqQZ6mVZ+6dm6vZ4IQIHjs6FdNsQBuKGPuUUUY6NfJw2PshC08Tn6LzLDOag==" + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.1.1.tgz", + "integrity": "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA==" }, "object.assign": { "version": "4.1.0", @@ -947,19 +1075,19 @@ "integrity": "sha512-MtEC1TqN0EU5nephaJ4rAtThHtC86dNN9qCuEhtshvpVBkAW5ZO7BASN9REnF9eoXGcRub+pFuKEpOHE+HbEMw==" }, "progress": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.0.tgz", - "integrity": "sha1-ihvjZr+Pwj2yvSPxDG/pILQ4nR8=" + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz", + "integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==" }, "psl": { - "version": "1.1.29", - "resolved": "https://registry.npmjs.org/psl/-/psl-1.1.29.tgz", - "integrity": "sha512-AeUmQ0oLN02flVHXWh9sSJF7mcdFq0ppid/JkErufc3hGIV/AMa8Fo9VgDo/cT2jFdOWoFvHp90qqBH54W+gjQ==" + "version": "1.1.31", + "resolved": "https://registry.npmjs.org/psl/-/psl-1.1.31.tgz", + "integrity": "sha512-/6pt4+C+T+wZUieKR620OpzN/LlnNKuWjy1iFLQ/UG35JqHlR/89MP1d96dUfkf6Dne3TuLQzOYEYshJ+Hx8mw==" }, "punycode": { - "version": "1.4.1", - "resolved": "https://registry.npmjs.org/punycode/-/punycode-1.4.1.tgz", - "integrity": "sha1-wNWmOycYgArY4esPpSachN1BhF4=" + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.1.1.tgz", + "integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==" }, "q": { "version": "1.5.1", @@ -977,17 +1105,13 @@ "integrity": "sha1-sgmEkgO7Jd+CDadW50cAWHhSFiA=" }, "readable-stream": { - "version": "2.3.6", - "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.6.tgz", - "integrity": "sha512-tQtKA9WIAhBF3+VLAseyMqZeBjW0AHJoxOtYqSUZNJxauErmLbVm2FW1y+J/YA9dUrAC39ITejlZWhVIwawkKw==", - "requires": { - "core-util-is": "~1.0.0", - "inherits": "~2.0.3", - "isarray": "~1.0.0", - "process-nextick-args": "~2.0.0", - "safe-buffer": "~5.1.1", - "string_decoder": "~1.1.1", - "util-deprecate": "~1.0.1" + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.3.0.tgz", + "integrity": "sha512-EsI+s3k3XsW+fU8fQACLN59ky34AZ14LoeVZpYwmZvldCFo0r0gnelwF2TcMjLor/BTL5aDJVBMkss0dthToPw==", + "requires": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" } }, "regenerator-runtime": { @@ -1001,30 +1125,30 @@ "integrity": "sha1-wkvOKig62tW8P1jg1IJJuSN52O8=" }, "request": { - "version": "2.87.0", - "resolved": "https://registry.npmjs.org/request/-/request-2.87.0.tgz", - "integrity": "sha512-fcogkm7Az5bsS6Sl0sibkbhcKsnyon/jV1kF3ajGmF0c8HrttdKTPRT9hieOaQHA5HEq6r8OyWOo/o781C1tNw==", + "version": "2.88.0", + "resolved": "https://registry.npmjs.org/request/-/request-2.88.0.tgz", + "integrity": "sha512-NAqBSrijGLZdM0WZNsInLJpkJokL72XYjUpnB0iwsRgxh7dB6COrHnTBNwN0E+lHDAJzu7kLAkDeY08z2/A0hg==", "requires": { "aws-sign2": "~0.7.0", - "aws4": "^1.6.0", + "aws4": "^1.8.0", "caseless": "~0.12.0", - "combined-stream": "~1.0.5", - "extend": "~3.0.1", + "combined-stream": "~1.0.6", + "extend": "~3.0.2", "forever-agent": 
"~0.6.1", - "form-data": "~2.3.1", - "har-validator": "~5.0.3", + "form-data": "~2.3.2", + "har-validator": "~5.1.0", "http-signature": "~1.2.0", "is-typedarray": "~1.0.0", "isstream": "~0.1.2", "json-stringify-safe": "~5.0.1", - "mime-types": "~2.1.17", - "oauth-sign": "~0.8.2", + "mime-types": "~2.1.19", + "oauth-sign": "~0.9.0", "performance-now": "^2.1.0", - "qs": "~6.5.1", - "safe-buffer": "^5.1.1", - "tough-cookie": "~2.3.3", + "qs": "~6.5.2", + "safe-buffer": "^5.1.2", + "tough-cookie": "~2.4.3", "tunnel-agent": "^0.6.0", - "uuid": "^3.1.0" + "uuid": "^3.3.2" } }, "resolve-url": { @@ -1042,16 +1166,31 @@ } }, "rgb2hex": { - "version": "0.1.8", - "resolved": "https://registry.npmjs.org/rgb2hex/-/rgb2hex-0.1.8.tgz", - "integrity": "sha512-kPH3Zm3UrBIfJv17AtJJGLRxak+Hvvz6SnsTBIajqB2Zbh+A4EEjkMWKkmGhms0cJlzOOjZcu1LX5K3vnON7ug==" + "version": "0.1.9", + "resolved": "https://registry.npmjs.org/rgb2hex/-/rgb2hex-0.1.9.tgz", + "integrity": "sha512-32iuQzhOjyT+cv9aAFRBJ19JgHwzQwbjUhH3Fj2sWW2EEGAW8fpFrDFP5ndoKDxJaLO06x1hE3kyuIFrUQtybQ==" }, "rimraf": { - "version": "2.6.2", - "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-2.6.2.tgz", - "integrity": "sha512-lreewLK/BlghmxtfH36YYVg1i8IAce4TI7oao75I1g245+6BctqTVQiBP3YUJ9C6DQOXJmkYR9X9fCLtCOJc5w==", + "version": "2.6.3", + "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-2.6.3.tgz", + "integrity": "sha512-mwqeW5XsA2qAejG46gYdENaxXjx9onRNCfn7L0duuP4hCuTIi/QO7PDK07KJfp1d+izWPrzEJDcSqBa0OZQriA==", "requires": { - "glob": "^7.0.5" + "glob": "^7.1.3" + }, + "dependencies": { + "glob": { + "version": "7.1.3", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.3.tgz", + "integrity": "sha512-vcfuiIxogLV4DlGBHIUOwI0IbrJ8HWPc4MU7HzviGeNho/UJDfi6B5p3sHeWIQ0KGIU0Jpxi5ZHxemQfLkkAwQ==", + "requires": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.0.4", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + } + } } }, "run-async": { @@ -1086,142 +1225,54 @@ "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" }, "selenium-standalone": { - "version": "6.15.3", - "resolved": "https://registry.npmjs.org/selenium-standalone/-/selenium-standalone-6.15.3.tgz", - "integrity": "sha512-BFzdXRB8yYPfCRcLxpJDBLWM0akTBP/x0hB0g+8AR7N/PEvbW39dM/hq0Yp1R0hihVQTPI3KkAJpW6h/f41S4g==", - "requires": { - "async": "^2.1.4", - "commander": "^2.9.0", - "cross-spawn": "^6.0.0", - "debug": "^4.0.0", - "lodash": "^4.17.4", + "version": "6.16.0", + "resolved": "https://registry.npmjs.org/selenium-standalone/-/selenium-standalone-6.16.0.tgz", + "integrity": "sha512-tl7HFH2FOxJD1is7Pzzsl0pY4vuePSdSWiJdPn+6ETBkpeJDiuzou8hBjvWYWpD+eIVcOrmy3L0R3GzkdHLzDw==", + "requires": { + "async": "^2.6.2", + "commander": "^2.19.0", + "cross-spawn": "^6.0.5", + "debug": "^4.1.1", + "lodash": "^4.17.11", "minimist": "^1.2.0", "mkdirp": "^0.5.1", - "progress": "2.0.0", + "progress": "2.0.3", "request": "2.88.0", - "tar-stream": "1.6.1", - "urijs": "^1.18.4", - "which": "^1.2.12", - "yauzl": "^2.5.0" + "tar-stream": "2.0.0", + "urijs": "^1.19.1", + "which": "^1.3.1", + "yauzl": "^2.10.0" }, "dependencies": { - "async": { - "version": "2.6.1", - "resolved": "https://registry.npmjs.org/async/-/async-2.6.1.tgz", - "integrity": "sha512-fNEiL2+AZt6AlAw/29Cr0UDe4sRAHCpEHh54WMz+Bb7QfNcFw4h3loofyJpLeQs4Yx7yuqu/2dLgM5hKOs6HlQ==", - "requires": { - "lodash": "^4.17.10" - } - }, - "aws4": { - "version": "1.8.0", - "resolved": "https://registry.npmjs.org/aws4/-/aws4-1.8.0.tgz", - "integrity": 
"sha512-ReZxvNHIOv88FlT7rxcXIIC0fPt4KZqZbOlivyWtXLt8ESx84zd3kMC6iK5jVeS2qt+g7ftS7ye4fi06X5rtRQ==" - }, - "cross-spawn": { - "version": "6.0.5", - "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-6.0.5.tgz", - "integrity": "sha512-eTVLrBSt7fjbDygz805pMnstIs2VTBNkRm0qxZd+M7A5XDdxVRWO5MxGBXZhjY4cqLYLdtrGqRf8mBPmzwSpWQ==", - "requires": { - "nice-try": "^1.0.4", - "path-key": "^2.0.1", - "semver": "^5.5.0", - "shebang-command": "^1.2.0", - "which": "^1.2.9" - } + "commander": { + "version": "2.20.0", + "resolved": "https://registry.npmjs.org/commander/-/commander-2.20.0.tgz", + "integrity": "sha512-7j2y+40w61zy6YC2iRNpUe/NwhNyoXrYpHMrSunaMG64nRnaf96zO/KMQR4OyN/UnE5KLyEBnKHd4aG3rskjpQ==" }, "debug": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.1.0.tgz", - "integrity": "sha512-heNPJUJIqC+xB6ayLAMHaIrmN9HKa7aQO8MGqKpvCA+uJYVcvR6l5kgdrhRuwPFHU7P5/A1w0BjByPHwpfTDKg==", + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.1.1.tgz", + "integrity": "sha512-pYAIzeRo8J6KPEaJ0VWOh5Pzkbw/RetuzehGM7QRRX5he4fPHx2rdKMB256ehJCkX+XRQm16eZLqLNS8RSZXZw==", "requires": { "ms": "^2.1.1" } }, - "extend": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", - "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==" - }, - "har-validator": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/har-validator/-/har-validator-5.1.0.tgz", - "integrity": "sha512-+qnmNjI4OfH2ipQ9VQOw23bBd/ibtfbVdK2fYbY4acTDqKTW/YDp9McimZdDbG8iV9fZizUqQMD5xvriB146TA==", - "requires": { - "ajv": "^5.3.0", - "har-schema": "^2.0.0" - } - }, - "mime-db": { - "version": "1.36.0", - "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.36.0.tgz", - "integrity": "sha512-L+xvyD9MkoYMXb1jAmzI/lWYAxAMCPvIBSWur0PZ5nOf5euahRLVqH//FKW9mWp2lkqUgYiXPgkzfMUFi4zVDw==" - }, - "mime-types": { - "version": "2.1.20", - "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.20.tgz", - "integrity": "sha512-HrkrPaP9vGuWbLK1B1FfgAkbqNjIuy4eHlIYnFi7kamZyLLrGlo2mpcx0bBmNpKqBtYtAfGbodDddIgddSJC2A==", - "requires": { - "mime-db": "~1.36.0" - } - }, "minimist": { "version": "1.2.0", - "resolved": "http://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz", "integrity": "sha1-o1AIsg9BOD7sH7kU9M1d95omQoQ=" }, "ms": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz", "integrity": "sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg==" - }, - "oauth-sign": { - "version": "0.9.0", - "resolved": "https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.9.0.tgz", - "integrity": "sha512-fexhUFFPTGV8ybAtSIGbV6gOkSv8UtRbDBnAyLQw4QPKkgNlsH2ByPGtMUqdWkos6YCRmAqViwgZrJc/mRDzZQ==" - }, - "request": { - "version": "2.88.0", - "resolved": "https://registry.npmjs.org/request/-/request-2.88.0.tgz", - "integrity": "sha512-NAqBSrijGLZdM0WZNsInLJpkJokL72XYjUpnB0iwsRgxh7dB6COrHnTBNwN0E+lHDAJzu7kLAkDeY08z2/A0hg==", - "requires": { - "aws-sign2": "~0.7.0", - "aws4": "^1.8.0", - "caseless": "~0.12.0", - "combined-stream": "~1.0.6", - "extend": "~3.0.2", - "forever-agent": "~0.6.1", - "form-data": "~2.3.2", - "har-validator": "~5.1.0", - "http-signature": "~1.2.0", - "is-typedarray": "~1.0.0", - "isstream": "~0.1.2", - "json-stringify-safe": "~5.0.1", - "mime-types": "~2.1.19", - "oauth-sign": "~0.9.0", - "performance-now": "^2.1.0", - "qs": "~6.5.2", - 
"safe-buffer": "^5.1.2", - "tough-cookie": "~2.4.3", - "tunnel-agent": "^0.6.0", - "uuid": "^3.3.2" - } - }, - "tough-cookie": { - "version": "2.4.3", - "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.4.3.tgz", - "integrity": "sha512-Q5srk/4vDM54WJsJio3XNn6K2sCG+CQ8G5Wz6bZhRZoAe/+TxjWB/GlFAnYEbkYVlON9FMk/fE3h2RLpPXo4lQ==", - "requires": { - "psl": "^1.1.24", - "punycode": "^1.4.1" - } } } }, "semver": { - "version": "5.6.0", - "resolved": "https://registry.npmjs.org/semver/-/semver-5.6.0.tgz", - "integrity": "sha512-RS9R6R35NYgQn++fkDWaOmqGoj4Ek9gGs+DPxNUZKuwE183xjJroKvyo1IzVFeXvUrvmALy6FWD5xrdJT25gMg==" + "version": "5.7.0", + "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.0.tgz", + "integrity": "sha512-Ya52jSX2u7QKghxeoFGpLwCtGlt7j0oY9DYb5apt9nPlJ42ID+ulTXESnt/qAQcoSERyZ5sl3LDIOw0nAn/5DA==" }, "shebang-command": { "version": "1.2.0", @@ -1242,12 +1293,9 @@ "integrity": "sha1-tf3AjxKH6hF4Yo5BXiUTK3NkbG0=" }, "source-map": { - "version": "0.1.43", - "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.1.43.tgz", - "integrity": "sha1-wkvBRspRfBRx9drL4lcbK3+eM0Y=", - "requires": { - "amdefine": ">=0.0.4" - } + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==" }, "source-map-resolve": { "version": "0.5.2", @@ -1267,9 +1315,9 @@ "integrity": "sha1-PpNdfd1zYxuXZZlW1VEo6HtQhKM=" }, "sshpk": { - "version": "1.14.2", - "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.14.2.tgz", - "integrity": "sha1-xvxhZIo9nE52T9P8306hBeSSupg=", + "version": "1.16.1", + "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.16.1.tgz", + "integrity": "sha512-HXXqVUq7+pcKeLqqZj6mHFUMvXtOJt1uoUx09pFW6011inTMxqI8BA8PM95myrIyyKwdnzjdFjLiE6KBPVtJIg==", "requires": { "asn1": "~0.2.3", "assert-plus": "^1.0.0", @@ -1323,22 +1371,23 @@ } }, "supports-color": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-2.0.0.tgz", - "integrity": "sha1-U10EXOa2Nj+kARcIRimZXp3zJMc=" + "version": "5.4.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.4.0.tgz", + "integrity": "sha512-zjaXglF5nnWpsq470jSv6P9DwPvgLkuapYmfDm3JWOm0vkNTVF2tI4UrN2r6jH1qM/uc/WtxYY1hYoA2dOKj5w==", + "requires": { + "has-flag": "^3.0.0" + } }, "tar-stream": { - "version": "1.6.1", - "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-1.6.1.tgz", - "integrity": "sha512-IFLM5wp3QrJODQFPm6/to3LJZrONdBY/otxcvDIQzu217zKye6yVR3hhi9lAjrC2Z+m/j5oDxMPb1qcd8cIvpA==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-2.0.0.tgz", + "integrity": "sha512-n2vtsWshZOVr/SY4KtslPoUlyNh06I2SGgAOCZmquCEjlbV/LjY2CY80rDtdQRHFOYXNlgBDo6Fr3ww2CWPOtA==", "requires": { - "bl": "^1.0.0", - "buffer-alloc": "^1.1.0", - "end-of-stream": "^1.0.0", + "bl": "^2.2.0", + "end-of-stream": "^1.4.1", "fs-constants": "^1.0.0", - "readable-stream": "^2.3.0", - "to-buffer": "^1.1.0", - "xtend": "^4.0.0" + "inherits": "^2.0.3", + "readable-stream": "^3.1.1" } }, "through": { @@ -1360,11 +1409,19 @@ "integrity": "sha512-lx9B5iv7msuFYE3dytT+KE5tap+rNYw+K4jVkb9R/asAb+pbBSM17jtunHplhBe6RRJdZx3Pn2Jph24O32mOVg==" }, "tough-cookie": { - "version": "2.3.4", - "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.3.4.tgz", - "integrity": "sha512-TZ6TTfI5NtZnuyy/Kecv+CnoROnyXn2DN97LontgQpCwsX2XyLYCC0ENhYkehSOwAp8rTQKc/NUIF7BkQ5rKLA==", + "version": "2.4.3", + 
"resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.4.3.tgz", + "integrity": "sha512-Q5srk/4vDM54WJsJio3XNn6K2sCG+CQ8G5Wz6bZhRZoAe/+TxjWB/GlFAnYEbkYVlON9FMk/fE3h2RLpPXo4lQ==", "requires": { + "psl": "^1.1.24", "punycode": "^1.4.1" + }, + "dependencies": { + "punycode": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-1.4.1.tgz", + "integrity": "sha1-wNWmOycYgArY4esPpSachN1BhF4=" + } } }, "tunnel-agent": { @@ -1378,8 +1435,15 @@ "tweetnacl": { "version": "0.14.5", "resolved": "https://registry.npmjs.org/tweetnacl/-/tweetnacl-0.14.5.tgz", - "integrity": "sha1-WuaBd/GS1EViadEIr6k/+HQ/T2Q=", - "optional": true + "integrity": "sha1-WuaBd/GS1EViadEIr6k/+HQ/T2Q=" + }, + "uri-js": { + "version": "4.2.2", + "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.2.2.tgz", + "integrity": "sha512-KY9Frmirql91X2Qgjry0Wd4Y+YTdrdZheS8TFwvkbLWf/G5KNJDCh6pKL5OZctEW4+0Baa5idK2ZQuELRwPznQ==", + "requires": { + "punycode": "^2.1.0" + } }, "urijs": { "version": "1.19.1", @@ -1435,6 +1499,16 @@ "chalk": "^1.1.3", "commander": "^2.9.0", "debug": "^2.6.6" + }, + "dependencies": { + "debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "requires": { + "ms": "2.0.0" + } + } } }, "wdio-dot-reporter": { @@ -1453,60 +1527,38 @@ } }, "wdio-mocha-framework": { - "version": "0.6.2", - "resolved": "https://registry.npmjs.org/wdio-mocha-framework/-/wdio-mocha-framework-0.6.2.tgz", - "integrity": "sha512-OBSesrxsAmfr5kDKrltfxDU1m/EnA/bnaE+JTa8APMi9fMD7IrCeDLHUQel2Q+9IbKcWlDisWxmUzuZZ1gZOzQ==", + "version": "0.6.4", + "resolved": "https://registry.npmjs.org/wdio-mocha-framework/-/wdio-mocha-framework-0.6.4.tgz", + "integrity": "sha512-GZsXwoW60/fkkfqZJR/ZAdiALaM+hW+BbnTT9x214qPR4Pe5XeyYxhJNEdyf0dNI9625cMdkyZYaWoFHN5zDcA==", "requires": { "babel-runtime": "^6.23.0", - "mocha": "^5.0.0", - "wdio-sync": "0.7.1" + "mocha": "^5.2.0", + "wdio-sync": "0.7.3" } }, "wdio-selenium-standalone-service": { "version": "0.0.10", - "resolved": "http://registry.npmjs.org/wdio-selenium-standalone-service/-/wdio-selenium-standalone-service-0.0.10.tgz", + "resolved": "https://registry.npmjs.org/wdio-selenium-standalone-service/-/wdio-selenium-standalone-service-0.0.10.tgz", "integrity": "sha512-PnpY6r8DcMwU2ZVk5y1d8vKhqOq6bXJILy/g5O5ncMpFaxIZcLkFA1u/11XBJwgzWXgUAXPPnIchkb+/t1dbXA==", "requires": { "fs-extra": "^0.30.0", "selenium-standalone": "^6.13.0" - }, - "dependencies": { - "fs-extra": { - "version": "0.30.0", - "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-0.30.0.tgz", - "integrity": "sha1-8jP/zAjU2n1DLapEl3aYnbHfk/A=", - "requires": { - "graceful-fs": "^4.1.2", - "jsonfile": "^2.1.0", - "klaw": "^1.0.0", - "path-is-absolute": "^1.0.0", - "rimraf": "^2.2.8" - } - }, - "jsonfile": { - "version": "2.4.0", - "resolved": "http://registry.npmjs.org/jsonfile/-/jsonfile-2.4.0.tgz", - "integrity": "sha1-NzaitCi4e72gzIO1P6PWM6NcKug=", - "requires": { - "graceful-fs": "^4.1.6" - } - } } }, "wdio-sync": { - "version": "0.7.1", - "resolved": "https://registry.npmjs.org/wdio-sync/-/wdio-sync-0.7.1.tgz", - "integrity": "sha512-7BTWoBbDZsIVR67mx3cqkYiE3gZid5OJPBcjje1SlC28uXJA73YVxKPBR3SzY+iQy4dk0vSyqUcGkuQBjUNQew==", + "version": "0.7.3", + "resolved": "https://registry.npmjs.org/wdio-sync/-/wdio-sync-0.7.3.tgz", + "integrity": 
"sha512-ukASSHOQmOxaz5HTILR0jykqlHBtAPsBpMtwhpiG0aW9uc7SO7PF+E5LhVvTG4ypAh+UGmY3rTjohOsqDr39jw==", "requires": { - "babel-runtime": "6.26.0", - "fibers": "~2.0.0", + "babel-runtime": "^6.26.0", + "fibers": "^3.0.0", "object.assign": "^4.0.3" } }, "webdriverio": { - "version": "4.13.1", - "resolved": "https://registry.npmjs.org/webdriverio/-/webdriverio-4.13.1.tgz", - "integrity": "sha1-Yk70ylafPJpejpsRMCtEMe2h+4o=", + "version": "4.14.4", + "resolved": "https://registry.npmjs.org/webdriverio/-/webdriverio-4.14.4.tgz", + "integrity": "sha512-Knp2vzuzP5c5ybgLu+zTwy/l1Gh0bRP4zAr8NWcrStbuomm9Krn9oRF0rZucT6AyORpXinETzmeowFwIoo7mNA==", "requires": { "archiver": "~2.1.0", "babel-runtime": "^6.26.0", @@ -1516,6 +1568,7 @@ "ejs": "~2.5.6", "gaze": "~1.1.2", "glob": "~7.1.1", + "grapheme-splitter": "^1.0.2", "inquirer": "~3.3.0", "json-stringify-safe": "~5.0.1", "mkdirp": "~0.5.1", @@ -1523,7 +1576,7 @@ "optimist": "~0.6.1", "q": "~1.5.0", "request": "^2.83.0", - "rgb2hex": "~0.1.4", + "rgb2hex": "^0.1.9", "safe-buffer": "~5.1.1", "supports-color": "~5.0.0", "url": "~0.11.0", @@ -1531,11 +1584,6 @@ "wgxpath": "~1.0.0" }, "dependencies": { - "deepmerge": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-2.0.1.tgz", - "integrity": "sha512-VIPwiMJqJ13ZQfaCsIFnp5Me9tnjURiaIFxfz7EH0Ci0dTSQpZtSLrqOicXqEd/z2r+z+Klk9GzmnRsgpgbOsQ==" - }, "has-flag": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-2.0.0.tgz", @@ -1575,9 +1623,9 @@ "integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=" }, "xmlbuilder": { - "version": "10.0.0", - "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-10.0.0.tgz", - "integrity": "sha512-7RWHlmF1yU/E++BZkRQTEv8ZFAhZ+YHINUAxiZ5LQTKRQq//igpiY8rh7dJqPzgb/IzeC5jH9P7OaCERfM9DwA==" + "version": "10.1.1", + "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-10.1.1.tgz", + "integrity": "sha512-OyzrcFLL/nb6fMGHbiRDuPup9ljBycsdCypwuyg5AAHvyWzGfChJpCXMG88AGTIMFhGZ9RccFN1e6lhg3hkwKg==" }, "xtend": { "version": "4.0.1", @@ -1602,6 +1650,22 @@ "compress-commons": "^1.2.0", "lodash": "^4.8.0", "readable-stream": "^2.0.0" + }, + "dependencies": { + "readable-stream": { + "version": "2.3.6", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.6.tgz", + "integrity": "sha512-tQtKA9WIAhBF3+VLAseyMqZeBjW0AHJoxOtYqSUZNJxauErmLbVm2FW1y+J/YA9dUrAC39ITejlZWhVIwawkKw==", + "requires": { + "core-util-is": "~1.0.0", + "inherits": "~2.0.3", + "isarray": "~1.0.0", + "process-nextick-args": "~2.0.0", + "safe-buffer": "~5.1.1", + "string_decoder": "~1.1.1", + "util-deprecate": "~1.0.1" + } + } } } } diff --git a/test/frontend-integration-test/package.json b/test/frontend-integration-test/package.json index 4181e8c71ff..61429c82c4e 100644 --- a/test/frontend-integration-test/package.json +++ b/test/frontend-integration-test/package.json @@ -9,7 +9,7 @@ "wdio-junit-reporter": "^0.4.4", "wdio-mocha-framework": "^0.6.2", "wdio-selenium-standalone-service": "0.0.10", - "webdriverio": "^4.12.0" + "webdriverio": "^4.14.1" }, "scripts": { "docker": "docker build -t gcr.io/ml-pipeline/e2e-tests .", From 6920aceeba0d0f5e4dadc09ef80da05dfc73b5cf Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Wed, 24 Apr 2019 12:06:26 -0700 Subject: [PATCH 26/43] SDK - Removed SourceSpec structure (#1119) It has never been used and ComponentSpec.metadata.annotations['source'] is a better place for such metadata. 
--- sdk/python/kfp/components/_structures.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/sdk/python/kfp/components/_structures.py b/sdk/python/kfp/components/_structures.py index 42c3c475512..460af697345 100644 --- a/sdk/python/kfp/components/_structures.py +++ b/sdk/python/kfp/components/_structures.py @@ -26,8 +26,6 @@ 'ContainerSpec', 'ContainerImplementation', - 'SourceSpec', - 'ComponentSpec', 'ComponentReference', @@ -217,14 +215,6 @@ def __init__(self, ImplementationType = Union[ContainerImplementation, 'GraphImplementation'] -class SourceSpec(ModelBase): - '''Specifies the location of the component source code.''' - def __init__(self, - url: str = None - ): - super().__init__(locals()) - - class MetadataSpec(ModelBase): def __init__(self, annotations: Optional[Dict[str, str]] = None, @@ -234,13 +224,12 @@ def __init__(self, class ComponentSpec(ModelBase): - '''Component specification. Describes the metadata (name, description, source), the interface (inputs and outputs) and the implementation of the component.''' + '''Component specification. Describes the metadata (name, description, annotations and labels), the interface (inputs and outputs) and the implementation of the component.''' def __init__( self, implementation: ImplementationType, name: Optional[str] = None, #? Move to metadata? description: Optional[str] = None, #? Move to metadata? - source: Optional[SourceSpec] = None, #? Move to metadata? metadata: Optional[MetadataSpec] = None, inputs: Optional[List[InputSpec]] = None, outputs: Optional[List[OutputSpec]] = None, From f40a22a3f4a8e06d20cf3e3f425b5058d5c87e0b Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Wed, 24 Apr 2019 12:54:46 -0700 Subject: [PATCH 27/43] SDK - Made ComponentSpec.implementation field optional (#1188) * SDK - Made ComponentSpec.implementation field optional Improved the error message when trying to convert tasks to ContainerOp. 
* Switched from attribute checking to type checking --- sdk/python/kfp/components/_dsl_bridge.py | 6 +++--- sdk/python/kfp/components/_structures.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sdk/python/kfp/components/_dsl_bridge.py b/sdk/python/kfp/components/_dsl_bridge.py index 7032722cdf4..0816eeb12f8 100644 --- a/sdk/python/kfp/components/_dsl_bridge.py +++ b/sdk/python/kfp/components/_dsl_bridge.py @@ -14,7 +14,7 @@ from collections import OrderedDict from typing import Mapping -from ._structures import ConcatPlaceholder, IfPlaceholder, InputValuePlaceholder, InputPathPlaceholder, IsPresentPlaceholder, OutputPathPlaceholder, TaskSpec +from ._structures import ContainerImplementation, ConcatPlaceholder, IfPlaceholder, InputValuePlaceholder, InputPathPlaceholder, IsPresentPlaceholder, OutputPathPlaceholder, TaskSpec from ._components import _generate_output_file_name, _default_component_name from kfp.dsl._metadata import ComponentMeta, ParameterMeta, TypeMeta, _annotation_to_typemeta @@ -22,8 +22,8 @@ def create_container_op_from_task(task_spec: TaskSpec): argument_values = task_spec.arguments component_spec = task_spec.component_ref._component_spec - if hasattr(component_spec.implementation, 'graph'): - raise TypeError('Cannot convert graph component to ContainerOp') + if not isinstance(component_spec.implementation, ContainerImplementation): + raise TypeError('Only container component tasks can be converted to ContainerOp') inputs_dict = {input_spec.name: input_spec for input_spec in component_spec.inputs or []} container_spec = component_spec.implementation.container diff --git a/sdk/python/kfp/components/_structures.py b/sdk/python/kfp/components/_structures.py index 460af697345..d4000ae372a 100644 --- a/sdk/python/kfp/components/_structures.py +++ b/sdk/python/kfp/components/_structures.py @@ -227,12 +227,12 @@ class ComponentSpec(ModelBase): '''Component specification. Describes the metadata (name, description, annotations and labels), the interface (inputs and outputs) and the implementation of the component.''' def __init__( self, - implementation: ImplementationType, name: Optional[str] = None, #? Move to metadata? description: Optional[str] = None, #? Move to metadata? 
metadata: Optional[MetadataSpec] = None, inputs: Optional[List[InputSpec]] = None, outputs: Optional[List[OutputSpec]] = None, + implementation: Optional[ImplementationType] = None, version: Optional[str] = 'google.com/cloud/pipelines/component/v1', #tags: Optional[Set[str]] = None, ): From 982d94a20572ac9b44651c35efa7478a945d047c Mon Sep 17 00:00:00 2001 From: WeiYan Date: Wed, 24 Apr 2019 15:50:44 -0700 Subject: [PATCH 28/43] Allow more flexible way to config the api server addr in persistence agent (#867) * Allow more flexible way to config the api server addr * Remove namespace config from persistent-agent --- backend/src/agent/persistence/client/pipeline_client.go | 7 +++---- backend/src/agent/persistence/main.go | 5 ----- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/backend/src/agent/persistence/client/pipeline_client.go b/backend/src/agent/persistence/client/pipeline_client.go index 9c60e8d7f10..38768008953 100644 --- a/backend/src/agent/persistence/client/pipeline_client.go +++ b/backend/src/agent/persistence/client/pipeline_client.go @@ -27,7 +27,7 @@ import ( ) const ( - addressTemp = "%s.%s.svc.cluster.local:%s" + addressTemp = "%s:%s" ) type PipelineClientInterface interface { @@ -48,15 +48,14 @@ type PipelineClient struct { } func NewPipelineClient( - namespace string, initializeTimeout time.Duration, timeout time.Duration, basePath string, mlPipelineServiceName string, mlPipelineServiceHttpPort string, mlPipelineServiceGRPCPort string) (*PipelineClient, error) { - httpAddress := fmt.Sprintf(addressTemp, mlPipelineServiceName, namespace, mlPipelineServiceHttpPort) - grpcAddress := fmt.Sprintf(addressTemp, mlPipelineServiceName, namespace, mlPipelineServiceGRPCPort) + httpAddress := fmt.Sprintf(addressTemp, mlPipelineServiceName, mlPipelineServiceHttpPort) + grpcAddress := fmt.Sprintf(addressTemp, mlPipelineServiceName, mlPipelineServiceGRPCPort) err := util.WaitForAPIAvailable(initializeTimeout, basePath, httpAddress) if err != nil { return nil, errors.Wrapf(err, diff --git a/backend/src/agent/persistence/main.go b/backend/src/agent/persistence/main.go index 88ffa4e49be..b8765593596 100644 --- a/backend/src/agent/persistence/main.go +++ b/backend/src/agent/persistence/main.go @@ -16,7 +16,6 @@ package main import ( "flag" - "os" "time" workflowclientSet "github.com/argoproj/argo/pkg/client/clientset/versioned" @@ -34,7 +33,6 @@ import ( var ( masterURL string kubeconfig string - namespace string initializeTimeout time.Duration timeout time.Duration mlPipelineAPIServerName string @@ -47,7 +45,6 @@ var ( const ( kubeconfigFlagName = "kubeconfig" masterFlagName = "master" - namespaceFlagName = "namespace" initializationTimeoutFlagName = "initializeTimeout" timeoutFlagName = "timeout" mlPipelineAPIServerBasePathFlagName = "mlPipelineAPIServerBasePath" @@ -81,7 +78,6 @@ func main() { workflowInformerFactory := workflowinformers.NewSharedInformerFactory(workflowClient, time.Second*30) pipelineClient, err := client.NewPipelineClient( - namespace, initializeTimeout, timeout, mlPipelineAPIServerBasePath, @@ -109,7 +105,6 @@ func main() { func init() { flag.StringVar(&kubeconfig, kubeconfigFlagName, "", "Path to a kubeconfig. Only required if out-of-cluster.") flag.StringVar(&masterURL, masterFlagName, "", "The address of the Kubernetes API server. Overrides any value in kubeconfig. 
Only required if out-of-cluster.") - flag.StringVar(&namespace, namespaceFlagName, os.Getenv("POD_NAMESPACE"), "The namespace the ML pipeline API server is deployed to") flag.DurationVar(&initializeTimeout, initializationTimeoutFlagName, 2*time.Minute, "Duration to wait for initialization of the ML pipeline API server.") flag.DurationVar(&timeout, timeoutFlagName, 1*time.Minute, "Duration to wait for calls to complete.") flag.StringVar(&mlPipelineAPIServerName, mlPipelineAPIServerNameFlagName, "ml-pipeline", "Name of the ML pipeline API server.") From b70a8bef7a94de096622598f80ba54e0559e9de8 Mon Sep 17 00:00:00 2001 From: Animesh Singh Date: Wed, 24 Apr 2019 17:10:45 -0700 Subject: [PATCH 29/43] shortening names (#1202) --- components/ibm-components/watson/deploy/component.yaml | 2 +- .../watson/manage/monitor_fairness/component.yaml | 2 +- .../watson/manage/monitor_quality/component.yaml | 2 +- .../watson/manage/subscribe/component.yaml | 2 +- components/ibm-components/watson/store/component.yaml | 2 +- components/ibm-components/watson/train/component.yaml | 2 +- samples/ibm-samples/ffdl-seldon/ffdl_pipeline.py | 10 +++++----- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/components/ibm-components/watson/deploy/component.yaml b/components/ibm-components/watson/deploy/component.yaml index 42d1fb59c7f..a7d2f4ca8a8 100644 --- a/components/ibm-components/watson/deploy/component.yaml +++ b/components/ibm-components/watson/deploy/component.yaml @@ -10,7 +10,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -name: 'Watson Machine Learning - Deploy Model' +name: 'Deploy Model - Watson Machine Learning' description: | Deploy stored model on Watson Machine Learning as a web service. metadata: diff --git a/components/ibm-components/watson/manage/monitor_fairness/component.yaml b/components/ibm-components/watson/manage/monitor_fairness/component.yaml index 6df912741e6..b454919a20e 100644 --- a/components/ibm-components/watson/manage/monitor_fairness/component.yaml +++ b/components/ibm-components/watson/manage/monitor_fairness/component.yaml @@ -10,7 +10,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -name: 'Watson OpenScale - Monitor Fairness' +name: 'Monitor Fairness - Watson OpenScale' description: | Enable model fairness monitoring on Watson OpenScale. metadata: diff --git a/components/ibm-components/watson/manage/monitor_quality/component.yaml b/components/ibm-components/watson/manage/monitor_quality/component.yaml index 362b50615df..52acb3c91f2 100644 --- a/components/ibm-components/watson/manage/monitor_quality/component.yaml +++ b/components/ibm-components/watson/manage/monitor_quality/component.yaml @@ -10,7 +10,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -name: 'Watson OpenScale - Monitor quality' +name: 'Monitor quality - Watson OpenScale' description: | Enable model quality monitoring on Watson OpenScale. metadata: diff --git a/components/ibm-components/watson/manage/subscribe/component.yaml b/components/ibm-components/watson/manage/subscribe/component.yaml index 3a541a18ab3..6ed259f2554 100644 --- a/components/ibm-components/watson/manage/subscribe/component.yaml +++ b/components/ibm-components/watson/manage/subscribe/component.yaml @@ -10,7 +10,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-name: 'Watson OpenScale - Subscribe' +name: 'Subscribe - Watson OpenScale' description: | Binding deployed models and subscribe them to Watson OpenScale service. metadata: diff --git a/components/ibm-components/watson/store/component.yaml b/components/ibm-components/watson/store/component.yaml index 978e3073688..0eb46184b20 100644 --- a/components/ibm-components/watson/store/component.yaml +++ b/components/ibm-components/watson/store/component.yaml @@ -10,7 +10,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -name: 'Watson Machine Learning - Store model' +name: 'Store model - Watson Machine Learning' description: | Store and persistent trained model on Watson Machine Learning. metadata: diff --git a/components/ibm-components/watson/train/component.yaml b/components/ibm-components/watson/train/component.yaml index 18f75949e63..3abba949595 100644 --- a/components/ibm-components/watson/train/component.yaml +++ b/components/ibm-components/watson/train/component.yaml @@ -10,7 +10,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -name: 'Watson Machine Learning - Train Model' +name: 'Train Model - Watson Machine Learning' description: | Train Machine Learning and Deep Learning Models in the Cloud using Watson Machine Learning metadata: diff --git a/samples/ibm-samples/ffdl-seldon/ffdl_pipeline.py b/samples/ibm-samples/ffdl-seldon/ffdl_pipeline.py index f5fdb5e4002..62db9ade7c3 100644 --- a/samples/ibm-samples/ffdl-seldon/ffdl_pipeline.py +++ b/samples/ibm-samples/ffdl-seldon/ffdl_pipeline.py @@ -6,12 +6,16 @@ # generate default secret name secret_name = 'kfp-creds' - +configuration_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/master/components/ibm-components/commons/config/component.yaml') +train_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/master/components/ibm-components/ffdl/train/component.yaml') +serve_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/master/components/ibm-components/ffdl/serve/component.yaml') + # create pipeline @dsl.pipeline( name='FfDL pipeline', description='A pipeline for machine learning workflow using Fabric for Deep Learning and Seldon.' 
) + def ffdlPipeline( GITHUB_TOKEN=dsl.PipelineParam(name='github-token', value=''), @@ -30,10 +34,6 @@ def ffdlPipeline( ): """A pipeline for end to end machine learning workflow.""" - configuration_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/ibm-components/commons/config/component.yaml') - train_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/ibm-components/ffdl/train/component.yaml') - serve_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/ibm-components/ffdl/serve/component.yaml') - get_configuration = configuration_op( token = GITHUB_TOKEN, url = CONFIG_FILE_URL, From 18cc860dbba1b0fb494a10dcbe2adf418bd2de39 Mon Sep 17 00:00:00 2001 From: Ning Date: Wed, 24 Apr 2019 18:32:45 -0700 Subject: [PATCH 30/43] update a broken link (#1221) --- samples/notebooks/DSL Static Type Checking.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/notebooks/DSL Static Type Checking.ipynb b/samples/notebooks/DSL Static Type Checking.ipynb index ed89b79fe10..caa2878e1ed 100644 --- a/samples/notebooks/DSL Static Type Checking.ipynb +++ b/samples/notebooks/DSL Static Type Checking.ipynb @@ -52,7 +52,7 @@ " field_o: /output.txt\n", "```\n", "\n", - "If you define the component using the function decorator, there are a list of [core types](https://github.com/kubeflow/pipelines/blob/master/sdk/python/kfp/dsl/_types.py).\n", + "If you define the component using the function decorator, there are a list of [core types](https://github.com/kubeflow/pipelines/blob/master/sdk/python/kfp/dsl/types.py).\n", "For example, the following component declares a core type Integer for input field_l while\n", "declares customized_type for its output field_n.\n", "\n", @@ -446,7 +446,7 @@ " )\n", "\n", "# Users can also use the core types that are pre-defined in the SDK.\n", - "# For a full list of core types, check out: https://github.com/kubeflow/pipelines/blob/master/sdk/python/kfp/dsl/_types.py\n", + "# For a full list of core types, check out: https://github.com/kubeflow/pipelines/blob/master/sdk/python/kfp/dsl/types.py\n", "@component\n", "def task_factory_b(field_x: 'customized_type',\n", " field_y: Integer(),\n", From ae2795a8a20173ad6ae21fb8411ec48c0d02a236 Mon Sep 17 00:00:00 2001 From: rostam-github <40585490+rostam-github@users.noreply.github.com> Date: Wed, 24 Apr 2019 19:24:45 -0700 Subject: [PATCH 31/43] Kubeflow pipelines quickstart notebooks added. (#821) * Kubeflow pipelines quickstart notebooks added. * Incorporated comments. * Incorporated comments. --- samples/notebooks/quickstart.ipynb | 566 ++++++++++++++++++++++++++ samples/notebooks/quickstart_iris.csv | 150 +++++++ 2 files changed, 716 insertions(+) create mode 100644 samples/notebooks/quickstart.ipynb create mode 100644 samples/notebooks/quickstart_iris.csv diff --git a/samples/notebooks/quickstart.ipynb b/samples/notebooks/quickstart.ipynb new file mode 100644 index 00000000000..e880a118d11 --- /dev/null +++ b/samples/notebooks/quickstart.ipynb @@ -0,0 +1,566 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Copyright 2019 Google Inc. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Part 1\n", + "# Two ways to author a component to list blobs in a GCS bucket\n", + "A pipeline is composed of one or more components. In this section, you will build a single component that lists the blobs in a GCS bucket. Then you buid a pipeline that consists of this component. There are two ways to author a component. In the following sections we will go through each of them." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Create a lightweight python component from a Python function." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1.1 Define component function\n", + "The requirements for the component function:\n", + "* The function must be stand-alone.\n", + "* The function can only import packages that are available in the base image.\n", + "* If the function operates on numbers, the parameters must have type hints. Supported types are `int`, `float`, `bool`. Everything else is passed as `str`, that is, string.\n", + "* To build a component with multiple output values, use Python’s `typing.NamedTuple` type hint syntax." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def list_blobs(bucket_name: str) -> str:\n", + " '''Lists all the blobs in the bucket.'''\n", + " import subprocess\n", + "\n", + " subprocess.call(['pip', 'install', '--upgrade', 'google-cloud-storage'])\n", + " from google.cloud import storage\n", + " storage_client = storage.Client()\n", + " bucket = storage_client.get_bucket(bucket_name)\n", + " list_blobs_response = bucket.list_blobs()\n", + " blobs = ','.join([blob.name for blob in list_blobs_response])\n", + " print(blobs)\n", + " return blobs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1.2 Create a lightweight Python component" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import kfp.components as comp\n", + "\n", + "# Converts the function to a lightweight Python component.\n", + "list_blobs_op = comp.func_to_container_op(list_blobs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1.3 Define pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import kfp.dsl as dsl\n", + "\n", + "# Defines the pipeline.\n", + "@dsl.pipeline(name='List GCS blobs', description='Lists GCS blobs.')\n", + "def pipeline_func(bucket_name=dsl.PipelineParam('bucket')):\n", + " list_blobs_task = list_blobs_op(bucket_name)\n", + "\n", + "# Compile the pipeline to a file.\n", + "import kfp.compiler as compiler\n", + "compiler.Compiler().compile(pipeline_func, 'list_blobs.pipeline.tar.gz')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Wrap an existing Docker container image using `ContainerOp`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2.1 Create a Docker container\n", + "Create your own container image that includes your program. If your component creates some outputs to be fed as inputs to the downstream components, each separate output must be written as a string to a separate local text file by the container image. For example, if a trainer component needs to output the trained model path, it can write the path to a local file `/output.txt`. The string written to an output file cannot be too big. If it is too big (>> 100 kB), save the output to an external persistent storage and pass the storage path to the next component.\n", + "\n", + "Start by entering the value of your Google Cloud Platform Project ID." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following cell creates a file `app.py` that contains a Python script. The script takes a GCS bucket name as an input argument, gets the lists of blobs in that bucket, prints the list of blobs and also writes them to an output file." 
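The bash cell that follows writes `app.py`, a `Dockerfile`, and a `build_image.sh` script via heredocs. Purely as an illustration of the behavior described above, and not the notebook's actual `app.py`, a script that lists the blobs of a bucket, prints them, and writes them to an output file could be sketched like this; the `--bucket` flag and the `/blobs.txt` output path are assumptions:

```python
# Illustrative sketch only; the real app.py in the notebook may differ.
import argparse

from google.cloud import storage

parser = argparse.ArgumentParser(description='List the blobs in a GCS bucket.')
parser.add_argument('--bucket', type=str, required=True, help='GCS bucket name.')  # assumed flag name
args = parser.parse_args()

client = storage.Client()
blobs = client.get_bucket(args.bucket).list_blobs()
blob_names = ','.join(blob.name for blob in blobs)

print(blob_names)
with open('/blobs.txt', 'w') as f:  # assumed output path
    f.write(blob_names)
```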
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "\n", + "# Create folders if they don't exist.\n", + "mkdir -p tmp/components/list-gcs-blobs\n", + "\n", + "# Create the Python file that lists GCS blobs.\n", + "cat > ./tmp/components/list-gcs-blobs/app.py < ./tmp/components/list-gcs-blobs/Dockerfile < ./tmp/components/list-gcs-blobs/build_image.sh < ./tmp/components/view-input/app.py < ./tmp/components/view-input/Dockerfile < ./tmp/components/view-input/build_image.sh < Date: Wed, 24 Apr 2019 20:12:45 -0700 Subject: [PATCH 32/43] Testing - Fixed the postsubmit tests (#1210) --- test/postsubmit-tests-with-pipeline-deployment.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/postsubmit-tests-with-pipeline-deployment.sh b/test/postsubmit-tests-with-pipeline-deployment.sh index 0bd1ca2217e..422f566b882 100755 --- a/test/postsubmit-tests-with-pipeline-deployment.sh +++ b/test/postsubmit-tests-with-pipeline-deployment.sh @@ -30,6 +30,7 @@ usage() PLATFORM=gcp PROJECT=ml-pipeline-test TEST_RESULT_BUCKET=ml-pipeline-test +CLOUDBUILD_PROJECT=ml-pipeline-staging GCR_IMAGE_BASE_DIR=gcr.io/ml-pipeline-staging/ TARGET_IMAGE_BASE_DIR=gcr.io/ml-pipeline-test/${PULL_BASE_SHA} TIMEOUT_SECONDS=1800 @@ -81,7 +82,7 @@ CLOUDBUILD_STARTED=TIMEOUT for i in $(seq 1 ${PULL_CLOUDBUILD_STATUS_MAX_ATTEMPT}) do - output=`gcloud builds list --filter="sourceProvenance.resolvedRepoSource.commitSha:${PULL_BASE_SHA}"` + output=`gcloud builds list --project="$CLOUDBUILD_PROJECT" --filter="sourceProvenance.resolvedRepoSource.commitSha:${PULL_BASE_SHA}"` if [[ ${output} != "" ]]; then CLOUDBUILD_STARTED=True break @@ -98,7 +99,7 @@ fi CLOUDBUILD_FINISHED=TIMEOUT for i in $(seq 1 ${PULL_CLOUDBUILD_STATUS_MAX_ATTEMPT}) do - output=`gcloud builds list --filter="sourceProvenance.resolvedRepoSource.commitSha:${PULL_BASE_SHA}"` + output=`gcloud builds list --project="$CLOUDBUILD_PROJECT" --filter="sourceProvenance.resolvedRepoSource.commitSha:${PULL_BASE_SHA}"` if [[ ${output} == *"SUCCESS"* ]]; then CLOUDBUILD_FINISHED=SUCCESS break From 07cb50ee0c1d531e8f0a9d1c8a418734d82cae1c Mon Sep 17 00:00:00 2001 From: Ilias Katsakioris Date: Thu, 25 Apr 2019 20:40:48 +0300 Subject: [PATCH 33/43] Extend the DSL to implement the design of #801 (#926) * SDK: Create BaseOp class * BaseOp class is the base class for any Argo Template type * ContainerOp derives from BaseOp * Rename dependent_names to deps Signed-off-by: Ilias Katsakioris * SDK: In preparation for the new feature ResourceOps (#801) * Add cops attributes to Pipeline. This is a dict having all the ContainerOps of the pipeline. * Set some processing in _op_to_template as ContainerOp specific Signed-off-by: Ilias Katsakioris * SDK: Simplify the consumption of Volumes by ContainerOps Add `pvolumes` argument and attribute to ContainerOp. It is a dict having mount paths as keys and V1Volumes as values. These are added to the pipeline and mounted by the container of the ContainerOp. 
Signed-off-by: Ilias Katsakioris * SDK: Add ResourceOp * ResourceOp is the SDK's equivalent for Argo's resource template * Add rops attribute to Pipeline: Dictionary containing ResourceOps * Extend _op_to_template to produce the template for ResourceOps * Use processed_op instead of op everywhere in _op_to_template() * Add samples/resourceop/resourceop_basic.py * Add tests/dsl/resource_op_tests.py * Extend tests/compiler/compiler_tests.py Signed-off-by: Ilias Katsakioris * SDK: Simplify the creation of PersistentVolumeClaim instances * Add VolumeOp: A specified ResourceOp for PVC creation * Add samples/resourceops/volumeop_basic.py * Add tests/dsl/volume_op_tests.py * Extend tests/compiler/compiler_tests.py Signed-off-by: Ilias Katsakioris * SDK: Emit a V1Volume as `.volume` from dsl.VolumeOp * Extend VolumeOp so it outputs a `.volume` attribute ready to be consumed by the `pvolumes` argument to ContainerOp's constructor * Update samples/resourceop/volumeop_basic.py * Extend tests/dsl/volume_op_tests.py * Update tests/compiler/compiler_tests.py Signed-off-by: Ilias Katsakioris * SDK: Add PipelineVolume * PipelineVolume inherits from V1Volume and it comes with its own set of KFP-specific dependencies. It is aligned with how PipelineParam instances are used. I.e. consuming a PipelineVolume leads to implicit dependencies without the user having to call the `.after()` method on a ContainerOp. * PipelineVolume comes with its own `.after()` method, which can be used to append extra dependencies to the instance. * Extend ContainerOp to handle PipelineVolume deps * Set `.volume` attribute of VolumeOp to be a PipelineVolume instead * Add samples/resourceops/volumeop_{parallel,dag,sequential}.py * Fix tests/dsl/volume_op_tests.py * Add tests/dsl/pipeline_volume_tests.py * Extend tests/compiler/compiler_tests.py Signed-off-by: Ilias Katsakioris * SDK: Simplify the creation of VolumeSnapshot instances * VolumeSnapshotOp: A specified ResourceOp for VolumeSnapshot creation * Add samples/resourceops/volume_snapshotop_{sequential,rokurl}.py * Add tests/dsl/volume_snapshotop_tests.py * Extend tests/compiler/compiler_tests.py NOTE: VolumeSnapshots is an Alpha feature at the time of this commit. Signed-off-by: Ilias Katsakioris * Extend UI for the ResourceOp and Volumes feature of the Compiler * Add VolumeMounts tab/entry (Run/Pipeline view) * Add Manifest tab/entry (Run/Pipeline view) * Add & Extend tests * Update tests snapshot files Signed-off-by: Ilias Katsakioris * Cleaning up the diff (before moving things back) * Renamed op.deps back to op.dependent_names * Moved Container, Sidecar and BaseOp classed back to _container_op.py This way the diff is much smaller and more understandable. We can always split or refactor the file later. Refactorings should not be mixed with genuine changes. 
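Condensed from the `volumeop_basic.py` sample added further down in this patch, the end-to-end usage pattern enabled by the new `VolumeOp`/`pvolumes` API looks roughly like this (the pipeline and step names are illustrative):

```python
import kfp.dsl as dsl


@dsl.pipeline(
    name="VolumeOp usage sketch",
    description="Create a PVC and mount it into a step."
)
def volume_pipeline(size="1Gi"):
    # VolumeOp is a specialized ResourceOp that creates a PersistentVolumeClaim
    # and exposes it as a PipelineVolume through its .volume attribute.
    vop = dsl.VolumeOp(
        name="create-pvc",
        resource_name="my-pvc",
        modes=dsl.VOLUME_MODE_RWM,
        size=size
    )

    # Passing vop.volume via pvolumes mounts the claim at /mnt and adds the
    # dependency on the VolumeOp implicitly, with no explicit .after() call.
    dsl.ContainerOp(
        name="step1",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["echo foo > /mnt/file1"],
        pvolumes={"/mnt": vop.volume}
    )
```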
--- frontend/src/components/StaticNodeDetails.tsx | 38 +- frontend/src/lib/StaticGraphParser.test.ts | 196 +++++++++- frontend/src/lib/StaticGraphParser.ts | 38 +- frontend/src/lib/WorkflowParser.test.ts | 339 ++++++++++++++++- frontend/src/lib/WorkflowParser.ts | 34 +- frontend/src/pages/RunDetails.test.tsx | 62 ++- frontend/src/pages/RunDetails.tsx | 22 +- .../PipelineDetails.test.tsx.snap | 12 + .../__snapshots__/RunDetails.test.tsx.snap | 242 +++++++++++- samples/resourceops/resourceop_basic.py | 60 +++ .../resourceops/volume_snapshotop_rokurl.py | 91 +++++ .../volume_snapshotop_sequential.py | 87 +++++ samples/resourceops/volumeop_basic.py | 42 +++ samples/resourceops/volumeop_dag.py | 58 +++ samples/resourceops/volumeop_parallel.py | 58 +++ samples/resourceops/volumeop_sequential.py | 57 +++ sdk/python/kfp/compiler/_op_to_template.py | 106 ++++-- sdk/python/kfp/compiler/compiler.py | 38 +- sdk/python/kfp/dsl/__init__.py | 10 +- sdk/python/kfp/dsl/_container_op.py | 356 ++++++++++-------- sdk/python/kfp/dsl/_ops_group.py | 4 +- sdk/python/kfp/dsl/_pipeline.py | 20 +- sdk/python/kfp/dsl/_pipeline_volume.py | 104 +++++ sdk/python/kfp/dsl/_resource_op.py | 149 ++++++++ sdk/python/kfp/dsl/_volume_op.py | 142 +++++++ sdk/python/kfp/dsl/_volume_snapshot_op.py | 126 +++++++ sdk/python/tests/compiler/compiler_tests.py | 81 +++- .../compiler/testdata/resourceop_basic.py | 60 +++ .../compiler/testdata/resourceop_basic.yaml | 99 +++++ .../testdata/volume_snapshotop_rokurl.py | 91 +++++ .../testdata/volume_snapshotop_rokurl.yaml | 325 ++++++++++++++++ .../testdata/volume_snapshotop_sequential.py | 87 +++++ .../volume_snapshotop_sequential.yaml | 335 ++++++++++++++++ .../tests/compiler/testdata/volumeop_basic.py | 42 +++ .../compiler/testdata/volumeop_basic.yaml | 97 +++++ .../tests/compiler/testdata/volumeop_dag.py | 58 +++ .../tests/compiler/testdata/volumeop_dag.yaml | 188 +++++++++ .../compiler/testdata/volumeop_parallel.py | 58 +++ .../compiler/testdata/volumeop_parallel.yaml | 186 +++++++++ .../compiler/testdata/volumeop_sequential.py | 57 +++ .../testdata/volumeop_sequential.yaml | 187 +++++++++ sdk/python/tests/dsl/container_op_tests.py | 5 +- sdk/python/tests/dsl/main.py | 19 +- sdk/python/tests/dsl/pipeline_volume_tests.py | 61 +++ sdk/python/tests/dsl/resource_op_tests.py | 69 ++++ sdk/python/tests/dsl/volume_op_tests.py | 68 ++++ .../tests/dsl/volume_snapshotop_tests.py | 97 +++++ 47 files changed, 4494 insertions(+), 267 deletions(-) create mode 100644 samples/resourceops/resourceop_basic.py create mode 100644 samples/resourceops/volume_snapshotop_rokurl.py create mode 100644 samples/resourceops/volume_snapshotop_sequential.py create mode 100644 samples/resourceops/volumeop_basic.py create mode 100644 samples/resourceops/volumeop_dag.py create mode 100644 samples/resourceops/volumeop_parallel.py create mode 100644 samples/resourceops/volumeop_sequential.py create mode 100644 sdk/python/kfp/dsl/_pipeline_volume.py create mode 100644 sdk/python/kfp/dsl/_resource_op.py create mode 100644 sdk/python/kfp/dsl/_volume_op.py create mode 100644 sdk/python/kfp/dsl/_volume_snapshot_op.py create mode 100644 sdk/python/tests/compiler/testdata/resourceop_basic.py create mode 100644 sdk/python/tests/compiler/testdata/resourceop_basic.yaml create mode 100644 sdk/python/tests/compiler/testdata/volume_snapshotop_rokurl.py create mode 100644 sdk/python/tests/compiler/testdata/volume_snapshotop_rokurl.yaml create mode 100644 sdk/python/tests/compiler/testdata/volume_snapshotop_sequential.py create mode 
100644 sdk/python/tests/compiler/testdata/volume_snapshotop_sequential.yaml create mode 100644 sdk/python/tests/compiler/testdata/volumeop_basic.py create mode 100644 sdk/python/tests/compiler/testdata/volumeop_basic.yaml create mode 100644 sdk/python/tests/compiler/testdata/volumeop_dag.py create mode 100644 sdk/python/tests/compiler/testdata/volumeop_dag.yaml create mode 100644 sdk/python/tests/compiler/testdata/volumeop_parallel.py create mode 100644 sdk/python/tests/compiler/testdata/volumeop_parallel.yaml create mode 100644 sdk/python/tests/compiler/testdata/volumeop_sequential.py create mode 100644 sdk/python/tests/compiler/testdata/volumeop_sequential.yaml create mode 100644 sdk/python/tests/dsl/pipeline_volume_tests.py create mode 100644 sdk/python/tests/dsl/resource_op_tests.py create mode 100644 sdk/python/tests/dsl/volume_op_tests.py create mode 100644 sdk/python/tests/dsl/volume_snapshotop_tests.py diff --git a/frontend/src/components/StaticNodeDetails.tsx b/frontend/src/components/StaticNodeDetails.tsx index 7426dab3df4..44be8eb901f 100644 --- a/frontend/src/components/StaticNodeDetails.tsx +++ b/frontend/src/components/StaticNodeDetails.tsx @@ -1,5 +1,5 @@ /* - * Copyright 2018 Google LLC + * Copyright 2018-2019 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,7 @@ import { classes, stylesheet } from 'typestyle'; import { commonCss, fontsize } from '../Css'; import { SelectedNodeInfo } from '../lib/StaticGraphParser'; -export type nodeType = 'container' | 'dag' | 'unknown'; +export type nodeType = 'container' | 'resource' | 'dag' | 'unknown'; const css = stylesheet({ fontSizeTitle: { @@ -42,19 +42,35 @@ class StaticNodeDetails extends React.Component { const nodeInfo = this.props.nodeInfo; return
- + {(nodeInfo.nodeType === 'container') && ( +
+ - + -
Arguments
- {nodeInfo.args.map((arg, i) => -
{arg}
)} +
Arguments
+ {nodeInfo.args.map((arg, i) => +
{arg}
)} -
Command
- {nodeInfo.command.map((c, i) =>
{c}
)} +
Command
+ {nodeInfo.command.map((c, i) =>
{c}
)} -
Image
-
{nodeInfo.image}
+
Image
+
{nodeInfo.image}
+ + +
+ )} + + {(nodeInfo.nodeType === 'resource') && ( +
+ + + + + +
+ )} {!!nodeInfo.condition && (
diff --git a/frontend/src/lib/StaticGraphParser.test.ts b/frontend/src/lib/StaticGraphParser.test.ts index 044c9fdb968..6e868633c01 100644 --- a/frontend/src/lib/StaticGraphParser.test.ts +++ b/frontend/src/lib/StaticGraphParser.test.ts @@ -1,5 +1,5 @@ /* - * Copyright 2018 Google LLC + * Copyright 2018-2019 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -76,6 +76,52 @@ describe('StaticGraphParser', () => { }; } + function newResourceCreatingWorkflow(): any { + return { + spec: { + entrypoint: 'template-1', + templates: [ + { + dag: { + tasks: [ + { name: 'create-pvc-task', template: 'create-pvc' }, + { + dependencies: ['create-pvc-task'], + name: 'container-1', + template: 'container-1', + }, + { + dependencies: ['container-1'], + name: 'create-snapshot-task', + template: 'create-snapshot', + }, + ] + }, + name: 'template-1', + }, + { + name: 'create-pvc', + resource: { + action: 'create', + manifest: 'apiVersion: v1\nkind: PersistentVolumeClaim', + }, + }, + { + container: {}, + name: 'container-1', + }, + { + name: 'create-snapshot', + resource: { + action: 'create', + manifest: 'apiVersion: snapshot.storage.k8s.io/v1alpha1\nkind: VolumeSnapshot', + }, + }, + ] + } + }; + } + describe('createGraph', () => { it('creates a single node with no edges for a workflow with one step.', () => { const workflow = newWorkflow(); @@ -198,6 +244,18 @@ describe('StaticGraphParser', () => { }); }); + it('includes the resource\'s action and manifest itself in the info of resource nodes', () => { + const g = createGraph(newResourceCreatingWorkflow()); + g.nodes().forEach((nodeName) => { + const node = g.node(nodeName); + if (nodeName.startsWith('create-pvc')) { + expect(node.info.resource).toEqual([['create', 'apiVersion: v1\nkind: PersistentVolumeClaim']]); + } else if (nodeName.startsWith('create-snapshot')) { + expect(node.info.resource).toEqual([['create', 'apiVersion: snapshot.storage.k8s.io\nkind: VolumeSnapshot']]); + } + }); + }); + it('renders extremely simple workflows with no steps or DAGs', () => { const simpleWorkflow = { spec: { @@ -392,12 +450,13 @@ describe('StaticGraphParser', () => { expect(nodeInfo).toEqual(defaultSelectedNodeInfo); }); - it('returns nodeInfo with empty values for args, command, and/or image if container does not have them', () => { + it('returns nodeInfo of a container with empty values for args, command, image and/or volumeMounts if container does not have them', () => { const template = { container: { // No args // No command // No image + // No volumeMounts }, dag: [], name: 'template-1', @@ -407,6 +466,7 @@ describe('StaticGraphParser', () => { expect(nodeInfo.args).toEqual([]); expect(nodeInfo.command).toEqual([]); expect(nodeInfo.image).toEqual(''); + expect(nodeInfo.volumeMounts).toEqual([]); }); @@ -449,7 +509,48 @@ describe('StaticGraphParser', () => { expect(nodeInfo.image).toEqual('some-image'); }); - it('returns nodeInfo with empty values if template does not have inputs and/or outputs', () => { + it('returns nodeInfo containing container volumeMounts', () => { + const template = { + container: { + volumeMounts: [{'mountPath': '/some/path', 'name': 'some-vol'}] + }, + dag: [], + name: 'template-1', + } as any; + const nodeInfo = _populateInfoFromTemplate(new SelectedNodeInfo(), template); + expect(nodeInfo.nodeType).toEqual('container'); + expect(nodeInfo.volumeMounts).toEqual([['/some/path', 'some-vol']]); + }); + + it('returns nodeInfo of a resource with 
empty values for action and manifest', () => { + const template = { + dag: [], + name: 'template-1', + resource: { + // No action + // No manifest + }, + } as any; + const nodeInfo = _populateInfoFromTemplate(new SelectedNodeInfo(), template); + expect(nodeInfo.nodeType).toEqual('resource'); + expect(nodeInfo.resource).toEqual([[]]); + }); + + it('returns nodeInfo containing resource action and manifest', () => { + const template = { + dag: [], + name: 'template-1', + resource: { + action: 'create', + manifest: 'manifest' + }, + } as any; + const nodeInfo = _populateInfoFromTemplate(new SelectedNodeInfo(), template); + expect(nodeInfo.nodeType).toEqual('resource'); + expect(nodeInfo.resource).toEqual([['create', 'manifest']]); + }); + + it('returns nodeInfo of a container with empty values if template does not have inputs and/or outputs', () => { const template = { container: {}, dag: [], @@ -463,7 +564,7 @@ describe('StaticGraphParser', () => { expect(nodeInfo.outputs).toEqual([[]]); }); - it('returns nodeInfo containing template inputs params as list of name/value tuples', () => { + it('returns nodeInfo of a container containing template inputs params as list of name/value tuples', () => { const template = { container: {}, dag: [], @@ -477,7 +578,7 @@ describe('StaticGraphParser', () => { expect(nodeInfo.inputs).toEqual([['param1', 'val1'], ['param2', 'val2']]); }); - it('returns empty strings for inputs with no specified value', () => { + it('returns nodeInfo of a container with empty strings for inputs with no specified value', () => { const template = { container: {}, dag: [], @@ -516,7 +617,7 @@ describe('StaticGraphParser', () => { ]); }); - it('returns empty strings for outputs with no specified value', () => { + it('returns nodeInfo of a container with empty strings for outputs with no specified value', () => { const template = { container: {}, name: 'template-1', @@ -532,6 +633,89 @@ describe('StaticGraphParser', () => { expect(nodeInfo.outputs).toEqual([['param1', ''], ['param2', '']]); }); + it('returns nodeInfo of a resource with empty values if template does not have inputs and/or outputs', () => { + const template = { + dag: [], + // No inputs + // No outputs + name: 'template-1', + resource: {}, + } as any; + const nodeInfo = _populateInfoFromTemplate(new SelectedNodeInfo(), template); + expect(nodeInfo.nodeType).toEqual('resource'); + expect(nodeInfo.inputs).toEqual([[]]); + expect(nodeInfo.outputs).toEqual([[]]); + }); + + it('returns nodeInfo of a resource containing template inputs params as list of name/value tuples', () => { + const template = { + dag: [], + inputs: { + parameters: [{ name: 'param1', value: 'val1' }, { name: 'param2', value: 'val2' }] + }, + name: 'template-1', + resource: {}, + } as any; + const nodeInfo = _populateInfoFromTemplate(new SelectedNodeInfo(), template); + expect(nodeInfo.nodeType).toEqual('resource'); + expect(nodeInfo.inputs).toEqual([['param1', 'val1'], ['param2', 'val2']]); + }); + + it('returns nodeInfo of a resource with empty strings for inputs with no specified value', () => { + const template = { + dag: [], + inputs: { + parameters: [{ name: 'param1' }, { name: 'param2' }] + }, + name: 'template-1', + resource: {}, + } as any; + const nodeInfo = _populateInfoFromTemplate(new SelectedNodeInfo(), template); + expect(nodeInfo.nodeType).toEqual('resource'); + expect(nodeInfo.inputs).toEqual([['param1', ''], ['param2', '']]); + }); + + it('returns nodeInfo containing resource outputs as list of name/value tuples, pulling from 
valueFrom if necessary', () => { + const template = { + name: 'template-1', + outputs: { + parameters: [ + { name: 'param1', value: 'val1' }, + { name: 'param2', valueFrom: { jsonPath: 'jsonPath' } }, + { name: 'param3', valueFrom: { path: 'path' } }, + { name: 'param4', valueFrom: { parameter: 'parameterReference' } }, + { name: 'param5', valueFrom: { jqFilter: 'jqFilter' } }, + ], + }, + resource: {}, + } as any; + const nodeInfo = _populateInfoFromTemplate(new SelectedNodeInfo(), template); + expect(nodeInfo.nodeType).toEqual('resource'); + expect(nodeInfo.outputs).toEqual([ + ['param1', 'val1'], + ['param2', 'jsonPath'], + ['param3', 'path'], + ['param4', 'parameterReference'], + ['param5', 'jqFilter'], + ]); + }); + + it('returns nodeInfo of a resource with empty strings for outputs with no specified value', () => { + const template = { + name: 'template-1', + outputs: { + parameters: [ + { name: 'param1' }, + { name: 'param2' }, + ], + }, + resource: {}, + } as any; + const nodeInfo = _populateInfoFromTemplate(new SelectedNodeInfo(), template); + expect(nodeInfo.nodeType).toEqual('resource'); + expect(nodeInfo.outputs).toEqual([['param1', ''], ['param2', '']]); + }); + }); }); diff --git a/frontend/src/lib/StaticGraphParser.ts b/frontend/src/lib/StaticGraphParser.ts index d297029a667..3330f15c20f 100644 --- a/frontend/src/lib/StaticGraphParser.ts +++ b/frontend/src/lib/StaticGraphParser.ts @@ -1,5 +1,5 @@ /* - * Copyright 2018 Google LLC + * Copyright 2018-2019 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,7 @@ import { Workflow, Template } from '../../third_party/argo-ui/argo_template'; import { color } from '../Css'; import { logger } from './Utils'; -export type nodeType = 'container' | 'dag' | 'unknown'; +export type nodeType = 'container' | 'resource' | 'dag' | 'unknown'; export class SelectedNodeInfo { public args: string[]; @@ -30,6 +30,8 @@ export class SelectedNodeInfo { public inputs: string[][]; public nodeType: nodeType; public outputs: string[][]; + public volumeMounts: string[][]; + public resource: string[][]; constructor() { this.args = []; @@ -39,18 +41,30 @@ export class SelectedNodeInfo { this.inputs = [[]]; this.nodeType = 'unknown'; this.outputs = [[]]; + this.volumeMounts = [[]]; + this.resource = [[]]; } } export function _populateInfoFromTemplate(info: SelectedNodeInfo, template?: Template): SelectedNodeInfo { - if (!template || !template.container) { + if (!template || (!template.container && !template.resource)) { return info; } - info.nodeType = 'container'; - info.args = template.container.args || [], - info.command = template.container.command || [], - info.image = template.container.image || ''; + if (template.container) { + info.nodeType = 'container'; + info.args = template.container.args || [], + info.command = template.container.command || [], + info.image = template.container.image || ''; + info.volumeMounts = (template.container.volumeMounts || []).map(v => [v.mountPath, v.name]); + } else { + info.nodeType = 'resource'; + if (template.resource && template.resource.action && template.resource.manifest) { + info.resource = [[template.resource.action, template.resource.manifest]]; + } else { + info.resource = [[]]; + } + } if (template.inputs) { info.inputs = @@ -67,6 +81,7 @@ export function _populateInfoFromTemplate(info: SelectedNodeInfo, template?: Tem return [p.name, value]; }); } + return info; } @@ -143,12 +158,13 @@ function 
buildDag( } // "Child" here is the template that this task points to. This template should either be a - // DAG, in which case we recurse, or a container which can be thought of as a leaf node + // DAG, in which case we recurse, or a container/resource which can be thought of as a + // leaf node const child = templates.get(task.template); if (child) { if (child.nodeType === 'dag') { buildDag(graph, task.template, templates, alreadyVisited, nodeId); - } else if (child.nodeType === 'container' ) { + } else if (child.nodeType === 'container' || child.nodeType === 'resource') { _populateInfoFromTemplate(info, child.template); } else { throw new Error(`Unknown nodetype: ${child.nodeType} on workflow template: ${child.template}`); @@ -204,10 +220,12 @@ export function createGraph(workflow: Workflow): dagre.graphlib.Graph { if (template.container) { templates.set(template.name, { nodeType: 'container', template }); + } else if (template.resource) { + templates.set(template.name, { nodeType: 'resource', template }); } else if (template.dag) { templates.set(template.name, { nodeType: 'dag', template }); } else { - logger.verbose(`Template: ${template.name} was neither a Container nor a DAG`); + logger.verbose(`Template: ${template.name} was neither a Container/Resource nor a DAG`); } } diff --git a/frontend/src/lib/WorkflowParser.test.ts b/frontend/src/lib/WorkflowParser.test.ts index 6433da6e727..0c244da469a 100644 --- a/frontend/src/lib/WorkflowParser.test.ts +++ b/frontend/src/lib/WorkflowParser.test.ts @@ -1,5 +1,5 @@ /* - * Copyright 2018 Google LLC + * Copyright 2018-2019 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -825,4 +825,341 @@ describe('WorkflowParser', () => { }); }); + + describe('getNodeVolumeMounts', () => { + it('handles undefined workflow', () => { + expect(WorkflowParser.getNodeVolumeMounts(undefined as any, '')).toEqual([]); + }); + + it('handles empty workflow, without status', () => { + expect(WorkflowParser.getNodeVolumeMounts({} as any, '')).toEqual([]); + }); + + it('handles workflow without nodes', () => { + const workflow = { status: {} }; + expect(WorkflowParser.getNodeVolumeMounts(workflow as any, '')).toEqual([]); + }); + + it('handles node not existing in graph', () => { + const workflow = { status: { nodes: { node1: {} } } }; + expect(WorkflowParser.getNodeVolumeMounts(workflow as any, 'node2')).toEqual([]); + }); + + it('handles an empty node', () => { + const workflow = { status: { nodes: { node1: {} } } }; + expect(WorkflowParser.getNodeVolumeMounts(workflow as any, 'node1')).toEqual([]); + }); + + it('handles a workflow without spec', () => { + const workflow = { + spec: {}, + status: { + nodes: { + node1: { + templateName: 'template-1' + } + } + }, + }; + expect(WorkflowParser.getNodeVolumeMounts(workflow as any, 'node1')).toEqual([]); + }); + + it('handles a workflow without templates', () => { + const workflow = { + spec: { templates: [] }, + status: { + nodes: { + node1: { + templateName: 'template-1' + } + } + }, + }; + expect(WorkflowParser.getNodeVolumeMounts(workflow as any, 'node1')).toEqual([]); + }); + + it('handles a node without a template', () => { + const workflow = { + spec: { + templates: [{ + container: {}, + name: 'template-2', + }] + }, + status: { + nodes: { + node1: { + templateName: 'template-1' + } + } + }, + }; + expect(WorkflowParser.getNodeVolumeMounts(workflow as any, 'node1')).toEqual([]); + }); + + it('handles a node which is not 
a container template', () => { + const workflow = { + spec: { + templates: [{ + name: 'template-1', + resource: {}, + }] + }, + status: { + nodes: { + node1: { + templateName: 'template-1' + } + } + }, + }; + expect(WorkflowParser.getNodeVolumeMounts(workflow as any, 'node1')).toEqual([]); + }); + + it('handles a node which is an empty container template', () => { + const workflow = { + spec: { + templates: [{ + container: {}, + name: 'template-1', + }] + }, + status: { + nodes: { + node1: { + templateName: 'template-1' + } + } + }, + }; + expect(WorkflowParser.getNodeVolumeMounts(workflow as any, 'node1')).toEqual([]); + }); + + it('handles a node which is a container template without volumeMounts', () => { + const workflow = { + spec: { + templates: [{ + container: { + image: 'image' + }, + name: 'template-1', + }] + }, + status: { + nodes: { + node1: { + templateName: 'template-1' + } + } + }, + }; + expect(WorkflowParser.getNodeVolumeMounts(workflow as any, 'node1')).toEqual([]); + }); + + it('handles a node which is a container template with empty volumeMounts', () => { + const workflow = { + spec: { + templates: [{ + container: { + volumeMounts: [] + }, + name: 'template-1', + }] + }, + status: { + nodes: { + node1: { + templateName: 'template-1' + } + } + }, + }; + expect(WorkflowParser.getNodeVolumeMounts(workflow as any, 'node1')).toEqual([]); + }); + + it('handles a node which is a container template with one entry in volumeMounts', () => { + const workflow = { + spec: { + templates: [{ + container: { + volumeMounts: [{ + mountPath: '/data', + name: 'vol1', + }] + }, + name: 'template-1', + }] + }, + status: { + nodes: { + node1: { + templateName: 'template-1' + } + } + }, + }; + expect(WorkflowParser.getNodeVolumeMounts(workflow as any, 'node1')).toEqual([['/data', 'vol1']]); + }); + + it('handles a node which is a container template with multiple volumeMounts', () => { + const workflow = { + spec: { + templates: [{ + container: { + volumeMounts: [ + { + mountPath: '/data', + name: 'vol1', + },{ + mountPath: '/common', + name: 'vol2', + } + ] + }, + name: 'template-1', + }] + }, + status: { + nodes: { + node1: { + templateName: 'template-1' + } + } + }, + }; + expect(WorkflowParser.getNodeVolumeMounts(workflow as any, 'node1')).toEqual([['/data', 'vol1'], ['/common', 'vol2']]); + }); + }); + + describe('getNodeManifest', () => { + it('handles undefined workflow', () => { + expect(WorkflowParser.getNodeManifest(undefined as any, '')).toEqual([]); + }); + + it('handles empty workflow, without status', () => { + expect(WorkflowParser.getNodeManifest({} as any, '')).toEqual([]); + }); + + it('handles workflow without nodes', () => { + const workflow = { status: {} }; + expect(WorkflowParser.getNodeManifest(workflow as any, '')).toEqual([]); + }); + + it('handles node not existing in graph', () => { + const workflow = { status: { nodes: { node1: {} } } }; + expect(WorkflowParser.getNodeManifest(workflow as any, 'node2')).toEqual([]); + }); + + it('handles an empty node', () => { + const workflow = { status: { nodes: { node1: {} } } }; + expect(WorkflowParser.getNodeManifest(workflow as any, 'node1')).toEqual([]); + }); + + it('handles a workflow without spec', () => { + const workflow = { + spec: {}, + status: { + nodes: { + node1: { + templateName: 'template-1' + } + } + }, + }; + expect(WorkflowParser.getNodeManifest(workflow as any, 'node1')).toEqual([]); + }); + + it('handles a workflow without templates', () => { + const workflow = { + spec: { templates: [] }, + status: { + 
nodes: { + node1: { + templateName: 'template-1' + } + } + }, + }; + expect(WorkflowParser.getNodeManifest(workflow as any, 'node1')).toEqual([]); + }); + + it('handles a node without a template', () => { + const workflow = { + spec: { + templates: [{ + container: {}, + name: 'template-2', + }] + }, + status: { + nodes: { + node1: { + templateName: 'template-1' + } + } + }, + }; + expect(WorkflowParser.getNodeManifest(workflow as any, 'node1')).toEqual([]); + }); + + it('handles a node which is not a resource template', () => { + const workflow = { + spec: { + templates: [{ + container: {}, + name: 'template-1', + }] + }, + status: { + nodes: { + node1: { + templateName: 'template-1' + } + } + }, + }; + expect(WorkflowParser.getNodeManifest(workflow as any, 'node1')).toEqual([]); + }); + + it('handles a node which is an empty resource template', () => { + const workflow = { + spec: { + templates: [{ + name: 'template-1', + resource: {}, + }] + }, + status: { + nodes: { + node1: { + templateName: 'template-1' + } + } + }, + }; + expect(WorkflowParser.getNodeManifest(workflow as any, 'node1')).toEqual([]); + }); + + it('handles a node which is a complete resource template', () => { + const workflow = { + spec: { + templates: [{ + name: 'template-1', + resource: { + action: 'create', + manifest: 'manifest' + }, + }] + }, + status: { + nodes: { + node1: { + templateName: 'template-1' + } + } + }, + }; + expect(WorkflowParser.getNodeManifest(workflow as any, 'node1')).toEqual([['create', 'manifest']]); + }); + }); }); diff --git a/frontend/src/lib/WorkflowParser.ts b/frontend/src/lib/WorkflowParser.ts index c34e87c6638..3a86b0c657b 100644 --- a/frontend/src/lib/WorkflowParser.ts +++ b/frontend/src/lib/WorkflowParser.ts @@ -1,5 +1,5 @@ /* - * Copyright 2018 Google LLC + * Copyright 2018-2019 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -166,6 +166,38 @@ export default class WorkflowParser { return inputsOutputs; } + // Makes sure the workflow object contains the node and returns its + // volume mounts if any. + public static getNodeVolumeMounts(workflow: Workflow, nodeId: string): string[][] { + if (!workflow || !workflow.status || !workflow.status.nodes || !workflow.status.nodes[nodeId] || !workflow.spec || !workflow.spec.templates) { + return []; + } + + const node = workflow.status.nodes[nodeId]; + const tmpl = workflow.spec.templates.find(t => !!t && !!t.name && t.name === node.templateName); + let volumeMounts: string[][] = []; + if (tmpl && tmpl.container && tmpl.container.volumeMounts) { + volumeMounts = tmpl.container.volumeMounts.map(v => [v.mountPath, v.name]); + } + return volumeMounts; + } + + // Makes sure the workflow object contains the node and returns its + // action and manifest. 
+ public static getNodeManifest(workflow: Workflow, nodeId: string): string[][] { + if (!workflow || !workflow.status || !workflow.status.nodes || !workflow.status.nodes[nodeId] || !workflow.spec || !workflow.spec.templates) { + return []; + } + + const node = workflow.status.nodes[nodeId]; + const tmpl = workflow.spec.templates.find(t => !!t && !!t.name && t.name === node.templateName); + let manifest: string[][] = []; + if (tmpl && tmpl.resource && tmpl.resource.action && tmpl.resource.manifest) { + manifest = [[tmpl.resource.action, tmpl.resource.manifest]]; + } + return manifest; + } + // Returns a list of output paths for the given workflow Node, by looking for // and the Argo artifacts syntax in the outputs section. public static loadNodeOutputPaths(selectedWorkflowNode: NodeStatus): StoragePath[] { diff --git a/frontend/src/pages/RunDetails.test.tsx b/frontend/src/pages/RunDetails.test.tsx index 3bd1e6343d0..667b98aad0c 100644 --- a/frontend/src/pages/RunDetails.test.tsx +++ b/frontend/src/pages/RunDetails.test.tsx @@ -1,5 +1,5 @@ /* - * Copyright 2018 Google LLC + * Copyright 2018-2019 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -452,7 +452,7 @@ describe('RunDetails', () => { expect(tree).toMatchSnapshot(); }); - it('switches to logs tab in side pane', async () => { + it('switches to volumes tab in side pane', async () => { testRun.pipeline_runtime!.workflow_manifest = JSON.stringify({ status: { nodes: { node1: { id: 'node1', }, }, }, }); @@ -465,6 +465,32 @@ describe('RunDetails', () => { expect(tree).toMatchSnapshot(); }); + it('switches to manifest tab in side pane', async () => { + testRun.pipeline_runtime!.workflow_manifest = JSON.stringify({ + status: { nodes: { node1: { id: 'node1', }, }, }, + }); + tree = shallow(); + await getRunSpy; + await TestUtils.flushPromises(); + tree.find('Graph').simulate('click', 'node1'); + tree.find('MD2Tabs').at(1).simulate('switch', 3); + expect(tree.state('sidepanelSelectedTab')).toEqual(3); + expect(tree).toMatchSnapshot(); + }); + + it('switches to logs tab in side pane', async () => { + testRun.pipeline_runtime!.workflow_manifest = JSON.stringify({ + status: { nodes: { node1: { id: 'node1', }, }, }, + }); + tree = shallow(); + await getRunSpy; + await TestUtils.flushPromises(); + tree.find('Graph').simulate('click', 'node1'); + tree.find('MD2Tabs').at(1).simulate('switch', 4); + expect(tree.state('sidepanelSelectedTab')).toEqual(4); + expect(tree).toMatchSnapshot(); + }); + it('loads and shows logs in side pane', async () => { testRun.pipeline_runtime!.workflow_manifest = JSON.stringify({ status: { nodes: { node1: { id: 'node1', }, }, }, @@ -473,7 +499,7 @@ describe('RunDetails', () => { await getRunSpy; await TestUtils.flushPromises(); tree.find('Graph').simulate('click', 'node1'); - tree.find('MD2Tabs').at(1).simulate('switch', 2); + tree.find('MD2Tabs').at(1).simulate('switch', 4); await getPodLogsSpy; expect(getPodLogsSpy).toHaveBeenCalledTimes(1); expect(getPodLogsSpy).toHaveBeenLastCalledWith('node1'); @@ -489,7 +515,7 @@ describe('RunDetails', () => { await getRunSpy; await TestUtils.flushPromises(); tree.find('Graph').simulate('click', 'node1'); - tree.find('MD2Tabs').at(1).simulate('switch', 2); + tree.find('MD2Tabs').at(1).simulate('switch', 4); await getPodLogsSpy; await TestUtils.flushPromises(); expect(tree.state()).toMatchObject({ @@ -515,7 +541,7 @@ describe('RunDetails', () => { await getRunSpy; await 
TestUtils.flushPromises(); tree.find('Graph').simulate('click', 'node1'); - tree.find('MD2Tabs').at(1).simulate('switch', 2); + tree.find('MD2Tabs').at(1).simulate('switch', 4); await getPodLogsSpy; await TestUtils.flushPromises(); expect(getPodLogsSpy).not.toHaveBeenCalled(); @@ -550,14 +576,14 @@ describe('RunDetails', () => { await getRunSpy; await TestUtils.flushPromises(); tree.find('Graph').simulate('click', 'node1'); - tree.find('MD2Tabs').at(1).simulate('switch', 2); + tree.find('MD2Tabs').at(1).simulate('switch', 4); expect(tree.state('selectedNodeDetails')).toHaveProperty('id', 'node1'); - expect(tree.state('sidepanelSelectedTab')).toEqual(2); + expect(tree.state('sidepanelSelectedTab')).toEqual(4); await (tree.instance() as RunDetails).refresh(); expect (getRunSpy).toHaveBeenCalledTimes(2); expect(tree.state('selectedNodeDetails')).toHaveProperty('id', 'node1'); - expect(tree.state('sidepanelSelectedTab')).toEqual(2); + expect(tree.state('sidepanelSelectedTab')).toEqual(4); }); it('keeps side pane open and on same tab when more nodes are added after refresh', async () => { @@ -573,14 +599,14 @@ describe('RunDetails', () => { await getRunSpy; await TestUtils.flushPromises(); tree.find('Graph').simulate('click', 'node1'); - tree.find('MD2Tabs').at(1).simulate('switch', 2); + tree.find('MD2Tabs').at(1).simulate('switch', 4); expect(tree.state('selectedNodeDetails')).toHaveProperty('id', 'node1'); - expect(tree.state('sidepanelSelectedTab')).toEqual(2); + expect(tree.state('sidepanelSelectedTab')).toEqual(4); await (tree.instance() as RunDetails).refresh(); expect(getRunSpy).toHaveBeenCalledTimes(2); expect(tree.state('selectedNodeDetails')).toHaveProperty('id', 'node1'); - expect(tree.state('sidepanelSelectedTab')).toEqual(2); + expect(tree.state('sidepanelSelectedTab')).toEqual(4); }); it('keeps side pane open and on same tab when run status changes, shows new status', async () => { @@ -591,9 +617,9 @@ describe('RunDetails', () => { await getRunSpy; await TestUtils.flushPromises(); tree.find('Graph').simulate('click', 'node1'); - tree.find('MD2Tabs').at(1).simulate('switch', 2); + tree.find('MD2Tabs').at(1).simulate('switch', 4); expect(tree.state('selectedNodeDetails')).toHaveProperty('id', 'node1'); - expect(tree.state('sidepanelSelectedTab')).toEqual(2); + expect(tree.state('sidepanelSelectedTab')).toEqual(4); expect(updateToolbarSpy).toHaveBeenCalledTimes(3); const thirdCall = updateToolbarSpy.mock.calls[2][0]; @@ -613,9 +639,9 @@ describe('RunDetails', () => { await getRunSpy; await TestUtils.flushPromises(); tree.find('Graph').simulate('click', 'node1'); - tree.find('MD2Tabs').at(1).simulate('switch', 2); + tree.find('MD2Tabs').at(1).simulate('switch', 4); expect(tree.state('selectedNodeDetails')).toHaveProperty('id', 'node1'); - expect(tree.state('sidepanelSelectedTab')).toEqual(2); + expect(tree.state('sidepanelSelectedTab')).toEqual(4); getPodLogsSpy.mockImplementationOnce(() => 'new test logs'); await (tree.instance() as RunDetails).refresh(); @@ -631,7 +657,7 @@ describe('RunDetails', () => { await getRunSpy; await TestUtils.flushPromises(); tree.find('Graph').simulate('click', 'node1'); - tree.find('MD2Tabs').at(1).simulate('switch', 2); + tree.find('MD2Tabs').at(1).simulate('switch', 4); await getPodLogsSpy; await TestUtils.flushPromises(); expect(tree.state()).toMatchObject({ @@ -656,7 +682,7 @@ describe('RunDetails', () => { await getRunSpy; await TestUtils.flushPromises(); tree.find('Graph').simulate('click', 'node1'); - 
tree.find('MD2Tabs').at(1).simulate('switch', 2); + tree.find('MD2Tabs').at(1).simulate('switch', 4); expect(tree.state('selectedNodeDetails')).toHaveProperty('phaseMessage', undefined); testRun.pipeline_runtime!.workflow_manifest = JSON.stringify({ @@ -675,7 +701,7 @@ describe('RunDetails', () => { await getRunSpy; await TestUtils.flushPromises(); tree.find('Graph').simulate('click', 'node1'); - tree.find('MD2Tabs').at(1).simulate('switch', 2); + tree.find('MD2Tabs').at(1).simulate('switch', 4); expect(tree.state('selectedNodeDetails')).toHaveProperty('phaseMessage', 'This step is in Succeeded state with this message: some node message'); diff --git a/frontend/src/pages/RunDetails.tsx b/frontend/src/pages/RunDetails.tsx index 3906ebdb0fd..b55f33c1062 100644 --- a/frontend/src/pages/RunDetails.tsx +++ b/frontend/src/pages/RunDetails.tsx @@ -1,5 +1,5 @@ /* - * Copyright 2018 Google LLC + * Copyright 2018-2019 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -48,6 +48,8 @@ import { formatDateString, getRunDurationFromWorkflow, logger, errorToMessage } enum SidePaneTab { ARTIFACTS, INPUT_OUTPUT, + VOLUMES, + MANIFEST, LOGS, } @@ -174,7 +176,7 @@ class RunDetails extends Page { )}
- @@ -208,6 +210,22 @@ class RunDetails extends Page {
)} + {sidepanelSelectedTab === SidePaneTab.VOLUMES && ( +
+ +
+ )} + + {sidepanelSelectedTab === SidePaneTab.MANIFEST && ( +
+ +
+ )} + {sidepanelSelectedTab === SidePaneTab.LOGS && (
{this.state.logsBannerMessage && ( diff --git a/frontend/src/pages/__snapshots__/PipelineDetails.test.tsx.snap b/frontend/src/pages/__snapshots__/PipelineDetails.test.tsx.snap index a22ed1aac28..f086dcaa941 100644 --- a/frontend/src/pages/__snapshots__/PipelineDetails.test.tsx.snap +++ b/frontend/src/pages/__snapshots__/PipelineDetails.test.tsx.snap @@ -483,6 +483,12 @@ exports[`PipelineDetails shows clicked node info in the side panel if it is in t "val4", ], ], + "resource": Array [ + Array [], + ], + "volumeMounts": Array [ + Array [], + ], }, "label": "node1", }, @@ -546,6 +552,12 @@ exports[`PipelineDetails shows clicked node info in the side panel if it is in t "val4", ], ], + "resource": Array [ + Array [], + ], + "volumeMounts": Array [ + Array [], + ], } } /> diff --git a/frontend/src/pages/__snapshots__/RunDetails.test.tsx.snap b/frontend/src/pages/__snapshots__/RunDetails.test.tsx.snap index e340013d494..08fe86c7a20 100644 --- a/frontend/src/pages/__snapshots__/RunDetails.test.tsx.snap +++ b/frontend/src/pages/__snapshots__/RunDetails.test.tsx.snap @@ -408,11 +408,13 @@ exports[`RunDetails does not load logs if clicked node status is skipped 1`] = ` > `; +exports[`RunDetails switches to manifest tab in side pane 1`] = ` +
+
+ +
+
+
+ + +
+ +
+
+ +
+
+
+
+
+
+ + + Runtime execution graph. Only steps that are currently running or have already completed are shown. + +
+
+
+
+
+
+
+`; + exports[`RunDetails switches to run output tab, shows empty message 1`] = `
`; + +exports[`RunDetails switches to volumes tab in side pane 1`] = ` +
+
+ +
+
+
+ + +
+ +
+
+ +
+
+
+
+
+
+ + + Runtime execution graph. Only steps that are currently running or have already completed are shown. + +
+
+
+
+
+
+
+`; diff --git a/samples/resourceops/resourceop_basic.py b/samples/resourceops/resourceop_basic.py new file mode 100644 index 00000000000..3079379cbdb --- /dev/null +++ b/samples/resourceops/resourceop_basic.py @@ -0,0 +1,60 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Note that this sample is just to show the ResourceOp's usage. + +It is not a good practice to put password as a pipeline argument, since it will +be visible on KFP UI. +""" + +from kubernetes import client as k8s_client +import kfp.dsl as dsl + + +@dsl.pipeline( + name="ResourceOp Basic", + description="A Basic Example on ResourceOp Usage." +) +def resourceop_basic(username, password): + secret_resource = k8s_client.V1Secret( + api_version="v1", + kind="Secret", + metadata=k8s_client.V1ObjectMeta(generate_name="my-secret-"), + type="Opaque", + data={"username": username, "password": password} + ) + rop = dsl.ResourceOp( + name="create-my-secret", + k8s_resource=secret_resource, + attribute_outputs={"name": "{.metadata.name}"} + ) + + secret = k8s_client.V1Volume( + name="my-secret", + secret=k8s_client.V1SecretVolumeSource(secret_name=rop.output) + ) + + cop = dsl.ContainerOp( + name="cop", + image="library/bash:4.4.23", + command=["sh", "-c"], + arguments=["ls /etc/secret-volume"], + pvolumes={"/etc/secret-volume": secret} + ) + + +if __name__ == "__main__": + import kfp.compiler as compiler + compiler.Compiler().compile(resourceop_basic, __file__ + ".tar.gz") diff --git a/samples/resourceops/volume_snapshotop_rokurl.py b/samples/resourceops/volume_snapshotop_rokurl.py new file mode 100644 index 00000000000..0753d549f3f --- /dev/null +++ b/samples/resourceops/volume_snapshotop_rokurl.py @@ -0,0 +1,91 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""This sample uses Rok as an example to show case how VolumeOp accepts +annotations as an extra argument, and how we can use arbitrary PipelineParams +to determine their contents. + +The specific annotation is Rok-specific, but the use of annotations in such way +is widespread in storage systems integrated with K8s. +""" + +import kfp.dsl as dsl + + +@dsl.pipeline( + name="VolumeSnapshotOp RokURL", + description="The fifth example of the design doc." 
+) +def volume_snapshotop_rokurl(rok_url): + vop1 = dsl.VolumeOp( + name="create_volume_1", + resource_name="vol1", + size="1Gi", + annotations={"rok/origin": rok_url}, + modes=dsl.VOLUME_MODE_RWM + ) + + step1 = dsl.ContainerOp( + name="step1_concat", + image="library/bash:4.4.23", + command=["sh", "-c"], + arguments=["cat /data/file*| gzip -c >/data/full.gz"], + pvolumes={"/data": vop1.volume} + ) + + step1_snap = dsl.VolumeSnapshotOp( + name="create_snapshot_1", + resource_name="snap1", + volume=step1.pvolume + ) + + vop2 = dsl.VolumeOp( + name="create_volume_2", + resource_name="vol2", + data_source=step1_snap.snapshot, + size=step1_snap.outputs["size"] + ) + + step2 = dsl.ContainerOp( + name="step2_gunzip", + image="library/bash:4.4.23", + command=["gunzip", "-k", "/data/full.gz"], + pvolumes={"/data": vop2.volume} + ) + + step2_snap = dsl.VolumeSnapshotOp( + name="create_snapshot_2", + resource_name="snap2", + volume=step2.pvolume + ) + + vop3 = dsl.VolumeOp( + name="create_volume_3", + resource_name="vol3", + data_source=step2_snap.snapshot, + size=step2_snap.outputs["size"] + ) + + step3 = dsl.ContainerOp( + name="step3_output", + image="library/bash:4.4.23", + command=["cat", "/data/full"], + pvolumes={"/data": vop3.volume} + ) + + +if __name__ == "__main__": + import kfp.compiler as compiler + compiler.Compiler().compile(volume_snapshotop_rokurl, __file__ + ".tar.gz") diff --git a/samples/resourceops/volume_snapshotop_sequential.py b/samples/resourceops/volume_snapshotop_sequential.py new file mode 100644 index 00000000000..2b8500ec963 --- /dev/null +++ b/samples/resourceops/volume_snapshotop_sequential.py @@ -0,0 +1,87 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import kfp.dsl as dsl + + +@dsl.pipeline( + name="VolumeSnapshotOp Sequential", + description="The fourth example of the design doc." 
+) +def volume_snapshotop_sequential(url): + vop = dsl.VolumeOp( + name="create_volume", + resource_name="vol1", + size="1Gi", + modes=dsl.VOLUME_MODE_RWM + ) + + step1 = dsl.ContainerOp( + name="step1_ingest", + image="google/cloud-sdk:216.0.0", + command=["sh", "-c"], + arguments=["mkdir /data/step1 && " + "gsutil cat %s | gzip -c >/data/step1/file1.gz" % url], + pvolumes={"/data": vop.volume} + ) + + step1_snap = dsl.VolumeSnapshotOp( + name="step1_snap", + resource_name="step1_snap", + volume=step1.pvolume + ) + + step2 = dsl.ContainerOp( + name="step2_gunzip", + image="library/bash:4.4.23", + command=["sh", "-c"], + arguments=["mkdir /data/step2 && " + "gunzip /data/step1/file1.gz -c >/data/step2/file1"], + pvolumes={"/data": step1.pvolume} + ) + + step2_snap = dsl.VolumeSnapshotOp( + name="step2_snap", + resource_name="step2_snap", + volume=step2.pvolume + ) + + step3 = dsl.ContainerOp( + name="step3_copy", + image="library/bash:4.4.23", + command=["sh", "-c"], + arguments=["mkdir /data/step3 && " + "cp -av /data/step2/file1 /data/step3/file3"], + pvolumes={"/data": step2.pvolume} + ) + + step3_snap = dsl.VolumeSnapshotOp( + name="step3_snap", + resource_name="step3_snap", + volume=step3.pvolume + ) + + step4 = dsl.ContainerOp( + name="step4_output", + image="library/bash:4.4.23", + command=["cat", "/data/step2/file1", "/data/step3/file3"], + pvolumes={"/data": step3.pvolume} + ) + + +if __name__ == "__main__": + import kfp.compiler as compiler + compiler.Compiler().compile(volume_snapshotop_sequential, + __file__ + ".tar.gz") diff --git a/samples/resourceops/volumeop_basic.py b/samples/resourceops/volumeop_basic.py new file mode 100644 index 00000000000..babf12db6d1 --- /dev/null +++ b/samples/resourceops/volumeop_basic.py @@ -0,0 +1,42 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import kfp.dsl as dsl + + +@dsl.pipeline( + name="VolumeOp Basic", + description="A Basic Example on VolumeOp Usage." +) +def volumeop_basic(size): + vop = dsl.VolumeOp( + name="create_pvc", + resource_name="my-pvc", + modes=dsl.VOLUME_MODE_RWM, + size=size + ) + + cop = dsl.ContainerOp( + name="cop", + image="library/bash:4.4.23", + command=["sh", "-c"], + arguments=["echo foo > /mnt/file1"], + pvolumes={"/mnt": vop.volume} + ) + + +if __name__ == "__main__": + import kfp.compiler as compiler + compiler.Compiler().compile(volumeop_basic, __file__ + ".tar.gz") diff --git a/samples/resourceops/volumeop_dag.py b/samples/resourceops/volumeop_dag.py new file mode 100644 index 00000000000..9d9514550b6 --- /dev/null +++ b/samples/resourceops/volumeop_dag.py @@ -0,0 +1,58 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import kfp.dsl as dsl + + +@dsl.pipeline( + name="Volume Op DAG", + description="The second example of the design doc." +) +def volume_op_dag(): + vop = dsl.VolumeOp( + name="create_pvc", + resource_name="my-pvc", + size="10Gi", + modes=dsl.VOLUME_MODE_RWM + ) + + step1 = dsl.ContainerOp( + name="step1", + image="library/bash:4.4.23", + command=["sh", "-c"], + arguments=["echo 1 | tee /mnt/file1"], + pvolumes={"/mnt": vop.volume} + ) + + step2 = dsl.ContainerOp( + name="step2", + image="library/bash:4.4.23", + command=["sh", "-c"], + arguments=["echo 2 | tee /mnt2/file2"], + pvolumes={"/mnt2": vop.volume} + ) + + step3 = dsl.ContainerOp( + name="step3", + image="library/bash:4.4.23", + command=["sh", "-c"], + arguments=["cat /mnt/file1 /mnt/file2"], + pvolumes={"/mnt": vop.volume.after(step1, step2)} + ) + + +if __name__ == "__main__": + import kfp.compiler as compiler + compiler.Compiler().compile(volume_op_dag, __file__ + ".tar.gz") diff --git a/samples/resourceops/volumeop_parallel.py b/samples/resourceops/volumeop_parallel.py new file mode 100644 index 00000000000..15955e4c7ab --- /dev/null +++ b/samples/resourceops/volumeop_parallel.py @@ -0,0 +1,58 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import kfp.dsl as dsl + + +@dsl.pipeline( + name="VolumeOp Parallel", + description="The first example of the design doc." +) +def volumeop_parallel(): + vop = dsl.VolumeOp( + name="create_pvc", + resource_name="my-pvc", + size="10Gi", + modes=dsl.VOLUME_MODE_RWM + ) + + step1 = dsl.ContainerOp( + name="step1", + image="library/bash:4.4.23", + command=["sh", "-c"], + arguments=["echo 1 | tee /mnt/file1"], + pvolumes={"/mnt": vop.volume} + ) + + step2 = dsl.ContainerOp( + name="step2", + image="library/bash:4.4.23", + command=["sh", "-c"], + arguments=["echo 2 | tee /common/file2"], + pvolumes={"/common": vop.volume} + ) + + step3 = dsl.ContainerOp( + name="step3", + image="library/bash:4.4.23", + command=["sh", "-c"], + arguments=["echo 3 | tee /mnt3/file3"], + pvolumes={"/mnt3": vop.volume} + ) + + +if __name__ == "__main__": + import kfp.compiler as compiler + compiler.Compiler().compile(volumeop_parallel, __file__ + ".tar.gz") diff --git a/samples/resourceops/volumeop_sequential.py b/samples/resourceops/volumeop_sequential.py new file mode 100644 index 00000000000..3c8b0317c82 --- /dev/null +++ b/samples/resourceops/volumeop_sequential.py @@ -0,0 +1,57 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import kfp.dsl as dsl + + +@dsl.pipeline( + name="VolumeOp Sequential", + description="The third example of the design doc." +) +def volumeop_sequential(): + vop = dsl.VolumeOp( + name="mypvc", + resource_name="newpvc", + size="10Gi", + modes=dsl.VOLUME_MODE_RWM + ) + + step1 = dsl.ContainerOp( + name="step1", + image="library/bash:4.4.23", + command=["sh", "-c"], + arguments=["echo 1|tee /data/file1"], + pvolumes={"/data": vop.volume} + ) + + step2 = dsl.ContainerOp( + name="step2", + image="library/bash:4.4.23", + command=["sh", "-c"], + arguments=["cp /data/file1 /data/file2"], + pvolumes={"/data": step1.pvolume} + ) + + step3 = dsl.ContainerOp( + name="step3", + image="library/bash:4.4.23", + command=["cat", "/mnt/file1", "/mnt/file2"], + pvolumes={"/mnt": step2.pvolume} + ) + + +if __name__ == "__main__": + import kfp.compiler as compiler + compiler.Compiler().compile(volumeop_sequential, __file__ + ".tar.gz") diff --git a/sdk/python/kfp/compiler/_op_to_template.py b/sdk/python/kfp/compiler/_op_to_template.py index 509d48dc6d5..f5ae58b6152 100644 --- a/sdk/python/kfp/compiler/_op_to_template.py +++ b/sdk/python/kfp/compiler/_op_to_template.py @@ -14,10 +14,12 @@ import re from collections import OrderedDict +import yaml from typing import Union, List, Any, Callable, TypeVar, Dict from ._k8s_helper import K8sHelper from .. import dsl +from ..dsl._container_op import BaseOp # generics T = TypeVar('T') @@ -76,21 +78,21 @@ def _process_obj(obj: Any, map_to_tmpl_var: dict): return obj -def _process_container_ops(op: dsl.ContainerOp): - """Recursively go through the attrs listed in `attrs_with_pipelineparams` - and sanitize and replace pipeline params with template var string. - - Returns a processed `ContainerOp`. +def _process_base_ops(op: BaseOp): + """Recursively go through the attrs listed in `attrs_with_pipelineparams` + and sanitize and replace pipeline params with template var string. + + Returns a processed `BaseOp`. - NOTE this is an in-place update to `ContainerOp`'s attributes (i.e. other than - `file_outputs`, and `outputs`, all `PipelineParam` are replaced with the - corresponding template variable strings). + NOTE this is an in-place update to `BaseOp`'s attributes (i.e. the ones + specified in `attrs_with_pipelineparams`, all `PipelineParam` are replaced + with the corresponding template variable strings). 
Args: - op {dsl.ContainerOp}: class that inherits from ds.ContainerOp - + op {BaseOp}: class that inherits from BaseOp + Returns: - dsl.ContainerOp + BaseOp """ # map param's (unsanitized pattern or serialized str pattern) -> input param var str @@ -123,16 +125,21 @@ def _inputs_to_json(inputs_params: List[dsl.PipelineParam], _artifacts=None): return {'parameters': parameters} if parameters else None -def _outputs_to_json(outputs: Dict[str, dsl.PipelineParam], - file_outputs: Dict[str, str], +def _outputs_to_json(op: BaseOp, + outputs: Dict[str, dsl.PipelineParam], + param_outputs: Dict[str, str], output_artifacts: List[dict]): """Creates an argo `outputs` JSON obj.""" + if isinstance(op, dsl.ResourceOp): + value_from_key = "jsonPath" + else: + value_from_key = "path" output_parameters = [] for param in outputs.values(): output_parameters.append({ 'name': param.full_name, 'valueFrom': { - 'path': file_outputs[param.name] + value_from_key: param_outputs[param.name] } }) output_parameters.sort(key=lambda x: x['name']) @@ -168,29 +175,46 @@ def _build_conventional_artifact(name, path): # TODO: generate argo python classes from swagger and use convert_k8s_obj_to_json?? -def _op_to_template(op: dsl.ContainerOp): - """Generate template given an operator inherited from dsl.ContainerOp.""" - - # NOTE in-place update to ContainerOp - # replace all PipelineParams (except in `file_outputs`, `outputs`, `inputs`) - # with template var strings - processed_op = _process_container_ops(op) - - # default output artifacts - output_artifact_paths = OrderedDict() - output_artifact_paths.setdefault('mlpipeline-ui-metadata', '/mlpipeline-ui-metadata.json') - output_artifact_paths.setdefault('mlpipeline-metrics', '/mlpipeline-metrics.json') - - output_artifacts = [ - _build_conventional_artifact(name, path) - for name, path in output_artifact_paths.items() - ] - - # workflow template - template = { - 'name': op.name, - 'container': K8sHelper.convert_k8s_obj_to_json(op.container) - } +def _op_to_template(op: BaseOp): + """Generate template given an operator inherited from BaseOp.""" + + # NOTE in-place update to BaseOp + # replace all PipelineParams with template var strings + processed_op = _process_base_ops(op) + + if isinstance(op, dsl.ContainerOp): + # default output artifacts + output_artifact_paths = OrderedDict() + output_artifact_paths.setdefault('mlpipeline-ui-metadata', '/mlpipeline-ui-metadata.json') + output_artifact_paths.setdefault('mlpipeline-metrics', '/mlpipeline-metrics.json') + + output_artifacts = [ + _build_conventional_artifact(name, path) + for name, path in output_artifact_paths.items() + ] + + # workflow template + template = { + 'name': processed_op.name, + 'container': K8sHelper.convert_k8s_obj_to_json( + processed_op.container + ) + } + elif isinstance(op, dsl.ResourceOp): + # no output artifacts + output_artifacts = [] + + # workflow template + processed_op.resource["manifest"] = yaml.dump( + K8sHelper.convert_k8s_obj_to_json(processed_op.k8s_resource), + default_flow_style=False + ) + template = { + 'name': processed_op.name, + 'resource': K8sHelper.convert_k8s_obj_to_json( + processed_op.resource + ) + } # inputs inputs = _inputs_to_json(processed_op.inputs) @@ -198,8 +222,12 @@ def _op_to_template(op: dsl.ContainerOp): template['inputs'] = inputs # outputs - template['outputs'] = _outputs_to_json(op.outputs, op.file_outputs, - output_artifacts) + if isinstance(op, dsl.ContainerOp): + param_outputs = processed_op.file_outputs + elif isinstance(op, dsl.ResourceOp): + param_outputs = 
processed_op.attribute_outputs + template['outputs'] = _outputs_to_json(op, processed_op.outputs, + param_outputs, output_artifacts) # node selector if processed_op.node_selector: diff --git a/sdk/python/kfp/compiler/compiler.py b/sdk/python/kfp/compiler/compiler.py index d89b7376366..8b9d3cfd6c6 100644 --- a/sdk/python/kfp/compiler/compiler.py +++ b/sdk/python/kfp/compiler/compiler.py @@ -1,4 +1,4 @@ -# Copyright 2018 Google LLC +# Copyright 2018-2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -285,7 +285,7 @@ def _get_dependencies(self, pipeline, root_group, op_groups, opsgroups_groups, o upstream_op_names |= set(op.dependent_names) for op_name in upstream_op_names: - # the dependent op could be either a ContainerOp or an opsgroup + # the dependent op could be either a BaseOp or an opsgroup if op_name in pipeline.ops: upstream_op = pipeline.ops[op_name] elif op_name in opsgroups: @@ -601,8 +601,8 @@ def _compile(self, pipeline_func): arg.value = default.value if isinstance(default, dsl.PipelineParam) else default # Sanitize operator names and param names - sanitized_ops = {} - for op in p.ops.values(): + sanitized_cops = {} + for op in p.cops.values(): sanitized_name = K8sHelper.sanitize_k8s_name(op.name) op.name = sanitized_name for param in op.outputs.values(): @@ -619,8 +619,34 @@ def _compile(self, pipeline_func): for key in op.file_outputs.keys(): sanitized_file_outputs[K8sHelper.sanitize_k8s_name(key)] = op.file_outputs[key] op.file_outputs = sanitized_file_outputs - sanitized_ops[sanitized_name] = op - p.ops = sanitized_ops + sanitized_cops[sanitized_name] = op + p.cops = sanitized_cops + p.ops = dict(sanitized_cops) + + # Sanitize operator names and param names of ResourceOps + sanitized_rops = {} + for rop in p.rops.values(): + sanitized_name = K8sHelper.sanitize_k8s_name(rop.name) + rop.name = sanitized_name + for param in rop.outputs.values(): + param.name = K8sHelper.sanitize_k8s_name(param.name) + if param.op_name: + param.op_name = K8sHelper.sanitize_k8s_name(param.op_name) + if rop.output is not None: + rop.output.name = K8sHelper.sanitize_k8s_name(rop.output.name) + rop.output.op_name = K8sHelper.sanitize_k8s_name(rop.output.op_name) + if rop.dependent_names: + rop.dependent_names = [K8sHelper.sanitize_k8s_name(name) for name in rop.dependent_names] + if rop.attribute_outputs is not None: + sanitized_attribute_outputs = {} + for key in rop.attribute_outputs.keys(): + sanitized_attribute_outputs[K8sHelper.sanitize_k8s_name(key)] = \ + rop.attribute_outputs[key] + rop.attribute_outputs = sanitized_attribute_outputs + sanitized_rops[sanitized_name] = rop + p.rops = sanitized_rops + p.ops.update(dict(sanitized_rops)) + workflow = self._create_pipeline_workflow(args_list_with_defaults, p) return workflow diff --git a/sdk/python/kfp/dsl/__init__.py b/sdk/python/kfp/dsl/__init__.py index 6ead6327e49..d45eefc4275 100644 --- a/sdk/python/kfp/dsl/__init__.py +++ b/sdk/python/kfp/dsl/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2018 Google LLC +# Copyright 2018-2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -16,5 +16,11 @@ from ._pipeline_param import PipelineParam, match_serialized_pipelineparam from ._pipeline import Pipeline, pipeline, get_pipeline_conf from ._container_op import ContainerOp, Sidecar +from ._resource_op import ResourceOp +from ._volume_op import ( + VolumeOp, VOLUME_MODE_RWO, VOLUME_MODE_RWM, VOLUME_MODE_ROM +) +from ._pipeline_volume import PipelineVolume +from ._volume_snapshot_op import VolumeSnapshotOp from ._ops_group import OpsGroup, ExitHandler, Condition -from ._component import python_component, graph_component, component \ No newline at end of file +from ._component import python_component, graph_component, component diff --git a/sdk/python/kfp/dsl/_container_op.py b/sdk/python/kfp/dsl/_container_op.py index 8d2cdece9fb..6456e5f7eb4 100644 --- a/sdk/python/kfp/dsl/_container_op.py +++ b/sdk/python/kfp/dsl/_container_op.py @@ -18,7 +18,8 @@ from kubernetes.client.models import ( V1Container, V1EnvVar, V1EnvFromSource, V1SecurityContext, V1Probe, V1ResourceRequirements, V1VolumeDevice, V1VolumeMount, V1ContainerPort, - V1Lifecycle) + V1Lifecycle, V1Volume +) from . import _pipeline_param from ._metadata import ComponentMeta @@ -622,16 +623,187 @@ def inputs(self): return _pipeline_param.extract_pipelineparams_from_any(self) -def _make_hash_based_id_for_container_op(container_op): - # Generating a unique ID for ContainerOp. For class instances, the hash is the object's memory address which is unique. - return container_op.human_name + ' ' + hex(2**63 + hash(container_op))[2:] +def _make_hash_based_id_for_op(op): + # Generating a unique ID for Op. For class instances, the hash is the object's memory address which is unique. + return op.human_name + ' ' + hex(2**63 + hash(op))[2:] -# Pointer to a function that generates a unique ID for the ContainerOp instance (Possibly by registering the ContainerOp instance in some system). -_register_container_op_handler = _make_hash_based_id_for_container_op +# Pointer to a function that generates a unique ID for the Op instance (Possibly by registering the Op instance in some system). +_register_op_handler = _make_hash_based_id_for_op -class ContainerOp(object): +class BaseOp(object): + + # list of attributes that might have pipeline params - used to generate + # the input parameters during compilation. + # Excludes `file_outputs` and `outputs` as they are handled separately + # in the compilation process to generate the DAGs and task io parameters. + attrs_with_pipelineparams = [ + 'node_selector', 'volumes', 'pod_annotations', 'pod_labels', + 'num_retries', 'sidecars' + ] + + def __init__(self, + name: str, + sidecars: List[Sidecar] = None, + is_exit_handler: bool = False): + """Create a new instance of BaseOp + + Args: + name: the name of the op. It does not have to be unique within a pipeline + because the pipeline will generates a unique new name in case of conflicts. + sidecars: the list of `Sidecar` objects describing the sidecar containers to deploy + together with the `main` container. + is_exit_handler: Whether it is used as an exit handler. + """ + + valid_name_regex = r'^[A-Za-z][A-Za-z0-9\s_-]*$' + if not re.match(valid_name_regex, name): + raise ValueError( + 'Only letters, numbers, spaces, "_", and "-" are allowed in name. Must begin with letter: %s' + % (name)) + + self.is_exit_handler = is_exit_handler + + # human_name must exist to construct operator's name + self.human_name = name + # ID of the current Op. Ideally, it should be generated by the compiler that sees the bigger context. 
+ # However, the ID is used in the task output references (PipelineParams) which can be serialized to strings. + # Because of this we must obtain a unique ID right now. + self.name = _register_op_handler(self) + + # TODO: proper k8s definitions so that `convert_k8s_obj_to_json` can be used? + # `io.argoproj.workflow.v1alpha1.Template` properties + self.node_selector = {} + self.volumes = [] + self.pod_annotations = {} + self.pod_labels = {} + self.num_retries = 0 + self.sidecars = sidecars or [] + + # attributes specific to `BaseOp` + self._inputs = [] + self.dependent_names = [] + + @property + def inputs(self): + """List of PipelineParams that will be converted into input parameters + (io.argoproj.workflow.v1alpha1.Inputs) for the argo workflow. + """ + # Iterate through and extract all the `PipelineParam` in Op when + # called the 1st time (because there are in-place updates to `PipelineParam` + # during compilation - remove in-place updates for easier debugging?) + if not self._inputs: + self._inputs = [] + # TODO replace with proper k8s obj? + for key in self.attrs_with_pipelineparams: + self._inputs += [ + param for param in _pipeline_param. + extract_pipelineparams_from_any(getattr(self, key)) + ] + # keep only unique + self._inputs = list(set(self._inputs)) + return self._inputs + + @inputs.setter + def inputs(self, value): + # to support in-place updates + self._inputs = value + + def apply(self, mod_func): + """Applies a modifier function to self. The function should return the passed object. + This is needed to chain "extention methods" to this class. + + Example: + from kfp.gcp import use_gcp_secret + task = ( + train_op(...) + .set_memory_request('1GB') + .apply(use_gcp_secret('user-gcp-sa')) + .set_memory_limit('2GB') + ) + """ + return mod_func(self) + + def after(self, op): + """Specify explicit dependency on another op.""" + self.dependent_names.append(op.name) + return self + + def add_volume(self, volume): + """Add K8s volume to the container + + Args: + volume: Kubernetes volumes + For detailed spec, check volume definition + https://github.com/kubernetes-client/python/blob/master/kubernetes/client/models/v1_volume.py + """ + self.volumes.append(volume) + return self + + def add_node_selector_constraint(self, label_name, value): + """Add a constraint for nodeSelector. Each constraint is a key-value pair label. For the + container to be eligible to run on a node, the node must have each of the constraints appeared + as labels. + + Args: + label_name: The name of the constraint label. + value: The value of the constraint label. + """ + + self.node_selector[label_name] = value + return self + + def add_pod_annotation(self, name: str, value: str): + """Adds a pod's metadata annotation. + + Args: + name: The name of the annotation. + value: The value of the annotation. + """ + + self.pod_annotations[name] = value + return self + + def add_pod_label(self, name: str, value: str): + """Adds a pod's metadata label. + + Args: + name: The name of the label. + value: The value of the label. + """ + + self.pod_labels[name] = value + return self + + def set_retry(self, num_retries: int): + """Sets the number of times the task is retried until it's declared failed. + + Args: + num_retries: Number of times to retry on failures. + """ + + self.num_retries = num_retries + return self + + def add_sidecar(self, sidecar: Sidecar): + """Add a sidecar to the Op. + + Args: + sidecar: SideCar object. 
+ """ + + self.sidecars.append(sidecar) + return self + + def __repr__(self): + return str({self.__class__.__name__: self.__dict__}) + + +from ._pipeline_volume import PipelineVolume #The import is here to prevent circular reference problems. + + +class ContainerOp(BaseOp): """ Represents an op implemented by a container image. @@ -667,10 +839,6 @@ def foo_pipeline(tag: str, pull_image_policy: str): # the input parameters during compilation. # Excludes `file_outputs` and `outputs` as they are handled separately # in the compilation process to generate the DAGs and task io parameters. - attrs_with_pipelineparams = [ - '_container', 'node_selector', 'volumes', 'pod_annotations', - 'pod_labels', 'num_retries', 'sidecars' - ] def __init__(self, name: str, @@ -680,7 +848,9 @@ def __init__(self, sidecars: List[Sidecar] = None, container_kwargs: Dict = None, file_outputs: Dict[str, str] = None, - is_exit_handler=False): + is_exit_handler=False, + pvolumes: Dict[str, V1Volume] = None, + ): """Create a new instance of ContainerOp. Args: @@ -700,21 +870,18 @@ def __init__(self, the value of a PipelineParam is saved to its corresponding local file. It's one way for outside world to receive outputs of the container. is_exit_handler: Whether it is used as an exit handler. + pvolumes: Dictionary for the user to match a path on the op's fs with a + V1Volume or it inherited type. + E.g {"/my/path": vol, "/mnt": other_op.volumes["/output"]}. """ - valid_name_regex = r'^[A-Za-z][A-Za-z0-9\s_-]*$' - if not re.match(valid_name_regex, name): - raise ValueError( - 'Only letters, numbers, spaces, "_", and "-" are allowed in name. Must begin with letter: %s' - % (name)) + super().__init__(name=name, sidecars=sidecars, is_exit_handler=is_exit_handler) + self.attrs_with_pipelineparams = BaseOp.attrs_with_pipelineparams + ['_container'] #Copying the BaseOp class variable! # convert to list if not a list command = as_list(command) arguments = as_list(arguments) - # human_name must exist to construct containerOps name - self.human_name = name - # `container` prop in `io.argoproj.workflow.v1alpha1.Template` container_kwargs = container_kwargs or {} self._container = Container( @@ -748,27 +915,10 @@ def _decorated(*args, **kwargs): # only proxy public callables setattr(self, attr_to_proxy, _proxy(attr_to_proxy)) - # TODO: proper k8s definitions so that `convert_k8s_obj_to_json` can be used? - # `io.argoproj.workflow.v1alpha1.Template` properties - self.node_selector = {} - self.volumes = [] - self.pod_annotations = {} - self.pod_labels = {} - self.num_retries = 0 - self.sidecars = sidecars or [] - # attributes specific to `ContainerOp` - self._inputs = [] self.file_outputs = file_outputs - self.dependent_names = [] - self.is_exit_handler = is_exit_handler self._metadata = None - # ID of the current ContainerOp. Ideally, it should be generated by the compiler that sees the bigger context. - # However, the ID is used in the task output references (PipelineParams) which can be serialized to strings. - # Because of this we must obtain a unique ID right now. 
- self.name = _register_container_op_handler(self) - self.outputs = {} if file_outputs: self.outputs = { @@ -780,6 +930,24 @@ def _decorated(*args, **kwargs): if len(self.outputs) == 1: self.output = list(self.outputs.values())[0] + self.pvolumes = {} + if pvolumes: + for mount_path, pvolume in pvolumes.items(): + if hasattr(pvolume, "dependent_names"): #TODO: Replace with type check + self.dependent_names.extend(pvolume.dependent_names) + else: + pvolume = PipelineVolume(volume=pvolume) + self.pvolumes[mount_path] = pvolume.after(self) + self.add_volume(pvolume) + self._container.add_volume_mount(V1VolumeMount( + name=pvolume.name, + mount_path=mount_path + )) + + self.pvolume = None + if self.pvolumes and len(self.pvolumes) == 1: + self.pvolume = list(self.pvolumes.values())[0] + @property def command(self): return self._container.command @@ -796,31 +964,6 @@ def arguments(self): def arguments(self, value): self._container.args = as_list(value) - @property - def inputs(self): - """List of PipelineParams that will be converted into input parameters - (io.argoproj.workflow.v1alpha1.Inputs) for the argo workflow. - """ - # iterate thru and extract all the `PipelineParam` in `ContainerOp` when - # called the 1st time (because there are in-place updates to `PipelineParam` - # during compilation - remove in-place updates for easier debugging?) - if not self._inputs: - self._inputs = [] - # TODO replace with proper k8s obj? - for key in self.attrs_with_pipelineparams: - self._inputs += [ - param for param in _pipeline_param. - extract_pipelineparams_from_any(getattr(self, key)) - ] - # keep only unique - self._inputs = list(set(self._inputs)) - return self._inputs - - @inputs.setter - def inputs(self, value): - # to support in-place updates - self._inputs = value - @property def container(self): """`Container` object that represents the `container` property in @@ -842,95 +985,6 @@ def immediate_value_pipeline(): """ return self._container - def apply(self, mod_func): - """Applies a modifier function to self. The function should return the passed object. - This is needed to chain "extention methods" to this class. - - Example: - from kfp.gcp import use_gcp_secret - task = ( - train_op(...) - .set_memory_request('1GB') - .apply(use_gcp_secret('user-gcp-sa')) - .set_memory_limit('2GB') - ) - """ - return mod_func(self) - - def after(self, op): - """Specify explicit dependency on another op.""" - self.dependent_names.append(op.name) - return self - - def add_volume(self, volume): - """Add K8s volume to the container - - Args: - volume: Kubernetes volumes - For detailed spec, check volume definition - https://github.com/kubernetes-client/python/blob/master/kubernetes/client/models/v1_volume.py - """ - self.volumes.append(volume) - return self - - def add_node_selector_constraint(self, label_name, value): - """Add a constraint for nodeSelector. Each constraint is a key-value pair label. For the - container to be eligible to run on a node, the node must have each of the constraints appeared - as labels. - - Args: - label_name: The name of the constraint label. - value: The value of the constraint label. - """ - - self.node_selector[label_name] = value - return self - - def add_pod_annotation(self, name: str, value: str): - """Adds a pod's metadata annotation. - - Args: - name: The name of the annotation. - value: The value of the annotation. - """ - - self.pod_annotations[name] = value - return self - - def add_pod_label(self, name: str, value: str): - """Adds a pod's metadata label. 
- - Args: - name: The name of the label. - value: The value of the label. - """ - - self.pod_labels[name] = value - return self - - def set_retry(self, num_retries: int): - """Sets the number of times the task is retried until it's declared failed. - - Args: - num_retries: Number of times to retry on failures. - """ - - self.num_retries = num_retries - return self - - def add_sidecar(self, sidecar: Sidecar): - """Add a sidecar to the ContainerOps. - - Args: - sidecar: SideCar object. - """ - - self.sidecars.append(sidecar) - return self - - def __repr__(self): - return str({self.__class__.__name__: self.__dict__}) - def _set_metadata(self, metadata): '''_set_metadata passes the containerop the metadata information and configures the right output diff --git a/sdk/python/kfp/dsl/_ops_group.py b/sdk/python/kfp/dsl/_ops_group.py index 99078916fea..7e251b7a67e 100644 --- a/sdk/python/kfp/dsl/_ops_group.py +++ b/sdk/python/kfp/dsl/_ops_group.py @@ -1,4 +1,4 @@ -# Copyright 2018 Google LLC +# Copyright 2018-2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -143,4 +143,4 @@ def __init__(self, name): super(Graph, self).__init__(group_type='graph', name=name) self.inputs = [] self.outputs = {} - self.dependencies = [] \ No newline at end of file + self.dependencies = [] diff --git a/sdk/python/kfp/dsl/_pipeline.py b/sdk/python/kfp/dsl/_pipeline.py index ce22aa741fa..5de27328c0a 100644 --- a/sdk/python/kfp/dsl/_pipeline.py +++ b/sdk/python/kfp/dsl/_pipeline.py @@ -1,4 +1,4 @@ -# Copyright 2018 Google LLC +# Copyright 2018-2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,6 +14,7 @@ from . import _container_op +from . import _resource_op from . import _ops_group from ..components._naming import _make_name_unique_by_adding_index import sys @@ -109,6 +110,8 @@ def __init__(self, name: str): """ self.name = name self.ops = {} + self.cops = {} + self.rops = {} # Add the root group. self.groups = [_ops_group.OpsGroup('pipeline', name=name)] self.group_id = 0 @@ -124,28 +127,31 @@ def __enter__(self): def register_op_and_generate_id(op): return self.add_op(op, op.is_exit_handler) - self._old__register_container_op_handler = _container_op._register_container_op_handler - _container_op._register_container_op_handler = register_op_and_generate_id + self._old__register_op_handler = _container_op._register_op_handler + _container_op._register_op_handler = register_op_and_generate_id return self def __exit__(self, *args): Pipeline._default_pipeline = None - _container_op._register_container_op_handler = self._old__register_container_op_handler + _container_op._register_op_handler = self._old__register_op_handler - def add_op(self, op: _container_op.ContainerOp, define_only: bool): + def add_op(self, op: _container_op.BaseOp, define_only: bool): """Add a new operator. Args: - op: An operator of ContainerOp or its inherited type. + op: An operator of ContainerOp, ResourceOp or their inherited types. Returns op_name: a unique op name. """ - #If there is an existing op with this name then generate a new name. 
op_name = _make_name_unique_by_adding_index(op.human_name, list(self.ops.keys()), ' ') self.ops[op_name] = op + if isinstance(op, _container_op.ContainerOp): + self.cops[op_name] = op + elif isinstance(op, _resource_op.ResourceOp): + self.rops[op_name] = op if not define_only: self.groups[-1].ops.append(op) diff --git a/sdk/python/kfp/dsl/_pipeline_volume.py b/sdk/python/kfp/dsl/_pipeline_volume.py new file mode 100644 index 00000000000..1364380bfb8 --- /dev/null +++ b/sdk/python/kfp/dsl/_pipeline_volume.py @@ -0,0 +1,104 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from kubernetes.client.models import ( + V1Volume, V1PersistentVolumeClaimVolumeSource, + V1ObjectMeta, V1TypedLocalObjectReference +) + +from . import _pipeline +from ._pipeline_param import sanitize_k8s_name, match_serialized_pipelineparam +from ._volume_snapshot_op import VolumeSnapshotOp + + +class PipelineVolume(V1Volume): + """Representing a volume that is passed between pipeline operators and is + to be mounted by a ContainerOp or its inherited type. + + A PipelineVolume object can be used as an extention of the pipeline + function's filesystem. It may then be passed between ContainerOps, + exposing dependencies. + """ + def __init__(self, + pvc: str = None, + volume: V1Volume = None, + **kwargs): + """Create a new instance of PipelineVolume. + + Args: + pvc: The name of an existing PVC + volume: Create a deep copy out of a V1Volume or PipelineVolume + with no deps + Raises: + ValueError: if pvc is not None and name is None + if volume is not None and kwargs is not None + if pvc is not None and kwargs.pop("name") is not None + """ + if pvc and "name" not in kwargs: + raise ValueError("Please provide name.") + elif volume and kwargs: + raise ValueError("You can't pass a volume along with other " + "kwargs.") + + init_volume = {} + if volume: + init_volume = {attr: getattr(volume, attr) + for attr in self.attribute_map.keys()} + else: + init_volume = {"name": kwargs.pop("name") + if "name" in kwargs else None} + if pvc and kwargs: + raise ValueError("You can only pass 'name' along with 'pvc'.") + elif pvc and not kwargs: + pvc_volume_source = V1PersistentVolumeClaimVolumeSource( + claim_name=pvc + ) + init_volume["persistent_volume_claim"] = pvc_volume_source + super().__init__(**init_volume, **kwargs) + self.dependent_names = [] + + def after(self, *ops): + """Creates a duplicate of self with the required dependecies excluding + the redundant dependenices. 
+ Args: + *ops: Pipeline operators to add as dependencies + """ + def implies(newdep, olddep): + if newdep.name == olddep: + return True + for parentdep_name in newdep.dependent_names: + if parentdep_name == olddep: + return True + else: + parentdep = _pipeline.Pipeline.get_default_pipeline( + ).ops[parentdep_name] + if parentdep: + if implies(parentdep, olddep): + return True + return False + + ret = self.__class__(volume=self) + ret.dependent_names = [op.name for op in ops] + + for olddep in self.dependent_names: + implied = False + for newdep in ops: + implied = implies(newdep, olddep) + if implied: + break + if not implied: + ret.dependent_names.append(olddep) + + return ret diff --git a/sdk/python/kfp/dsl/_resource_op.py b/sdk/python/kfp/dsl/_resource_op.py new file mode 100644 index 00000000000..b07207662bf --- /dev/null +++ b/sdk/python/kfp/dsl/_resource_op.py @@ -0,0 +1,149 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from typing import Dict + +from ._container_op import BaseOp +from . import _pipeline_param + + +class Resource(object): + """ + A wrapper over Argo ResourceTemplate definition object + (io.argoproj.workflow.v1alpha1.ResourceTemplate) + which is used to represent the `resource` property in argo's workflow + template (io.argoproj.workflow.v1alpha1.Template). + """ + """ + Attributes: + swagger_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + swagger_types = { + "action": "str", + "merge_strategy": "str", + "success_condition": "str", + "failure_condition": "str", + "manifest": "str" + } + attribute_map = { + "action": "action", + "merge_strategy": "mergeStrategy", + "success_condition": "successCondition", + "failure_condition": "failureCondition", + "manifest": "manifest" + } + + def __init__(self, + action: str = None, + merge_strategy: str = None, + success_condition: str = None, + failure_condition: str = None, + manifest: str = None): + """Create a new instance of Resource""" + self.action = action + self.merge_strategy = merge_strategy + self.success_condition = success_condition + self.failure_condition = failure_condition + self.manifest = manifest + + +class ResourceOp(BaseOp): + """Represents an op which will be translated into a resource template""" + + def __init__(self, + k8s_resource=None, + action: str = "create", + merge_strategy: str = None, + success_condition: str = None, + failure_condition: str = None, + attribute_outputs: Dict[str, str] = None, + **kwargs): + """Create a new instance of ResourceOp. 
+ + Args: + k8s_resource: A k8s resource which will be submitted to the cluster + action: One of "create"/"delete"/"apply"/"patch" + (default is "create") + merge_strategy: The merge strategy for the "apply" action + success_condition: The successCondition of the template + failure_condition: The failureCondition of the template + For more info see: + https://github.com/argoproj/argo/blob/master/examples/k8s-jobs.yaml + attribute_outputs: Maps output labels to resource's json paths, + similarly to file_outputs of ContainerOp + kwargs: name, sidecars & is_exit_handler. See BaseOp definition + Raises: + ValueError: if not inside a pipeline + if the name is an invalid string + if no k8s_resource is provided + if merge_strategy is set without "apply" action + """ + + super().__init__(**kwargs) + self.attrs_with_pipelineparams = list(self.attrs_with_pipelineparams) + self.attrs_with_pipelineparams.extend([ + "_resource", "k8s_resource", "attribute_outputs" + ]) + + if k8s_resource is None: + ValueError("You need to provide a k8s_resource.") + + if merge_strategy and action != "apply": + ValueError("You can't set merge_strategy when action != 'apply'") + + init_resource = { + "action": action, + "merge_strategy": merge_strategy, + "success_condition": success_condition, + "failure_condition": failure_condition + } + # `resource` prop in `io.argoproj.workflow.v1alpha1.Template` + self._resource = Resource(**init_resource) + + self.k8s_resource = k8s_resource + + # Set attribute_outputs + extra_attribute_outputs = \ + attribute_outputs if attribute_outputs else {} + self.attribute_outputs = \ + self.attribute_outputs if hasattr(self, "attribute_outputs") \ + else {} + self.attribute_outputs.update(extra_attribute_outputs) + # Add name and manifest if not specified by the user + if "name" not in self.attribute_outputs: + self.attribute_outputs["name"] = "{.metadata.name}" + if "manifest" not in self.attribute_outputs: + self.attribute_outputs["manifest"] = "{}" + + # Set outputs + self.outputs = { + name: _pipeline_param.PipelineParam(name, op_name=self.name) + for name in self.attribute_outputs.keys() + } + # If user set a single attribute_output, set self.output as that + # parameter, else set it as the resource name + self.output = self.outputs["name"] + if len(extra_attribute_outputs) == 1: + self.output = self.outputs[list(extra_attribute_outputs)[0]] + + @property + def resource(self): + """`Resource` object that represents the `resource` property in + `io.argoproj.workflow.v1alpha1.Template`. + """ + return self._resource diff --git a/sdk/python/kfp/dsl/_volume_op.py b/sdk/python/kfp/dsl/_volume_op.py new file mode 100644 index 00000000000..56a626aa4d8 --- /dev/null +++ b/sdk/python/kfp/dsl/_volume_op.py @@ -0,0 +1,142 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
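# A minimal ResourceOp usage sketch: managing a batch/v1 Job and gating the
# step on success_condition / failure_condition, following the expressions in
# Argo's k8s-jobs example linked from the ResourceOp docstring. The pipeline
# name, image, and Job spec below are illustrative assumptions, not fixed API.

from kubernetes import client as k8s_client
import kfp.dsl as dsl


@dsl.pipeline(
    name="Job via ResourceOp",
    description="Hypothetical sketch of ResourceOp with success/failure conditions."
)
def job_via_resourceop():
    # A throwaway batch/v1 Job built with the kubernetes client models.
    job = k8s_client.V1Job(
        api_version="batch/v1",
        kind="Job",
        metadata=k8s_client.V1ObjectMeta(generate_name="sketch-job-"),
        spec=k8s_client.V1JobSpec(
            template=k8s_client.V1PodTemplateSpec(
                spec=k8s_client.V1PodSpec(
                    restart_policy="Never",
                    containers=[k8s_client.V1Container(
                        name="main",
                        image="library/bash:4.4.23",
                        command=["sh", "-c", "echo hello"]
                    )]
                )
            )
        )
    )

    rop = dsl.ResourceOp(
        name="run-job",
        k8s_resource=job,
        action="create",
        success_condition="status.succeeded > 0",
        failure_condition="status.failed > 3",
        attribute_outputs={"job-name": "{.metadata.name}"}
    )

    # With exactly one user-supplied attribute output, rop.output refers to it;
    # every attribute output is also exposed as a PipelineParam in rop.outputs.
    dsl.ContainerOp(
        name="report",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["echo %s" % rop.output]
    )


if __name__ == "__main__":
    import kfp.compiler as compiler
    compiler.Compiler().compile(job_via_resourceop, __file__ + ".tar.gz")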
+ + +import re +from typing import List, Dict +from kubernetes.client.models import ( + V1ObjectMeta, V1ResourceRequirements, V1PersistentVolumeClaimSpec, + V1PersistentVolumeClaim, V1TypedLocalObjectReference +) + +from ._resource_op import ResourceOp +from ._pipeline_param import ( + PipelineParam, match_serialized_pipelineparam, sanitize_k8s_name +) +from ._pipeline_volume import PipelineVolume + + +VOLUME_MODE_RWO = ["ReadWriteOnce"] +VOLUME_MODE_RWM = ["ReadWriteMany"] +VOLUME_MODE_ROM = ["ReadOnlyMany"] + + +class VolumeOp(ResourceOp): + """Represents an op which will be translated into a resource template + which will be creating a PVC. + """ + + def __init__(self, + resource_name: str = None, + size: str = None, + storage_class: str = None, + modes: List[str] = VOLUME_MODE_RWM, + annotations: Dict[str, str] = None, + data_source=None, + **kwargs): + """Create a new instance of VolumeOp. + + Args: + resource_name: A desired name for the PVC which will be created + size: The size of the PVC which will be created + storage_class: The storage class to use for the dynamically created + PVC + modes: The access modes for the PVC + annotations: Annotations to be patched in the PVC + data_source: May be a V1TypedLocalObjectReference, and then it is + used in the data_source field of the PVC as is. Can also be a + string/PipelineParam, and in that case it will be used as a + VolumeSnapshot name (Alpha feature) + kwargs: See ResourceOp definition + Raises: + ValueError: if k8s_resource is provided along with other arguments + if k8s_resource is not a V1PersistentVolumeClaim + if size is None + if size is an invalid memory string (when not a + PipelineParam) + if data_source is not one of (str, PipelineParam, + V1TypedLocalObjectReference) + """ + # Add size to attribute outputs + self.attribute_outputs = {"size": "{.status.capacity.storage}"} + + if "k8s_resource" in kwargs: + if resource_name or size or storage_class or modes or annotations: + raise ValueError("You cannot provide k8s_resource along with " + "other arguments.") + if not isinstance(kwargs["k8s_resource"], V1PersistentVolumeClaim): + raise ValueError("k8s_resource in VolumeOp must be an instance" + " of V1PersistentVolumeClaim") + super().__init__(**kwargs) + self.volume = PipelineVolume( + name=sanitize_k8s_name(self.name), + pvc=self.outputs["name"] + ) + return + + if not size: + raise ValueError("Please provide size") + elif not match_serialized_pipelineparam(str(size)): + self._validate_memory_string(size) + + if data_source and not isinstance( + data_source, (str, PipelineParam, V1TypedLocalObjectReference)): + raise ValueError("data_source can be one of (str, PipelineParam, " + "V1TypedLocalObjectReference).") + if data_source and isinstance(data_source, (str, PipelineParam)): + data_source = V1TypedLocalObjectReference( + api_group="snapshot.storage.k8s.io", + kind="VolumeSnapshot", + name=data_source + ) + + # Set the k8s_resource + if not match_serialized_pipelineparam(str(resource_name)): + resource_name = sanitize_k8s_name(resource_name) + pvc_metadata = V1ObjectMeta( + name="{{workflow.name}}-%s" % resource_name, + annotations=annotations + ) + requested_resources = V1ResourceRequirements( + requests={"storage": size} + ) + pvc_spec = V1PersistentVolumeClaimSpec( + access_modes=modes, + resources=requested_resources, + storage_class_name=storage_class, + data_source=data_source + ) + k8s_resource = V1PersistentVolumeClaim( + api_version="v1", + kind="PersistentVolumeClaim", + metadata=pvc_metadata, + spec=pvc_spec 
+ ) + + super().__init__( + k8s_resource=k8s_resource, + **kwargs, + ) + self.volume = PipelineVolume( + name=sanitize_k8s_name(self.name), + pvc=self.outputs["name"] + ) + + def _validate_memory_string(self, memory_string): + """Validate a given string is valid for memory request or limit.""" + if re.match(r'^[0-9]+(E|Ei|P|Pi|T|Ti|G|Gi|M|Mi|K|Ki){0,1}$', + memory_string) is None: + raise ValueError('Invalid memory string. Should be an integer, ' + + 'or integer followed by one of ' + + '"E|Ei|P|Pi|T|Ti|G|Gi|M|Mi|K|Ki"') diff --git a/sdk/python/kfp/dsl/_volume_snapshot_op.py b/sdk/python/kfp/dsl/_volume_snapshot_op.py new file mode 100644 index 00000000000..694d04cc39f --- /dev/null +++ b/sdk/python/kfp/dsl/_volume_snapshot_op.py @@ -0,0 +1,126 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from typing import Dict +from kubernetes.client.models import ( + V1Volume, V1TypedLocalObjectReference, V1ObjectMeta +) + +from ._resource_op import ResourceOp +from ._pipeline_param import match_serialized_pipelineparam, sanitize_k8s_name + + +class VolumeSnapshotOp(ResourceOp): + """Represents an op which will be translated into a resource template + which will be creating a VolumeSnapshot. + + At the time that this feature is written, VolumeSnapshots are an Alpha + feature in Kubernetes. You should check with your Kubernetes Cluster admin + if they have it enabled. + """ + + def __init__(self, + resource_name: str = None, + pvc: str = None, + snapshot_class: str = None, + annotations: Dict[str, str] = None, + volume: V1Volume = None, + **kwargs): + """Create a new instance of VolumeSnapshotOp. 
+ + Args: + resource_name: A desired name for the VolumeSnapshot which will be + created + pvc: The name of the PVC which will be snapshotted + snapshot_class: The snapshot class to use for the dynamically + created VolumeSnapshot + annotations: Annotations to be patched in the VolumeSnapshot + volume: An instance of V1Volume + kwargs: See ResourceOp definition + Raises: + ValueError: if k8s_resource is provided along with other arguments + if k8s_resource is not a VolumeSnapshot + if pvc and volume are None + if pvc and volume are not None + if volume does not reference a PVC + """ + # Add size to output params + self.attribute_outputs = {"size": "{.status.restoreSize}"} + # Add default success_condition if None provided + if "success_condition" not in kwargs: + kwargs["success_condition"] = "status.readyToUse == true" + + if "k8s_resource" in kwargs: + if resource_name or pvc or snapshot_class or annotations or volume: + raise ValueError("You cannot provide k8s_resource along with " + "other arguments.") + # TODO: Check if is VolumeSnapshot + super().__init__(**kwargs) + self.snapshot = V1TypedLocalObjectReference( + api_group="snapshot.storage.k8s.io", + kind="VolumeSnapshot", + name=self.outputs["name"] + ) + return + + if not (pvc or volume): + raise ValueError("You must provide a pvc or a volume.") + elif pvc and volume: + raise ValueError("You can't provide both pvc and volume.") + + source = None + deps = [] + if pvc: + source = V1TypedLocalObjectReference( + kind="PersistentVolumeClaim", + name=pvc + ) + else: + if not hasattr(volume, "persistent_volume_claim"): + raise ValueError("The volume must be referencing a PVC.") + if hasattr(volume, "dependent_names"): #TODO: Replace with type check + deps = list(volume.dependent_names) + source = V1TypedLocalObjectReference( + kind="PersistentVolumeClaim", + name=volume.persistent_volume_claim.claim_name + ) + + # Set the k8s_resource + # TODO: Use VolumeSnapshot + if not match_serialized_pipelineparam(str(resource_name)): + resource_name = sanitize_k8s_name(resource_name) + snapshot_metadata = V1ObjectMeta( + name="{{workflow.name}}-%s" % resource_name, + annotations=annotations + ) + k8s_resource = { + "apiVersion": "snapshot.storage.k8s.io/v1alpha1", + "kind": "VolumeSnapshot", + "metadata": snapshot_metadata, + "spec": {"source": source} + } + if snapshot_class: + k8s_resource["spec"]["snapshotClassName"] = snapshot_class + + super().__init__( + k8s_resource=k8s_resource, + **kwargs + ) + self.dependent_names.extend(deps) + self.snapshot = V1TypedLocalObjectReference( + api_group="snapshot.storage.k8s.io", + kind="VolumeSnapshot", + name=self.outputs["name"] + ) diff --git a/sdk/python/tests/compiler/compiler_tests.py b/sdk/python/tests/compiler/compiler_tests.py index 93f6839635c..051e246c162 100644 --- a/sdk/python/tests/compiler/compiler_tests.py +++ b/sdk/python/tests/compiler/compiler_tests.py @@ -1,4 +1,4 @@ -# Copyright 2018 Google LLC +# Copyright 2018-2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
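Besides snapshotting a PipelineVolume produced by an earlier step, VolumeSnapshotOp also accepts the name of a PVC that already exists in the cluster. A minimal sketch of that path, assuming a pre-existing claim named "existing-data" and a cluster with the VolumeSnapshot alpha feature enabled (the claim and resource names here are illustrative):

import kfp.dsl as dsl


@dsl.pipeline(
    name="Snapshot Existing PVC",
    description="Hypothetical sketch: snapshot a pre-existing PVC and restore it."
)
def snapshot_existing_pvc():
    # Snapshot an already-existing claim by name (no VolumeOp needed).
    snap = dsl.VolumeSnapshotOp(
        name="snap-existing",
        resource_name="snap1",
        pvc="existing-data"
    )

    # Restore the snapshot into a fresh PVC; the restore size is taken from the
    # snapshot's "size" attribute output, as in the rokurl sample.
    vop = dsl.VolumeOp(
        name="restore",
        resource_name="restored-vol",
        data_source=snap.snapshot,
        size=snap.outputs["size"]
    )

    dsl.ContainerOp(
        name="consume",
        image="library/bash:4.4.23",
        command=["ls", "/data"],
        pvolumes={"/data": vop.volume}
    )


if __name__ == "__main__":
    import kfp.compiler as compiler
    compiler.Compiler().compile(snapshot_existing_pvc, __file__ + ".tar.gz")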
@@ -38,6 +38,8 @@ def test_operator_to_template(self): with dsl.Pipeline('somename') as p: msg1 = dsl.PipelineParam('msg1') msg2 = dsl.PipelineParam('msg2', value='value2') + json = dsl.PipelineParam('json') + kind = dsl.PipelineParam('kind') op = dsl.ContainerOp(name='echo', image='image', command=['sh', '-c'], arguments=['echo %s %s | tee /tmp/message.txt' % (msg1, msg2)], file_outputs={'merged': '/tmp/message.txt'}) \ @@ -47,6 +49,17 @@ def test_operator_to_template(self): .add_env_variable(k8s_client.V1EnvVar( name='GOOGLE_APPLICATION_CREDENTIALS', value='/secret/gcp-credentials/user-gcp-sa.json')) + res = dsl.ResourceOp( + name="test-resource", + k8s_resource=k8s_client.V1PersistentVolumeClaim( + api_version="v1", + kind=kind, + metadata=k8s_client.V1ObjectMeta( + name="resource" + ) + ), + attribute_outputs={"out": json} + ) golden_output = { 'container': { 'image': 'image', @@ -115,9 +128,47 @@ def test_operator_to_template(self): }] } } + res_output = { + 'inputs': { + 'parameters': [{ + 'name': 'json' + }, { + 'name': 'kind' + }] + }, + 'name': 'test-resource', + 'outputs': { + 'parameters': [{ + 'name': 'test-resource-manifest', + 'valueFrom': { + 'jsonPath': '{}' + } + }, { + 'name': 'test-resource-name', + 'valueFrom': { + 'jsonPath': '{.metadata.name}' + } + }, { + 'name': 'test-resource-out', + 'valueFrom': { + 'jsonPath': '{{inputs.parameters.json}}' + } + }] + }, + 'resource': { + 'action': 'create', + 'manifest': ( + "apiVersion: v1\n" + "kind: '{{inputs.parameters.kind}}'\n" + "metadata:\n" + " name: resource\n" + ) + } + } self.maxDiff = None self.assertEqual(golden_output, compiler.Compiler()._op_to_template(op)) + self.assertEqual(res_output, compiler.Compiler()._op_to_template(res)) def _get_yaml_from_zip(self, zip_file): with zipfile.ZipFile(zip_file, 'r') as zip: @@ -298,6 +349,34 @@ def test_py_recursive_while(self): """Test pipeline recursive.""" self._test_py_compile_yaml('recursive_while') + def test_py_resourceop_basic(self): + """Test pipeline resourceop_basic.""" + self._test_py_compile_yaml('resourceop_basic') + + def test_py_volumeop_basic(self): + """Test pipeline volumeop_basic.""" + self._test_py_compile_yaml('volumeop_basic') + + def test_py_volumeop_parallel(self): + """Test pipeline volumeop_parallel.""" + self._test_py_compile_yaml('volumeop_parallel') + + def test_py_volumeop_dag(self): + """Test pipeline volumeop_dag.""" + self._test_py_compile_yaml('volumeop_dag') + + def test_py_volume_snapshotop_sequential(self): + """Test pipeline volume_snapshotop_sequential.""" + self._test_py_compile_yaml('volume_snapshotop_sequential') + + def test_py_volume_snapshotop_rokurl(self): + """Test pipeline volumeop_sequential.""" + self._test_py_compile_yaml('volume_snapshotop_rokurl') + + def test_py_volumeop_sequential(self): + """Test pipeline volumeop_sequential.""" + self._test_py_compile_yaml('volumeop_sequential') + def test_type_checking_with_consistent_types(self): """Test type check pipeline parameters against component metadata.""" @component diff --git a/sdk/python/tests/compiler/testdata/resourceop_basic.py b/sdk/python/tests/compiler/testdata/resourceop_basic.py new file mode 100644 index 00000000000..3079379cbdb --- /dev/null +++ b/sdk/python/tests/compiler/testdata/resourceop_basic.py @@ -0,0 +1,60 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Note that this sample is just to show the ResourceOp's usage. + +It is not a good practice to put password as a pipeline argument, since it will +be visible on KFP UI. +""" + +from kubernetes import client as k8s_client +import kfp.dsl as dsl + + +@dsl.pipeline( + name="ResourceOp Basic", + description="A Basic Example on ResourceOp Usage." +) +def resourceop_basic(username, password): + secret_resource = k8s_client.V1Secret( + api_version="v1", + kind="Secret", + metadata=k8s_client.V1ObjectMeta(generate_name="my-secret-"), + type="Opaque", + data={"username": username, "password": password} + ) + rop = dsl.ResourceOp( + name="create-my-secret", + k8s_resource=secret_resource, + attribute_outputs={"name": "{.metadata.name}"} + ) + + secret = k8s_client.V1Volume( + name="my-secret", + secret=k8s_client.V1SecretVolumeSource(secret_name=rop.output) + ) + + cop = dsl.ContainerOp( + name="cop", + image="library/bash:4.4.23", + command=["sh", "-c"], + arguments=["ls /etc/secret-volume"], + pvolumes={"/etc/secret-volume": secret} + ) + + +if __name__ == "__main__": + import kfp.compiler as compiler + compiler.Compiler().compile(resourceop_basic, __file__ + ".tar.gz") diff --git a/sdk/python/tests/compiler/testdata/resourceop_basic.yaml b/sdk/python/tests/compiler/testdata/resourceop_basic.yaml new file mode 100644 index 00000000000..4f71d4094fd --- /dev/null +++ b/sdk/python/tests/compiler/testdata/resourceop_basic.yaml @@ -0,0 +1,99 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: resourceop-basic- +spec: + arguments: + parameters: + - name: username + - name: password + entrypoint: resourceop-basic + serviceAccountName: pipeline-runner + templates: + - container: + args: + - ls /etc/secret-volume + command: + - sh + - -c + image: library/bash:4.4.23 + volumeMounts: + - mountPath: /etc/secret-volume + name: my-secret + inputs: + parameters: + - name: create-my-secret-name + name: cop + outputs: + artifacts: + - name: mlpipeline-ui-metadata + path: /mlpipeline-ui-metadata.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-ui-metadata.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - name: mlpipeline-metrics + path: /mlpipeline-metrics.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-metrics.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - inputs: + parameters: + - name: password + - name: username + name: create-my-secret + outputs: + parameters: + - name: create-my-secret-manifest + valueFrom: + jsonPath: '{}' + - name: create-my-secret-name + valueFrom: + jsonPath: '{.metadata.name}' + resource: + action: create + manifest: "apiVersion: v1\ndata:\n password: '{{inputs.parameters.password}}'\n\ + \ username: '{{inputs.parameters.username}}'\nkind: Secret\nmetadata:\n \ + \ 
generateName: my-secret-\ntype: Opaque\n" + - dag: + tasks: + - arguments: + parameters: + - name: create-my-secret-name + value: '{{tasks.create-my-secret.outputs.parameters.create-my-secret-name}}' + dependencies: + - create-my-secret + name: cop + template: cop + - arguments: + parameters: + - name: password + value: '{{inputs.parameters.password}}' + - name: username + value: '{{inputs.parameters.username}}' + name: create-my-secret + template: create-my-secret + inputs: + parameters: + - name: password + - name: username + name: resourceop-basic + volumes: + - name: my-secret + secret: + secretName: '{{inputs.parameters.create-my-secret-name}}' diff --git a/sdk/python/tests/compiler/testdata/volume_snapshotop_rokurl.py b/sdk/python/tests/compiler/testdata/volume_snapshotop_rokurl.py new file mode 100644 index 00000000000..0753d549f3f --- /dev/null +++ b/sdk/python/tests/compiler/testdata/volume_snapshotop_rokurl.py @@ -0,0 +1,91 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""This sample uses Rok as an example to show case how VolumeOp accepts +annotations as an extra argument, and how we can use arbitrary PipelineParams +to determine their contents. + +The specific annotation is Rok-specific, but the use of annotations in such way +is widespread in storage systems integrated with K8s. +""" + +import kfp.dsl as dsl + + +@dsl.pipeline( + name="VolumeSnapshotOp RokURL", + description="The fifth example of the design doc." 
+) +def volume_snapshotop_rokurl(rok_url): + vop1 = dsl.VolumeOp( + name="create_volume_1", + resource_name="vol1", + size="1Gi", + annotations={"rok/origin": rok_url}, + modes=dsl.VOLUME_MODE_RWM + ) + + step1 = dsl.ContainerOp( + name="step1_concat", + image="library/bash:4.4.23", + command=["sh", "-c"], + arguments=["cat /data/file*| gzip -c >/data/full.gz"], + pvolumes={"/data": vop1.volume} + ) + + step1_snap = dsl.VolumeSnapshotOp( + name="create_snapshot_1", + resource_name="snap1", + volume=step1.pvolume + ) + + vop2 = dsl.VolumeOp( + name="create_volume_2", + resource_name="vol2", + data_source=step1_snap.snapshot, + size=step1_snap.outputs["size"] + ) + + step2 = dsl.ContainerOp( + name="step2_gunzip", + image="library/bash:4.4.23", + command=["gunzip", "-k", "/data/full.gz"], + pvolumes={"/data": vop2.volume} + ) + + step2_snap = dsl.VolumeSnapshotOp( + name="create_snapshot_2", + resource_name="snap2", + volume=step2.pvolume + ) + + vop3 = dsl.VolumeOp( + name="create_volume_3", + resource_name="vol3", + data_source=step2_snap.snapshot, + size=step2_snap.outputs["size"] + ) + + step3 = dsl.ContainerOp( + name="step3_output", + image="library/bash:4.4.23", + command=["cat", "/data/full"], + pvolumes={"/data": vop3.volume} + ) + + +if __name__ == "__main__": + import kfp.compiler as compiler + compiler.Compiler().compile(volume_snapshotop_rokurl, __file__ + ".tar.gz") diff --git a/sdk/python/tests/compiler/testdata/volume_snapshotop_rokurl.yaml b/sdk/python/tests/compiler/testdata/volume_snapshotop_rokurl.yaml new file mode 100644 index 00000000000..d91e65d72b8 --- /dev/null +++ b/sdk/python/tests/compiler/testdata/volume_snapshotop_rokurl.yaml @@ -0,0 +1,325 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: volumesnapshotop-rokurl- +spec: + arguments: + parameters: + - name: rok-url + entrypoint: volumesnapshotop-rokurl + serviceAccountName: pipeline-runner + templates: + - inputs: + parameters: + - name: create-volume-1-name + name: create-snapshot-1 + outputs: + parameters: + - name: create-snapshot-1-manifest + valueFrom: + jsonPath: '{}' + - name: create-snapshot-1-name + valueFrom: + jsonPath: '{.metadata.name}' + - name: create-snapshot-1-size + valueFrom: + jsonPath: '{.status.restoreSize}' + resource: + action: create + manifest: "apiVersion: snapshot.storage.k8s.io/v1alpha1\nkind: VolumeSnapshot\n\ + metadata:\n name: '{{workflow.name}}-snap1'\nspec:\n source:\n kind:\ + \ PersistentVolumeClaim\n name: '{{inputs.parameters.create-volume-1-name}}'\n" + successCondition: status.readyToUse == true + - inputs: + parameters: + - name: create-volume-2-name + name: create-snapshot-2 + outputs: + parameters: + - name: create-snapshot-2-manifest + valueFrom: + jsonPath: '{}' + - name: create-snapshot-2-name + valueFrom: + jsonPath: '{.metadata.name}' + - name: create-snapshot-2-size + valueFrom: + jsonPath: '{.status.restoreSize}' + resource: + action: create + manifest: "apiVersion: snapshot.storage.k8s.io/v1alpha1\nkind: VolumeSnapshot\n\ + metadata:\n name: '{{workflow.name}}-snap2'\nspec:\n source:\n kind:\ + \ PersistentVolumeClaim\n name: '{{inputs.parameters.create-volume-2-name}}'\n" + successCondition: status.readyToUse == true + - inputs: + parameters: + - name: rok-url + name: create-volume-1 + outputs: + parameters: + - name: create-volume-1-manifest + valueFrom: + jsonPath: '{}' + - name: create-volume-1-name + valueFrom: + jsonPath: '{.metadata.name}' + - name: create-volume-1-size + valueFrom: + jsonPath: '{.status.capacity.storage}' 
+ resource: + action: create + manifest: "apiVersion: v1\nkind: PersistentVolumeClaim\nmetadata:\n annotations:\n\ + \ rok/origin: '{{inputs.parameters.rok-url}}'\n name: '{{workflow.name}}-vol1'\n\ + spec:\n accessModes:\n - ReadWriteMany\n resources:\n requests:\n \ + \ storage: 1Gi\n" + - inputs: + parameters: + - name: create-snapshot-1-name + - name: create-snapshot-1-size + name: create-volume-2 + outputs: + parameters: + - name: create-volume-2-manifest + valueFrom: + jsonPath: '{}' + - name: create-volume-2-name + valueFrom: + jsonPath: '{.metadata.name}' + - name: create-volume-2-size + valueFrom: + jsonPath: '{.status.capacity.storage}' + resource: + action: create + manifest: "apiVersion: v1\nkind: PersistentVolumeClaim\nmetadata:\n name: '{{workflow.name}}-vol2'\n\ + spec:\n accessModes:\n - ReadWriteMany\n dataSource:\n apiGroup: snapshot.storage.k8s.io\n\ + \ kind: VolumeSnapshot\n name: '{{inputs.parameters.create-snapshot-1-name}}'\n\ + \ resources:\n requests:\n storage: '{{inputs.parameters.create-snapshot-1-size}}'\n" + - inputs: + parameters: + - name: create-snapshot-2-name + - name: create-snapshot-2-size + name: create-volume-3 + outputs: + parameters: + - name: create-volume-3-manifest + valueFrom: + jsonPath: '{}' + - name: create-volume-3-name + valueFrom: + jsonPath: '{.metadata.name}' + - name: create-volume-3-size + valueFrom: + jsonPath: '{.status.capacity.storage}' + resource: + action: create + manifest: "apiVersion: v1\nkind: PersistentVolumeClaim\nmetadata:\n name: '{{workflow.name}}-vol3'\n\ + spec:\n accessModes:\n - ReadWriteMany\n dataSource:\n apiGroup: snapshot.storage.k8s.io\n\ + \ kind: VolumeSnapshot\n name: '{{inputs.parameters.create-snapshot-2-name}}'\n\ + \ resources:\n requests:\n storage: '{{inputs.parameters.create-snapshot-2-size}}'\n" + - container: + args: + - cat /data/file*| gzip -c >/data/full.gz + command: + - sh + - -c + image: library/bash:4.4.23 + volumeMounts: + - mountPath: /data + name: create-volume-1 + inputs: + parameters: + - name: create-volume-1-name + name: step1-concat + outputs: + artifacts: + - name: mlpipeline-ui-metadata + path: /mlpipeline-ui-metadata.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-ui-metadata.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - name: mlpipeline-metrics + path: /mlpipeline-metrics.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-metrics.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - container: + command: + - gunzip + - -k + - /data/full.gz + image: library/bash:4.4.23 + volumeMounts: + - mountPath: /data + name: create-volume-2 + inputs: + parameters: + - name: create-volume-2-name + name: step2-gunzip + outputs: + artifacts: + - name: mlpipeline-ui-metadata + path: /mlpipeline-ui-metadata.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-ui-metadata.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - name: mlpipeline-metrics + path: /mlpipeline-metrics.json + s3: + accessKeySecret: + key: accesskey + name: 
mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-metrics.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - container: + command: + - cat + - /data/full + image: library/bash:4.4.23 + volumeMounts: + - mountPath: /data + name: create-volume-3 + inputs: + parameters: + - name: create-volume-3-name + name: step3-output + outputs: + artifacts: + - name: mlpipeline-ui-metadata + path: /mlpipeline-ui-metadata.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-ui-metadata.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - name: mlpipeline-metrics + path: /mlpipeline-metrics.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-metrics.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - dag: + tasks: + - arguments: + parameters: + - name: create-volume-1-name + value: '{{tasks.create-volume-1.outputs.parameters.create-volume-1-name}}' + dependencies: + - create-volume-1 + - step1-concat + name: create-snapshot-1 + template: create-snapshot-1 + - arguments: + parameters: + - name: create-volume-2-name + value: '{{tasks.create-volume-2.outputs.parameters.create-volume-2-name}}' + dependencies: + - create-volume-2 + - step2-gunzip + name: create-snapshot-2 + template: create-snapshot-2 + - arguments: + parameters: + - name: rok-url + value: '{{inputs.parameters.rok-url}}' + name: create-volume-1 + template: create-volume-1 + - arguments: + parameters: + - name: create-snapshot-1-name + value: '{{tasks.create-snapshot-1.outputs.parameters.create-snapshot-1-name}}' + - name: create-snapshot-1-size + value: '{{tasks.create-snapshot-1.outputs.parameters.create-snapshot-1-size}}' + dependencies: + - create-snapshot-1 + name: create-volume-2 + template: create-volume-2 + - arguments: + parameters: + - name: create-snapshot-2-name + value: '{{tasks.create-snapshot-2.outputs.parameters.create-snapshot-2-name}}' + - name: create-snapshot-2-size + value: '{{tasks.create-snapshot-2.outputs.parameters.create-snapshot-2-size}}' + dependencies: + - create-snapshot-2 + name: create-volume-3 + template: create-volume-3 + - arguments: + parameters: + - name: create-volume-1-name + value: '{{tasks.create-volume-1.outputs.parameters.create-volume-1-name}}' + dependencies: + - create-volume-1 + name: step1-concat + template: step1-concat + - arguments: + parameters: + - name: create-volume-2-name + value: '{{tasks.create-volume-2.outputs.parameters.create-volume-2-name}}' + dependencies: + - create-volume-2 + name: step2-gunzip + template: step2-gunzip + - arguments: + parameters: + - name: create-volume-3-name + value: '{{tasks.create-volume-3.outputs.parameters.create-volume-3-name}}' + dependencies: + - create-volume-3 + name: step3-output + template: step3-output + inputs: + parameters: + - name: rok-url + name: volumesnapshotop-rokurl + volumes: + - name: create-volume-1 + persistentVolumeClaim: + claimName: '{{inputs.parameters.create-volume-1-name}}' + - name: create-volume-2 + persistentVolumeClaim: + claimName: '{{inputs.parameters.create-volume-2-name}}' + - name: create-volume-3 + persistentVolumeClaim: + 
claimName: '{{inputs.parameters.create-volume-3-name}}' diff --git a/sdk/python/tests/compiler/testdata/volume_snapshotop_sequential.py b/sdk/python/tests/compiler/testdata/volume_snapshotop_sequential.py new file mode 100644 index 00000000000..2b8500ec963 --- /dev/null +++ b/sdk/python/tests/compiler/testdata/volume_snapshotop_sequential.py @@ -0,0 +1,87 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import kfp.dsl as dsl + + +@dsl.pipeline( + name="VolumeSnapshotOp Sequential", + description="The fourth example of the design doc." +) +def volume_snapshotop_sequential(url): + vop = dsl.VolumeOp( + name="create_volume", + resource_name="vol1", + size="1Gi", + modes=dsl.VOLUME_MODE_RWM + ) + + step1 = dsl.ContainerOp( + name="step1_ingest", + image="google/cloud-sdk:216.0.0", + command=["sh", "-c"], + arguments=["mkdir /data/step1 && " + "gsutil cat %s | gzip -c >/data/step1/file1.gz" % url], + pvolumes={"/data": vop.volume} + ) + + step1_snap = dsl.VolumeSnapshotOp( + name="step1_snap", + resource_name="step1_snap", + volume=step1.pvolume + ) + + step2 = dsl.ContainerOp( + name="step2_gunzip", + image="library/bash:4.4.23", + command=["sh", "-c"], + arguments=["mkdir /data/step2 && " + "gunzip /data/step1/file1.gz -c >/data/step2/file1"], + pvolumes={"/data": step1.pvolume} + ) + + step2_snap = dsl.VolumeSnapshotOp( + name="step2_snap", + resource_name="step2_snap", + volume=step2.pvolume + ) + + step3 = dsl.ContainerOp( + name="step3_copy", + image="library/bash:4.4.23", + command=["sh", "-c"], + arguments=["mkdir /data/step3 && " + "cp -av /data/step2/file1 /data/step3/file3"], + pvolumes={"/data": step2.pvolume} + ) + + step3_snap = dsl.VolumeSnapshotOp( + name="step3_snap", + resource_name="step3_snap", + volume=step3.pvolume + ) + + step4 = dsl.ContainerOp( + name="step4_output", + image="library/bash:4.4.23", + command=["cat", "/data/step2/file1", "/data/step3/file3"], + pvolumes={"/data": step3.pvolume} + ) + + +if __name__ == "__main__": + import kfp.compiler as compiler + compiler.Compiler().compile(volume_snapshotop_sequential, + __file__ + ".tar.gz") diff --git a/sdk/python/tests/compiler/testdata/volume_snapshotop_sequential.yaml b/sdk/python/tests/compiler/testdata/volume_snapshotop_sequential.yaml new file mode 100644 index 00000000000..2f58f0b204c --- /dev/null +++ b/sdk/python/tests/compiler/testdata/volume_snapshotop_sequential.yaml @@ -0,0 +1,335 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: volumesnapshotop-sequential- +spec: + arguments: + parameters: + - name: url + entrypoint: volumesnapshotop-sequential + serviceAccountName: pipeline-runner + templates: + - name: create-volume + outputs: + parameters: + - name: create-volume-manifest + valueFrom: + jsonPath: '{}' + - name: create-volume-name + valueFrom: + jsonPath: '{.metadata.name}' + - name: create-volume-size + valueFrom: + jsonPath: '{.status.capacity.storage}' + resource: + action: create + manifest: "apiVersion: v1\nkind: 
PersistentVolumeClaim\nmetadata:\n name: '{{workflow.name}}-vol1'\n\ + spec:\n accessModes:\n - ReadWriteMany\n resources:\n requests:\n \ + \ storage: 1Gi\n" + - container: + args: + - mkdir /data/step1 && gsutil cat {{inputs.parameters.url}} | gzip -c >/data/step1/file1.gz + command: + - sh + - -c + image: google/cloud-sdk:216.0.0 + volumeMounts: + - mountPath: /data + name: create-volume + inputs: + parameters: + - name: create-volume-name + - name: url + name: step1-ingest + outputs: + artifacts: + - name: mlpipeline-ui-metadata + path: /mlpipeline-ui-metadata.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-ui-metadata.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - name: mlpipeline-metrics + path: /mlpipeline-metrics.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-metrics.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - inputs: + parameters: + - name: create-volume-name + name: step1-snap + outputs: + parameters: + - name: step1-snap-manifest + valueFrom: + jsonPath: '{}' + - name: step1-snap-name + valueFrom: + jsonPath: '{.metadata.name}' + - name: step1-snap-size + valueFrom: + jsonPath: '{.status.restoreSize}' + resource: + action: create + manifest: "apiVersion: snapshot.storage.k8s.io/v1alpha1\nkind: VolumeSnapshot\n\ + metadata:\n name: '{{workflow.name}}-step1-snap'\nspec:\n source:\n kind:\ + \ PersistentVolumeClaim\n name: '{{inputs.parameters.create-volume-name}}'\n" + successCondition: status.readyToUse == true + - container: + args: + - mkdir /data/step2 && gunzip /data/step1/file1.gz -c >/data/step2/file1 + command: + - sh + - -c + image: library/bash:4.4.23 + volumeMounts: + - mountPath: /data + name: create-volume + inputs: + parameters: + - name: create-volume-name + name: step2-gunzip + outputs: + artifacts: + - name: mlpipeline-ui-metadata + path: /mlpipeline-ui-metadata.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-ui-metadata.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - name: mlpipeline-metrics + path: /mlpipeline-metrics.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-metrics.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - inputs: + parameters: + - name: create-volume-name + name: step2-snap + outputs: + parameters: + - name: step2-snap-manifest + valueFrom: + jsonPath: '{}' + - name: step2-snap-name + valueFrom: + jsonPath: '{.metadata.name}' + - name: step2-snap-size + valueFrom: + jsonPath: '{.status.restoreSize}' + resource: + action: create + manifest: "apiVersion: snapshot.storage.k8s.io/v1alpha1\nkind: VolumeSnapshot\n\ + metadata:\n name: '{{workflow.name}}-step2-snap'\nspec:\n source:\n kind:\ + \ PersistentVolumeClaim\n name: '{{inputs.parameters.create-volume-name}}'\n" + successCondition: status.readyToUse == true + - container: + args: + - mkdir /data/step3 && cp -av /data/step2/file1 
/data/step3/file3 + command: + - sh + - -c + image: library/bash:4.4.23 + volumeMounts: + - mountPath: /data + name: create-volume + inputs: + parameters: + - name: create-volume-name + name: step3-copy + outputs: + artifacts: + - name: mlpipeline-ui-metadata + path: /mlpipeline-ui-metadata.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-ui-metadata.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - name: mlpipeline-metrics + path: /mlpipeline-metrics.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-metrics.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - inputs: + parameters: + - name: create-volume-name + name: step3-snap + outputs: + parameters: + - name: step3-snap-manifest + valueFrom: + jsonPath: '{}' + - name: step3-snap-name + valueFrom: + jsonPath: '{.metadata.name}' + - name: step3-snap-size + valueFrom: + jsonPath: '{.status.restoreSize}' + resource: + action: create + manifest: "apiVersion: snapshot.storage.k8s.io/v1alpha1\nkind: VolumeSnapshot\n\ + metadata:\n name: '{{workflow.name}}-step3-snap'\nspec:\n source:\n kind:\ + \ PersistentVolumeClaim\n name: '{{inputs.parameters.create-volume-name}}'\n" + successCondition: status.readyToUse == true + - container: + command: + - cat + - /data/step2/file1 + - /data/step3/file3 + image: library/bash:4.4.23 + volumeMounts: + - mountPath: /data + name: create-volume + inputs: + parameters: + - name: create-volume-name + name: step4-output + outputs: + artifacts: + - name: mlpipeline-ui-metadata + path: /mlpipeline-ui-metadata.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-ui-metadata.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - name: mlpipeline-metrics + path: /mlpipeline-metrics.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-metrics.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - dag: + tasks: + - name: create-volume + template: create-volume + - arguments: + parameters: + - name: create-volume-name + value: '{{tasks.create-volume.outputs.parameters.create-volume-name}}' + - name: url + value: '{{inputs.parameters.url}}' + dependencies: + - create-volume + name: step1-ingest + template: step1-ingest + - arguments: + parameters: + - name: create-volume-name + value: '{{tasks.create-volume.outputs.parameters.create-volume-name}}' + dependencies: + - create-volume + - step1-ingest + name: step1-snap + template: step1-snap + - arguments: + parameters: + - name: create-volume-name + value: '{{tasks.create-volume.outputs.parameters.create-volume-name}}' + dependencies: + - create-volume + - step1-ingest + name: step2-gunzip + template: step2-gunzip + - arguments: + parameters: + - name: create-volume-name + value: '{{tasks.create-volume.outputs.parameters.create-volume-name}}' + dependencies: + - create-volume + - step2-gunzip + name: step2-snap + template: step2-snap + - 
arguments: + parameters: + - name: create-volume-name + value: '{{tasks.create-volume.outputs.parameters.create-volume-name}}' + dependencies: + - create-volume + - step2-gunzip + name: step3-copy + template: step3-copy + - arguments: + parameters: + - name: create-volume-name + value: '{{tasks.create-volume.outputs.parameters.create-volume-name}}' + dependencies: + - create-volume + - step3-copy + name: step3-snap + template: step3-snap + - arguments: + parameters: + - name: create-volume-name + value: '{{tasks.create-volume.outputs.parameters.create-volume-name}}' + dependencies: + - create-volume + - step3-copy + name: step4-output + template: step4-output + inputs: + parameters: + - name: url + name: volumesnapshotop-sequential + volumes: + - name: create-volume + persistentVolumeClaim: + claimName: '{{inputs.parameters.create-volume-name}}' diff --git a/sdk/python/tests/compiler/testdata/volumeop_basic.py b/sdk/python/tests/compiler/testdata/volumeop_basic.py new file mode 100644 index 00000000000..babf12db6d1 --- /dev/null +++ b/sdk/python/tests/compiler/testdata/volumeop_basic.py @@ -0,0 +1,42 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import kfp.dsl as dsl + + +@dsl.pipeline( + name="VolumeOp Basic", + description="A Basic Example on VolumeOp Usage." 
+) +def volumeop_basic(size): + vop = dsl.VolumeOp( + name="create_pvc", + resource_name="my-pvc", + modes=dsl.VOLUME_MODE_RWM, + size=size + ) + + cop = dsl.ContainerOp( + name="cop", + image="library/bash:4.4.23", + command=["sh", "-c"], + arguments=["echo foo > /mnt/file1"], + pvolumes={"/mnt": vop.volume} + ) + + +if __name__ == "__main__": + import kfp.compiler as compiler + compiler.Compiler().compile(volumeop_basic, __file__ + ".tar.gz") diff --git a/sdk/python/tests/compiler/testdata/volumeop_basic.yaml b/sdk/python/tests/compiler/testdata/volumeop_basic.yaml new file mode 100644 index 00000000000..c26dc9bc5c0 --- /dev/null +++ b/sdk/python/tests/compiler/testdata/volumeop_basic.yaml @@ -0,0 +1,97 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: volumeop-basic- +spec: + arguments: + parameters: + - name: size + entrypoint: volumeop-basic + serviceAccountName: pipeline-runner + templates: + - container: + args: + - echo foo > /mnt/file1 + command: + - sh + - -c + image: library/bash:4.4.23 + volumeMounts: + - mountPath: /mnt + name: create-pvc + inputs: + parameters: + - name: create-pvc-name + name: cop + outputs: + artifacts: + - name: mlpipeline-ui-metadata + path: /mlpipeline-ui-metadata.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-ui-metadata.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - name: mlpipeline-metrics + path: /mlpipeline-metrics.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-metrics.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - inputs: + parameters: + - name: size + name: create-pvc + outputs: + parameters: + - name: create-pvc-manifest + valueFrom: + jsonPath: '{}' + - name: create-pvc-name + valueFrom: + jsonPath: '{.metadata.name}' + - name: create-pvc-size + valueFrom: + jsonPath: '{.status.capacity.storage}' + resource: + action: create + manifest: "apiVersion: v1\nkind: PersistentVolumeClaim\nmetadata:\n name: '{{workflow.name}}-my-pvc'\n\ + spec:\n accessModes:\n - ReadWriteMany\n resources:\n requests:\n \ + \ storage: '{{inputs.parameters.size}}'\n" + - dag: + tasks: + - arguments: + parameters: + - name: create-pvc-name + value: '{{tasks.create-pvc.outputs.parameters.create-pvc-name}}' + dependencies: + - create-pvc + name: cop + template: cop + - arguments: + parameters: + - name: size + value: '{{inputs.parameters.size}}' + name: create-pvc + template: create-pvc + inputs: + parameters: + - name: size + name: volumeop-basic + volumes: + - name: create-pvc + persistentVolumeClaim: + claimName: '{{inputs.parameters.create-pvc-name}}' diff --git a/sdk/python/tests/compiler/testdata/volumeop_dag.py b/sdk/python/tests/compiler/testdata/volumeop_dag.py new file mode 100644 index 00000000000..9d9514550b6 --- /dev/null +++ b/sdk/python/tests/compiler/testdata/volumeop_dag.py @@ -0,0 +1,58 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import kfp.dsl as dsl + + +@dsl.pipeline( + name="Volume Op DAG", + description="The second example of the design doc." +) +def volume_op_dag(): + vop = dsl.VolumeOp( + name="create_pvc", + resource_name="my-pvc", + size="10Gi", + modes=dsl.VOLUME_MODE_RWM + ) + + step1 = dsl.ContainerOp( + name="step1", + image="library/bash:4.4.23", + command=["sh", "-c"], + arguments=["echo 1 | tee /mnt/file1"], + pvolumes={"/mnt": vop.volume} + ) + + step2 = dsl.ContainerOp( + name="step2", + image="library/bash:4.4.23", + command=["sh", "-c"], + arguments=["echo 2 | tee /mnt2/file2"], + pvolumes={"/mnt2": vop.volume} + ) + + step3 = dsl.ContainerOp( + name="step3", + image="library/bash:4.4.23", + command=["sh", "-c"], + arguments=["cat /mnt/file1 /mnt/file2"], + pvolumes={"/mnt": vop.volume.after(step1, step2)} + ) + + +if __name__ == "__main__": + import kfp.compiler as compiler + compiler.Compiler().compile(volume_op_dag, __file__ + ".tar.gz") diff --git a/sdk/python/tests/compiler/testdata/volumeop_dag.yaml b/sdk/python/tests/compiler/testdata/volumeop_dag.yaml new file mode 100644 index 00000000000..6df782c8ded --- /dev/null +++ b/sdk/python/tests/compiler/testdata/volumeop_dag.yaml @@ -0,0 +1,188 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: volume-op-dag- +spec: + arguments: + parameters: [] + entrypoint: volume-op-dag + serviceAccountName: pipeline-runner + templates: + - name: create-pvc + outputs: + parameters: + - name: create-pvc-manifest + valueFrom: + jsonPath: '{}' + - name: create-pvc-name + valueFrom: + jsonPath: '{.metadata.name}' + - name: create-pvc-size + valueFrom: + jsonPath: '{.status.capacity.storage}' + resource: + action: create + manifest: "apiVersion: v1\nkind: PersistentVolumeClaim\nmetadata:\n name: '{{workflow.name}}-my-pvc'\n\ + spec:\n accessModes:\n - ReadWriteMany\n resources:\n requests:\n \ + \ storage: 10Gi\n" + - container: + args: + - echo 1 | tee /mnt/file1 + command: + - sh + - -c + image: library/bash:4.4.23 + volumeMounts: + - mountPath: /mnt + name: create-pvc + inputs: + parameters: + - name: create-pvc-name + name: step1 + outputs: + artifacts: + - name: mlpipeline-ui-metadata + path: /mlpipeline-ui-metadata.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-ui-metadata.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - name: mlpipeline-metrics + path: /mlpipeline-metrics.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-metrics.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - container: + args: + - echo 2 | tee /mnt2/file2 + command: + - sh + - -c + image: library/bash:4.4.23 + volumeMounts: + - mountPath: /mnt2 + name: create-pvc + inputs: + parameters: + - name: create-pvc-name + name: step2 + outputs: + artifacts: + - name: 
mlpipeline-ui-metadata + path: /mlpipeline-ui-metadata.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-ui-metadata.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - name: mlpipeline-metrics + path: /mlpipeline-metrics.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-metrics.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - container: + args: + - cat /mnt/file1 /mnt/file2 + command: + - sh + - -c + image: library/bash:4.4.23 + volumeMounts: + - mountPath: /mnt + name: create-pvc + inputs: + parameters: + - name: create-pvc-name + name: step3 + outputs: + artifacts: + - name: mlpipeline-ui-metadata + path: /mlpipeline-ui-metadata.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-ui-metadata.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - name: mlpipeline-metrics + path: /mlpipeline-metrics.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-metrics.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - dag: + tasks: + - name: create-pvc + template: create-pvc + - arguments: + parameters: + - name: create-pvc-name + value: '{{tasks.create-pvc.outputs.parameters.create-pvc-name}}' + dependencies: + - create-pvc + name: step1 + template: step1 + - arguments: + parameters: + - name: create-pvc-name + value: '{{tasks.create-pvc.outputs.parameters.create-pvc-name}}' + dependencies: + - create-pvc + name: step2 + template: step2 + - arguments: + parameters: + - name: create-pvc-name + value: '{{tasks.create-pvc.outputs.parameters.create-pvc-name}}' + dependencies: + - create-pvc + - step1 + - step2 + name: step3 + template: step3 + name: volume-op-dag + volumes: + - name: create-pvc + persistentVolumeClaim: + claimName: '{{inputs.parameters.create-pvc-name}}' diff --git a/sdk/python/tests/compiler/testdata/volumeop_parallel.py b/sdk/python/tests/compiler/testdata/volumeop_parallel.py new file mode 100644 index 00000000000..15955e4c7ab --- /dev/null +++ b/sdk/python/tests/compiler/testdata/volumeop_parallel.py @@ -0,0 +1,58 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import kfp.dsl as dsl + + +@dsl.pipeline( + name="VolumeOp Parallel", + description="The first example of the design doc." 
+) +def volumeop_parallel(): + vop = dsl.VolumeOp( + name="create_pvc", + resource_name="my-pvc", + size="10Gi", + modes=dsl.VOLUME_MODE_RWM + ) + + step1 = dsl.ContainerOp( + name="step1", + image="library/bash:4.4.23", + command=["sh", "-c"], + arguments=["echo 1 | tee /mnt/file1"], + pvolumes={"/mnt": vop.volume} + ) + + step2 = dsl.ContainerOp( + name="step2", + image="library/bash:4.4.23", + command=["sh", "-c"], + arguments=["echo 2 | tee /common/file2"], + pvolumes={"/common": vop.volume} + ) + + step3 = dsl.ContainerOp( + name="step3", + image="library/bash:4.4.23", + command=["sh", "-c"], + arguments=["echo 3 | tee /mnt3/file3"], + pvolumes={"/mnt3": vop.volume} + ) + + +if __name__ == "__main__": + import kfp.compiler as compiler + compiler.Compiler().compile(volumeop_parallel, __file__ + ".tar.gz") diff --git a/sdk/python/tests/compiler/testdata/volumeop_parallel.yaml b/sdk/python/tests/compiler/testdata/volumeop_parallel.yaml new file mode 100644 index 00000000000..49d5b4e6ee6 --- /dev/null +++ b/sdk/python/tests/compiler/testdata/volumeop_parallel.yaml @@ -0,0 +1,186 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: volumeop-parallel- +spec: + arguments: + parameters: [] + entrypoint: volumeop-parallel + serviceAccountName: pipeline-runner + templates: + - name: create-pvc + outputs: + parameters: + - name: create-pvc-manifest + valueFrom: + jsonPath: '{}' + - name: create-pvc-name + valueFrom: + jsonPath: '{.metadata.name}' + - name: create-pvc-size + valueFrom: + jsonPath: '{.status.capacity.storage}' + resource: + action: create + manifest: "apiVersion: v1\nkind: PersistentVolumeClaim\nmetadata:\n name: '{{workflow.name}}-my-pvc'\n\ + spec:\n accessModes:\n - ReadWriteMany\n resources:\n requests:\n \ + \ storage: 10Gi\n" + - container: + args: + - echo 1 | tee /mnt/file1 + command: + - sh + - -c + image: library/bash:4.4.23 + volumeMounts: + - mountPath: /mnt + name: create-pvc + inputs: + parameters: + - name: create-pvc-name + name: step1 + outputs: + artifacts: + - name: mlpipeline-ui-metadata + path: /mlpipeline-ui-metadata.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-ui-metadata.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - name: mlpipeline-metrics + path: /mlpipeline-metrics.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-metrics.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - container: + args: + - echo 2 | tee /common/file2 + command: + - sh + - -c + image: library/bash:4.4.23 + volumeMounts: + - mountPath: /common + name: create-pvc + inputs: + parameters: + - name: create-pvc-name + name: step2 + outputs: + artifacts: + - name: mlpipeline-ui-metadata + path: /mlpipeline-ui-metadata.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-ui-metadata.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - name: mlpipeline-metrics + path: /mlpipeline-metrics.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: 
minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-metrics.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - container: + args: + - echo 3 | tee /mnt3/file3 + command: + - sh + - -c + image: library/bash:4.4.23 + volumeMounts: + - mountPath: /mnt3 + name: create-pvc + inputs: + parameters: + - name: create-pvc-name + name: step3 + outputs: + artifacts: + - name: mlpipeline-ui-metadata + path: /mlpipeline-ui-metadata.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-ui-metadata.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - name: mlpipeline-metrics + path: /mlpipeline-metrics.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-metrics.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - dag: + tasks: + - name: create-pvc + template: create-pvc + - arguments: + parameters: + - name: create-pvc-name + value: '{{tasks.create-pvc.outputs.parameters.create-pvc-name}}' + dependencies: + - create-pvc + name: step1 + template: step1 + - arguments: + parameters: + - name: create-pvc-name + value: '{{tasks.create-pvc.outputs.parameters.create-pvc-name}}' + dependencies: + - create-pvc + name: step2 + template: step2 + - arguments: + parameters: + - name: create-pvc-name + value: '{{tasks.create-pvc.outputs.parameters.create-pvc-name}}' + dependencies: + - create-pvc + name: step3 + template: step3 + name: volumeop-parallel + volumes: + - name: create-pvc + persistentVolumeClaim: + claimName: '{{inputs.parameters.create-pvc-name}}' diff --git a/sdk/python/tests/compiler/testdata/volumeop_sequential.py b/sdk/python/tests/compiler/testdata/volumeop_sequential.py new file mode 100644 index 00000000000..3c8b0317c82 --- /dev/null +++ b/sdk/python/tests/compiler/testdata/volumeop_sequential.py @@ -0,0 +1,57 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import kfp.dsl as dsl + + +@dsl.pipeline( + name="VolumeOp Sequential", + description="The third example of the design doc." 
+) +def volumeop_sequential(): + vop = dsl.VolumeOp( + name="mypvc", + resource_name="newpvc", + size="10Gi", + modes=dsl.VOLUME_MODE_RWM + ) + + step1 = dsl.ContainerOp( + name="step1", + image="library/bash:4.4.23", + command=["sh", "-c"], + arguments=["echo 1|tee /data/file1"], + pvolumes={"/data": vop.volume} + ) + + step2 = dsl.ContainerOp( + name="step2", + image="library/bash:4.4.23", + command=["sh", "-c"], + arguments=["cp /data/file1 /data/file2"], + pvolumes={"/data": step1.pvolume} + ) + + step3 = dsl.ContainerOp( + name="step3", + image="library/bash:4.4.23", + command=["cat", "/mnt/file1", "/mnt/file2"], + pvolumes={"/mnt": step2.pvolume} + ) + + +if __name__ == "__main__": + import kfp.compiler as compiler + compiler.Compiler().compile(volumeop_sequential, __file__ + ".tar.gz") diff --git a/sdk/python/tests/compiler/testdata/volumeop_sequential.yaml b/sdk/python/tests/compiler/testdata/volumeop_sequential.yaml new file mode 100644 index 00000000000..f3615663ae2 --- /dev/null +++ b/sdk/python/tests/compiler/testdata/volumeop_sequential.yaml @@ -0,0 +1,187 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: volumeop-sequential- +spec: + arguments: + parameters: [] + entrypoint: volumeop-sequential + serviceAccountName: pipeline-runner + templates: + - name: mypvc + outputs: + parameters: + - name: mypvc-manifest + valueFrom: + jsonPath: '{}' + - name: mypvc-name + valueFrom: + jsonPath: '{.metadata.name}' + - name: mypvc-size + valueFrom: + jsonPath: '{.status.capacity.storage}' + resource: + action: create + manifest: "apiVersion: v1\nkind: PersistentVolumeClaim\nmetadata:\n name: '{{workflow.name}}-newpvc'\n\ + spec:\n accessModes:\n - ReadWriteMany\n resources:\n requests:\n \ + \ storage: 10Gi\n" + - container: + args: + - echo 1|tee /data/file1 + command: + - sh + - -c + image: library/bash:4.4.23 + volumeMounts: + - mountPath: /data + name: mypvc + inputs: + parameters: + - name: mypvc-name + name: step1 + outputs: + artifacts: + - name: mlpipeline-ui-metadata + path: /mlpipeline-ui-metadata.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-ui-metadata.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - name: mlpipeline-metrics + path: /mlpipeline-metrics.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-metrics.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - container: + args: + - cp /data/file1 /data/file2 + command: + - sh + - -c + image: library/bash:4.4.23 + volumeMounts: + - mountPath: /data + name: mypvc + inputs: + parameters: + - name: mypvc-name + name: step2 + outputs: + artifacts: + - name: mlpipeline-ui-metadata + path: /mlpipeline-ui-metadata.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-ui-metadata.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - name: mlpipeline-metrics + path: /mlpipeline-metrics.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: 
runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-metrics.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - container: + command: + - cat + - /mnt/file1 + - /mnt/file2 + image: library/bash:4.4.23 + volumeMounts: + - mountPath: /mnt + name: mypvc + inputs: + parameters: + - name: mypvc-name + name: step3 + outputs: + artifacts: + - name: mlpipeline-ui-metadata + path: /mlpipeline-ui-metadata.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-ui-metadata.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - name: mlpipeline-metrics + path: /mlpipeline-metrics.json + s3: + accessKeySecret: + key: accesskey + name: mlpipeline-minio-artifact + bucket: mlpipeline + endpoint: minio-service.kubeflow:9000 + insecure: true + key: runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-metrics.tgz + secretKeySecret: + key: secretkey + name: mlpipeline-minio-artifact + - dag: + tasks: + - name: mypvc + template: mypvc + - arguments: + parameters: + - name: mypvc-name + value: '{{tasks.mypvc.outputs.parameters.mypvc-name}}' + dependencies: + - mypvc + name: step1 + template: step1 + - arguments: + parameters: + - name: mypvc-name + value: '{{tasks.mypvc.outputs.parameters.mypvc-name}}' + dependencies: + - mypvc + - step1 + name: step2 + template: step2 + - arguments: + parameters: + - name: mypvc-name + value: '{{tasks.mypvc.outputs.parameters.mypvc-name}}' + dependencies: + - mypvc + - step2 + name: step3 + template: step3 + name: volumeop-sequential + volumes: + - name: mypvc + persistentVolumeClaim: + claimName: '{{inputs.parameters.mypvc-name}}' diff --git a/sdk/python/tests/dsl/container_op_tests.py b/sdk/python/tests/dsl/container_op_tests.py index 25fea3984c4..dfba7eaddbc 100644 --- a/sdk/python/tests/dsl/container_op_tests.py +++ b/sdk/python/tests/dsl/container_op_tests.py @@ -1,4 +1,4 @@ -# Copyright 2018 Google LLC +# Copyright 2018-2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ # limitations under the License. -import warnings import unittest from kubernetes.client.models import V1EnvVar, V1VolumeMount @@ -84,4 +83,4 @@ def test_deprecation_warnings(self): with self.assertWarns(PendingDeprecationWarning): op.add_volume_mount(V1VolumeMount( mount_path='/secret/gcp-credentials', - name='gcp-credentials')) \ No newline at end of file + name='gcp-credentials')) diff --git a/sdk/python/tests/dsl/main.py b/sdk/python/tests/dsl/main.py index e994f21d83e..00857e433d7 100644 --- a/sdk/python/tests/dsl/main.py +++ b/sdk/python/tests/dsl/main.py @@ -1,4 +1,4 @@ -# Copyright 2018 Google LLC +# Copyright 2018-2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -23,6 +23,10 @@ import type_tests import component_tests import metadata_tests +import resource_op_tests +import volume_op_tests +import pipeline_volume_tests +import volume_snapshotop_tests if __name__ == '__main__': suite = unittest.TestSuite() @@ -33,7 +37,18 @@ suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(type_tests)) suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(component_tests)) suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(metadata_tests)) + suite.addTests( + unittest.defaultTestLoader.loadTestsFromModule(resource_op_tests) + ) + suite.addTests( + unittest.defaultTestLoader.loadTestsFromModule(volume_op_tests) + ) + suite.addTests( + unittest.defaultTestLoader.loadTestsFromModule(pipeline_volume_tests) + ) + suite.addTests( + unittest.defaultTestLoader.loadTestsFromModule(volume_snapshotop_tests) + ) runner = unittest.TextTestRunner() if not runner.run(suite).wasSuccessful(): sys.exit(1) - diff --git a/sdk/python/tests/dsl/pipeline_volume_tests.py b/sdk/python/tests/dsl/pipeline_volume_tests.py new file mode 100644 index 00000000000..4c28153f307 --- /dev/null +++ b/sdk/python/tests/dsl/pipeline_volume_tests.py @@ -0,0 +1,61 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from kfp.dsl import Pipeline, VolumeOp, ContainerOp, PipelineVolume +import unittest + + +class TestPipelineVolume(unittest.TestCase): + + def test_basic(self): + """Test basic usage.""" + with Pipeline("somename") as p: + vol = VolumeOp( + name="myvol_creation", + resource_name="myvol", + size="1Gi" + ) + op1 = ContainerOp( + name="op1", + image="image", + pvolumes={"/mnt": vol.volume} + ) + op2 = ContainerOp( + name="op2", + image="image", + pvolumes={"/data": op1.pvolume} + ) + + self.assertEqual(vol.volume.dependent_names, []) + self.assertEqual(op1.pvolume.dependent_names, [op1.name]) + self.assertEqual(op2.dependent_names, [op1.name]) + + def test_after_method(self): + """Test the after method.""" + with Pipeline("somename") as p: + op1 = ContainerOp(name="op1", image="image") + op2 = ContainerOp(name="op2", image="image").after(op1) + op3 = ContainerOp(name="op3", image="image") + vol1 = PipelineVolume(name="pipeline-volume") + vol2 = vol1.after(op1) + vol3 = vol2.after(op2) + vol4 = vol3.after(op1, op2) + vol5 = vol4.after(op3) + + self.assertEqual(vol1.dependent_names, []) + self.assertEqual(vol2.dependent_names, [op1.name]) + self.assertEqual(vol3.dependent_names, [op2.name]) + self.assertEqual(sorted(vol4.dependent_names), [op1.name, op2.name]) + self.assertEqual(sorted(vol5.dependent_names), [op1.name, op2.name, op3.name]) diff --git a/sdk/python/tests/dsl/resource_op_tests.py b/sdk/python/tests/dsl/resource_op_tests.py new file mode 100644 index 00000000000..883d8943872 --- /dev/null +++ b/sdk/python/tests/dsl/resource_op_tests.py @@ -0,0 +1,69 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from kfp.dsl import Pipeline, PipelineParam, ResourceOp +from kubernetes import client as k8s_client +import unittest + + +class TestResourceOp(unittest.TestCase): + + def test_basic(self): + """Test basic usage.""" + with Pipeline("somename") as p: + param = PipelineParam("param") + resource_metadata = k8s_client.V1ObjectMeta( + name="my-resource" + ) + k8s_resource = k8s_client.V1PersistentVolumeClaim( + api_version="v1", + kind="PersistentVolumeClaim", + metadata=resource_metadata + ) + res = ResourceOp( + name="resource", + k8s_resource=k8s_resource, + success_condition=param, + attribute_outputs={"test": "attr"} + ) + + self.assertCountEqual( + [x.name for x in res.inputs], ["param"] + ) + self.assertEqual(res.name, "resource") + self.assertEqual( + res.resource.success_condition, + PipelineParam("param") + ) + self.assertEqual(res.resource.action, "create") + self.assertEqual(res.resource.failure_condition, None) + self.assertEqual(res.resource.manifest, None) + expected_attribute_outputs = { + "manifest": "{}", + "name": "{.metadata.name}", + "test": "attr" + } + self.assertEqual(res.attribute_outputs, expected_attribute_outputs) + expected_outputs = { + "manifest": PipelineParam(name="manifest", op_name=res.name), + "name": PipelineParam(name="name", op_name=res.name), + "test": PipelineParam(name="test", op_name=res.name), + } + self.assertEqual(res.outputs, expected_outputs) + self.assertEqual( + res.output, + PipelineParam(name="test", op_name=res.name) + ) + self.assertEqual(res.dependent_names, []) diff --git a/sdk/python/tests/dsl/volume_op_tests.py b/sdk/python/tests/dsl/volume_op_tests.py new file mode 100644 index 00000000000..f563ca5fe50 --- /dev/null +++ b/sdk/python/tests/dsl/volume_op_tests.py @@ -0,0 +1,68 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from kubernetes.client.models import ( + V1Volume, V1PersistentVolumeClaimVolumeSource +) + +from kfp.dsl import Pipeline, PipelineParam, VolumeOp, PipelineVolume +import unittest + + +class TestVolumeOp(unittest.TestCase): + + def test_basic(self): + """Test basic usage.""" + with Pipeline("somename") as p: + param1 = PipelineParam("param1") + param2 = PipelineParam("param2") + vol = VolumeOp( + name="myvol_creation", + resource_name=param1, + size=param2, + annotations={"test": "annotation"} + ) + + self.assertCountEqual( + [x.name for x in vol.inputs], ["param1", "param2"] + ) + self.assertEqual( + vol.k8s_resource.metadata.name, + "{{workflow.name}}-%s" % PipelineParam("param1") + ) + expected_attribute_outputs = { + "manifest": "{}", + "name": "{.metadata.name}", + "size": "{.status.capacity.storage}" + } + self.assertEqual(vol.attribute_outputs, expected_attribute_outputs) + expected_outputs = { + "manifest": PipelineParam(name="manifest", op_name=vol.name), + "name": PipelineParam(name="name", op_name=vol.name), + "size": PipelineParam(name="size", op_name=vol.name) + } + self.assertEqual(vol.outputs, expected_outputs) + self.assertEqual( + vol.output, + PipelineParam(name="name", op_name=vol.name) + ) + self.assertEqual(vol.dependent_names, []) + expected_volume = PipelineVolume( + name="myvol-creation", + persistent_volume_claim=V1PersistentVolumeClaimVolumeSource( + claim_name=PipelineParam(name="name", op_name=vol.name) + ) + ) + self.assertEqual(vol.volume, expected_volume) diff --git a/sdk/python/tests/dsl/volume_snapshotop_tests.py b/sdk/python/tests/dsl/volume_snapshotop_tests.py new file mode 100644 index 00000000000..1c067b66756 --- /dev/null +++ b/sdk/python/tests/dsl/volume_snapshotop_tests.py @@ -0,0 +1,97 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from kubernetes import client as k8s_client +from kfp.dsl import ( + Pipeline, PipelineParam, VolumeOp, VolumeSnapshotOp +) +import unittest + + +class TestVolumeSnapshotOp(unittest.TestCase): + + def test_basic(self): + """Test basic usage.""" + with Pipeline("somename") as p: + param1 = PipelineParam("param1") + param2 = PipelineParam("param2") + vol = VolumeOp( + name="myvol_creation", + resource_name="myvol", + size="1Gi", + ) + snap1 = VolumeSnapshotOp( + name="mysnap_creation", + resource_name=param1, + volume=vol.volume, + ) + snap2 = VolumeSnapshotOp( + name="mysnap_creation", + resource_name="mysnap", + pvc=param2, + attribute_outputs={"size": "test"} + ) + + self.assertEqual( + sorted([x.name for x in snap1.inputs]), ["name", "param1"] + ) + self.assertEqual( + sorted([x.name for x in snap2.inputs]), ["param2"] + ) + expected_attribute_outputs_1 = { + "manifest": "{}", + "name": "{.metadata.name}", + "size": "{.status.restoreSize}" + } + self.assertEqual(snap1.attribute_outputs, expected_attribute_outputs_1) + expected_attribute_outputs_2 = { + "manifest": "{}", + "name": "{.metadata.name}", + "size": "test" + } + self.assertEqual(snap2.attribute_outputs, expected_attribute_outputs_2) + expected_outputs_1 = { + "manifest": PipelineParam(name="manifest", op_name=snap1.name), + "name": PipelineParam(name="name", op_name=snap1.name), + "size": PipelineParam(name="name", op_name=snap1.name), + } + self.assertEqual(snap1.outputs, expected_outputs_1) + expected_outputs_2 = { + "manifest": PipelineParam(name="manifest", op_name=snap2.name), + "name": PipelineParam(name="name", op_name=snap2.name), + "size": PipelineParam(name="name", op_name=snap2.name), + } + self.assertEqual(snap2.outputs, expected_outputs_2) + self.assertEqual( + snap1.output, + PipelineParam(name="name", op_name=snap1.name) + ) + self.assertEqual( + snap2.output, + PipelineParam(name="size", op_name=snap2.name) + ) + self.assertEqual(snap1.dependent_names, []) + self.assertEqual(snap2.dependent_names, []) + expected_snapshot_1 = k8s_client.V1TypedLocalObjectReference( + api_group="snapshot.storage.k8s.io", + kind="VolumeSnapshot", + name=PipelineParam(name="name", op_name=vol.name) + ) + self.assertEqual(snap1.snapshot, expected_snapshot_1) + expected_snapshot_2 = k8s_client.V1TypedLocalObjectReference( + api_group="snapshot.storage.k8s.io", + kind="VolumeSnapshot", + name=PipelineParam(name="param1") + ) + self.assertEqual(snap2.snapshot, expected_snapshot_2) From bb0a5e36f6c3cdf0a9cd45ab6c8106150ab76e5a Mon Sep 17 00:00:00 2001 From: Tommy Li Date: Thu, 25 Apr 2019 12:08:34 -0700 Subject: [PATCH 34/43] Parameterize the artifact path for mlpipeline ui-metadata and metrics (#998) * parameterize artifact path for ui-metadata and metrics * change output_artifact_paths as containerops args * change output_artifact_paths default args to None --- sdk/python/kfp/compiler/_op_to_template.py | 2 +- sdk/python/kfp/dsl/_container_op.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/sdk/python/kfp/compiler/_op_to_template.py b/sdk/python/kfp/compiler/_op_to_template.py index f5ae58b6152..e208458b439 100644 --- a/sdk/python/kfp/compiler/_op_to_template.py +++ b/sdk/python/kfp/compiler/_op_to_template.py @@ -184,7 +184,7 @@ def _op_to_template(op: BaseOp): if isinstance(op, dsl.ContainerOp): # default output artifacts - output_artifact_paths = OrderedDict() + output_artifact_paths = OrderedDict(op.output_artifact_paths) output_artifact_paths.setdefault('mlpipeline-ui-metadata', 
'/mlpipeline-ui-metadata.json') output_artifact_paths.setdefault('mlpipeline-metrics', '/mlpipeline-metrics.json') diff --git a/sdk/python/kfp/dsl/_container_op.py b/sdk/python/kfp/dsl/_container_op.py index 6456e5f7eb4..56e3e025c21 100644 --- a/sdk/python/kfp/dsl/_container_op.py +++ b/sdk/python/kfp/dsl/_container_op.py @@ -848,6 +848,7 @@ def __init__(self, sidecars: List[Sidecar] = None, container_kwargs: Dict = None, file_outputs: Dict[str, str] = None, + output_artifact_paths : Dict[str, str]=None, is_exit_handler=False, pvolumes: Dict[str, V1Volume] = None, ): @@ -869,6 +870,10 @@ def __init__(self, file_outputs: Maps output labels to local file paths. At pipeline run time, the value of a PipelineParam is saved to its corresponding local file. It's one way for outside world to receive outputs of the container. + output_artifact_paths: Maps output artifact labels to local artifact file paths. + It has the following default artifact paths during compile time. + {'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json', + 'mlpipeline-metrics': '/mlpipeline-metrics.json'} is_exit_handler: Whether it is used as an exit handler. pvolumes: Dictionary for the user to match a path on the op's fs with a V1Volume or it inherited type. @@ -917,6 +922,8 @@ def _decorated(*args, **kwargs): # attributes specific to `ContainerOp` self.file_outputs = file_outputs + self.output_artifact_paths = output_artifact_paths or {} + self._metadata = None self.outputs = {} From f6283d45834efeaa135a0dc543a81efd07bf751c Mon Sep 17 00:00:00 2001 From: Animesh Singh Date: Thu, 25 Apr 2019 13:05:01 -0700 Subject: [PATCH 35/43] modernize-wml-pipeline (#1227) * modernized-wml-pipeline * simplifying-params --- .../watson/watson_train_serve_pipeline.py | 77 ++++++++++--------- 1 file changed, 40 insertions(+), 37 deletions(-) diff --git a/samples/ibm-samples/watson/watson_train_serve_pipeline.py b/samples/ibm-samples/watson/watson_train_serve_pipeline.py index f0ecfcb84d0..058142b3971 100644 --- a/samples/ibm-samples/watson/watson_train_serve_pipeline.py +++ b/samples/ibm-samples/watson/watson_train_serve_pipeline.py @@ -16,60 +16,63 @@ # generate default secret name import os -secret_name = 'ai-pipeline-' + os.path.splitext(os.path.basename(CONFIG_FILE_URL))[0] +import kfp +from kfp import components +from kfp import dsl +import ai_pipeline_params as params +secret_name = 'kfp-creds' +configuration_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/master/components/ibm-components/commons/config/component.yaml') +train_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/master/components/ibm-components/watson/train/component.yaml') +store_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/master/components/ibm-components/watson/store/component.yaml') +deploy_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/master/components/ibm-components/watson/deploy/component.yaml') + # create pipelines -import kfp.dsl as dsl -import ai_pipeline_params as params @dsl.pipeline( name='KFP on WML training', description='Kubeflow pipelines running on WML performing tensorflow image recognition.' 
) def kfp_wml_pipeline( + GITHUB_TOKEN='', + CONFIG_FILE_URL='https://raw.githubusercontent.com/user/repository/branch/creds.ini', + train_code='tf-model.zip', + execution_command='\'python3 convolutional_network.py --trainImagesFile ${DATA_DIR}/train-images-idx3-ubyte.gz --trainLabelsFile ${DATA_DIR}/train-labels-idx1-ubyte.gz --testImagesFile ${DATA_DIR}/t10k-images-idx3-ubyte.gz --testLabelsFile ${DATA_DIR}/t10k-labels-idx1-ubyte.gz --learningRate 0.001 --trainingIters 20000\'', + framework= 'tensorflow', + framework_version = '1.5', + runtime = 'python', + runtime_version = '3.5', + run_definition = 'wml-tensorflow-definition', + run_name = 'wml-tensorflow-run', + model_name='wml-tensorflow-mnist', + scoring_payload='tf-mnist-test-payload.json' ): # op1 - this operation will create the credentials as secrets to be used by other operations - config_op = dsl.ContainerOp( - name="config", - image="aipipeline/wml-config", - command=['python3'], - arguments=['/app/config.py', - '--token', GITHUB_TOKEN, - '--url', CONFIG_FILE_URL], - file_outputs={'secret-name' : '/tmp/'+secret_name} + get_configuration = configuration_op( + token = GITHUB_TOKEN, + url = CONFIG_FILE_URL, + name = secret_name ) # op2 - this operation trains the model with the model codes and data saved in the cloud object store - train_op = dsl.ContainerOp( - name="train", - image="aipipeline/wml-train", - command=['python3'], - arguments=['/app/wml-train.py', - '--config', config_op.output, - '--train-code', 'tf-model.zip', - '--execution-command', '\'python3 convolutional_network.py --trainImagesFile ${DATA_DIR}/train-images-idx3-ubyte.gz --trainLabelsFile ${DATA_DIR}/train-labels-idx1-ubyte.gz --testImagesFile ${DATA_DIR}/t10k-images-idx3-ubyte.gz --testLabelsFile ${DATA_DIR}/t10k-labels-idx1-ubyte.gz --learningRate 0.001 --trainingIters 20000\''], - file_outputs={'run-uid' : '/tmp/run_uid'}).apply(params.use_ai_pipeline_params(secret_name)) + wml_train = train_op( + get_configuration.output, + train_code, + execution_command + ).apply(params.use_ai_pipeline_params(secret_name)) # op3 - this operation stores the model trained above - store_op = dsl.ContainerOp( - name="store", - image="aipipeline/wml-store", - command=['python3'], - arguments=['/app/wml-store.py', - '--run-uid', train_op.output, - '--model-name', 'python-tensorflow-mnist'], - file_outputs={'model-uid' : '/tmp/model_uid'}).apply(params.use_ai_pipeline_params(secret_name)) + wml_store = store_op( + wml_train.output, + model_name + ).apply(params.use_ai_pipeline_params(secret_name)) # op4 - this operation deploys the model to a web service and run scoring with the payload in the cloud object store - deploy_op = dsl.ContainerOp( - name="deploy", - image="aipipeline/wml-deploy", - command=['python3'], - arguments=['/app/wml-deploy.py', - '--model-uid', store_op.output, - '--model-name', 'python-tensorflow-mnist', - '--scoring-payload', 'tf-mnist-test-payload.json'], - file_outputs={'output' : '/tmp/output'}).apply(params.use_ai_pipeline_params(secret_name)) + wml_deploy = deploy_op( + wml_store.output, + model_name, + scoring_payload + ).apply(params.use_ai_pipeline_params(secret_name)) if __name__ == '__main__': # compile the pipeline From 6a9d639a17911421fa1bca7453fe8afb91757b45 Mon Sep 17 00:00:00 2001 From: Animesh Singh Date: Thu, 25 Apr 2019 13:05:33 -0700 Subject: [PATCH 36/43] simplifying-ffdl-params (#1230) --- .../ibm-samples/ffdl-seldon/ffdl_pipeline.py | 21 +++++++------------ 1 file changed, 7 insertions(+), 14 deletions(-) diff --git 
a/samples/ibm-samples/ffdl-seldon/ffdl_pipeline.py b/samples/ibm-samples/ffdl-seldon/ffdl_pipeline.py index 62db9ade7c3..db9cc2673ab 100644 --- a/samples/ibm-samples/ffdl-seldon/ffdl_pipeline.py +++ b/samples/ibm-samples/ffdl-seldon/ffdl_pipeline.py @@ -17,20 +17,13 @@ ) def ffdlPipeline( - GITHUB_TOKEN=dsl.PipelineParam(name='github-token', - value=''), - CONFIG_FILE_URL=dsl.PipelineParam(name='config-file-url', - value='https://raw.githubusercontent.com/user/repository/branch/creds.ini'), - model_def_file_path=dsl.PipelineParam(name='model-def-file-path', - value='gender-classification.zip'), - manifest_file_path=dsl.PipelineParam(name='manifest-file-path', - value='manifest.yml'), - model_deployment_name=dsl.PipelineParam(name='model-deployment-name', - value='gender-classifier'), - model_class_name=dsl.PipelineParam(name='model-class-name', - value='ThreeLayerCNN'), - model_class_file=dsl.PipelineParam(name='model-class-file', - value='gender_classification.py') + GITHUB_TOKEN='', + CONFIG_FILE_URL='https://raw.githubusercontent.com/user/repository/branch/creds.ini', + model_def_file_path='gender-classification.zip', + manifest_file_path='manifest.yml', + model_deployment_name='gender-classifier', + model_class_name='ThreeLayerCNN', + model_class_file='gender_classification.py' ): """A pipeline for end to end machine learning workflow.""" From b795a9a8993ab92dcd71bb6819daf4ed23580071 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Thu, 25 Apr 2019 14:00:32 -0700 Subject: [PATCH 37/43] Swagger - Specifying content types on the route level (#1225) --- backend/api/swagger/pipeline.upload.swagger.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/backend/api/swagger/pipeline.upload.swagger.json b/backend/api/swagger/pipeline.upload.swagger.json index 739c026c9e0..42d1e73d9c5 100644 --- a/backend/api/swagger/pipeline.upload.swagger.json +++ b/backend/api/swagger/pipeline.upload.swagger.json @@ -8,16 +8,16 @@ "http", "https" ], - "consumes": [ - "multipart/form-data" - ], - "produces": [ - "application/json" - ], "paths": { "/apis/v1beta1/pipelines/upload": { "post": { "operationId": "UploadPipeline", + "consumes": [ + "multipart/form-data" + ], + "produces": [ + "application/json" + ], "responses": { "200": { "description": "", From 8c8e5052df19998adfcfbfb1a41aca5bb0714311 Mon Sep 17 00:00:00 2001 From: cheyang Date: Fri, 26 Apr 2019 05:56:28 +0800 Subject: [PATCH 38/43] Expose step id and step name (#1191) * add step id and step name * add step id and step name * update python sdk version * fix samples * fix typo * add env step id * pass the name to next step * change to wf name --- components/arena/docker/arena_launcher.py | 118 ++++++++---------- .../python/arena/_arena_distributed_tf_op.py | 9 +- .../arena/python/arena/_arena_mpi_op.py | 9 +- .../python/arena/_arena_standalone_op.py | 9 +- components/arena/python/build.sh | 2 +- components/arena/python/setup.py | 2 +- samples/arena-samples/standalonejob/README.md | 2 +- .../standalonejob/standalone_pipeline.py | 16 ++- 8 files changed, 87 insertions(+), 80 deletions(-) diff --git a/components/arena/docker/arena_launcher.py b/components/arena/docker/arena_launcher.py index 15d45d9f701..083d3113906 100644 --- a/components/arena/docker/arena_launcher.py +++ b/components/arena/docker/arena_launcher.py @@ -114,8 +114,6 @@ def _collect_metrics(name, job_type, metric_name): return metric - - def _get_job_status(name, job_type): get_cmd = "arena get %s --type %s | grep -i STATUS:|awk -F: '{print $NF}'" % 
(name, job_type) status = "" @@ -139,16 +137,12 @@ def _get_tensorboard_url(name, job_type): return url -# - -# Generate standalone job -def generate_job_command(args): - name = args.name +# Generate common options +def generate_options(args): gpus = args.gpus cpu = args.cpu memory = args.memory tensorboard = args.tensorboard - image = args.image output_data = args.output_data data = args.data env = args.env @@ -157,45 +151,64 @@ def generate_job_command(args): log_dir = args.log_dir sync_source = args.sync_source - commandArray = [ - 'arena', 'submit', 'tfjob', - '--name={0}'.format(name), - '--image={0}'.format(image), - ] + options = [] if gpus > 0: - commandArray.extend(['--gpus', str(gpus)]) + options.extend(['--gpus', str(gpus)]) if cpu > 0: - commandArray.extend(['--cpu', str(cpu)]) + options.extend(['--cpu', str(cpu)]) if memory >0: - commandArray.extend(['--memory', str(memory)]) + options.extend(['--memory', str(memory)]) if tensorboard_image != "tensorflow/tensorflow:1.12.0": - commandArray.extend(['--tensorboardImage', tensorboard_image]) + options.extend(['--tensorboardImage', tensorboard_image]) if tensorboard: - commandArray.append("--tensorboard") + options.append("--tensorboard") if os.path.isdir(args.log_dir): - commandArray.append(['--logdir', args.log_dir]) + options.extend(['--logdir', args.log_dir]) else: logging.info("skip log dir :{0}".format(args.log_dir)) if len(data) > 0: for d in data: - commandArray.append("--data={0}".format(d)) + options.append("--data={0}".format(d)) if len(env) > 0: for e in env: - commandArray.append("--env={0}".format(e)) + options.append("--env={0}".format(e)) + + if len(args.workflow_name) > 0: + options.append("--env=WORKFLOW_NAME={0}".format(args.workflow_name)) + + if len(args.step_name) > 0: + options.append("--env=STEP_NAME={0}".format(args.step_name)) if len(sync_source) > 0: if not sync_source.endswith(".git"): raise ValueError("sync_source must be an http git url") - commandArray.extend(['--sync-mode','git']) - commandArray.extend(['--sync-source',sync_source]) + options.extend(['--sync-mode','git']) + options.extend(['--sync-source',sync_source]) + + return options + + + +# Generate standalone job +def generate_job_command(args): + name = args.name + image = args.image + + commandArray = [ + 'arena', 'submit', 'tfjob', + '--name={0}'.format(name), + '--image={0}'.format(image), + ] + + commandArray.extend(generate_options(args)) return commandArray, "tfjob" @@ -203,19 +216,7 @@ def generate_job_command(args): def generate_mpjob_command(args): name = args.name workers = args.workers - gpus = args.gpus - cpu = args.cpu - memory = args.memory - tensorboard = args.tensorboard image = args.image - output_data = args.output_data - data = args.data - env = args.env - tensorboard_image = args.tensorboard_image - tensorboard = str2bool(args.tensorboard) - rdma = str2bool(args.rdma) - log_dir = args.log_dir - sync_source = args.sync_source commandArray = [ 'arena', 'submit', 'mpijob', @@ -224,42 +225,10 @@ def generate_mpjob_command(args): '--image={0}'.format(image), ] - if gpus > 0: - commandArray.extend(['--gpus', str(gpus)]) - - if cpu > 0: - commandArray.extend(['--cpu', str(cpu)]) - - if memory >0: - commandArray.extend(['--memory', str(memory)]) - - if tensorboard_image != "tensorflow/tensorflow:1.12.0": - commandArray.extend(['--tensorboardImage', tensorboard_image]) - - if tensorboard: - commandArray.append("--tensorboard") - if rdma: - commandArray.append("--rdma") + commandArray.append("--rdma") - if 
os.path.isdir(args.log_dir): - commandArray.append(['--logdir', args.log_dir]) - else: - logging.info("skip log dir :{0}".format(args.log_dir)) - - if len(data) > 0: - for d in data: - commandArray.append("--data={0}".format(d)) - - if len(env) > 0: - for e in env: - commandArray.append("--env={0}".format(e)) - - if len(sync_source) > 0: - if not sync_source.endswith(".git"): - raise ValueError("sync_source must be an http git url") - commandArray.extend(['--sync-mode','git']) - commandArray.extend(['--sync-source',sync_source]) + commandArray.extend(generate_options(args)) return commandArray, "mpijob" @@ -297,6 +266,9 @@ def main(argv=None): parser.add_argument('--metric', action='append', type=str, default=[]) parser.add_argument('--sync-source', type=str, default='') + parser.add_argument('--workflow-name', type=str, default='') + parser.add_argument('--step-name', type=str, default='') + subparsers = parser.add_subparsers(help='arena sub-command help') #create the parser for the 'mpijob' command @@ -407,6 +379,14 @@ def main(argv=None): with open('/output.txt', 'w') as f: f.write(output) + with open('/workflow-name.txt', 'w') as f: + f.write(args.workflow_name) + + with open('/step-name.txt', 'w') as f: + f.write(args.step_name) + + with open('/name.txt', 'w') as f: + f.write(args.name) if __name__== "__main__": main() diff --git a/components/arena/python/arena/_arena_distributed_tf_op.py b/components/arena/python/arena/_arena_distributed_tf_op.py index f8a09065fb6..5b5ea9e5974 100644 --- a/components/arena/python/arena/_arena_distributed_tf_op.py +++ b/components/arena/python/arena/_arena_distributed_tf_op.py @@ -62,7 +62,7 @@ def parameter_servers_op(name, image, command, env, data, sync_source, annotatio tensorboard, worker_port, ps_port, metrics=['Train-accuracy:PERCENTAGE'], - arena_image='cheyang/arena_launcher:v0.3', + arena_image='cheyang/arena_launcher:v0.4', timeout_hours=240): """This function submits Distributed TFJob in Parameter Servers mode. @@ -123,8 +123,13 @@ def distributed_tf_op(name, image, command, env=[], data=[], sync_source=None, "--timeout-hours", timeout_hours, "--metric-name", metric_name, "--metric-unit", metric_unit, + "--step-name", '{{pod.name}}', + "--workflow-name", '{{workflow.name}}', "tfjob", "--workers", workers, "--", command], - file_outputs={'train': '/output.txt'} + file_outputs={'train': '/output.txt', + 'workflow':'/workflow-name.txt', + 'step':'/step-name.txt', + 'name':'/name.txt'} ) diff --git a/components/arena/python/arena/_arena_mpi_op.py b/components/arena/python/arena/_arena_mpi_op.py index e9fb661100d..3c1623564c4 100644 --- a/components/arena/python/arena/_arena_mpi_op.py +++ b/components/arena/python/arena/_arena_mpi_op.py @@ -23,7 +23,7 @@ def mpi_job_op(name, image, command, workers=1, gpus=0, cpu=0, memory=0, env=[], rdma=False, tensorboard=False, tensorboard_image=None, metrics=['Train-accuracy:PERCENTAGE'], - arenaImage='cheyang/arena_launcher:v0.3', + arenaImage='cheyang/arena_launcher:v0.4', timeout_hours=240): """This function submits MPI Job, it can run Allreduce-style Distributed Training. 
@@ -74,13 +74,18 @@ def mpi_job_op(name, image, command, workers=1, gpus=0, cpu=0, memory=0, env=[], "--gpus", str(gpus), "--cpu", str(cpu), "--memory", str(memory), + "--step-name", '{{pod.name}}', + "--workflow-name", '{{workflow.name}}', "--workers", str(workers), "--timeout-hours", str(timeout_hours), ] + options + [ "mpijob", "--", str(command)], - file_outputs={'train': '/output.txt'} + file_outputs={'train': '/output.txt', + 'workflow':'/workflow-name.txt', + 'step':'/step-name.txt', + 'name':'/name.txt'} ) op.set_image_pull_policy('Always') return op \ No newline at end of file diff --git a/components/arena/python/arena/_arena_standalone_op.py b/components/arena/python/arena/_arena_standalone_op.py index e54ded5e319..682d8f47b4e 100644 --- a/components/arena/python/arena/_arena_standalone_op.py +++ b/components/arena/python/arena/_arena_standalone_op.py @@ -23,7 +23,7 @@ def standalone_job_op(name, image, command, gpus=0, cpu=0, memory=0, env=[], tensorboard=False, tensorboard_image=None, data=[], sync_source=None, annotations=[], metrics=['Train-accuracy:PERCENTAGE'], - arena_image='cheyang/arena_launcher:v0.3', + arena_image='cheyang/arena_launcher:v0.4', timeout_hours=240): """This function submits a standalone training Job @@ -73,13 +73,18 @@ def standalone_job_op(name, image, command, gpus=0, cpu=0, memory=0, env=[], "--image", str(image), "--gpus", str(gpus), "--cpu", str(cpu), + "--step-name", '{{pod.name}}', + "--workflow-name", '{{workflow.name}}', "--memory", str(memory), "--timeout-hours", str(timeout_hours), ] + options + [ "job", "--", str(command)], - file_outputs={'train': '/output.txt'} + file_outputs={'train': '/output.txt', + 'workflow':'/workflow-name.txt', + 'step':'/step-name.txt', + 'name':'/name.txt'} ) op.set_image_pull_policy('Always') return op diff --git a/components/arena/python/build.sh b/components/arena/python/build.sh index 28809bc1074..2ca818a1caa 100755 --- a/components/arena/python/build.sh +++ b/components/arena/python/build.sh @@ -5,7 +5,7 @@ get_abs_filename() { echo "$(cd "$(dirname "$1")" && pwd)/$(basename "$1")" } -target_archive_file=${1:-kfp-arena-0.3.tar.gz} +target_archive_file=${1:-kfp-arena-0.4.tar.gz} target_archive_file=$(get_abs_filename "$target_archive_file") DIR=$(mktemp -d) diff --git a/components/arena/python/setup.py b/components/arena/python/setup.py index ad65e966a02..8b56456de11 100644 --- a/components/arena/python/setup.py +++ b/components/arena/python/setup.py @@ -2,7 +2,7 @@ NAME = 'kfp-arena' -VERSION = '0.3' +VERSION = '0.4' REQUIRES = ['kfp >= 0.1'] diff --git a/samples/arena-samples/standalonejob/README.md b/samples/arena-samples/standalonejob/README.md index a09786da4c9..a7e2183ebb1 100644 --- a/samples/arena-samples/standalonejob/README.md +++ b/samples/arena-samples/standalonejob/README.md @@ -73,7 +73,7 @@ spec: First, install the necessary Python Packages ```shell pip3 install http://kubeflow.oss-cn-beijing.aliyuncs.com/kfp/0.1.14/kfp.tar.gz --upgrade -pip3 install http://kubeflow.oss-cn-beijing.aliyuncs.com/kfp-arena/kfp-arena-0.3.tar.gz --upgrade +pip3 install http://kubeflow.oss-cn-beijing.aliyuncs.com/kfp-arena/kfp-arena-0.4.tar.gz --upgrade ``` Then run [standalone_pipeline.py](standalone_pipeline.py) with different parameters. 
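As a concrete illustration of the launcher change above: passing `--workflow-name '{{workflow.name}}'` and `--step-name '{{pod.name}}'` makes the launcher inject `WORKFLOW_NAME` and `STEP_NAME` environment variables into the submitted job. A minimal sketch of how training code might consume them follows; the tagging logic and output path are assumptions for illustration only and are not part of this patch.

```python
# Hypothetical training-side snippet: read the WORKFLOW_NAME and STEP_NAME
# environment variables injected by the arena launcher and use them to tag
# per-run outputs. The output directory layout is an illustrative assumption.
import os

workflow_name = os.environ.get('WORKFLOW_NAME', 'unknown-workflow')
step_name = os.environ.get('STEP_NAME', 'unknown-step')

print('Running step %s of workflow %s' % (step_name, workflow_name))

# Keep artifacts from different runs separate by embedding the names in the path.
output_dir = '/training/output/%s/%s' % (workflow_name, step_name)
os.makedirs(output_dir, exist_ok=True)
```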
diff --git a/samples/arena-samples/standalonejob/standalone_pipeline.py b/samples/arena-samples/standalonejob/standalone_pipeline.py index 22c5eb4ae80..096de1cc3e2 100644 --- a/samples/arena-samples/standalonejob/standalone_pipeline.py +++ b/samples/arena-samples/standalonejob/standalone_pipeline.py @@ -37,7 +37,15 @@ def sample_pipeline(learning_rate='0.01', env=["GIT_SYNC_REV=%s" % (commit)], gpus=gpus, data=data, - command="echo %s;python code/tensorflow-sample-code/tfjob/docker/mnist/main.py --max_steps 500 --data_dir /training/dataset/mnist --log_dir /training/output/mnist --learning_rate %s --dropout %s" % (prepare_data.output, learning_rate, dropout), + command='''echo prepare_step_name=%s and prepare_wf_name=%s && \ + python code/tensorflow-sample-code/tfjob/docker/mnist/main.py --max_steps 500 \ + --data_dir /training/dataset/mnist \ + --log_dir /training/output/mnist \ + --learning_rate %s --dropout %s''' % ( + prepare_data.outputs['step'], + prepare_data.outputs['workflow'], + learning_rate, + dropout), metrics=["Train-accuracy:PERCENTAGE"]) # 3. export the model export_model = arena.standalone_job_op( @@ -46,7 +54,11 @@ def sample_pipeline(learning_rate='0.01', sync_source="https://code.aliyun.com/xiaozhou/tensorflow-sample-code.git", env=["GIT_SYNC_REV=%s" % (commit)], data=data, - command="echo %s;python code/tensorflow-sample-code/tfjob/docker/mnist/export_model.py --model_version=%s --checkpoint_path=/training/output/mnist /training/output/models" % (train.output, model_version)) + command="echo train_step_name=%s and train_wf_name=%s && \ + python code/tensorflow-sample-code/tfjob/docker/mnist/export_model.py \ + --model_version=%s \ + --checkpoint_path=/training/output/mnist \ + /training/output/models" % (train.outputs['step'], train.outputs['workflow'], model_version)) if __name__ == '__main__': parser = argparse.ArgumentParser() From e2492896aaecd1919b090aefb2491adce5d14575 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Thu, 25 Apr 2019 15:46:27 -0700 Subject: [PATCH 39/43] Testing/Sample - Made checking confusion matrix data more robust (#1196) * Testing/Sample - Made checking confusion matrix data more robust The sample tests no longer depend on particular file names inside the archive. Now they only depend on the artifact name. * Fixed json loading on python 3.5 `json.load` only supports reading from binary files in python 3.6+. 
https://docs.python.org/3/library/json.html#json.load --- test/sample-test/run_kubeflow_test.py | 17 +++++++++-------- test/sample-test/run_xgboost_test.py | 16 ++++++++-------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/test/sample-test/run_kubeflow_test.py b/test/sample-test/run_kubeflow_test.py index a34d682cd4e..7e8608ffa48 100644 --- a/test/sample-test/run_kubeflow_test.py +++ b/test/sample-test/run_kubeflow_test.py @@ -14,6 +14,7 @@ import argparse import os +import io import json import tarfile from datetime import datetime @@ -107,14 +108,14 @@ def main(): # confusion matrix should show three columns for the flower data # target, predicted, count cm_tar_path = './confusion_matrix.tar.gz' - cm_filename = 'mlpipeline-ui-metadata.json' - utils.get_artifact_in_minio(workflow_json, 'confusion-matrix', cm_tar_path) - tar_handler = tarfile.open(cm_tar_path) - tar_handler.extractall() - - with open(cm_filename, 'r') as f: - cm_data = json.load(f) - utils.add_junit_test(test_cases, 'confusion matrix format', (len(cm_data['outputs'][0]['schema']) == 3), 'the column number of the confusion matrix output is not equal to three') + utils.get_artifact_in_minio(workflow_json, 'confusion-matrix', cm_tar_path, 'mlpipeline-ui-metadata') + with tarfile.open(cm_tar_path) as tar_handle: + file_handles = tar_handle.getmembers() + assert len(file_handles) == 1 + + with tar_handle.extractfile(file_handles[0]) as f: + cm_data = json.load(io.TextIOWrapper(f)) + utils.add_junit_test(test_cases, 'confusion matrix format', (len(cm_data['outputs'][0]['schema']) == 3), 'the column number of the confusion matrix output is not equal to three') ###### Delete Job ###### #TODO: add deletion when the backend API offers the interface. diff --git a/test/sample-test/run_xgboost_test.py b/test/sample-test/run_xgboost_test.py index a339a859e99..7259dc0abd5 100644 --- a/test/sample-test/run_xgboost_test.py +++ b/test/sample-test/run_xgboost_test.py @@ -109,14 +109,14 @@ def main(): # confusion matrix should show three columns for the flower data # target, predicted, count cm_tar_path = './confusion_matrix.tar.gz' - cm_filename = 'mlpipeline-ui-metadata.json' - utils.get_artifact_in_minio(workflow_json, 'confusion-matrix', cm_tar_path) - tar_handler = tarfile.open(cm_tar_path) - tar_handler.extractall() - - with open(cm_filename, 'r') as f: - cm_data = f.read() - utils.add_junit_test(test_cases, 'confusion matrix format', (len(cm_data) > 0), 'the confusion matrix file is empty') + utils.get_artifact_in_minio(workflow_json, 'confusion-matrix', cm_tar_path, 'mlpipeline-ui-metadata') + with tarfile.open(cm_tar_path) as tar_handle: + file_handles = tar_handle.getmembers() + assert len(file_handles) == 1 + + with tar_handle.extractfile(file_handles[0]) as f: + cm_data = f.read() + utils.add_junit_test(test_cases, 'confusion matrix format', (len(cm_data) > 0), 'the confusion matrix file is empty') ###### Delete Job ###### #TODO: add deletion when the backend API offers the interface. 
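For reference, the artifact-reading pattern introduced by this test change can be exercised on its own. The sketch below mirrors the logic added to both sample tests — open the downloaded archive, require exactly one member, and decode it through `io.TextIOWrapper` so `json.load` also works on Python 3.5. The helper name and the commented example assertion are illustrative, not part of the patch.

```python
# Minimal standalone sketch of the artifact-reading pattern above: open the
# downloaded .tar.gz, assert it contains exactly one member, and json-load it
# through io.TextIOWrapper so the code also runs on Python 3.5, where
# json.load cannot read binary file objects.
import io
import json
import tarfile


def read_single_json_member(tar_path):
    """Return the parsed JSON content of the only file inside a .tar.gz archive."""
    with tarfile.open(tar_path) as tar_handle:
        members = tar_handle.getmembers()
        assert len(members) == 1, 'expected exactly one file in %s' % tar_path
        with tar_handle.extractfile(members[0]) as binary_stream:
            # TextIOWrapper decodes the binary stream so json.load sees text.
            return json.load(io.TextIOWrapper(binary_stream))


# Example usage mirroring the kubeflow sample test:
# cm_data = read_single_json_member('./confusion_matrix.tar.gz')
# assert len(cm_data['outputs'][0]['schema']) == 3
```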
From 688f2205a6fa9f8dfd077079265eb46403f333d2 Mon Sep 17 00:00:00 2001 From: Ning Date: Thu, 25 Apr 2019 16:34:27 -0700 Subject: [PATCH 40/43] clean up quickstart sample a bit (#1232) * clean up a bit * move the project id to the top --- samples/notebooks/quickstart.ipynb | 90 ++++++++++++++++++++---------- 1 file changed, 62 insertions(+), 28 deletions(-) diff --git a/samples/notebooks/quickstart.ipynb b/samples/notebooks/quickstart.ipynb index e880a118d11..ff2f297427e 100644 --- a/samples/notebooks/quickstart.ipynb +++ b/samples/notebooks/quickstart.ipynb @@ -21,13 +21,47 @@ "# limitations under the License." ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.16/kfp.tar.gz'\n", + "# PROJECT_ID is used to construct the docker image registry. We will use Google Container Registry, \n", + "# but any other accessible registry works as well. \n", + "PROJECT_ID='Your-Gcp-Project-Id'" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "# Install Pipeline SDK\n", + "!pip3 install $KFP_PACKAGE --upgrade\n", + "!mkdir tmp/pipelines" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Part 1\n", "# Two ways to author a component to list blobs in a GCS bucket\n", - "A pipeline is composed of one or more components. In this section, you will build a single component that lists the blobs in a GCS bucket. Then you buid a pipeline that consists of this component. There are two ways to author a component. In the following sections we will go through each of them." + "A pipeline is composed of one or more components. In this section, you will build a single component that lists the blobs in a GCS bucket. Then you build a pipeline that consists of this component. There are two ways to author a component. In the following sections we will go through each of them." ] }, { @@ -51,7 +85,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -78,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -97,7 +131,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -110,7 +144,7 @@ "\n", "# Compile the pipeline to a file.\n", "import kfp.compiler as compiler\n", - "compiler.Compiler().compile(pipeline_func, 'list_blobs.pipeline.tar.gz')" + "compiler.Compiler().compile(pipeline_func, 'tmp/pipelines/list_blobs.pipeline.tar.gz')" ] }, { @@ -125,7 +159,7 @@ "metadata": {}, "source": [ "### 2.1 Create a Docker container\n", - "Create your own container image that includes your program. If your component creates some outputs to be fed as inputs to the downstream components, each separate output must be written as a string to a separate local text file by the container image. For example, if a trainer component needs to output the trained model path, it can write the path to a local file `/output.txt`. The string written to an output file cannot be too big. If it is too big (>> 100 kB), save the output to an external persistent storage and pass the storage path to the next component.\n", + "Create your own container image that includes your program. 
If your component creates some outputs to be fed as inputs to the downstream components, each separate output must be written as a string to a separate local text file inside the container image. For example, if a trainer component needs to output the trained model path, it can write the path to a local file `/output.txt`. The string written to an output file cannot be too big. If it is too big (>> 100 kB), it is recommended to save the output to an external persistent storage and pass the storage path to the next component.\n", "\n", "Start by entering the value of your Google Cloud Platform Project ID." ] @@ -185,38 +219,19 @@ "%%bash\n", "\n", "# Create Dockerfile.\n", - "cat > ./tmp/components/list-gcs-blobs/Dockerfile < ./tmp/components/list-gcs-blobs/Dockerfile < Date: Thu, 25 Apr 2019 17:30:26 -0700 Subject: [PATCH 41/43] SDK - Fixed the non-string items in the command-line arguments (#1197) * SDK - Fixed the non-string items if the command-line arguments Fixes a bug discovered by Makoto. When you pass non-string arguments to the ContainerOp.command, they're passed as such to the resulting Argo workflow which then cannot be loaded in back-end due to the item types. This PR ensures the items are converted to string. * Removed the if_none parameter It wasn't used anywhere. * Specified that the argument is optional. --- sdk/python/kfp/dsl/_container_op.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/sdk/python/kfp/dsl/_container_op.py b/sdk/python/kfp/dsl/_container_op.py index 56e3e025c21..9dece451216 100644 --- a/sdk/python/kfp/dsl/_container_op.py +++ b/sdk/python/kfp/dsl/_container_op.py @@ -14,7 +14,7 @@ import re import warnings -from typing import Any, Dict, List, TypeVar, Union, Callable +from typing import Any, Dict, List, TypeVar, Union, Callable, Optional, Sequence from kubernetes.client.models import ( V1Container, V1EnvVar, V1EnvFromSource, V1SecurityContext, V1Probe, V1ResourceRequirements, V1VolumeDevice, V1VolumeMount, V1ContainerPort, @@ -72,11 +72,15 @@ def _proxy_container_op_props(cls: "ContainerOp"): return cls -def as_list(value: Any, if_none: Union[None, List] = None) -> List: +def as_string_list(list_or_str: Optional[Union[Any, Sequence[Any]]]) -> List[str]: """Convert any value except None to a list if not already a list.""" - if value is None: - return if_none - return value if isinstance(value, list) else [value] + if list_or_str is None: + return None + if isinstance(list_or_str, Sequence) and not isinstance(list_or_str, str): + list_value = list_or_str + else: + list_value = [list_or_str] + return [str(item) for item in list_value] def create_and_append(current_list: Union[List[T], None], item: T) -> List[T]: @@ -596,8 +600,8 @@ def __init__(self, super().__init__( name=name, image=image, - command=as_list(command), - args=as_list(args), + command=as_string_list(command), + args=as_string_list(args), **kwargs) self.mirror_volume_mounts = mirror_volume_mounts @@ -884,8 +888,8 @@ def __init__(self, self.attrs_with_pipelineparams = BaseOp.attrs_with_pipelineparams + ['_container'] #Copying the BaseOp class variable! 
# convert to list if not a list - command = as_list(command) - arguments = as_list(arguments) + command = as_string_list(command) + arguments = as_string_list(arguments) # `container` prop in `io.argoproj.workflow.v1alpha1.Template` container_kwargs = container_kwargs or {} @@ -961,7 +965,7 @@ def command(self): @command.setter def command(self, value): - self._container.command = as_list(value) + self._container.command = as_string_list(value) @property def arguments(self): @@ -969,7 +973,7 @@ def arguments(self): @arguments.setter def arguments(self, value): - self._container.args = as_list(value) + self._container.args = as_string_list(value) @property def container(self): @@ -1017,4 +1021,4 @@ def _set_metadata(self, metadata): # proxy old ContainerOp properties to ContainerOp.container # with PendingDeprecationWarning. -ContainerOp = _proxy_container_op_props(ContainerOp) \ No newline at end of file +ContainerOp = _proxy_container_op_props(ContainerOp) From f91ab96cc88dfa8660dbf84ef034f4cc458ae141 Mon Sep 17 00:00:00 2001 From: ucdmkt Date: Fri, 26 Apr 2019 00:02:27 -0700 Subject: [PATCH 42/43] Minor fix on samples/tfx-oss/README.md (#969) * minor fixes to tfx-oss README * minor fix on documentation. * update location of tfx/examples * update location of tfx/examples --- samples/tfx-oss/README.md | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/samples/tfx-oss/README.md b/samples/tfx-oss/README.md index 24cf92af742..d197a2c6216 100644 --- a/samples/tfx-oss/README.md +++ b/samples/tfx-oss/README.md @@ -18,10 +18,11 @@ conda create -n tfx-kfp pip python=3.5.3 then activate the environment. -Install TFX and Kubeflow Pipelines SDK +Install TensorFlow, TFX and Kubeflow Pipelines SDK ``` -!pip3 install https://storage.googleapis.com/ml-pipeline/tfx/tfx-0.12.0rc0-py2.py3-none-any.whl -!pip3 install https://storage.googleapis.com/ml-pipeline/release/0.1.10/kfp.tar.gz --upgrade +pip install tensorflow --upgrade +pip install https://storage.googleapis.com/ml-pipeline/tfx/tfx-0.12.0rc0-py2.py3-none-any.whl +pip install https://storage.googleapis.com/ml-pipeline/release/0.1.10/kfp.tar.gz --upgrade ``` Clone TFX github repo @@ -31,20 +32,20 @@ git clone https://github.com/tensorflow/tfx Upload the utility code to your storage bucket. You can modify this code if needed for a different dataset. ``` -gsutil cp tfx/examples/chicago_taxi_pipeline/taxi_utils.py gs://my-bucket// +gsutil cp tfx/tfx/examples/chicago_taxi_pipeline/taxi_utils.py gs://my-bucket// ``` If gsutil does not work, try `tensorflow.gfile`: ``` from tensorflow import gfile -gfile.Copy('tfx/examples/chicago_taxi_pipeline/taxi_utils.py', 'gs:////taxi_utils.py') +gfile.Copy('tfx/tfx/examples/chicago_taxi_pipeline/taxi_utils.py', 'gs:////taxi_utils.py') ``` ## Configure the TFX Pipeline Modify the pipeline configuration file at ``` -tfx/examples/chicago_taxi_pipeline/taxi_pipeline_kubeflow.py +tfx/tfx/examples/chicago_taxi_pipeline/taxi_pipeline_kubeflow.py ``` Configure - Set `_input_bucket` to the GCS directory where you've copied taxi_utils.py. I.e. gs://// @@ -54,7 +55,7 @@ Configure ## Compile and run the pipeline ``` -python tfx/examples/chicago_taxi_pipeline/taxi_pipeline_kubeflow.py +python tfx/tfx/examples/chicago_taxi_pipeline/taxi_pipeline_kubeflow.py ``` This will generate a file named chicago_taxi_pipeline_kubeflow.tar.gz -Upload this file to the Pipelines Cluster and create a run. +Upload this file to the Pipelines Cluster and crate a run. 
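The command-line fix in PATCH 41 above hinges on the new `as_string_list` helper. The standalone sketch below copies that helper from the diff so its stringification behaviour can be checked without installing kfp; the sample argument values in the assertions are illustrative only.

```python
# Dependency-free sketch of the as_string_list helper added in the SDK fix
# above, copied from the diff so its behaviour can be verified in isolation.
from typing import Any, List, Optional, Sequence, Union


def as_string_list(list_or_str: Optional[Union[Any, Sequence[Any]]]) -> List[str]:
    """Convert any value except None to a list of strings."""
    if list_or_str is None:
        return None
    if isinstance(list_or_str, Sequence) and not isinstance(list_or_str, str):
        list_value = list_or_str
    else:
        list_value = [list_or_str]
    return [str(item) for item in list_value]


# Non-string items such as ints are now serialized as strings, so the compiled
# Argo workflow only contains string command-line arguments.
assert as_string_list(['--max-steps', 500]) == ['--max-steps', '500']
assert as_string_list('train.py') == ['train.py']
assert as_string_list(None) is None
```

Because both the `command`/`arguments` setters and the `ContainerOp` constructor route through this helper, compiled workflows no longer carry integer items that the back-end fails to load.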
From 7228ba1f0bde296a8383a4ddad2796dda0498bb2 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Fri, 26 Apr 2019 10:24:26 -0700 Subject: [PATCH 43/43] Release b0147bdbed9f25212408e0468a475289e80e0406 (#1238) * Updated component images to version b0147bdbed9f25212408e0468a475289e80e0406 * Updated components to version e8524eefb138725fc06600d1956da0f4dd477178 * Updated the Python SDK version --- components/dataflow/predict/component.yaml | 2 +- components/dataflow/tfdv/component.yaml | 2 +- components/dataflow/tfma/component.yaml | 2 +- components/dataflow/tft/component.yaml | 2 +- components/gcp/bigquery/query/README.md | 2 +- components/gcp/bigquery/query/component.yaml | 2 +- components/gcp/bigquery/query/sample.ipynb | 2 +- components/gcp/dataflow/launch_python/README.md | 2 +- .../gcp/dataflow/launch_python/component.yaml | 2 +- .../gcp/dataflow/launch_python/sample.ipynb | 2 +- .../gcp/dataflow/launch_template/README.md | 2 +- .../gcp/dataflow/launch_template/component.yaml | 2 +- .../gcp/dataflow/launch_template/sample.ipynb | 2 +- components/gcp/dataproc/create_cluster/README.md | 2 +- .../gcp/dataproc/create_cluster/component.yaml | 2 +- .../gcp/dataproc/create_cluster/sample.ipynb | 2 +- components/gcp/dataproc/delete_cluster/README.md | 2 +- .../gcp/dataproc/delete_cluster/component.yaml | 2 +- .../gcp/dataproc/delete_cluster/sample.ipynb | 2 +- .../gcp/dataproc/submit_hadoop_job/README.md | 2 +- .../dataproc/submit_hadoop_job/component.yaml | 2 +- .../gcp/dataproc/submit_hadoop_job/sample.ipynb | 2 +- .../gcp/dataproc/submit_hive_job/README.md | 2 +- .../gcp/dataproc/submit_hive_job/component.yaml | 2 +- .../gcp/dataproc/submit_hive_job/sample.ipynb | 2 +- components/gcp/dataproc/submit_pig_job/README.md | 2 +- .../gcp/dataproc/submit_pig_job/component.yaml | 2 +- .../gcp/dataproc/submit_pig_job/sample.ipynb | 2 +- .../gcp/dataproc/submit_pyspark_job/README.md | 2 +- .../dataproc/submit_pyspark_job/component.yaml | 2 +- .../gcp/dataproc/submit_pyspark_job/sample.ipynb | 2 +- .../gcp/dataproc/submit_spark_job/README.md | 2 +- .../gcp/dataproc/submit_spark_job/component.yaml | 2 +- .../gcp/dataproc/submit_spark_job/sample.ipynb | 2 +- .../gcp/dataproc/submit_sparksql_job/README.md | 2 +- .../dataproc/submit_sparksql_job/component.yaml | 2 +- .../dataproc/submit_sparksql_job/sample.ipynb | 2 +- components/gcp/ml_engine/batch_predict/README.md | 2 +- .../gcp/ml_engine/batch_predict/component.yaml | 2 +- .../gcp/ml_engine/batch_predict/sample.ipynb | 2 +- components/gcp/ml_engine/deploy/README.md | 2 +- components/gcp/ml_engine/deploy/component.yaml | 2 +- components/gcp/ml_engine/deploy/sample.ipynb | 2 +- components/gcp/ml_engine/train/README.md | 2 +- components/gcp/ml_engine/train/component.yaml | 2 +- components/gcp/ml_engine/train/sample.ipynb | 2 +- components/kubeflow/deployer/component.yaml | 2 +- components/kubeflow/dnntrainer/component.yaml | 2 +- .../launcher/kubeflow_tfjob_launcher_op.py | 2 +- .../kubeflow/launcher/src/train.template.yaml | 6 +++--- components/local/confusion_matrix/component.yaml | 2 +- components/local/roc/component.yaml | 2 +- .../kubeflow-training-classification.py | 10 +++++----- ...eFlow Pipeline Using TFX OSS Components.ipynb | 14 +++++++------- samples/resnet-cmle/resnet-train-pipeline.py | 6 +++--- samples/tfx/taxi-cab-classification-pipeline.py | 16 ++++++++-------- samples/xgboost-spark/xgboost-training-cm.py | 16 ++++++++-------- sdk/python/setup.py | 2 +- 58 files changed, 86 insertions(+), 86 deletions(-) diff --git 
a/components/dataflow/predict/component.yaml b/components/dataflow/predict/component.yaml index 50bd9770738..5e05ab570a2 100644 --- a/components/dataflow/predict/component.yaml +++ b/components/dataflow/predict/component.yaml @@ -15,7 +15,7 @@ outputs: - {name: Predictions dir, type: GCSPath, description: 'GCS or local directory.'} #Will contain prediction_results-* and schema.json files; TODO: Split outputs and replace dir with single file # type: {GCSPath: {path_type: Directory}} implementation: container: - image: gcr.io/ml-pipeline/ml-pipeline-dataflow-tf-predict:e20fad3e161e88226c83437271adb063221459b9 + image: gcr.io/ml-pipeline/ml-pipeline-dataflow-tf-predict:b0147bdbed9f25212408e0468a475289e80e0406 command: [python2, /ml/predict.py] args: [ --data, {inputValue: Data file pattern}, diff --git a/components/dataflow/tfdv/component.yaml b/components/dataflow/tfdv/component.yaml index a07fb31287c..bc186561210 100644 --- a/components/dataflow/tfdv/component.yaml +++ b/components/dataflow/tfdv/component.yaml @@ -18,7 +18,7 @@ outputs: - {name: Validation result, type: String, description: Indicates whether anomalies were detected or not.} implementation: container: - image: gcr.io/ml-pipeline/ml-pipeline-dataflow-tfdv:e20fad3e161e88226c83437271adb063221459b9 + image: gcr.io/ml-pipeline/ml-pipeline-dataflow-tfdv:b0147bdbed9f25212408e0468a475289e80e0406 command: [python2, /ml/validate.py] args: [ --csv-data-for-inference, {inputValue: Inference data}, diff --git a/components/dataflow/tfma/component.yaml b/components/dataflow/tfma/component.yaml index 1eead992608..d3900f81a93 100644 --- a/components/dataflow/tfma/component.yaml +++ b/components/dataflow/tfma/component.yaml @@ -17,7 +17,7 @@ outputs: - {name: Analysis results dir, type: GCSPath, description: GCS or local directory where the analysis results should were written.} # type: {GCSPath: {path_type: Directory}} implementation: container: - image: gcr.io/ml-pipeline/ml-pipeline-dataflow-tfma:e20fad3e161e88226c83437271adb063221459b9 + image: gcr.io/ml-pipeline/ml-pipeline-dataflow-tfma:b0147bdbed9f25212408e0468a475289e80e0406 command: [python2, /ml/model_analysis.py] args: [ --model, {inputValue: Model}, diff --git a/components/dataflow/tft/component.yaml b/components/dataflow/tft/component.yaml index de356d4506d..d96b1f17052 100644 --- a/components/dataflow/tft/component.yaml +++ b/components/dataflow/tft/component.yaml @@ -12,7 +12,7 @@ outputs: - {name: Transformed data dir, type: GCSPath} # type: {GCSPath: {path_type: Directory}} implementation: container: - image: gcr.io/ml-pipeline/ml-pipeline-dataflow-tft:e20fad3e161e88226c83437271adb063221459b9 + image: gcr.io/ml-pipeline/ml-pipeline-dataflow-tft:b0147bdbed9f25212408e0468a475289e80e0406 command: [python2, /ml/transform.py] args: [ --train, {inputValue: Training data file pattern}, diff --git a/components/gcp/bigquery/query/README.md b/components/gcp/bigquery/query/README.md index f42dff1e85e..50f3e1ce99d 100644 --- a/components/gcp/bigquery/query/README.md +++ b/components/gcp/bigquery/query/README.md @@ -89,7 +89,7 @@ KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.14/kfp.tar import kfp.components as comp bigquery_query_op = comp.load_component_from_url( - 'https://raw.githubusercontent.com/kubeflow/pipelines/d2f5cc92a46012b9927209e2aaccab70961582dc/components/gcp/bigquery/query/component.yaml') + 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/bigquery/query/component.yaml') 
help(bigquery_query_op) ``` diff --git a/components/gcp/bigquery/query/component.yaml b/components/gcp/bigquery/query/component.yaml index 3edc4298d17..118f20976fa 100644 --- a/components/gcp/bigquery/query/component.yaml +++ b/components/gcp/bigquery/query/component.yaml @@ -54,7 +54,7 @@ outputs: type: GCSPath implementation: container: - image: gcr.io/ml-pipeline/ml-pipeline-gcp:e20fad3e161e88226c83437271adb063221459b9 + image: gcr.io/ml-pipeline/ml-pipeline-gcp:b0147bdbed9f25212408e0468a475289e80e0406 args: [ kfp_component.google.bigquery, query, --query, {inputValue: query}, diff --git a/components/gcp/bigquery/query/sample.ipynb b/components/gcp/bigquery/query/sample.ipynb index 9da2362ef87..d74cd169da2 100644 --- a/components/gcp/bigquery/query/sample.ipynb +++ b/components/gcp/bigquery/query/sample.ipynb @@ -108,7 +108,7 @@ "import kfp.components as comp\n", "\n", "bigquery_query_op = comp.load_component_from_url(\n", - " 'https://raw.githubusercontent.com/kubeflow/pipelines/d2f5cc92a46012b9927209e2aaccab70961582dc/components/gcp/bigquery/query/component.yaml')\n", + " 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/bigquery/query/component.yaml')\n", "help(bigquery_query_op)" ] }, diff --git a/components/gcp/dataflow/launch_python/README.md b/components/gcp/dataflow/launch_python/README.md index c3e9e28fd06..36e65e90002 100644 --- a/components/gcp/dataflow/launch_python/README.md +++ b/components/gcp/dataflow/launch_python/README.md @@ -77,7 +77,7 @@ KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.14/kfp.tar import kfp.components as comp dataflow_python_op = comp.load_component_from_url( - 'https://raw.githubusercontent.com/kubeflow/pipelines/d2f5cc92a46012b9927209e2aaccab70961582dc/components/gcp/dataflow/launch_python/component.yaml') + 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/dataflow/launch_python/component.yaml') help(dataflow_python_op) ``` diff --git a/components/gcp/dataflow/launch_python/component.yaml b/components/gcp/dataflow/launch_python/component.yaml index ace78957cbc..7fcadb2bb4a 100644 --- a/components/gcp/dataflow/launch_python/component.yaml +++ b/components/gcp/dataflow/launch_python/component.yaml @@ -48,7 +48,7 @@ outputs: type: String implementation: container: - image: gcr.io/ml-pipeline/ml-pipeline-gcp:e20fad3e161e88226c83437271adb063221459b9 + image: gcr.io/ml-pipeline/ml-pipeline-gcp:b0147bdbed9f25212408e0468a475289e80e0406 args: [ kfp_component.google.dataflow, launch_python, --python_file_path, {inputValue: python_file_path}, diff --git a/components/gcp/dataflow/launch_python/sample.ipynb b/components/gcp/dataflow/launch_python/sample.ipynb index 1b65e434305..1fe6e425c82 100644 --- a/components/gcp/dataflow/launch_python/sample.ipynb +++ b/components/gcp/dataflow/launch_python/sample.ipynb @@ -95,7 +95,7 @@ "import kfp.components as comp\n", "\n", "dataflow_python_op = comp.load_component_from_url(\n", - " 'https://raw.githubusercontent.com/kubeflow/pipelines/d2f5cc92a46012b9927209e2aaccab70961582dc/components/gcp/dataflow/launch_python/component.yaml')\n", + " 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/dataflow/launch_python/component.yaml')\n", "help(dataflow_python_op)" ] }, diff --git a/components/gcp/dataflow/launch_template/README.md b/components/gcp/dataflow/launch_template/README.md index d04adad6363..451dfb03afa 100644 --- 
a/components/gcp/dataflow/launch_template/README.md +++ b/components/gcp/dataflow/launch_template/README.md @@ -67,7 +67,7 @@ KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.14/kfp.tar import kfp.components as comp dataflow_template_op = comp.load_component_from_url( - 'https://raw.githubusercontent.com/kubeflow/pipelines/d2f5cc92a46012b9927209e2aaccab70961582dc/components/gcp/dataflow/launch_template/component.yaml') + 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/dataflow/launch_template/component.yaml') help(dataflow_template_op) ``` diff --git a/components/gcp/dataflow/launch_template/component.yaml b/components/gcp/dataflow/launch_template/component.yaml index 72398a6d2eb..4b348a67de5 100644 --- a/components/gcp/dataflow/launch_template/component.yaml +++ b/components/gcp/dataflow/launch_template/component.yaml @@ -58,7 +58,7 @@ outputs: type: String implementation: container: - image: gcr.io/ml-pipeline/ml-pipeline-gcp:e20fad3e161e88226c83437271adb063221459b9 + image: gcr.io/ml-pipeline/ml-pipeline-gcp:b0147bdbed9f25212408e0468a475289e80e0406 args: [ kfp_component.google.dataflow, launch_template, --project_id, {inputValue: project_id}, diff --git a/components/gcp/dataflow/launch_template/sample.ipynb b/components/gcp/dataflow/launch_template/sample.ipynb index ec313804895..59e29578cc9 100644 --- a/components/gcp/dataflow/launch_template/sample.ipynb +++ b/components/gcp/dataflow/launch_template/sample.ipynb @@ -85,7 +85,7 @@ "import kfp.components as comp\n", "\n", "dataflow_template_op = comp.load_component_from_url(\n", - " 'https://raw.githubusercontent.com/kubeflow/pipelines/d2f5cc92a46012b9927209e2aaccab70961582dc/components/gcp/dataflow/launch_template/component.yaml')\n", + " 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/dataflow/launch_template/component.yaml')\n", "help(dataflow_template_op)" ] }, diff --git a/components/gcp/dataproc/create_cluster/README.md b/components/gcp/dataproc/create_cluster/README.md index 2ffedc57163..f953925508e 100644 --- a/components/gcp/dataproc/create_cluster/README.md +++ b/components/gcp/dataproc/create_cluster/README.md @@ -74,7 +74,7 @@ KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.14/kfp.tar import kfp.components as comp dataproc_create_cluster_op = comp.load_component_from_url( - 'https://raw.githubusercontent.com/kubeflow/pipelines/d2f5cc92a46012b9927209e2aaccab70961582dc/components/gcp/dataproc/create_cluster/component.yaml') + 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/dataproc/create_cluster/component.yaml') help(dataproc_create_cluster_op) ``` diff --git a/components/gcp/dataproc/create_cluster/component.yaml b/components/gcp/dataproc/create_cluster/component.yaml index 51611880956..d2659f9163d 100644 --- a/components/gcp/dataproc/create_cluster/component.yaml +++ b/components/gcp/dataproc/create_cluster/component.yaml @@ -65,7 +65,7 @@ outputs: type: String implementation: container: - image: gcr.io/ml-pipeline/ml-pipeline-gcp:e20fad3e161e88226c83437271adb063221459b9 + image: gcr.io/ml-pipeline/ml-pipeline-gcp:b0147bdbed9f25212408e0468a475289e80e0406 args: [ kfp_component.google.dataproc, create_cluster, --project_id, {inputValue: project_id}, diff --git a/components/gcp/dataproc/create_cluster/sample.ipynb b/components/gcp/dataproc/create_cluster/sample.ipynb index 16a7dd8c60b..447e4826e70 
100644 --- a/components/gcp/dataproc/create_cluster/sample.ipynb +++ b/components/gcp/dataproc/create_cluster/sample.ipynb @@ -92,7 +92,7 @@ "import kfp.components as comp\n", "\n", "dataproc_create_cluster_op = comp.load_component_from_url(\n", - " 'https://raw.githubusercontent.com/kubeflow/pipelines/d2f5cc92a46012b9927209e2aaccab70961582dc/components/gcp/dataproc/create_cluster/component.yaml')\n", + " 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/dataproc/create_cluster/component.yaml')\n", "help(dataproc_create_cluster_op)" ] }, diff --git a/components/gcp/dataproc/delete_cluster/README.md b/components/gcp/dataproc/delete_cluster/README.md index 5cb238c607f..77c62656943 100644 --- a/components/gcp/dataproc/delete_cluster/README.md +++ b/components/gcp/dataproc/delete_cluster/README.md @@ -54,7 +54,7 @@ KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.14/kfp.tar import kfp.components as comp dataproc_delete_cluster_op = comp.load_component_from_url( - 'https://raw.githubusercontent.com/kubeflow/pipelines/d2f5cc92a46012b9927209e2aaccab70961582dc/components/gcp/dataproc/delete_cluster/component.yaml') + 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/dataproc/delete_cluster/component.yaml') help(dataproc_delete_cluster_op) ``` diff --git a/components/gcp/dataproc/delete_cluster/component.yaml b/components/gcp/dataproc/delete_cluster/component.yaml index 30cb279949f..9cbef1fbf3e 100644 --- a/components/gcp/dataproc/delete_cluster/component.yaml +++ b/components/gcp/dataproc/delete_cluster/component.yaml @@ -33,7 +33,7 @@ inputs: type: Integer implementation: container: - image: gcr.io/ml-pipeline/ml-pipeline-gcp:e20fad3e161e88226c83437271adb063221459b9 + image: gcr.io/ml-pipeline/ml-pipeline-gcp:b0147bdbed9f25212408e0468a475289e80e0406 args: [ kfp_component.google.dataproc, delete_cluster, --project_id, {inputValue: project_id}, diff --git a/components/gcp/dataproc/delete_cluster/sample.ipynb b/components/gcp/dataproc/delete_cluster/sample.ipynb index d0de6367956..24f14bf1111 100644 --- a/components/gcp/dataproc/delete_cluster/sample.ipynb +++ b/components/gcp/dataproc/delete_cluster/sample.ipynb @@ -73,7 +73,7 @@ "import kfp.components as comp\n", "\n", "dataproc_delete_cluster_op = comp.load_component_from_url(\n", - " 'https://raw.githubusercontent.com/kubeflow/pipelines/d2f5cc92a46012b9927209e2aaccab70961582dc/components/gcp/dataproc/delete_cluster/component.yaml')\n", + " 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/dataproc/delete_cluster/component.yaml')\n", "help(dataproc_delete_cluster_op)" ] }, diff --git a/components/gcp/dataproc/submit_hadoop_job/README.md b/components/gcp/dataproc/submit_hadoop_job/README.md index d1ae5d3c975..2a8e545b53f 100644 --- a/components/gcp/dataproc/submit_hadoop_job/README.md +++ b/components/gcp/dataproc/submit_hadoop_job/README.md @@ -72,7 +72,7 @@ KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.14/kfp.tar import kfp.components as comp dataproc_submit_hadoop_job_op = comp.load_component_from_url( - 'https://raw.githubusercontent.com/kubeflow/pipelines/d2f5cc92a46012b9927209e2aaccab70961582dc/components/gcp/dataproc/submit_hadoop_job/component.yaml') + 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/dataproc/submit_hadoop_job/component.yaml') 
help(dataproc_submit_hadoop_job_op) ``` diff --git a/components/gcp/dataproc/submit_hadoop_job/component.yaml b/components/gcp/dataproc/submit_hadoop_job/component.yaml index 66533e97fbd..781ab883300 100644 --- a/components/gcp/dataproc/submit_hadoop_job/component.yaml +++ b/components/gcp/dataproc/submit_hadoop_job/component.yaml @@ -75,7 +75,7 @@ outputs: type: String implementation: container: - image: gcr.io/ml-pipeline/ml-pipeline-gcp:e20fad3e161e88226c83437271adb063221459b9 + image: gcr.io/ml-pipeline/ml-pipeline-gcp:b0147bdbed9f25212408e0468a475289e80e0406 args: [ kfp_component.google.dataproc, submit_hadoop_job, --project_id, {inputValue: project_id}, diff --git a/components/gcp/dataproc/submit_hadoop_job/sample.ipynb b/components/gcp/dataproc/submit_hadoop_job/sample.ipynb index dc4b1230ebe..61651f3cf3d 100644 --- a/components/gcp/dataproc/submit_hadoop_job/sample.ipynb +++ b/components/gcp/dataproc/submit_hadoop_job/sample.ipynb @@ -90,7 +90,7 @@ "import kfp.components as comp\n", "\n", "dataproc_submit_hadoop_job_op = comp.load_component_from_url(\n", - " 'https://raw.githubusercontent.com/kubeflow/pipelines/d2f5cc92a46012b9927209e2aaccab70961582dc/components/gcp/dataproc/submit_hadoop_job/component.yaml')\n", + " 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/dataproc/submit_hadoop_job/component.yaml')\n", "help(dataproc_submit_hadoop_job_op)" ] }, diff --git a/components/gcp/dataproc/submit_hive_job/README.md b/components/gcp/dataproc/submit_hive_job/README.md index 86408569c5b..baeef7812a0 100644 --- a/components/gcp/dataproc/submit_hive_job/README.md +++ b/components/gcp/dataproc/submit_hive_job/README.md @@ -63,7 +63,7 @@ KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.14/kfp.tar import kfp.components as comp dataproc_submit_hive_job_op = comp.load_component_from_url( - 'https://raw.githubusercontent.com/kubeflow/pipelines/d2f5cc92a46012b9927209e2aaccab70961582dc/components/gcp/dataproc/submit_hive_job/component.yaml') + 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/dataproc/submit_hive_job/component.yaml') help(dataproc_submit_hive_job_op) ``` diff --git a/components/gcp/dataproc/submit_hive_job/component.yaml b/components/gcp/dataproc/submit_hive_job/component.yaml index 4d1a2505849..350467ef7cb 100644 --- a/components/gcp/dataproc/submit_hive_job/component.yaml +++ b/components/gcp/dataproc/submit_hive_job/component.yaml @@ -70,7 +70,7 @@ outputs: type: String implementation: container: - image: gcr.io/ml-pipeline/ml-pipeline-gcp:e20fad3e161e88226c83437271adb063221459b9 + image: gcr.io/ml-pipeline/ml-pipeline-gcp:b0147bdbed9f25212408e0468a475289e80e0406 args: [ kfp_component.google.dataproc, submit_hive_job, --project_id, {inputValue: project_id}, diff --git a/components/gcp/dataproc/submit_hive_job/sample.ipynb b/components/gcp/dataproc/submit_hive_job/sample.ipynb index a03cd64d82a..2783d365473 100644 --- a/components/gcp/dataproc/submit_hive_job/sample.ipynb +++ b/components/gcp/dataproc/submit_hive_job/sample.ipynb @@ -81,7 +81,7 @@ "import kfp.components as comp\n", "\n", "dataproc_submit_hive_job_op = comp.load_component_from_url(\n", - " 'https://raw.githubusercontent.com/kubeflow/pipelines/d2f5cc92a46012b9927209e2aaccab70961582dc/components/gcp/dataproc/submit_hive_job/component.yaml')\n", + " 
'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/dataproc/submit_hive_job/component.yaml')\n", "help(dataproc_submit_hive_job_op)" ] }, diff --git a/components/gcp/dataproc/submit_pig_job/README.md b/components/gcp/dataproc/submit_pig_job/README.md index 70ead813b0e..c54ccbc4945 100644 --- a/components/gcp/dataproc/submit_pig_job/README.md +++ b/components/gcp/dataproc/submit_pig_job/README.md @@ -66,7 +66,7 @@ KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.14/kfp.tar import kfp.components as comp dataproc_submit_pig_job_op = comp.load_component_from_url( - 'https://raw.githubusercontent.com/kubeflow/pipelines/d2f5cc92a46012b9927209e2aaccab70961582dc/components/gcp/dataproc/submit_pig_job/component.yaml') + 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/dataproc/submit_pig_job/component.yaml') help(dataproc_submit_pig_job_op) ``` diff --git a/components/gcp/dataproc/submit_pig_job/component.yaml b/components/gcp/dataproc/submit_pig_job/component.yaml index 18ae945d145..711c8079cb1 100644 --- a/components/gcp/dataproc/submit_pig_job/component.yaml +++ b/components/gcp/dataproc/submit_pig_job/component.yaml @@ -70,7 +70,7 @@ outputs: type: String implementation: container: - image: gcr.io/ml-pipeline/ml-pipeline-gcp:e20fad3e161e88226c83437271adb063221459b9 + image: gcr.io/ml-pipeline/ml-pipeline-gcp:b0147bdbed9f25212408e0468a475289e80e0406 args: [ kfp_component.google.dataproc, submit_pig_job, --project_id, {inputValue: project_id}, diff --git a/components/gcp/dataproc/submit_pig_job/sample.ipynb b/components/gcp/dataproc/submit_pig_job/sample.ipynb index b695b2eadaa..c55b404dcbb 100644 --- a/components/gcp/dataproc/submit_pig_job/sample.ipynb +++ b/components/gcp/dataproc/submit_pig_job/sample.ipynb @@ -84,7 +84,7 @@ "import kfp.components as comp\n", "\n", "dataproc_submit_pig_job_op = comp.load_component_from_url(\n", - " 'https://raw.githubusercontent.com/kubeflow/pipelines/d2f5cc92a46012b9927209e2aaccab70961582dc/components/gcp/dataproc/submit_pig_job/component.yaml')\n", + " 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/dataproc/submit_pig_job/component.yaml')\n", "help(dataproc_submit_pig_job_op)" ] }, diff --git a/components/gcp/dataproc/submit_pyspark_job/README.md b/components/gcp/dataproc/submit_pyspark_job/README.md index 7ba0533cb3e..ed13914d98d 100644 --- a/components/gcp/dataproc/submit_pyspark_job/README.md +++ b/components/gcp/dataproc/submit_pyspark_job/README.md @@ -67,7 +67,7 @@ KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.14/kfp.tar import kfp.components as comp dataproc_submit_pyspark_job_op = comp.load_component_from_url( - 'https://raw.githubusercontent.com/kubeflow/pipelines/d2f5cc92a46012b9927209e2aaccab70961582dc/components/gcp/dataproc/submit_pyspark_job/component.yaml') + 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/dataproc/submit_pyspark_job/component.yaml') help(dataproc_submit_pyspark_job_op) ``` diff --git a/components/gcp/dataproc/submit_pyspark_job/component.yaml b/components/gcp/dataproc/submit_pyspark_job/component.yaml index 476540f1d06..94a91a63660 100644 --- a/components/gcp/dataproc/submit_pyspark_job/component.yaml +++ b/components/gcp/dataproc/submit_pyspark_job/component.yaml @@ -64,7 +64,7 @@ outputs: type: String implementation: container: - image: 
gcr.io/ml-pipeline/ml-pipeline-gcp:e20fad3e161e88226c83437271adb063221459b9 + image: gcr.io/ml-pipeline/ml-pipeline-gcp:b0147bdbed9f25212408e0468a475289e80e0406 args: [ kfp_component.google.dataproc, submit_pyspark_job, --project_id, {inputValue: project_id}, diff --git a/components/gcp/dataproc/submit_pyspark_job/sample.ipynb b/components/gcp/dataproc/submit_pyspark_job/sample.ipynb index f9f8bc09245..7a4f5f1583b 100644 --- a/components/gcp/dataproc/submit_pyspark_job/sample.ipynb +++ b/components/gcp/dataproc/submit_pyspark_job/sample.ipynb @@ -86,7 +86,7 @@ "import kfp.components as comp\n", "\n", "dataproc_submit_pyspark_job_op = comp.load_component_from_url(\n", - " 'https://raw.githubusercontent.com/kubeflow/pipelines/d2f5cc92a46012b9927209e2aaccab70961582dc/components/gcp/dataproc/submit_pyspark_job/component.yaml')\n", + " 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/dataproc/submit_pyspark_job/component.yaml')\n", "help(dataproc_submit_pyspark_job_op)" ] }, diff --git a/components/gcp/dataproc/submit_spark_job/README.md b/components/gcp/dataproc/submit_spark_job/README.md index 5cad85794b5..0855b1c2874 100644 --- a/components/gcp/dataproc/submit_spark_job/README.md +++ b/components/gcp/dataproc/submit_spark_job/README.md @@ -80,7 +80,7 @@ KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.14/kfp.tar import kfp.components as comp dataproc_submit_spark_job_op = comp.load_component_from_url( - 'https://raw.githubusercontent.com/kubeflow/pipelines/d2f5cc92a46012b9927209e2aaccab70961582dc/components/gcp/dataproc/submit_spark_job/component.yaml') + 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/dataproc/submit_spark_job/component.yaml') help(dataproc_submit_spark_job_op) ``` diff --git a/components/gcp/dataproc/submit_spark_job/component.yaml b/components/gcp/dataproc/submit_spark_job/component.yaml index e833f3444a9..cea2568b4e2 100644 --- a/components/gcp/dataproc/submit_spark_job/component.yaml +++ b/components/gcp/dataproc/submit_spark_job/component.yaml @@ -71,7 +71,7 @@ outputs: type: String implementation: container: - image: gcr.io/ml-pipeline/ml-pipeline-gcp:e20fad3e161e88226c83437271adb063221459b9 + image: gcr.io/ml-pipeline/ml-pipeline-gcp:b0147bdbed9f25212408e0468a475289e80e0406 args: [ kfp_component.google.dataproc, submit_spark_job, --project_id, {inputValue: project_id}, diff --git a/components/gcp/dataproc/submit_spark_job/sample.ipynb b/components/gcp/dataproc/submit_spark_job/sample.ipynb index 3d2b79cdc42..3e19e6c303c 100644 --- a/components/gcp/dataproc/submit_spark_job/sample.ipynb +++ b/components/gcp/dataproc/submit_spark_job/sample.ipynb @@ -99,7 +99,7 @@ "import kfp.components as comp\n", "\n", "dataproc_submit_spark_job_op = comp.load_component_from_url(\n", - " 'https://raw.githubusercontent.com/kubeflow/pipelines/d2f5cc92a46012b9927209e2aaccab70961582dc/components/gcp/dataproc/submit_spark_job/component.yaml')\n", + " 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/dataproc/submit_spark_job/component.yaml')\n", "help(dataproc_submit_spark_job_op)" ] }, diff --git a/components/gcp/dataproc/submit_sparksql_job/README.md b/components/gcp/dataproc/submit_sparksql_job/README.md index 4b743859ad8..7367aea0efd 100644 --- a/components/gcp/dataproc/submit_sparksql_job/README.md +++ b/components/gcp/dataproc/submit_sparksql_job/README.md @@ -62,7 +62,7 @@ 
KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.14/kfp.tar import kfp.components as comp dataproc_submit_sparksql_job_op = comp.load_component_from_url( - 'https://raw.githubusercontent.com/kubeflow/pipelines/d2f5cc92a46012b9927209e2aaccab70961582dc/components/gcp/dataproc/submit_sparksql_job/component.yaml') + 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/dataproc/submit_sparksql_job/component.yaml') help(dataproc_submit_sparksql_job_op) ``` diff --git a/components/gcp/dataproc/submit_sparksql_job/component.yaml b/components/gcp/dataproc/submit_sparksql_job/component.yaml index d5a0d13bf66..b6f6ac2b275 100644 --- a/components/gcp/dataproc/submit_sparksql_job/component.yaml +++ b/components/gcp/dataproc/submit_sparksql_job/component.yaml @@ -70,7 +70,7 @@ outputs: type: String implementation: container: - image: gcr.io/ml-pipeline/ml-pipeline-gcp:e20fad3e161e88226c83437271adb063221459b9 + image: gcr.io/ml-pipeline/ml-pipeline-gcp:b0147bdbed9f25212408e0468a475289e80e0406 args: [ kfp_component.google.dataproc, submit_sparksql_job, --project_id, {inputValue: project_id}, diff --git a/components/gcp/dataproc/submit_sparksql_job/sample.ipynb b/components/gcp/dataproc/submit_sparksql_job/sample.ipynb index 7e1ec4b84e8..51d029c1d76 100644 --- a/components/gcp/dataproc/submit_sparksql_job/sample.ipynb +++ b/components/gcp/dataproc/submit_sparksql_job/sample.ipynb @@ -81,7 +81,7 @@ "import kfp.components as comp\n", "\n", "dataproc_submit_sparksql_job_op = comp.load_component_from_url(\n", - " 'https://raw.githubusercontent.com/kubeflow/pipelines/d2f5cc92a46012b9927209e2aaccab70961582dc/components/gcp/dataproc/submit_sparksql_job/component.yaml')\n", + " 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/dataproc/submit_sparksql_job/component.yaml')\n", "help(dataproc_submit_sparksql_job_op)" ] }, diff --git a/components/gcp/ml_engine/batch_predict/README.md b/components/gcp/ml_engine/batch_predict/README.md index c6674458606..ebfc0af1fa7 100644 --- a/components/gcp/ml_engine/batch_predict/README.md +++ b/components/gcp/ml_engine/batch_predict/README.md @@ -94,7 +94,7 @@ KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.14/kfp.tar import kfp.components as comp mlengine_batch_predict_op = comp.load_component_from_url( - 'https://raw.githubusercontent.com/kubeflow/pipelines/d2f5cc92a46012b9927209e2aaccab70961582dc/components/gcp/ml_engine/batch_predict/component.yaml') + 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/ml_engine/batch_predict/component.yaml') help(mlengine_batch_predict_op) ``` diff --git a/components/gcp/ml_engine/batch_predict/component.yaml b/components/gcp/ml_engine/batch_predict/component.yaml index d08290b2f53..0677cf50a81 100644 --- a/components/gcp/ml_engine/batch_predict/component.yaml +++ b/components/gcp/ml_engine/batch_predict/component.yaml @@ -64,7 +64,7 @@ outputs: type: String implementation: container: - image: gcr.io/ml-pipeline/ml-pipeline-gcp:e20fad3e161e88226c83437271adb063221459b9 + image: gcr.io/ml-pipeline/ml-pipeline-gcp:b0147bdbed9f25212408e0468a475289e80e0406 args: [ kfp_component.google.ml_engine, batch_predict, --project_id, {inputValue: project_id}, diff --git a/components/gcp/ml_engine/batch_predict/sample.ipynb b/components/gcp/ml_engine/batch_predict/sample.ipynb index 92985e1b112..2596b111b7f 100644 --- 
a/components/gcp/ml_engine/batch_predict/sample.ipynb +++ b/components/gcp/ml_engine/batch_predict/sample.ipynb @@ -112,7 +112,7 @@ "import kfp.components as comp\n", "\n", "mlengine_batch_predict_op = comp.load_component_from_url(\n", - " 'https://raw.githubusercontent.com/kubeflow/pipelines/d2f5cc92a46012b9927209e2aaccab70961582dc/components/gcp/ml_engine/batch_predict/component.yaml')\n", + " 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/ml_engine/batch_predict/component.yaml')\n", "help(mlengine_batch_predict_op)" ] }, diff --git a/components/gcp/ml_engine/deploy/README.md b/components/gcp/ml_engine/deploy/README.md index de191af2c78..b36511b8ed6 100644 --- a/components/gcp/ml_engine/deploy/README.md +++ b/components/gcp/ml_engine/deploy/README.md @@ -110,7 +110,7 @@ KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.14/kfp.tar import kfp.components as comp mlengine_deploy_op = comp.load_component_from_url( - 'https://raw.githubusercontent.com/kubeflow/pipelines/d2f5cc92a46012b9927209e2aaccab70961582dc/components/gcp/ml_engine/deploy/component.yaml') + 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/ml_engine/deploy/component.yaml') help(mlengine_deploy_op) ``` diff --git a/components/gcp/ml_engine/deploy/component.yaml b/components/gcp/ml_engine/deploy/component.yaml index 51b2208e85a..1b92e1fc7e1 100644 --- a/components/gcp/ml_engine/deploy/component.yaml +++ b/components/gcp/ml_engine/deploy/component.yaml @@ -90,7 +90,7 @@ outputs: type: String implementation: container: - image: gcr.io/ml-pipeline/ml-pipeline-gcp:e20fad3e161e88226c83437271adb063221459b9 + image: gcr.io/ml-pipeline/ml-pipeline-gcp:b0147bdbed9f25212408e0468a475289e80e0406 args: [ kfp_component.google.ml_engine, deploy, --model_uri, {inputValue: model_uri}, diff --git a/components/gcp/ml_engine/deploy/sample.ipynb b/components/gcp/ml_engine/deploy/sample.ipynb index 1d3926a83ce..253bfdacb2c 100644 --- a/components/gcp/ml_engine/deploy/sample.ipynb +++ b/components/gcp/ml_engine/deploy/sample.ipynb @@ -128,7 +128,7 @@ "import kfp.components as comp\n", "\n", "mlengine_deploy_op = comp.load_component_from_url(\n", - " 'https://raw.githubusercontent.com/kubeflow/pipelines/d2f5cc92a46012b9927209e2aaccab70961582dc/components/gcp/ml_engine/deploy/component.yaml')\n", + " 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/ml_engine/deploy/component.yaml')\n", "help(mlengine_deploy_op)" ] }, diff --git a/components/gcp/ml_engine/train/README.md b/components/gcp/ml_engine/train/README.md index 0322cfc0a83..9b1d1510792 100644 --- a/components/gcp/ml_engine/train/README.md +++ b/components/gcp/ml_engine/train/README.md @@ -86,7 +86,7 @@ KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.14/kfp.tar import kfp.components as comp mlengine_train_op = comp.load_component_from_url( - 'https://raw.githubusercontent.com/kubeflow/pipelines/d2f5cc92a46012b9927209e2aaccab70961582dc/components/gcp/ml_engine/train/component.yaml') + 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/ml_engine/train/component.yaml') help(mlengine_train_op) ``` diff --git a/components/gcp/ml_engine/train/component.yaml b/components/gcp/ml_engine/train/component.yaml index de54d9802cc..8803b545b62 100644 --- a/components/gcp/ml_engine/train/component.yaml +++ 
b/components/gcp/ml_engine/train/component.yaml @@ -98,7 +98,7 @@ outputs: type: GCSPath implementation: container: - image: gcr.io/ml-pipeline/ml-pipeline-gcp:e20fad3e161e88226c83437271adb063221459b9 + image: gcr.io/ml-pipeline/ml-pipeline-gcp:b0147bdbed9f25212408e0468a475289e80e0406 args: [ kfp_component.google.ml_engine, train, --project_id, {inputValue: project_id}, diff --git a/components/gcp/ml_engine/train/sample.ipynb b/components/gcp/ml_engine/train/sample.ipynb index 718c73dccbd..be793ce0a44 100644 --- a/components/gcp/ml_engine/train/sample.ipynb +++ b/components/gcp/ml_engine/train/sample.ipynb @@ -104,7 +104,7 @@ "import kfp.components as comp\n", "\n", "mlengine_train_op = comp.load_component_from_url(\n", - " 'https://raw.githubusercontent.com/kubeflow/pipelines/d2f5cc92a46012b9927209e2aaccab70961582dc/components/gcp/ml_engine/train/component.yaml')\n", + " 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/ml_engine/train/component.yaml')\n", "help(mlengine_train_op)" ] }, diff --git a/components/kubeflow/deployer/component.yaml b/components/kubeflow/deployer/component.yaml index 2b24ac2f21f..0518499d304 100644 --- a/components/kubeflow/deployer/component.yaml +++ b/components/kubeflow/deployer/component.yaml @@ -11,7 +11,7 @@ inputs: # - {name: Endppoint URI, type: Serving URI, description: 'URI of the deployed prediction service..'} implementation: container: - image: gcr.io/ml-pipeline/ml-pipeline-kubeflow-deployer:e20fad3e161e88226c83437271adb063221459b9 + image: gcr.io/ml-pipeline/ml-pipeline-kubeflow-deployer:b0147bdbed9f25212408e0468a475289e80e0406 command: [/bin/deploy.sh] args: [ --model-export-path, {inputValue: Model dir}, diff --git a/components/kubeflow/dnntrainer/component.yaml b/components/kubeflow/dnntrainer/component.yaml index ba84c326f85..eafa3fea6c6 100644 --- a/components/kubeflow/dnntrainer/component.yaml +++ b/components/kubeflow/dnntrainer/component.yaml @@ -15,7 +15,7 @@ outputs: - {name: Training output dir, type: GCSPath, description: 'GCS or local directory.'} # type: {GCSPath: {path_type: Directory}} implementation: container: - image: gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:e20fad3e161e88226c83437271adb063221459b9 + image: gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:b0147bdbed9f25212408e0468a475289e80e0406 command: [python2, -m, trainer.task] args: [ --transformed-data-dir, {inputValue: Transformed data dir}, diff --git a/components/kubeflow/launcher/kubeflow_tfjob_launcher_op.py b/components/kubeflow/launcher/kubeflow_tfjob_launcher_op.py index 526b2b5348d..f992a93d52c 100644 --- a/components/kubeflow/launcher/kubeflow_tfjob_launcher_op.py +++ b/components/kubeflow/launcher/kubeflow_tfjob_launcher_op.py @@ -17,7 +17,7 @@ def kubeflow_tfjob_launcher_op(container_image, command, number_of_workers: int, number_of_parameter_servers: int, tfjob_timeout_minutes: int, output_dir=None, step_name='TFJob-launcher'): return dsl.ContainerOp( name = step_name, - image = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf:e20fad3e161e88226c83437271adb063221459b9', + image = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf:b0147bdbed9f25212408e0468a475289e80e0406', arguments = [ '--workers', number_of_workers, '--pss', number_of_parameter_servers, diff --git a/components/kubeflow/launcher/src/train.template.yaml b/components/kubeflow/launcher/src/train.template.yaml index 4a03e2c3c2c..f41e2bf13a3 100644 --- a/components/kubeflow/launcher/src/train.template.yaml +++ 
b/components/kubeflow/launcher/src/train.template.yaml @@ -26,7 +26,7 @@ spec: spec: containers: - name: tensorflow - image: gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:e20fad3e161e88226c83437271adb063221459b9 + image: gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:b0147bdbed9f25212408e0468a475289e80e0406 command: - python - -m @@ -49,7 +49,7 @@ spec: spec: containers: - name: tensorflow - image: gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:e20fad3e161e88226c83437271adb063221459b9 + image: gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:b0147bdbed9f25212408e0468a475289e80e0406 command: - python - -m @@ -72,7 +72,7 @@ spec: spec: containers: - name: tensorflow - image: gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:e20fad3e161e88226c83437271adb063221459b9 + image: gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:b0147bdbed9f25212408e0468a475289e80e0406 command: - python - -m diff --git a/components/local/confusion_matrix/component.yaml b/components/local/confusion_matrix/component.yaml index 324d977d33a..96703dfb984 100644 --- a/components/local/confusion_matrix/component.yaml +++ b/components/local/confusion_matrix/component.yaml @@ -9,7 +9,7 @@ inputs: # - {name: Metrics, type: Metrics} implementation: container: - image: gcr.io/ml-pipeline/ml-pipeline-local-confusion-matrix:e20fad3e161e88226c83437271adb063221459b9 + image: gcr.io/ml-pipeline/ml-pipeline-local-confusion-matrix:b0147bdbed9f25212408e0468a475289e80e0406 command: [python2, /ml/confusion_matrix.py] args: [ --predictions, {inputValue: Predictions}, diff --git a/components/local/roc/component.yaml b/components/local/roc/component.yaml index dc889d36ead..24297d79424 100644 --- a/components/local/roc/component.yaml +++ b/components/local/roc/component.yaml @@ -11,7 +11,7 @@ inputs: # - {name: Metrics, type: Metrics} implementation: container: - image: gcr.io/ml-pipeline/ml-pipeline-local-confusion-matrix:e20fad3e161e88226c83437271adb063221459b9 + image: gcr.io/ml-pipeline/ml-pipeline-local-confusion-matrix:b0147bdbed9f25212408e0468a475289e80e0406 command: [python2, /ml/roc.py] args: [ --predictions, {inputValue: Predictions dir}, diff --git a/samples/kubeflow-tf/kubeflow-training-classification.py b/samples/kubeflow-tf/kubeflow-training-classification.py index dea5957bd8e..ea19ea2f665 100755 --- a/samples/kubeflow-tf/kubeflow-training-classification.py +++ b/samples/kubeflow-tf/kubeflow-training-classification.py @@ -19,10 +19,10 @@ from kfp import dsl from kfp import gcp -dataflow_tf_transform_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/dataflow/tft/component.yaml') -kubeflow_tf_training_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/kubeflow/dnntrainer/component.yaml') -dataflow_tf_predict_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/dataflow/predict/component.yaml') -confusion_matrix_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/local/confusion_matrix/component.yaml') +dataflow_tf_transform_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/dataflow/tft/component.yaml') +kubeflow_tf_training_op = 
components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/kubeflow/dnntrainer/component.yaml') +dataflow_tf_predict_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/dataflow/predict/component.yaml') +confusion_matrix_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/local/confusion_matrix/component.yaml') @dsl.pipeline( name='TF training and prediction pipeline', @@ -68,7 +68,7 @@ def kubeflow_training(output, project, ).apply(gcp.use_gcp_secret('user-gcp-sa')) if use_gpu: - training.image = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer-gpu:e20fad3e161e88226c83437271adb063221459b9', + training.image = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer-gpu:b0147bdbed9f25212408e0468a475289e80e0406', training.set_gpu_limit(1) prediction = dataflow_tf_predict_op( diff --git a/samples/notebooks/KubeFlow Pipeline Using TFX OSS Components.ipynb b/samples/notebooks/KubeFlow Pipeline Using TFX OSS Components.ipynb index f3434ee577f..96fb33c3f47 100644 --- a/samples/notebooks/KubeFlow Pipeline Using TFX OSS Components.ipynb +++ b/samples/notebooks/KubeFlow Pipeline Using TFX OSS Components.ipynb @@ -44,13 +44,13 @@ "EVAL_DATA = 'gs://ml-pipeline-playground/tfx/taxi-cab-classification/eval.csv'\n", "HIDDEN_LAYER_SIZE = '1500'\n", "STEPS = 3000\n", - "DATAFLOW_TFDV_IMAGE = 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tfdv:e20fad3e161e88226c83437271adb063221459b9'\n", - "DATAFLOW_TFT_IMAGE = 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tft:e20fad3e161e88226c83437271adb063221459b9'\n", - "DATAFLOW_TFMA_IMAGE = 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tfma:e20fad3e161e88226c83437271adb063221459b9'\n", - "DATAFLOW_TF_PREDICT_IMAGE = 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tf-predict:e20fad3e161e88226c83437271adb063221459b9'\n", - "KUBEFLOW_TF_TRAINER_IMAGE = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:e20fad3e161e88226c83437271adb063221459b9'\n", - "KUBEFLOW_TF_TRAINER_GPU_IMAGE = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer-gpu:e20fad3e161e88226c83437271adb063221459b9'\n", - "KUBEFLOW_DEPLOYER_IMAGE = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-deployer:e20fad3e161e88226c83437271adb063221459b9'\n", + "DATAFLOW_TFDV_IMAGE = 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tfdv:b0147bdbed9f25212408e0468a475289e80e0406'\n", + "DATAFLOW_TFT_IMAGE = 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tft:b0147bdbed9f25212408e0468a475289e80e0406'\n", + "DATAFLOW_TFMA_IMAGE = 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tfma:b0147bdbed9f25212408e0468a475289e80e0406'\n", + "DATAFLOW_TF_PREDICT_IMAGE = 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tf-predict:b0147bdbed9f25212408e0468a475289e80e0406'\n", + "KUBEFLOW_TF_TRAINER_IMAGE = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:b0147bdbed9f25212408e0468a475289e80e0406'\n", + "KUBEFLOW_TF_TRAINER_GPU_IMAGE = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer-gpu:b0147bdbed9f25212408e0468a475289e80e0406'\n", + "KUBEFLOW_DEPLOYER_IMAGE = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-deployer:b0147bdbed9f25212408e0468a475289e80e0406'\n", "DEPLOYER_MODEL = 'notebook_tfx_taxi'\n", "DEPLOYER_VERSION_DEV = 'dev'\n", "DEPLOYER_VERSION_PROD = 'prod'\n", diff --git a/samples/resnet-cmle/resnet-train-pipeline.py b/samples/resnet-cmle/resnet-train-pipeline.py index 04be110ec42..95552fba889 100644 --- 
a/samples/resnet-cmle/resnet-train-pipeline.py +++ b/samples/resnet-cmle/resnet-train-pipeline.py @@ -22,11 +22,11 @@ import os dataflow_python_op = comp.load_component_from_url( - 'https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/gcp/dataflow/launch_python/component.yaml') + 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/dataflow/launch_python/component.yaml') cloudml_train_op = comp.load_component_from_url( - 'https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/gcp/ml_engine/train/component.yaml') + 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/ml_engine/train/component.yaml') cloudml_deploy_op = comp.load_component_from_url( - 'https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/gcp/ml_engine/deploy/component.yaml') + 'https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/gcp/ml_engine/deploy/component.yaml') def resnet_preprocess_op(project_id: 'GcpProject', output: 'GcsUri', staging_dir: 'GcsUri', train_csv: 'GcsUri[text/csv]', diff --git a/samples/tfx/taxi-cab-classification-pipeline.py b/samples/tfx/taxi-cab-classification-pipeline.py index 2771cc51062..bf0834b28ff 100755 --- a/samples/tfx/taxi-cab-classification-pipeline.py +++ b/samples/tfx/taxi-cab-classification-pipeline.py @@ -20,16 +20,16 @@ from kfp import gcp -dataflow_tf_data_validation_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/dataflow/tfdv/component.yaml') -dataflow_tf_transform_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/dataflow/tft/component.yaml') -tf_train_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/kubeflow/dnntrainer/component.yaml') -dataflow_tf_model_analyze_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/dataflow/tfma/component.yaml') -dataflow_tf_predict_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/dataflow/predict/component.yaml') +dataflow_tf_data_validation_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/dataflow/tfdv/component.yaml') +dataflow_tf_transform_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/dataflow/tft/component.yaml') +tf_train_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/kubeflow/dnntrainer/component.yaml') +dataflow_tf_model_analyze_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/dataflow/tfma/component.yaml') +dataflow_tf_predict_op = 
components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/dataflow/predict/component.yaml') -confusion_matrix_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/local/confusion_matrix/component.yaml') -roc_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/local/roc/component.yaml') +confusion_matrix_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/local/confusion_matrix/component.yaml') +roc_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/local/roc/component.yaml') -kubeflow_deploy_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/kubeflow/deployer/component.yaml') +kubeflow_deploy_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/kubeflow/deployer/component.yaml') @dsl.pipeline( diff --git a/samples/xgboost-spark/xgboost-training-cm.py b/samples/xgboost-spark/xgboost-training-cm.py index 636a4c62531..2ce5cb58dea 100755 --- a/samples/xgboost-spark/xgboost-training-cm.py +++ b/samples/xgboost-spark/xgboost-training-cm.py @@ -20,8 +20,8 @@ from kfp import dsl from kfp import gcp -confusion_matrix_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/local/confusion_matrix/component.yaml') -roc_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/785d474699cffb7463986b9abc4b1fbe03796cb6/components/local/roc/component.yaml') +confusion_matrix_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/local/confusion_matrix/component.yaml') +roc_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e8524eefb138725fc06600d1956da0f4dd477178/components/local/roc/component.yaml') # ! 
Please do not forget to enable the Dataproc API in your cluster https://console.developers.google.com/apis/api/dataproc.googleapis.com/overview @@ -36,7 +36,7 @@ def dataproc_create_cluster_op( ): return dsl.ContainerOp( name='Dataproc - Create cluster', - image='gcr.io/ml-pipeline/ml-pipeline-dataproc-create-cluster:e20fad3e161e88226c83437271adb063221459b9', + image='gcr.io/ml-pipeline/ml-pipeline-dataproc-create-cluster:b0147bdbed9f25212408e0468a475289e80e0406', arguments=[ '--project', project, '--region', region, @@ -56,7 +56,7 @@ def dataproc_delete_cluster_op( ): return dsl.ContainerOp( name='Dataproc - Delete cluster', - image='gcr.io/ml-pipeline/ml-pipeline-dataproc-delete-cluster:e20fad3e161e88226c83437271adb063221459b9', + image='gcr.io/ml-pipeline/ml-pipeline-dataproc-delete-cluster:b0147bdbed9f25212408e0468a475289e80e0406', arguments=[ '--project', project, '--region', region, @@ -76,7 +76,7 @@ def dataproc_analyze_op( ): return dsl.ContainerOp( name='Dataproc - Analyze', - image='gcr.io/ml-pipeline/ml-pipeline-dataproc-analyze:e20fad3e161e88226c83437271adb063221459b9', + image='gcr.io/ml-pipeline/ml-pipeline-dataproc-analyze:b0147bdbed9f25212408e0468a475289e80e0406', arguments=[ '--project', project, '--region', region, @@ -103,7 +103,7 @@ def dataproc_transform_op( ): return dsl.ContainerOp( name='Dataproc - Transform', - image='gcr.io/ml-pipeline/ml-pipeline-dataproc-transform:e20fad3e161e88226c83437271adb063221459b9', + image='gcr.io/ml-pipeline/ml-pipeline-dataproc-transform:b0147bdbed9f25212408e0468a475289e80e0406', arguments=[ '--project', project, '--region', region, @@ -141,7 +141,7 @@ def dataproc_train_op( return dsl.ContainerOp( name='Dataproc - Train XGBoost model', - image='gcr.io/ml-pipeline/ml-pipeline-dataproc-train:e20fad3e161e88226c83437271adb063221459b9', + image='gcr.io/ml-pipeline/ml-pipeline-dataproc-train:b0147bdbed9f25212408e0468a475289e80e0406', arguments=[ '--project', project, '--region', region, @@ -174,7 +174,7 @@ def dataproc_predict_op( ): return dsl.ContainerOp( name='Dataproc - Predict with XGBoost model', - image='gcr.io/ml-pipeline/ml-pipeline-dataproc-predict:e20fad3e161e88226c83437271adb063221459b9', + image='gcr.io/ml-pipeline/ml-pipeline-dataproc-predict:b0147bdbed9f25212408e0468a475289e80e0406', arguments=[ '--project', project, '--region', region, diff --git a/sdk/python/setup.py b/sdk/python/setup.py index cff12e11a8d..3f61ad52047 100644 --- a/sdk/python/setup.py +++ b/sdk/python/setup.py @@ -15,7 +15,7 @@ from setuptools import setup NAME = 'kfp' -VERSION = '0.1.16' +VERSION = '0.1.18' REQUIRES = [ 'urllib3>=1.15,<1.25', #Fixing the version conflict with the "requests" package