forked from kubeflow/pipelines
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(components): Added the Run notebook using papermill component. Fixes kubeflow#497 (kubeflow#4578)
* Components - Added the Run notebook using papermill component Fixes kubeflow#497 * Added a notebook to be used in samples * Added the sample pipeline
- Loading branch information
Showing
3 changed files
with
152 additions
and
0 deletions.
There are no files selected for viewing
50 changes: 50 additions & 0 deletions
50
components/notebooks/Run_notebook_using_papermill/component.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
# Component definition: executes a Jupyter notebook with papermill inside a
# python:3.7 container and exposes the executed notebook plus an output
# directory as component outputs.
name: Run notebook using papermill
description: |
  Run Jupyter notebook using papermill.
  The notebook will receive the parameter values passed to it as well as the INPUT_DATA_PATH and OUTPUT_DATA_PATH variables that will be set to the input data path (if provided) and directory for the optional output data.
inputs:
- {name: Notebook, type: JupyterNotebook, description: 'Notebook to execute.'}
- {name: Parameters, type: JsonObject, default: '{}', description: 'Map with notebook parameter values.'}
# The default is an empty JSON array so that it is a valid value for the
# declared JsonArray type; the script below converts it to an empty string.
- {name: Packages to install, type: JsonArray, default: '[]', description: 'Python packages to install'}
- {name: Input data, optional: true, description: 'Optional data that can be passed to notebook. In notebook, the INPUT_DATA_PATH variable will point to the data (if passed).'}
outputs:
- {name: Notebook, type: JupyterNotebook, description: 'Executed notebook.'}
- {name: Output data, description: 'Directory with any output data. In notebook, the OUTPUT_DATA_PATH variable will point to this directory, so that the notebook can write output data there.'}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
implementation:
  container:
    image: python:3.7
    command:
    - sh
    - -exc
    - |
      # The command-list entries that follow this script are received as $0..$5.
      input_notebook_path="$0"
      output_notebook_path="$1"
      arguments="$2"
      packages_to_install="$3"
      input_data_path="$4"
      output_data_path="$5"
      # Output locations must exist before papermill/the notebook write to them.
      mkdir -p "$(dirname "$output_notebook_path")"
      mkdir -p "$output_data_path"
      # Converting packages_to_install from JSON to command-line arguments:
      # strip the surrounding brackets, separate items with spaces and
      # turn double quotes into single quotes. An empty array ("[]") becomes "".
      packages_to_install=$(echo "$packages_to_install" | sed -E -e 's/^\[//' -e 's/]$//' -e 's/",/" /g' -e "s/\"/'/g")
      # Installing packages
      sh -c "python3 -m pip install --upgrade --quiet jupyter papermill==2.2.0 ${packages_to_install}"
      # Running the notebook using papermill
      papermill --parameters_yaml "$arguments" --parameters INPUT_DATA_PATH "$input_data_path" --parameters OUTPUT_DATA_PATH "$output_data_path" "$input_notebook_path" "$output_notebook_path"
    - {inputPath: Notebook}
    - {outputPath: Notebook}
    - {inputValue: Parameters}
    # Fall back to an empty JSON array when no packages are given. A "{}"
    # fallback would pass through the sed conversion unchanged and be handed
    # to pip as a package name.
    - if:
        cond: {isPresent: Packages to install}
        then: [{inputValue: Packages to install}]
        else: "[]"
    # When no input data is passed, INPUT_DATA_PATH is set to an empty string.
    - if:
        cond: {isPresent: Input data}
        then: [{inputPath: Input data}]
        else: ""
    - {outputPath: Output data}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Sample pipeline: downloads a test notebook and executes it with the
# "Run notebook using papermill" component.

# Host passed to kfp.Client below.
# NOTE(review): None presumably lets kfp.Client pick its default endpoint - confirm.
kfp_endpoint = None

import kfp
from kfp import components


# Both components are loaded from URLs pinned to specific commits.
download_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/240543e483076ae718f82c6f280441daa2f041fd/components/web/Download/component.yaml')
run_notebook_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/4ebce5f643b6af5639053ea7eaed52b02bf7e928/components/notebooks/Run_notebook_using_papermill/component.yaml')


def notebook_pipeline():
    """Download the sample notebook and run it with the papermill component.

    The notebook receives one parameter (``param1``), a placeholder string as
    its input data, and ``matplotlib`` as an extra package to install.
    """
    notebook = download_op('https://raw.githubusercontent.com/kubeflow/pipelines/93fc34474bf989998cf19445149aca2847eee763/components/notebooks/samples/test_notebook.ipynb').output

    run_notebook_op(
        notebook=notebook,
        parameters={'param1': 'value 1'},
        input_data="Optional. Pass output of any component here. Can be a directory.",
        packages_to_install=["matplotlib"],
    )


if __name__ == '__main__':
    # Submit the pipeline as a run on the configured endpoint.
    pipeline_run = kfp.Client(host=kfp_endpoint).create_run_from_pipeline_func(notebook_pipeline, arguments={})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
{ | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.8" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2, | ||
"cells": [ | ||
{ | ||
"source": [ | ||
"# Parameters\n", | ||
"INPUT_DATA_PATH = INPUT_DATA_PATH or \"\"\n", | ||
"OUTPUT_DATA_PATH = OUTPUT_DATA_PATH or \"\"" | ||
], | ||
"cell_type": "code", | ||
"metadata": {}, | ||
"execution_count": null, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"# Show the parameter values\n", | ||
"print('INPUT_DATA_PATH = ' + INPUT_DATA_PATH)\n", | ||
"print('OUTPUT_DATA_PATH = ' + OUTPUT_DATA_PATH)\n", | ||
"print('locals() = ' + str(locals()))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Checking the input data\n", | ||
"import os\n", | ||
"\n", | ||
"if INPUT_DATA_PATH:\n", | ||
" if os.path.isdir(INPUT_DATA_PATH):\n", | ||
" print('os.listdir(INPUT_DATA_PATH):')\n", | ||
" print(os.listdir(INPUT_DATA_PATH))\n", | ||
" if os.path.isfile(INPUT_DATA_PATH):\n", | ||
" print('os.stat(INPUT_DATA_PATH):')\n", | ||
" print(os.stat(INPUT_DATA_PATH))\n", | ||
"else:\n", | ||
" print('INPUT_DATA_PATH is empty')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Writing some output data\n", | ||
"from pathlib import Path\n", | ||
"\n", | ||
"(Path(OUTPUT_DATA_PATH) / 'output.txt').write_text(\"Hello world!\")" | ||
] | ||
} | ||
] | ||
} |