diff --git a/RELEASE.md b/RELEASE.md index 6f18db1fe1..cc6032a7d1 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -10,12 +10,19 @@ # Upcoming Release 0.18.14 ## Major features and improvements +* Added support for custom cookiecutter templates when creating pipelines, either with the `--template` flag for `kedro pipeline create` or via a `templates/pipeline` folder in the project. + ## Bug fixes and other changes * Updated dataset factories to resolve nested catalog config properly. ## Documentation changes ## Breaking changes to the API ## Upcoming deprecations for Kedro 0.19.0 +## Community contributions +Many thanks to the following Kedroids for contributing PRs to this release: + +* [Jason Hite](https://github.com/jasonmhite) + # Release 0.18.13 diff --git a/docs/source/nodes_and_pipelines/modular_pipelines.md b/docs/source/nodes_and_pipelines/modular_pipelines.md index 5064ae7b87..09c0d5b54e 100644 --- a/docs/source/nodes_and_pipelines/modular_pipelines.md +++ b/docs/source/nodes_and_pipelines/modular_pipelines.md @@ -59,7 +59,6 @@ Running the `kedro pipeline create` command adds boilerplate folders and files f │ └── pipelines │ ├── __init__.py │ └── {{pipeline_name}} <-- This folder defines the modular pipeline - │ ├── README.md <-- Pipeline-specific documentation │ ├── __init__.py <-- So that Python treats this pipeline as a module │ ├── nodes.py <-- To declare your nodes │ └── pipeline.py <-- To structure the pipeline itself @@ -77,6 +76,36 @@ Running the `kedro pipeline create` command adds boilerplate folders and files f If you want to do the reverse and remove a modular pipeline, you can use ``kedro pipeline delete <pipeline_name>`` to do so. +### Custom templates + +If you want to generate a pipeline with a custom Cookiecutter template, you can save it in `<project_root>/templates/pipeline`. +The `kedro pipeline create` command will pick up the custom template in your project as the default. 
You can also specify the path to your custom +Cookiecutter pipeline template with the `--template` flag like this: +```bash +kedro pipeline create <pipeline_name> --template <path_to_template> +``` +A template folder passed to `kedro pipeline create` using the `--template` argument will take precedence over any local templates. +Kedro supports having a single pipeline template in your project. If you need to have multiple pipeline templates, consider saving them in a +separate folder and pointing to them with the `--template` flag. + +#### Creating custom templates + +It is your responsibility to create functional Cookiecutter templates for custom modular pipelines. Please ensure you understand the +basic structure of a modular pipeline. Your template should render to a valid, importable Python module containing a +`create_pipeline` function at the top level that returns a `Pipeline` object. You will also need appropriate +`config` and `tests` subdirectories that will be copied to the project `config` and `tests` directories when the pipeline is created. +The `config` and `tests` directories need to follow the same layout as in the default template and cannot +be customised, although the contents of the parameters and actual test file can be changed. File and folder names or structure +do not matter beyond that and can be customised according to your needs. You can use [the +default template that Kedro](https://github.com/kedro-org/kedro/tree/main/kedro/templates/pipeline) uses as a starting point. + +Pipeline templates are rendered using [Cookiecutter](https://cookiecutter.readthedocs.io/), and must also contain a `cookiecutter.json`. +See the [`cookiecutter.json` file in the Kedro default template](https://github.com/kedro-org/kedro/tree/main/kedro/templates/pipeline/cookiecutter.json) for an example. 
+It is important to note that if you are embedding your custom pipeline template within a +Kedro starter template, you must tell Cookiecutter not to render this template when creating a new project from the starter. To do this, +you must add [`_copy_without_render: ["templates"]`](https://cookiecutter.readthedocs.io/en/latest/advanced/copy_without_render.html) to the `cookiecutter.json` file for the starter +and not the `cookiecutter.json` for the pipeline template. + ### Ensuring portability Modular pipelines are shareable between Kedro codebases via [micro-packaging](micro_packaging.md), but you must follow a couple of rules to ensure portability: diff --git a/kedro/framework/cli/pipeline.py b/kedro/framework/cli/pipeline.py index d3d9b2d2fd..b3ddb46b6d 100644 --- a/kedro/framework/cli/pipeline.py +++ b/kedro/framework/cli/pipeline.py @@ -90,10 +90,17 @@ def pipeline(): is_flag=True, help="Skip creation of config files for the new pipeline(s).", ) +@click.option( + "template_path", + "-t", + "--template", + type=click.Path(file_okay=False, dir_okay=True, exists=True, path_type=Path), + help="Path to cookiecutter template to use for pipeline(s). Will override any local templates.", +) @env_option(help="Environment to create pipeline configuration in. Defaults to `base`.") @click.pass_obj # this will pass the metadata as first argument def create_pipeline( - metadata: ProjectMetadata, name, skip_config, env, **kwargs + metadata: ProjectMetadata, name, template_path, skip_config, env, **kwargs ): # noqa: unused-argument """Create a new modular pipeline by providing a name.""" package_dir = metadata.source_dir / metadata.package_name @@ -107,7 +114,19 @@ def create_pipeline( f"Make sure it exists in the project configuration." 
) - result_path = _create_pipeline(name, package_dir / "pipelines") + # Precedence for template_path is: command line > project templates/pipeline dir > global default + # If passed on the CLI, click will verify that the path exists so no need to check again + if template_path is None: + # No path provided on the CLI, try `PROJECT_PATH/templates/pipeline` + template_path = Path(metadata.project_path / "templates" / "pipeline") + + if not template_path.exists(): + # and if that folder doesn't exist fall back to the global default + template_path = Path(kedro.__file__).parent / "templates" / "pipeline" + + click.secho(f"Using pipeline template at: '{template_path}'") + + result_path = _create_pipeline(name, template_path, package_dir / "pipelines") _copy_pipeline_tests(name, result_path, package_dir) _copy_pipeline_configs(result_path, project_conf_path, skip_config, env=env) click.secho(f"\nPipeline '{name}' was successfully created.\n", fg="green") @@ -191,12 +210,11 @@ def _echo_deletion_warning(message: str, **paths: list[Path]): click.echo(indent(paths_str, " " * 2)) -def _create_pipeline(name: str, output_dir: Path) -> Path: +def _create_pipeline(name: str, template_path: Path, output_dir: Path) -> Path: with _filter_deprecation_warnings(): # noqa: import-outside-toplevel from cookiecutter.main import cookiecutter - template_path = Path(kedro.__file__).parent / "templates" / "pipeline" cookie_context = {"pipeline_name": name, "kedro_version": kedro.__version__} click.echo(f"Creating the pipeline '{name}': ", nl=False) diff --git a/tests/framework/cli/pipeline/conftest.py b/tests/framework/cli/pipeline/conftest.py index f934ab6939..672fee3eb6 100644 --- a/tests/framework/cli/pipeline/conftest.py +++ b/tests/framework/cli/pipeline/conftest.py @@ -1,10 +1,24 @@ +import json import shutil +from pathlib import Path import pytest from kedro.framework.project import settings +def _write_json(filepath: Path, content: dict): + filepath.parent.mkdir(parents=True, 
exist_ok=True) + json_str = json.dumps(content, indent=4) + filepath.write_text(json_str) + + +def _write_dummy_file(filepath: Path, content: str = ""): + filepath.parent.mkdir(parents=True, exist_ok=True) + with filepath.open("w") as f: + f.write(content) + + @pytest.fixture(autouse=True) def cleanup_micropackages(fake_repo_path, fake_package_path): packages = {p.name for p in fake_package_path.iterdir() if p.is_dir()} @@ -82,3 +96,39 @@ def cleanup_pyproject_toml(fake_repo_path): yield pyproject_toml.write_text(existing_toml) + + +@pytest.fixture() +def fake_local_template_dir(fake_repo_path): + """Set up a local template directory. This won't be functional we're just testing the actual layout works. + + Note that this is not scoped to module because we don't want to have this folder present in most of the tests, + so we will tear it down every time. + """ + template_path = fake_repo_path / Path("templates") + pipeline_template_path = template_path / Path("pipeline") + cookiecutter_template_path = ( + pipeline_template_path / "{{ cookiecutter.pipeline_name }}" + ) + + cookiecutter_template_path.mkdir(parents=True) + + # Create the absolute bare minimum files + cookiecutter_json = { + "pipeline_name": "default", + } + _write_json(pipeline_template_path / "cookiecutter.json", cookiecutter_json) + _write_dummy_file( + cookiecutter_template_path / "pipeline_{{ cookiecutter.pipeline_name }}.py", + ) + _write_dummy_file(cookiecutter_template_path / "__init__.py", "") + _write_dummy_file( + cookiecutter_template_path + / r"config/parameters/{{ cookiecutter.pipeline_name }}.yml", + ) + _write_dummy_file( + cookiecutter_template_path / r"tests/test_{{ cookiecutter.pipeline_name }}.py", + ) + yield template_path.resolve() + + shutil.rmtree(template_path) diff --git a/tests/framework/cli/pipeline/test_pipeline.py b/tests/framework/cli/pipeline/test_pipeline.py index 0414e79656..2426a352af 100644 --- a/tests/framework/cli/pipeline/test_pipeline.py +++ 
b/tests/framework/cli/pipeline/test_pipeline.py @@ -79,6 +79,78 @@ def test_create_pipeline( # pylint: disable=too-many-locals actual_files = {f.name for f in test_dir.iterdir()} assert actual_files == expected_files + @pytest.mark.parametrize("env", [None, "local"]) + def test_create_pipeline_template( # pylint: disable=too-many-locals + self, + fake_repo_path, + fake_project_cli, + fake_metadata, + env, + fake_package_path, + fake_local_template_dir, + ): + pipelines_dir = fake_package_path / "pipelines" + assert pipelines_dir.is_dir() + + assert not (pipelines_dir / PIPELINE_NAME).exists() + + cmd = ["pipeline", "create", PIPELINE_NAME] + cmd += ["-e", env] if env else [] + result = CliRunner().invoke(fake_project_cli, cmd, obj=fake_metadata) + + assert ( + f"Using pipeline template at: '{fake_repo_path / 'templates'}" + in result.output + ) + assert f"Creating the pipeline '{PIPELINE_NAME}': OK" in result.output + assert f"Location: '{pipelines_dir / PIPELINE_NAME}'" in result.output + assert f"Pipeline '{PIPELINE_NAME}' was successfully created." 
in result.output + + # Dummy pipeline rendered correctly + assert (pipelines_dir / PIPELINE_NAME / f"pipeline_{PIPELINE_NAME}.py").exists() + + assert result.exit_code == 0 + + @pytest.mark.parametrize("env", [None, "local"]) + def test_create_pipeline_template_command_line_override( # pylint: disable=too-many-locals + self, + fake_repo_path, + fake_project_cli, + fake_metadata, + env, + fake_package_path, + fake_local_template_dir, + ): + pipelines_dir = fake_package_path / "pipelines" + assert pipelines_dir.is_dir() + + assert not (pipelines_dir / PIPELINE_NAME).exists() + + # Rename the local template dir to something else so we know the command line flag is taking precedence + try: + # Can skip if already there but copytree has a dirs_exist_ok flag in >python 3.8 only + shutil.copytree(fake_local_template_dir, fake_repo_path / "local_templates") + except FileExistsError: + pass + + cmd = ["pipeline", "create", PIPELINE_NAME] + cmd += ["-t", str(fake_repo_path / "local_templates/pipeline")] + cmd += ["-e", env] if env else [] + result = CliRunner().invoke(fake_project_cli, cmd, obj=fake_metadata) + + assert ( + f"Using pipeline template at: '{fake_repo_path / 'local_templates'}" + in result.output + ) + assert f"Creating the pipeline '{PIPELINE_NAME}': OK" in result.output + assert f"Location: '{pipelines_dir / PIPELINE_NAME}'" in result.output + assert f"Pipeline '{PIPELINE_NAME}' was successfully created." in result.output + + # Dummy pipeline rendered correctly + assert (pipelines_dir / PIPELINE_NAME / f"pipeline_{PIPELINE_NAME}.py").exists() + + assert result.exit_code == 0 + @pytest.mark.parametrize("env", [None, "local"]) def test_create_pipeline_skip_config( self, fake_repo_path, fake_project_cli, fake_metadata, env