Skip to content

Commit

Permalink
Add an option to get an example pipeline (#3295)
Browse files Browse the repository at this point in the history
Add an option to get an example pipeline, fix and add new tests

---------

Signed-off-by: Dmitry Sorokin <dmd40in@gmail.com>
  • Loading branch information
DimedS committed Nov 22, 2023
1 parent f3fbce7 commit 9ea4ce8
Show file tree
Hide file tree
Showing 10 changed files with 221 additions and 54 deletions.
2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ include LICENSE.md
include kedro/framework/project/default_logging.yml
include kedro/ipython/*.png
include kedro/ipython/*.svg
recursive-include templates *
recursive-include kedro/templates *
2 changes: 1 addition & 1 deletion RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

## Major features and improvements
* Dropped Python 3.7 support.
* Introduced add-ons to the `kedro new` CLI flow.
* Introduced add-ons and the option to include an example pipeline in the `kedro new` CLI flow.
* The new spaceflights starters, `spaceflights-pandas`, `spaceflights-pandas-viz`, `spaceflights-pyspark`, and `spaceflights-pyspark-viz` can be used with the `kedro new` command with the `--starter` flag.
* Added the `--conf-source` option to `%reload_kedro`, allowing users to specify a source for project configuration.
* Added the functionality to choose a merging strategy for config files loaded with `OmegaConfigLoader`.
Expand Down
1 change: 1 addition & 0 deletions features/steps/cli_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ def create_config_file(context):
config = {
"add_ons": "1-5",
"project_name": context.project_name,
"example_pipeline": "no",
"repo_name": context.project_name,
"output_dir": str(context.temp_dir),
"python_package": context.package_name,
Expand Down
88 changes: 66 additions & 22 deletions kedro/framework/cli/starters.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
This can be the path to a local directory, a URL to a remote VCS repository supported
by `cookiecutter` or one of the aliases listed in ``kedro starter list``.
"""
# Help text shown for the `--example` / `-e` option of `kedro new`.
EXAMPLE_ARG_HELP = "Enter y to enable, n to disable the example pipeline."


@define(order=True)
Expand Down Expand Up @@ -125,6 +126,27 @@ class KedroStarterSpec: # noqa: too-few-public-methods
"7": "Kedro Viz",
}

# Named validation rules for user-supplied values. Each entry holds the regex
# the value must match and the message printed when it does not.
VALIDATION_PATTERNS = {
    "yes_no": {
        # Case-insensitive y/yes/n/no; surrounding whitespace is tolerated.
        "regex": r"(?i)^\s*(y|yes|n|no)\s*$",
        # No leading "|": that is YAML block-scalar syntax, not message text.
        "error_message": "It must contain only y, n, YES, NO, case insensitive.",
    }
}


def _validate_regex(pattern_name, text):
    """Exit with an error unless ``text`` satisfies the named validation rule.

    Args:
        pattern_name: key of the rule to look up in ``VALIDATION_PATTERNS``.
        text: the string to check against that rule's regex.

    On a mismatch the rule's error message is written in red to stderr and
    the process terminates via ``sys.exit(1)``.
    """
    rule = VALIDATION_PATTERNS[pattern_name]
    if re.match(rule["regex"], text):
        return

    click.secho(rule["error_message"], fg="red", err=True)
    sys.exit(1)


def _parse_yes_no_to_bool(value):
return value.strip().lower() in ["y", "yes"] if value is not None else None


# noqa: missing-function-docstring
@click.group(context_settings=CONTEXT_SETTINGS, name="Kedro")
Expand All @@ -150,13 +172,15 @@ def starter():
@click.option("--directory", help=DIRECTORY_ARG_HELP)
@click.option("--addons", "-a", "selected_add_ons_flag", help=ADDON_ARG_HELP)
@click.option("--name", "-n", "project_name", help=NAME_ARG_HELP)
@click.option("--example", "-e", "example_pipeline", help=EXAMPLE_ARG_HELP)
def new( # noqa: PLR0913
config_path,
starter_alias,
selected_add_ons_flag,
project_name,
checkout,
directory,
example_pipeline, # Raw string passed to --example (validated later as y/yes/n/no), or None if the flag wasn't used
**kwargs,
):
"""Create a new kedro project."""
Expand Down Expand Up @@ -198,7 +222,7 @@ def new( # noqa: PLR0913

# Select which prompts will be displayed to the user based on which flags were selected.
prompts_required = _select_prompts_to_display(
prompts_required, selected_add_ons_flag, project_name
prompts_required, selected_add_ons_flag, project_name, example_pipeline
)

# We only need to make cookiecutter_context if interactive prompts are needed.
Expand All @@ -221,6 +245,7 @@ def new( # noqa: PLR0913
cookiecutter_context=cookiecutter_context,
selected_add_ons_flag=selected_add_ons_flag,
project_name=project_name,
example_pipeline=example_pipeline,
)

cookiecutter_args = _make_cookiecutter_args(
Expand Down Expand Up @@ -370,12 +395,13 @@ def _get_starters_dict() -> dict[str, KedroStarterSpec]:
return starter_specs


def _get_extra_context(
def _get_extra_context( # noqa: PLR0913
prompts_required: dict,
config_path: str,
cookiecutter_context: OrderedDict,
selected_add_ons_flag: str | None,
project_name: str | None,
example_pipeline: str | None,
) -> dict[str, str]:
"""Generates a config dictionary that will be passed to cookiecutter as `extra_context`, based
on CLI flags, user prompts, or a configuration file.
Expand Down Expand Up @@ -429,6 +455,14 @@ def _get_extra_context(
]
extra_context["add_ons"] = str(extra_context["add_ons"])

extra_context["example_pipeline"] = (
_parse_yes_no_to_bool(
example_pipeline
if example_pipeline is not None
else extra_context.get("example_pipeline", "no")
) # type: ignore
)

return extra_context


Expand Down Expand Up @@ -457,7 +491,10 @@ def _convert_addon_names_to_numbers(selected_add_ons_flag: str | None) -> str |


def _select_prompts_to_display(
prompts_required: dict, selected_add_ons_flag: str, project_name: str
prompts_required: dict,
selected_add_ons_flag: str,
project_name: str,
example_pipeline: str,
) -> dict:
"""Selects which prompts an user will receive when creating a new
Kedro project, based on what information was already made available
Expand All @@ -470,6 +507,8 @@ def _select_prompts_to_display(
or None in case the flag wasn't used.
project_name: a string containing the value for the --name flag, or
None in case the flag wasn't used.
example_pipeline: "Yes" or "No" for --example flag, or
None in case the flag wasn't used.
Returns:
the prompts_required dictionary, with all the redundant information removed.
Expand Down Expand Up @@ -505,6 +544,10 @@ def _select_prompts_to_display(
sys.exit(1)
del prompts_required["project_name"]

if example_pipeline is not None:
_validate_regex("yes_no", example_pipeline)
del prompts_required["example_pipeline"]

return prompts_required


Expand Down Expand Up @@ -577,23 +620,25 @@ def _fetch_config_from_user_prompts(

def fetch_template_based_on_add_ons(template_path, cookiecutter_args: dict[str, Any]):
extra_context = cookiecutter_args["extra_context"]
add_ons = extra_context.get("add_ons")
# If 'add_ons' or 'example_pipeline' are not specified in prompts.yml and not prompted in 'kedro new' options,
# default options will be used instead
add_ons = extra_context.get("add_ons", [])
example_pipeline = extra_context.get("example_pipeline", False)
starter_path = "git+https://github.com/kedro-org/kedro-starters.git"
if add_ons:
if "Pyspark" in add_ons and "Kedro Viz" in add_ons:
# Use the spaceflights-pyspark-viz starter if both Pyspark and Kedro Viz are chosen.
cookiecutter_args["directory"] = "spaceflights-pyspark-viz"
elif "Pyspark" in add_ons:
# Use the spaceflights-pyspark starter if only Pyspark is chosen.
cookiecutter_args["directory"] = "spaceflights-pyspark"
elif "Kedro Viz" in add_ons:
# Use the spaceflights-pandas-viz starter if only Kedro Viz is chosen.
cookiecutter_args["directory"] = "spaceflights-pandas-viz"
else:
# Use the default template path for any other combinations or if "none" is chosen.
starter_path = template_path
if "Pyspark" in add_ons and "Kedro Viz" in add_ons:
# Use the spaceflights-pyspark-viz starter if both Pyspark and Kedro Viz are chosen.
cookiecutter_args["directory"] = "spaceflights-pyspark-viz"
elif "Pyspark" in add_ons:
# Use the spaceflights-pyspark starter if only Pyspark is chosen.
cookiecutter_args["directory"] = "spaceflights-pyspark"
elif "Kedro Viz" in add_ons:
# Use the spaceflights-pandas-viz starter if only Kedro Viz is chosen.
cookiecutter_args["directory"] = "spaceflights-pandas-viz"
elif example_pipeline:
# Use spaceflights-pandas starter if example was selected, but PySpark or Viz wasn't
cookiecutter_args["directory"] = "spaceflights-pandas"
else:
# Use the default template path if add_ons is None, which can occur if there is no prompts.yml or its empty.
# Use the default template path for non Pyspark, Viz or example options:
starter_path = template_path
return starter_path

Expand Down Expand Up @@ -706,6 +751,7 @@ def _validate_config_file_inputs(config: dict[str, str]):

selected_add_ons = _parse_add_ons_input(input_add_ons)
_validate_selection(selected_add_ons)
_validate_regex("yes_no", config.get("example_pipeline", "no"))


def _validate_selection(add_ons: list[str]):
Expand Down Expand Up @@ -789,10 +835,8 @@ def _create_project(template_path: str, cookiecutter_args: dict[str, Any]):
)
add_ons = extra_context.get("add_ons")

# Only core template and spaceflight starters have configurable add-ons
if template_path == str(TEMPLATE_PATH) or (
add_ons and ("Pyspark" in add_ons or "Kedro Viz" in add_ons)
):
# we can use starters without add_ons:
if add_ons is not None:
if add_ons == "[]": # TODO: This should be a list
click.secho("\nYou have selected no add-ons")
else:
Expand Down
3 changes: 2 additions & 1 deletion kedro/templates/project/cookiecutter.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@
"repo_name": "{{ cookiecutter.project_name.strip().replace(' ', '-').replace('_', '-').lower() }}",
"python_package": "{{ cookiecutter.project_name.strip().replace(' ', '_').replace('-', '_').lower() }}",
"kedro_version": "{{ cookiecutter.kedro_version }}",
"add_ons": "none"
"add_ons": "none",
"example_pipeline": "no"
}
3 changes: 2 additions & 1 deletion kedro/templates/project/hooks/post_gen_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@ def main():

# Get the selected add-ons from cookiecutter
selected_add_ons = "{{ cookiecutter.add_ons }}"
example_pipeline = "{{ cookiecutter.example_pipeline }}"

# Handle template directories and requirements according to selected add-ons
setup_template_add_ons(selected_add_ons, requirements_file_path, pyproject_file_path, python_package_name)
setup_template_add_ons(selected_add_ons, requirements_file_path, pyproject_file_path, python_package_name, example_pipeline)

# Sort requirements.txt file in alphabetical order
sort_requirements(requirements_file_path)
Expand Down
23 changes: 9 additions & 14 deletions kedro/templates/project/hooks/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,11 @@ def _remove_file(path: Path) -> None:
path.unlink()


def _handle_starter_setup(selected_add_ons_list: str, python_package_name: str) -> None:
def _remove_pyspark_viz_starter_files(is_viz: bool, python_package_name: str) -> None:
"""Clean up the unnecessary files in the starters template.
Args:
selected_add_ons_list (str): A string contains the selected add-ons.
is_viz (bool): if Viz included in starter, then need to remove "reporting" folder.
python_package_name (str): The name of the python package.
"""
# Remove all .csv and .xlsx files from data/01_raw/
Expand All @@ -129,11 +129,8 @@ def _handle_starter_setup(selected_add_ons_list: str, python_package_name: str)
for param_file in conf_base_path.glob(pattern):
_remove_file(param_file)

# Remove the pipelines subdirectories
if "Kedro Viz" in selected_add_ons_list: # Remove reporting if Kedro Viz is selected
pipelines_to_remove = ["data_science", "data_processing", "reporting"]
else:
pipelines_to_remove = ["data_science", "data_processing"]
# Remove the pipelines subdirectories, if Viz - also "reporting" folder
pipelines_to_remove = ["data_science", "data_processing"] + (["reporting"] if is_viz else [])

pipelines_path = current_dir / f"src/{python_package_name}/pipelines/"
for pipeline_subdir in pipelines_to_remove:
Expand All @@ -144,14 +141,15 @@ def _handle_starter_setup(selected_add_ons_list: str, python_package_name: str)
_remove_file(test_pipeline_path)


def setup_template_add_ons(selected_add_ons_list: str, requirements_file_path: str, pyproject_file_path: str, python_package_name: str) -> None:
def setup_template_add_ons(selected_add_ons_list: str, requirements_file_path: str, pyproject_file_path: str, python_package_name: str, example_pipeline: str) -> None:
"""Setup the templates according to the choice of add-ons.
Args:
selected_add_ons_list (str): A string contains the selected add-ons.
requirements_file_path (str): The path of the `requirements.txt` in the template.
pyproject_file_path (str): The path of the `pyproject.toml` in the template
python_package_name (str): The name of the python package.
example_pipeline (str): 'True' if example pipeline was selected
"""
if "Linting" not in selected_add_ons_list:
_remove_from_file(requirements_file_path, lint_requirements)
Expand All @@ -169,14 +167,11 @@ def setup_template_add_ons(selected_add_ons_list: str, requirements_file_path: s
_remove_from_toml(pyproject_file_path, docs_pyproject_requirements)
_remove_dir(current_dir / "docs")

if "Data Structure" not in selected_add_ons_list:
if "Data Structure" not in selected_add_ons_list and example_pipeline != "True":
_remove_dir(current_dir / "data")

if "Pyspark" in selected_add_ons_list:
_handle_starter_setup(selected_add_ons_list, python_package_name)

if "Kedro Viz" in selected_add_ons_list:
_handle_starter_setup(selected_add_ons_list, python_package_name)
if ("Pyspark" in selected_add_ons_list or "Kedro Viz" in selected_add_ons_list) and example_pipeline != "True":
_remove_pyspark_viz_starter_files("Kedro Viz" in selected_add_ons_list, python_package_name)


def sort_requirements(requirements_file_path: Path) -> None:
Expand Down
11 changes: 11 additions & 0 deletions kedro/templates/project/prompts.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,14 @@ project_name:
error_message: |
It must contain only alphanumeric symbols, spaces, underscores and hyphens and
be at least 2 characters long.
example_pipeline:
title: "Example Pipeline"
text: |
Select whether you would like an example spaceflights pipeline included in your project.
To skip this step in the future use --example=y/n
To read more about how examples work visit: kedro.org/
Would you like to include an example pipeline? \[y/N]:
regex_validator: "(?i)^(y|yes|n|no)$"
error_message: |
It must contain only y, n, YES, NO, case insensitive.
Loading

0 comments on commit 9ea4ce8

Please sign in to comment.