Skip to content

Commit

Permalink
Change to short names for tools in config file
Browse files Browse the repository at this point in the history
Signed-off-by: Dmitry Sorokin <dmd40in@gmail.com>
  • Loading branch information
DimedS committed Jan 12, 2024
1 parent d433ff3 commit 829d048
Show file tree
Hide file tree
Showing 4 changed files with 213 additions and 302 deletions.
4 changes: 2 additions & 2 deletions features/steps/cli_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ def create_config_file_without_starter(context):
context.root_project_dir = context.temp_dir / context.project_name
context.package_name = context.project_name.replace("-", "_")
config = {
"tools": "1-5",
"tools": "lint, test, log, docs, data",
"project_name": context.project_name,
"example_pipeline": "no",
"repo_name": context.project_name,
Expand All @@ -189,7 +189,7 @@ def create_config_file_with_tools(context, tools):
It takes a custom tools list and sets example prompt to `y`.
"""

tools_str = tools if tools != "none" else ""
tools_str = tools

context.config_file = context.temp_dir / "config.yml"
context.project_name = "project-dummy"
Expand Down
12 changes: 6 additions & 6 deletions features/tools.feature
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ Feature: New Kedro project with tools
Then I should get a successful exit code

Scenario: Create a new Kedro project with all tools except 'viz' and 'pyspark'
Given I have prepared a config file with tools "1,2,3,4,5"
Given I have prepared a config file with tools "lint, test, log, docs, data"
When I run a non-interactive kedro new without starter
Then the expected tool directories and files should be created with "1,2,3,4,5"
Then the expected tool directories and files should be created with "lint, test, log, docs, data"
Given I have installed the project dependencies
When I execute the kedro command "run"
Then I should get a successful exit code
Expand All @@ -25,17 +25,17 @@ Feature: New Kedro project with tools
Then I should get a successful exit code

Scenario: Create a new Kedro project with only 'pyspark' tool
Given I have prepared a config file with tools "6"
Given I have prepared a config file with tools "pyspark"
When I run a non-interactive kedro new without starter
Then the expected tool directories and files should be created with "6"
Then the expected tool directories and files should be created with "pyspark"
Given I have installed the project dependencies
When I execute the kedro command "run"
Then I should get a successful exit code

Scenario: Create a new Kedro project with only 'viz' tool
Given I have prepared a config file with tools "7"
Given I have prepared a config file with tools "viz"
When I run a non-interactive kedro new without starter
Then the expected tool directories and files should be created with "7"
Then the expected tool directories and files should be created with "viz"
Given I have installed the project dependencies
When I execute the kedro command "run"
Then I should get a successful exit code
196 changes: 97 additions & 99 deletions kedro/framework/cli/starters.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,28 +207,26 @@ def _validate_selected_tools(selected_tools: str | None) -> None:


def _print_selection_and_prompt_info(
selected_tools: str | None, example_pipeline: str | None, interactive: bool
selected_tools: str, example_pipeline: str, interactive: bool
) -> None:
# Confirm tools selection
if selected_tools is not None:
if selected_tools == "['None']":
click.secho(
"You have selected no project tools",
fg="green",
)
else:
click.secho(
f"You have selected the following project tools: {selected_tools}",
fg="green",
)
if selected_tools == "['None']":
click.secho(
"You have selected no project tools",
fg="green",
)
else:
click.secho(
f"You have selected the following project tools: {selected_tools}",
fg="green",
)

# Confirm example selection
if example_pipeline is not None:
if example_pipeline:
click.secho(
"It has been created with an example pipeline.",
fg="green",
)
if example_pipeline == "True":
click.secho(
"It has been created with an example pipeline.",
fg="green",
)

# Give hint for skipping interactive flow
if interactive:
Expand Down Expand Up @@ -353,8 +351,8 @@ def new( # noqa: PLR0913
# If interactive flow used, print hint
interactive_flow = prompts_required and not config_path
_print_selection_and_prompt_info(
extra_context.get("tools"),
extra_context.get("example_pipeline"),
extra_context["tools"],
extra_context["example_pipeline"],
interactive_flow,
)

Expand Down Expand Up @@ -525,7 +523,8 @@ def _get_extra_context( # noqa: PLR0913
starter_alias: str | None,
) -> dict[str, str]:
"""Generates a config dictionary that will be passed to cookiecutter as `extra_context`, based
on CLI flags, user prompts, or a configuration file.
on CLI flags, user prompts, configuration file or Default values.
It is crucial to return a dictionary with string values, otherwise, there will be issues with Cookiecutter.
Args:
prompts_required: a dictionary of all the prompts that will be shown to
Expand All @@ -543,64 +542,51 @@ def _get_extra_context( # noqa: PLR0913
None in case the flag wasn't used
Returns:
the prompts_required dictionary, with all the redundant information removed.
Config dictionary, passed the necessary processing, with default values if needed.
"""
if config_path:
extra_context = _fetch_config_from_file(config_path)
_validate_config_file_against_prompts(extra_context, prompts_required)
_validate_config_file_inputs(extra_context, starter_alias)
extra_context = _fetch_validate_parse_config_from_file(
config_path, prompts_required, starter_alias
)
else:
extra_context = _fetch_config_from_user_prompts(
extra_context = _fetch_validate_parse_config_from_user_prompts(
prompts_required, cookiecutter_context
)

# Format
extra_context.setdefault("kedro_version", version)

converted_tools = _convert_tool_names_to_numbers(selected_tools)

if converted_tools is not None:
extra_context["tools"] = converted_tools

# Update extra_context, if CLI inputs are available
if selected_tools is not None:
tools_numbers = _convert_tool_short_names_to_numbers(selected_tools)
extra_context["tools"] = _convert_tool_numbers_to_readable_names(tools_numbers)
if project_name is not None:
extra_context["project_name"] = project_name
if example_pipeline is not None:
extra_context["example_pipeline"] = str(_parse_yes_no_to_bool(example_pipeline))

# Map the selected tools lists to readable name
tools_context = extra_context.get("tools")
tools = _parse_tools_input(tools_context)

# Check if no tools selected
if not tools:
extra_context["tools"] = str(["None"])
else:
extra_context["tools"] = str([NUMBER_TO_TOOLS_NAME[tool] for tool in tools])

extra_context["example_pipeline"] = _parse_yes_no_to_bool(
example_pipeline
if example_pipeline is not None
else extra_context.get("example_pipeline", "no")
)
# set defaults for required fields, will be used mostly for starters
extra_context.setdefault("kedro_version", version)
extra_context.setdefault("tools", str(["None"]))
extra_context.setdefault("example_pipeline", "False")

return extra_context


def _convert_tool_names_to_numbers(selected_tools: str | None) -> str | None:
"""Prepares tools selection from the CLI input to the correct format
def _convert_tool_short_names_to_numbers(selected_tools: str | None) -> list:
"""Prepares tools selection from the CLI or config input to the correct format
to be put in the project configuration, if it exists.
Replaces tool strings with the corresponding prompt number.
Args:
selected_tools: a string containing the value for the --tools flag,
selected_tools: a string containing the value for the --tools flag or config file,
or None in case none were provided, i.e. lint,docs.
Returns:
String with the numbers corresponding to the desired tools, or
None in case the --tools flag was not used.
"""
if selected_tools is None or selected_tools.lower() == "none":
return None
return []
if selected_tools.lower() == "all":
return ",".join(NUMBER_TO_TOOLS_NAME.keys())
return list(NUMBER_TO_TOOLS_NAME.keys())

tools = []
for tool in selected_tools.lower().split(","):
Expand All @@ -611,11 +597,29 @@ def _convert_tool_names_to_numbers(selected_tools: str | None) -> str | None:
# Remove duplicates if any
tools = sorted(list(set(tools)))

return ",".join(tools)
return tools


def _convert_tool_numbers_to_readable_names(tools_numbers: list | None) -> str:
"""Transform the list of tool numbers into a list of readable names, using 'None' for empty lists.
Then, convert the result into a string format to prevent issues with Cookiecutter.
"""
tools_names = [NUMBER_TO_TOOLS_NAME[tool] for tool in tools_numbers]
if tools_names == []:
tools_names = ["None"]
return str(tools_names)


def _fetch_config_from_file(config_path: str) -> dict[str, str]:
def _fetch_validate_parse_config_from_file(
config_path: str, prompts_required: dict, starter_alias: str | None
) -> dict[str, str]:
"""Obtains configuration for a new kedro project non-interactively from a file.
Validates that:
1. All keys specified in prompts_required are retrieved from the configuration.
2. The options 'tools' and 'example_pipeline' are not used in the configuration when any starter option is selected.
3. Variables sourced from the configuration file adhere to the expected format.
Parse tools from short names to list of numbers
Args:
config_path: The path of the config.yml which should contain the data required
Expand All @@ -641,11 +645,28 @@ def _fetch_config_from_file(config_path: str) -> dict[str, str]:
f"Failed to generate project: could not load config at {config_path}."
) from exc

# The return type defined is more specific than the "Any" type config return from yaml.safe_load
return config # type: ignore[no-any-return]
_validate_config_file_against_prompts(config, prompts_required)

if starter_alias and ("tools" in config or "example_pipeline" in config):
raise KedroCliError(
"The --starter flag can not be used with `example_pipeline` and/or `tools` keys in the config file."
)

_validate_regex("project_name", config.get("project_name", "New Kedro Project"))

example_pipeline = config.get("example_pipeline", "no")
_validate_regex("yes_no", example_pipeline)
config["example_pipeline"] = str(_parse_yes_no_to_bool(example_pipeline))

tools_short_names = config.get("tools", "none").lower()
_validate_selected_tools(tools_short_names)
tools_numbers = _convert_tool_short_names_to_numbers(tools_short_names)
config["tools"] = _convert_tool_numbers_to_readable_names(tools_numbers)

return config


def _fetch_config_from_user_prompts(
def _fetch_validate_parse_config_from_user_prompts(
prompts: dict[str, Any], cookiecutter_context: OrderedDict | None
) -> dict[str, str]:
"""Interactively obtains information from user prompts.
Expand Down Expand Up @@ -682,6 +703,16 @@ def _fetch_config_from_user_prompts(
if user_input:
prompt.validate(user_input)
config[variable_name] = user_input

if "tools" in config:
# convert tools input to list of numbers and validate
tools_numbers = _parse_tools_input(config["tools"])
_validate_selection(tools_numbers)
config["tools"] = _convert_tool_numbers_to_readable_names(tools_numbers)
if "example_pipeline" in config:
example_pipeline_bool = _parse_yes_no_to_bool(config["example_pipeline"])
config["example_pipeline"] = str(example_pipeline_bool)

return config


Expand Down Expand Up @@ -729,11 +760,8 @@ def _make_cookiecutter_args_and_fetch_template(
if directory:
cookiecutter_args["directory"] = directory

# If 'tools' or 'example_pipeline' are not specified in prompts.yml, CLI or config.yml,
# default options will be used instead
# That can be when starter used or while loading from config.yml
tools: str | list = config.get("tools", [])
example_pipeline = config.get("example_pipeline", False)
tools = config["tools"]
example_pipeline = config["example_pipeline"]
starter_path = "git+https://github.com/kedro-org/kedro-starters.git"

if "PySpark" in tools and "Kedro Viz" in tools:
Expand All @@ -745,7 +773,7 @@ def _make_cookiecutter_args_and_fetch_template(
elif "Kedro Viz" in tools:
# Use the spaceflights-pandas-viz starter if only Kedro Viz is chosen.
cookiecutter_args["directory"] = "spaceflights-pandas-viz"
elif example_pipeline:
elif example_pipeline == "True":
# Use spaceflights-pandas starter if example was selected, but PySpark or Viz wasn't
cookiecutter_args["directory"] = "spaceflights-pandas"
else:
Expand Down Expand Up @@ -792,36 +820,6 @@ def _validate_config_file_against_prompts(
)


def _validate_config_file_inputs(
config: dict[str, str], starter_alias: str | None
) -> None:
"""Checks that variables provided through the config file are of the expected format. This
validates the config provided by `kedro new --config` in a similar way to `prompts.yml` for starters.
Also validates that "tools" or "example_pipeline" options cannot be used in config when any starter option is
selected.
Args:
config: The config as a dictionary
starter_alias: Starter alias if it was provided from CLI, otherwise None
Raises:
SystemExit: If the provided variables are not properly formatted.
"""
if starter_alias and ("tools" in config or "example_pipeline" in config):
raise KedroCliError(
"The --starter flag can not be used with `example_pipeline` and/or `tools` keys in the config file."
)

_validate_regex("project_name", config.get("project_name", "New Kedro Project"))

input_tools = config.get("tools", "none")
_validate_regex("tools", input_tools.lower())
selected_tools = _parse_tools_input(input_tools)
_validate_selection(selected_tools)

_validate_regex("yes_no", config.get("example_pipeline", "no"))


def _validate_selection(tools: list[str]) -> None:
# start validating from the end, when user select 1-20, it will generate a message
# '20' is not a valid selection instead of '8'
Expand All @@ -847,6 +845,10 @@ def _validate_range(start: Any, end: Any) -> None:
message = f"'{start}-{end}' is an invalid range for project tools.\nPlease ensure range values go from smaller to larger."
click.secho(message, fg="red", err=True)
sys.exit(1)
if int(end) > max([int(num) for num in NUMBER_TO_TOOLS_NAME]):
message = f"'{start}-{end}' is an invalid range for project tools.\n{end} is too large."
click.secho(message, fg="red", err=True)
sys.exit(1)

if not tools_str:
return [] # pragma: no cover
Expand Down Expand Up @@ -937,10 +939,6 @@ def validate(self, user_input: str) -> None:
click.secho(self.error_message, fg="red", err=True)
sys.exit(1)

if self.title == "Project Tools":
# Validate user input
_validate_selection(_parse_tools_input(user_input))


# noqa: unused-argument
def _remove_readonly(
Expand Down
Loading

0 comments on commit 829d048

Please sign in to comment.