Skip to content

Commit

Permalink
[KED-2856][KED-2919] Allow packaging a modular pipeline with the same…
Browse files Browse the repository at this point in the history
… name as the project's package name (kedro-org#996)

Signed-off-by: Laurens Vijnck <laurens_vijnck@mckinsey.com>
  • Loading branch information
Lorena Bălan authored and lvijnck committed Apr 7, 2022
1 parent 1402a63 commit 5c4d28c
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 19 deletions.
1 change: 1 addition & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
* Relaxed the bounds on the `plotly` requirement for `plotly.PlotlyDataSet`.
* `kedro pipeline package <pipeline>` now raises an error if the `<pipeline>` argument doesn't look like a valid Python module path (e.g. has `/` instead of `.`).
* Fixed slow startup because of catalog processing by reducing the exponential growth of extra processing during `_FrozenDatasets` creations.
* Fixed a bug where packaging or pulling a modular pipeline with the same name as the project's package name would throw an error (or silently pass without including the pipeline source code in the wheel file).

## Minor breaking changes to the API

Expand Down
54 changes: 37 additions & 17 deletions kedro/framework/cli/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -563,14 +563,28 @@ def _refactor_code_for_unpacking(
|__ <pipeline_name>
|__ __init__.py
"""

def _move_package_with_conflicting_name(
target: Path, original_name: str, desired_name: str = None
) -> Path:
_rename_package(project, original_name, "tmp_name")
full_path = _create_nested_package(project, target)
_move_package(project, "tmp_name", target.as_posix())
desired_name = desired_name or original_name
_rename_package(project, (target / "tmp_name").as_posix(), desired_name)
return full_path

pipeline_name = package_path.stem
if alias:
_rename_package(project, pipeline_name, alias)
pipeline_name = alias

package_target = Path(project_metadata.package_name) / "pipelines"
full_path = _create_nested_package(project, package_target)
_move_package(project, pipeline_name, package_target.as_posix())
if pipeline_name == project_metadata.package_name:
full_path = _move_package_with_conflicting_name(package_target, pipeline_name)
else:
full_path = _create_nested_package(project, package_target)
_move_package(project, pipeline_name, package_target.as_posix())
refactored_package_path = full_path / pipeline_name

if not tests_path.exists():
Expand All @@ -581,11 +595,10 @@ def _refactor_code_for_unpacking(
# hence we give it a temp name, create the expected
# nested folder structure, move the contents there,
# then rename the temp name to <pipeline_name>.
_rename_package(project, "tests", "tmp_name")
tests_target = Path("tests") / "pipelines"
full_path = _create_nested_package(project, tests_target)
_move_package(project, "tmp_name", tests_target.as_posix())
_rename_package(project, (tests_target / "tmp_name").as_posix(), pipeline_name)
full_path = _move_package_with_conflicting_name(
tests_target, original_name="tests", desired_name=pipeline_name
)
refactored_tests_path = full_path / pipeline_name

return refactored_package_path, refactored_tests_path
Expand Down Expand Up @@ -831,6 +844,15 @@ def _refactor_code_for_package(
|__ __init__.py
|__ test_pipeline.py
"""

def _move_package_with_conflicting_name(target: Path, conflicting_name: str):
tmp_name = "tmp_name"
tmp_module = target.parent / tmp_name
_rename_package(project, target.as_posix(), tmp_name)
_move_package(project, tmp_module.as_posix(), "")
shutil.rmtree(Path(project.address) / conflicting_name)
_rename_package(project, tmp_name, conflicting_name)

# Copy source in appropriate folder structure
package_target = package_path.relative_to(project_metadata.source_dir)
full_path = _create_nested_package(project, package_target)
Expand All @@ -845,10 +867,15 @@ def _refactor_code_for_package(
_sync_dirs(tests_path, full_path, overwrite=True)

# Refactor imports in src/package_name/pipelines/pipeline_name
# and imports of `pipeline_name` in tests
_move_package(project, package_target.as_posix(), "")
# and imports of `pipeline_name` in tests.
pipeline_name = package_target.stem
if pipeline_name == project_metadata.package_name:
_move_package_with_conflicting_name(package_target, pipeline_name)
else:
_move_package(project, package_target.as_posix(), "")
shutil.rmtree(Path(project.address) / project_metadata.package_name)

if alias:
pipeline_name = package_target.stem
_rename_package(project, pipeline_name, alias)

if tests_path.exists():
Expand All @@ -858,14 +885,7 @@ def _refactor_code_for_package(
# with the existing "tests" folder at top level;
# hence we give it a temp name, move it, delete tests/ and
# rename the temp name to tests.
tmp_name = "extracted_tests"
tmp_module = tests_target.parent / tmp_name
_rename_package(project, tests_target.as_posix(), tmp_name)
_move_package(project, tmp_module.as_posix(), "")
shutil.rmtree(Path(project.address) / "tests")
_rename_package(project, tmp_name, "tests")

shutil.rmtree(Path(project.address) / project_metadata.package_name)
_move_package_with_conflicting_name(tests_target, "tests")


_SourcePathType = Union[Path, List[Tuple[Path, str]]]
Expand Down
53 changes: 51 additions & 2 deletions tests/framework/cli/pipeline/test_pipeline_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,56 @@ def test_package_pipeline(
wheel_location=wheel_location, package_name=package_name, version=version
)

def test_pipeline_package_same_name_as_package_name(
self, fake_metadata, fake_project_cli, fake_repo_path
):
"""Create modular pipeline with the same name as the
package name, then package as is. The command should run
and the resulting wheel should have all expected contents.
"""
pipeline_name = fake_metadata.package_name
result = CliRunner().invoke(
fake_project_cli, ["pipeline", "create", pipeline_name], obj=fake_metadata
)
assert result.exit_code == 0

result = CliRunner().invoke(
fake_project_cli, ["pipeline", "package", pipeline_name], obj=fake_metadata
)
wheel_location = fake_repo_path / "src" / "dist"

assert result.exit_code == 0
assert f"Location: {wheel_location}" in result.output
self.assert_wheel_contents_correct(
wheel_location=wheel_location, package_name=pipeline_name
)

def test_pipeline_package_same_name_as_package_name_alias(
self, fake_metadata, fake_project_cli, fake_repo_path
):
"""Create modular pipeline, then package under alias
the same name as the package name. The command should run
and the resulting wheel should have all expected contents.
"""
alias = fake_metadata.package_name
result = CliRunner().invoke(
fake_project_cli, ["pipeline", "create", PIPELINE_NAME], obj=fake_metadata
)
assert result.exit_code == 0

result = CliRunner().invoke(
fake_project_cli,
["pipeline", "package", PIPELINE_NAME, "--alias", alias],
obj=fake_metadata,
)
wheel_location = fake_repo_path / "src" / "dist"

assert result.exit_code == 0
assert f"Location: {wheel_location}" in result.output
self.assert_wheel_contents_correct(
wheel_location=wheel_location, package_name=alias
)

@pytest.mark.parametrize("existing_dir", [True, False])
def test_pipeline_package_to_destination(
self, fake_project_cli, existing_dir, tmp_path, fake_metadata
Expand Down Expand Up @@ -188,8 +238,7 @@ def test_package_pipeline_bad_alias(

def test_package_pipeline_invalid_module_path(self, fake_project_cli):
result = CliRunner().invoke(
fake_project_cli,
["pipeline", "package", f"pipelines/{PIPELINE_NAME}"],
fake_project_cli, ["pipeline", "package", f"pipelines/{PIPELINE_NAME}"]
)
error_message = (
"The pipeline location you provided is not a valid Python module path"
Expand Down
61 changes: 61 additions & 0 deletions tests/framework/cli/pipeline/test_pipeline_pull.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,67 @@ def test_pipeline_alias_refactors_imports(
)
assert expected_stmt in file_content

def test_pipeline_pull_from_aliased_pipeline_conflicting_name(
self, fake_project_cli, fake_package_path, fake_repo_path, fake_metadata
):
package_name = fake_metadata.package_name
call_pipeline_create(fake_project_cli, fake_metadata)
pipeline_file = fake_package_path / "pipelines" / PIPELINE_NAME / "pipeline.py"
import_stmt = f"import {package_name}.pipelines.{PIPELINE_NAME}.nodes"
with pipeline_file.open("a") as f:
f.write(import_stmt)

call_pipeline_package(
cli=fake_project_cli, metadata=fake_metadata, alias=package_name
)
wheel_file = (
fake_repo_path
/ "src"
/ "dist"
/ _get_wheel_name(name=package_name, version="0.1")
)
assert wheel_file.is_file()

result = CliRunner().invoke(
fake_project_cli, ["pipeline", "pull", str(wheel_file)], obj=fake_metadata
)
assert result.exit_code == 0, result.output

path = fake_package_path / "pipelines" / package_name / "pipeline.py"
file_content = path.read_text()
expected_stmt = f"import {package_name}.pipelines.{package_name}.nodes"
assert expected_stmt in file_content

def test_pipeline_pull_as_aliased_pipeline_conflicting_name(
self, fake_project_cli, fake_package_path, fake_repo_path, fake_metadata
):
package_name = fake_metadata.package_name
call_pipeline_create(fake_project_cli, fake_metadata)
pipeline_file = fake_package_path / "pipelines" / PIPELINE_NAME / "pipeline.py"
import_stmt = f"import {package_name}.pipelines.{PIPELINE_NAME}.nodes"
with pipeline_file.open("a") as f:
f.write(import_stmt)

call_pipeline_package(cli=fake_project_cli, metadata=fake_metadata)
wheel_file = (
fake_repo_path
/ "src"
/ "dist"
/ _get_wheel_name(name=PIPELINE_NAME, version="0.1")
)
assert wheel_file.is_file()

result = CliRunner().invoke(
fake_project_cli,
["pipeline", "pull", str(wheel_file), "--alias", package_name],
obj=fake_metadata,
)
assert result.exit_code == 0, result.output
path = fake_package_path / "pipelines" / package_name / "pipeline.py"
file_content = path.read_text()
expected_stmt = f"import {package_name}.pipelines.{package_name}.nodes"
assert expected_stmt in file_content

def test_pull_whl_fs_args(
self, fake_project_cli, fake_repo_path, mocker, tmp_path, fake_metadata
):
Expand Down

0 comments on commit 5c4d28c

Please sign in to comment.