Skip to content

Commit

Permalink
Stop relying on .egg-info directories
Browse files Browse the repository at this point in the history
Fix gh-2567.

Signed-off-by: Juan Luis Cano Rodríguez <juan_luis_cano@mckinsey.com>
  • Loading branch information
astrojuanlu committed May 30, 2023
1 parent 922503e commit 20bba12
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 46 deletions.
74 changes: 55 additions & 19 deletions kedro/framework/cli/micropkg.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@
from typing import Iterable, List, Tuple, Union

import click
from build.util import project_wheel_metadata
from packaging.requirements import InvalidRequirement, Requirement
from rope.base.project import Project
from rope.contrib import generate
from rope.refactor.move import MoveModule
from rope.refactor.rename import Rename
from setuptools.discovery import FlatLayoutPackageFinder

from kedro.framework.cli.pipeline import (
_assert_pkg_name_ok,
Expand Down Expand Up @@ -131,7 +133,7 @@ def pull_package( # pylint:disable=unused-argument, too-many-arguments
click.secho(message, fg="green")


# pylint: disable=too-many-arguments, too-many-locals
# pylint: disable=too-many-arguments
def _pull_package(
package_path: str,
metadata: ProjectMetadata,
Expand All @@ -144,25 +146,50 @@ def _pull_package(
temp_dir_path = Path(temp_dir).resolve()
_unpack_sdist(package_path, temp_dir_path, fs_args)

egg_info_files = list((temp_dir_path).rglob("*.egg-info"))
if len(egg_info_files) != 1:
raise KedroCliError(
f"More than 1 or no egg-info files found from {package_path}. "
f"There has to be exactly one egg-info directory."
# temp_dir_path is the parent directory of the project root dir
contents = [member for member in temp_dir_path.iterdir() if member.is_dir()]
if len(contents) != 1:
raise RuntimeError(
"Invalid sdist was extracted: exactly one directory was expected, "
f"got {contents}"
)
egg_info_file = egg_info_files[0]
package_name = egg_info_file.stem
package_requirements = egg_info_file.parent / "setup.py"

# Finds a string representation of 'install_requires' list from setup.py
reqs_list_pattern = r"install_requires\=(.*?)\,\n"
list_reqs = re.findall(
reqs_list_pattern, package_requirements.read_text(encoding="utf-8")
)
project_root_dir = contents[0]

# This is much slower than parsing the requirements
# directly from the metadata files
# because it installs the package in an isolated environment,
# but it's the only reliable way of doing it
# without making assumptions on the project metadata.
library_meta = project_wheel_metadata(project_root_dir)

# Project name will be `my-pipeline` even if `setup.py` says `my_pipeline`
# because standards mandate normalization of names for comparison.
# The proper way to get it would be
# project_name = library_meta.get("Name")
# However, the rest of the code somehow mixes package name with project name
# and expects the non-normalized version, so we have to find it.
# In a pre-pyproject.toml world we could just parse setup.py,
# but now things are more complicated.
# We will assume flat (non-src) layout,
# which is what `kedro micropkg package` produces,
# and use setuptools utilities to save some complexity.
packages = [
package
for package in FlatLayoutPackageFinder().find(project_root_dir)
if "." not in package
]
if len(packages) != 1:
# Should not happen if user is calling `micropkg pull`
# with the result of a `micropkg package`,
# and in general if the distribution only contains one package (most likely),
# but helps give a sensible error message otherwise
raise RuntimeError(
"Invalid package contents: exactly one package was expected, "
f"got {packages}"
)
package_name = packages[0]

# Finds all elements from the above string representation of a list
reqs_element_pattern = r"\'(.*?)\'"
package_reqs = re.findall(reqs_element_pattern, list_reqs[0])
package_reqs = _get_all_library_reqs(library_meta)

if package_reqs:
requirements_txt = metadata.source_dir / "requirements.txt"
Expand All @@ -172,7 +199,7 @@ def _pull_package(
_install_files(
metadata,
package_name,
egg_info_file.parent,
project_root_dir,
env,
alias,
destination,
Expand Down Expand Up @@ -878,6 +905,15 @@ def _append_package_reqs(
)


def _get_all_library_reqs(metadata):
# Inspired by https://discuss.python.org/t/\
# programmatically-getting-non-optional-requirements-of-current-directory/26963/2
# but leaving markers intact
return [
str(Requirement(dep_str)) for dep_str in metadata.get_all("Requires-Dist", [])
]


def _safe_parse_requirements(
requirements: str | Iterable[str],
) -> set[Requirement]:
Expand Down
27 changes: 0 additions & 27 deletions tests/framework/cli/micropkg/test_micropkg_pull.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,33 +424,6 @@ def test_pull_sdist_fs_args(
"file", fs_arg_1=1, fs_arg_2={"fs_arg_2_nested_1": 2}
)

def test_pull_two_egg_info(
self, fake_project_cli, fake_repo_path, mocker, tmp_path, fake_metadata
):
"""Test for pulling an sdist file with more than one
dist-info directory.
"""
call_pipeline_create(fake_project_cli, fake_metadata)
call_micropkg_package(fake_project_cli, fake_metadata)
sdist_file = (
fake_repo_path / "dist" / _get_sdist_name(name=PIPELINE_NAME, version="0.1")
)
assert sdist_file.is_file()

(tmp_path / f"{PIPELINE_NAME}-0.1" / "dummy.egg-info").mkdir(parents=True)

mocker.patch(
"kedro.framework.cli.micropkg.tempfile.TemporaryDirectory",
return_value=tmp_path,
)
result = CliRunner().invoke(
fake_project_cli,
["micropkg", "pull", str(sdist_file)],
obj=fake_metadata,
)
assert result.exit_code
assert "Error: More than 1 or no egg-info files found" in result.output

@pytest.mark.parametrize("env", [None, "local"])
@pytest.mark.parametrize("alias", [None, "alias_path"])
def test_pull_tests_missing(
Expand Down

0 comments on commit 20bba12

Please sign in to comment.