diff --git a/news/11137.feature.rst b/news/11137.feature.rst new file mode 100644 index 00000000000..c5986f4fd89 --- /dev/null +++ b/news/11137.feature.rst @@ -0,0 +1,3 @@ +Record in wheel cache entries the URL of the original artifact that was downloaded +to build the cached wheels. The record is named ``origin.json`` and uses the PEP 610 +Direct URL format. diff --git a/src/pip/_internal/cache.py b/src/pip/_internal/cache.py index 1d6df220118..1edcc76722b 100644 --- a/src/pip/_internal/cache.py +++ b/src/pip/_internal/cache.py @@ -5,12 +5,14 @@ import json import logging import os +from pathlib import Path from typing import Any, Dict, List, Optional, Set from pip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version from pip._vendor.packaging.utils import canonicalize_name from pip._internal.exceptions import InvalidWheelFilename +from pip._internal.models.direct_url import DirectUrl from pip._internal.models.format_control import FormatControl from pip._internal.models.link import Link from pip._internal.models.wheel import Wheel @@ -19,6 +21,8 @@ logger = logging.getLogger(__name__) +ORIGIN_JSON_NAME = "origin.json" + def _hash_dict(d: Dict[str, str]) -> str: """Return a stable sha224 of a dictionary.""" @@ -204,6 +208,10 @@ def __init__( ): self.link = link self.persistent = persistent + self.origin: Optional[DirectUrl] = None + origin_direct_url_path = Path(self.link.file_path).parent / ORIGIN_JSON_NAME + if origin_direct_url_path.exists(): + self.origin = DirectUrl.from_json(origin_direct_url_path.read_text()) class WheelCache(Cache): @@ -262,3 +270,20 @@ def get_cache_entry( return CacheEntry(retval, persistent=False) return None + + @staticmethod + def record_download_origin(cache_dir: str, download_info: DirectUrl) -> None: + origin_path = Path(cache_dir) / ORIGIN_JSON_NAME + if origin_path.is_file(): + origin = DirectUrl.from_json(origin_path.read_text()) + # TODO: use DirectUrl.equivalent when https://github.com/pypa/pip/pull/10564 
+ # is merged. + if origin.url != download_info.url: + logger.warning( + "Origin URL %s in cache entry %s does not match download URL %s. " + "This is likely a pip bug or a cache corruption issue.", + origin.url, + cache_dir, + download_info.url, + ) + origin_path.write_text(download_info.to_json()) diff --git a/src/pip/_internal/resolution/legacy/resolver.py b/src/pip/_internal/resolution/legacy/resolver.py index 1225ae70fcb..ddfda41279c 100644 --- a/src/pip/_internal/resolution/legacy/resolver.py +++ b/src/pip/_internal/resolution/legacy/resolver.py @@ -431,6 +431,8 @@ def _populate_link(self, req: InstallRequirement) -> None: logger.debug("Using cached wheel link: %s", cache_entry.link) if req.link is req.original_link and cache_entry.persistent: req.original_link_is_in_wheel_cache = True + if cache_entry.origin is not None: + req.download_info = cache_entry.origin req.link = cache_entry.link def _get_dist_for(self, req: InstallRequirement) -> BaseDistribution: diff --git a/src/pip/_internal/resolution/resolvelib/candidates.py b/src/pip/_internal/resolution/resolvelib/candidates.py index d1470ecbf4e..2561e03f0ed 100644 --- a/src/pip/_internal/resolution/resolvelib/candidates.py +++ b/src/pip/_internal/resolution/resolvelib/candidates.py @@ -287,6 +287,8 @@ def __init__( and template.link is template.original_link ): ireq.original_link_is_in_wheel_cache = True + if cache_entry.origin is not None: + ireq.download_info = cache_entry.origin super().__init__( link=link, diff --git a/src/pip/_internal/wheel_builder.py b/src/pip/_internal/wheel_builder.py index d0663443b22..77a17ff0f15 100644 --- a/src/pip/_internal/wheel_builder.py +++ b/src/pip/_internal/wheel_builder.py @@ -354,6 +354,12 @@ def build( req.editable and req.permit_editable_wheels, ) if wheel_file: + # Record the download origin in the cache + if req.download_info is not None: + # download_info is guaranteed to be set because when we build an + # InstallRequirement it has been through the preparer 
before, but + # let's be cautious. + wheel_cache.record_download_origin(cache_dir, req.download_info) # Update the link for this. req.link = Link(path_to_url(wheel_file)) req.local_file_path = req.link.file_path diff --git a/tests/functional/test_install.py b/tests/functional/test_install.py index bec8b72fc96..3bf2579ed4b 100644 --- a/tests/functional/test_install.py +++ b/tests/functional/test_install.py @@ -1550,9 +1550,9 @@ def test_install_builds_wheels(script: PipTestEnvironment, data: TestData) -> No ) # Must have installed it all assert expected in str(res), str(res) - wheels = [] + wheels: List[str] = [] for _, _, files in os.walk(wheels_cache): - wheels.extend(files) + wheels.extend(f for f in files if f.endswith(".whl")) # and built wheels for upper and wheelbroken assert "Building wheel for upper" in str(res), str(res) assert "Building wheel for wheelb" in str(res), str(res)