Skip to content

Commit

Permalink
Merge pull request #11137 from sbidoul/download-info-sbi
Browse files Browse the repository at this point in the history
Add download_info: DirectUrl to InstallRequirement
  • Loading branch information
sbidoul authored Jun 1, 2022
2 parents 713e00f + 05d2b85 commit e58a8a5
Show file tree
Hide file tree
Showing 9 changed files with 259 additions and 13 deletions.
3 changes: 3 additions & 0 deletions news/11137.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Record in wheel cache entries the URL of the original artifact that was downloaded
to build the cached wheels. The record is named ``origin.json`` and uses the PEP 610
Direct URL format.
25 changes: 25 additions & 0 deletions src/pip/_internal/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
import json
import logging
import os
from pathlib import Path
from typing import Any, Dict, List, Optional, Set

from pip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version
from pip._vendor.packaging.utils import canonicalize_name

from pip._internal.exceptions import InvalidWheelFilename
from pip._internal.models.direct_url import DirectUrl
from pip._internal.models.format_control import FormatControl
from pip._internal.models.link import Link
from pip._internal.models.wheel import Wheel
Expand All @@ -19,6 +21,8 @@

logger = logging.getLogger(__name__)

ORIGIN_JSON_NAME = "origin.json"


def _hash_dict(d: Dict[str, str]) -> str:
"""Return a stable sha224 of a dictionary."""
Expand Down Expand Up @@ -204,6 +208,10 @@ def __init__(
):
self.link = link
self.persistent = persistent
self.origin: Optional[DirectUrl] = None
origin_direct_url_path = Path(self.link.file_path).parent / ORIGIN_JSON_NAME
if origin_direct_url_path.exists():
self.origin = DirectUrl.from_json(origin_direct_url_path.read_text())


class WheelCache(Cache):
Expand Down Expand Up @@ -262,3 +270,20 @@ def get_cache_entry(
return CacheEntry(retval, persistent=False)

return None

@staticmethod
def record_download_origin(cache_dir: str, download_info: DirectUrl) -> None:
    """Record ``download_info`` as the origin of the cache entry in ``cache_dir``.

    The record is written to the file named by ``ORIGIN_JSON_NAME`` inside
    ``cache_dir``. If such a file already exists, it is parsed first and a
    warning is logged when its URL differs from ``download_info.url``. The
    file is then (re)written with the serialized ``download_info`` in every
    case.

    An existing origin file that cannot be read or parsed is reported with a
    warning and overwritten, instead of aborting the caller.

    :param cache_dir: Directory of the wheel cache entry.
    :param download_info: Direct URL information to persist for the entry.
    """
    origin_path = Path(cache_dir) / ORIGIN_JSON_NAME
    if origin_path.is_file():
        try:
            # Pin the encoding so the result does not depend on the locale.
            raw_origin = origin_path.read_text(encoding="utf-8")
            origin = DirectUrl.from_json(raw_origin)
        except Exception as exc:
            # Deliberately broad: the origin record is best-effort metadata,
            # and the parser's exception types are declared outside this
            # block. A bad record is replaced by the write below.
            logger.warning(
                "Could not read origin file %s in cache entry (%s). "
                "Will attempt to overwrite it.",
                origin_path,
                exc,
            )
        else:
            # TODO: use DirectUrl.equivalent when
            # https://github.com/pypa/pip/pull/10564 is merged.
            if origin.url != download_info.url:
                logger.warning(
                    "Origin URL %s in cache entry %s does not match download URL %s. "
                    "This is likely a pip bug or a cache corruption issue.",
                    origin.url,
                    cache_dir,
                    download_info.url,
                )
    origin_path.write_text(download_info.to_json(), encoding="utf-8")
31 changes: 30 additions & 1 deletion src/pip/_internal/operations/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
)
from pip._internal.index.package_finder import PackageFinder
from pip._internal.metadata import BaseDistribution
from pip._internal.models.direct_url import ArchiveInfo
from pip._internal.models.link import Link
from pip._internal.models.wheel import Wheel
from pip._internal.network.download import BatchDownloader, Downloader
Expand All @@ -35,9 +36,18 @@
from pip._internal.network.session import PipSession
from pip._internal.operations.build.build_tracker import BuildTracker
from pip._internal.req.req_install import InstallRequirement
from pip._internal.utils.direct_url_helpers import (
direct_url_for_editable,
direct_url_from_link,
)
from pip._internal.utils.hashes import Hashes, MissingHashes
from pip._internal.utils.logging import indent_log
from pip._internal.utils.misc import display_path, hide_url, is_installable_dir
from pip._internal.utils.misc import (
display_path,
hash_file,
hide_url,
is_installable_dir,
)
from pip._internal.utils.temp_dir import TempDirectory
from pip._internal.utils.unpacking import unpack_file
from pip._internal.vcs import vcs
Expand Down Expand Up @@ -489,6 +499,23 @@ def _prepare_linked_requirement(
hashes.check_against_path(file_path)
local_file = File(file_path, content_type=None)

# If download_info is set, we got it from the wheel cache.
if req.download_info is None:
# Editables don't go through this function (see
# prepare_editable_requirement).
assert not req.editable
req.download_info = direct_url_from_link(link, req.source_dir)
# Make sure we have a hash in download_info. If we got it as part of the
# URL, it will have been verified and we can rely on it. Otherwise we
# compute it from the downloaded file.
if (
isinstance(req.download_info.info, ArchiveInfo)
and not req.download_info.info.hash
and local_file
):
hash = hash_file(local_file.path)[0].hexdigest()
req.download_info.info.hash = f"sha256={hash}"

# For use in later processing,
# preserve the file path on the requirement.
if local_file:
Expand Down Expand Up @@ -547,6 +574,8 @@ def prepare_editable_requirement(
)
req.ensure_has_source_dir(self.src_dir)
req.update_editable()
assert req.source_dir
req.download_info = direct_url_for_editable(req.unpacked_source_directory)

dist = _get_prepared_distribution(
req,
Expand Down
6 changes: 6 additions & 0 deletions src/pip/_internal/req/req_install.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
get_default_environment,
get_directory_distribution,
)
from pip._internal.models.direct_url import DirectUrl
from pip._internal.models.link import Link
from pip._internal.operations.build.metadata import generate_metadata
from pip._internal.operations.build.metadata_editable import generate_editable_metadata
Expand Down Expand Up @@ -112,6 +113,10 @@ def __init__(
self.link = self.original_link = link
self.original_link_is_in_wheel_cache = False

# Information about the location of the artifact that was downloaded. This
# property is guaranteed to be set in resolver results.
self.download_info: Optional[DirectUrl] = None

# Path to any downloaded or already-existing package.
self.local_file_path: Optional[str] = None
if self.link and self.link.is_file:
Expand Down Expand Up @@ -762,6 +767,7 @@ def install(
if self.is_wheel:
assert self.local_file_path
direct_url = None
# TODO this can be refactored to direct_url = self.download_info
if self.editable:
direct_url = direct_url_for_editable(self.unpacked_source_directory)
elif self.original_link:
Expand Down
9 changes: 9 additions & 0 deletions src/pip/_internal/resolution/legacy/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
from pip._internal.resolution.base import BaseResolver, InstallRequirementProvider
from pip._internal.utils import compatibility_tags
from pip._internal.utils.compatibility_tags import get_supported
from pip._internal.utils.direct_url_helpers import direct_url_from_link
from pip._internal.utils.logging import indent_log
from pip._internal.utils.misc import normalize_version_info
from pip._internal.utils.packaging import check_requires_python
Expand Down Expand Up @@ -431,6 +432,14 @@ def _populate_link(self, req: InstallRequirement) -> None:
logger.debug("Using cached wheel link: %s", cache_entry.link)
if req.link is req.original_link and cache_entry.persistent:
req.original_link_is_in_wheel_cache = True
if cache_entry.origin is not None:
req.download_info = cache_entry.origin
else:
# Legacy cache entry that does not have origin.json.
# download_info may miss the archive_info.hash field.
req.download_info = direct_url_from_link(
req.link, link_is_in_wheel_cache=cache_entry.persistent
)
req.link = cache_entry.link

def _get_dist_for(self, req: InstallRequirement) -> BaseDistribution:
Expand Down
18 changes: 12 additions & 6 deletions src/pip/_internal/resolution/resolvelib/candidates.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
install_req_from_line,
)
from pip._internal.req.req_install import InstallRequirement
from pip._internal.utils.direct_url_helpers import direct_url_from_link
from pip._internal.utils.misc import normalize_version_info

from .base import Candidate, CandidateVersion, Requirement, format_name
Expand Down Expand Up @@ -281,12 +282,17 @@ def __init__(
version, wheel_version, name
)

if (
cache_entry is not None
and cache_entry.persistent
and template.link is template.original_link
):
ireq.original_link_is_in_wheel_cache = True
if cache_entry is not None:
if cache_entry.persistent and template.link is template.original_link:
ireq.original_link_is_in_wheel_cache = True
if cache_entry.origin is not None:
ireq.download_info = cache_entry.origin
else:
# Legacy cache entry that does not have origin.json.
# download_info may miss the archive_info.hash field.
ireq.download_info = direct_url_from_link(
source_link, link_is_in_wheel_cache=cache_entry.persistent
)

super().__init__(
link=link,
Expand Down
6 changes: 6 additions & 0 deletions src/pip/_internal/wheel_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,12 @@ def build(
req.editable and req.permit_editable_wheels,
)
if wheel_file:
# Record the download origin in the cache
if req.download_info is not None:
# download_info is guaranteed to be set because when we build an
# InstallRequirement it has been through the preparer before, but
# let's be cautious.
wheel_cache.record_download_origin(cache_dir, req.download_info)
# Update the link for this.
req.link = Link(path_to_url(wheel_file))
req.local_file_path = req.link.file_path
Expand Down
4 changes: 2 additions & 2 deletions tests/functional/test_install.py
Original file line number Diff line number Diff line change
Expand Up @@ -1550,9 +1550,9 @@ def test_install_builds_wheels(script: PipTestEnvironment, data: TestData) -> No
)
# Must have installed it all
assert expected in str(res), str(res)
wheels = []
wheels: List[str] = []
for _, _, files in os.walk(wheels_cache):
wheels.extend(files)
wheels.extend(f for f in files if f.endswith(".whl"))
# and built wheels for upper and wheelbroken
assert "Building wheel for upper" in str(res), str(res)
assert "Building wheel for wheelb" in str(res), str(res)
Expand Down
Loading

0 comments on commit e58a8a5

Please sign in to comment.