Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions news/5701.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Resolver performance: package sources following PEP 503 will leverage package hashes from the URL fragment, without downloading the package.
11 changes: 9 additions & 2 deletions pipenv/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -855,14 +855,21 @@ def write_lockfile(self, content):

def pipfile_sources(self, expand_vars=True):
if self.pipfile_is_empty or "source" not in self.parsed_pipfile:
return [self.default_source]
sources = [self.default_source]
if os.environ.get("PIPENV_PYPI_MIRROR"):
sources[0]["url"] = os.environ["PIPENV_PYPI_MIRROR"]
return sources
# We need to make copies of the source info so we don't
# accidentally modify the cache. See #2100 where values are
# written after the os.path.expandvars() call.
return [
sources = [
{k: safe_expandvars(v) if expand_vars else v for k, v in source.items()}
for source in self.parsed_pipfile["source"]
]
for source in sources:
if os.environ.get("PIPENV_PYPI_MIRROR") and is_pypi_url(source.get("url")):
source["url"] = os.environ["PIPENV_PYPI_MIRROR"]
return sources

@property
def sources(self):
Expand Down
66 changes: 62 additions & 4 deletions pipenv/utils/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
import tempfile
import warnings
from functools import lru_cache
from html.parser import HTMLParser
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple, Union
from urllib import parse

from pipenv import environments
from pipenv.exceptions import RequirementError, ResolutionFailure
Expand Down Expand Up @@ -123,6 +125,20 @@ def _get_file_hash(self, link):
return ":".join([h.name, h.hexdigest()])


class PackageIndexHTMLParser(HTMLParser):
    """Minimal HTML parser that records the ``href`` of every anchor tag.

    Used to scrape package links (and their hash fragments) from a
    PEP 503 "simple" index page.
    """

    def __init__(self):
        super().__init__()
        # hrefs encountered so far, in document order
        self.urls = []

    def handle_starttag(self, tag, attrs):
        # Only anchor tags carry package links on a simple index page.
        if tag != "a":
            return
        for name, value in attrs:
            if name == "href":
                self.urls.append(value)


class Resolver:
def __init__(
self,
Expand Down Expand Up @@ -758,6 +774,42 @@ def _get_hashes_from_pypi(self, ireq):
)
return None

def _get_hashes_from_remote_index_urls(self, ireq, source):
    """Collect hashes for *ireq* from a PEP 503 simple-index page.

    Fetches ``<index>/<package>/``, scrapes the anchor hrefs, and pulls
    hashes out of each matching link's URL fragment (e.g.
    ``#sha256=...``) — no package download required.

    :param ireq: the InstallRequirement whose hashes are wanted
    :param source: a Pipfile source dict; ``source["url"]`` is the index URL
    :return: hash strings prefixed with the hash type, or ``None`` on error
    """
    pkg_url = f"{source['url']}/{ireq.name}/"
    session = _get_requests_session(self.project.s.PIPENV_MAX_RETRIES)
    try:
        collected_hashes = set()
        response = session.get(pkg_url, timeout=10)
        # Parse the simple-index HTML and extract every anchor href.
        parser = PackageIndexHTMLParser()
        parser.feed(response.text)
        hrefs = parser.urls

        # Pin to the first version in the specifier, if any.  With no
        # specifier, version stays "" and the substring test below
        # matches every release's links.
        version = ""
        if ireq.specifier:
            spec = next(iter(ireq.specifier), None)
            if spec:
                version = spec.version
        for package_url in hrefs:
            if version in package_url:
                # Hashes ride in the URL fragment as query-style pairs,
                # e.g. "pkg-1.0.tar.gz#sha256=<digest>".
                url_params = parse.urlparse(package_url).fragment
                params_dict = parse.parse_qs(url_params)
                if params_dict.get(FAVORITE_HASH):
                    collected_hashes.add(params_dict[FAVORITE_HASH][0])
        return self.prepend_hash_types(collected_hashes, FAVORITE_HASH)
    except (ValueError, KeyError, OSError):
        # OSError (IOError) also covers requests' ConnectionError and
        # Timeout, which subclass IOError rather than the builtin
        # ConnectionError — catching only the builtin let network
        # failures escape this best-effort handler.
        if self.project.s.is_verbose():
            click.echo(
                "{}: Error generating hash for {}".format(
                    click.style("Warning", bold=True, fg="red"), ireq.name
                ),
                err=True,
            )
        return None

def collect_hashes(self, ireq):
link = ireq.link # Handle VCS and file links first
if link and (link.is_vcs or (link.is_file and link.is_existing_dir())):
Expand All @@ -773,10 +825,16 @@ def collect_hashes(self, ireq):
sources = list(
filter(lambda s: s.get("name") == self.index_lookup[ireq.name], sources)
)
if any(is_pypi_url(source["url"]) for source in sources):
hashes = self._get_hashes_from_pypi(ireq)
if hashes:
return hashes
source = sources[0] if len(sources) else None
if source:
if is_pypi_url(source["url"]):
hashes = self._get_hashes_from_pypi(ireq)
if hashes:
return hashes
else:
hashes = self._get_hashes_from_remote_index_urls(ireq, source)
if hashes:
return hashes

applicable_candidates = self.ignore_compatibility_finder.find_best_candidate(
ireq.name, ireq.specifier
Expand Down