-
-
Notifications
You must be signed in to change notification settings - Fork 32.2k
GH-80789: Bundle ensurepip
wheels at build time
#109130
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
0d8c94f
c2c9a55
76de0b8
252ee8e
800c487
4f0b3a2
0aaf135
a163396
ccaad2d
db6f0db
8fae12a
161f304
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
* | ||
!.gitignore | ||
!README.rst |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
Bundling ensurepip wheels | ||
========================= | ||
|
||
``ensurepip`` expects wheels for ``pip`` to be located in this directory. | ||
These need to be 'bundled' by each distributor of Python, | ||
ordinarily by running the ``Tools/build/bundle_ensurepip_wheels.py`` script. | ||
|
||
This will download the version of ``pip`` specified by the | ||
``ensurepip._PIP_VERSION`` variable to this directory, | ||
and verify it against the stored SHA-256 checksum. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -76,6 +76,12 @@ def setUp(self): | |
patched_os.path = os.path | ||
self.os_environ = patched_os.environ = os.environ.copy() | ||
|
||
# ensure the pip wheel exists | ||
pip_filename = os.path.join(test.support.STDLIB_DIR, 'ensurepip', '_bundled', | ||
f'pip-{ensurepip._PIP_VERSION}-py3-none-any.whl') | ||
if not os.path.isfile(pip_filename): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I feel like if it didn't exist, there should be some code to clean it up after testing so that the dummy file doesn't get forgotten on disk. Tests should avoid side effects... |
||
open(pip_filename, "wb").close() | ||
|
||
|
||
class TestBootstrap(EnsurepipMixin, unittest.TestCase): | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
import sys | ||
import tempfile | ||
import unittest | ||
import unittest.mock | ||
import urllib.request | ||
from hashlib import sha256 | ||
from io import BytesIO | ||
from pathlib import Path | ||
from random import randbytes | ||
from test import support, test_tools | ||
from test.support import captured_stderr | ||
|
||
import ensurepip | ||
|
||
test_tools.skip_if_missing('build') | ||
with test_tools.imports_under_tool('build'): | ||
import bundle_ensurepip_wheels as bew | ||
|
||
|
||
# Disable fancy GitHub actions output during the tests | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There should be a test for enabled GHA, just for those helpers, then. It'd be sad not to get coverage there. |
||
@unittest.mock.patch.object(bew, 'GITHUB_ACTIONS', False) | ||
class TestBundle(unittest.TestCase): | ||
contents = randbytes(512) | ||
checksum = sha256(contents).hexdigest() | ||
projects = [('pip', '1.2.3', checksum)] | ||
pip_filename = "pip-1.2.3-py3-none-any.whl" | ||
|
||
def test__wheel_url(self): | ||
self.assertEqual( | ||
bew._wheel_url('pip', '1.2.3'), | ||
'https://files.pythonhosted.org/packages/py3/p/pip/pip-1.2.3-py3-none-any.whl', | ||
) | ||
|
||
def test__get_projects(self): | ||
self.assertListEqual( | ||
bew._get_projects(), | ||
[('pip', ensurepip._PIP_VERSION, ensurepip._PIP_SHA_256)], | ||
) | ||
|
||
def test__is_valid_wheel(self): | ||
self.assertTrue(bew._is_valid_wheel(self.contents, checksum=self.checksum)) | ||
|
||
def test_cached_wheel(self): | ||
with tempfile.TemporaryDirectory() as tmpdir: | ||
tmpdir = Path(tmpdir) | ||
(tmpdir / self.pip_filename).write_bytes(self.contents) | ||
with ( | ||
captured_stderr() as stderr, | ||
unittest.mock.patch.object(bew, 'WHEEL_DIR', tmpdir), | ||
unittest.mock.patch.object(bew, '_get_projects', lambda: self.projects), | ||
): | ||
exit_code = bew.download_wheels() | ||
self.assertEqual(exit_code, 0) | ||
stderr = stderr.getvalue() | ||
self.assertIn("A valid 'pip' wheel already exists!", stderr) | ||
|
||
def test_invalid_checksum(self): | ||
class MockedHTTPSOpener: | ||
@staticmethod | ||
def open(url, data, timeout): | ||
assert 'pip' in url | ||
assert data is None # HTTP GET | ||
hugovk marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# Intentionally corrupt the wheel: | ||
return BytesIO(self.contents[:-1]) | ||
|
||
with ( | ||
captured_stderr() as stderr, | ||
unittest.mock.patch.object(urllib.request, '_opener', None), | ||
unittest.mock.patch.object(bew, '_get_projects', lambda: self.projects), | ||
): | ||
urllib.request.install_opener(MockedHTTPSOpener()) | ||
exit_code = bew.download_wheels() | ||
self.assertEqual(exit_code, 1) | ||
stderr = stderr.getvalue() | ||
self.assertIn("Failed to validate checksum for", stderr) | ||
|
||
def test_download_wheel(self): | ||
class MockedHTTPSOpener: | ||
@staticmethod | ||
def open(url, data, timeout): | ||
assert 'pip' in url | ||
assert data is None # HTTP GET | ||
hugovk marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return BytesIO(self.contents) | ||
|
||
with tempfile.TemporaryDirectory() as tmpdir: | ||
tmpdir = Path(tmpdir) | ||
with ( | ||
captured_stderr() as stderr, | ||
unittest.mock.patch.object(urllib.request, '_opener', None), | ||
unittest.mock.patch.object(bew, 'WHEEL_DIR', tmpdir), | ||
unittest.mock.patch.object(bew, '_get_projects', lambda: self.projects), | ||
): | ||
urllib.request.install_opener(MockedHTTPSOpener()) | ||
exit_code = bew.download_wheels() | ||
self.assertEqual((tmpdir / self.pip_filename).read_bytes(), self.contents) | ||
self.assertEqual(exit_code, 0) | ||
stderr = stderr.getvalue() | ||
self.assertIn("Downloading 'https://files.pythonhosted.org/packages/py3/p/pip/pip-1.2.3-py3-none-any.whl'", | ||
stderr) | ||
self.assertIn("Writing 'pip-1.2.3-py3-none-any.whl' to disk", stderr) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
Replaced vendored ``pip`` wheels for :mod:`ensurepip` with a new bundler script, | ||
:file:`Tools/build/bundle_ensurepip_wheels.py`, to be run by distributors. |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,108 @@ | ||||||
#!/usr/bin/env python3 | ||||||
|
||||||
""" | ||||||
Download wheels for 'ensurepip' packages from PyPI. | ||||||
|
||||||
When GitHub Actions executes the script, output is formatted accordingly. | ||||||
https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-notice-message | ||||||
""" | ||||||
|
||||||
import importlib.util | ||||||
import os | ||||||
import sys | ||||||
from hashlib import sha256 | ||||||
from pathlib import Path | ||||||
from urllib.error import URLError | ||||||
from urllib.request import urlopen | ||||||
|
||||||
HOST = 'https://files.pythonhosted.org' | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we name this something like a |
||||||
ENSURE_PIP_ROOT = Path(__file__, "..", "..", "..", "Lib", "ensurepip").resolve() | ||||||
WHEEL_DIR = ENSURE_PIP_ROOT / "_bundled" | ||||||
ENSURE_PIP_INIT = ENSURE_PIP_ROOT / "__init__.py" | ||||||
GITHUB_ACTIONS = "GITHUB_ACTIONS" in os.environ | ||||||
|
||||||
|
||||||
def print_notice(message: str) -> None: | ||||||
if GITHUB_ACTIONS: | ||||||
print(f"::notice::{message}", end="\n\n") | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: this also works if output to stderr FYI. |
||||||
else: | ||||||
print(message, file=sys.stderr) | ||||||
|
||||||
|
||||||
def print_error(message: str) -> None: | ||||||
if GITHUB_ACTIONS: | ||||||
print(f"::error::{message}", end="\n\n") | ||||||
else: | ||||||
print(message, file=sys.stderr) | ||||||
|
||||||
|
||||||
def download_wheels() -> int: | ||||||
"""Download wheels into bundle if they are not there yet.""" | ||||||
|
||||||
try: | ||||||
projects = _get_projects() | ||||||
except (AttributeError, TypeError): | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think there's an abstraction leak here: it's hard to guess from looking at the |
||||||
print_error(f"Could not find '_PROJECTS' in {ENSURE_PIP_INIT}.") | ||||||
return 1 | ||||||
|
||||||
errors = 0 | ||||||
for name, version, checksum in projects: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. FWIW with #109245, looping will stop making sense here. So maybe you could start working with just There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, let's keep them separate. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was kinda assuming that the simplification PR would get merged first, in which case, this one would have to adapt.. |
||||||
wheel_filename = f'{name}-{version}-py3-none-any.whl' | ||||||
wheel_path = WHEEL_DIR / wheel_filename | ||||||
if wheel_path.exists(): | ||||||
if _is_valid_wheel(wheel_path.read_bytes(), checksum=checksum): | ||||||
print_notice(f"A valid '{name}' wheel already exists!") | ||||||
continue | ||||||
else: | ||||||
print_error(f"An invalid '{name}' wheel exists.") | ||||||
os.remove(wheel_path) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any reason not to use pathlib's method? Seeing that it's already used everywhere.. |
||||||
|
||||||
wheel_url = _wheel_url(name, version) | ||||||
print_notice(f"Downloading {wheel_url!r}") | ||||||
try: | ||||||
with urlopen(wheel_url) as response: | ||||||
whl = response.read() | ||||||
except URLError as exc: | ||||||
print_error(f"Failed to download {wheel_url!r}: {exc}") | ||||||
errors = 1 | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Was this meant to be a counter?
Suggested change
|
||||||
continue | ||||||
if not _is_valid_wheel(whl, checksum=checksum): | ||||||
print_error(f"Failed to validate checksum for {wheel_url!r}!") | ||||||
errors = 1 | ||||||
continue | ||||||
|
||||||
print_notice(f"Writing {wheel_filename!r} to disk") | ||||||
wheel_path.write_bytes(whl) | ||||||
|
||||||
return errors | ||||||
|
||||||
|
||||||
def _get_projects() -> list[tuple[str, str, str]]: | ||||||
spec = importlib.util.spec_from_file_location("ensurepip", ENSURE_PIP_INIT) | ||||||
ensurepip = importlib.util.module_from_spec(spec) | ||||||
spec.loader.exec_module(ensurepip) | ||||||
return ensurepip._PROJECTS | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @AA-Turner I believe this would need to be updated after #109245, and it'll probably let you work with simpler structures. |
||||||
|
||||||
|
||||||
def _wheel_url(name: str, version: str, /) -> str: | ||||||
# The following code was adapted from | ||||||
# https://warehouse.pypa.io/api-reference/integration-guide.html#predictable-urls | ||||||
# | ||||||
# We rely on the fact that pip is, as a matter of policy, portable code. | ||||||
# We can therefore guarantee that we'll always have the values: | ||||||
# build_tag = "" | ||||||
# python_tag = "py3" | ||||||
# abi_tag = "none" | ||||||
# platform_tag = "any" | ||||||
|
||||||
# https://www.python.org/dev/peps/pep-0491/#file-name-convention | ||||||
filename = f'{name}-{version}-py3-none-any.whl' | ||||||
return f'{HOST}/packages/py3/{name[0]}/{name}/{filename}' | ||||||
|
||||||
|
||||||
def _is_valid_wheel(content: bytes, *, checksum: str) -> bool: | ||||||
return checksum == sha256(content, usedforsecurity=False).hexdigest() | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @AA-Turner I already know that this effort is going to be rejected for now, but out of curiosity — what's the motivation for setting |
||||||
|
||||||
|
||||||
if __name__ == '__main__': | ||||||
raise SystemExit(download_wheels()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This would probably be clearer: