Skip to content

Commit

Permalink
harden lazy wheel wheel error handling
Browse files Browse the repository at this point in the history
This change attempts to provide a catch-all safety net for circumstances
where an index server is behaving badly when ranged requests are made.

Additionally, this change also helps guard against  missing
"Content-Range" headers in specific scenarios.
  • Loading branch information
abn committed Feb 28, 2024
1 parent f66f8ab commit 9f39259
Show file tree
Hide file tree
Showing 6 changed files with 222 additions and 44 deletions.
39 changes: 35 additions & 4 deletions src/poetry/inspection/lazy_wheel.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,19 @@
logger = logging.getLogger(__name__)


class HTTPRangeRequestUnsupported(Exception):
class LazyWheelUnsupportedError(Exception):
"""Raised when a lazy wheel is unsupported."""


class HTTPRangeRequestUnsupported(LazyWheelUnsupportedError):
"""Raised when the remote server appears unable to support byte ranges."""


class UnsupportedWheel(Exception):
class UnsupportedWheel(LazyWheelUnsupportedError):
"""Unsupported wheel."""


class InvalidWheel(Exception):
class InvalidWheel(LazyWheelUnsupportedError):
"""Invalid (e.g. corrupt) wheel."""

def __init__(self, location: str, name: str) -> None:
Expand Down Expand Up @@ -85,6 +89,24 @@ def metadata_from_wheel_url(
# themselves are invalid, not because we've messed up our bookkeeping
# and produced an invalid file.
raise InvalidWheel(url, name)
except Exception as e:
if isinstance(e, LazyWheelUnsupportedError):
# this is expected when the code handles issues with lazy wheel metadata retrieval correctly
raise e

logger.debug(
"There was an unexpected %s when handling lazy wheel metadata retrieval for %s from %s: %s",
type(e).__name__,
name,
url,
e,
)

# Catch all exception to handle any issues that may have occurred during
# attempts to use Lazy Wheel.
raise LazyWheelUnsupportedError(
f"Attempts to use lazy wheel metadata retrieval for {name} from {url} failed"
) from e


class MergeIntervals:
Expand Down Expand Up @@ -590,6 +612,12 @@ def _try_initial_chunk_request(
f"did not receive partial content: got code {code}"
)

if "Content-Range" not in tail.headers:
raise LazyWheelUnsupportedError(
f"file length cannot be determined for {self._url}, "
f"did not receive content range header from server"
)

file_length = self._parse_full_length_from_content_range(
tail.headers["Content-Range"]
)
Expand All @@ -614,7 +642,10 @@ def _extract_content_length(

# This indicates that the requested range from the end was larger than the
# actual file size: https://www.rfc-editor.org/rfc/rfc9110#status.416.
if code == codes.requested_range_not_satisfiable:
if (
code == codes.requested_range_not_satisfiable
and "Content-Range" in resp.headers
):
# In this case, we don't have any file content yet, but we do know the
# size the file will be, so we can return that and exit here.
assert resp is not None
Expand Down
4 changes: 2 additions & 2 deletions src/poetry/repositories/http_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

from poetry.config.config import Config
from poetry.inspection.info import PackageInfo
from poetry.inspection.lazy_wheel import HTTPRangeRequestUnsupported
from poetry.inspection.lazy_wheel import LazyWheelUnsupportedError
from poetry.inspection.lazy_wheel import metadata_from_wheel_url
from poetry.repositories.cached_repository import CachedRepository
from poetry.repositories.exceptions import PackageNotFound
Expand Down Expand Up @@ -122,7 +122,7 @@ def _get_info_from_wheel(self, link: Link) -> PackageInfo:
package_info = PackageInfo.from_metadata(
metadata_from_wheel_url(link.filename, link.url, self.session)
)
except HTTPRangeRequestUnsupported:
except LazyWheelUnsupportedError:
# Do not set to False if we already know that the domain supports
# range requests for some URLs!
raise_accepts_ranges = False
Expand Down
12 changes: 12 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
from tests.helpers import TestLocker
from tests.helpers import TestRepository
from tests.helpers import get_package
from tests.helpers import http_setup_redirect
from tests.helpers import isolated_environment
from tests.helpers import mock_clone
from tests.helpers import mock_download
Expand Down Expand Up @@ -321,6 +322,11 @@ def http() -> Iterator[type[httpretty.httpretty]]:
yield httpretty


@pytest.fixture
def http_redirector(http: type[httpretty.httpretty]) -> None:
http_setup_redirect(http, http.HEAD, http.GET, http.PUT, http.POST)


@pytest.fixture
def project_root() -> Path:
return Path(__file__).parent.parent
Expand Down Expand Up @@ -513,3 +519,9 @@ def venv_flags_default() -> dict[str, bool]:
"no-pip": False,
"no-setuptools": False,
}


@pytest.fixture(autouse=(os.name == "nt"))
def httpretty_windows_mock_urllib3_wait_for_socket(mocker: MockerFixture) -> None:
# this is a workaround for https://github.com/gabrielfalcao/HTTPretty/issues/442
mocker.patch("urllib3.util.wait.select_wait_for_socket", returns=True)
26 changes: 26 additions & 0 deletions tests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@
from typing import Any
from typing import Mapping

import httpretty

from httpretty.core import HTTPrettyRequest
from poetry.core.constraints.version import Version
from poetry.core.packages.dependency import Dependency
from pytest_mock import MockerFixture
Expand All @@ -38,6 +41,7 @@
from poetry.installation.operations.operation import Operation
from poetry.poetry import Poetry
from poetry.utils.authenticator import Authenticator
from tests.types import HTTPrettyResponse

FIXTURE_PATH = Path(__file__).parent / "fixtures"

Expand Down Expand Up @@ -321,3 +325,25 @@ def recurse_keys(obj: Mapping[str, Any]) -> Iterator[tuple[list[str], Any]]:
yield ([], obj)

return {delimiter.join(path): value for path, value in recurse_keys(obj)}


def http_setup_redirect(
http: type[httpretty.httpretty], *methods: str, status_code: int = 301
) -> None:
redirect_uri_regex = re.compile("^(?P<protocol>https?)://redirect.(?P<uri>.*)$")

def redirect_request_callback(
request: HTTPrettyRequest, uri: str, headers: dict[str, Any]
) -> HTTPrettyResponse:
redirect_uri_match = redirect_uri_regex.match(uri)
assert redirect_uri_match is not None
redirect_uri = f"{redirect_uri_match.group('protocol')}://{redirect_uri_match.group('uri')}"
return status_code, {"Location": redirect_uri}, b""

for method in methods:
http.register_uri(
method,
redirect_uri_regex,
status=status_code,
body=redirect_request_callback,
)
173 changes: 135 additions & 38 deletions tests/inspection/test_lazy_wheel.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,33 +5,31 @@
from pathlib import Path
from typing import TYPE_CHECKING
from typing import Any
from typing import Dict
from typing import Protocol
from typing import Tuple
from urllib.parse import urlparse

import httpretty
import pytest
import requests

from requests import codes

from poetry.inspection.lazy_wheel import HTTPRangeRequestUnsupported
from poetry.inspection.lazy_wheel import InvalidWheel
from poetry.inspection.lazy_wheel import LazyWheelUnsupportedError
from poetry.inspection.lazy_wheel import metadata_from_wheel_url
from tests.helpers import http_setup_redirect


if TYPE_CHECKING:
from collections.abc import Callable
import httpretty

from httpretty.core import HTTPrettyRequest
from pytest_mock import MockerFixture

from tests.types import FixtureDirGetter

HTTPrettyResponse = Tuple[int, Dict[str, Any], bytes] # status code, headers, body
HTTPrettyRequestCallback = Callable[
[HTTPrettyRequest, str, Dict[str, Any]], HTTPrettyResponse
]
from tests.types import HTTPPrettyRequestCallbackWrapper
from tests.types import HTTPrettyRequestCallback
from tests.types import HTTPrettyResponse

class RequestCallbackFactory(Protocol):
def __call__(
Expand All @@ -41,6 +39,17 @@ def __call__(
negative_offset_error: tuple[int, bytes] | None = None,
) -> HTTPrettyRequestCallback: ...

class AssertMetadataFromWheelUrl(Protocol):
def __call__(
self,
*,
accept_ranges: str | None = "bytes",
negative_offset_error: tuple[int, bytes] | None = None,
expected_requests: int = 3,
request_callback_wrapper: HTTPPrettyRequestCallbackWrapper | None = None,
redirect: bool = True,
) -> None: ...


NEGATIVE_OFFSET_AS_POSITIVE = -1

Expand Down Expand Up @@ -152,6 +161,55 @@ def handle_request(
return _factory


@pytest.fixture
def assert_metadata_from_wheel_url(
http: type[httpretty.httpretty],
handle_request_factory: RequestCallbackFactory,
) -> AssertMetadataFromWheelUrl:
def _assertion(
*,
accept_ranges: str | None = "bytes",
negative_offset_error: tuple[int, bytes] | None = None,
expected_requests: int = 3,
request_callback_wrapper: HTTPPrettyRequestCallbackWrapper | None = None,
redirect: bool = False,
) -> None:
latest_requests = http.latest_requests()
latest_requests.clear()

domain = (
f"lazy-wheel-{negative_offset_error[0] if negative_offset_error else 0}.com"
)
uri_regex = re.compile(f"^https://{domain}/.*$")
request_callback = handle_request_factory(
accept_ranges=accept_ranges, negative_offset_error=negative_offset_error
)
if request_callback_wrapper is not None:
request_callback = request_callback_wrapper(request_callback)

http.register_uri(http.GET, uri_regex, body=request_callback)
http.register_uri(http.HEAD, uri_regex, body=request_callback)

if redirect:
http_setup_redirect(http, http.GET, http.HEAD)

url_prefix = "redirect." if redirect else ""
url = f"https://{url_prefix}{domain}/poetry_core-1.5.0-py3-none-any.whl"

metadata = metadata_from_wheel_url("poetry-core", url, requests.Session())

assert metadata["name"] == "poetry-core"
assert metadata["version"] == "1.5.0"
assert metadata["author"] == "Sébastien Eustace"
assert metadata["requires_dist"] == [
'importlib-metadata (>=1.7.0) ; python_version < "3.8"'
]

assert len(latest_requests) == expected_requests

return _assertion


@pytest.mark.parametrize(
"negative_offset_error",
[
Expand All @@ -165,31 +223,9 @@ def handle_request(
],
)
def test_metadata_from_wheel_url(
http: type[httpretty.httpretty],
handle_request_factory: RequestCallbackFactory,
assert_metadata_from_wheel_url: AssertMetadataFromWheelUrl,
negative_offset_error: tuple[int, bytes] | None,
) -> None:
domain = (
f"lazy-wheel-{negative_offset_error[0] if negative_offset_error else 0}.com"
)
uri_regex = re.compile(f"^https://{domain}/.*$")
request_callback = handle_request_factory(
negative_offset_error=negative_offset_error
)
http.register_uri(http.GET, uri_regex, body=request_callback)
http.register_uri(http.HEAD, uri_regex, body=request_callback)

url = f"https://{domain}/poetry_core-1.5.0-py3-none-any.whl"

metadata = metadata_from_wheel_url("poetry-core", url, requests.Session())

assert metadata["name"] == "poetry-core"
assert metadata["version"] == "1.5.0"
assert metadata["author"] == "Sébastien Eustace"
assert metadata["requires_dist"] == [
'importlib-metadata (>=1.7.0) ; python_version < "3.8"'
]

# negative offsets supported:
# 1. end of central directory
# 2. whole central directory
Expand All @@ -207,15 +243,60 @@ def test_metadata_from_wheel_url(
expected_requests += 1
else:
expected_requests += 2
latest_requests = http.latest_requests()
assert len(latest_requests) == expected_requests

assert_metadata_from_wheel_url(
negative_offset_error=negative_offset_error, expected_requests=expected_requests
)

# second wheel -> one less request if negative offsets are not supported
latest_requests.clear()
metadata_from_wheel_url("poetry-core", url, requests.Session())
expected_requests = min(expected_requests, 4)
latest_requests = httpretty.latest_requests()
assert len(latest_requests) == expected_requests
assert_metadata_from_wheel_url(
negative_offset_error=negative_offset_error, expected_requests=expected_requests
)


def test_metadata_from_wheel_url_416_missing_content_range(
assert_metadata_from_wheel_url: AssertMetadataFromWheelUrl,
) -> None:
def request_callback_wrapper(
request_callback: HTTPrettyRequestCallback,
) -> HTTPrettyRequestCallback:
def _wrapped(
request: HTTPrettyRequest, uri: str, response_headers: dict[str, Any]
) -> HTTPrettyResponse:
status_code, response_headers, body = request_callback(
request, uri, response_headers
)
return (
status_code,
{
header: response_headers[header]
for header in response_headers
if header.lower() != "content-range"
},
body,
)

return _wrapped

assert_metadata_from_wheel_url(
negative_offset_error=(
codes.requested_range_not_satisfiable,
b"Requested range not satisfiable",
),
expected_requests=5,
request_callback_wrapper=request_callback_wrapper,
)


def test_metadata_from_wheel_url_with_redirect(
assert_metadata_from_wheel_url: AssertMetadataFromWheelUrl,
) -> None:
assert_metadata_from_wheel_url(
negative_offset_error=None,
expected_requests=6,
redirect=True,
)


@pytest.mark.parametrize("negative_offset_as_positive", [False, True])
Expand Down Expand Up @@ -320,3 +401,19 @@ def test_metadata_from_wheel_url_invalid_wheel(
latest_requests = http.latest_requests()
assert len(latest_requests) == 1
assert latest_requests[0].method == "GET"


def test_metadata_from_wheel_url_handles_unexpected_errors(
mocker: MockerFixture,
) -> None:
mocker.patch(
"poetry.inspection.lazy_wheel.LazyWheelOverHTTP.read_metadata",
side_effect=RuntimeError(),
)

with pytest.raises(LazyWheelUnsupportedError):
metadata_from_wheel_url(
"demo-missing-dist-info",
"https://runtime-error.com/demo_missing_dist_info-0.1.0-py2.py3-none-any.whl",
requests.Session(),
)
Loading

0 comments on commit 9f39259

Please sign in to comment.