Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PERF: download and compute hashes in chunks of 1MB, did you know the progress bar was 30% of the runtime! #12810

Merged
merged 1 commit into from
Jul 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions news/12810.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Improve download performance. Download packages and update the
progress bar in larger chunks of 256 kB, up from 10 kB.
Limit the progress bar to 5 refreshes per second.
Improve hash performance. Read package files in larger chunks of 1 MB,
up from 8192 bytes.
2 changes: 1 addition & 1 deletion src/pip/_internal/cli/progress_bars.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def _rich_progress_bar(
TimeRemainingColumn(),
)

progress = Progress(*columns, refresh_per_second=30)
progress = Progress(*columns, refresh_per_second=5)
task_id = progress.add_task(" " * (get_indentation() + 2), total=total)
with progress:
for chunk in iterable:
Expand Down
6 changes: 3 additions & 3 deletions src/pip/_internal/network/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import os
from typing import Iterable, Optional, Tuple

from pip._vendor.requests.models import CONTENT_CHUNK_SIZE, Response
from pip._vendor.requests.models import Response

from pip._internal.cli.progress_bars import get_download_progress_renderer
from pip._internal.exceptions import NetworkConnectionError
Expand Down Expand Up @@ -56,12 +56,12 @@ def _prepare_download(
show_progress = False
elif not total_length:
show_progress = True
elif total_length > (40 * 1000):
elif total_length > (512 * 1024):
show_progress = True
else:
show_progress = False

chunks = response_chunks(resp, CONTENT_CHUNK_SIZE)
chunks = response_chunks(resp)

if not show_progress:
return chunks
Expand Down
6 changes: 4 additions & 2 deletions src/pip/_internal/network/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Dict, Generator

from pip._vendor.requests.models import CONTENT_CHUNK_SIZE, Response
from pip._vendor.requests.models import Response

from pip._internal.exceptions import NetworkConnectionError

Expand All @@ -25,6 +25,8 @@
# possible to make this work.
HEADERS: Dict[str, str] = {"Accept-Encoding": "identity"}

DOWNLOAD_CHUNK_SIZE = 256 * 1024


def raise_for_status(resp: Response) -> None:
http_error_msg = ""
Expand Down Expand Up @@ -55,7 +57,7 @@ def raise_for_status(resp: Response) -> None:


def response_chunks(
response: Response, chunk_size: int = CONTENT_CHUNK_SIZE
response: Response, chunk_size: int = DOWNLOAD_CHUNK_SIZE
uranusjr marked this conversation as resolved.
Show resolved Hide resolved
) -> Generator[bytes, None, None]:
"""Given a requests Response, provide the data chunks."""
try:
Expand Down
12 changes: 5 additions & 7 deletions src/pip/_internal/utils/misc.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import errno
import getpass
import hashlib
import io
import logging
import os
import posixpath
Expand Down Expand Up @@ -70,6 +69,8 @@
OnExc = Callable[[FunctionType, Path, BaseException], Any]
OnErr = Callable[[FunctionType, Path, ExcInfo], Any]

FILE_CHUNK_SIZE = 1024 * 1024


def get_pip_version() -> str:
pip_pkg_dir = os.path.join(os.path.dirname(__file__), "..", "..")
Expand Down Expand Up @@ -122,9 +123,7 @@ def get_prog() -> str:
# Retry every half second for up to 3 seconds
@retry(stop_after_delay=3, wait=0.5)
def rmtree(
dir: str,
ignore_errors: bool = False,
onexc: Optional[OnExc] = None,
dir: str, ignore_errors: bool = False, onexc: Optional[OnExc] = None
) -> None:
if ignore_errors:
onexc = _onerror_ignore
Expand Down Expand Up @@ -313,7 +312,7 @@ def is_installable_dir(path: str) -> bool:


def read_chunks(
file: BinaryIO, size: int = io.DEFAULT_BUFFER_SIZE
file: BinaryIO, size: int = FILE_CHUNK_SIZE
) -> Generator[bytes, None, None]:
"""Yield pieces of data from a file-like object until EOF."""
while True:
Expand Down Expand Up @@ -643,8 +642,7 @@ def pairwise(iterable: Iterable[Any]) -> Iterator[Tuple[Any, Any]]:


def partition(
pred: Callable[[T], bool],
iterable: Iterable[T],
pred: Callable[[T], bool], iterable: Iterable[T]
) -> Tuple[Iterable[T], Iterable[T]]:
"""
Use a predicate to partition entries into false entries and true entries,
Expand Down
Loading