Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Respect HF_HUB_OFFLINE for every http call #1899

Merged
merged 3 commits into from
Dec 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions docs/source/en/package_reference/environment_variables.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,13 +83,11 @@ as `True` if its value is one of `{"1", "ON", "YES", "TRUE"}` (case-insensitive)

### HF_HUB_OFFLINE

If set, no HTTP calls will be made when trying to fetch files. Only files that are already
cached will be accessed. This is useful in case your network is slow and you don't care
about having absolutely the latest version of a file.
If set, no HTTP calls will be made to the Hugging Face Hub. If you try to download files, only the cached files will be accessed. If no cache file is detected, an error is raised. This is useful in case your network is slow and you don't care about having the latest version of a file.

**Note:** even if the latest version of a file is cached, calling `hf_hub_download` still triggers
an HTTP request to check that a new version is not available. Setting `HF_HUB_OFFLINE=1` will
skip this call which speeds up your loading time.
If `HF_HUB_OFFLINE=1` is set as an environment variable and you call any method of [`HfApi`], an [`~huggingface_hub.utils.OfflineModeIsEnabled`] exception will be raised.

**Note:** even if the latest version of a file is cached, calling `hf_hub_download` still triggers an HTTP request to check that a new version is not available. Setting `HF_HUB_OFFLINE=1` will skip this call which speeds up your loading time.

### HF_HUB_DISABLE_IMPLICIT_TOKEN

Expand Down
4 changes: 4 additions & 0 deletions docs/source/en/package_reference/utilities.md
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,10 @@ user as possible.

[[autodoc]] huggingface_hub.utils.LocalEntryNotFoundError

#### OfflineModeIsEnabled

[[autodoc]] huggingface_hub.utils.OfflineModeIsEnabled

## Telemetry

`huggingface_hub` includes a helper to send telemetry data. This information helps us debug issues and prioritize new features.
Expand Down
3 changes: 2 additions & 1 deletion src/huggingface_hub/_snapshot_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
DEFAULT_REVISION,
HF_HUB_CACHE,
HF_HUB_ENABLE_HF_TRANSFER,
HF_HUB_OFFLINE,
REPO_TYPES,
)
from .file_download import REGEX_COMMIT_HASH, hf_hub_download, repo_folder_name
Expand Down Expand Up @@ -161,7 +162,7 @@ def snapshot_download(
# appropriate folder in the cache
# If the specified revision is a commit hash, look inside "snapshots".
# If the specified revision is a branch or tag, look inside "refs".
if local_files_only:
if local_files_only or HF_HUB_OFFLINE:
if REGEX_COMMIT_HASH.match(revision):
commit_hash = revision
else:
Expand Down
27 changes: 4 additions & 23 deletions src/huggingface_hub/file_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
FileMetadataError,
GatedRepoError,
LocalEntryNotFoundError,
OfflineModeIsEnabled,
RepositoryNotFoundError,
RevisionNotFoundError,
SoftTemporaryDirectory,
Expand Down Expand Up @@ -361,28 +362,11 @@ def http_user_agent(
)


class OfflineModeIsEnabled(ConnectionError):
pass


def _raise_if_offline_mode_is_enabled(msg: Optional[str] = None):
"""Raise a OfflineModeIsEnabled error (subclass of ConnectionError) if
HF_HUB_OFFLINE is True."""
if constants.HF_HUB_OFFLINE:
raise OfflineModeIsEnabled(
"Offline mode is enabled." if msg is None else "Offline mode is enabled. " + str(msg)
)


def _request_wrapper(
method: HTTP_METHOD_T, url: str, *, follow_relative_redirects: bool = False, **params
) -> requests.Response:
"""Wrapper around requests methods to add several features.

What it does:
1. Ensure offline mode is disabled (env variable `HF_HUB_OFFLINE` not set to 1). If enabled, a
`OfflineModeIsEnabled` exception is raised.
2. Follow relative redirects if `follow_relative_redirects=True` even when `allow_redirection=False`.
"""Wrapper around requests methods to follow relative redirects if `follow_relative_redirects=True` even when
`allow_redirection=False`.

Args:
method (`str`):
Expand All @@ -396,10 +380,7 @@ def _request_wrapper(
**params (`dict`, *optional*):
Params to pass to `requests.request`.
"""
# 1. Check online mode
_raise_if_offline_mode_is_enabled(f"Tried to reach {url}")

# 2. Force relative redirection
# Recursively follow relative redirects
if follow_relative_redirects:
response = _request_wrapper(
method=method,
Expand Down
2 changes: 1 addition & 1 deletion src/huggingface_hub/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
from ._git_credential import list_credential_helpers, set_git_credential, unset_git_credential
from ._headers import build_hf_headers, get_token_to_send, LocalTokenNotFoundError
from ._hf_folder import HfFolder
from ._http import configure_http_backend, get_session, http_backoff, reset_sessions
from ._http import configure_http_backend, get_session, http_backoff, reset_sessions, OfflineModeIsEnabled
from ._pagination import paginate
from ._paths import filter_repo_objects, IGNORE_GIT_FOLDER_PATTERNS
from ._experimental import experimental
Expand Down
20 changes: 18 additions & 2 deletions src/huggingface_hub/utils/_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from requests.adapters import HTTPAdapter
from requests.models import PreparedRequest

from .. import constants
from . import logging
from ._typing import HTTP_METHOD_T

Expand All @@ -40,6 +41,10 @@
X_REQUEST_ID = "x-request-id"


class OfflineModeIsEnabled(ConnectionError):
    """Raised when a request is made but `HF_HUB_OFFLINE=1` is set as an environment variable."""


class UniqueRequestIdAdapter(HTTPAdapter):
X_AMZN_TRACE_ID = "X-Amzn-Trace-Id"

Expand Down Expand Up @@ -68,10 +73,21 @@ def send(self, request: PreparedRequest, *args, **kwargs) -> Response:
raise


class OfflineAdapter(HTTPAdapter):
    """HTTP adapter that refuses to send any request.

    Every call to `send` raises `OfflineModeIsEnabled` instead of returning a response.
    """

    def send(self, request: PreparedRequest, *args, **kwargs) -> Response:
        """Reject the prepared request.

        Args:
            request (`PreparedRequest`):
                The request that was about to be sent; its URL is included in the error message.

        Raises:
            `OfflineModeIsEnabled`: always.
        """
        message = (
            f"Cannot reach {request.url}: offline mode is enabled."
            " To disable it, please unset the `HF_HUB_OFFLINE` environment variable."
        )
        raise OfflineModeIsEnabled(message)


def _default_backend_factory() -> requests.Session:
    """Build the default `requests.Session`.

    The adapter mounted on both the `http://` and `https://` prefixes depends on
    `constants.HF_HUB_OFFLINE`: `OfflineAdapter` when it is truthy, `UniqueRequestIdAdapter`
    otherwise.

    Returns:
        `requests.Session`: a new session with one adapter instance mounted per prefix.
    """
    session = requests.Session()
    # Pick the adapter class once, then mount each prefix exactly once: mounting a prefix a
    # second time only replaces the adapter registered by the first call.
    adapter_cls = OfflineAdapter if constants.HF_HUB_OFFLINE else UniqueRequestIdAdapter
    for prefix in ("http://", "https://"):
        session.mount(prefix, adapter_cls())
    return session


Expand Down
21 changes: 20 additions & 1 deletion tests/test_utils_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,13 @@
from requests import ConnectTimeout, HTTPError

from huggingface_hub.constants import ENDPOINT
from huggingface_hub.utils._http import configure_http_backend, get_session, http_backoff
from huggingface_hub.utils._http import (
OfflineModeIsEnabled,
configure_http_backend,
get_session,
http_backoff,
reset_sessions,
)


URL = "https://www.google.com"
Expand Down Expand Up @@ -234,6 +240,19 @@ def _child_target():
self.assertNotEqual(repr(main_session), child_session)


class OfflineModeSessionTest(unittest.TestCase):
    """Check that a session obtained while `HF_HUB_OFFLINE` is patched to `True` rejects requests."""

    def tearDown(self) -> None:
        # Call `reset_sessions()` after each test, before the parent class teardown.
        reset_sessions()
        return super().tearDown()

    @patch("huggingface_hub.constants.HF_HUB_OFFLINE", True)
    def test_offline_mode(self):
        """A GET issued through the session must raise `OfflineModeIsEnabled`."""
        # NOTE(review): presumably this re-registers the default backend factory so the
        # session is rebuilt under the patched constant; confirm against `configure_http_backend`.
        configure_http_backend()
        session = get_session()
        with self.assertRaises(OfflineModeIsEnabled):
            session.get("https://huggingface.co")


class TestUniqueRequestId(unittest.TestCase):
api_endpoint = ENDPOINT + "/api/tasks" # any endpoint is fine

Expand Down
4 changes: 3 additions & 1 deletion tests/testing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import pytest
import requests

from huggingface_hub.utils import is_gradio_available, logging
from huggingface_hub.utils import is_gradio_available, logging, reset_sessions
from tests.testing_constants import ENDPOINT_PRODUCTION, ENDPOINT_PRODUCTION_URL_SCHEME


Expand Down Expand Up @@ -193,7 +193,9 @@ def offline_socket(*args, **kwargs):
yield
elif mode is OfflineSimulationMode.HF_HUB_OFFLINE_SET_TO_1:
with patch("huggingface_hub.constants.HF_HUB_OFFLINE", True):
reset_sessions()
yield
reset_sessions()
else:
raise ValueError("Please use a value from the OfflineSimulationMode enum.")

Expand Down
Loading