Merge branch 'main' into large-upload-cli

huggingface · Aug 29, 2024 · cac2b50 · cac2b50
2 parents 3e2db8e + 893e889
commit cac2b50
Show file tree

Hide file tree

Showing 54 changed files with 1,691 additions and 1,272 deletions.
diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml
@@ -26,7 +26,8 @@ jobs:
           [
             "Repository only",
             "Everything else",
-            "torch",
+            "torch_1.11",
+            "torch_latest",
           ]
         include:
           - python-version: "3.11" # LFS not ran on 3.8
@@ -71,8 +72,18 @@ jobs:
               git config --global user.name "ci"
               ;;
 
-            fastai | torch)
-              uv pip install "huggingface_hub[${{ matrix.test_name }}] @ ."
+            fastai)
+              uv pip install "huggingface_hub[fastai] @ ."
+              ;;
+
+            torch_latest)
+              uv pip install "huggingface_hub[torch] @ ."
+              uv pip install --upgrade torch
+              ;;
+
+            torch_1.11)
+              uv pip install "huggingface_hub[torch] @ ."
+              uv pip install torch~=1.11
               ;;
 
             tensorflow)
@@ -121,7 +132,7 @@ jobs:
               eval "$PYTEST ../tests/test_serialization.py"
             ;;
 
-            torch)
+            torch_1.11 | torch_latest)
             eval "$PYTEST ../tests/test_hub_mixin*"
             eval "$PYTEST ../tests/test_serialization.py"
             ;;

diff --git a/README.md b/README.md
@@ -90,7 +90,7 @@ Files will be downloaded in a local cache folder. More details in [this guide](h
 
 ### Login
 
-The Hugging Face Hub uses tokens to authenticate applications (see [docs](https://huggingface.co/docs/hub/security-tokens)). To login your machine, run the following CLI:
+The Hugging Face Hub uses tokens to authenticate applications (see [docs](https://huggingface.co/docs/hub/security-tokens)). To log in on your machine, run the following CLI command:
 
 ```bash
 huggingface-cli login

diff --git a/contrib/conftest.py b/contrib/conftest.py
@@ -21,7 +21,7 @@ def user() -> str:
 
 @pytest.fixture(autouse=True, scope="session")
 def login_as_dummy_user(token: str) -> Generator:
-    """Login with dummy user token."""
+    """Log in with dummy user token."""
     # Cannot use `monkeypatch` fixture since we want it to be "session-scoped"
     old_token = os.environ["HF_TOKEN"]
     os.environ["HF_TOKEN"] = token

diff --git a/contrib/spacy/test_spacy.py b/contrib/spacy/test_spacy.py
@@ -3,7 +3,7 @@
 from spacy_huggingface_hub import push
 
 from huggingface_hub import delete_repo, hf_hub_download, model_info
-from huggingface_hub.utils import HfHubHTTPError
+from huggingface_hub.errors import HfHubHTTPError
 
 from ..utils import production_endpoint
 

diff --git a/docs/source/en/guides/cli.md b/docs/source/en/guides/cli.md
@@ -101,7 +101,7 @@ _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|
 _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
 _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|
 
-To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
+To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
 Token:
 Add token as git credential? (Y/n)
 Token is valid (permission: write).
@@ -226,7 +226,7 @@ The examples above show how to download from the latest commit on the main branc
 
 The recommended (and default) way to download files from the Hub is to use the cache-system. However, in some cases you want to download files and move them to a specific folder. This is useful to get a workflow closer to what git commands offer. You can do that using the `--local-dir` option.
 
-A `./huggingface/` folder is created at the root of your local directory containing metadata about the downloaded files. This prevents re-downloading files if they're already up-to-date. If the metadata has changed, then the new file version is downloaded. This makes the `local-dir` optimized for pulling only the latest changes.
+A `.cache/huggingface/` folder is created at the root of your local directory containing metadata about the downloaded files. This prevents re-downloading files if they're already up-to-date. If the metadata has changed, then the new file version is downloaded. This makes the `local-dir` optimized for pulling only the latest changes.
 
 <Tip>
 
@@ -451,7 +451,7 @@ Files correctly deleted from repo. Commit: https://huggingface.co/Wauplin/my-coo
 
 Use Unix-style wildcards to delete sets of files: 
 ```bash
->>> huggingface-cli repo-files Wauplin/my-cool-model delete *.txt folder/*.bin 
+>>> huggingface-cli repo-files Wauplin/my-cool-model delete "*.txt" "folder/*.bin"
 Files correctly deleted from repo. Commit: https://huggingface.co/Wauplin/my-cool-mo...
 ```
 

diff --git a/docs/source/en/guides/download.md b/docs/source/en/guides/download.md
@@ -132,7 +132,7 @@ By default, we recommend using the [cache system](./manage-cache) to download fi
 
 However, if you need to download files to a specific folder, you can pass a `local_dir` parameter to the download function. This is useful to get a workflow closer to what the `git` command offers. The downloaded files will maintain their original file structure within the specified folder. For example, if `filename="data/train.csv"` and `local_dir="path/to/folder"`, the resulting filepath will be `"path/to/folder/data/train.csv"`.
 
-A `./huggingface/` folder is created at the root of your local directory containing metadata about the downloaded files. This prevents re-downloading files if they're already up-to-date. If the metadata has changed, then the new file version is downloaded. This makes the `local_dir` optimized for pulling only the latest changes.
+A `.cache/huggingface/` folder is created at the root of your local directory containing metadata about the downloaded files. This prevents re-downloading files if they're already up-to-date. If the metadata has changed, then the new file version is downloaded. This makes the `local_dir` optimized for pulling only the latest changes.
 
 After completing the download, you can safely remove the `.cache/huggingface/` folder if you no longer need it. However, be aware that re-running your script without this folder may result in longer recovery times, as metadata will be lost. Rest assured that your local data will remain intact and unaffected.
 

diff --git a/docs/source/en/guides/hf_file_system.md b/docs/source/en/guides/hf_file_system.md
@@ -103,11 +103,11 @@ The same workflow can also be used for [Dask](https://docs.dask.org/en/stable/ho
 
 In many cases, you must be logged in with a Hugging Face account to interact with the Hub. Refer to the [Authentication](../quick-start#authentication) section of the documentation to learn more about authentication methods on the Hub.
 
-It is also possible to login programmatically by passing your `token` as an argument to [`HfFileSystem`]:
+It is also possible to log in programmatically by passing your `token` as an argument to [`HfFileSystem`]:
 
 ```python
 >>> from huggingface_hub import HfFileSystem
 >>> fs = HfFileSystem(token=token)
 ```
 
-If you login this way, be careful not to accidentally leak the token when sharing your source code!
+If you log in this way, be careful not to accidentally leak the token when sharing your source code!
diff --git a/docs/source/en/guides/search.md b/docs/source/en/guides/search.md
@@ -57,5 +57,5 @@ the following example fetches the top 5 most downloaded datasets on the Hub:
 
 
 
-To explore available filter on the Hub, visit [models](https://huggingface.co/models) and [datasets](https://huggingface.co/datasets) pages
+To explore the available filters on the Hub, visit the [models](https://huggingface.co/models) and [datasets](https://huggingface.co/datasets) pages
 in your browser, search for some parameters and look at the values in the URL.
diff --git a/docs/source/ko/guides/cli.md b/docs/source/ko/guides/cli.md
@@ -99,7 +99,7 @@ _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|
 _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
 _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|
 
-To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
+To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
 Token:
 Add token as git credential? (Y/n)
 Token is valid (permission: write).

diff --git a/src/huggingface_hub/_commit_api.py b/src/huggingface_hub/_commit_api.py
@@ -15,12 +15,12 @@
 
 from tqdm.contrib.concurrent import thread_map
 
-from .constants import ENDPOINT, HF_HUB_ENABLE_HF_TRANSFER
+from . import constants
+from .errors import EntryNotFoundError
 from .file_download import hf_hub_url
 from .lfs import UploadInfo, lfs_upload, post_lfs_batch_info
 from .utils import (
     FORBIDDEN_FOLDERS,
-    EntryNotFoundError,
     chunk_iterable,
     get_session,
     hf_raise_for_status,
@@ -432,7 +432,7 @@ def _wrapped_lfs_upload(batch_action) -> None:
         except Exception as exc:
             raise RuntimeError(f"Error while uploading '{operation.path_in_repo}' to the Hub.") from exc
 
-    if HF_HUB_ENABLE_HF_TRANSFER:
+    if constants.HF_HUB_ENABLE_HF_TRANSFER:
         logger.debug(f"Uploading {len(filtered_actions)} LFS files to the Hub using `hf_transfer`.")
         for action in hf_tqdm(filtered_actions, name="huggingface_hub.lfs_upload"):
             _wrapped_lfs_upload(action)
@@ -506,7 +506,7 @@ def _fetch_upload_modes(
         [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
             If the Hub API response is improperly formatted.
     """
-    endpoint = endpoint if endpoint is not None else ENDPOINT
+    endpoint = endpoint if endpoint is not None else constants.ENDPOINT
 
     # Fetch upload mode (LFS or regular) chunk by chunk.
     upload_modes: Dict[str, UploadMode] = {}

diff --git a/src/huggingface_hub/_inference_endpoints.py b/src/huggingface_hub/_inference_endpoints.py
@@ -238,11 +238,14 @@ def update(
         instance_type: Optional[str] = None,
         min_replica: Optional[int] = None,
         max_replica: Optional[int] = None,
+        scale_to_zero_timeout: Optional[int] = None,
         # Model update
         repository: Optional[str] = None,
         framework: Optional[str] = None,
         revision: Optional[str] = None,
         task: Optional[str] = None,
+        custom_image: Optional[Dict] = None,
+        secrets: Optional[Dict[str, str]] = None,
     ) -> "InferenceEndpoint":
         """Update the Inference Endpoint.
 
@@ -263,6 +266,8 @@ def update(
                 The minimum number of replicas (instances) to keep running for the Inference Endpoint.
             max_replica (`int`, *optional*):
                 The maximum number of replicas (instances) to scale to for the Inference Endpoint.
+            scale_to_zero_timeout (`int`, *optional*):
+                The duration in minutes before an inactive endpoint is scaled to zero.
 
             repository (`str`, *optional*):
                 The name of the model repository associated with the Inference Endpoint (e.g. `"gpt2"`).
@@ -272,7 +277,11 @@ def update(
                 The specific model revision to deploy on the Inference Endpoint (e.g. `"6c0e6080953db56375760c0471a8c5f2929baf11"`).
             task (`str`, *optional*):
                 The task on which to deploy the model (e.g. `"text-classification"`).
-
+            custom_image (`Dict`, *optional*):
+                A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an
+                Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples).
+            secrets (`Dict[str, str]`, *optional*):
+                Secret values to inject in the container environment.
         Returns:
             [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data.
         """
@@ -285,10 +294,13 @@ def update(
             instance_type=instance_type,
             min_replica=min_replica,
             max_replica=max_replica,
+            scale_to_zero_timeout=scale_to_zero_timeout,
             repository=repository,
             framework=framework,
             revision=revision,
             task=task,
+            custom_image=custom_image,
+            secrets=secrets,
             token=self._token,  # type: ignore [arg-type]
         )
 

diff --git a/src/huggingface_hub/_login.py b/src/huggingface_hub/_login.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Contains methods to login to the Hub."""
+"""Contains methods to log in to the Hub."""
 
 import os
 import subprocess
@@ -60,7 +60,7 @@ def login(
     components. If `token` is not provided, it will be prompted to the user either with
     a widget (in a notebook) or via the terminal.
 
-    To login from outside of a script, one can also use `huggingface-cli login` which is
+    To log in from outside of a script, one can also use `huggingface-cli login` which is
     a cli command that wraps [`login`].
 
     <Tip>
@@ -94,7 +94,7 @@ def login(
     Raises:
         [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
             If an organization token is passed. Only personal account tokens are valid
-            to login.
+            to log in.
         [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
             If token is invalid.
         [`ImportError`](https://docs.python.org/3/library/exceptions.html#ImportError)
@@ -155,7 +155,7 @@ def logout() -> None:
 
 def interpreter_login(new_session: bool = True, write_permission: bool = False) -> None:
     """
-    Displays a prompt to login to the HF website and store the token.
+    Displays a prompt to log in to the HF website and store the token.
 
     This is equivalent to [`login`] without passing a token when not run in a notebook.
     [`interpreter_login`] is useful if you want to force the use of the terminal prompt
@@ -185,7 +185,7 @@ def interpreter_login(new_session: bool = True, write_permission: bool = False)
         )
         print("    Setting a new token will erase the existing one.")
 
-    print("    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .")
+    print("    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .")
     if os.name == "nt":
         print("Token can be pasted using 'Right-Click'.")
     token = getpass("Enter your token (input will not be visible): ")
@@ -220,7 +220,7 @@ def interpreter_login(new_session: bool = True, write_permission: bool = False)
 
 def notebook_login(new_session: bool = True, write_permission: bool = False) -> None:
     """
-    Displays a widget to login to the HF website and store the token.
+    Displays a widget to log in to the HF website and store the token.
 
     This is equivalent to [`login`] without passing a token when run in a notebook.
     [`notebook_login`] is useful if you want to force the use of the notebook widget

diff --git a/src/huggingface_hub/_snapshot_download.py b/src/huggingface_hub/_snapshot_download.py
@@ -6,21 +6,12 @@
 from tqdm.auto import tqdm as base_tqdm
 from tqdm.contrib.concurrent import thread_map
 
-from .constants import (
-    DEFAULT_ETAG_TIMEOUT,
-    DEFAULT_REVISION,
-    HF_HUB_CACHE,
-    HF_HUB_ENABLE_HF_TRANSFER,
-    REPO_TYPES,
-)
+from . import constants
+from .errors import GatedRepoError, LocalEntryNotFoundError, RepositoryNotFoundError, RevisionNotFoundError
 from .file_download import REGEX_COMMIT_HASH, hf_hub_download, repo_folder_name
 from .hf_api import DatasetInfo, HfApi, ModelInfo, SpaceInfo
 from .utils import (
-    GatedRepoError,
-    LocalEntryNotFoundError,
     OfflineModeIsEnabled,
-    RepositoryNotFoundError,
-    RevisionNotFoundError,
     filter_repo_objects,
     logging,
     validate_hf_hub_args,
@@ -43,7 +34,7 @@ def snapshot_download(
     library_version: Optional[str] = None,
     user_agent: Optional[Union[Dict, str]] = None,
     proxies: Optional[Dict] = None,
-    etag_timeout: float = DEFAULT_ETAG_TIMEOUT,
+    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
     force_download: bool = False,
     token: Optional[Union[bool, str]] = None,
     local_files_only: bool = False,
@@ -140,16 +131,16 @@ def snapshot_download(
             if some parameter value is invalid.
     """
     if cache_dir is None:
-        cache_dir = HF_HUB_CACHE
+        cache_dir = constants.HF_HUB_CACHE
     if revision is None:
-        revision = DEFAULT_REVISION
+        revision = constants.DEFAULT_REVISION
     if isinstance(cache_dir, Path):
         cache_dir = str(cache_dir)
 
     if repo_type is None:
         repo_type = "model"
-    if repo_type not in REPO_TYPES:
-        raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(REPO_TYPES)}")
+    if repo_type not in constants.REPO_TYPES:
+        raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(constants.REPO_TYPES)}")
 
     storage_folder = os.path.join(cache_dir, repo_folder_name(repo_id=repo_id, repo_type=repo_type))
 
@@ -290,7 +281,7 @@ def _inner_hf_hub_download(repo_file: str):
             headers=headers,
         )
 
-    if HF_HUB_ENABLE_HF_TRANSFER:
+    if constants.HF_HUB_ENABLE_HF_TRANSFER:
         # when using hf_transfer we don't want extra parallelism
         # from the one hf_transfer provides
         for file in filtered_repo_files:

diff --git a/src/huggingface_hub/_tensorboard_logger.py b/src/huggingface_hub/_tensorboard_logger.py
@@ -17,8 +17,9 @@
 from typing import TYPE_CHECKING, List, Optional, Union
 
 from ._commit_scheduler import CommitScheduler
+from .errors import EntryNotFoundError
 from .repocard import ModelCard
-from .utils import EntryNotFoundError, experimental
+from .utils import experimental
 
 
 # Depending on user's setup, SummaryWriter can come either from 'tensorboardX'

diff --git a/src/huggingface_hub/_webhooks_server.py b/src/huggingface_hub/_webhooks_server.py
@@ -36,7 +36,7 @@
 
 
 _global_app: Optional["WebhooksServer"] = None
-_is_local = os.getenv("SYSTEM") != "spaces"
+_is_local = os.environ.get("SPACE_ID") is None
 
 
 @experimental

diff --git a/src/huggingface_hub/commands/repo_files.py b/src/huggingface_hub/commands/repo_files.py
@@ -16,7 +16,7 @@
 
 Usage:
     # delete all
-    huggingface-cli repo-files <repo_id> delete *
+    huggingface-cli repo-files <repo_id> delete "*"
 
     # delete single file
     huggingface-cli repo-files <repo_id> delete file.txt
@@ -28,7 +28,7 @@
     huggingface-cli repo-files <repo_id> delete file.txt folder/ file2.txt
 
     # delete multiple patterns
-    huggingface-cli repo-files <repo_id> delete file.txt *.json folder/*.parquet
+    huggingface-cli repo-files <repo_id> delete file.txt "*.json" "folder/*.parquet"
 
     # delete from different revision / repo-type
     huggingface-cli repo-files <repo_id> delete file.txt --revision=refs/pr/1 --repo-type=dataset