
[Bot] Update inference types #2688

Merged: 21 commits on Dec 3, 2024
Changes from 1 commit
Commits
15d8612
Update inference types (automated commit)
Wauplin Nov 19, 2024
bad1919
Merge branch 'main' into update-inference-types-automated-pr
hanouticelina Nov 20, 2024
b9b3421
fix quality after merging main
hanouticelina Nov 20, 2024
b66ba40
another fix
hanouticelina Nov 20, 2024
690decf
fix tests
hanouticelina Nov 20, 2024
05df70f
Update inference types (automated commit)
Wauplin Nov 21, 2024
d274efb
Merge branch 'update-inference-types-automated-pr' of github.com:hugg…
hanouticelina Nov 21, 2024
a6e1cd2
Update inference types (automated commit)
Wauplin Nov 22, 2024
33840be
Merge branch 'main' into update-inference-types-automated-pr
hanouticelina Nov 22, 2024
817dafb
Merge branch 'update-inference-types-automated-pr' of github.com:hugg…
hanouticelina Nov 22, 2024
c63d566
fix quality
hanouticelina Nov 22, 2024
911c175
Update inference types (automated commit)
Wauplin Nov 24, 2024
69f20bc
Merge branch 'update-inference-types-automated-pr' of github.com:hugg…
hanouticelina Nov 24, 2024
08d3f65
Update inference types (automated commit)
Wauplin Nov 28, 2024
4dc3c30
Merge branch 'main' into update-inference-types-automated-pr
hanouticelina Nov 28, 2024
6aa17d6
Merge branch 'update-inference-types-automated-pr' of github.com:hugg…
hanouticelina Nov 28, 2024
5a0772e
Update inference types (automated commit)
Wauplin Dec 3, 2024
295f1f5
Merge branch 'update-inference-types-automated-pr' of github.com:hugg…
hanouticelina Dec 3, 2024
675a6ca
fix client
hanouticelina Dec 3, 2024
8834ad4
activate automatic update for table-question-answering
hanouticelina Dec 3, 2024
0d8b5f9
fix
hanouticelina Dec 3, 2024
Update inference types (automated commit)
Wauplin authored and github-actions[bot] committed Nov 19, 2024
commit 15d861251852df3ce9c8d898f6b7015d320ba882
8 changes: 2 additions & 6 deletions docs/source/en/package_reference/inference_types.md
Original file line number Diff line number Diff line change
@@ -369,8 +369,6 @@ This part of the lib is still under development and will be improved in future r

[[autodoc]] huggingface_hub.ZeroShotClassificationInput

[[autodoc]] huggingface_hub.ZeroShotClassificationInputData

[[autodoc]] huggingface_hub.ZeroShotClassificationOutputElement

[[autodoc]] huggingface_hub.ZeroShotClassificationParameters
@@ -381,8 +379,6 @@ This part of the lib is still under development and will be improved in future r

[[autodoc]] huggingface_hub.ZeroShotImageClassificationInput

[[autodoc]] huggingface_hub.ZeroShotImageClassificationInputData

[[autodoc]] huggingface_hub.ZeroShotImageClassificationOutputElement

[[autodoc]] huggingface_hub.ZeroShotImageClassificationParameters
@@ -395,6 +391,6 @@ This part of the lib is still under development and will be improved in future r

[[autodoc]] huggingface_hub.ZeroShotObjectDetectionInput

[[autodoc]] huggingface_hub.ZeroShotObjectDetectionInputData

[[autodoc]] huggingface_hub.ZeroShotObjectDetectionOutputElement

[[autodoc]] huggingface_hub.ZeroShotObjectDetectionParameters
8 changes: 2 additions & 6 deletions docs/source/ko/package_reference/inference_types.md
Original file line number Diff line number Diff line change
@@ -368,8 +368,6 @@ rendered properly in your Markdown viewer.

[[autodoc]] huggingface_hub.ZeroShotClassificationInput

[[autodoc]] huggingface_hub.ZeroShotClassificationInputData

[[autodoc]] huggingface_hub.ZeroShotClassificationOutputElement

[[autodoc]] huggingface_hub.ZeroShotClassificationParameters
@@ -380,8 +378,6 @@ rendered properly in your Markdown viewer.

[[autodoc]] huggingface_hub.ZeroShotImageClassificationInput

[[autodoc]] huggingface_hub.ZeroShotImageClassificationInputData

[[autodoc]] huggingface_hub.ZeroShotImageClassificationOutputElement

[[autodoc]] huggingface_hub.ZeroShotImageClassificationParameters
@@ -394,6 +390,6 @@ rendered properly in your Markdown viewer.

[[autodoc]] huggingface_hub.ZeroShotObjectDetectionInput

[[autodoc]] huggingface_hub.ZeroShotObjectDetectionInputData

[[autodoc]] huggingface_hub.ZeroShotObjectDetectionOutputElement

[[autodoc]] huggingface_hub.ZeroShotObjectDetectionParameters
21 changes: 12 additions & 9 deletions src/huggingface_hub/inference/_client.py
Original file line number Diff line number Diff line change
@@ -348,7 +348,6 @@ def audio_classification(
top_k (`int`, *optional*):
When specified, limits the output to the top K most probable classes.
function_to_apply (`"AudioClassificationOutputTransform"`, *optional*):
The function to apply to the output.

Returns:
`List[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
@@ -1131,7 +1130,6 @@ def image_classification(
The model to use for image classification. Can be a model ID hosted on the Hugging Face Hub or a URL to a
deployed Inference Endpoint. If not provided, the default recommended model for image classification will be used.
function_to_apply (`"ImageClassificationOutputTransform"`, *optional*):
The function to apply to the output.
top_k (`int`, *optional*):
When specified, limits the output to the top K most probable classes.
Returns:
@@ -1814,7 +1812,6 @@ def text_classification(
top_k (`int`, *optional*):
When specified, limits the output to the top K most probable classes.
function_to_apply (`"TextClassificationOutputTransform"`, *optional*):
The function to apply to the output.

Returns:
`List[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
@@ -2494,11 +2491,11 @@ def text_to_speech(
max_length (`int`, *optional*):
The maximum length (in tokens) of the generated text, including the input.
max_new_tokens (`int`, *optional*):
The maximum number of tokens to generate. Takes precedence over maxLength.
The maximum number of tokens to generate. Takes precedence over max_length.
min_length (`int`, *optional*):
The minimum length (in tokens) of the generated text, including the input.
min_new_tokens (`int`, *optional*):
The minimum number of tokens to generate. Takes precedence over maxLength.
The minimum number of tokens to generate. Takes precedence over min_length.
num_beam_groups (`int`, *optional*):
Number of groups to divide num_beams into in order to ensure diversity among different groups of beams.
See [this paper](https://hf.co/papers/1610.02424) for more details.
@@ -2777,6 +2774,7 @@ def zero_shot_classification(
multi_label: Optional[bool] = False,
hypothesis_template: Optional[str] = None,
model: Optional[str] = None,
candidate_labels: List[str] = None,
) -> List[ZeroShotClassificationOutputElement]:
"""
Provide as input a text and a set of candidate labels to classify the input text.
@@ -2791,11 +2789,13 @@
the label likelihoods for each sequence is 1. If true, the labels are considered independent and
probabilities are normalized for each candidate.
hypothesis_template (`str`, *optional*):
The sentence used in conjunction with candidateLabels to attempt the text classification by replacing
the placeholder with the candidate labels.
The sentence used in conjunction with `candidate_labels` to attempt the text classification by
replacing the placeholder with the candidate labels.
model (`str`, *optional*):
The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
Inference Endpoint. This parameter overrides the model defined at the instance level. If not provided, the default recommended zero-shot classification model will be used.
candidate_labels (`List[str]`):
The set of possible class labels to classify the text into.

Returns:
`List[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.
@@ -2877,6 +2877,7 @@ def zero_shot_image_classification(
*,
model: Optional[str] = None,
hypothesis_template: Optional[str] = None,
candidate_labels: List[str] = None,
) -> List[ZeroShotImageClassificationOutputElement]:
"""
Provide input image and text labels to predict text labels for the image.
@@ -2890,8 +2891,10 @@
The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
Inference Endpoint. This parameter overrides the model defined at the instance level. If not provided, the default recommended zero-shot image classification model will be used.
hypothesis_template (`str`, *optional*):
The sentence used in conjunction with candidateLabels to attempt the text classification by replacing
the placeholder with the candidate labels.
The sentence used in conjunction with `candidate_labels` to attempt the image classification by
replacing the placeholder with the candidate labels.
candidate_labels (`List[str]`):
The candidate labels for this image.
Returns:
`List[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.

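A minimal, untested sketch of how the new `candidate_labels` keyword surfaces on the sync client. The token, model, and input text are placeholders, and whether the pre-existing `labels` positional argument is still required is not visible in this hunk, so treat the call shape as an assumption rather than the final API.

```python
# Hypothetical usage sketch, not part of this PR. Placeholder token and text;
# assumes the input text stays the first positional argument and that the
# default recommended zero-shot classification model is used when `model` is None.
from huggingface_hub import InferenceClient

client = InferenceClient(token="hf_xxx")  # placeholder token

elements = client.zero_shot_classification(
    "The inference types were regenerated from the latest specs.",
    candidate_labels=["positive", "negative", "neutral"],
    multi_label=False,
)
for element in elements:
    # Each ZeroShotClassificationOutputElement carries a label and its confidence.
    print(element.label, element.score)
```

The same `candidate_labels` keyword is added to `zero_shot_image_classification` in this file.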
21 changes: 12 additions & 9 deletions src/huggingface_hub/inference/_generated/_async_client.py
Original file line number Diff line number Diff line change
@@ -381,7 +381,6 @@ async def audio_classification(
top_k (`int`, *optional*):
When specified, limits the output to the top K most probable classes.
function_to_apply (`"AudioClassificationOutputTransform"`, *optional*):
The function to apply to the output.

Returns:
`List[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
@@ -1176,7 +1175,6 @@ async def image_classification(
The model to use for image classification. Can be a model ID hosted on the Hugging Face Hub or a URL to a
deployed Inference Endpoint. If not provided, the default recommended model for image classification will be used.
function_to_apply (`"ImageClassificationOutputTransform"`, *optional*):
The function to apply to the output.
top_k (`int`, *optional*):
When specified, limits the output to the top K most probable classes.
Returns:
@@ -1876,7 +1874,6 @@ async def text_classification(
top_k (`int`, *optional*):
When specified, limits the output to the top K most probable classes.
function_to_apply (`"TextClassificationOutputTransform"`, *optional*):
The function to apply to the output.

Returns:
`List[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
@@ -2559,11 +2556,11 @@ async def text_to_speech(
max_length (`int`, *optional*):
The maximum length (in tokens) of the generated text, including the input.
max_new_tokens (`int`, *optional*):
The maximum number of tokens to generate. Takes precedence over maxLength.
The maximum number of tokens to generate. Takes precedence over max_length.
min_length (`int`, *optional*):
The minimum length (in tokens) of the generated text, including the input.
min_new_tokens (`int`, *optional*):
The minimum number of tokens to generate. Takes precedence over maxLength.
The minimum number of tokens to generate. Takes precedence over min_length.
num_beam_groups (`int`, *optional*):
Number of groups to divide num_beams into in order to ensure diversity among different groups of beams.
See [this paper](https://hf.co/papers/1610.02424) for more details.
@@ -2846,6 +2843,7 @@ async def zero_shot_classification(
multi_label: Optional[bool] = False,
hypothesis_template: Optional[str] = None,
model: Optional[str] = None,
candidate_labels: List[str] = None,
) -> List[ZeroShotClassificationOutputElement]:
"""
Provide as input a text and a set of candidate labels to classify the input text.
@@ -2860,11 +2858,13 @@
the label likelihoods for each sequence is 1. If true, the labels are considered independent and
probabilities are normalized for each candidate.
hypothesis_template (`str`, *optional*):
The sentence used in conjunction with candidateLabels to attempt the text classification by replacing
the placeholder with the candidate labels.
The sentence used in conjunction with `candidate_labels` to attempt the text classification by
replacing the placeholder with the candidate labels.
model (`str`, *optional*):
The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
Inference Endpoint. This parameter overrides the model defined at the instance level. If not provided, the default recommended zero-shot classification model will be used.
candidate_labels (`List[str]`):
The set of possible class labels to classify the text into.

Returns:
`List[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.
@@ -2948,6 +2948,7 @@ async def zero_shot_image_classification(
*,
model: Optional[str] = None,
hypothesis_template: Optional[str] = None,
candidate_labels: List[str] = None,
) -> List[ZeroShotImageClassificationOutputElement]:
"""
Provide input image and text labels to predict text labels for the image.
@@ -2961,8 +2962,10 @@
The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
Inference Endpoint. This parameter overrides the model defined at the instance level. If not provided, the default recommended zero-shot image classification model will be used.
hypothesis_template (`str`, *optional*):
The sentence used in conjunction with candidateLabels to attempt the text classification by replacing
the placeholder with the candidate labels.
The sentence used in conjunction with `candidate_labels` to attempt the image classification by
replacing the placeholder with the candidate labels.
candidate_labels (`List[str]`):
The candidate labels for this image.
Returns:
`List[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.

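The async client mirrors the change. A sketch under the same assumptions: placeholder image URL and labels, `HF_TOKEN` read from the environment, and no guarantee about the pre-existing `labels` argument, which this hunk does not show.

```python
# Hypothetical async sketch, not part of this PR; URL and labels are placeholders.
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()  # assumes HF_TOKEN is set in the environment
    elements = await client.zero_shot_image_classification(
        "https://example.com/cat.png",  # placeholder image URL
        candidate_labels=["cat", "dog", "bird"],
    )
    for element in elements:
        print(element.label, element.score)


asyncio.run(main())
```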
4 changes: 1 addition & 3 deletions src/huggingface_hub/inference/_generated/types/__init__.py
Original file line number Diff line number Diff line change
@@ -167,19 +167,17 @@
)
from .zero_shot_classification import (
ZeroShotClassificationInput,
ZeroShotClassificationInputData,
ZeroShotClassificationOutputElement,
ZeroShotClassificationParameters,
)
from .zero_shot_image_classification import (
ZeroShotImageClassificationInput,
ZeroShotImageClassificationInputData,
ZeroShotImageClassificationOutputElement,
ZeroShotImageClassificationParameters,
)
from .zero_shot_object_detection import (
ZeroShotObjectDetectionBoundingBox,
ZeroShotObjectDetectionInput,
ZeroShotObjectDetectionInputData,
ZeroShotObjectDetectionOutputElement,
ZeroShotObjectDetectionParameters,
)
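With the `*InputData` classes removed, the candidate labels are expected to live on the parameters dataclass instead. A sketch of the assumed payload shape follows; the `inputs` and `parameters` field names are inferred from the spec change and are not shown in this hunk.

```python
# Sketch of the assumed new payload shape, not part of this PR.
from huggingface_hub.inference._generated.types import (
    ZeroShotClassificationInput,
    ZeroShotClassificationParameters,
)

payload = ZeroShotClassificationInput(
    inputs="The *InputData wrapper types were dropped.",  # plain string input (assumed)
    parameters=ZeroShotClassificationParameters(
        candidate_labels=["docs", "types", "client"],  # moved here from InputData (assumed)
        multi_label=False,
    ),
)
```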
Original file line number Diff line number Diff line change
@@ -19,7 +19,6 @@ class AudioClassificationParameters(BaseInferenceType):
"""

function_to_apply: Optional["AudioClassificationOutputTransform"] = None
"""The function to apply to the output."""
top_k: Optional[int] = None
"""When specified, limits the output to the top K most probable classes."""

Original file line number Diff line number Diff line change
@@ -80,7 +80,7 @@ class AutomaticSpeechRecognitionParameters(BaseInferenceType):
Additional inference parameters for Automatic Speech Recognition
"""

generate: Optional[AutomaticSpeechRecognitionGenerationParameters] = None
generation_parameters: Optional[AutomaticSpeechRecognitionGenerationParameters] = None
"""Parametrization of the text generation process"""
return_timestamps: Optional[bool] = None
"""Whether to output corresponding timestamps with the generated text"""
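The field rename `generate` to `generation_parameters` shown here repeats below for Image To Text, Text To Audio, and Text To Speech. A construction sketch of the renamed field; the nested generation fields are assumed to mirror the TextTo* generation dataclasses.

```python
# Sketch of the renamed field, not part of this PR; nested field names assumed.
from huggingface_hub.inference._generated.types import (
    AutomaticSpeechRecognitionGenerationParameters,
    AutomaticSpeechRecognitionParameters,
)

params = AutomaticSpeechRecognitionParameters(
    generation_parameters=AutomaticSpeechRecognitionGenerationParameters(
        max_new_tokens=128,  # assumed to exist, mirroring the TextTo* generation params
    ),
    return_timestamps=True,
)
```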
Original file line number Diff line number Diff line change
@@ -81,5 +81,3 @@ class DocumentQuestionAnsweringOutputElement(BaseInferenceType):
"""The start word index of the answer (in the OCR’d version of the input or provided word
boxes).
"""
words: List[int]
"""The index of each word/box pair that is in the answer"""
Original file line number Diff line number Diff line change
@@ -19,7 +19,6 @@ class ImageClassificationParameters(BaseInferenceType):
"""

function_to_apply: Optional["ImageClassificationOutputTransform"] = None
"""The function to apply to the output."""
top_k: Optional[int] = None
"""When specified, limits the output to the top K most probable classes."""

Original file line number Diff line number Diff line change
@@ -80,7 +80,7 @@ class ImageToTextParameters(BaseInferenceType):
Additional inference parameters for Image To Text
"""

generate: Optional[ImageToTextGenerationParameters] = None
generation_parameters: Optional[ImageToTextGenerationParameters] = None
"""Parametrization of the text generation process"""
max_new_tokens: Optional[int] = None
"""The amount of maximum tokens to generate."""
Original file line number Diff line number Diff line change
@@ -14,18 +14,13 @@

@dataclass
class TextClassificationParameters(BaseInferenceType):
"""
Additional inference parameters for Text Classification.
"""Additional inference parameters
Additional inference parameters for Text Classification
"""

function_to_apply: Optional["TextClassificationOutputTransform"] = None
"""
The function to apply to the output.
"""
top_k: Optional[int] = None
"""
When specified, limits the output to the top K most probable classes.
"""
"""When specified, limits the output to the top K most probable classes."""


@dataclass
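Across the classification parameter types, only the redundant `function_to_apply` docstring lines are dropped; the fields themselves stay. A quick construction sketch; the `"softmax"` literal is assumed to be an allowed `TextClassificationOutputTransform` value.

```python
# Sketch, not part of this PR: the classification parameter fields are unchanged.
from huggingface_hub.inference._generated.types import (
    AudioClassificationParameters,
    ImageClassificationParameters,
    TextClassificationParameters,
)

audio_params = AudioClassificationParameters(top_k=5)
image_params = ImageClassificationParameters(top_k=3)
text_params = TextClassificationParameters(
    top_k=2,
    function_to_apply="softmax",  # assumed member of TextClassificationOutputTransform
)
```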
Original file line number Diff line number Diff line change
@@ -40,11 +40,11 @@ class TextToAudioGenerationParameters(BaseInferenceType):
max_length: Optional[int] = None
"""The maximum length (in tokens) of the generated text, including the input."""
max_new_tokens: Optional[int] = None
"""The maximum number of tokens to generate. Takes precedence over maxLength."""
"""The maximum number of tokens to generate. Takes precedence over max_length."""
min_length: Optional[int] = None
"""The minimum length (in tokens) of the generated text, including the input."""
min_new_tokens: Optional[int] = None
"""The minimum number of tokens to generate. Takes precedence over maxLength."""
"""The minimum number of tokens to generate. Takes precedence over min_length."""
num_beam_groups: Optional[int] = None
"""Number of groups to divide num_beams into in order to ensure diversity among different
groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
@@ -80,7 +80,7 @@ class TextToAudioParameters(BaseInferenceType):
Additional inference parameters for Text To Audio
"""

generate: Optional[TextToAudioGenerationParameters] = None
generation_parameters: Optional[TextToAudioGenerationParameters] = None
"""Parametrization of the text generation process"""


Original file line number Diff line number Diff line change
@@ -40,11 +40,11 @@ class TextToSpeechGenerationParameters(BaseInferenceType):
max_length: Optional[int] = None
"""The maximum length (in tokens) of the generated text, including the input."""
max_new_tokens: Optional[int] = None
"""The maximum number of tokens to generate. Takes precedence over maxLength."""
"""The maximum number of tokens to generate. Takes precedence over max_length."""
min_length: Optional[int] = None
"""The minimum length (in tokens) of the generated text, including the input."""
min_new_tokens: Optional[int] = None
"""The minimum number of tokens to generate. Takes precedence over maxLength."""
"""The minimum number of tokens to generate. Takes precedence over min_length."""
num_beam_groups: Optional[int] = None
"""Number of groups to divide num_beams into in order to ensure diversity among different
groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
@@ -80,7 +80,7 @@ class TextToSpeechParameters(BaseInferenceType):
Additional inference parameters for Text To Speech
"""

generate: Optional[TextToSpeechGenerationParameters] = None
generation_parameters: Optional[TextToSpeechGenerationParameters] = None
"""Parametrization of the text generation process"""


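A construction sketch of the renamed `generation_parameters` field together with the token limits whose docstrings were corrected above (`max_new_tokens` over `max_length`, `min_new_tokens` over `min_length`). The import path follows the generated types package; the values are illustrative only.

```python
# Sketch, not part of this PR; values are illustrative only.
from huggingface_hub.inference._generated.types import (
    TextToSpeechGenerationParameters,
    TextToSpeechParameters,
)

params = TextToSpeechParameters(
    generation_parameters=TextToSpeechGenerationParameters(
        max_length=512,      # cap including the input tokens
        max_new_tokens=256,  # takes precedence over max_length
        min_new_tokens=16,   # takes precedence over min_length
    )
)
```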