From 4011b5a2836d7bb036d8da54ed656f88bc0d2f7f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9lina?= <hanouticelina@gmail.com>
Date: Mon, 4 Nov 2024 17:10:40 +0100
Subject: [PATCH] Fix documented max tokens default value in text generation
 and chat completion docstrings (#2653)

---
 src/huggingface_hub/inference/_client.py                  | 4 ++--
 src/huggingface_hub/inference/_generated/_async_client.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/huggingface_hub/inference/_client.py b/src/huggingface_hub/inference/_client.py
index ed473e6d11..d3b38ce750 100644
--- a/src/huggingface_hub/inference/_client.py
+++ b/src/huggingface_hub/inference/_client.py
@@ -584,7 +584,7 @@ def chat_completion(
                 Whether to return log probabilities of the output tokens or not. If true, returns the log
                 probabilities of each output token returned in the content of message.
             max_tokens (`int`, *optional*):
-                Maximum number of tokens allowed in the response. Defaults to 20.
+                Maximum number of tokens allowed in the response. Defaults to 100.
             n (`int`, *optional*):
                 UNUSED.
             presence_penalty (`float`, *optional*):
@@ -2075,7 +2075,7 @@ def text_generation(
             grammar ([`TextGenerationInputGrammarType`], *optional*):
                 Grammar constraints. Can be either a JSONSchema or a regex.
             max_new_tokens (`int`, *optional*):
-                Maximum number of generated tokens
+                Maximum number of generated tokens. Defaults to 100.
             repetition_penalty (`float`, *optional*):
                 The parameter for repetition penalty. 1.0 means no penalty. See [this
                 paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
diff --git a/src/huggingface_hub/inference/_generated/_async_client.py b/src/huggingface_hub/inference/_generated/_async_client.py
index 74888bc0b8..0dd671c9be 100644
--- a/src/huggingface_hub/inference/_generated/_async_client.py
+++ b/src/huggingface_hub/inference/_generated/_async_client.py
@@ -620,7 +620,7 @@ async def chat_completion(
                 Whether to return log probabilities of the output tokens or not. If true, returns the log
                 probabilities of each output token returned in the content of message.
             max_tokens (`int`, *optional*):
-                Maximum number of tokens allowed in the response. Defaults to 20.
+                Maximum number of tokens allowed in the response. Defaults to 100.
             n (`int`, *optional*):
                 UNUSED.
             presence_penalty (`float`, *optional*):
@@ -2138,7 +2138,7 @@ async def text_generation(
             grammar ([`TextGenerationInputGrammarType`], *optional*):
                 Grammar constraints. Can be either a JSONSchema or a regex.
             max_new_tokens (`int`, *optional*):
-                Maximum number of generated tokens
+                Maximum number of generated tokens. Defaults to 100.
             repetition_penalty (`float`, *optional*):
                 The parameter for repetition penalty. 1.0 means no penalty. See [this
                 paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.