From 4011b5a2836d7bb036d8da54ed656f88bc0d2f7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9lina?= Date: Mon, 4 Nov 2024 17:10:40 +0100 Subject: [PATCH] Fix max tokens default value in text generation and chat completion (#2653) --- src/huggingface_hub/inference/_client.py | 4 ++-- src/huggingface_hub/inference/_generated/_async_client.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/huggingface_hub/inference/_client.py b/src/huggingface_hub/inference/_client.py index ed473e6d11..d3b38ce750 100644 --- a/src/huggingface_hub/inference/_client.py +++ b/src/huggingface_hub/inference/_client.py @@ -584,7 +584,7 @@ def chat_completion( Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message. max_tokens (`int`, *optional*): - Maximum number of tokens allowed in the response. Defaults to 20. + Maximum number of tokens allowed in the response. Defaults to 100. n (`int`, *optional*): UNUSED. presence_penalty (`float`, *optional*): @@ -2075,7 +2075,7 @@ def text_generation( grammar ([`TextGenerationInputGrammarType`], *optional*): Grammar constraints. Can be either a JSONSchema or a regex. max_new_tokens (`int`, *optional*): - Maximum number of generated tokens + Maximum number of generated tokens. Defaults to 100. repetition_penalty (`float`, *optional*): The parameter for repetition penalty. 1.0 means no penalty. See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. diff --git a/src/huggingface_hub/inference/_generated/_async_client.py b/src/huggingface_hub/inference/_generated/_async_client.py index 74888bc0b8..0dd671c9be 100644 --- a/src/huggingface_hub/inference/_generated/_async_client.py +++ b/src/huggingface_hub/inference/_generated/_async_client.py @@ -620,7 +620,7 @@ async def chat_completion( Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message. max_tokens (`int`, *optional*): - Maximum number of tokens allowed in the response. Defaults to 20. + Maximum number of tokens allowed in the response. Defaults to 100. n (`int`, *optional*): UNUSED. presence_penalty (`float`, *optional*): @@ -2138,7 +2138,7 @@ async def text_generation( grammar ([`TextGenerationInputGrammarType`], *optional*): Grammar constraints. Can be either a JSONSchema or a regex. max_new_tokens (`int`, *optional*): - Maximum number of generated tokens + Maximum number of generated tokens. Defaults to 100. repetition_penalty (`float`, *optional*): The parameter for repetition penalty. 1.0 means no penalty. See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.