This repository was archived by the owner on Oct 11, 2024. It is now read-only.

Commit 74eb6ab

maor-ps authored and DarkLight1337 committed
[Bugfix] OpenAI entrypoint limits logprobs while ignoring server defined --max-logprobs (vllm-project#5312)
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk>
1 parent: c098739 · commit: 74eb6ab

4 files changed: 12 additions, 9 deletions
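Taken together, the change means the OpenAI-compatible request schemas no longer hard-code their own caps (5 for the Completions API, 20 for Chat Completions); the server's --max-logprobs setting is what bounds a request. A hedged sketch of the Chat Completions side, assuming a local vLLM server launched with --max-logprobs 40; the base URL, API key, and model name are placeholders:

import openai

client = openai.OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

# Under the old schema check this request failed up front because
# top_logprobs > 20; with this commit it is only bounded by the server's
# --max-logprobs (assumed to be 40 here).
chat = client.chat.completions.create(
    model="facebook/opt-125m",
    messages=[{"role": "user", "content": "Say hello"}],
    logprobs=True,
    top_logprobs=25,
    max_tokens=5,
)
print(chat.choices[0].logprobs)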

tests/entrypoints/test_openai_server.py

Lines changed: 6 additions & 2 deletions
@@ -263,7 +263,9 @@ async def test_too_many_completion_logprobs(server, client: openai.AsyncOpenAI,
            prompt=[0, 0, 0, 0, 0],
            max_tokens=5,
            temperature=0.0,
-           logprobs=6,
+           # vLLM has higher default max_logprobs (20 instead of 5) to support
+           # both Completion API and Chat Completion API
+           logprobs=21,
        )
        ...
    with pytest.raises(
@@ -273,7 +275,9 @@ async def test_too_many_completion_logprobs(server, client: openai.AsyncOpenAI,
            prompt=[0, 0, 0, 0, 0],
            max_tokens=5,
            temperature=0.0,
-           logprobs=6,
+           # vLLM has higher default max_logprobs (20 instead of 5) to support
+           # both Completion API and Chat Completion API
+           logprobs=30,
            stream=True,
        )
        async for chunk in stream:
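For context, a minimal client-side sketch of what the updated test now exercises, assuming an OpenAI-compatible vLLM server running locally with the new default --max-logprobs of 20; the base URL, API key, and model name are placeholders:

import openai

client = openai.OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

try:
    client.completions.create(
        model="facebook/opt-125m",
        prompt=[0, 0, 0, 0, 0],
        max_tokens=5,
        temperature=0.0,
        logprobs=21,  # one more than the assumed server-side limit of 20
    )
except openai.BadRequestError as err:
    # The server is expected to reject the request rather than serve it.
    print("rejected as expected:", err)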

vllm/config.py

Lines changed: 1 addition & 1 deletion
@@ -102,7 +102,7 @@ def __init__(
        enforce_eager: bool = False,
        max_context_len_to_capture: Optional[int] = None,
        max_seq_len_to_capture: Optional[int] = None,
-       max_logprobs: int = 5,
+       max_logprobs: int = 20,
        disable_sliding_window: bool = False,
        skip_tokenizer_init: bool = False,
        served_model_name: Optional[Union[str, List[str]]] = None,
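A hedged offline sketch of how the raised default plays with sampling, assuming the vLLM Python API forwards max_logprobs through EngineArgs to this config; the model name is a placeholder and the explicit override is shown only for illustration:

from vllm import LLM, SamplingParams

# max_logprobs=20 is now the default, so passing it explicitly is redundant;
# it is spelled out here to make the limit visible.
llm = LLM(model="facebook/opt-125m", max_logprobs=20)

# Requesting up to 20 logprobs per generated token should be accepted out of the box.
params = SamplingParams(max_tokens=5, logprobs=20)
outputs = llm.generate(["Hello, my name is"], params)
print(outputs[0].outputs[0].logprobs)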

vllm/engine/arg_utils.py

Lines changed: 1 addition & 1 deletion
@@ -50,7 +50,7 @@ class EngineArgs:
    gpu_memory_utilization: float = 0.90
    max_num_batched_tokens: Optional[int] = None
    max_num_seqs: int = 256
-   max_logprobs: int = 5  # OpenAI default value
+   max_logprobs: int = 20  # Default value for OpenAI Chat Completions API
    disable_log_stats: bool = False
    revision: Optional[str] = None
    code_revision: Optional[str] = None
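Since EngineArgs is the dataclass behind the server's CLI flags, including --max-logprobs, a quick sanity check of the new default might look like this, assuming a vLLM build that includes this commit; the model name is a placeholder:

from vllm.engine.arg_utils import EngineArgs

# Only `model` is required; every other field falls back to its dataclass default.
args = EngineArgs(model="facebook/opt-125m")
print(args.max_logprobs)  # expected: 20 with this change, 5 before it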

vllm/entrypoints/openai/protocol.py

Lines changed: 4 additions & 5 deletions
@@ -322,9 +322,9 @@ def check_logprobs(cls, data):
            raise ValueError(
                "when using `top_logprobs`, `logprobs` must be set to true."
            )
-       elif not 0 <= data["top_logprobs"] <= 20:
+       elif data["top_logprobs"] < 0:
            raise ValueError(
-               "`top_logprobs` must be a value in the interval [0, 20].")
+               "`top_logprobs` must be a positive value.")
        return data

@@ -478,9 +478,8 @@ def check_guided_decoding_count(cls, data):
    @classmethod
    def check_logprobs(cls, data):
        if "logprobs" in data and data[
-               "logprobs"] is not None and not 0 <= data["logprobs"] <= 5:
-           raise ValueError(("if passed, `logprobs` must be a value",
-                             " in the interval [0, 5]."))
+               "logprobs"] is not None and data["logprobs"] < 0:
+           raise ValueError("if passed, `logprobs` must be a positive value.")
        return data

    @model_validator(mode="before")
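A simplified, standalone sketch of the relaxed request-side check (not the exact vLLM validator, which runs inside a pydantic model_validator): the schema now only rejects negative values, and the effective upper bound is enforced later against the engine's --max-logprobs.

def check_logprobs(data: dict) -> dict:
    # Only guard against negative values here; the old hard-coded [0, 5] and
    # [0, 20] intervals are gone, so --max-logprobs becomes the real cap.
    logprobs = data.get("logprobs")
    if logprobs is not None and logprobs < 0:
        raise ValueError("if passed, `logprobs` must be a positive value.")
    return data

print(check_logprobs({"logprobs": 30}))  # accepted here; still subject to --max-logprobs
check_logprobs({"logprobs": -1})         # raises ValueError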
