
Commit 1cbe6db

maor-ps authored and DarkLight1337 committed
[Bugfix] OpenAI entrypoint limits logprobs while ignoring server defined --max-logprobs (vllm-project#5312)
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk>
1 parent 5906637 commit 1cbe6db

4 files changed: +12 −9 lines changed

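Before this change, the OpenAI-compatible entrypoint enforced the upstream OpenAI caps (5 for Completions `logprobs`, 20 for Chat `top_logprobs`) even when the server was started with a different `--max-logprobs`. The commit relaxes the request-side checks to non-negativity and raises the engine default to 20, so the effective limit comes from the server configuration. A minimal client-side sketch of the resulting behavior, not part of the commit; the model name and local URL are placeholders, and the server is assumed to be running with the new default of `--max-logprobs 20`:

import asyncio
import openai

client = openai.AsyncOpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

async def main() -> None:
    # Allowed: within the server's --max-logprobs (default 20 after this change).
    await client.completions.create(
        model="facebook/opt-125m",
        prompt="Hello",
        max_tokens=5,
        logprobs=20,
    )
    # Rejected: exceeds the server-side cap, so the request fails with an API error.
    try:
        await client.completions.create(
            model="facebook/opt-125m",
            prompt="Hello",
            max_tokens=5,
            logprobs=21,
        )
    except openai.OpenAIError as exc:
        print(f"rejected as expected: {exc}")

asyncio.run(main())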

tests/entrypoints/test_openai_server.py

Lines changed: 6 additions & 2 deletions
@@ -264,7 +264,9 @@ async def test_too_many_completion_logprobs(server, client: openai.AsyncOpenAI,
             prompt=[0, 0, 0, 0, 0],
             max_tokens=5,
             temperature=0.0,
-            logprobs=6,
+            # vLLM has higher default max_logprobs (20 instead of 5) to support
+            # both Completion API and Chat Completion API
+            logprobs=21,
         )
         ...
     with pytest.raises(
@@ -274,7 +276,9 @@ async def test_too_many_completion_logprobs(server, client: openai.AsyncOpenAI,
             prompt=[0, 0, 0, 0, 0],
             max_tokens=5,
             temperature=0.0,
-            logprobs=6,
+            # vLLM has higher default max_logprobs (20 instead of 5) to support
+            # both Completion API and Chat Completion API
+            logprobs=30,
             stream=True,
         )
         async for chunk in stream:

vllm/config.py

Lines changed: 1 addition & 1 deletion
@@ -100,7 +100,7 @@ def __init__(
         enforce_eager: bool = False,
         max_context_len_to_capture: Optional[int] = None,
         max_seq_len_to_capture: Optional[int] = None,
-        max_logprobs: int = 5,
+        max_logprobs: int = 20,
         disable_sliding_window: bool = False,
         skip_tokenizer_init: bool = False,
         served_model_name: Optional[Union[str, List[str]]] = None,
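
The same cap applies to offline use: ModelConfig.max_logprobs now defaults to 20 and can still be overridden when constructing the engine. A small sketch under assumptions (placeholder model name; `max_logprobs` passed to the `LLM` constructor is forwarded into this config):

from vllm import LLM, SamplingParams

# Assumed override for this sketch: raise the cap above the new default of 20.
llm = LLM(model="facebook/opt-125m", max_logprobs=40)

# 40 logprobs per generated token is accepted because it stays within max_logprobs.
params = SamplingParams(max_tokens=5, logprobs=40)
outputs = llm.generate(["Hello"], params)
print(outputs[0].outputs[0].logprobs)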

vllm/engine/arg_utils.py

Lines changed: 1 addition & 1 deletion
@@ -48,7 +48,7 @@ class EngineArgs:
     gpu_memory_utilization: float = 0.90
     max_num_batched_tokens: Optional[int] = None
     max_num_seqs: int = 256
-    max_logprobs: int = 5  # OpenAI default value
+    max_logprobs: int = 20  # Default value for OpenAI Chat Completions API
     disable_log_stats: bool = False
     revision: Optional[str] = None
     code_revision: Optional[str] = None
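
EngineArgs.max_logprobs is what the server's `--max-logprobs` flag populates, so deployments that need a different ceiling can still set one explicitly. A minimal sketch of the programmatic equivalent (placeholder model name):

from vllm.engine.arg_utils import EngineArgs

# Mirrors passing `--max-logprobs 40` on the command line; the default is now 20.
args = EngineArgs(model="facebook/opt-125m", max_logprobs=40)
print(args.max_logprobs)  # -> 40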

vllm/entrypoints/openai/protocol.py

Lines changed: 4 additions & 5 deletions
@@ -322,9 +322,9 @@ def check_logprobs(cls, data):
             raise ValueError(
                 "when using `top_logprobs`, `logprobs` must be set to true."
             )
-        elif not 0 <= data["top_logprobs"] <= 20:
+        elif data["top_logprobs"] < 0:
             raise ValueError(
-                "`top_logprobs` must be a value in the interval [0, 20].")
+                "`top_logprobs` must be a value a positive value.")
         return data


@@ -478,9 +478,8 @@ def check_guided_decoding_count(cls, data):
     @classmethod
     def check_logprobs(cls, data):
         if "logprobs" in data and data[
-                "logprobs"] is not None and not 0 <= data["logprobs"] <= 5:
-            raise ValueError(("if passed, `logprobs` must be a value",
-                              " in the interval [0, 5]."))
+                "logprobs"] is not None and not data["logprobs"] >= 0:
+            raise ValueError("if passed, `logprobs` must be a positive value.")
         return data

     @model_validator(mode="before")
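
With the interval checks gone, the request models only reject negative values here; the upper bound is enforced later against the engine's max_logprobs when the request is executed. A self-contained sketch of the same validator pattern (simplified, hypothetical model class; the real request classes live in vllm/entrypoints/openai/protocol.py):

from typing import Optional

from pydantic import BaseModel, model_validator


class CompletionRequestSketch(BaseModel):
    prompt: str
    logprobs: Optional[int] = None

    @model_validator(mode="before")
    @classmethod
    def check_logprobs(cls, data):
        # Only non-negativity is validated at the protocol layer.
        if "logprobs" in data and data["logprobs"] is not None and data["logprobs"] < 0:
            raise ValueError("if passed, `logprobs` must be a non-negative value.")
        return data


# Accepted here even though it exceeds the old OpenAI cap of 5;
# a running server would still reject it if it exceeded --max-logprobs.
CompletionRequestSketch(prompt="Hello", logprobs=30)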
