Commit 3ce2c05

[Fix] Correct OpenAI batch response format (#5554)
1 parent 1c0afa1 commit 3ce2c05

2 files changed: +25 -5 lines changed


vllm/entrypoints/openai/protocol.py

Lines changed: 12 additions & 1 deletion
@@ -672,6 +672,17 @@ class BatchRequestInput(OpenAIBaseModel):
     body: Union[ChatCompletionRequest, ]
 
 
+class BatchResponseData(OpenAIBaseModel):
+    # HTTP status code of the response.
+    status_code: int = 200
+
+    # An unique identifier for the API request.
+    request_id: str
+
+    # The body of the response.
+    body: Union[ChatCompletionResponse, ]
+
+
 class BatchRequestOutput(OpenAIBaseModel):
     """
     The per-line object of the batch output and error files
@@ -683,7 +694,7 @@ class BatchRequestOutput(OpenAIBaseModel):
     # inputs.
     custom_id: str
 
-    response: Optional[ChatCompletionResponse]
+    response: Optional[BatchResponseData]
 
     # For requests that failed with a non-HTTP error, this will contain more
     # information on the cause of the failure.
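After this change, the chat completion in each output line is nested under a BatchResponseData object rather than being the "response" value itself. The snippet below is a rough sketch of the resulting JSON shape, not vLLM's real code: it uses trimmed, hypothetical stand-in models (FakeChatCompletionResponse and the reduced field sets are invented for illustration) and assumes pydantic v2 is available.

# Sketch only: simplified stand-ins that mirror the new nesting.
# These are NOT the real vLLM classes; the real ones carry many more fields.
from typing import Optional

from pydantic import BaseModel


class FakeChatCompletionResponse(BaseModel):
    # Hypothetical stand-in for ChatCompletionResponse.
    id: str
    model: str


class BatchResponseData(BaseModel):
    # Mirrors the protocol class added in this commit.
    status_code: int = 200
    request_id: str
    body: Optional[FakeChatCompletionResponse] = None


class BatchRequestOutput(BaseModel):
    id: str
    custom_id: str
    response: Optional[BatchResponseData] = None
    error: Optional[dict] = None


# One line of the batch output file now looks roughly like this:
line = BatchRequestOutput(
    id="vllm-123",
    custom_id="request-1",
    response=BatchResponseData(
        request_id="vllm-batch-456",
        body=FakeChatCompletionResponse(id="cmpl-789", model="demo"),
    ),
)
print(line.model_dump_json())
# Prints the nested JSON, roughly:
# {"id": "vllm-123", "custom_id": "request-1",
#  "response": {"status_code": 200, "request_id": "vllm-batch-456",
#               "body": {...}}, "error": null}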

vllm/entrypoints/openai/run_batch.py

Lines changed: 13 additions & 4 deletions
@@ -10,7 +10,9 @@
 from vllm.engine.async_llm_engine import AsyncLLMEngine
 from vllm.entrypoints.openai.protocol import (BatchRequestInput,
                                               BatchRequestOutput,
-                                              ChatCompletionResponse)
+                                              BatchResponseData,
+                                              ChatCompletionResponse,
+                                              ErrorResponse)
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
 from vllm.logger import init_logger
 from vllm.usage.usage_lib import UsageContext
@@ -77,20 +79,27 @@ async def run_request(chat_serving: OpenAIServingChat,
                       request: BatchRequestInput) -> BatchRequestOutput:
     chat_request = request.body
     chat_response = await chat_serving.create_chat_completion(chat_request)
+
     if isinstance(chat_response, ChatCompletionResponse):
         batch_output = BatchRequestOutput(
             id=f"vllm-{random_uuid()}",
             custom_id=request.custom_id,
-            response=chat_response,
+            response=BatchResponseData(
+                body=chat_response, request_id=f"vllm-batch-{random_uuid()}"),
             error=None,
         )
-    else:
+    elif isinstance(chat_response, ErrorResponse):
         batch_output = BatchRequestOutput(
             id=f"vllm-{random_uuid()}",
             custom_id=request.custom_id,
-            response=None,
+            response=BatchResponseData(
+                status_code=chat_response.code,
+                request_id=f"vllm-batch-{random_uuid()}"),
             error=chat_response,
         )
+    else:
+        raise ValueError("Request must not be sent in stream mode")
+
     return batch_output
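Since both branches now populate BatchResponseData, a consumer of the output file can tell successes from failures by the nested status_code. Here is a hypothetical reader sketch, not part of this commit: the field access follows the protocol classes above, while the file path and output format are made up for illustration.

import json


def summarize_batch_output(path: str) -> None:
    # Print a one-line summary per record of a batch output JSONL file.
    with open(path) as f:
        for raw in f:
            record = json.loads(raw)
            response = record.get("response")
            if response and response.get("status_code", 200) == 200:
                # Success: the chat completion now lives under
                # response["body"] instead of directly under "response".
                print(record["custom_id"], "ok", response["body"]["id"])
            else:
                # Failure: "error" carries the ErrorResponse, and the nested
                # BatchResponseData records the HTTP status code.
                status = response.get("status_code") if response else None
                print(record["custom_id"], "failed, status:", status)


# Example usage (hypothetical file name):
# summarize_batch_output("batch_output.jsonl")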
