[Fix] Correct OpenAI batch response format (vllm-project#5554)
zifeitong authored and jimpang committed Jul 24, 2024
1 parent 2ea6c27 commit 3914e55
Showing 2 changed files with 25 additions and 5 deletions.
vllm/entrypoints/openai/protocol.py (12 additions & 1 deletion)

@@ -672,6 +672,17 @@ class BatchRequestInput(OpenAIBaseModel):
     body: Union[ChatCompletionRequest, ]
 
 
+class BatchResponseData(OpenAIBaseModel):
+    # HTTP status code of the response.
+    status_code: int = 200
+
+    # A unique identifier for the API request.
+    request_id: str
+
+    # The body of the response.
+    body: Union[ChatCompletionResponse, ]
+
+
 class BatchRequestOutput(OpenAIBaseModel):
     """
     The per-line object of the batch output and error files
@@ -683,7 +694,7 @@ class BatchRequestOutput(OpenAIBaseModel):
     # inputs.
     custom_id: str
 
-    response: Optional[ChatCompletionResponse]
+    response: Optional[BatchResponseData]
 
     # For requests that failed with a non-HTTP error, this will contain more
     # information on the cause of the failure.
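For context, each line of the batch output file is a serialized BatchRequestOutput, and with this change its response field nests the chat completion under a BatchResponseData carrying status_code and request_id. Below is a minimal sketch of how a consumer might read such a file after this fix; the file name and field values are illustrative assumptions, not taken from this commit.

```python
import json

# Minimal sketch of consuming the corrected batch output format.
# "results.jsonl" and the field values are illustrative, not part of this commit.
with open("results.jsonl") as f:
    for line in f:
        record = json.loads(line)  # one BatchRequestOutput per line
        response = record["response"]
        # With this change, "response" is a BatchResponseData-shaped object:
        #   {"status_code": 200, "request_id": "vllm-batch-...", "body": {...}}
        # rather than the bare ChatCompletionResponse it used to be.
        if record.get("error") is None and response["status_code"] == 200:
            completion = response["body"]  # the ChatCompletionResponse payload
            content = completion["choices"][0]["message"]["content"]
            print(record["custom_id"], content)
        else:
            print(record["custom_id"], "failed:", record.get("error"))
```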
vllm/entrypoints/openai/run_batch.py (13 additions & 4 deletions)

@@ -10,7 +10,9 @@
 from vllm.engine.async_llm_engine import AsyncLLMEngine
 from vllm.entrypoints.openai.protocol import (BatchRequestInput,
                                               BatchRequestOutput,
-                                              ChatCompletionResponse)
+                                              BatchResponseData,
+                                              ChatCompletionResponse,
+                                              ErrorResponse)
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
 from vllm.logger import init_logger
 from vllm.usage.usage_lib import UsageContext
@@ -77,20 +79,27 @@ async def run_request(chat_serving: OpenAIServingChat,
                       request: BatchRequestInput) -> BatchRequestOutput:
     chat_request = request.body
     chat_response = await chat_serving.create_chat_completion(chat_request)
+
     if isinstance(chat_response, ChatCompletionResponse):
         batch_output = BatchRequestOutput(
             id=f"vllm-{random_uuid()}",
             custom_id=request.custom_id,
-            response=chat_response,
+            response=BatchResponseData(
+                body=chat_response, request_id=f"vllm-batch-{random_uuid()}"),
             error=None,
         )
-    else:
+    elif isinstance(chat_response, ErrorResponse):
         batch_output = BatchRequestOutput(
             id=f"vllm-{random_uuid()}",
             custom_id=request.custom_id,
-            response=None,
+            response=BatchResponseData(
+                status_code=chat_response.code,
+                request_id=f"vllm-batch-{random_uuid()}"),
             error=chat_response,
         )
+    else:
+        raise ValueError("Request must not be sent in stream mode")
+
     return batch_output


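On the input side, run_batch.py reads one BatchRequestInput per line and passes its body to create_chat_completion as shown above. A minimal sketch of preparing such an input file follows; the method and url fields, file names, model name, and CLI flags are assumptions based on the vLLM batch-runner documentation rather than anything in this diff.

```python
import json

# Minimal sketch of preparing an input file for run_batch.py.
# The "method" and "url" fields, file names, and model name are assumptions
# for illustration; only "custom_id" and "body" appear in this diff.
requests = [
    {
        "custom_id": "request-1",  # echoed back as custom_id in the output line
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {  # a ChatCompletionRequest
            "model": "meta-llama/Meta-Llama-3-8B-Instruct",
            "messages": [{"role": "user", "content": "Hello!"}],
        },
    },
]

with open("batch_input.jsonl", "w") as f:
    for request in requests:
        f.write(json.dumps(request) + "\n")

# The batch runner would then be invoked along these lines (flags assumed
# from the vLLM batch-runner docs, not shown in this commit):
#   python -m vllm.entrypoints.openai.run_batch \
#       -i batch_input.jsonl -o results.jsonl \
#       --model meta-llama/Meta-Llama-3-8B-Instruct
```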
