@@ -10,7 +10,9 @@
 from vllm.engine.async_llm_engine import AsyncLLMEngine
 from vllm.entrypoints.openai.protocol import (BatchRequestInput,
                                               BatchRequestOutput,
-                                              ChatCompletionResponse)
+                                              BatchResponseData,
+                                              ChatCompletionResponse,
+                                              ErrorResponse)
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
 from vllm.logger import init_logger
 from vllm.usage.usage_lib import UsageContext
@@ -77,20 +79,27 @@ async def run_request(chat_serving: OpenAIServingChat,
                       request: BatchRequestInput) -> BatchRequestOutput:
     chat_request = request.body
     chat_response = await chat_serving.create_chat_completion(chat_request)
+
     if isinstance(chat_response, ChatCompletionResponse):
         batch_output = BatchRequestOutput(
             id=f"vllm-{random_uuid()}",
             custom_id=request.custom_id,
-            response=chat_response,
+            response=BatchResponseData(
+                body=chat_response, request_id=f"vllm-batch-{random_uuid()}"),
             error=None,
         )
-    else:
+    elif isinstance(chat_response, ErrorResponse):
         batch_output = BatchRequestOutput(
             id=f"vllm-{random_uuid()}",
             custom_id=request.custom_id,
-            response=None,
+            response=BatchResponseData(
+                status_code=chat_response.code,
+                request_id=f"vllm-batch-{random_uuid()}"),
             error=chat_response,
         )
+    else:
+        raise ValueError("Request must not be sent in stream mode")
+
     return batch_output
 
 
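For context, the new branching in `run_request` wraps every per-request result in a `BatchResponseData` envelope and rejects anything that is neither a completed response nor an error (e.g. a streaming generator). The sketch below mirrors that three-way branch so it can be exercised without a running engine; the `Fake*` dataclasses and the `wrap_result` helper are hypothetical stand-ins for illustration, not vLLM's actual pydantic models or functions.

```python
# Minimal sketch of the new run_request branching, using hypothetical
# stand-in types instead of vLLM's real pydantic models.
import uuid
from dataclasses import dataclass
from typing import Optional, Union


@dataclass
class FakeChatResponse:          # stands in for ChatCompletionResponse
    content: str


@dataclass
class FakeErrorResponse:         # stands in for ErrorResponse
    message: str
    code: int = 400


@dataclass
class FakeBatchResponseData:     # stands in for BatchResponseData
    request_id: str
    status_code: int = 200
    body: Optional[FakeChatResponse] = None


@dataclass
class FakeBatchRequestOutput:    # stands in for BatchRequestOutput
    id: str
    custom_id: str
    response: FakeBatchResponseData
    error: Optional[FakeErrorResponse]


def wrap_result(
    custom_id: str,
    chat_response: Union[FakeChatResponse, FakeErrorResponse, object],
) -> FakeBatchRequestOutput:
    """Mirror the success / error / streaming three-way branch."""
    if isinstance(chat_response, FakeChatResponse):
        # Successful completion: the body goes inside the response envelope.
        return FakeBatchRequestOutput(
            id=f"vllm-{uuid.uuid4().hex}",
            custom_id=custom_id,
            response=FakeBatchResponseData(
                body=chat_response,
                request_id=f"vllm-batch-{uuid.uuid4().hex}"),
            error=None,
        )
    elif isinstance(chat_response, FakeErrorResponse):
        # Error: the envelope carries the status code, the error field the details.
        return FakeBatchRequestOutput(
            id=f"vllm-{uuid.uuid4().hex}",
            custom_id=custom_id,
            response=FakeBatchResponseData(
                status_code=chat_response.code,
                request_id=f"vllm-batch-{uuid.uuid4().hex}"),
            error=chat_response,
        )
    else:
        # Anything else (e.g. a streaming generator) is rejected.
        raise ValueError("Request must not be sent in stream mode")


if __name__ == "__main__":
    ok = wrap_result("req-1", FakeChatResponse(content="hello"))
    bad = wrap_result("req-2", FakeErrorResponse(message="model not found", code=404))
    print(ok.response.status_code, bad.response.status_code)  # 200 404
```

Wrapping the body in a response envelope with a `status_code` and `request_id` follows the general shape of OpenAI-style batch output records, where each line carries a response object rather than the bare completion.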