Commit cea95df

[Frontend] Create ErrorResponse instead of raising exceptions in run_batch (vllm-project#8347)
1 parent 6a512a0 · commit cea95df

2 files changed: +31 −4 lines


tests/entrypoints/openai/test_run_batch.py

Lines changed: 3 additions & 1 deletion
@@ -8,7 +8,9 @@
 INPUT_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
 {"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
-{"custom_id": "request-3", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NonExistModel", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}"""
+{"custom_id": "request-3", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NonExistModel", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
+{"custom_id": "request-4", "method": "POST", "url": "/bad_url", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
+{"custom_id": "request-5", "method": "POST", "url": "/v1/chat/completions", "body": {"stream": "True", "model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}"""
 
 INVALID_INPUT_BATCH = """{"invalid_field": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
 {"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}"""

vllm/entrypoints/openai/run_batch.py

Lines changed: 28 additions & 3 deletions
@@ -1,4 +1,5 @@
 import asyncio
+from http import HTTPStatus
 from io import StringIO
 from typing import Awaitable, Callable, List, Optional
 
@@ -135,6 +136,25 @@ async def write_file(path_or_url: str, data: str) -> None:
         f.write(data)
 
 
+def make_error_request_output(request: BatchRequestInput,
+                              error_msg: str) -> BatchRequestOutput:
+    batch_output = BatchRequestOutput(
+        id=f"vllm-{random_uuid()}",
+        custom_id=request.custom_id,
+        response=BatchResponseData(
+            status_code=HTTPStatus.BAD_REQUEST,
+            request_id=f"vllm-batch-{random_uuid()}",
+        ),
+        error=error_msg,
+    )
+    return batch_output
+
+
+async def make_async_error_request_output(
+        request: BatchRequestInput, error_msg: str) -> BatchRequestOutput:
+    return make_error_request_output(request, error_msg)
+
+
 async def run_request(serving_engine_func: Callable,
                       request: BatchRequestInput,
                       tracker: BatchProgressTracker) -> BatchRequestOutput:
@@ -158,7 +178,8 @@ async def run_request(serving_engine_func: Callable,
             error=response,
         )
     else:
-        raise ValueError("Request must not be sent in stream mode")
+        batch_output = make_error_request_output(
+            request, error_msg="Request must not be sent in stream mode")
 
     tracker.completed()
     return batch_output
@@ -225,8 +246,12 @@ async def main(args):
                             tracker))
             tracker.submitted()
         else:
-            raise ValueError("Only /v1/chat/completions and /v1/embeddings are"
-                             "supported in the batch endpoint.")
+            response_futures.append(
+                make_async_error_request_output(
+                    request,
+                    error_msg="Only /v1/chat/completions and "
+                    "/v1/embeddings are supported in the batch endpoint.",
+                ))
 
     with tracker.pbar():
         responses = await asyncio.gather(*response_futures)
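Taken together, the run_batch.py changes turn both former raise sites into error rows that flow through the same asyncio.gather call as successful requests. The following is a minimal, self-contained sketch of that pattern; the dataclasses are simplified stand-ins for vLLM's BatchRequestInput / BatchResponseData / BatchRequestOutput models, and the fake ok() coroutine stands in for the real serving-engine call.

import asyncio
import uuid
from dataclasses import dataclass
from http import HTTPStatus
from typing import Optional


# Simplified stand-ins for vLLM's pydantic models; field names follow the diff.
@dataclass
class BatchRequestInput:
    custom_id: str
    url: str


@dataclass
class BatchResponseData:
    status_code: int
    request_id: str


@dataclass
class BatchRequestOutput:
    id: str
    custom_id: str
    response: Optional[BatchResponseData]
    error: Optional[str]


def make_error_request_output(request: BatchRequestInput,
                              error_msg: str) -> BatchRequestOutput:
    # Same shape as the helper added in run_batch.py: a BAD_REQUEST row keyed
    # by the request's custom_id, with the message carried in `error`.
    return BatchRequestOutput(
        id=f"vllm-{uuid.uuid4().hex}",
        custom_id=request.custom_id,
        response=BatchResponseData(
            status_code=HTTPStatus.BAD_REQUEST,
            request_id=f"vllm-batch-{uuid.uuid4().hex}",
        ),
        error=error_msg,
    )


async def make_async_error_request_output(
        request: BatchRequestInput, error_msg: str) -> BatchRequestOutput:
    # Wrapping the sync helper lets the submission loop append it to the same
    # list of awaitables as real requests and gather everything in one pass.
    return make_error_request_output(request, error_msg)


async def main() -> None:
    requests = [
        BatchRequestInput(custom_id="request-1", url="/v1/chat/completions"),
        BatchRequestInput(custom_id="request-4", url="/bad_url"),
    ]
    futures = []
    for req in requests:
        if req.url in ("/v1/chat/completions", "/v1/embeddings"):
            # A real deployment would call the serving engine here; this
            # sketch just fakes a successful row.
            async def ok(r=req):
                return BatchRequestOutput(id="ok", custom_id=r.custom_id,
                                          response=None, error=None)

            futures.append(ok())
        else:
            futures.append(
                make_async_error_request_output(
                    req, error_msg="Only /v1/chat/completions and "
                    "/v1/embeddings are supported in the batch endpoint."))
    for out in await asyncio.gather(*futures):
        print(out.custom_id, out.error)


asyncio.run(main())

Because every branch now appends an awaitable, gather preserves the input order of the batch file and a single bad line no longer aborts the whole run.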
