Skip to content

Commit 44658c7

Browse files
qandrew authored and FeiDaLI committed
[gpt-oss][2] fix types for streaming (vllm-project#24556)
Signed-off-by: Andrew Xia <axia@meta.com>
1 parent b9833a6 commit 44658c7

File tree

3 files changed

+104
-96
lines changed

3 files changed

+104
-96
lines changed

vllm/entrypoints/openai/api_server.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,6 @@
2727
from fastapi.exceptions import RequestValidationError
2828
from fastapi.middleware.cors import CORSMiddleware
2929
from fastapi.responses import JSONResponse, Response, StreamingResponse
30-
from openai import BaseModel
3130
from prometheus_client import make_asgi_app
3231
from prometheus_fastapi_instrumentator import Instrumentator
3332
from starlette.concurrency import iterate_in_threadpool
@@ -67,7 +66,9 @@
6766
RerankRequest, RerankResponse,
6867
ResponsesRequest,
6968
ResponsesResponse, ScoreRequest,
70-
ScoreResponse, TokenizeRequest,
69+
ScoreResponse,
70+
StreamingResponsesResponse,
71+
TokenizeRequest,
7172
TokenizeResponse,
7273
TranscriptionRequest,
7374
TranscriptionResponse,
@@ -481,8 +482,8 @@ async def show_version():
481482

482483

483484
async def _convert_stream_to_sse_events(
484-
generator: AsyncGenerator[BaseModel,
485-
None]) -> AsyncGenerator[str, None]:
485+
generator: AsyncGenerator[StreamingResponsesResponse, None]
486+
) -> AsyncGenerator[str, None]:
486487
"""Convert the generator to a stream of events in SSE format"""
487488
async for event in generator:
488489
event_type = getattr(event, 'type', 'unknown')

vllm/entrypoints/openai/protocol.py

Lines changed: 33 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -18,10 +18,19 @@
1818
from openai.types.chat.chat_completion_message import (
1919
Annotation as OpenAIAnnotation)
2020
# yapf: enable
21-
from openai.types.responses import (ResponseFunctionToolCall,
22-
ResponseInputItemParam, ResponseOutputItem,
23-
ResponsePrompt, ResponseReasoningItem,
24-
ResponseStatus)
21+
from openai.types.responses import (
22+
ResponseCodeInterpreterCallCodeDeltaEvent,
23+
ResponseCodeInterpreterCallCodeDoneEvent,
24+
ResponseCodeInterpreterCallCompletedEvent,
25+
ResponseCodeInterpreterCallInProgressEvent,
26+
ResponseCodeInterpreterCallInterpretingEvent, ResponseCompletedEvent,
27+
ResponseContentPartAddedEvent, ResponseContentPartDoneEvent,
28+
ResponseCreatedEvent, ResponseFunctionToolCall, ResponseInProgressEvent,
29+
ResponseInputItemParam, ResponseOutputItem, ResponseOutputItemAddedEvent,
30+
ResponseOutputItemDoneEvent, ResponsePrompt, ResponseReasoningItem,
31+
ResponseReasoningTextDeltaEvent, ResponseReasoningTextDoneEvent,
32+
ResponseStatus, ResponseWebSearchCallCompletedEvent,
33+
ResponseWebSearchCallInProgressEvent, ResponseWebSearchCallSearchingEvent)
2534

2635
# Backward compatibility for OpenAI client versions
2736
try: # For older openai versions (< 1.100.0)
@@ -251,6 +260,26 @@ def get_logits_processors(processors: Optional[LogitsProcessors],
251260
ResponseReasoningItem,
252261
ResponseFunctionToolCall]
253262

263+
StreamingResponsesResponse: TypeAlias = Union[
264+
ResponseCreatedEvent,
265+
ResponseInProgressEvent,
266+
ResponseCompletedEvent,
267+
ResponseOutputItemAddedEvent,
268+
ResponseOutputItemDoneEvent,
269+
ResponseContentPartAddedEvent,
270+
ResponseContentPartDoneEvent,
271+
ResponseReasoningTextDeltaEvent,
272+
ResponseReasoningTextDoneEvent,
273+
ResponseCodeInterpreterCallInProgressEvent,
274+
ResponseCodeInterpreterCallCodeDeltaEvent,
275+
ResponseWebSearchCallInProgressEvent,
276+
ResponseWebSearchCallSearchingEvent,
277+
ResponseWebSearchCallCompletedEvent,
278+
ResponseCodeInterpreterCallCodeDoneEvent,
279+
ResponseCodeInterpreterCallInterpretingEvent,
280+
ResponseCodeInterpreterCallCompletedEvent,
281+
]
282+
254283

255284
class ResponsesRequest(OpenAIBaseModel):
256285
# Ordered by official OpenAI API documentation

0 commit comments

Comments
 (0)