[gpt-oss][2] fix types for streaming (vllm-project#24556)

qandrew · FeiDaLI · commit 44658c7d9331 · 2025-09-25T18:54:19.000+08:00
Signed-off-by: Andrew Xia &lt;axia@meta.com&gt;
diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
@@ -27,7 +27,6 @@
 from fastapi.exceptions import RequestValidationError
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, Response, StreamingResponse
-from openai import BaseModel
 from prometheus_client import make_asgi_app
 from prometheus_fastapi_instrumentator import Instrumentator
 from starlette.concurrency import iterate_in_threadpool
@@ -67,7 +66,9 @@
                                               RerankRequest, RerankResponse,
                                               ResponsesRequest,
                                               ResponsesResponse, ScoreRequest,
-                                              ScoreResponse, TokenizeRequest,
+                                              ScoreResponse,
+                                              StreamingResponsesResponse,
+                                              TokenizeRequest,
                                               TokenizeResponse,
                                               TranscriptionRequest,
                                               TranscriptionResponse,
@@ -481,8 +482,8 @@ async def show_version():
 
 
 async def _convert_stream_to_sse_events(
-        generator: AsyncGenerator[BaseModel,
-                                  None]) -> AsyncGenerator[str, None]:
+    generator: AsyncGenerator[StreamingResponsesResponse, None]
+) -> AsyncGenerator[str, None]:
     """Convert the generator to a stream of events in SSE format"""
     async for event in generator:
         event_type = getattr(event, 'type', 'unknown')
diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
@@ -18,10 +18,19 @@
 from openai.types.chat.chat_completion_message import (
     Annotation as OpenAIAnnotation)
 # yapf: enable
-from openai.types.responses import (ResponseFunctionToolCall,
-                                    ResponseInputItemParam, ResponseOutputItem,
-                                    ResponsePrompt, ResponseReasoningItem,
-                                    ResponseStatus)
+from openai.types.responses import (
+    ResponseCodeInterpreterCallCodeDeltaEvent,
+    ResponseCodeInterpreterCallCodeDoneEvent,
+    ResponseCodeInterpreterCallCompletedEvent,
+    ResponseCodeInterpreterCallInProgressEvent,
+    ResponseCodeInterpreterCallInterpretingEvent, ResponseCompletedEvent,
+    ResponseContentPartAddedEvent, ResponseContentPartDoneEvent,
+    ResponseCreatedEvent, ResponseFunctionToolCall, ResponseInProgressEvent,
+    ResponseInputItemParam, ResponseOutputItem, ResponseOutputItemAddedEvent,
+    ResponseOutputItemDoneEvent, ResponsePrompt, ResponseReasoningItem,
+    ResponseReasoningTextDeltaEvent, ResponseReasoningTextDoneEvent,
+    ResponseStatus, ResponseWebSearchCallCompletedEvent,
+    ResponseWebSearchCallInProgressEvent, ResponseWebSearchCallSearchingEvent)
 
 # Backward compatibility for OpenAI client versions
 try:  # For older openai versions (< 1.100.0)
@@ -251,6 +260,26 @@ def get_logits_processors(processors: Optional[LogitsProcessors],
                                            ResponseReasoningItem,
                                            ResponseFunctionToolCall]
 
+StreamingResponsesResponse: TypeAlias = Union[
+    ResponseCreatedEvent,
+    ResponseInProgressEvent,
+    ResponseCompletedEvent,
+    ResponseOutputItemAddedEvent,
+    ResponseOutputItemDoneEvent,
+    ResponseContentPartAddedEvent,
+    ResponseContentPartDoneEvent,
+    ResponseReasoningTextDeltaEvent,
+    ResponseReasoningTextDoneEvent,
+    ResponseCodeInterpreterCallInProgressEvent,
+    ResponseCodeInterpreterCallCodeDeltaEvent,
+    ResponseWebSearchCallInProgressEvent,
+    ResponseWebSearchCallSearchingEvent,
+    ResponseWebSearchCallCompletedEvent,
+    ResponseCodeInterpreterCallCodeDoneEvent,
+    ResponseCodeInterpreterCallInterpretingEvent,
+    ResponseCodeInterpreterCallCompletedEvent,
+]
+
 
 class ResponsesRequest(OpenAIBaseModel):
     # Ordered by official OpenAI API documentation
diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py