66 changes: 66 additions & 0 deletions docs/my-website/docs/providers/openai.md
@@ -339,6 +339,72 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
| fine tuned `gpt-3.5-turbo-1106` | `response = completion(model="ft:gpt-3.5-turbo-1106", messages=messages)` |
| fine tuned `gpt-3.5-turbo-0613` | `response = completion(model="ft:gpt-3.5-turbo-0613", messages=messages)` |

## Getting Reasoning Content in `/chat/completions`

GPT-5 models return reasoning content when called via the Responses API. To surface that reasoning content through the `/chat/completions` endpoint, call the model with the `openai/responses/` prefix. LiteLLM then routes the request through the Responses API and maps the reasoning summary back into the chat completion response as `reasoning_content`.

<Tabs>
<TabItem value="sdk" label="SDK">
```python
import litellm

response = litellm.completion(
    model="openai/responses/gpt-5-mini",  # tells litellm to call the model via the Responses API
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    reasoning_effort="low",
)
```
</TabItem>

<TabItem value="proxy" label="PROXY">
```bash
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
"model": "openai/responses/gpt-5-mini",
"messages": [{"role": "user", "content": "What is the capital of France?"}],
"reasoning_effort": "low"
}'
```
</TabItem>
</Tabs>
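
If your LiteLLM proxy does not already route this model (for example through a wildcard entry), you can register it in `config.yaml` with the same `openai/responses/` prefix. This is a minimal sketch, assuming your OpenAI key is read from the environment:

```yaml
model_list:
  - model_name: openai/responses/gpt-5-mini   # must match the "model" sent in the request above
    litellm_params:
      model: openai/responses/gpt-5-mini      # tells litellm to call gpt-5-mini via the Responses API
      api_key: os.environ/OPENAI_API_KEY
```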

Expected Response:
```json
{
"id": "chatcmpl-6382a222-43c9-40c4-856b-22e105d88075",
"created": 1760146746,
"model": "gpt-5-mini",
"object": "chat.completion",
"system_fingerprint": null,
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"content": "Paris",
"role": "assistant",
"tool_calls": null,
"function_call": null,
"reasoning_content": "**Identifying the capital**\n\nThe user wants me to think of the capital of France and write it down. That's pretty straightforward: it's Paris. There aren't any safety issues to consider here. I think it would be best to keep it concise, so maybe just \"Paris\" would suffice. I feel confident that I should just stick to that without adding anything else. So, let's write it down!",
"provider_specific_fields": null
}
}
],
"usage": {
"completion_tokens": 7,
"prompt_tokens": 18,
"total_tokens": 25,
"completion_tokens_details": null,
"prompt_tokens_details": {
"audio_tokens": null,
"cached_tokens": 0,
"text_tokens": null,
"image_tokens": null
}
}
}
```
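
The reasoning summary comes back on the message object, so with the SDK you can read it next to the answer (a minimal sketch, assuming the response shape shown above):

```python
import litellm

response = litellm.completion(
    model="openai/responses/gpt-5-mini",  # call gpt-5-mini via the Responses API
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    reasoning_effort="low",
)

message = response.choices[0].message
print(message.content)            # "Paris"
print(message.reasoning_content)  # the model's reasoning summary
```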

## OpenAI Chat Completion to Responses API Bridge

@@ -18,13 +18,15 @@
cast,
)

from openai.types.responses.tool_param import FunctionToolParam

from litellm import ModelResponse
from litellm._logging import verbose_logger
from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator
from litellm.llms.base_llm.bridges.completion_transformation import (
CompletionTransformationBridge,
)
from litellm.types.llms.openai import Reasoning
from litellm.types.llms.openai import ChatCompletionToolParamFunctionChunk, Reasoning

if TYPE_CHECKING:
from openai.types.responses import ResponseInputImageParam
Expand Down Expand Up @@ -201,6 +203,11 @@ def transform_request(
if value is not None:
if key == "instructions" and instructions:
request_data["instructions"] = instructions
elif key == "stream_options" and isinstance(value, dict):
request_data["stream_options"] = value.get("include_obfuscation")
elif key == "user": # string can't be longer than 64 characters
if isinstance(value, str) and len(value) <= 64:
request_data["user"] = value
else:
request_data[key] = value

@@ -221,7 +228,6 @@ def transform_response(
json_mode: Optional[bool] = None,
) -> "ModelResponse":
"""Transform Responses API response to chat completion response"""

from openai.types.responses import (
ResponseFunctionToolCall,
ResponseOutputMessage,
@@ -240,19 +246,35 @@

choices: List[Choices] = []
index = 0

reasoning_content: Optional[str] = None

for item in raw_response.output:

if isinstance(item, ResponseReasoningItem):
pass # ignore for now.

for content in item.summary:
response_text = getattr(content, "text", "")
reasoning_content = response_text if response_text else ""

elif isinstance(item, ResponseOutputMessage):
for content in item.content:
response_text = getattr(content, "text", "")
msg = Message(
role=item.role, content=response_text if response_text else ""
role=item.role,
content=response_text if response_text else "",
reasoning_content=reasoning_content,
)

choices.append(
Choices(message=msg, finish_reason="stop", index=index)
Choices(
message=msg,
finish_reason="stop",
index=index,
)
)

reasoning_content = None # flush reasoning content
index += 1
elif isinstance(item, ResponseFunctionToolCall):
msg = Message(
@@ -267,11 +289,13 @@
"type": "function",
}
],
reasoning_content=reasoning_content,
)

choices.append(
Choices(message=msg, finish_reason="tool_calls", index=index)
)
reasoning_content = None # flush reasoning content
index += 1
else:
pass # don't fail request if item in list is not supported
@@ -447,9 +471,25 @@ def _convert_tools_to_responses_format(
self, tools: List[Dict[str, Any]]
) -> List["ALL_RESPONSES_API_TOOL_PARAMS"]:
"""Convert chat completion tools to responses API tools format"""
responses_tools = []
responses_tools: List["ALL_RESPONSES_API_TOOL_PARAMS"] = []
for tool in tools:
responses_tools.append(tool)
# convert function tool from chat completion to responses API format
if tool.get("type") == "function":
function_tool = cast(
ChatCompletionToolParamFunctionChunk, tool.get("function")
)
responses_tools.append(
FunctionToolParam(
name=function_tool["name"],
parameters=function_tool.get("parameters"),
strict=function_tool.get("strict"),
type="function",
description=function_tool.get("description"),
)
)
else:
responses_tools.append(tool) # type: ignore

return cast(List["ALL_RESPONSES_API_TOOL_PARAMS"], responses_tools)

def _map_reasoning_effort(self, reasoning_effort: str) -> Optional[Reasoning]:
@@ -133,7 +133,6 @@ async def async_anthropic_messages_handler(
**kwargs,
) -> Union[AnthropicMessagesResponse, AsyncIterator]:
"""Handle non-Anthropic models asynchronously using the adapter"""

completion_kwargs = (
LiteLLMMessagesToCompletionTransformationHandler._prepare_completion_kwargs(
max_tokens=max_tokens,
5 changes: 3 additions & 2 deletions litellm/llms/custom_httpx/llm_http_handler.py
@@ -13,13 +13,13 @@
cast,
)

from litellm._logging import verbose_logger
import httpx # type: ignore

import litellm
import litellm.litellm_core_utils
import litellm.types
import litellm.types.utils
from litellm._logging import verbose_logger
from litellm.litellm_core_utils.realtime_streaming import RealTimeStreaming
from litellm.llms.base_llm.anthropic_messages.transformation import (
BaseAnthropicMessagesConfig,
@@ -239,7 +239,7 @@ async def async_completion(
json_mode: bool = False,
signed_json_body: Optional[bytes] = None,
shared_session: Optional["ClientSession"] = None,
):
):
if client is None:
verbose_logger.debug(
f"Creating HTTP client with shared_session: {id(shared_session) if shared_session else None}"
@@ -1533,6 +1533,7 @@ def response_api_handler(
data=data,
fake_stream=fake_stream,
)

response = sync_httpx_client.post(
url=api_base,
headers=headers,
12 changes: 11 additions & 1 deletion litellm/llms/openai/responses/transformation.py
@@ -161,6 +161,10 @@ def transform_response_api_response(
) -> ResponsesAPIResponse:
"""No transform applied since outputs are in OpenAI spec already"""
try:
logging_obj.post_call(
original_response=raw_response.text,
additional_args={"complete_input_dict": {}},
)
raw_response_json = raw_response.json()
raw_response_json["created_at"] = _safe_convert_created_field(
raw_response_json["created_at"]
@@ -169,7 +173,13 @@
raise OpenAIError(
message=raw_response.text, status_code=raw_response.status_code
)
return ResponsesAPIResponse.model_construct(**raw_response_json)
try:
return ResponsesAPIResponse(**raw_response_json)
except Exception:
verbose_logger.debug(
f"Error constructing ResponsesAPIResponse: {raw_response_json}, using model_construct"
)
return ResponsesAPIResponse.model_construct(**raw_response_json)

def validate_environment(
self, headers: dict, model: str, litellm_params: Optional[GenericLiteLLMParams]
28 changes: 0 additions & 28 deletions litellm/model_prices_and_context_window_backup.json
@@ -13074,34 +13074,6 @@
"supports_vision": true,
"supports_web_search": true
},
"gpt-5-codex": {
"cache_read_input_token_cost": 1.25e-07,
"input_cost_per_token": 1.25e-06,
"litellm_provider": "openai",
"max_input_tokens": 400000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "chat",
"output_cost_per_token": 1e-05,
"supported_endpoints": [
"/v1/responses"
],
"supported_modalities": [
"text",
"image"
],
"supported_output_modalities": [
"text"
],
"supports_function_calling": true,
"supports_native_streaming": true,
"supports_parallel_function_calling": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
"supports_system_messages": true,
"supports_tool_choice": true
},
"gpt-5-2025-08-07": {
"cache_read_input_token_cost": 1.25e-07,
"cache_read_input_token_cost_flex": 6.25e-08,
1 change: 0 additions & 1 deletion litellm/proxy/_experimental/out/onboarding.html

This file was deleted.

29 changes: 2 additions & 27 deletions litellm/proxy/_new_secret_config.yaml
@@ -1,29 +1,4 @@
model_list:
- model_name: gpt-5-mini
- model_name: gpt-5-codex
litellm_params:
model: openai/gpt-4o-mini
api_base: "https://webhook.site/2f385e05-00aa-402b-86d1-efc9261471a5"
api_key: dummy
- model_name: "byok-wildcard/*"
litellm_params:
model: openai/*
- model_name: xai-grok-3
litellm_params:
model: xai/grok-3
- model_name: hosted_vllm/whisper-v3
litellm_params:
model: hosted_vllm/whisper-v3
api_base: "https://webhook.site/2f385e05-00aa-402b-86d1-efc9261471a5"
api_key: dummy

mcp_servers:
my_api_mcp:
url: "http://0.0.0.0:8090"
spec_path: "/Users/krrishdholakia/Documents/temp_py_folder/example_openapi.json"
auth_type: none
allowed_tools: ["getpetbyid", "my_api_mcp-findpetsbystatus"]


litellm_settings:
callbacks: ["prometheus"]
custom_prometheus_metadata_labels: ["metadata.initiative", "metadata.business-unit"]
model: gpt-5-codex
28 changes: 0 additions & 28 deletions model_prices_and_context_window.json
@@ -13074,34 +13074,6 @@
"supports_vision": true,
"supports_web_search": true
},
"gpt-5-codex": {
"cache_read_input_token_cost": 1.25e-07,
"input_cost_per_token": 1.25e-06,
"litellm_provider": "openai",
"max_input_tokens": 400000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "chat",
"output_cost_per_token": 1e-05,
"supported_endpoints": [
"/v1/responses"
],
"supported_modalities": [
"text",
"image"
],
"supported_output_modalities": [
"text"
],
"supports_function_calling": true,
"supports_native_streaming": true,
"supports_parallel_function_calling": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
"supports_system_messages": true,
"supports_tool_choice": true
},
"gpt-5-2025-08-07": {
"cache_read_input_token_cost": 1.25e-07,
"cache_read_input_token_cost_flex": 6.25e-08,