66 changes: 66 additions & 0 deletions docs/my-website/docs/providers/openai.md
@@ -339,6 +339,72 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
| fine tuned `gpt-3.5-turbo-1106` | `response = completion(model="ft:gpt-3.5-turbo-1106", messages=messages)` |
| fine tuned `gpt-3.5-turbo-0613` | `response = completion(model="ft:gpt-3.5-turbo-0613", messages=messages)` |

## Getting Reasoning Content in `/chat/completions`

GPT-5 models return reasoning content when called via the Responses API. To surface that reasoning content through the `/chat/completions` endpoint, call the model with the `openai/responses/` prefix. LiteLLM then routes the request through the Responses API and maps the reasoning summary back into the chat completion response as `reasoning_content`.

<Tabs>
<TabItem value="sdk" label="SDK">
```python
import litellm

response = litellm.completion(
    model="openai/responses/gpt-5-mini",  # tells litellm to call the model via the Responses API
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    reasoning_effort="low",
)
```
</TabItem>

<TabItem value="proxy" label="PROXY">
```bash
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
"model": "openai/responses/gpt-5-mini",
"messages": [{"role": "user", "content": "What is the capital of France?"}],
"reasoning_effort": "low"
}'
```
</TabItem>
</Tabs>
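
If your LiteLLM proxy does not already route this model (for example through a wildcard entry), you can register it in `config.yaml` with the same `openai/responses/` prefix. This is a minimal sketch, assuming your OpenAI key is read from the environment:

```yaml
model_list:
  - model_name: openai/responses/gpt-5-mini   # must match the "model" sent in the request above
    litellm_params:
      model: openai/responses/gpt-5-mini      # tells litellm to call gpt-5-mini via the Responses API
      api_key: os.environ/OPENAI_API_KEY
```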

Expected Response:
```json
{
"id": "chatcmpl-6382a222-43c9-40c4-856b-22e105d88075",
"created": 1760146746,
"model": "gpt-5-mini",
"object": "chat.completion",
"system_fingerprint": null,
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"content": "Paris",
"role": "assistant",
"tool_calls": null,
"function_call": null,
"reasoning_content": "**Identifying the capital**\n\nThe user wants me to think of the capital of France and write it down. That's pretty straightforward: it's Paris. There aren't any safety issues to consider here. I think it would be best to keep it concise, so maybe just \"Paris\" would suffice. I feel confident that I should just stick to that without adding anything else. So, let's write it down!",
"provider_specific_fields": null
}
}
],
"usage": {
"completion_tokens": 7,
"prompt_tokens": 18,
"total_tokens": 25,
"completion_tokens_details": null,
"prompt_tokens_details": {
"audio_tokens": null,
"cached_tokens": 0,
"text_tokens": null,
"image_tokens": null
}
}
}
```
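
The reasoning summary comes back on the message object, so with the SDK you can read it next to the answer (a minimal sketch, assuming the response shape shown above):

```python
import litellm

response = litellm.completion(
    model="openai/responses/gpt-5-mini",  # call gpt-5-mini via the Responses API
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    reasoning_effort="low",
)

message = response.choices[0].message
print(message.content)            # "Paris"
print(message.reasoning_content)  # the model's reasoning summary
```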

## OpenAI Chat Completion to Responses API Bridge

@@ -18,13 +18,15 @@
cast,
)

from openai.types.responses.tool_param import FunctionToolParam

from litellm import ModelResponse
from litellm._logging import verbose_logger
from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator
from litellm.llms.base_llm.bridges.completion_transformation import (
CompletionTransformationBridge,
)
from litellm.types.llms.openai import Reasoning
from litellm.types.llms.openai import ChatCompletionToolParamFunctionChunk, Reasoning

if TYPE_CHECKING:
from openai.types.responses import ResponseInputImageParam
Expand Down Expand Up @@ -201,6 +203,11 @@ def transform_request(
if value is not None:
if key == "instructions" and instructions:
request_data["instructions"] = instructions
elif key == "stream_options" and isinstance(value, dict):
request_data["stream_options"] = value.get("include_obfuscation")
elif key == "user": # string can't be longer than 64 characters
if isinstance(value, str) and len(value) <= 64:
request_data["user"] = value
else:
request_data[key] = value

@@ -221,7 +228,6 @@ def transform_response(
json_mode: Optional[bool] = None,
) -> "ModelResponse":
"""Transform Responses API response to chat completion response"""

from openai.types.responses import (
ResponseFunctionToolCall,
ResponseOutputMessage,
@@ -240,19 +246,35 @@

choices: List[Choices] = []
index = 0

reasoning_content: Optional[str] = None

for item in raw_response.output:

if isinstance(item, ResponseReasoningItem):
pass # ignore for now.

for content in item.summary:
response_text = getattr(content, "text", "")
reasoning_content = response_text if response_text else ""

elif isinstance(item, ResponseOutputMessage):
for content in item.content:
response_text = getattr(content, "text", "")
msg = Message(
role=item.role, content=response_text if response_text else ""
role=item.role,
content=response_text if response_text else "",
reasoning_content=reasoning_content,
)

choices.append(
Choices(message=msg, finish_reason="stop", index=index)
Choices(
message=msg,
finish_reason="stop",
index=index,
)
)

reasoning_content = None # flush reasoning content
index += 1
elif isinstance(item, ResponseFunctionToolCall):
msg = Message(
@@ -267,11 +289,13 @@
"type": "function",
}
],
reasoning_content=reasoning_content,
)

choices.append(
Choices(message=msg, finish_reason="tool_calls", index=index)
)
reasoning_content = None # flush reasoning content
index += 1
else:
pass # don't fail request if item in list is not supported
@@ -447,9 +471,25 @@ def _convert_tools_to_responses_format(
self, tools: List[Dict[str, Any]]
) -> List["ALL_RESPONSES_API_TOOL_PARAMS"]:
"""Convert chat completion tools to responses API tools format"""
responses_tools = []
responses_tools: List["ALL_RESPONSES_API_TOOL_PARAMS"] = []
for tool in tools:
responses_tools.append(tool)
# convert function tool from chat completion to responses API format
if tool.get("type") == "function":
function_tool = cast(
ChatCompletionToolParamFunctionChunk, tool.get("function")
)
responses_tools.append(
FunctionToolParam(
name=function_tool["name"],
parameters=function_tool.get("parameters"),
strict=function_tool.get("strict"),
type="function",
description=function_tool.get("description"),
)
)
else:
responses_tools.append(tool) # type: ignore

return cast(List["ALL_RESPONSES_API_TOOL_PARAMS"], responses_tools)

def _map_reasoning_effort(self, reasoning_effort: str) -> Optional[Reasoning]:
@@ -133,7 +133,6 @@ async def async_anthropic_messages_handler(
**kwargs,
) -> Union[AnthropicMessagesResponse, AsyncIterator]:
"""Handle non-Anthropic models asynchronously using the adapter"""

completion_kwargs = (
LiteLLMMessagesToCompletionTransformationHandler._prepare_completion_kwargs(
max_tokens=max_tokens,
5 changes: 3 additions & 2 deletions litellm/llms/custom_httpx/llm_http_handler.py
@@ -13,13 +13,13 @@
cast,
)

from litellm._logging import verbose_logger
import httpx # type: ignore

import litellm
import litellm.litellm_core_utils
import litellm.types
import litellm.types.utils
from litellm._logging import verbose_logger
from litellm.litellm_core_utils.realtime_streaming import RealTimeStreaming
from litellm.llms.base_llm.anthropic_messages.transformation import (
BaseAnthropicMessagesConfig,
@@ -239,7 +239,7 @@ async def async_completion(
json_mode: bool = False,
signed_json_body: Optional[bytes] = None,
shared_session: Optional["ClientSession"] = None,
):
):
if client is None:
verbose_logger.debug(
f"Creating HTTP client with shared_session: {id(shared_session) if shared_session else None}"
@@ -1533,6 +1533,7 @@ def response_api_handler(
data=data,
fake_stream=fake_stream,
)

response = sync_httpx_client.post(
url=api_base,
headers=headers,
12 changes: 11 additions & 1 deletion litellm/llms/openai/responses/transformation.py
@@ -161,6 +161,10 @@ def transform_response_api_response(
) -> ResponsesAPIResponse:
"""No transform applied since outputs are in OpenAI spec already"""
try:
logging_obj.post_call(
original_response=raw_response.text,
additional_args={"complete_input_dict": {}},
)
raw_response_json = raw_response.json()
raw_response_json["created_at"] = _safe_convert_created_field(
raw_response_json["created_at"]
@@ -169,7 +173,13 @@
raise OpenAIError(
message=raw_response.text, status_code=raw_response.status_code
)
return ResponsesAPIResponse.model_construct(**raw_response_json)
try:
return ResponsesAPIResponse(**raw_response_json)
except Exception:
verbose_logger.debug(
f"Error constructing ResponsesAPIResponse: {raw_response_json}, using model_construct"
)
return ResponsesAPIResponse.model_construct(**raw_response_json)

def validate_environment(
self, headers: dict, model: str, litellm_params: Optional[GenericLiteLLMParams]
28 changes: 0 additions & 28 deletions litellm/model_prices_and_context_window_backup.json
@@ -13074,34 +13074,6 @@
"supports_vision": true,
"supports_web_search": true
},
"gpt-5-codex": {
"cache_read_input_token_cost": 1.25e-07,
"input_cost_per_token": 1.25e-06,
"litellm_provider": "openai",
"max_input_tokens": 400000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "chat",
"output_cost_per_token": 1e-05,
"supported_endpoints": [
"/v1/responses"
],
"supported_modalities": [
"text",
"image"
],
"supported_output_modalities": [
"text"
],
"supports_function_calling": true,
"supports_native_streaming": true,
"supports_parallel_function_calling": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
"supports_system_messages": true,
"supports_tool_choice": true
},
"gpt-5-2025-08-07": {
"cache_read_input_token_cost": 1.25e-07,
"cache_read_input_token_cost_flex": 6.25e-08,
1 change: 0 additions & 1 deletion litellm/proxy/_experimental/out/onboarding.html

This file was deleted.

29 changes: 2 additions & 27 deletions litellm/proxy/_new_secret_config.yaml
@@ -1,29 +1,4 @@
model_list:
- model_name: gpt-5-mini
- model_name: gpt-5-codex
litellm_params:
model: openai/gpt-4o-mini
api_base: "https://webhook.site/2f385e05-00aa-402b-86d1-efc9261471a5"
api_key: dummy
- model_name: "byok-wildcard/*"
litellm_params:
model: openai/*
- model_name: xai-grok-3
litellm_params:
model: xai/grok-3
- model_name: hosted_vllm/whisper-v3
litellm_params:
model: hosted_vllm/whisper-v3
api_base: "https://webhook.site/2f385e05-00aa-402b-86d1-efc9261471a5"
api_key: dummy

mcp_servers:
my_api_mcp:
url: "http://0.0.0.0:8090"
spec_path: "/Users/krrishdholakia/Documents/temp_py_folder/example_openapi.json"
auth_type: none
allowed_tools: ["getpetbyid", "my_api_mcp-findpetsbystatus"]


litellm_settings:
callbacks: ["prometheus"]
custom_prometheus_metadata_labels: ["metadata.initiative", "metadata.business-unit"]
model: gpt-5-codex
28 changes: 0 additions & 28 deletions model_prices_and_context_window.json
@@ -13074,34 +13074,6 @@
"supports_vision": true,
"supports_web_search": true
},
"gpt-5-codex": {
"cache_read_input_token_cost": 1.25e-07,
"input_cost_per_token": 1.25e-06,
"litellm_provider": "openai",
"max_input_tokens": 400000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "chat",
"output_cost_per_token": 1e-05,
"supported_endpoints": [
"/v1/responses"
],
"supported_modalities": [
"text",
"image"
],
"supported_output_modalities": [
"text"
],
"supports_function_calling": true,
"supports_native_streaming": true,
"supports_parallel_function_calling": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
"supports_system_messages": true,
"supports_tool_choice": true
},
"gpt-5-2025-08-07": {
"cache_read_input_token_cost": 1.25e-07,
"cache_read_input_token_cost_flex": 6.25e-08,