Describe the question
Running the LiteLLM sample from the docs works. However, modifying the sample to use the run_streamed API to log responses, tool calls, etc. results in an exception.
Debug information
- Agents SDK version: v0.0.12
- Python version: 3.13
Repro steps
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.13"
# dependencies = [
#     "openai-agents[litellm]",
# ]
# ///
from __future__ import annotations

import asyncio

from agents import Agent, ItemHelpers, Runner, function_tool, set_tracing_disabled
from agents.extensions.models.litellm_model import LitellmModel

set_tracing_disabled(disabled=True)


@function_tool
def get_weather(city: str):
    print(f"[debug] getting weather for {city}")
    return f"The weather in {city} is sunny."


async def main(model: str, api_key: str):
    agent = Agent(
        name="Assistant",
        instructions="You only respond in haikus.",
        model=LitellmModel(model=model, api_key=api_key),
        tools=[get_weather],
    )

    result = Runner.run_streamed(agent, "What's the weather in Tokyo?")
    async for event in result.stream_events():
        # We'll ignore the raw responses event deltas
        if event.type == "raw_response_event":
            continue
        # When the agent updates, print that
        elif event.type == "agent_updated_stream_event":
            print(f"Agent updated: {event.new_agent.name}")
            continue
        # When items are generated, print them
        elif event.type == "run_item_stream_event":
            if event.item.type == "tool_call_item":
                print(f"-- Tool was called: {event.item.raw_item}")
            elif event.item.type == "tool_call_output_item":
                print(f"-- Tool output: {event.item.output}")
            elif event.item.type == "message_output_item":
                print(f"-- Message output:\n {ItemHelpers.text_message_output(event.item)}")
            else:
                pass  # Ignore other event types

    print(result.final_output)


if __name__ == "__main__":
    # First try to get model/api key from args
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, required=False)
    parser.add_argument("--api-key", type=str, required=False)
    args = parser.parse_args()

    model = args.model
    if not model:
        model = input("Enter a model name for Litellm: ")

    api_key = args.api_key
    if not api_key:
        api_key = input("Enter an API key for Litellm: ")

    asyncio.run(main(model, api_key))
❯ ./demo-streaming.py --api-key=... --model=together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo
Agent updated: Assistant
Traceback (most recent call last):
  File "/Users/shoda/.cache/uv/environments-v2/demo-streaming-44c9a8b0431f886f/lib/python3.13/site-packages/pydantic/main.py", line 986, in __getattr__
    return pydantic_extra[item]
           ~~~~~~~~~~~~~~^^^^^^
KeyError: 'usage'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/private/var/folders/ct/x2gct7yn2bxfqs891n8h1dxr0000gn/T/tmp.FRPiXhVbRG/./demo-streaming.py", line 73, in <module>
    asyncio.run(main(model, api_key))
    ~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/Cellar/python@3.13/3.13.3/Frameworks/Python.framework/Versions/3.13/lib/python3.13/asyncio/runners.py", line 195, in run
    return runner.run(main)
           ~~~~~~~~~~^^^^^^
  File "/usr/local/Cellar/python@3.13/3.13.3/Frameworks/Python.framework/Versions/3.13/lib/python3.13/asyncio/runners.py", line 118, in run
    return self._loop.run_until_complete(task)
           ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^
  File "/usr/local/Cellar/python@3.13/3.13.3/Frameworks/Python.framework/Versions/3.13/lib/python3.13/asyncio/base_events.py", line 719, in run_until_complete
    return future.result()
           ~~~~~~~~~~~~~^^
  File "/private/var/folders/ct/x2gct7yn2bxfqs891n8h1dxr0000gn/T/tmp.FRPiXhVbRG/./demo-streaming.py", line 34, in main
    async for event in result.stream_events():
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    ...<16 lines>...
            pass # Ignore other event types
            ^^^^
  File "/Users/shoda/.cache/uv/environments-v2/demo-streaming-44c9a8b0431f886f/lib/python3.13/site-packages/agents/result.py", line 191, in stream_events
    raise self._stored_exception
  File "/Users/shoda/.cache/uv/environments-v2/demo-streaming-44c9a8b0431f886f/lib/python3.13/site-packages/agents/run.py", line 560, in _run_streamed_impl
    turn_result = await cls._run_single_turn_streamed(
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    ...<9 lines>...
    )
    ^
  File "/Users/shoda/.cache/uv/environments-v2/demo-streaming-44c9a8b0431f886f/lib/python3.13/site-packages/agents/run.py", line 671, in _run_single_turn_streamed
    async for event in model.stream_response(
    ...<28 lines>...
        streamed_result._event_queue.put_nowait(RawResponsesStreamEvent(data=event))
  File "/Users/shoda/.cache/uv/environments-v2/demo-streaming-44c9a8b0431f886f/lib/python3.13/site-packages/agents/extensions/models/litellm_model.py", line 167, in stream_response
    async for chunk in ChatCmplStreamHandler.handle_stream(response, stream):
    ...<3 lines>...
        final_response = chunk.response
  File "/Users/shoda/.cache/uv/environments-v2/demo-streaming-44c9a8b0431f886f/lib/python3.13/site-packages/agents/models/chatcmpl_stream_handler.py", line 59, in handle_stream
    usage = chunk.usage
            ^^^^^^^^^^^
  File "/Users/shoda/.cache/uv/environments-v2/demo-streaming-44c9a8b0431f886f/lib/python3.13/site-packages/pydantic/main.py", line 988, in __getattr__
    raise AttributeError(f'{type(self).__name__!r} object has no attribute {item!r}') from exc
AttributeError: 'ModelResponseStream' object has no attribute 'usage'
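
For what it's worth, the failing line is usage = chunk.usage in chatcmpl_stream_handler.py. OpenAI's ChatCompletionChunk always defines a usage field, but litellm streams its own ModelResponseStream pydantic model, which only carries usage as a pydantic extra when the provider sends one; for extras that were never set, pydantic's __getattr__ raises AttributeError instead of returning None. A minimal standalone sketch of the failure mode (hypothetical FakeChunk model, not the SDK's own code):

from pydantic import BaseModel, ConfigDict


class FakeChunk(BaseModel):
    # Mirrors litellm's ModelResponseStream config: extras are allowed,
    # but reading an extra that was never set raises AttributeError.
    model_config = ConfigDict(extra="allow")
    id: str


chunk = FakeChunk(id="chunk-1")
print(getattr(chunk, "usage", None))  # None -- the defensive read is safe
print(chunk.usage)  # AttributeError, exactly as in the traceback above

So a defensive getattr(chunk, "usage", None) in the handler would presumably avoid the crash.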
The unmodified (non-streaming) sample given in the docs works:
❯ ./demo.py --api-key=... --model=together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo
[debug] getting weather for Tokyo
Sakura blooms dance
Gentle Tokyo spring breeze
Warmth on skin so sweet
Expected behavior
run_streamed should work, or there should be some other way of streaming events while using LiteLLM models.
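
In the meantime, a stopgap that should work around the crash without editing the installed SDK (untested sketch; it assumes ModelResponseStream lives at litellm.types.utils, as in recent litellm releases, and that usage arrives as a pydantic extra, which the traceback suggests) is to expose a usage property on litellm's chunk class before running the agent:

from litellm.types.utils import ModelResponseStream


def _usage_or_none(self):
    # Surface the `usage` extra when a chunk actually carries one
    # (e.g. a final chunk with token counts), else return None so the
    # SDK's ChatCmplStreamHandler can read chunk.usage safely.
    return (self.__pydantic_extra__ or {}).get("usage")


ModelResponseStream.usage = property(_usage_or_none)

Because a property is a data descriptor, normal attribute lookup finds it before pydantic's __getattr__ ever runs, and chunks that do include a usage payload still report it.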