-
Notifications
You must be signed in to change notification settings - Fork 2.8k
Description
Thanks for building livekit! We rely heavily on the testing functionality so far, and really found it useful.
Bug Description
When using session.start(agent, capture_run=True) in combination with await self.generate_reply() inside of on_enter, there is no event captured for the generated speech.
Additionally (not sure if this is related): When handing off to the agent, we see duplicate events in the run result.
Another thought: When the chat context is changed, no events are captured for it, which I find unintuitive. A change to the chat context (or at least the addition of messages) could also be a relevant event in this case.
Reproduction Steps
Example script:
from __future__ import annotations
import asyncio
from typing import Any
from dotenv import load_dotenv
from livekit.agents import Agent, AgentSession, ChatContext, RunContext
from livekit.agents.llm import FunctionCall, FunctionCallOutput, function_tool
from livekit.agents.voice.run_result import (
AgentHandoffEvent,
ChatMessageEvent,
FunctionCallEvent,
FunctionCallOutputEvent,
RunResult,
)
from livekit.plugins import openai
# Load environment variables from a dotenv file at import time.
# Change the path to point at the env file that holds your Azure (or other
# LLM provider) settings.
load_dotenv("services/agent_worker/.agent_worker.env")
async def append_message_to_chat_ctx(session: AgentSession[Any], content: str) -> None:
    """Append a system message to the current agent's chat context.

    A copy of the current agent's chat context is extended with a
    ``system``-role message carrying ``content`` and is then handed back to
    the agent through ``update_chat_ctx``.
    """
    active_agent = session.current_agent
    updated_ctx = active_agent.chat_ctx.copy()
    updated_ctx.add_message(role="system", content=content)
    await active_agent.update_chat_ctx(updated_ctx)
class EnterAgent(Agent):
    # First agent of the reproduction: on entry it adds a system message, says
    # a fixed line, and requests a generated reply; its only tool returns a
    # SecondAgent.

    def __init__(self) -> None:
        prompt = "You are a helpful assistant. Greet the user. Always only call the ice_cream_types tool to get the ice cream types."
        super().__init__(instructions=prompt)

    async def on_enter(self) -> None:
        # Three steps in order: system message, fixed speech, generated reply.
        session = self.session
        await append_message_to_chat_ctx(session, "ENTER AGENT: system message")
        await session.say("ENTER AGENT: Say something deterministic")
        await session.generate_reply(
            instructions="ENTER AGENT: Say 'Say something generated'"
        )

    @function_tool()
    async def ice_cream_types(self, context: RunContext[Any]):
        """Get all the ice cream types"""
        # Returns a new agent that receives this agent's chat context.
        next_agent = SecondAgent(chat_ctx=self.chat_ctx)
        return next_agent
class SecondAgent(Agent):
    # Agent returned by EnterAgent.ice_cream_types; it is constructed with the
    # chat context passed in by the caller.

    def __init__(self, chat_ctx: ChatContext) -> None:
        super().__init__(
            instructions="Always follow instructions.",
            chat_ctx=chat_ctx,
        )

    async def on_enter(self) -> None:
        # Same three steps as EnterAgent.on_enter, with SECOND AGENT strings.
        session = self.session
        await append_message_to_chat_ctx(session, "SECOND AGENT: system message")
        await session.say("SECOND AGENT: Say something deterministic")
        await session.generate_reply(
            instructions="Say 'SECOND AGENT: Say something generated' and DO NOT LIST ICE CREAM TYPES."
        )
def print_chat_ctx(chat_ctx: ChatContext) -> None:
    """Print every item of ``chat_ctx`` as one tab-indented line.

    Function calls and function-call outputs get a ``tool`` /
    ``tool output`` prefix; every other item is printed as
    ``role: content``.
    """
    print("Chat ctx:")
    for entry in chat_ctx.items:
        if isinstance(entry, FunctionCall):
            line = f"\t\ttool: {entry.name}: {entry.arguments}"
        elif isinstance(entry, FunctionCallOutput):
            line = f"\t\ttool output: {entry.name}: {entry.output}"
        else:
            line = f"\t\t{entry.role}: {entry.content}"
        print(line)
def print_result(result: RunResult) -> None:
    """Print one tab-indented line for each event in ``result.events``.

    Function calls, function-call outputs, and chat messages are printed
    with their item id; agent handoffs are printed with the old agent; any
    other event is printed by its class name only.
    """
    print("Result:")
    for captured in result.events:
        if isinstance(captured, FunctionCallEvent):
            call = captured.item
            line = f"\t\ttool: {call.name}: {call.arguments} (id {call.id})"
        elif isinstance(captured, FunctionCallOutputEvent):
            output = captured.item
            line = f"\t\ttool output: {output.name}: {output.output} (id {output.id})"
        elif isinstance(captured, ChatMessageEvent):
            message = captured.item
            line = f"\t\t{message.role}: {message.content} (id {message.id})"
        elif isinstance(captured, AgentHandoffEvent):
            line = f"\t\t{captured.type}: {captured.old_agent}"
        else:
            line = f"\t\tevent: {type(captured).__name__}"
        print(line)
async def main():
    """Run the reproduction: start the session, send one user turn, print results.

    The captured result of ``session.start`` and of the following
    ``session.run`` are printed, then a copy of the current agent's chat
    context.
    """
    entry_agent = EnterAgent()
    azure_llm = openai.LLM.with_azure(model="gpt-4.1", temperature=0)
    async with azure_llm as llm, AgentSession(llm=llm) as session:
        # Events captured while the first agent is started.
        start_result = await session.start(entry_agent, capture_run=True)
        print_result(start_result)

        # Events captured for the user turn.
        run_result = await session.run(user_input="Give me all ice cream types.")
        print_result(run_result)

        print_chat_ctx(session.current_agent.chat_ctx.copy())


if __name__ == "__main__":
    asyncio.run(main())
Output:
Result:
agent_handoff: None
assistant: ['ENTER AGENT: Say something deterministic'] (id item_165f32fa3e4a)
Result:
tool: ice_cream_types: {} (id item_f1e3d022f58d/fnc_0)
tool output: ice_cream_types: (id item_6093dedffb37)
agent_handoff: <__main__.EnterAgent object at 0x11d1c95b0>
assistant: ['SECOND AGENT: Say something deterministic'] (id item_85f66b754045)
assistant: ['SECOND AGENT: Say something generated'] (id item_58765ca22ad6)
agent_handoff: <__main__.SecondAgent object at 0x11d5ebf50>
assistant: ['SECOND AGENT: Say something deterministic'] (id item_9017a30b8c6e)
assistant: ['SECOND AGENT: Say something generated'] (id item_e0ef8156feff)
Chat ctx:
system: ['Always follow instructions.']
system: ['ENTER AGENT: system message']
assistant: ['ENTER AGENT: Say something deterministic']
user: ['Give me all ice cream types.']
system: ['SECOND AGENT: system message']
system: ['SECOND AGENT: system message']
assistant: ['SECOND AGENT: Say something deterministic']
assistant: ['SECOND AGENT: Say something generated']
Expected Behavior
Expected output:
Result:
agent_handoff: None
assistant: ['ENTER AGENT: Say something deterministic'] (id item_165f32fa3e4a)
assistant: ['ENTER AGENT: Say something generated'] (id item_....)
Result:
tool: ice_cream_types: {} (id item_f1e3d022f58d/fnc_0)
tool output: ice_cream_types: (id item_6093dedffb37)
agent_handoff: <__main__.EnterAgent object at 0x11d1c95b0>
assistant: ['SECOND AGENT: Say something deterministic'] (id item_85f66b754045)
assistant: ['SECOND AGENT: Say something generated'] (id item_58765ca22ad6)
Chat ctx:
system: ['Always follow instructions.']
system: ['ENTER AGENT: system message']
assistant: ['ENTER AGENT: Say something deterministic']
user: ['Give me all ice cream types.']
system: ['SECOND AGENT: system message']
system: ['SECOND AGENT: system message']
assistant: ['SECOND AGENT: Say something deterministic']
assistant: ['SECOND AGENT: Say something generated']
And optionally having additional events for system messages that are added.
Operating System
macOS
Models Used
No response
Package Versions
livekit==1.0.23
livekit-agents==1.3.12
livekit-api==1.1.0
livekit-protocol==1.1.2
Session/Room/Call IDs
No response
Proposed Solution
Additional Context
No response
Screenshots and Recordings
No response