# Realtime LLM Plugin Development

Here is a minimal example of developing a new Realtime LLM.
```python
from typing import List, Optional

from vision_agents.core.llm.llm import LLMResponseEvent
from vision_agents.core.llm.events import (
    LLMResponseChunkEvent,
    LLMResponseCompletedEvent,
    RealtimeAudioOutputEvent,  # assumed to live alongside the other LLM events
)
from vision_agents.core.llm import realtime
from vision_agents.core.processors import Processor

# ClientType, Participant, and PcmData are stand-ins: import your provider's
# client type and the framework's Participant/PcmData types from wherever
# they live in your setup.


class MyRealtime(realtime.Realtime):
    def __init__(self, model: str, client: Optional[ClientType] = None):
        # It should be possible to pass the client (makes it easier for users
        # to customize things). Settings that are common to change, like the
        # model, should be constructor arguments as well.
        super().__init__()
        self.model = model
        self.client = client

    async def connect(self):
        # Create the websocket or WebRTC connection to the realtime LLM.
        pass

    async def _handle_events(self):
        # Handle the events coming from the connection opened in connect().

        # When receiving audio, emit an audio output event
        # (audio_content is the raw audio from the provider event)...
        audio_event = RealtimeAudioOutputEvent(
            plugin_name="my_plugin",  # your plugin's name
            audio_data=audio_content,
            sample_rate=24000,
        )
        self.events.send(audio_event)

        # ...and write the audio to the output track.
        await self.output_track.write(audio_content)

        # For transcriptions...
        # TODO document this
        pass

    async def _close_impl(self):
        # Tear down the connection created in connect().
        pass

    # Wrap the native method. Every LLM has its own name for this:
    # OpenAI calls it "create response", Anthropic "create message",
    # so the name depends on your LLM.
    async def mynativemethod(self, *args, **kwargs):
        # Some details to get right here...
        # Ensure conversation history is maintained, typically by passing it, i.e.:
        enhanced_instructions = self._build_enhanced_instructions()
        if enhanced_instructions:
            kwargs["system"] = [{"text": enhanced_instructions}]

        response_iterator = await self.client.mynativemethod(*args, **kwargs)

        # While receiving the stream, emit a chunk event per chunk
        # (use "async for" if your provider returns an async iterator).
        total_text = ""
        for chunk in response_iterator:
            self.events.send(LLMResponseChunkEvent(
                plugin_name="my_plugin",
                content_index=0,
                item_id="",
                output_index=0,
                sequence_number=0,
                delta=chunk.text,
            ))
            total_text += chunk.text

        llm_response = LLMResponseEvent(response_iterator, total_text)
        # And when completed, emit the completed event.
        self.events.send(LLMResponseCompletedEvent(
            plugin_name="my_plugin",
            original=llm_response.original,
            text=llm_response.text,
        ))
        return llm_response

    async def simple_response(
        self,
        text: str,
        processors: Optional[List[Processor]] = None,
        participant: Optional[Participant] = None,
    ):
        # Call the LLM with the given text.
        # Be sure to use the streaming version.
        return await self.mynativemethod(...)

    async def simple_audio_response(self, pcm: PcmData):
        # Respond to this audio.
        pass
```
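
Once implemented, the plugin can be exercised directly. Here is a minimal usage sketch (the model name and prompt are placeholders; in a real application the agent framework drives these methods):

```python
# Minimal usage sketch; "my-model" and the prompt are placeholders.
llm = MyRealtime(model="my-model")
await llm.connect()
await llm.simple_response("Say hello to the user.")
```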

## Things to get right

* Use the streaming API/version in your native method.
* Have one endpoint wrap the native method (with `*args, **kwargs`).
* `simple_response` is the standardized entry point; it should call `mynativemethod`.
* Messages are standardized in `_normalize_message`; a rough sketch follows below.
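
For that last point, here is a rough, non-authoritative sketch of what message normalization might look like. The real signature and message shape come from the core `LLM` base class, so treat the field names below as assumptions:

```python
# Hypothetical sketch only: the actual signature and return type are defined
# on the core LLM base class; the dict fields below are assumptions.
def _normalize_message(self, provider_message) -> dict:
    # Map the provider-specific payload onto a standard role/content shape
    # so conversation history stays consistent across plugins.
    return {
        "role": getattr(provider_message, "role", "assistant"),
        "content": getattr(provider_message, "text", str(provider_message)),
    }
```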

## Other examples

If you need more examples, look at:

- gemini_llm.py
- bedrock_llm.py

0 commit comments

Comments
 (0)