Skip to content

Commit d2eeba7

Browse files
committed
ai tts instructions
1 parent 98a4f9d commit d2eeba7

File tree

3 files changed

+53
-31
lines changed

3 files changed

+53
-31
lines changed

docs/ai/instructions/ai-tts.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
## TTS
2+
3+
Here's a minimal example for building a new TTS plugin:
4+
5+
```python
6+
7+
8+
class MyTTS(tts.TTS):
9+
def __init__(
10+
self,
11+
voice_id: str = "VR6AewLTigWG4xSOukaG", # Default ElevenLabs voice
12+
model_id: str = "eleven_multilingual_v2",
13+
client: Optional[MyClient] = None,
14+
):
15+
# it should be possible to pass the client (makes it easier for users to customize things)
16+
# settings that are common to change, like voice id or model id should be configurable as well
17+
super().__init__()
18+
self.voice_id = voice_id
19+
self.client = client if client is not None else MyClient(api_key=api_key)
20+
21+
async def stream_audio(self, text: str, *_, **__) -> AsyncIterator[bytes]:
22+
23+
audio_stream = self.client.text_to_speech.stream(
24+
text=text,
25+
voice_id=self.voice_id,
26+
output_format=self.output_format,
27+
model_id=self.model_id,
28+
request_options={"chunk_size": 64000},
29+
)
30+
31+
return audio_stream
32+
33+
```
34+
35+
TODO: the stop part can be generic
36+
TODO: Track handling can be improved
37+
38+
## Testing your TTS
39+
40+
TODO: no good test suite yet

plugins/bedrock/vision_agents/plugins/bedrock/bedrock_realtime.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,12 @@ class Realtime(realtime.Realtime):
5252
Input event docs: https://docs.aws.amazon.com/nova/latest/userguide/input-events.html
5353
Available voices are documented here:
5454
https://docs.aws.amazon.com/nova/latest/userguide/available-voices.html
55-
55+
56+
Resumption example:
57+
https://github.com/aws-samples/amazon-nova-samples/tree/main/speech-to-speech/repeatable-patterns/resume-conversation
58+
59+
60+
5661
Examples:
5762
5863
from vision_agents.plugins import bedrock
@@ -160,7 +165,6 @@ async def simple_audio_response(self, pcm: PcmData):
160165
if not self.connected:
161166
self.logger.warning("realtime is not active. can't call simple_audio_response")
162167

163-
logger.info("sar")
164168
# Resample from 48kHz to 24kHz if needed
165169
pcm = pcm.resample(24000)
166170

@@ -268,6 +272,7 @@ async def start_prompt(self):
268272
}
269273
}
270274
}
275+
# TODO: tool support
271276
await self.send_event(event)
272277

273278

@@ -351,13 +356,11 @@ async def _handle_events(self):
351356
"""Process incoming responses from Bedrock."""
352357
try:
353358
while True:
354-
logger.info("iter")
355359
try:
356360
output = await self.stream.await_output()
357361
result = await output[1].receive()
358362
if result.value and result.value.bytes_:
359363
try:
360-
logger.info("received...")
361364
response_data = result.value.bytes_.decode('utf-8')
362365
json_data = json.loads(response_data)
363366

@@ -385,7 +388,6 @@ async def _handle_events(self):
385388
elif 'completionStart' in json_data['event']:
386389
logger.info("Completion start from Bedrock", json_data['event']['completionStart'])
387390
elif 'audioOutput' in json_data['event']:
388-
logger.info("Audio output from Bedrock")
389391
audio_content = json_data['event']['audioOutput']['content']
390392
audio_bytes = base64.b64decode(audio_content)
391393
#await self.audio_output_queue.put(audio_bytes)
@@ -422,7 +424,12 @@ async def _handle_events(self):
422424
await self.send_tool_result_event(toolContent, toolResult)
423425
await self.send_tool_content_end_event(toolContent)
424426
elif 'contentEnd' in json_data['event']:
425-
logger.info(f"Content end from Bedrock: {json_data['event']['contentEnd']}")
427+
428+
429+
stopReason = json_data['event']['contentEnd']['stopReason']
430+
if stopReason == "INTERRUPTED":
431+
logger.info("TODO: should flush audio buffer")
432+
logger.info(f"Content end from Bedrock {stopReason}: {json_data['event']['contentEnd']}")
426433

427434
elif 'completionEnd' in json_data['event']:
428435
logger.info(f"Completion end from Bedrock: {json_data['event']['completionEnd']}")

uv.lock

Lines changed: 0 additions & 25 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)