89 changes: 89 additions & 0 deletions examples/stt_assemblyai_transcription/README.md
@@ -0,0 +1,89 @@
# Stream + AssemblyAI STT Example

This example demonstrates how to build a real-time transcription bot that joins a Stream video call and transcribes speech using AssemblyAI's Speech-to-Text API.

## What it does

- 🤖 Creates a transcription bot that joins a Stream video call
- 🌐 Opens a browser interface for users to join the call
- 🎙️ Transcribes speech in real-time using AssemblyAI STT
- 📝 Displays transcriptions with timestamps and confidence scores in the terminal

## Prerequisites

1. **Stream Account**: Get your API credentials from [Stream Dashboard](https://dashboard.getstream.io)
2. **AssemblyAI Account**: Get your API key from [AssemblyAI Console](https://www.assemblyai.com/)
3. **Python 3.10+**: Required for running the example

## Installation

You can use your preferred package manager, but we recommend [`uv`](https://docs.astral.sh/uv/).

1. **Navigate to this directory:**
```bash
cd examples/stt_assemblyai_transcription
```

2. **Install dependencies:**
```bash
uv sync
```

3. **Set up environment variables:**
Copy `env.example` to `.env` (`cp env.example .env`) and fill in your actual credentials.
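Before running, it's worth checking that no placeholder values remain in `.env`. A minimal stdlib-only sketch of such a sanity check (the `missing_vars` helper and the `_here` placeholder convention are illustrative, not part of the example itself):

```python
REQUIRED = ["STREAM_API_KEY", "STREAM_API_SECRET", "ASSEMBLYAI_API_KEY"]


def missing_vars(env: dict) -> list:
    """Return required credential names that are unset or still placeholders."""
    return [k for k in REQUIRED if not env.get(k) or env[k].endswith("_here")]


# Simulated environment with one placeholder left unfilled
env = {
    "STREAM_API_KEY": "abc123",
    "STREAM_API_SECRET": "s3cret",
    "ASSEMBLYAI_API_KEY": "your_assemblyai_api_key_here",
}
print(missing_vars(env))  # → ['ASSEMBLYAI_API_KEY']
```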

## Usage

Run the example:
```bash
uv run main.py
```

## Configuration Options

You can customize the AssemblyAI STT settings in the `main.py` file:

```python
stt = AssemblyAISTT(
    sample_rate=48000,                    # Audio sample rate (Hz)
    language="en",                        # Language code
    interim_results=True,                 # Enable interim results
    enable_partials=True,                 # Enable partial transcripts
    enable_automatic_punctuation=True,    # Auto-punctuation
    enable_utterance_end_detection=True,  # Utterance end detection
)
```

## Features

- **Real-time transcription** with low latency
- **Partial transcripts** for immediate feedback
- **Automatic punctuation** for better readability
- **Utterance end detection** for natural speech segmentation
- **Multi-language support** (change the `language` parameter)
- **Confidence scoring** for transcription quality

## How it works

1. **Call Setup**: Creates a Stream video call with unique IDs
2. **Bot Joins**: A transcription bot joins the call as a participant
3. **Audio Processing**: Captures audio from all participants
4. **Real-time Transcription**: Sends audio to AssemblyAI for processing
5. **Results Display**: Shows transcripts in the terminal with timestamps
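Steps 3–5 above form a simple event-driven pipeline: audio events from the call connection are fed into the STT plugin, which emits transcript events back to registered handlers. The flow can be sketched with a stdlib-only stand-in (`MiniSTT` and its event names are illustrative, not the real SDK classes):

```python
import asyncio
import time


class MiniSTT:
    """Toy stand-in for an STT service: fires a 'transcript' event per audio chunk."""

    def __init__(self):
        self._handlers = {}

    def on(self, event):
        # Decorator-style registration, mirroring the @stt.on("transcript") pattern
        def register(fn):
            self._handlers.setdefault(event, []).append(fn)
            return fn
        return register

    async def process_audio(self, chunk, user):
        # A real STT client would stream the audio out and await server results here
        text = f"heard {len(chunk)} samples"
        for fn in self._handlers.get("transcript", []):
            await fn({"text": text, "user": user})


async def demo():
    stt = MiniSTT()
    lines = []

    @stt.on("transcript")
    async def on_transcript(event):
        lines.append(f"[{time.strftime('%H:%M:%S')}] {event['user']}: {event['text']}")

    await stt.process_audio([0] * 480, "alice")  # one 10 ms chunk at 48 kHz
    return lines


print(asyncio.run(demo()))
```

The real example wires the same shape of handlers onto the call connection and the AssemblyAI plugin instead of this toy class.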

## Troubleshooting

- **No audio detected**: Ensure your microphone is working and permissions are granted
- **API errors**: Check your AssemblyAI API key and account status
- **Connection issues**: Verify your internet connection and Stream credentials

## AssemblyAI Features

AssemblyAI provides high-quality transcription with:
- **High-accuracy streaming models**
- **Real-time streaming** for low latency
- **Automatic language detection** support
- **Speaker diarization** capabilities
- **Custom vocabulary** support

For more information, visit [AssemblyAI Documentation](https://www.assemblyai.com/docs).
1 change: 1 addition & 0 deletions examples/stt_assemblyai_transcription/__init__.py
@@ -0,0 +1 @@
# AssemblyAI STT Transcription Example
7 changes: 7 additions & 0 deletions examples/stt_assemblyai_transcription/env.example
@@ -0,0 +1,7 @@
# Stream API credentials
STREAM_API_KEY=your_stream_api_key_here
STREAM_API_SECRET=your_stream_api_secret_here
EXAMPLE_BASE_URL=https://pronto.getstream.io

# AssemblyAI API credentials
ASSEMBLYAI_API_KEY=your_assemblyai_api_key_here
185 changes: 185 additions & 0 deletions examples/stt_assemblyai_transcription/main.py
@@ -0,0 +1,185 @@
#!/usr/bin/env python3
"""
Example: Real-time Call Transcription with AssemblyAI STT

This example demonstrates how to:
1. Join a Stream video call
2. Transcribe audio in real-time using AssemblyAI
3. Open a browser link for users to join the call

Usage:
python main.py

Requirements:
- Create a .env file with your Stream and AssemblyAI credentials (see env.example)
- Install dependencies: pip install -e .
"""

import asyncio
import logging
import os
import time
import traceback
import uuid
import webbrowser
from urllib.parse import urlencode

from dotenv import load_dotenv

from getstream.models import UserRequest
from getstream.plugins.assemblyai.stt import AssemblyAISTT
from getstream.stream import Stream
from getstream.video import rtc
from getstream.video.rtc.track_util import PcmData

logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")


def create_user(client: Stream, id: str, name: str) -> None:
    """
    Create a user with a unique Stream ID.

    Args:
        client: Stream client instance
        id: Unique user ID
        name: Display name for the user
    """
    user_request = UserRequest(id=id, name=name)
    client.upsert_users(user_request)


def open_browser(api_key: str, token: str, call_id: str) -> str:
    """
    Helper function to open browser with Stream call link.

    Args:
        api_key: Stream API key
        token: JWT token for the user
        call_id: ID of the call

    Returns:
        The URL that was opened
    """
    base_url = f"{os.getenv('EXAMPLE_BASE_URL')}/join/"
    params = {"api_key": api_key, "token": token, "skip_lobby": "true"}

    url = f"{base_url}{call_id}?{urlencode(params)}"
    print(f"Opening browser to: {url}")

    try:
        webbrowser.open(url)
        print("Browser opened successfully!")
    except Exception as e:
        print(f"Failed to open browser: {e}")
        print(f"Please manually open this URL: {url}")

    return url


async def main():
    """Main example function."""
    print("🎙️ Stream + AssemblyAI Real-time Transcription Example")
    print("=" * 58)

    # Load environment variables
    load_dotenv()

    # Initialize Stream client from ENV
    client = Stream.from_env()

    # Create a unique call ID for this session
    call_id = str(uuid.uuid4())
    print(f"📞 Call ID: {call_id}")

    user_id = f"user-{uuid.uuid4()}"
    create_user(client, user_id, "My User")
    logging.info("👤 Created user: %s", user_id)

    user_token = client.create_token(user_id, expiration=3600)
    logging.info("🔑 Created token for user: %s", user_id)

    bot_user_id = f"transcription-bot-{uuid.uuid4()}"
    create_user(client, bot_user_id, "Transcription Bot")
    logging.info("🤖 Created bot user: %s", bot_user_id)

    # Create the call
    call = client.video.call("default", call_id)
    call.get_or_create(data={"created_by_id": bot_user_id})
    print(f"📞 Call created: {call_id}")

    # Open browser for users to join with the user token
    open_browser(client.api_key, user_token, call_id)

    print("\n🤖 Starting transcription bot...")
    print("The bot will join the call and transcribe all audio it receives.")
    print("Join the call in your browser and speak to see transcriptions appear here!")
    print("\nPress Ctrl+C to stop the transcription bot.\n")

    # Initialize AssemblyAI STT (api_key comes from .env)
    stt = AssemblyAISTT(
        sample_rate=48000,
        language="en",
        interim_results=True,
        enable_partials=True,
        enable_automatic_punctuation=True,
        enable_utterance_end_detection=True,
    )

    try:
        async with await rtc.join(call, bot_user_id) as connection:
            print(f"✅ Bot joined call: {call_id}")

            # Set up transcription handlers
            @connection.on("audio")
            async def on_audio(pcm: PcmData, user):
                # Process audio through AssemblyAI STT with user metadata
                user_metadata = {"user": user} if user else None
                await stt.process_audio(pcm, user_metadata)

            @stt.on("transcript")
            async def on_transcript(event):
                timestamp = time.strftime("%H:%M:%S")
                user_info = "unknown"
                if event.user_metadata and "user" in event.user_metadata:
                    user = event.user_metadata["user"]
                    user_info = user.name if hasattr(user, "name") else str(user)
                print(f"[{timestamp}] {user_info}: {event.text}")
                if hasattr(event, "confidence") and event.confidence:
                    print(f"  └─ confidence: {event.confidence:.2%}")
                if hasattr(event, "processing_time_ms") and event.processing_time_ms:
                    print(f"  └─ processing time: {event.processing_time_ms:.1f}ms")

            @stt.on("partial_transcript")
            async def on_partial_transcript(event):
                if event.text.strip():  # Only show non-empty partial transcripts
                    user_info = "unknown"
                    if event.user_metadata and "user" in event.user_metadata:
                        user = event.user_metadata["user"]
                        user_info = user.name if hasattr(user, "name") else str(user)
                    print(f"  {user_info} (partial): {event.text}", end="\r")  # Overwrite line

            @stt.on("error")
            async def on_stt_error(event):
                print(f"\n❌ STT Error: {event.error_message}")
                if hasattr(event, "context") and event.context:
                    print(f"  └─ context: {event.context}")

            # Keep the connection alive and wait for audio
            print("🎧 Listening for audio... (Press Ctrl+C to stop)")
            await connection.wait()

    except asyncio.CancelledError:
        print("\n⏹️ Stopping transcription bot...")
    except Exception as e:
        print(f"❌ Error: {e}")
        traceback.print_exc()
    finally:
        await stt.close()
        client.delete_users([user_id, bot_user_id])
        print("🧹 Cleanup completed")


if __name__ == "__main__":
    asyncio.run(main())
13 changes: 13 additions & 0 deletions examples/stt_assemblyai_transcription/pyproject.toml
@@ -0,0 +1,13 @@
[project]
name = "getstream-stt-assemblyai-transcription-example"
version = "0.1.0"
description = "Example project showing how to transcribe a call using STT with AssemblyAI"
readme = "README.md"
requires-python = ">=3.10"
license = {text = "MIT"}

dependencies = [
    "getstream[webrtc]>=2.3.0a0",
    "getstream-plugins-assemblyai>=0.1.0",
    "python-dotenv>=1.1.1",
]