Skip to content

Commit adfedca

Browse files
authored
updated AVSynchronizer readme (livekit#326)
1 parent c0b1e1c commit adfedca

File tree

3 files changed

+39
-12
lines changed

3 files changed

+39
-12
lines changed

examples/video-stream/README.md

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,25 @@ await av_sync.push(audio_frame)
2222
## Examples
2323

2424
### 1. Video File Playback (`video_play.py`)
25-
Shows how to stream video and audio from separate sources while maintaining sync:
2625

27-
- Reads video and audio streams separately from a media file
26+
Demonstrates synchronizing video and audio streams that come from separate sources:
27+
28+
- Reads video and audio streams separately from a media file (using `av` library)
2829
- Uses separate tasks to push video and audio frames to the synchronizer
2930
- Since the streams are continuous, a larger `queue_size_ms` can be used, though this will increase memory usage
3031

31-
### 2. Audio Visualization (`audio_wave.py`)
32+
#### Usage:
33+
34+
```bash
35+
python video_play.py <room-name> </path/to/video>
36+
```
37+
38+
### 2. Audio Visualization (`audio_wave.py`)
39+
3240
Demonstrates generating video based on audio input:
3341

3442
- Generates audio frames with alternating sine waves and silence
35-
- Creates video frames visualizing the audio waveform
43+
- Creates video frames visualizing the audio waveform (using `cv2` library)
3644
- Shows how to handle cases with and without audio:
3745
- When audio is present: Push synchronized video and audio frames
3846
- During silence: Push only video frames

examples/video-stream/audio_wave.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
import numpy as np
1111
from livekit import rtc, api
12+
import sys
1213

1314
try:
1415
import cv2
@@ -218,15 +219,15 @@ def _np_to_video_frame(image: np.ndarray) -> rtc.VideoFrame:
218219
yield video_frame, sub_audio_frame
219220

220221

221-
async def main(room: rtc.Room):
222+
async def main(room: rtc.Room, room_name: str):
222223
token = (
223224
api.AccessToken()
224225
.with_identity("python-publisher")
225226
.with_name("Python Publisher")
226227
.with_grants(
227228
api.VideoGrants(
228229
room_join=True,
229-
room="room-ysBA-Q0hM",
230+
room=room_name,
230231
agent=True,
231232
)
232233
)
@@ -303,14 +304,19 @@ async def main(room: rtc.Room):
303304
handlers=[logging.FileHandler("audio_wave.log"), logging.StreamHandler()],
304305
)
305306

307+
if len(sys.argv) != 2:
308+
print("Usage: python audio_wave.py <room-name>")
309+
sys.exit(1)
310+
311+
room_name = sys.argv[1]
306312
loop = asyncio.get_event_loop()
307313
room = rtc.Room(loop=loop)
308314

309315
async def cleanup():
310316
await room.disconnect()
311317
loop.stop()
312318

313-
asyncio.ensure_future(main(room))
319+
asyncio.ensure_future(main(room, room_name))
314320
for signal in [signal.SIGINT, signal.SIGTERM]:
315321
loop.add_signal_handler(signal, lambda: asyncio.ensure_future(cleanup()))
316322

examples/video-stream/video_play.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from dataclasses import dataclass
44
from pathlib import Path
55
from typing import AsyncIterable, Union
6+
import sys
67

78
import numpy as np
89
import os
@@ -92,15 +93,15 @@ async def aclose(self) -> None:
9293
self._audio_container.close()
9394

9495

95-
async def main(room: rtc.Room):
96+
async def main(room: rtc.Room, room_name: str, media_path: str):
9697
token = (
9798
api.AccessToken()
9899
.with_identity("python-publisher")
99100
.with_name("Python Publisher")
100101
.with_grants(
101102
api.VideoGrants(
102103
room_join=True,
103-
room="my-room",
104+
room=room_name,
104105
)
105106
)
106107
.to_jwt()
@@ -116,7 +117,6 @@ async def main(room: rtc.Room):
116117
return
117118

118119
# Create media streamer
119-
media_path = "/path/to/video.mp4"
120120
streamer = MediaFileStreamer(media_path)
121121
media_info = streamer.info
122122

@@ -137,7 +137,13 @@ async def main(room: rtc.Room):
137137
audio_track = rtc.LocalAudioTrack.create_audio_track("audio", audio_source)
138138

139139
# Publish tracks
140-
video_options = rtc.TrackPublishOptions(source=rtc.TrackSource.SOURCE_CAMERA)
140+
video_options = rtc.TrackPublishOptions(
141+
source=rtc.TrackSource.SOURCE_CAMERA,
142+
video_encoding=rtc.VideoEncoding(
143+
max_framerate=30,
144+
max_bitrate=5_000_000,
145+
),
146+
)
141147
audio_options = rtc.TrackPublishOptions(source=rtc.TrackSource.SOURCE_MICROPHONE)
142148

143149
await room.local_participant.publish_track(video_track, video_options)
@@ -183,14 +189,21 @@ async def _push_frames(
183189
handlers=[logging.FileHandler("video_play.log"), logging.StreamHandler()],
184190
)
185191

192+
if len(sys.argv) != 3:
193+
print("Usage: python video_play.py <room-name> </path/to/video>")
194+
sys.exit(1)
195+
196+
room_name = sys.argv[1]
197+
media_path = sys.argv[2]
198+
186199
loop = asyncio.get_event_loop()
187200
room = rtc.Room(loop=loop)
188201

189202
async def cleanup():
190203
await room.disconnect()
191204
loop.stop()
192205

193-
asyncio.ensure_future(main(room))
206+
asyncio.ensure_future(main(room, room_name, media_path))
194207
for signal in [signal.SIGINT, signal.SIGTERM]:
195208
loop.add_signal_handler(signal, lambda: asyncio.ensure_future(cleanup()))
196209

0 commit comments

Comments
 (0)