@@ -56,31 +56,38 @@ def __init__(self, media_file: Union[str, Path]) -> None:
56
56
def info (self ) -> MediaInfo :
57
57
return self ._info
58
58
59
async def stream_video(self) -> AsyncIterable[tuple[rtc.VideoFrame, float]]:
    """Decode the first video stream and yield ``(frame, timestamp)`` pairs.

    This makes a single pass over ``self._video_container.decode(video=0)``
    and finishes when the decoder is exhausted; it does not loop by itself.

    Yields:
        A tuple of an RGBA ``rtc.VideoFrame`` built from the decoded picture
        and the decoded frame's ``time`` attribute.
        NOTE(review): ``time`` is presumably the presentation time in
        seconds and may be ``None`` for frames without a pts - confirm
        against the PyAV documentation.
    """
    # NOTE(review): decode() is a plain synchronous iterator, so each decode
    # step runs inline in this coroutine - presumably without yielding to the
    # event loop; confirm whether that matters for the caller.
    for av_frame in self._video_container.decode(video=0):
        # Convert the decoded picture to an RGB array, then widen it to RGBA.
        rgb = av_frame.to_rgb().to_ndarray()
        height, width = rgb.shape[0], rgb.shape[1]

        # The fourth channel keeps the initial fill value of 1.
        # NOTE(review): an opaque alpha would be 255 - confirm whether the
        # alpha channel is consumed anywhere downstream.
        rgba = np.ones((height, width, 4), dtype=np.uint8)
        rgba[:, :, :3] = rgb

        yield (
            rtc.VideoFrame(
                width=width,
                height=height,
                type=rtc.VideoBufferType.RGBA,
                data=rgba.tobytes(),
            ),
            av_frame.time,
        )
72
75
73
async def stream_audio(self) -> AsyncIterable[tuple[rtc.AudioFrame, float]]:
    """Decode the first audio stream and yield ``(frame, timestamp)`` pairs.

    This makes a single pass over ``self._audio_container.decode(audio=0)``
    and finishes when the decoder is exhausted; it does not loop by itself.

    Yields:
        A tuple of an int16 ``rtc.AudioFrame`` and a timestamp equal to the
        decoded frame's ``time`` plus the frame's own duration, i.e. the
        time at which this chunk ends.
    """
    sample_rate = self.info.audio_sample_rate
    for av_frame in self._audio_container.decode(audio=0):
        # Transpose to (samples, channels).
        samples = av_frame.to_ndarray().T

        # Scale to the int16 range and clip before casting: a sample at +1.0
        # scales to 32768, which does not fit in int16 and would otherwise
        # overflow on the cast.
        # NOTE(review): assumes the decoder yields float samples in
        # [-1.0, 1.0] - confirm for integer sample formats.
        pcm = np.clip(samples * 32768, -32768, 32767).astype(np.int16)

        duration = pcm.shape[0] / sample_rate
        yield (
            rtc.AudioFrame(
                data=pcm.tobytes(),
                sample_rate=sample_rate,
                num_channels=pcm.shape[1],
                samples_per_channel=pcm.shape[0],
            ),
            av_frame.time + duration,
        )
85
92
86
93
def reset (self ):
@@ -102,6 +109,7 @@ async def main(room: rtc.Room, room_name: str, media_path: str):
102
109
api .VideoGrants (
103
110
room_join = True ,
104
111
room = room_name ,
112
+ agent = True ,
105
113
)
106
114
)
107
115
.to_jwt ()
@@ -121,7 +129,7 @@ async def main(room: rtc.Room, room_name: str, media_path: str):
121
129
media_info = streamer .info
122
130
123
131
# Create video and audio sources/tracks
124
- queue_size_ms = 1000 # 1 second
132
+ queue_size_ms = 1000
125
133
video_source = rtc .VideoSource (
126
134
width = media_info .video_width ,
127
135
height = media_info .video_height ,
@@ -157,26 +165,54 @@ async def main(room: rtc.Room, room_name: str, media_path: str):
157
165
)
158
166
159
167
async def _push_frames(
    stream: AsyncIterable[tuple[rtc.VideoFrame | rtc.AudioFrame, float]],
    av_sync: rtc.AVSynchronizer,
):
    """Drain ``stream`` into ``av_sync``, one ``(frame, timestamp)`` at a time.

    Args:
        stream: Async iterable yielding ``(frame, timestamp)`` tuples.
        av_sync: Synchronizer that receives every frame with its timestamp.
    """
    async for item in stream:
        media_frame, media_time = item
        await av_sync.push(media_frame, media_time)
        # Zero-length sleep: hand control back to the event loop after each
        # frame so other tasks get a chance to run.
        await asyncio.sleep(0)
166
174
175
async def _log_fps(av_sync: rtc.AVSynchronizer, interval: float = 2.0):
    """Periodically log the synchronizer's frame rate and A/V timestamps.

    Runs until cancelled. Each report contains ``av_sync.actual_fps``, the
    wall time elapsed since this coroutine started, the last video and audio
    times reported by ``av_sync``, and their difference (video minus audio).

    Args:
        av_sync: Synchronizer whose statistics are reported.
        interval: Seconds to sleep between two reports.
    """
    loop = asyncio.get_running_loop()
    start_time = loop.time()
    while True:
        await asyncio.sleep(interval)
        wall_time = loop.time() - start_time
        # Read each timestamp once so the logged values and the logged
        # difference come from the same reads.
        video_time = av_sync.last_video_time
        audio_time = av_sync.last_audio_time
        diff = video_time - audio_time
        logger.info(
            f"fps: {av_sync.actual_fps:.2f}, wall_time: {wall_time:.3f}s, "
            f"video_time: {video_time:.3f}s, "
            f"audio_time: {audio_time:.3f}s, diff: {diff:.3f}s"
        )
186
+
167
187
try :
168
188
while True :
169
189
streamer .reset ()
170
- video_task = asyncio .create_task (
171
- _push_frames (streamer .stream_video (), av_sync )
172
- )
173
- audio_task = asyncio .create_task (
174
- _push_frames (streamer .stream_audio (), av_sync )
190
+
191
+ video_stream = streamer .stream_video ()
192
+ audio_stream = streamer .stream_audio ()
193
+
194
+ # read the head frames and push them at the same time
195
+ first_video_frame , video_timestamp = await video_stream .__anext__ ()
196
+ first_audio_frame , audio_timestamp = await audio_stream .__anext__ ()
197
+ logger .info (
198
+ f"first video duration: { 1 / media_info .video_fps :.3f} s, "
199
+ f"first audio duration: { first_audio_frame .duration :.3f} s"
175
200
)
201
+ await av_sync .push (first_video_frame , video_timestamp )
202
+ await av_sync .push (first_audio_frame , audio_timestamp )
203
+
204
+ video_task = asyncio .create_task (_push_frames (video_stream , av_sync ))
205
+ audio_task = asyncio .create_task (_push_frames (audio_stream , av_sync ))
206
+
207
+ log_fps_task = asyncio .create_task (_log_fps (av_sync ))
176
208
177
209
# wait for both tasks to complete
178
210
await asyncio .gather (video_task , audio_task )
179
211
await av_sync .wait_for_playout ()
212
+
213
+ # clean up
214
+ av_sync .reset ()
215
+ log_fps_task .cancel ()
180
216
logger .info ("playout finished" )
181
217
finally :
182
218
await streamer .aclose ()
0 commit comments