@@ -41,6 +41,9 @@ class CloudDetectionProcessor(
     which can be increased by contacting the Moondream team. If you are deploying
     to your own infrastructure, consider using LocalDetectionProcessor instead.
 
+    Detection runs asynchronously in the background while frames pass through at
+    full FPS. The last known detection results are overlaid on each frame.
+
     Args:
         api_key: API key for Moondream Cloud API. If not provided, will attempt to read
             from MOONDREAM_API_KEY environment variable.
@@ -49,6 +52,8 @@ class CloudDetectionProcessor(
             so any object string works. Examples: "person", "car",
             "basketball", ["person", "car", "dog"]. Default: "person"
         fps: Frame processing rate (default: 30)
+        detection_fps: Rate at which to send frames for detection (default: 5).
+            Lower values reduce API calls while maintaining smooth video.
         interval: Processing interval in seconds (default: 0)
         max_workers: Number of worker threads for CPU-intensive operations (default: 10)
     """
@@ -61,6 +66,7 @@ def __init__(
         conf_threshold: float = 0.3,
         detect_objects: Union[str, List[str]] = "person",
         fps: int = 30,
+        detection_fps: float = 5.0,
         interval: int = 0,
         max_workers: int = 10,
     ):
@@ -69,6 +75,7 @@ def __init__(
         self.api_key = api_key or os.getenv("MOONDREAM_API_KEY")
         self.conf_threshold = conf_threshold
         self.fps = fps
+        self.detection_fps = detection_fps
         self.max_workers = max_workers
         self._shutdown = False
 
@@ -77,6 +84,11 @@ def __init__(
         self._last_frame_time: Optional[float] = None
         self._last_frame_pil: Optional[Image.Image] = None
 
+        # Async detection state
+        self._detection_in_progress = False
+        self._last_detection_time: float = 0.0
+        self._cached_results: Dict[str, Any] = {"detections": []}
+
         # Font configuration constants for drawing efficiency
         self._font = cv2.FONT_HERSHEY_SIMPLEX
         self._font_scale = 0.5
@@ -110,6 +122,7 @@ def __init__(
 
         logger.info("🌙 Moondream Processor initialized")
         logger.info(f"🎯 Detection configured for objects: {self.detect_objects}")
+        logger.info(f"📹 Video FPS: {fps}, Detection FPS: {detection_fps}")
 
     async def process_video(
         self,
@@ -213,28 +226,41 @@ def _run_detection_sync(self, frame_array: np.ndarray) -> List[Dict]:
         return all_detections
 
     async def _process_and_add_frame(self, frame: av.VideoFrame):
+        """Process frame: pass through immediately, run detection asynchronously."""
         try:
             frame_array = frame.to_ndarray(format="rgb24")
+            now = asyncio.get_event_loop().time()
 
-            results = await self._run_inference(frame_array)
+            # Check if we should start a new detection
+            detection_interval = 1.0 / self.detection_fps if self.detection_fps > 0 else float("inf")
+            should_detect = (
+                not self._detection_in_progress
+                and (now - self._last_detection_time) >= detection_interval
+            )
 
-            self._last_results = results
-            self._last_frame_time = asyncio.get_event_loop().time()
+            if should_detect:
+                # Start detection in background (don't await)
+                self._detection_in_progress = True
+                self._last_detection_time = now
+                asyncio.create_task(self._run_detection_background(frame_array.copy()))
+
+            # Always use cached results for annotation (don't wait for detection)
+            self._last_frame_time = now
             self._last_frame_pil = Image.fromarray(frame_array)
 
-            # Annotate frame with detections
-            if results.get("detections"):
+            # Annotate frame with cached detections
+            if self._cached_results.get("detections"):
                 frame_array = annotate_detections(
                     frame_array,
-                    results,
+                    self._cached_results,
                     font=self._font,
                     font_scale=self._font_scale,
                     font_thickness=self._font_thickness,
                     bbox_color=self._bbox_color,
                     text_color=self._text_color,
                 )
 
-            # Convert back to av.VideoFrame and publish
+            # Convert back to av.VideoFrame and publish immediately
             processed_frame = av.VideoFrame.from_ndarray(frame_array, format="rgb24")
             await self._video_track.add_frame(processed_frame)
 
@@ -243,6 +269,18 @@ async def _process_and_add_frame(self, frame: av.VideoFrame):
             # Pass through original frame on error
             await self._video_track.add_frame(frame)
 
+    async def _run_detection_background(self, frame_array: np.ndarray):
+        """Run detection in background and update cached results."""
+        try:
+            results = await self._run_inference(frame_array)
+            self._cached_results = results
+            self._last_results = results
+            logger.debug(f"🔍 Detection complete: {len(results.get('detections', []))} objects")
+        except Exception as e:
+            logger.warning(f"⚠️ Background detection failed: {e}")
+        finally:
+            self._detection_in_progress = False
+
     def close(self):
         """Clean up resources."""
         self._shutdown = True
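
The new `_process_and_add_frame` / `_run_detection_background` pair amounts to an fps-throttled, fire-and-forget detection loop: every frame is annotated with the most recent cached results and published immediately, while a new detection task is started only if none is in flight and at least 1.0 / detection_fps seconds have elapsed (0.2 s at the default of 5.0). A minimal self-contained sketch of that pattern, with a stub detector standing in for the Moondream API call, is shown below; the class and method names here are illustrative, not part of the diff.

    import asyncio
    import time

    class ThrottledDetector:
        """Illustrative sketch of the caching/throttling scheme used above."""

        def __init__(self, detection_fps: float = 5.0):
            self.detection_fps = detection_fps
            self._in_progress = False
            self._last_start = 0.0
            self.cached = {"detections": []}

        async def _detect(self, frame):
            # Stand-in for the real _run_inference() call to the cloud API.
            await asyncio.sleep(0.3)  # pretend the request takes 300 ms
            return {"detections": [{"label": "person", "frame": frame}]}

        async def _detect_background(self, frame):
            try:
                self.cached = await self._detect(frame)
            finally:
                self._in_progress = False

        def on_frame(self, frame):
            """Called once per video frame; returns the results to overlay."""
            now = time.monotonic()
            interval = 1.0 / self.detection_fps if self.detection_fps > 0 else float("inf")
            if not self._in_progress and (now - self._last_start) >= interval:
                self._in_progress = True
                self._last_start = now
                # Fire and forget: the frame path never waits on the detector.
                asyncio.create_task(self._detect_background(frame))
            return self.cached

    async def main():
        detector = ThrottledDetector(detection_fps=5.0)
        for i in range(90):                 # roughly 3 seconds of 30 fps video
            results = detector.on_frame(i)  # always returns instantly
            await asyncio.sleep(1 / 30)
        print(f"last overlay had {len(results['detections'])} detections")

    asyncio.run(main())

Because the detection task is never awaited on the frame path, publishing never blocks on the API; the trade-off is that overlays can lag the live video by up to one detection round-trip.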