Determine the number of tasks on a per-process basis

sjmonson · sjmonson · commit 0c28b6ab857a · 2025-04-17T15:35:54.000-04:00
diff --git a/src/guidellm/scheduler/scheduler.py b/src/guidellm/scheduler/scheduler.py
@@ -189,9 +189,6 @@ async def _start_processes(
             maxsize=scheduling_strategy.queued_requests_limit
         )
         responses_queue = manager.Queue()
-        per_process_requests_limit = scheduling_strategy.processing_requests_limit // (
-            scheduling_strategy.processes_limit
-        )
 
         futures = []
         loop = asyncio.get_event_loop()
@@ -207,16 +204,17 @@ async def _start_processes(
                     )
                 )
             elif scheduling_strategy.processing_mode == "async":
-                futures.append(
-                    loop.run_in_executor(
-                        executor,
-                        self.worker.process_loop_asynchronous,
-                        requests_queue,
-                        responses_queue,
-                        per_process_requests_limit,
-                        process_id,
+                if scheduling_strategy.process_requests_limits[process_id]:
+                    futures.append(
+                        loop.run_in_executor(
+                            executor,
+                            self.worker.process_loop_asynchronous,
+                            requests_queue,
+                            responses_queue,
+                            scheduling_strategy.process_requests_limits[process_id],
+                            process_id,
+                        )
                     )
-                )
             else:
                 raise ValueError(
                     f"Invalid processing mode: {scheduling_strategy.processing_mode} "
diff --git a/src/guidellm/scheduler/strategy.py b/src/guidellm/scheduler/strategy.py
@@ -4,6 +4,7 @@
 import time
 from typing import (
     Generator,
+    List,
     Literal,
     Optional,
     Union,
@@ -94,6 +95,23 @@ def processing_requests_limit(self) -> int:
         """
         return settings.max_concurrency
 
+    @property
+    def process_requests_limits(self) -> List[int]:
+        """
+        Split of ``processing_requests_limit`` across ``processes_limit`` worker
+        processes. The first ``remain`` entries get one extra request, so the
+        entries always sum to ``processing_requests_limit``.
+
+        :return: A per-process list of the maximum number of requests per process.
+        """
+        split = self.processing_requests_limit // self.processes_limit
+        remain = self.processing_requests_limit % self.processes_limit
+
+        return [
+            split + 1 if i < remain else split
+            for i in range(self.processes_limit)
+        ]
+
     def request_times(self) -> Generator[float, None, None]:
         """
         A generator that yields timestamps for when requests should be sent.
@@ -168,6 +186,18 @@ def processing_requests_limit(self) -> int:
         """
         return 1
 
+    @property
+    def process_requests_limits(self) -> List[int]:
+        """
+        The maximum number of requests per process for the scheduling strategy.
+        This strategy always reports a single-entry list containing 1, matching
+        its ``processing_requests_limit`` of 1.
+
+        :return: A single-element list, ``[1]``.
+        """
+
+        return [1]
+
     def request_times(self) -> Generator[float, None, None]:
         """
         A generator that yields time.time() so requests are sent immediately,
diff --git a/src/guidellm/scheduler/worker.py b/src/guidellm/scheduler/worker.py
@@ -226,7 +226,15 @@ def process_loop_asynchronous(
         process_id: int,
     ):
         async def _process_runner():
-            pending = asyncio.Semaphore(max_concurrency) if max_concurrency else None
+            if max_concurrency is not None:
+                if max_concurrency < 1:
+                    raise ValueError(
+                        f"max_concurrency must be greater than 0, got {max_concurrency}"
+                    )
+
+                pending = asyncio.Semaphore(max_concurrency)
+            else:
+                pending = None
 
             while (
                 process_request := await self.get_request(requests_queue)