Skip to content

Commit a646d2a

Browse files
committed
Polishing for scheduler
1 parent 42dd326 commit a646d2a

File tree

5 files changed

+143
-105
lines changed

5 files changed

+143
-105
lines changed

src/guidellm/backend/response.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -79,8 +79,8 @@ class ResponseSummary(BaseModel):
7979
value: str
8080
request_args: RequestArgs
8181
iterations: int = 0
82-
start_time: float
83-
end_time: float
82+
start_time: Optional[float]
83+
end_time: Optional[float]
8484
request_prompt_tokens: Optional[int] = None
8585
request_output_tokens: Optional[int] = None
8686
response_prompt_tokens: Optional[int] = None

src/guidellm/config.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -143,8 +143,9 @@ class Settings(BaseSettings):
143143
request_http2: bool = True
144144
max_concurrency: int = 512
145145
max_worker_processes: int = 10
146-
num_sweep_profiles: int = 9
146+
default_async_loop_sleep: float = 0.0001
147147
logging: LoggingSettings = LoggingSettings()
148+
num_sweep_profiles: int = 9
148149

149150
# Data settings
150151
dataset: DatasetSettings = DatasetSettings()

src/guidellm/scheduler/backend_worker.py

Lines changed: 4 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -112,7 +112,6 @@ def __init__(self, backend: Backend):
112112
async def resolve(
113113
self,
114114
request: GenerationRequest,
115-
start_time: float,
116115
timeout_time: float,
117116
) -> ResponseSummary:
118117
"""
@@ -121,7 +120,6 @@ async def resolve(
121120
and handles any errors that may occur during the process.
122121
123122
:param request: The request to resolve.
124-
:param start_time: The time to start the request.
125123
:param timeout_time: The time to wait for a response before timing out.
126124
If timeout_time is math.inf, the request will not timeout.
127125
:return: A ResponseSummary object containing the response from the backend.
@@ -140,10 +138,6 @@ async def _runner():
140138
nonlocal response
141139
response = resp
142140

143-
if (wait_time := start_time - time.time()) > 0:
144-
await asyncio.sleep(wait_time)
145-
146-
start_time = time.time()
147141
await asyncio.wait_for(
148142
_runner(),
149143
timeout=timeout_time - time.time() if timeout_time < math.inf else None,
@@ -164,7 +158,7 @@ async def _runner():
164158
except Exception as exc: # noqa: BLE001
165159
error = str(exc)
166160

167-
return self._handle_response(request, response, error, start_time)
161+
return self._handle_response(request, response, error)
168162

169163
def _create_request_func_kwargs(
170164
self,
@@ -208,7 +202,6 @@ def _handle_response(
208202
request: GenerationRequest,
209203
response: Any,
210204
error: Optional[str],
211-
start_time: float,
212205
) -> ResponseSummary:
213206
if response is None or not isinstance(
214207
response, (ResponseSummary, StreamingTextResponse)
@@ -228,8 +221,8 @@ def _handle_response(
228221
headers={},
229222
payload={},
230223
),
231-
start_time=start_time,
232-
end_time=time.time(),
224+
start_time=None,
225+
end_time=None,
233226
request_id=request.request_id,
234227
error=error or "Unknown error",
235228
)
@@ -243,7 +236,7 @@ def _handle_response(
243236
payload={},
244237
),
245238
start_time=response.start_time,
246-
end_time=time.time(),
239+
end_time=None,
247240
request_prompt_tokens=request.stats.get("prompt_tokens", None),
248241
request_output_tokens=None,
249242
response_prompt_tokens=None,

0 commit comments

Comments
 (0)