Skip to content

Commit d8032e7

Browse files
committed
Reapply Fix errors with metric accumulation (#266)
<!-- Include a short paragraph of the changes introduced in this PR. If this PR requires additional context or rationale, explain why the changes are necessary. --> Fixes an issue in metric calculation that caused incorrect statistics at extreme changes in concurrency and an issue where the first decode token was not counted in total tokens per second. <!-- Provide a detailed list of all changes introduced in this pull request. --> - [x] Fixed issue where merged concurrency change events would double-count concurrency - [x] Ensure first decode token is counted when calculating total tokens per second <!-- List the steps needed to test this PR. --> - Run unit tests: `tox -e test-unit -- -m "regression and sanity"` --- - [x] "I certify that all code in this PR is my own, except as noted below." - [x] Includes AI-assisted code completion - [ ] Includes code generated by an AI application - [x] Includes AI-generated tests (NOTE: AI written tests should have a docstring that includes `## WRITTEN BY AI ##`) --------- Signed-off-by: Samuel Monson <smonson@redhat.com>
1 parent 46b5e87 commit d8032e7

File tree

1 file changed

+13
-12
lines changed

1 file changed

+13
-12
lines changed

src/guidellm/utils/statistics.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -275,18 +275,9 @@ def from_request_times(
275275
"""
276276
if distribution_type == "concurrency":
277277
# convert to delta changes based on when requests were running
278-
time_deltas: dict[float, int] = defaultdict(int)
279-
for start, end in requests:
280-
time_deltas[start] += 1
281-
time_deltas[end] -= 1
282-
283-
# convert to the events over time measuring concurrency changes
284-
events = []
285-
active = 0
286-
287-
for time, delta in sorted(time_deltas.items()):
288-
active += delta
289-
events.append((time, active))
278+
events = [(start, 1) for start, _ in requests] + [
279+
(end, -1) for _, end in requests
280+
]
290281
elif distribution_type == "rate":
291282
# convert to events for when requests finished
292283
global_start = min(start for start, _ in requests) if requests else 0
@@ -313,6 +304,16 @@ def from_request_times(
313304
else:
314305
flattened_events.append((time, val))
315306

307+
if distribution_type == "concurrency":
308+
# convert to the events over time measuring concurrency changes
309+
events_over_time: list[tuple[float, float]] = []
310+
active = 0
311+
for time, delta in flattened_events:
312+
active += delta # type: ignore [assignment]
313+
events_over_time.append((time, active))
314+
315+
flattened_events = events_over_time
316+
316317
# convert to value distribution function
317318
distribution: dict[float, float] = defaultdict(float)
318319

0 commit comments

Comments (0)