Skip to content

Commit d8032e7

Browse files
committed
Reapply Fix errors with metric accumulation (#266)
<!-- Include a short paragraph of the changes introduced in this PR. If this PR requires additional context or rationale, explain why the changes are necessary. --> Fixes an issue in metric calculation that caused incorrect statistics at extreme changes in concurrency and an issue where the first decode token was not counted in total tokens per second. <!-- Provide a detailed list of all changes introduced in this pull request. --> - [x] Fixed issue where merged concurrency change events would double-count concurrency - [x] Ensure first decode token is counted when calculating total tokens per second <!-- List the steps needed to test this PR. --> - Run unit tests: `tox -e test-unit -- -m "regression and sanity"` --- - [x] "I certify that all code in this PR is my own, except as noted below." - [x] Includes AI-assisted code completion - [ ] Includes code generated by an AI application - [x] Includes AI-generated tests (NOTE: AI written tests should have a docstring that includes `## WRITTEN BY AI ##`) --------- Signed-off-by: Samuel Monson <smonson@redhat.com>
1 parent 46b5e87 commit d8032e7

File tree

1 file changed

+13
-12
lines changed

1 file changed

+13
-12
lines changed

src/guidellm/utils/statistics.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -275,18 +275,9 @@ def from_request_times(
275275
"""
276276
if distribution_type == "concurrency":
277277
# convert to delta changes based on when requests were running
278-
time_deltas: dict[float, int] = defaultdict(int)
279-
for start, end in requests:
280-
time_deltas[start] += 1
281-
time_deltas[end] -= 1
282-
283-
# convert to the events over time measuring concurrency changes
284-
events = []
285-
active = 0
286-
287-
for time, delta in sorted(time_deltas.items()):
288-
active += delta
289-
events.append((time, active))
278+
events = [(start, 1) for start, _ in requests] + [
279+
(end, -1) for _, end in requests
280+
]
290281
elif distribution_type == "rate":
291282
# convert to events for when requests finished
292283
global_start = min(start for start, _ in requests) if requests else 0
@@ -313,6 +304,16 @@ def from_request_times(
313304
else:
314305
flattened_events.append((time, val))
315306

307+
if distribution_type == "concurrency":
308+
# convert to the events over time measuring concurrency changes
309+
events_over_time: list[tuple[float, float]] = []
310+
active = 0
311+
for time, delta in flattened_events:
312+
active += delta # type: ignore [assignment]
313+
events_over_time.append((time, active))
314+
315+
flattened_events = events_over_time
316+
316317
# convert to value distribution function
317318
distribution: dict[float, float] = defaultdict(float)
318319

0 commit comments

Comments (0)