26 changes: 18 additions & 8 deletions src/guidellm/core/report.py
@@ -147,15 +147,19 @@ def _create_benchmark_report_data_tokens_summary(
for benchmark in report.benchmarks_sorted:
table.add_row(
_benchmark_rate_id(benchmark),
f"{benchmark.prompt_token:.2f}",
f"{benchmark.prompt_token_distribution.mean:.2f}",
", ".join(
f"{percentile:.1f}"
for percentile in benchmark.prompt_token_percentiles
for percentile in benchmark.prompt_token_distribution.percentiles(
[1, 5, 50, 95, 99]
)
),
f"{benchmark.output_token:.2f}",
f"{benchmark.output_token_distribution.mean:.2f}",
", ".join(
f"{percentile:.1f}"
for percentile in benchmark.output_token_percentiles
for percentile in benchmark.output_token_distribution.percentiles(
[1, 5, 50, 95, 99]
)
),
)
logger.debug("Created data tokens summary table for the report.")
@@ -177,7 +181,7 @@ def _create_benchmark_report_dist_perf_summary(
"Benchmark",
"Request Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (sec)",
"Time to First Token [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)",
"Inter Token Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)",
"Inter Token Latency [1%, 5%, 10%, 50%, 90% 95%, 99%] (ms)",
title="[magenta]Performance Stats by Benchmark[/magenta]",
title_style="bold",
title_justify="left",
@@ -189,15 +193,21 @@
_benchmark_rate_id(benchmark),
", ".join(
f"{percentile:.2f}"
for percentile in benchmark.request_latency_percentiles
for percentile in benchmark.request_latency_distribution.percentiles(
[1, 5, 10, 50, 90, 95, 99]
)
),
", ".join(
f"{percentile * 1000:.1f}"
for percentile in benchmark.time_to_first_token_percentiles
for percentile in benchmark.ttft_distribution.percentiles(
[1, 5, 10, 50, 90, 95, 99]
)
),
", ".join(
f"{percentile * 1000:.1f}"
for percentile in benchmark.inter_token_latency_percentiles
for percentile in benchmark.itl_distribution.percentiles(
[1, 5, 10, 50, 90, 95, 99]
)
),
)
logger.debug("Created distribution performance summary table for the report.")
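With this change, the summary tables pull the mean and the percentile list straight from each benchmark's Distribution objects (prompt_token_distribution, output_token_distribution, request_latency_distribution, ttft_distribution, itl_distribution) rather than through the removed *_percentiles computed fields. The following is a minimal sketch of the interface the table code relies on, assuming numpy-style percentile semantics; it is a stand-in for illustration, not the actual guidellm Distribution implementation.

from typing import List, Sequence

import numpy as np


class Distribution:
    """Hypothetical stand-in for the Distribution interface used by the report."""

    def __init__(self, data: Sequence[float]):
        self.data = list(data)

    @property
    def mean(self) -> float:
        # Average of the observed values; 0.0 when nothing was recorded.
        return float(np.mean(self.data)) if self.data else 0.0

    def percentiles(self, points: List[float]) -> List[float]:
        # Requested percentiles of the observed values, in the given order.
        if not self.data:
            return [0.0] * len(points)
        return [float(value) for value in np.percentile(self.data, points)]


# Formatting one table row the way the report code does: the mean in one cell,
# a fixed percentile list joined into another.
prompt_tokens = Distribution(data=[128, 192, 256, 384, 512])
mean_cell = f"{prompt_tokens.mean:.2f}"
percentile_cell = ", ".join(
    f"{value:.1f}" for value in prompt_tokens.percentiles([1, 5, 50, 95, 99])
)
print(mean_cell, "|", percentile_cell)
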
94 changes: 1 addition & 93 deletions src/guidellm/core/result.py
@@ -2,7 +2,7 @@
from typing import Any, Dict, List, Literal, Optional, Union

from loguru import logger
from pydantic import Field, computed_field
from pydantic import Field

from guidellm.core.distribution import Distribution
from guidellm.core.request import TextGenerationRequest
@@ -221,7 +221,6 @@ def __iter__(self):
"""
return iter(self.results)

@computed_field # type: ignore[misc]
@property
def request_count(self) -> int:
"""
@@ -232,7 +231,6 @@ def request_count(self) -> int:
"""
return len(self.results)

@computed_field # type: ignore[misc]
@property
def error_count(self) -> int:
"""
@@ -243,7 +241,6 @@ def error_count(self) -> int:
"""
return len(self.errors)

@computed_field # type: ignore[misc]
@property
def total_count(self) -> int:
"""
@@ -254,7 +251,6 @@ def total_count(self) -> int:
"""
return self.request_count + self.error_count

@computed_field # type: ignore[misc]
@property
def start_time(self) -> Optional[float]:
"""
@@ -268,7 +264,6 @@ def start_time(self) -> Optional[float]:

return self.results[0].start_time

@computed_field # type: ignore[misc]
@property
def end_time(self) -> Optional[float]:
"""
@@ -282,7 +277,6 @@ def end_time(self) -> Optional[float]:

return self.results[-1].end_time

@computed_field # type: ignore[misc]
@property
def duration(self) -> float:
"""
@@ -296,7 +290,6 @@ def duration(self) -> float:

return self.end_time - self.start_time

@computed_field # type: ignore[misc]
@property
def completed_request_rate(self) -> float:
"""
@@ -310,7 +303,6 @@ def completed_request_rate(self) -> float:

return len(self.results) / self.duration

@computed_field # type: ignore[misc]
@property
def request_latency(self) -> float:
"""
@@ -340,19 +332,6 @@ def request_latency_distribution(self) -> Distribution:
]
)

@computed_field # type: ignore[misc]
@property
def request_latency_percentiles(self) -> List[float]:
"""
Get standard percentiles of request latency in seconds.

:return: List of percentile request latency in seconds
:rtype: List[float]
"""
return self.request_latency_distribution.percentiles([1, 5, 10, 50, 90, 95, 99])


@computed_field # type: ignore[misc]
@property
def time_to_first_token(self) -> float:
"""
@@ -382,20 +361,6 @@ def ttft_distribution(self) -> Distribution:
]
)

@computed_field # type: ignore[misc]
@property
def time_to_first_token_percentiles(self) -> List[float]:
"""
Get standard percentiles for time taken to decode the first token
in milliseconds.

:return: List of percentile time taken to decode the first token
in milliseconds.
:rtype: List[float]
"""
return self.ttft_distribution.percentiles([1, 5, 10, 50, 90, 95, 99])

@computed_field # type: ignore[misc]
@property
def inter_token_latency(self) -> float:
"""
@@ -423,18 +388,6 @@ def itl_distribution(self) -> Distribution:
]
)

@computed_field # type: ignore[misc]
@property
def inter_token_latency_percentiles(self) -> List[float]:
"""
Get standard percentiles for the time between tokens in milliseconds.

:return: List of percentiles for the average time between tokens.
:rtype: List[float]
"""
return self.itl_distribution.percentiles([1, 5, 10, 50, 90, 95, 99])

@computed_field # type: ignore[misc]
@property
def output_token_throughput(self) -> float:
"""
@@ -450,17 +403,6 @@ def output_token_throughput(self) -> float:

return total_tokens / self.duration

@computed_field # type: ignore[misc]
@property
def prompt_token(self) -> float:
"""
Get the average number of prompt tokens.

:return: The average number of prompt tokens.
:rtype: float
"""
return self.prompt_token_distribution.mean

@property
def prompt_token_distribution(self) -> Distribution:
"""
@@ -471,28 +413,6 @@ def prompt_token_distribution(self) -> Distribution:
"""
return Distribution(data=[result.prompt_token_count for result in self.results])

@computed_field # type: ignore[misc]
@property
def prompt_token_percentiles(self) -> List[float]:
"""
Get standard percentiles for number of prompt tokens.

:return: List of percentiles of number of prompt tokens.
:rtype: List[float]
"""
return self.prompt_token_distribution.percentiles([1, 5, 50, 95, 99])

@computed_field # type: ignore[misc]
@property
def output_token(self) -> float:
"""
Get the average number of output tokens.

:return: The average number of output tokens.
:rtype: float
"""
return self.output_token_distribution.mean

@property
def output_token_distribution(self) -> Distribution:
"""
@@ -503,18 +423,6 @@ def output_token_distribution(self) -> Distribution:
"""
return Distribution(data=[result.output_token_count for result in self.results])

@computed_field # type: ignore[misc]
@property
def output_token_percentiles(self) -> List[float]:
"""
Get standard percentiles for number of output tokens.

:return: List of percentiles of number of output tokens.
:rtype: List[float]
"""
return self.output_token_distribution.percentiles([1, 5, 50, 95, 99])

@computed_field # type: ignore[misc]
@property
def overloaded(self) -> bool:
if (
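The deletions above remove @computed_field from the benchmark result model and drop the derived prompt_token, output_token, and *_percentiles properties entirely; the report now computes those numbers on demand from the remaining distribution properties. The practical effect of losing @computed_field is on serialization: a computed field is included in model_dump() and JSON output, while a plain property remains usable in Python but is not serialized. A generic pydantic v2 sketch (example code, not guidellm's models) illustrating the difference:

from typing import List

from pydantic import BaseModel, computed_field


class WithComputedField(BaseModel):
    values: List[float] = []

    @computed_field  # serialized: included in model_dump() and JSON output
    @property
    def count(self) -> int:
        return len(self.values)


class WithPlainProperty(BaseModel):
    values: List[float] = []

    @property  # not serialized, but still available as an attribute
    def count(self) -> int:
        return len(self.values)


with_field = WithComputedField(values=[1.0, 2.0])
plain = WithPlainProperty(values=[1.0, 2.0])
print(with_field.model_dump())  # {'values': [1.0, 2.0], 'count': 2}
print(plain.model_dump())       # {'values': [1.0, 2.0]}
print(plain.count)              # 2 -- computed only when asked for

One consequence is that the serialized report no longer carries every precomputed percentile list, which keeps the payload smaller at the cost of recomputing percentiles when a consumer needs them.
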
6 changes: 3 additions & 3 deletions tests/unit/core/test_report.py
@@ -66,23 +66,23 @@ def test_guidance_report_print(sample_benchmark_report):
report.print() # This will output to the console


@pytest.mark.regression()
@pytest.mark.sanity()
def test_guidance_report_json(sample_benchmark_report):
report = GuidanceReport(benchmarks=[sample_benchmark_report])
json_str = report.to_json()
loaded_report = GuidanceReport.from_json(json_str)
assert compare_guidance_reports(report, loaded_report)


@pytest.mark.regression()
@pytest.mark.sanity()
def test_guidance_report_yaml(sample_benchmark_report):
report = GuidanceReport(benchmarks=[sample_benchmark_report])
yaml_str = report.to_yaml()
loaded_report = GuidanceReport.from_yaml(yaml_str)
assert compare_guidance_reports(report, loaded_report)


@pytest.mark.regression()
@pytest.mark.sanity()
def test_guidance_report_save_load_file(sample_benchmark_report):
report = GuidanceReport(benchmarks=[sample_benchmark_report])
with tempfile.TemporaryDirectory() as temp_dir:
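The three round-trip tests above are reclassified from the regression marker to sanity, presumably moving them into the project's quicker test tier. Custom markers like these need to be registered with pytest so that -m sanity selection works and no PytestUnknownMarkWarning is emitted; the project likely declares them in its pytest configuration already, but a minimal conftest.py sketch of that registration would look like this:

# conftest.py (illustrative only; the markers may already be declared in
# pyproject.toml or pytest.ini)
def pytest_configure(config):
    # Register the custom markers used by the test suite.
    config.addinivalue_line("markers", "sanity: quick correctness checks")
    config.addinivalue_line(
        "markers", "regression: slower checks guarding previously fixed behavior"
    )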