
Commit ff9809a

Merge branch 'master' into msaroufim-patch-7

2 parents 3aec11f + 067fd91

File tree

4 files changed: +191 −6 lines

.github/workflows/benchmark_nightly.yml

Lines changed: 2 additions & 0 deletions
@@ -65,6 +65,8 @@ jobs:
           if_no_artifact_found: ignore
           path: /tmp/ts_artifacts
           name: ${{ matrix.hardware }}_benchmark_validation
+      - name: Validate Benchmark result
+        run: python benchmarks/validate_report.py --input-artifacts-dir /tmp/ts_artifacts/${{ matrix.hardware }}_benchmark_validation
       - name: Update benchmark artifacts for auto validation
         run: python benchmarks/utils/update_artifacts.py --output /tmp/ts_artifacts/${{ matrix.hardware }}_benchmark_validation
       - name: Upload the updated benchmark artifacts for auto validation
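
For reference, a minimal sketch of what the new CI step runs, reproduced locally with Python's subprocess module. The "cpu" value and the artifacts path are placeholders standing in for ${{ matrix.hardware }} and the job's download location, and the command is assumed to be issued from the repository root.

import subprocess

# Placeholder for the ${{ matrix.hardware }} value supplied by the workflow matrix.
hardware = "cpu"

# Mirrors the new "Validate Benchmark result" step; check=True makes a
# non-zero exit (a validation failure) raise CalledProcessError, just as
# the CI job would fail.
subprocess.run(
    [
        "python",
        "benchmarks/validate_report.py",
        "--input-artifacts-dir",
        f"/tmp/ts_artifacts/{hardware}_benchmark_validation",
    ],
    check=True,
)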

benchmarks/auto_benchmark.py

Lines changed: 16 additions & 6 deletions
@@ -17,9 +17,10 @@


 class BenchmarkConfig:
-    def __init__(self, yaml_dict, skip_ts_install):
+    def __init__(self, yaml_dict, skip_ts_install, skip_upload):
         self.yaml_dict = yaml_dict
         self.skip_ts_install = skip_ts_install
+        self.skip_upload = skip_upload
         self.bm_config = {}
         yesterday = datetime.date.today() - datetime.timedelta(days=1)
         self.bm_config["version"] = "torchserve-nightly=={}.{}.{}".format(
@@ -89,9 +90,9 @@ def load_config(self):
                 self.models(v)
             elif k == "hardware":
                 self.hardware(v)
-            elif k == "metrics_cmd":
+            elif k == "metrics_cmd" and not self.skip_upload:
                 self.metrics_cmd(v)
-            elif k == "report_cmd":
+            elif k == "report_cmd" and not self.skip_upload:
                 report_cmd = v

         self.bm_config["model_config_path"] = (
@@ -110,12 +111,12 @@ def load_config(self):
             print("{}={}".format(k, v))


-def load_benchmark_config(bm_config_path, skip_ts_install):
+def load_benchmark_config(bm_config_path, skip_ts_install, skip_upload):
     yaml = ruamel.yaml.YAML()
     with open(bm_config_path, "r") as f:
         yaml_dict = yaml.load(f)

-    benchmark_config = BenchmarkConfig(yaml_dict, skip_ts_install)
+    benchmark_config = BenchmarkConfig(yaml_dict, skip_ts_install, skip_upload)
     benchmark_config.load_config()

     return benchmark_config.bm_config
@@ -285,14 +286,23 @@ def main():
         action="store",
         help="true: skip torchserve installation. default: true",
     )
+    parser.add_argument(
+        "--skip_upload",
+        help="true: skip uploading commands. default: false",
+    )

     arguments = parser.parse_args()
     skip_ts_config = (
         False
         if arguments.skip is not None and arguments.skip.lower() == "false"
         else True
     )
-    bm_config = load_benchmark_config(arguments.input, skip_ts_config)
+    skip_upload = (
+        True
+        if arguments.skip_upload is not None and arguments.skip_upload.lower() == "true"
+        else False
+    )
+    bm_config = load_benchmark_config(arguments.input, skip_ts_config, skip_upload)
     benchmark_env_setup(bm_config, skip_ts_config)
     run_benchmark(bm_config)
     clean_up_benchmark_env(bm_config)
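
A standalone sketch of how the new --skip_upload flag is interpreted: the value is treated as a case-insensitive string, and only an explicit "true" disables the metrics_cmd/report_cmd upload steps. The parse_args list below is illustrative; on the real CLI the value comes from sys.argv.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--skip_upload",
    help="true: skip uploading commands. default: false",
)
# Illustrative input standing in for a real command line.
args = parser.parse_args(["--skip_upload", "TRUE"])

# Anything other than the literal string "true" (case-insensitive), or
# omitting the flag entirely, leaves uploads enabled.
skip_upload = args.skip_upload is not None and args.skip_upload.lower() == "true"
print(skip_upload)  # True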

benchmarks/utils/report.py

Lines changed: 75 additions & 0 deletions
@@ -0,0 +1,75 @@
+import csv
+
+METRICS_VALIDATED = [
+    "TS throughput",
+    "TS latency P50",
+    "TS latency P90",
+    "TS latency P99",
+    "Model_p50",
+    "Model_p90",
+    "Model_p99",
+    "memory_percentage_mean",
+    "gpu_memory_used_mean",
+    "cpu_percentage_mean",
+    "gpu_percentage_mean",
+]
+
+
+# Acceptable metric deviation needs more sophisticated logic.
+# Example: for latencies in 2 digits, 50% might be acceptable;
+# for 3 digit latencies, 20-30% might be the right value.
+# For cpu_memory < 15%, 50% deviation works, but for CPU > 40%, 10-15%
+# might be the right value.
+ACCEPTABLE_METRIC_DEVIATION = 0.3
+
+
+class Report:
+    def __init__(self, deviation=0, num_reports=0):
+        self.properties = {}
+        self.mode = None
+        self.throughput = 0
+        self.batch_size = 0
+        self.workers = 0
+        self.deviation = deviation
+        self.num_reports = num_reports
+
+    def _get_mode(self, csv_file):
+        cfg = csv_file.split("/")[-2]
+        cfg = cfg.split("_")
+        mode = cfg[0] + "_" + cfg[1]
+        self.mode = mode
+
+    def read_csv(self, csv_file):
+        with open(csv_file, newline="") as f:
+            reader = csv.DictReader(f)
+            for k, v in next(reader).items():
+                if k in METRICS_VALIDATED:
+                    self.properties[k] = float(v)
+        self._get_mode(csv_file)
+
+    def update(self, report):
+        for property in self.properties:
+            # sum the properties to find the mean later
+            self.properties[property] += report.properties[property]
+
+    def mean(self):
+        for k, v in self.properties.items():
+            self.properties[k] = v / self.num_reports
+
+
+def metric_valid(key, obs_val, exp_val, threshold):
+    # In case of throughput, higher is better.
+    # In case of memory, lower is better.
+    # We ignore lower values for memory-related metrics.
+    lower = False
+    if "throughput" not in key:
+        lower = True
+    return check_if_within_threshold(exp_val, obs_val, threshold) or (
+        (obs_val < exp_val and lower)
+    )
+
+
+def check_if_within_threshold(value1, value2, threshold):
+    if float(value1) == 0.0:
+        return True
+    return abs((value1 - value2) / float(value1)) <= threshold
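
A small sketch of how the deviation check behaves, assuming it is run from the benchmarks/ directory so that utils.report is importable; the numeric values are made up for illustration.

from utils.report import check_if_within_threshold, metric_valid

# 105 is within 30% of the expected 100, so a latency-style metric passes.
print(check_if_within_threshold(100.0, 105.0, 0.3))  # True

# Throughput must stay within the threshold band; a 50% drop fails.
print(metric_valid("TS throughput", 50.0, 100.0, 0.3))  # False

# For memory/latency metrics, an observed value lower than the baseline is
# accepted even when it falls outside the threshold.
print(metric_valid("memory_percentage_mean", 10.0, 40.0, 0.3))  # True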

benchmarks/validate_report.py

Lines changed: 98 additions & 0 deletions
@@ -0,0 +1,98 @@
+import argparse
+import os
+
+from utils.report import (
+    ACCEPTABLE_METRIC_DEVIATION,
+    METRICS_VALIDATED,
+    Report,
+    metric_valid,
+)
+from utils.update_artifacts import (
+    BENCHMARK_ARTIFACTS_PATH,
+    BENCHMARK_REPORT_FILE,
+    BENCHMARK_REPORT_PATH,
+)
+
+
+def validate_reports(artifacts_dir, report_dir, deviation):
+    # Read baseline reports
+    baseline_reports = {}
+    num_reports = len(os.listdir(artifacts_dir))
+    for _d in sorted(os.listdir(artifacts_dir)):
+        dir = os.path.join(artifacts_dir, _d)
+        for subdir in sorted(os.listdir(dir)):
+            csv_file = os.path.join(dir, subdir, BENCHMARK_REPORT_FILE)
+
+            report = Report(deviation, num_reports)
+            report.read_csv(csv_file)
+            if subdir not in baseline_reports:
+                baseline_reports[subdir] = report
+            else:
+                baseline_reports[subdir].update(report)
+
+    # Get the mean value of each of the properties for every report
+    for model, report in baseline_reports.items():
+        report.mean()
+        baseline_reports[model] = report
+
+    # Read generated reports
+    generated_reports = {}
+    for subdir in sorted(os.listdir(report_dir)):
+        if os.path.isdir(os.path.join(report_dir, subdir)):
+            csv_file = os.path.join(report_dir, subdir, BENCHMARK_REPORT_FILE)
+            report = Report()
+            report.read_csv(csv_file)
+            generated_reports[subdir] = report
+
+    # Compare generated reports with baseline reports
+    error = False
+    for model, report in generated_reports.items():
+        for key in METRICS_VALIDATED:
+            if not metric_valid(
+                key,
+                report.properties[key],
+                baseline_reports[model].properties[key],
+                baseline_reports[model].deviation,
+            ):
+                print(
+                    f"Error while validating {key} for model: {model}, "
+                    f"Expected value: {baseline_reports[model].properties[key]:.2f}, "
+                    f"Observed value: {report.properties[key]:.2f}"
+                )
+                error = True
+        if not error:
+            print(f"Model {model} successfully validated")
+
+    if error:
+        raise Exception("Failures in benchmark validation")
+
+
+def main():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        "--input-artifacts-dir",
+        help="directory where benchmark artifacts have been saved",
+        type=str,
+        default=BENCHMARK_ARTIFACTS_PATH,
+    )
+
+    parser.add_argument(
+        "--input-report-dir",
+        help="directory where current benchmark report is saved",
+        type=str,
+        default=BENCHMARK_REPORT_PATH,
+    )
+
+    parser.add_argument(
+        "--deviation",
+        help="acceptable variation in metrics values",
+        type=float,
+        default=ACCEPTABLE_METRIC_DEVIATION,
+    )
+    args = parser.parse_args()
+    validate_reports(args.input_artifacts_dir, args.input_report_dir, args.deviation)
+
+
+if __name__ == "__main__":
+    main()
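
The validator can also be driven programmatically rather than through the CLI. A minimal sketch, assuming it is run from the benchmarks/ directory, with hypothetical placeholder paths for the baseline artifacts and the current report:

from utils.report import ACCEPTABLE_METRIC_DEVIATION
from validate_report import validate_reports

# Raises an Exception if any validated metric falls outside the
# acceptable deviation from the baseline mean.
validate_reports(
    artifacts_dir="/tmp/ts_artifacts/cpu_benchmark_validation",  # baseline runs (placeholder)
    report_dir="/tmp/ts_benchmark_report",                       # current run (placeholder)
    deviation=ACCEPTABLE_METRIC_DEVIATION,
)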
