
Commit c3893ac

Merge branch 'master' into issue_2140

2 parents 156c3ee + fa95a66

8 files changed, +195 -14 lines changed

.github/workflows/benchmark_nightly.yml

Lines changed: 2 additions & 0 deletions
@@ -65,6 +65,8 @@ jobs:
           if_no_artifact_found: ignore
           path: /tmp/ts_artifacts
           name: ${{ matrix.hardware }}_benchmark_validation
+      - name: Validate Benchmark result
+        run: python benchmarks/validate_report.py --input-artifacts-dir /tmp/ts_artifacts/${{ matrix.hardware }}_benchmark_validation
       - name: Update benchmark artifacts for auto validation
         run: python benchmarks/utils/update_artifacts.py --output /tmp/ts_artifacts/${{ matrix.hardware }}_benchmark_validation
       - name: Upload the updated benchmark artifacts for auto validation

.github/workflows/lint.yml

Lines changed: 1 addition & 0 deletions
@@ -77,6 +77,7 @@ jobs:
           echo "cd serve/"
           echo "pre-commit install"
           echo "pre-commit will lint your code for you, so git add and commit those new changes and this check should become green"
+          echo "If you've already pushed some files remotely then run git diff --name-only main | xargs pre-commit run --files"

   spellcheck:
     runs-on: ubuntu-20.04

benchmarks/auto_benchmark.py

Lines changed: 16 additions & 6 deletions
@@ -17,9 +17,10 @@


 class BenchmarkConfig:
-    def __init__(self, yaml_dict, skip_ts_install):
+    def __init__(self, yaml_dict, skip_ts_install, skip_upload):
         self.yaml_dict = yaml_dict
         self.skip_ts_install = skip_ts_install
+        self.skip_upload = skip_upload
         self.bm_config = {}
         yesterday = datetime.date.today() - datetime.timedelta(days=1)
         self.bm_config["version"] = "torchserve-nightly=={}.{}.{}".format(
@@ -89,9 +90,9 @@ def load_config(self):
                 self.models(v)
             elif k == "hardware":
                 self.hardware(v)
-            elif k == "metrics_cmd":
+            elif k == "metrics_cmd" and not self.skip_upload:
                 self.metrics_cmd(v)
-            elif k == "report_cmd":
+            elif k == "report_cmd" and not self.skip_upload:
                 report_cmd = v

         self.bm_config["model_config_path"] = (
@@ -110,12 +111,12 @@ def load_config(self):
             print("{}={}".format(k, v))


-def load_benchmark_config(bm_config_path, skip_ts_install):
+def load_benchmark_config(bm_config_path, skip_ts_install, skip_upload):
     yaml = ruamel.yaml.YAML()
     with open(bm_config_path, "r") as f:
         yaml_dict = yaml.load(f)

-    benchmark_config = BenchmarkConfig(yaml_dict, skip_ts_install)
+    benchmark_config = BenchmarkConfig(yaml_dict, skip_ts_install, skip_upload)
     benchmark_config.load_config()

     return benchmark_config.bm_config
@@ -285,14 +286,23 @@ def main():
         action="store",
         help="true: skip torchserve installation. default: true",
     )
+    parser.add_argument(
+        "--skip_upload",
+        help="true: skip uploading commands . default: false",
+    )

     arguments = parser.parse_args()
     skip_ts_config = (
         False
         if arguments.skip is not None and arguments.skip.lower() == "false"
         else True
     )
-    bm_config = load_benchmark_config(arguments.input, skip_ts_config)
+    skip_upload = (
+        True
+        if arguments.skip_upload is not None and arguments.skip_upload.lower() == "true"
+        else False
+    )
+    bm_config = load_benchmark_config(arguments.input, skip_ts_config, skip_upload)
     benchmark_env_setup(bm_config, skip_ts_config)
     run_benchmark(bm_config)
     clean_up_benchmark_env(bm_config)

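Note the asymmetric string-flag convention in the hunk above: `--skip` is opt-out (anything other than an explicit "false" keeps the TorchServe install skipped) while the new `--skip_upload` is opt-in (only an explicit "true" enables it). A minimal sketch of that convention; `parse_flag` is a hypothetical helper for illustration, not part of this commit:

```python
# Illustrative only: mirrors the string-to-bool handling of --skip and --skip_upload.
def parse_flag(value, default):
    """Return the boolean a flag string maps to, given the flag's default."""
    if value is None:
        return default
    # An opt-out flag (default True) is disabled only by an explicit "false";
    # an opt-in flag (default False) is enabled only by an explicit "true".
    return value.lower() != "false" if default else value.lower() == "true"

assert parse_flag(None, default=True) is True      # --skip omitted
assert parse_flag("false", default=True) is False  # --skip false
assert parse_flag(None, default=False) is False    # --skip_upload omitted
assert parse_flag("true", default=False) is True   # --skip_upload true
```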
benchmarks/utils/report.py

Lines changed: 75 additions & 0 deletions
@@ -0,0 +1,75 @@
+import csv
+
+METRICS_VALIDATED = [
+    "TS throughput",
+    "TS latency P50",
+    "TS latency P90",
+    "TS latency P99",
+    "Model_p50",
+    "Model_p90",
+    "Model_p99",
+    "memory_percentage_mean",
+    "gpu_memory_used_mean",
+    "cpu_percentage_mean",
+    "gpu_percentage_mean",
+]
+
+
+# Acceptable metric deviation needs a more complicated logic.
+# Example: For latencies in 2 digits, 50% might be acceptable
+# For 3 digit latencies, 20-30% might be the right value
+# For cpu_memory < 15%, 50% deviation works but for CPU > 40%, 10-15%
+# might be the right value
+ACCEPTABLE_METRIC_DEVIATION = 0.3
+
+
+class Report:
+    def __init__(self, deviation=0, num_reports=0):
+        self.properties = {}
+        self.mode = None
+        self.throughput = 0
+        self.batch_size = 0
+        self.workers = 0
+        self.deviation = deviation
+        self.num_reports = num_reports
+
+    def _get_mode(self, csv_file):
+        cfg = csv_file.split("/")[-2]
+        cfg = cfg.split("_")
+        mode = cfg[0] + "_" + cfg[1]
+        self.mode = mode
+
+    def read_csv(self, csv_file):
+        with open(csv_file, newline="") as f:
+            reader = csv.DictReader(f)
+            for k, v in next(reader).items():
+                if k in METRICS_VALIDATED:
+                    self.properties[k] = float(v)
+        self._get_mode(csv_file)
+
+    def update(self, report):
+        for property in self.properties:
+            # sum the properties to find the mean later
+            self.properties[property] += report.properties[property]
+
+    def mean(self):
+        for k, v in self.properties.items():
+            self.properties[k] = v / self.num_reports
+
+
+def metric_valid(key, obs_val, exp_val, threshold):
+    # In case of throughput, higher is better
+    # In case of memory, lower is better.
+    # We ignore lower values for memory related metrices
+    lower = False
+    if "throughput" not in key:
+        lower = True
+    return check_if_within_threshold(exp_val, obs_val, threshold) or (
+        (obs_val < exp_val and lower)
+    )
+
+
+def check_if_within_threshold(value1, value2, threshold):
+    if float(value1) == 0.0:
+        return True
+    return abs((value1 - value2) / float(value1)) <= threshold

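How `metric_valid` treats the two metric families above: for throughput, only the relative-deviation check applies (a drop beyond the threshold fails), while for latency and memory metrics any value below baseline passes regardless of the threshold. A quick illustration with made-up values, assuming `benchmarks/` is the working directory so `utils.report` is importable:

```python
# Illustration of the validation rules above (values are invented).
from utils.report import metric_valid  # assumes benchmarks/ is on the import path

# Throughput: higher is better, so a 40% drop against a 0.3 threshold fails.
assert metric_valid("TS throughput", obs_val=60.0, exp_val=100.0, threshold=0.3) is False
# Latency: lower is better, so a value well under baseline still passes.
assert metric_valid("TS latency P50", obs_val=50.0, exp_val=100.0, threshold=0.3) is True
# A 20% regression is within the 0.3 threshold either way.
assert metric_valid("TS latency P90", obs_val=120.0, exp_val=100.0, threshold=0.3) is True
```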
benchmarks/validate_report.py

Lines changed: 98 additions & 0 deletions
@@ -0,0 +1,98 @@
+import argparse
+import os
+
+from utils.report import (
+    ACCEPTABLE_METRIC_DEVIATION,
+    METRICS_VALIDATED,
+    Report,
+    metric_valid,
+)
+from utils.update_artifacts import (
+    BENCHMARK_ARTIFACTS_PATH,
+    BENCHMARK_REPORT_FILE,
+    BENCHMARK_REPORT_PATH,
+)
+
+
+def validate_reports(artifacts_dir, report_dir, deviation):
+    # Read baseline reports
+    baseline_reports = {}
+    num_reports = len(os.listdir(artifacts_dir))
+    for _d in sorted(os.listdir(artifacts_dir)):
+        dir = os.path.join(artifacts_dir, _d)
+        for subdir in sorted(os.listdir(dir)):
+            csv_file = os.path.join(dir, subdir, BENCHMARK_REPORT_FILE)
+
+            report = Report(deviation, num_reports)
+            report.read_csv(csv_file)
+            if subdir not in baseline_reports:
+                baseline_reports[subdir] = report
+            else:
+                baseline_reports[subdir].update(report)
+
+    # Get the mean value each of the properties for every report
+    for model, report in baseline_reports.items():
+        report.mean()
+        baseline_reports[model] = report
+
+    # Read generated reports
+    generated_reports = {}
+    for subdir in sorted(os.listdir(report_dir)):
+        if os.path.isdir(os.path.join(report_dir, subdir)):
+            csv_file = os.path.join(report_dir, subdir, BENCHMARK_REPORT_FILE)
+            report = Report()
+            report.read_csv(csv_file)
+            generated_reports[subdir] = report
+
+    # Compare generated reports with baseline reports
+    error = False
+    for model, report in generated_reports.items():
+        for key in METRICS_VALIDATED:
+            if not metric_valid(
+                key,
+                report.properties[key],
+                baseline_reports[model].properties[key],
+                baseline_reports[model].deviation,
+            ):
+                print(
+                    f"Error while validating {key} for model: {model}, "
+                    f"Expected value: {baseline_reports[model].properties[key]:.2f}, "
+                    f"Observed value: {report.properties[key]:.2f}"
+                )
+                error = True
+        if not error:
+            print(f"Model {model} successfully validated")
+
+    if error:
+        raise Exception("Failures in benchmark validation")
+
+
+def main():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        "--input-artifacts-dir",
+        help="directory where benchmark artifacts have been saved",
+        type=str,
+        default=BENCHMARK_ARTIFACTS_PATH,
+    )
+
+    parser.add_argument(
+        "--input-report-dir",
+        help="directory where current benchmark report is saved",
+        type=str,
+        default=BENCHMARK_REPORT_PATH,
+    )
+
+    parser.add_argument(
+        "--deviation",
+        help="acceptable variation in metrics values ",
+        type=float,
+        default=ACCEPTABLE_METRIC_DEVIATION,
+    )
+    args = parser.parse_args()
+    validate_reports(args.input_artifacts_dir, args.input_report_dir, args.deviation)
+
+
+if __name__ == "__main__":
+    main()

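The nightly workflow step above drives this script via `--input-artifacts-dir`; it can also be called directly from Python. A sketch under stated assumptions: the paths below are placeholders, and because the script uses relative `utils.*` imports it must be run from the benchmarks/ directory:

```python
# Sketch only: placeholder paths; run from serve/benchmarks/ so the
# script's relative `utils.*` imports resolve.
from validate_report import validate_reports

validate_reports(
    artifacts_dir="/tmp/ts_artifacts/cpu_benchmark_validation",  # baseline runs, one subdir per past run
    report_dir="/tmp/ts_benchmark",                              # placeholder: report dir of the current run
    deviation=0.3,                                               # allowed relative deviation
)
```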
docs/torchserve_on_win_native.md

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@
 - At present, it has only been certified on windows server 2019 however should work fine on Windows 10.
 - Make sure you are an admin user or have admin rights
 - The instruction given here will use anaconda Powershell terminal to install torchserve
-- Install Anaconda as given [here](https://docs.anaconda.com/free/anaconda/getting-started/install/windows/)
+- Install Anaconda as given [here](https://docs.anaconda.com/anaconda/install/windows/)
 - Install Git as given [here](https://github.com/git-for-windows/git/releases/download/v2.28.0.windows.1/Git-2.28.0-64-bit.exe)
 - Install openjdk17
 - Download [openjdk17](https://download.oracle.com/java/17/archive/jdk-17.0.3_windows-x64_bin.zip)

examples/pt2/README.md

Lines changed: 2 additions & 2 deletions
@@ -23,8 +23,8 @@ As an example let's expand our getting started guide with the only difference be

 ```
 mkdir model_store
-torch-model-archiver --model-name densenet161 --version 1.0 --model-file ./serve/examples/image_classifier/densenet_161/model.py --export-path model_store --extra-files ./serve/examples/image_classifier/index_to_name.json --handler image_classifier
-torchserve --start --ncs --model-store model_store --models densenet161.mar --config-file model_config.yaml
+torch-model-archiver --model-name densenet161 --version 1.0 --model-file ./serve/examples/image_classifier/densenet_161/model.py --export-path model_store --extra-files ./serve/examples/image_classifier/index_to_name.json --handler image_classifier --config-file model_config.yaml
+torchserve --start --ncs --model-store model_store --models densenet161.mar
 ```

 The exact same approach works with any other model, what's going on is the below

kubernetes/kserve/kserve_wrapper/TorchserveModel.py

Lines changed: 0 additions & 5 deletions
@@ -129,10 +129,5 @@ def load(self) -> bool:
         existing_paths = [path for path in paths if path.exists()]
         if len(existing_paths) == 0:
             raise ModelMissingError(model_path)
-        elif len(existing_paths) > 1:
-            raise RuntimeError(
-                "More than one model file is detected, "
-                f"Only one is allowed within model_dir: {existing_paths}"
-            )
         self.ready = True
         return self.ready
