
Commit 07bd3cc
Merge branch 'master' into examples/yolov8
2 parents b1be076 + bb4eb8b


55 files changed: 1331 additions & 261 deletions

.pre-commit-config.yaml

Lines changed: 3 additions & 2 deletions
@@ -12,6 +12,7 @@ repos:
       - id: check-json
       - id: check-toml
       - id: check-yaml
+        args: [--allow-multiple-documents, --unsafe]
       - id: end-of-file-fixer
       - id: mixed-line-ending
       - id: trailing-whitespace
@@ -24,12 +25,12 @@ repos:
       - id: python-no-log-warn
       - id: python-use-type-annotations
   - repo: https://github.com/hadialqattan/pycln
-    rev: v2.1.3
+    rev: v2.1.5
     hooks:
       - id: pycln
         args: [--all]
   - repo: https://github.com/psf/black
-    rev: 23.1.0
+    rev: 23.7.0
     hooks:
       - id: black
         additional_dependencies: ['click==8.0.4']

benchmarks/benchmark-ab.py

Lines changed: 22 additions & 0 deletions
@@ -30,6 +30,7 @@
     "image": "",
     "docker_runtime": "",
     "backend_profiling": False,
+    "handler_profiling": False,
     "generate_graphs": False,
     "config_properties": "config.properties",
     "inference_model_url": "predictions/benchmark",
@@ -95,6 +96,12 @@ def json_provider(file_path, cmd_name):
     default=False,
     help="Enable backend profiling using CProfile. Default False",
 )
+@click.option(
+    "--handler_profiling",
+    "-hp",
+    default=False,
+    help="Enable handler profiling. Default False",
+)
 @click.option(
     "--generate_graphs",
     "-gg",
@@ -143,6 +150,7 @@ def benchmark(
     image,
     docker_runtime,
     backend_profiling,
+    handler_profiling,
     config_properties,
     inference_model_url,
     report_location,
@@ -163,6 +171,7 @@ def benchmark(
         "image": image,
         "docker_runtime": docker_runtime,
         "backend_profiling": backend_profiling,
+        "handler_profiling": handler_profiling,
         "config_properties": config_properties,
         "inference_model_url": inference_model_url,
         "report_location": report_location,
@@ -469,13 +478,26 @@ def generate_report(warm_up_lines):
 }


+def update_metrics():
+    if execution_params["handler_profiling"]:
+        opt_metrics = {
+            "handler_preprocess.txt": "ts_handler_preprocess",
+            "handler_inference.txt": "ts_handler_inference",
+            "handler_postprocess.txt": "ts_handler_postprocess",
+        }
+        metrics.update(opt_metrics)
+    return metrics
+
+
 def extract_metrics(warm_up_lines):
     with open(execution_params["metric_log"]) as f:
         lines = f.readlines()

     click.secho(f"Dropping {warm_up_lines} warmup lines from log", fg="green")
     lines = lines[warm_up_lines:]

+    metrics = update_metrics()
+
     for k, v in metrics.items():
         all_lines = []
         pattern = re.compile(v)
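For context on how the new handler metrics are consumed: `extract_metrics` treats each entry in `metrics` as a mapping from an output file name to a pattern that is matched against the metric log, and `update_metrics` only adds the three handler-stage entries when profiling is enabled. A minimal standalone sketch of that flow (the base `metrics` entry and the log lines below are illustrative placeholders, not taken from this commit):

```python
import re

# Illustrative base metric; the real dict in benchmark-ab.py has more entries.
metrics = {"predict.txt": "PredictionTime"}


def update_metrics(handler_profiling):
    # Mirrors the new helper: add handler-stage metrics only when profiling is on.
    if handler_profiling:
        metrics.update(
            {
                "handler_preprocess.txt": "ts_handler_preprocess",
                "handler_inference.txt": "ts_handler_inference",
                "handler_postprocess.txt": "ts_handler_postprocess",
            }
        )
    return metrics


# Made-up log lines, just to show the pattern-per-metric extraction.
lines = [
    "PredictionTime.Milliseconds:12.3",
    "ts_handler_preprocess.Milliseconds:1.1",
    "ts_handler_inference.Milliseconds:9.8",
]

for out_file, name in update_metrics(handler_profiling=True).items():
    pattern = re.compile(name)
    matched = [line for line in lines if pattern.search(line)]
    print(out_file, matched)
```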

benchmarks/utils/gen_model_config_json.py

Lines changed: 29 additions & 14 deletions
@@ -2,11 +2,11 @@
 import copy
 import json
 import os
+
 import yaml


 def main():
-
     parser = argparse.ArgumentParser()

     parser.add_argument(
@@ -22,6 +22,7 @@ def main():
     arguments = parser.parse_args()
     convert_yaml_to_json(arguments.input, arguments.output)

+
 MODEL_CONFIG_KEY = {
     "batch_size",
     "batch_delay",
@@ -30,12 +31,18 @@ def main():
     "concurrency",
     "workers",
     "input",
-    "processors"
+    "processors",
+    "handler_profiling",
 }

+
 def convert_yaml_to_json(yaml_file_path, output_dir):
-    print("convert_yaml_to_json yaml_file_path={}, output_dir={}".format(yaml_file_path, output_dir))
-    with open(yaml_file_path, 'r') as f:
+    print(
+        "convert_yaml_to_json yaml_file_path={}, output_dir={}".format(
+            yaml_file_path, output_dir
+        )
+    )
+    with open(yaml_file_path, "r") as f:
         yaml_dict = yaml.safe_load(f)

     for model, config in yaml_dict.items():
@@ -58,10 +65,9 @@ def convert_yaml_to_json(yaml_file_path, output_dir):
         batch_worker_list = []
         for batch_size in batch_size_list:
             for workers in workers_list:
-                batch_worker_list.append({
-                    "batch_size" : batch_size,
-                    "workers" : workers
-                })
+                batch_worker_list.append(
+                    {"batch_size": batch_size, "workers": workers}
+                )

         benchmark_configs = []
         for batch_worker in batch_worker_list:
@@ -72,25 +78,34 @@ def convert_yaml_to_json(yaml_file_path, output_dir):
         for bConfig in benchmark_configs:
             for i in range(len(processors)):
                 if type(processors[i]) is str:
-                    path = '{}/{}'.format(output_dir, processors[i])
+                    path = "{}/{}".format(output_dir, processors[i])
                     if not os.path.isdir(path):
                         continue

-                    benchmark_config_file = '{}/{}_w{}_b{}.json'\
-                        .format(path, model_name, bConfig["workers"], bConfig["batch_size"])
+                    benchmark_config_file = "{}/{}_w{}_b{}.json".format(
+                        path,
+                        model_name,
+                        bConfig["workers"],
+                        bConfig["batch_size"],
+                    )
                     with open(benchmark_config_file, "w") as outfile:
                         json.dump(bConfig, outfile, indent=4)
                 elif type(processors[i]) is dict:
-                    path = '{}/gpu'.format(output_dir)
+                    path = "{}/gpu".format(output_dir)
                     if not os.path.isdir(path):
                         continue

                     bConfig["gpus"] = processors[i]["gpus"]
-                    benchmark_config_file = '{}/{}_w{}_b{}.json'\
-                        .format(path, model_name, bConfig["workers"], bConfig["batch_size"])
+                    benchmark_config_file = "{}/{}_w{}_b{}.json".format(
+                        path,
+                        model_name,
+                        bConfig["workers"],
+                        bConfig["batch_size"],
+                    )
                     with open(benchmark_config_file, "w") as outfile:
                         json.dump(bConfig, outfile, indent=4)
                     del bConfig["gpus"]

+
 if __name__ == "__main__":
     main()
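To make the conversion concrete: for every model, the script expands the `batch_size` and `workers` lists into one JSON config per combination and names each file `{model}_w{workers}_b{batch_size}.json` under a per-processor directory. A rough sketch under assumed inputs (the model name, the `model_json_config` output directory, and the YAML shape are placeholders; only the expansion and file naming come from this script):

```python
import yaml

example_yaml = """
yolov8:
  batch_size: [1, 4]
  workers: [2, 4]
  processors: ["cpu"]
"""

output_dir = "model_json_config"  # placeholder output directory

for model_name, config in yaml.safe_load(example_yaml).items():
    for batch_size in config["batch_size"]:
        for workers in config["workers"]:
            # Mirrors the batch_size x workers expansion and the file naming above.
            bconfig = {"batch_size": batch_size, "workers": workers}
            path = "{}/{}".format(output_dir, config["processors"][0])
            print("{}/{}_w{}_b{}.json".format(path, model_name, workers, batch_size))
```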

docs/FAQs.md

Lines changed: 19 additions & 4 deletions
@@ -1,6 +1,7 @@
 # FAQ'S
 Contents of this document.
 * [General](#general)
+* [Performance](#performance)
 * [Deployment and config](#deployment-and-config)
 * [API](#api)
 * [Handler](#handler)
@@ -34,9 +35,23 @@ No, As of now only python based models are supported.
 Torchserve is derived from Multi-Model-Server. However, Torchserve is specifically tuned for Pytorch models. It also has new features like Snapshot and model versioning.

 ### How to decode international language in inference response on client side?
-By default, Torchserve uses utf-8 to encode if the inference response is string. So client can use utf-8 to decode.
+By default, Torchserve uses utf-8 to encode if the inference response is string. So client can use utf-8 to decode.

-If a model converts international language string to bytes, client needs to use the codec mechanism specified by the model such as in https://github.com/pytorch/serve/blob/master/examples/nmt_transformer/model_handler_generalized.py#L55
+If a model converts international language string to bytes, client needs to use the codec mechanism specified by the model such as in https://github.com/pytorch/serve/blob/master/examples/nmt_transformer/model_handler_generalized.py
+
+## Performance
+
+Relevant documents.
+- [Performance Guide](performance_guide.md)
+
+### How do I improve TorchServe performance on CPU?
+CPU performance is heavily influenced by launcher core pinning. We recommend setting the following properties in your `config.properties`:
+
+```bash
+cpu_launcher_enable=true
+cpu_launcher_args=--use_logical_core
+```
+More background on improving CPU performance can be found in this [blog post](https://pytorch.org/tutorials/intermediate/torchserve_with_ipex#grokking-pytorch-intel-cpu-performance-from-first-principles).

 ## Deployment and config
 Relevant documents.
@@ -97,7 +112,7 @@ TorchServe looks for the config.property file according to the order listed in t

 - [models](configuration.md): Defines a list of models' configuration in config.property. A model's configuration can be overridden by [management API](management_api.md). It does not decide which models will be loaded during TorchServe start. There is no relationship b.w "models" and "load_models" (ie. TorchServe command line option [--models](configuration.md)).

-###
+###

 ## API
 Relevant documents
@@ -133,7 +148,7 @@ Refer to [default handlers](default_handlers.md) for more details.

 ### Is it possible to deploy Hugging Face models?
 Yes, you can deploy Hugging Face models using a custom handler.
-Refer to [HuggingFace_Transformers](https://github.com/pytorch/serve/blob/master/examples/Huggingface_Transformers/README.md#huggingface-transformers) for example.
+Refer to [HuggingFace_Transformers](https://github.com/pytorch/serve/blob/master/examples/Huggingface_Transformers/README.md#huggingface-transformers) for example.

 ## Model-archiver
 Relevant documents
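To illustrate the utf-8 answer added to the FAQ above, here is a minimal client-side sketch (the endpoint, port, and model name are placeholders for your own deployment):

```python
import requests

# Placeholder endpoint and model name; adjust to your deployment.
url = "http://127.0.0.1:8080/predictions/my_model"
with open("sample_input.txt", "rb") as f:
    response = requests.post(url, data=f)

# TorchServe encodes string responses as utf-8, so the client decodes with utf-8.
print(response.content.decode("utf-8"))
```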

docs/index.rst

Lines changed: 7 additions & 0 deletions
@@ -56,6 +56,13 @@ What's going on in TorchServe?
       :link: performance_guide.html
       :tags: Performance,Troubleshooting

+   .. customcarditem::
+      :header: Large Model Inference
+      :card_description: Serving Large Models with TorchServe
+      :image: https://raw.githubusercontent.com/pytorch/serve/master/docs/images/ts-lmi-internal.png
+      :link: large_model_inference.html
+      :tags: Large-Models,Performance
+
    .. customcarditem::
       :header: Troubleshooting
       :card_description: Various updates on Torcherve and use cases.
