Skip to content

Commit 6d6aa01

Browse files
authored
Add support for eager mode performance (#1539)
* Add support for eager mode performance Summary: Added "compile" field to "extra_info" that allows us to record eager mode performance as well context is eager, eager + compile, eager + compile + autoquant can all have performance improvements/changes over time, so we want to track: (1) eager perf on some previous date (configurable by user) (2) current eager perf (3) current compile perf (4) current autoquant + compile perf Test Plan: tested locally: https://gist.github.com/jerryzh168/2a15322b0c8f40f35e52956837c67fec Reviewers: Subscribers: Tasks: Tags: * move min_sqnr * format * remove redundant headers * add upload_to_s3 script * format
1 parent 24a78fe commit 6d6aa01

File tree

5 files changed

+116
-16
lines changed

5 files changed

+116
-16
lines changed

examples/sam2_amg_server/server.py

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,7 @@ def set_autoquant(mask_generator, autoquant_type, min_sqnr):
413413
mask_generator.predictor._transforms_device = mask_generator.predictor.device
414414
torch.set_float32_matmul_precision("high")
415415
# NOTE: this fails when we run
416-
# python server.py ~/checkpoints/sam2 large --port 8000 --host localhost --fast --use_autoquant --unittest
416+
# python server.py ~/checkpoints/sam2 large --port 8000 --host localhost --fast --autoquant_type autoquant --unittest
417417
# https://gist.github.com/jerryzh168/d337cb5de0a1dec306069fe48ac8225e
418418
# mask_generator.predictor.model.sam_mask_decoder = autoquant(mask_generator.predictor.model.sam_mask_decoder, qtensor_class_list=DEFAULT_FLOAT_AUTOQUANT_CLASS_LIST, min_sqnr=40)
419419

@@ -508,7 +508,7 @@ def main(
508508

509509
# since autoquant is replicating what furious mode is doing, don't use these two together
510510
if autoquant_type is not None:
511-
assert not furious, "use autoquant can't be used together with furious"
511+
assert not furious, "autoquant can't be used together with furious"
512512
set_autoquant(mask_generator, autoquant_type, min_sqnr)
513513

514514
with open("dog.jpg", "rb") as f:
@@ -568,10 +568,22 @@ def main(
568568
benchmark_fn(image_tensors_to_masks, random_images, mask_generator)
569569

570570
if output_json_path:
571-
headers = ["name", "dtype", "device", "arch", "metric", "actual", "target"]
571+
headers = [
572+
"name",
573+
"dtype",
574+
"min_sqnr",
575+
"compile",
576+
"device",
577+
"arch",
578+
"metric",
579+
"actual",
580+
"target",
581+
]
572582
name = "sam2-" + model_type
573583
arch = get_arch_name()
574584
dtype = autoquant_type or "noquant"
585+
# boolean flag to indicate whether it's eager or compile
586+
compile = fast
575587
(
576588
avg_time_per_run,
577589
max_memory_allocated_bytes,
@@ -580,24 +592,19 @@ def main(
580592
memory_result = [
581593
name,
582594
dtype,
595+
min_sqnr,
596+
compile,
583597
device,
584598
arch,
585599
"memory(MiB)",
586600
max_memory_allocated_bytes,
587601
None,
588602
]
589-
memory_percent_result = [
590-
name,
591-
dtype,
592-
device,
593-
arch,
594-
"memory(%)",
595-
max_memory_allocated_percentage,
596-
None,
597-
]
598603
performance_result = [
599604
name,
600605
dtype,
606+
min_sqnr,
607+
compile,
601608
device,
602609
arch,
603610
"time_s(avg)",
@@ -610,7 +617,6 @@ def main(
610617
else write_json_result_ossci
611618
)
612619
write_json_result(output_json_path, headers, memory_result)
613-
write_json_result(output_json_path, headers, memory_percent_result)
614620
write_json_result(output_json_path, headers, performance_result)
615621

616622
if profile is not None:

scripts/upload_to_s3.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
import io
2+
import json
3+
import os
4+
from functools import lru_cache
5+
from typing import Any
6+
7+
import boto3
8+
9+
10+
@lru_cache
def get_s3_resource() -> Any:
    """Create the boto3 S3 resource once and reuse it for every upload."""
    resource = boto3.resource("s3")
    return resource
13+
14+
15+
def upload_to_s3(
    bucket_name: str,
    key: str,
    json_path: str,
) -> None:
    """Upload newline-delimited JSON benchmark entries to S3.

    Reads the file at ``json_path`` (extension normalized to ``.json``),
    parses one JSON document per line, and uploads them as a single
    newline-delimited JSON body to ``s3://<bucket_name>/<key>``.

    Args:
        bucket_name: Target S3 bucket.
        key: Object key within the bucket.
        json_path: Path to the newline-delimited JSON file to upload.

    Upload failures are printed and swallowed (best-effort), so a broken
    network does not fail the benchmark run that produced the file.
    """
    print(f"Writing {json_path} documents to S3")
    # NOTE(review): this re-derives the path with a .json extension; it
    # assumes json_path points at a .json file (or a sibling with another
    # extension) — confirm against callers.
    data = []
    with open(f"{os.path.splitext(json_path)[0]}.json", "r") as f:
        for line in f:
            data.append(json.loads(line))

    # Re-serialize as newline-delimited JSON for the upload body.
    body = io.StringIO()
    for benchmark_entry in data:
        json.dump(benchmark_entry, body)
        body.write("\n")

    try:
        get_s3_resource().Object(bucket_name, key).put(
            Body=body.getvalue(),
            ContentType="application/json",
        )
    except Exception as e:
        # Best-effort: report the failure and return instead of raising.
        print("fail to upload to s3:", e)
        return
    print("Done!")
43+
44+
45+
if __name__ == "__main__":
    import argparse
    import datetime
    import subprocess

    parser = argparse.ArgumentParser(
        description="Upload benchmark result json file to clickhouse"
    )
    parser.add_argument(
        "--json-path",
        type=str,
        help="json file path to upload to clickhouse",
        required=True,
    )
    args = parser.parse_args()

    # Timestamp for midnight (00:00) of today, so uploads are bucketed by day.
    today = datetime.datetime.combine(datetime.date.today(), datetime.time.min)
    today_timestamp = str(int(today.timestamp()))
    print("Today timestamp:", today_timestamp)

    # Short hostname (`hostname -s`) identifies the benchmark machine in the
    # object key.
    output = subprocess.check_output(["hostname", "-s"])
    hostname = output.decode("utf-8").strip()
    upload_to_s3(
        "ossci-benchmarks",
        f"v3/pytorch/ao/{hostname}/torchao-models-{today_timestamp}.json",
        args.json_path,
    )

torchao/_models/llama/generate.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1028,6 +1028,7 @@ def callback(x):
10281028
"name",
10291029
"dtype",
10301030
"min_sqnr",
1031+
"compile",
10311032
"device",
10321033
"arch",
10331034
"metric",
@@ -1037,11 +1038,22 @@ def callback(x):
10371038
name = checkpoint_path.parent.name
10381039
arch = get_arch_name()
10391040
dtype = quantization or "noquant"
1040-
memory_result = [name, dtype, min_sqnr, device, arch, "mem/s", bandwidth, None]
1041+
memory_result = [
1042+
name,
1043+
dtype,
1044+
min_sqnr,
1045+
compile,
1046+
device,
1047+
arch,
1048+
"mem/s",
1049+
bandwidth,
1050+
None,
1051+
]
10411052
performance_result = [
10421053
name,
10431054
dtype,
10441055
min_sqnr,
1056+
compile,
10451057
device,
10461058
arch,
10471059
"tok/s",

torchao/_models/sam/eval_combo.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -642,6 +642,7 @@ def mlp_only(mod, name):
642642
"name",
643643
"dtype",
644644
"min_sqnr",
645+
"compile",
645646
"device",
646647
"arch",
647648
"metric",
@@ -651,10 +652,13 @@ def mlp_only(mod, name):
651652
name = sam_model_type
652653
arch = get_arch_name()
653654
dtype = compress or "noquant"
655+
# boolean flag to indicate whether compile is used
656+
compile = use_compile != "False"
654657
memory_result = [
655658
name,
656659
dtype,
657660
min_sqnr,
661+
compile,
658662
device,
659663
arch,
660664
"memory(MiB)",
@@ -665,6 +669,7 @@ def mlp_only(mod, name):
665669
name,
666670
dtype,
667671
min_sqnr,
672+
compile,
668673
device,
669674
arch,
670675
"img_s(avg)",

torchao/_models/utils.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,12 @@ def write_json_result_ossci(output_json_path, headers, row):
3030
"name": "TorchAO benchmark",
3131
"mode": "inference",
3232
"dtype": mapping_headers["dtype"],
33-
"min_sqnr": mapping_headers["min_sqnr"],
3433
"extra_info": {
3534
"device": mapping_headers["device"],
3635
"arch": mapping_headers["arch"],
36+
"min_sqnr": mapping_headers["min_sqnr"],
37+
# True means compile is enabled, False means eager mode
38+
"compile": mapping_headers["compile"],
3739
},
3840
},
3941
"model": {
@@ -80,10 +82,12 @@ def write_json_result_local(output_json_path, headers, row):
8082
"name": "TorchAO benchmark",
8183
"mode": "inference",
8284
"dtype": mapping_headers["dtype"],
83-
"min_sqnr": mapping_headers["min_sqnr"],
8485
"extra_info": {
8586
"device": mapping_headers["device"],
8687
"arch": mapping_headers["arch"],
88+
"min_sqnr": mapping_headers["min_sqnr"],
89+
# True means compile is enabled, False means eager mode
90+
"compile": mapping_headers["compile"],
8791
},
8892
},
8993
"model": {

0 commit comments

Comments
 (0)