
Commit 0622182

Merge remote-tracking branch 'origin/bench_structure' into bench_sparsity
2 parents: 9cb642d + 2193189

File tree: 9 files changed (+116, −95 lines)

benchmarks/__init__.py

Whitespace-only changes.

benchmarks/microbenchmarks/README.md

Lines changed: 2 additions & 2 deletions
@@ -47,15 +47,15 @@ model_params:
       [4096, 4096, 1024]
     ]
   high_precision_dtype: "torch.bfloat16"
-  compile: true
-  compile_mode: "max-autotune"
+  compile: "max-autotune" # Options: "default", "max-autotune", "false"
   device: "cuda" # Options: "cuda", "mps", "xpu", "cpu"
   model_type: "linear" # Options: "linear", "ln_linear_sigmoid"
 ```
 
 ## Configuration Options
 
 ### Quantization Methods
+Currently, quantization string is in same format as the one being passed in llama/generate.py.
 - `baseline`: No quantization
 - `int8wo`: 8-bit weight-only quantization
 - `int4wo-{group_size}`: 4-bit weight-only quantization with specified group size
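Note that after this merge the README documents compilation as a single `compile` string, while the checked-in test config (benchmark_config.yml below) uses the split `use_torch_compile`/`torch_compile_mode` keys. The following is a minimal sketch of how the single string could be normalized into that pair; `normalize_compile_option` is a hypothetical helper, not part of this commit:

```python
# Hypothetical helper (not in this commit): map the README-style "compile"
# string onto the (use_torch_compile, torch_compile_mode) pair used by
# benchmark_config.yml and BenchmarkConfig elsewhere in this merge.
from typing import Optional, Tuple


def normalize_compile_option(compile_option: str) -> Tuple[bool, Optional[str]]:
    """Treat "false" as disabled; any other value is a torch.compile mode."""
    if compile_option.strip().lower() == "false":
        return False, None
    return True, compile_option  # e.g. "default" or "max-autotune"


assert normalize_compile_option("false") == (False, None)
assert normalize_compile_option("max-autotune") == (True, "max-autotune")
```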

benchmarks/microbenchmarks/benchmark_inference.py

Lines changed: 12 additions & 15 deletions
@@ -7,21 +7,21 @@
 
 from copy import deepcopy
 from pathlib import Path
-from typing import Dict
 
 import torch
 
-from utils import (
+from benchmarks.microbenchmarks.utils import (
     BenchmarkConfig,
-    benchmark_model_inference_in_microseconds,
+    BenchmarkResult,
     clean_caches,
     create_model_and_input,
-    quantization_string_to_quantization_config,
+    model_inference_time_in_ms,
+    string_to_config,
 )
 from torchao.quantization import quantize_
 
 
-def run(config: BenchmarkConfig) -> Dict[str, float]:
+def run(config: BenchmarkConfig) -> BenchmarkResult:
     """Run inference benchmarks"""
     clean_caches()  # Clean caches
 
@@ -39,25 +39,22 @@ def run(config: BenchmarkConfig) -> Dict[str, float]:
 
     # Use quantize_ to apply each quantization function to the model
     m_copy = deepcopy(base_model).eval().to(config.device)
-    quantization_config = quantization_string_to_quantization_config(
-        config.quantization,
-        config.sparsity,
-        high_precision_dtype=config.high_precision_dtype
+    quantization_config = string_to_config(
+        config.quantization, high_precision_dtype=config.high_precision_dtype
     )
-    if quantization_config:
+    if quantization_config is not None:
         quantize_(m_copy, quantization_config)
-    if config.compile:
+    if config.use_torch_compile:
         print("Compiling model....")
-        m_copy = torch.compile(m_copy, mode=config.compile_mode, fullgraph=True)
+        m_copy = torch.compile(m_copy, mode=config.torch_compile_mode, fullgraph=True)
 
     # Run benchmarks
-    result = {**config.to_dict()}
+    result = BenchmarkResult(config=config)
 
     # Benchmark time to run an inference call for quantized model
-    model_time = benchmark_model_inference_in_microseconds(
+    result.model_inference_time_in_ms = model_inference_time_in_ms(
         model=m_copy, input_data=input_data
     )
-    result["benchmark_model_inference_in_microseconds"] = model_time
 
     # TODO: Benchmark time using profiler
     # Profile dtype model evaluation
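With this change, run() returns a BenchmarkResult instead of a plain dict. The class itself is defined in benchmarks/microbenchmarks/utils.py, which is not part of this diff view; the sketch below is only an assumption inferred from how the class is used here (keyword construction from a config, a mutable model_inference_time_in_ms field) and may not match the real definition:

```python
# Assumed shape of BenchmarkResult, inferred from its usage in
# benchmark_inference.py and test_utils.py; the actual definition lives in
# benchmarks/microbenchmarks/utils.py and may differ.
from dataclasses import dataclass
from typing import Any, Dict


@dataclass
class BenchmarkResult:
    config: Any  # a BenchmarkConfig instance
    model_inference_time_in_ms: float = 0.0  # filled in after the timing run

    def to_dict(self) -> Dict[str, Any]:
        # Flatten the config plus the measurement, e.g. for CSV output.
        return {
            **vars(self.config),
            "model_inference_time_in_ms": self.model_inference_time_in_ms,
        }
```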

benchmarks/microbenchmarks/benchmark_runner.py

Lines changed: 23 additions & 4 deletions
@@ -19,7 +19,7 @@
 
 import yaml
 
-from utils import (
+from benchmarks.microbenchmarks.utils import (
     BenchmarkConfig,
     generate_results_csv,
     print_results,
@@ -66,9 +66,9 @@ def load_benchmark_configs(config_path: str) -> List[BenchmarkConfig]:
     return configs
 
 
-def run_benchmarks_from_config(config_path: str) -> None:
+def run_inference_benchmarks_from_config(config_path: str) -> None:
     """Run benchmarks using configurations from YAML file"""
-    from benchmark_inference import run as run_inference
+    from benchmarks.microbenchmarks.benchmark_inference import run as run_inference
 
     configs = load_benchmark_configs(config_path)
     results = []
@@ -100,5 +100,24 @@ def run_benchmarks_from_config(config_path: str) -> None:
         required=True,
         help="Path to benchmark configuration file",
     )
+    parser.add_argument(
+        "--benchmark_mode",
+        "-m",
+        type=str,
+        default="inference",
+        choices=["inference", "training"],
+        help="Benchmark mode to run: inference or training",
+    )
     args = parser.parse_args()
-    run_benchmarks_from_config(args.config)
+
+    # Run benchmarks
+    if args.benchmark_mode == "inference":
+        run_inference_benchmarks_from_config(args.config)
+    elif args.benchmark_mode == "training":
+        print("Training mode not implemented yet")
+    else:
+        raise ValueError(
+            f"Invalid benchmark mode: {args.benchmark_mode}, choose from inference or training"
+        )
+
+    # TODO: Add support for args to override config values and run smaller benchmarks
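For reference, one way to drive the new dispatch; the --config flag name is assumed from args.config (only --benchmark_mode/-m appears in the hunk above), and the YAML path is the test config changed later in this commit:

```python
# Usage sketch for the runner after this change. The "--config" flag name is
# an assumption based on `args.config`; only --benchmark_mode/-m is visible
# in the diff above.
#
#   python -m benchmarks.microbenchmarks.benchmark_runner \
#       --config benchmarks/microbenchmarks/test/benchmark_config.yml \
#       --benchmark_mode inference
#
# Programmatic equivalent of the "inference" branch:
from benchmarks.microbenchmarks.benchmark_runner import (
    run_inference_benchmarks_from_config,
)

run_inference_benchmarks_from_config(
    "benchmarks/microbenchmarks/test/benchmark_config.yml"
)
```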

benchmarks/microbenchmarks/test/benchmark_config.yml

Lines changed: 2 additions & 1 deletion
@@ -13,7 +13,8 @@ model_params:
       [4096, 4096, 1024]
     ]
   high_precision_dtype: "torch.bfloat16"
-  compile: "max-autotune"
+  use_torch_compile: true
+  torch_compile_mode: "max-autotune"
   device: "cuda" # Change this to "cuda", "mps", "xpu", or "cpu" as needed
   model_type: "linear"
   sparsity: "2:4"

benchmarks/microbenchmarks/test/test_benchmark_inference.py

Lines changed: 3 additions & 7 deletions
@@ -8,7 +8,7 @@ class TestBenchmarkInference(unittest.TestCase):
     def setUp(self):
         self.params = {
             "high_precision_dtype": "torch.float32",  # Use float32 for testing
-            "compile": False,
+            "use_torch_compile": False,
             "device": "cpu",  # Use CPU for testing
             "model_type": "linear",
         }
@@ -23,13 +23,9 @@ def setUp(self):
     def test_run_inference(self):
         result = run(self.config)
 
-        # Check result contains all config attributes
-        for key in self.config.to_dict():
-            self.assertIn(key, result)
-
         # Check benchmark result is present and reasonable
-        self.assertIn("benchmark_model_inference_in_microseconds", result)
-        self.assertGreater(result["benchmark_model_inference_in_microseconds"], 0)
+        self.assertTrue(hasattr(result, "model_inference_time_in_ms"))
+        self.assertGreater(result.model_inference_time_in_ms, 0)
 
 
 if __name__ == "__main__":

benchmarks/microbenchmarks/test/test_benchmark_runner.py

Lines changed: 4 additions & 4 deletions
@@ -7,7 +7,7 @@
 from benchmarks.microbenchmarks.benchmark_runner import (
     get_shapes_for_config,
     load_benchmark_configs,
-    run_benchmarks_from_config,
+    run_inference_benchmarks_from_config,
 )
 
 
@@ -24,7 +24,7 @@ def setUp(self):
                 }
             ],
             "high_precision_dtype": "torch.float32",
-            "compile": False,
+            "use_torch_compile": False,
             "device": "cpu",
             "model_type": "linear",
         },
@@ -72,8 +72,8 @@ def test_load_benchmark_configs(self):
         self.assertEqual(configs[0].quantization, "baseline")
         self.assertEqual(configs[1].quantization, "int8wo")
 
-    def test_run_benchmarks_from_config(self):
-        run_benchmarks_from_config(self.config_path)
+    def test_run_inference_benchmarks_from_config(self):
+        run_inference_benchmarks_from_config(self.config_path)
         results_file = os.path.join(self.config["output_dir"], "results.csv")
         self.assertTrue(os.path.exists(results_file))
 
benchmarks/microbenchmarks/test/test_utils.py

Lines changed: 23 additions & 18 deletions
@@ -6,6 +6,7 @@
 
 from benchmarks.microbenchmarks.utils import (
     BenchmarkConfig,
+    BenchmarkResult,
     LNLinearSigmoid,
     ToyLinearModel,
     clean_caches,
@@ -18,8 +19,8 @@ class TestUtils(unittest.TestCase):
     def test_benchmark_config(self):
         params = {
             "high_precision_dtype": "torch.bfloat16",
-            "compile": True,
-            "compile_mode": "max-autotune",
+            "use_torch_compile": True,
+            "torch_compile_mode": "max-autotune",
             "device": "cuda",
             "model_type": "linear",
         }
@@ -36,8 +37,8 @@ def test_benchmark_config(self):
         self.assertEqual(config.k, 1024)
         self.assertEqual(config.n, 1024)
         self.assertEqual(config.high_precision_dtype, torch.bfloat16)
-        self.assertEqual(config.compile, True)
-        self.assertEqual(config.compile_mode, "max-autotune")
+        self.assertEqual(config.use_torch_compile, True)
+        self.assertEqual(config.torch_compile_mode, "max-autotune")
         self.assertEqual(config.device, "cuda")
         self.assertEqual(config.model_type, "linear")
         self.assertEqual(config.output_dir, "test_output")
@@ -88,20 +89,24 @@ def test_create_model_and_input(self):
 
     def test_generate_results_csv(self):
         results = [
-            {
-                "quantization": "int8wo",
-                "m": 1024,
-                "k": 1024,
-                "n": 1024,
-                "time_us": 100.0,
-            },
-            {
-                "quantization": "int4wo",
-                "m": 1024,
-                "k": 1024,
-                "n": 1024,
-                "time_us": 50.0,
-            },
+            BenchmarkResult(
+                BenchmarkConfig(
+                    quantization="int8wo",
+                    params={},
+                    shape_name="custom",
+                    shape=[1024, 1024, 1024],
+                    output_dir="test_output",
+                ),
+            ),
+            BenchmarkResult(
+                BenchmarkConfig(
+                    quantization="int4wo",
+                    params={},
+                    shape_name="custom",
+                    shape=[1024, 1024, 1024],
+                    output_dir="test_output",
+                ),
+            ),
         ]
 
         with tempfile.TemporaryDirectory() as tmp_dir:
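test_generate_results_csv now passes BenchmarkResult objects instead of raw dicts into generate_results_csv, whose implementation is in utils.py and not shown in this section. Purely as an illustration of what a compatible writer could look like, assuming a generate_results_csv(results, output_dir) style signature and a to_dict() method on results (both assumptions):

```python
# Illustrative only: a CSV writer compatible with how the test appears to call
# generate_results_csv(results, output_dir). The real function lives in
# benchmarks/microbenchmarks/utils.py and may differ in signature and columns.
import csv
import os
from typing import Any, List


def generate_results_csv_sketch(
    results: List[Any], output_dir: str, file_name: str = "results.csv"
) -> str:
    os.makedirs(output_dir, exist_ok=True)
    path = os.path.join(output_dir, file_name)
    # Assumes each result exposes to_dict(); see the BenchmarkResult sketch above.
    rows = [r.to_dict() for r in results]
    if rows:
        with open(path, "w", newline="") as f:
            writer = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
            writer.writeheader()
            writer.writerows(rows)
    return path
```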
