
Commit 1f8ae37

remi-or authored and yonigozlan committed
Reduce the number of benchmark in the CI (huggingface#42008)
Changed how benchmark cfgs are chosen
1 parent 6a5d5ce · commit 1f8ae37

File tree

3 files changed: +88 -120 lines


.github/workflows/benchmark.yml

Lines changed: 1 addition & 1 deletion

@@ -52,7 +52,7 @@ jobs:
             commit_id=$GITHUB_SHA
           fi
           commit_msg=$(git show -s --format=%s | cut -c1-70)
-          python3 benchmark_v2/run_benchmarks.py -b 32 -s 128 -n 256 --cross-generate --branch-name "$BRANCH_NAME" --commit-id "$commit_id" --commit-message "$commit_msg" --model-id "$MODEL_ID" --log-level INFO --push-result-to-dataset "$DATASET_ID"
+          python3 benchmark_v2/run_benchmarks.py -b 32 -s 128 -n 256 --level 2 --branch-name "$BRANCH_NAME" --commit-id "$commit_id" --commit-message "$commit_msg" --model-id "$MODEL_ID" --log-level INFO --push-result-to-dataset "$DATASET_ID"
         env:
           HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
           PUSH_TO_HUB_TOKEN: ${{ secrets.PUSH_TO_HUB_TOKEN }}

benchmark_v2/framework/benchmark_config.py

Lines changed: 63 additions & 54 deletions

@@ -1,4 +1,5 @@
 import hashlib
+import itertools
 import json
 import logging
 from typing import Any

@@ -146,60 +147,68 @@ def from_dict(cls, data: dict[str, Any], skip_validity_check: bool = False) -> "
     )


-def cross_generate_configs(
-    attn_impl_and_sdpa_backend: list[tuple[str, str | None]],
-    compiled_mode: list[str | None],
-    kernelized: list[bool],
-    warmup_iterations: int = 5,
-    measurement_iterations: int = 20,
-    batch_size: int = 1,
-    sequence_length: int = 128,
-    num_tokens_to_generate: int = 128,
-    gpu_monitoring: bool = True,
+def adapt_configs(
+    configs: list[BenchmarkConfig],
+    warmup_iterations: int | list[int] = 5,
+    measurement_iterations: int | list[int] = 20,
+    batch_size: int | list[int] = 1,
+    sequence_length: int | list[int] = 128,
+    num_tokens_to_generate: int | list[int] = 128,
+    gpu_monitoring: bool | list[bool] = True,
 ) -> list[BenchmarkConfig]:
-    # Create kwargs common to all configs
-    kwargs = {
-        "warmup_iterations": warmup_iterations,
-        "measurement_iterations": measurement_iterations,
-        "batch_size": batch_size,
-        "sequence_length": sequence_length,
-        "num_tokens_to_generate": num_tokens_to_generate,
-        "gpu_monitoring": gpu_monitoring,
-    }
-    # Cross-generate all combinations of attn_implementation, compiled_mode, and kernelized
+    parameters = (
+        x if isinstance(x, list) else [x]
+        for x in [
+            warmup_iterations,
+            measurement_iterations,
+            batch_size,
+            sequence_length,
+            num_tokens_to_generate,
+            gpu_monitoring,
+        ]
+    )
+    iterator = itertools.product(*parameters)
+
+    adapted_configs = []
+    for warmup_iters, measurement_iters, bs, seqlen, ntok, monitor in iterator:
+        for config in configs:
+            config = config.to_dict()
+            config["warmup_iterations"] = warmup_iters
+            config["measurement_iterations"] = measurement_iters
+            config["batch_size"] = bs
+            config["sequence_length"] = seqlen
+            config["num_tokens_to_generate"] = ntok
+            config["gpu_monitoring"] = monitor
+            adapted_configs.append(BenchmarkConfig.from_dict(config))
+    return adapted_configs
+
+
+def get_config_by_level(level: int) -> list[BenchmarkConfig]:
     configs = []
-    for attn_implementation, sdpa_backend in list(dict.fromkeys(attn_impl_and_sdpa_backend)):
-        for cm in list(dict.fromkeys(compiled_mode)):
-            for kernelize_on in list(dict.fromkeys(kernelized)):
-                config = BenchmarkConfig(
-                    attn_implementation=attn_implementation,
-                    sdpa_backend=sdpa_backend,
-                    compile_mode=cm,
-                    kernelize=kernelize_on,
-                    **kwargs,
-                )
-                configs.append(config)
+    # Early return if level is greater than 3: we generate all combinations of configs, maybe even w/ all compile modes
+    if level >= 3:
+        for attn_implementation, sdpa_backend in BenchmarkConfig.all_attn_implementations:
+            # Usually there is not much to gain by compiling with other modes, but we allow it for level 4
+            compile_modes = BenchmarkConfig.all_compiled_modes if level >= 4 else [None, "default"]
+            for cm in compile_modes:
+                for kernelize_on in [False, KERNELIZATION_AVAILABLE]:
+                    configs.append(
+                        BenchmarkConfig(
+                            attn_implementation=attn_implementation,
+                            sdpa_backend=sdpa_backend,
+                            compile_mode=cm,
+                            kernelize=kernelize_on,
+                        )
+                    )
+        return configs
+    # Otherwise, we add the configs for the given level
+    if level >= 0:
+        configs.append(BenchmarkConfig(attn_implementation="flex_attention", compile_mode="default"))
+    if level >= 1:
+        configs.append(BenchmarkConfig(attn_implementation="flash_attention_2"))
+        configs.append(BenchmarkConfig(attn_implementation="eager", compile_mode="default"))
+    if level >= 2:
+        configs.append(BenchmarkConfig(attn_implementation="sdpa", compile_mode="default"))
+        configs.append(BenchmarkConfig(attn_implementation="flex_attention", compile_mode="default", kernelize=True))
+        configs.append(BenchmarkConfig(attn_implementation="flash_attention_2", kernelize=True))
     return configs
-
-
-def generate_main_configs(
-    warmup_iterations: int = 5,
-    measurement_iterations: int = 20,
-    batch_size: int = 1,
-    sequence_length: int = 128,
-    num_tokens_to_generate: int = 128,
-) -> list[BenchmarkConfig]:
-    # Create kwargs common to all configs
-    kwargs = {
-        "warmup_iterations": warmup_iterations,
-        "measurement_iterations": measurement_iterations,
-        "batch_size": batch_size,
-        "sequence_length": sequence_length,
-        "num_tokens_to_generate": num_tokens_to_generate,
-    }
-    return [  # TODO: test max-autotune instead of default
-        BenchmarkConfig(attn_implementation="flex_attention", compile_mode="default", gpu_monitoring=False, **kwargs),
-        BenchmarkConfig(attn_implementation="flex_attention", compile_mode="default", gpu_monitoring=True, **kwargs),
-        BenchmarkConfig(attn_implementation="eager", compile_mode="default", gpu_monitoring=True, **kwargs),
-        BenchmarkConfig(attn_implementation="flash_attention_2", gpu_monitoring=True, **kwargs),
-    ]
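
The new adapt_configs helper accepts each runtime parameter either as a single value or as a list, normalizes every value to a list, and takes the Cartesian product before stamping the resulting combinations onto copies of each base config. A minimal, self-contained sketch of that normalize-then-product pattern (the expand helper and the parameter values below are illustrative only, not part of the repository):

import itertools


def expand(**params):
    # Normalize scalars to one-element lists so itertools.product can iterate over every value.
    as_lists = {key: value if isinstance(value, list) else [value] for key, value in params.items()}
    keys = list(as_lists)
    return [dict(zip(keys, combo)) for combo in itertools.product(*as_lists.values())]


# Scalars contribute one choice each; lists fan out into one combination per value.
combos = expand(batch_size=[1, 32], sequence_length=128, num_tokens_to_generate=[128, 256])
print(len(combos))  # 4 = 2 batch sizes x 1 sequence length x 2 generation lengths
for combo in combos:
    print(combo)

adapt_configs applies the same expansion via a generator expression fed into itertools.product(*parameters), then rebuilds each combination into a BenchmarkConfig through to_dict / from_dict.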

benchmark_v2/run_benchmarks.py

Lines changed: 24 additions & 65 deletions

@@ -23,12 +23,7 @@
 import sys
 import uuid

-from framework.benchmark_config import (
-    KERNELIZATION_AVAILABLE,
-    BenchmarkConfig,
-    cross_generate_configs,
-    generate_main_configs,
-)
+from framework.benchmark_config import adapt_configs, get_config_by_level
 from framework.benchmark_runner import BenchmarkRunner


@@ -45,7 +40,14 @@
     parser.add_argument("--sequence-length", "-s", type=int, nargs="+", help="Sequence length")
     parser.add_argument("--num-tokens-to-generate", "-n", type=int, nargs="+", help="Number of tokens to generate")

-    parser.add_argument("--cross-generate", action="store_true", help="Cross-generate all combinations of configs")
+    parser.add_argument(
+        "--level",
+        type=int,
+        default=1,
+        help="Level of coverage for the benchmark. 0: only the main config, 1: a few important configs, 2: a config for"
+        " each attn implementation an option, 3: cross-generate all combinations of configs, 4: cross-generate all"
+        " combinations of configs w/ all compile modes",
+    )
     parser.add_argument("--num-tokens-to-profile", "-p", type=int, default=0, help="Number of tokens to profile")

     parser.add_argument("--branch-name", type=str, help="Git branch name")

@@ -84,67 +86,24 @@
             "At least one of the arguments --batch-size, --sequence-length, or --num-tokens-to-generate is required"
         )

-    # If there is only one (batch_size, sequence_length, num_tokens_to_generate), we benchmark across configs
-    elif len(args.batch_size) * len(args.sequence_length) * len(args.num_tokens_to_generate) == 1:
-        if args.cross_generate:
-            benchmark_configs = cross_generate_configs(
-                attn_impl_and_sdpa_backend=BenchmarkConfig.all_attn_implementations,
-                compiled_mode=[None, "default"],  # usually there is not much to gain by compiling with other modes
-                kernelized=[False, KERNELIZATION_AVAILABLE],
-                warmup_iterations=args.warmup,
-                measurement_iterations=args.iterations,
-                batch_size=args.batch_size[0],
-                sequence_length=args.sequence_length[0],
-                num_tokens_to_generate=args.num_tokens_to_generate[0],
-                gpu_monitoring=not args.no_gpu_monitoring,
-            )
-        else:
-            benchmark_configs = generate_main_configs(
-                warmup_iterations=args.warmup,
-                measurement_iterations=args.iterations,
-                batch_size=args.batch_size[0],
-                sequence_length=args.sequence_length[0],
-                num_tokens_to_generate=args.num_tokens_to_generate[0],
-            )
-
-    # Otherwise, we benchmark across all combinations of dimensions
-    else:
-        main_config = generate_main_configs(
-            warmup_iterations=args.warmup,
-            measurement_iterations=args.iterations,
-            batch_size=args.batch_size[0],
-            sequence_length=args.sequence_length[0],
-            num_tokens_to_generate=args.num_tokens_to_generate[0],
-        )[0]
-        benchmark_configs = []
-        for num_tokens_to_generate in args.num_tokens_to_generate:
-            for sequence_length in args.sequence_length:
-                for batch_size in args.batch_size:
-                    cfg_dict = main_config.to_dict()
-                    cfg_dict["batch_size"] = batch_size
-                    cfg_dict["sequence_length"] = sequence_length
-                    cfg_dict["num_tokens_to_generate"] = num_tokens_to_generate
-                    cfg_dict.pop("name")
-                    benchmark_configs.append(BenchmarkConfig.from_dict(cfg_dict))
-
-    runner = BenchmarkRunner(
-        logger,
-        args.output_dir,
-        args.branch_name,
-        args.commit_id,
-        args.commit_message,
+    # Get the configs for the given coverage level
+    configs = get_config_by_level(args.level)
+    # Adapt the configs to the given arguments
+    configs = adapt_configs(
+        configs,
+        args.warmup,
+        args.iterations,
+        args.batch_size,
+        args.sequence_length,
+        args.num_tokens_to_generate,
+        not args.no_gpu_monitoring,
     )
+
+    runner = BenchmarkRunner(logger, args.output_dir, args.branch_name, args.commit_id, args.commit_message)
     timestamp, results = runner.run_benchmarks(
-        args.model_id,
-        benchmark_configs,
-        args.num_tokens_to_profile,
-        pretty_print_summary=True,
+        args.model_id, configs, args.num_tokens_to_profile, pretty_print_summary=True
     )

     dataset_id = args.push_result_to_dataset
     if dataset_id is not None and len(results) > 0:
-        runner.push_results_to_hub(
-            dataset_id,
-            results,
-            timestamp,
-        )
+        runner.push_results_to_hub(dataset_id, results, timestamp)
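
With this change, config selection is a two-step pipeline: get_config_by_level picks the attention/compile/kernelize combinations for the requested coverage level, and adapt_configs fans that set out over the requested warmup and measurement counts plus batch, sequence, and generation lengths. A rough sketch of how the two entry points could be exercised, assuming it is run from inside benchmark_v2/ so that the framework package (and its heavy dependencies) import cleanly; the counts printed for levels 3 and 4 depend on KERNELIZATION_AVAILABLE and on which attention backends the environment exposes:

# Sketch only: run from benchmark_v2/ so `framework` is importable.
from framework.benchmark_config import adapt_configs, get_config_by_level

for level in range(5):
    base_configs = get_config_by_level(level)
    # Mirror the CI invocation (-b 32 -s 128 -n 256 --level 2): one value per dimension,
    # so the run count equals the number of base configs for the level.
    adapted = adapt_configs(base_configs, batch_size=32, sequence_length=128, num_tokens_to_generate=256)
    print(f"level {level}: {len(adapted)} benchmark run(s)")

Passing lists instead of scalars (for example batch_size=[1, 32]) multiplies the run count accordingly, which is how sweeps over the -b/-s/-n values are handled now that the old per-dimension loop in run_benchmarks.py is gone.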
