[TESTING] Remove the fast_flush parameter from do_bench (#4485)
The parameter was introduced in #840, and it looks like it exists mainly to ease migration. In general there's no reason to use fast_flush=False, so let's remove it.

---------

Co-authored-by: Keren Zhou <kerenzhou@openai.com>
int3 and Jokeren authored Oct 7, 2024
1 parent c54f988 commit 2cc227d
Showing 1 changed file with 2 additions and 8 deletions.
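
For downstream code the migration is mechanical: callers simply stop passing the keyword. A minimal before/after sketch, assuming a placeholder workload (the tensor and lambda below are illustrative, not from this commit):

    import torch
    from triton.testing import do_bench

    x = torch.randn(4096, device="cuda")  # placeholder input

    # Before this commit a caller could write:
    #   ms = do_bench(lambda: x * 2, fast_flush=True)
    # After it, the keyword is gone; just drop it:
    ms = do_bench(lambda: x * 2)  # mean runtime in ms (return_mode="mean" default)
    print(f"{ms:.4f} ms")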
10 changes: 2 additions & 8 deletions python/triton/testing.py
@@ -92,8 +92,7 @@ def do_bench_cudagraph(fn, rep=20, grad_to_none=None, quantiles=None, return_mod
     return _summarize_statistics(torch.tensor(ret), quantiles, return_mode)
 
 
-def do_bench(fn, warmup=25, rep=100, grad_to_none=None, quantiles=None, fast_flush=True, return_mode="mean",
-             device_type="cuda"):
+def do_bench(fn, warmup=25, rep=100, grad_to_none=None, quantiles=None, return_mode="mean", device_type="cuda"):
     """
     Benchmark the runtime of the provided function. By default, return the median runtime of :code:`fn` along with
     the 20-th and 80-th performance percentile.
@@ -108,8 +107,6 @@ def do_bench(fn, warmup=25, rep=100, grad_to_none=None, quantiles=None, fast_flu
     :type grad_to_none: torch.tensor, optional
     :param quantiles: Performance percentile to return in addition to the median.
     :type quantiles: list[float], optional
-    :param fast_flush: Use faster kernel to flush L2 cache between measurements
-    :type fast_flush: bool, default is True
     :param return_mode: The statistical measure to return. Options are "min", "max", "mean", "median", or "all" Default is "mean". :type return_mode: str
     """
     assert return_mode in ["min", "max", "mean", "median", "all"]
@@ -124,10 +121,7 @@ def do_bench(fn, warmup=25, rep=100, grad_to_none=None, quantiles=None, fast_flu
     # before each kernel call to make sure that the L2 cache
     # doesn't contain any input data before the run
     cache_size = 256 * 1024 * 1024
-    if fast_flush:
-        cache = torch.empty(int(cache_size // 4), dtype=torch.int, device=device_type)
-    else:
-        cache = torch.empty(int(cache_size), dtype=torch.int8, device=device_type)
+    cache = torch.empty(int(cache_size // 4), dtype=torch.int, device=device_type)
 
     # Estimate the runtime of the function
     start_event = di.Event(enable_timing=True)
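Note on the retained branch: both variants wrote the same 256 MB (cache_size int8 elements versus cache_size // 4 int32 elements), so fast_flush=False flushed no extra cache; per the removed docstring, the int path merely used a faster flush kernel. The pattern itself, zeroing a buffer larger than L2 before each timed run so the benchmarked kernel always starts from a cold cache, can be reproduced outside Triton. A standalone sketch using CUDA events (the helper below is illustrative, not part of triton.testing):

    import torch

    def bench_with_l2_flush(fn, n_iters=10):
        # 256 MB buffer, matching do_bench's cache_size; larger than the
        # L2 cache of current GPUs, so zeroing it evicts fn's inputs.
        cache = torch.empty(256 * 1024 * 1024 // 4, dtype=torch.int, device="cuda")
        times = []
        for _ in range(n_iters):
            cache.zero_()  # flush L2 before each measurement
            start = torch.cuda.Event(enable_timing=True)
            end = torch.cuda.Event(enable_timing=True)
            start.record()
            fn()
            end.record()
            torch.cuda.synchronize()  # ensure both events have completed
            times.append(start.elapsed_time(end))  # milliseconds
        return sum(times) / len(times)

    # Usage: bench_with_l2_flush(lambda: some_kernel(args))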
