[functorch] refactor code, add docstring (pytorch/functorch#908)
* refactor code, add docstring

* lint

* lint

* lint

* lint
yushangdi authored and bigfootjon committed Jul 21, 2022
1 parent f6dea84 commit 8445222
Showing 2 changed files with 154 additions and 64 deletions.
73 changes: 10 additions & 63 deletions functorch/benchmarks/chrome_trace_parser.py
@@ -1,65 +1,29 @@
#!/usr/bin/env python
import argparse
import json

import os
import logging
import pandas as pd

from functorch._src.benchmark_utils import compute_utilization

# process the chrome traces output by the pytorch profiler
# require the json input file's name to be in format {model_name}_chrome_trace_*.json
# the runtimes file should have format (model_name, time)

gpu_pids = []

def is_gpu_compute_event(event):
global gpu_pids
return "pid" in event and event["pid"] in gpu_pids and "ph" in event and event["ph"] == "X"

def get_events(filename):
f = open(filename)
data = json.load(f)
events = data["traceEvents"]
return events

def get_sorted_gpu_events(events):
sorted_gpu_events = []
for event in events:
if(not is_gpu_compute_event(event)):
continue
sorted_gpu_events.append(event)
return sorted(sorted_gpu_events, key=lambda x: x["ts"])
# the runtimes file should have format (model_name, runtime)

def get_sorted_gpu_mm_conv_events(events):
def is_mm_conv_event(event):
return "name" in event and ("gemm" in event["name"] or "conv" in event["name"]
or "cutlass" in event["name"] or "wgrad" in event["name"])
gpu_events = get_sorted_gpu_events(events)
sorted_events = []
for event in gpu_events:
if(not is_mm_conv_event(event)):
continue
sorted_events.append(event)
return sorted_events

def get_duration(sorted_gpu_events):
event = sorted_gpu_events[0]
current_end_time = event["ts"] + event["dur"]
total_duration = event["dur"]
for event in sorted_gpu_events[1:]:
start_time = max(event["ts"], current_end_time)
end_time = event["ts"] + event["dur"]
total_duration = total_duration + max(end_time - start_time, 0)
current_end_time = max(current_end_time, end_time)
return total_duration

def get_model_name(filename):
"""
Get model name from a file in format {model_name}_chrome_trace_*.json
"""
_, tail = os.path.split(filename)
modelname = tail[:tail.find("_chrome_trace")]
return modelname

def get_total_length(run_times_df, modelname):
return float(run_times_df[run_times_df["name"]==modelname]["runtime"])


def main():
parser = argparse.ArgumentParser()
group = parser.add_mutually_exclusive_group(required=True)
@@ -74,7 +38,6 @@ def main():
)
args = parser.parse_args()

run_times_df = pd.read_csv(args.runtime)

if args.filename:
filenames = args.filename
@@ -90,32 +53,16 @@ def main():

print(f"modelname, GPU Utilization, MM and Conv time")

run_times_df = pd.read_csv(args.runtime)
for filename in filenames:
try:
events = get_events(filename)

# get pids of GPU events
global gpu_pids
for event in events:
if "name" not in event:
continue
if event["name"] == 'process_labels' and "GPU" in event["args"]["labels"]:
gpu_pids.append(event["pid"])

modelname = get_model_name(filename)
total_length = get_total_length(run_times_df, modelname) * 1e6

sorted_gpu_events = get_sorted_gpu_events(events)
utilization = get_duration(sorted_gpu_events) / total_length

sorted_gpu_mm_conv_events = get_sorted_gpu_mm_conv_events(events)
mm_conv_utilization = get_duration(sorted_gpu_mm_conv_events) / total_length

utilization, mm_conv_utilization = compute_utilization(filename, total_length)
print(f"{modelname}, {utilization}, {mm_conv_utilization}")
except:
logging.exception(f"{filename}, ERROR")
print(f"{filename}, ERROR")


if __name__ == "__main__":
main()
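For context, the parser expects two inputs whose layout is only described in the comments above: a trace file named `{model_name}_chrome_trace_*.json` and a runtimes CSV with `name` and `runtime` columns (read via `pd.read_csv`). A minimal sketch of those inputs follows — the model name, runtime value, and file paths are hypothetical examples, not part of this commit:

```python
import pandas as pd

# Runtimes CSV consumed by get_total_length(): columns "name" and "runtime"
# (runtime in seconds). The model name "resnet18" and the value 0.123 are
# made-up examples.
pd.DataFrame({"name": ["resnet18"], "runtime": [0.123]}).to_csv(
    "runtimes.csv", index=False
)

# Trace file naming convention expected by get_model_name(): everything
# before "_chrome_trace" is treated as the model name, so a file called
# "resnet18_chrome_trace_0.json" maps back to "resnet18".
```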
145 changes: 144 additions & 1 deletion functorch/functorch/_src/benchmark_utils.py
@@ -1,13 +1,23 @@
import time
import os
import json

import torch
from torch.profiler import profile
from torch.profiler import profile, ProfilerActivity


def synchronize():
pass


class NullContext:
def __enter__(self):
pass

def __exit__(self, exc_type, exc_val, exc_tb):
pass


def dump_chrome_trace(f, input, trace_filename, optimize_ctx, activities, num_runs=1,
devices=None, kwargs_for_f=None, kwargs_for_profiler=None):
"""
@@ -55,3 +65,136 @@ def dump_chrome_trace(f, input, trace_filename, optimize_ctx, activities, num_ru
prof.export_chrome_trace(trace_filename)

return timing


def get_chrome_trace_events(filename):
f = open(filename)
data = json.load(f)
events = data["traceEvents"]
return events


def is_gpu_compute_event(event):
global gpu_pids
return "pid" in event and event["pid"] in gpu_pids and "ph" in event and event["ph"] == "X"


def get_sorted_gpu_events(events):
sorted_gpu_events = []
for event in events:
if(not is_gpu_compute_event(event)):
continue
sorted_gpu_events.append(event)
return sorted(sorted_gpu_events, key=lambda x: x["ts"])


def get_duration(sorted_gpu_events):
if len(sorted_gpu_events) == 0:
return 0
event = sorted_gpu_events[0]
current_end_time = event["ts"] + event["dur"]
total_duration = event["dur"]
for event in sorted_gpu_events[1:]:
start_time = max(event["ts"], current_end_time)
end_time = event["ts"] + event["dur"]
total_duration = total_duration + max(end_time - start_time, 0)
current_end_time = max(current_end_time, end_time)
return total_duration


def get_sorted_gpu_mm_conv_events(events):
def is_mm_conv_event(event):
return "name" in event and ("gemm" in event["name"] or "conv" in event["name"]
or "cutlass" in event["name"] or "wgrad" in event["name"])
gpu_events = get_sorted_gpu_events(events)
sorted_events = []
for event in gpu_events:
if(not is_mm_conv_event(event)):
continue
sorted_events.append(event)
return sorted_events


gpu_pids = []


def compute_utilization(filename: str, total_length: float):
"""
Process the chrome trace output by the PyTorch profiler to compute GPU utilization
and the percentage of time spent on matmul and convolution.
Args:
filename(str): Name of the chrome trace file produced by the PyTorch profiler
total_length(float): total run time of the process without the profiler, in seconds
Returns:
tuple: (GPU utilization, percentage of time spent on matmul and convolution)
"""
events = get_chrome_trace_events(filename)

# get pids of GPU events
global gpu_pids
gpu_pids = []
for event in events:
if "name" not in event:
continue
if event["name"] == 'process_labels' and "GPU" in event["args"]["labels"]:
gpu_pids.append(event["pid"])

total_length = total_length * 1e6
sorted_gpu_events = get_sorted_gpu_events(events)
utilization = get_duration(sorted_gpu_events) / total_length

sorted_gpu_mm_conv_events = get_sorted_gpu_mm_conv_events(events)
mm_conv_utilization = get_duration(sorted_gpu_mm_conv_events) / total_length

return utilization, mm_conv_utilization


def benchmark_utilization(f, input, trace_folder, optimize_ctx=None, trace_file_name="tmp_chrome_trace", num_runs=1):
"""
Benchmark the GPU utilization and the percentage of time spent on matmul and convolution operations when
running f(input) under [optimize_ctx] [num_runs] times.
It will produce a chrome trace file in trace_folder/trace_file_name.json
Example:
```
def f(a):
return a.sum()
a = torch.rand(2**20, device="cuda")
utilization, mm_conv_utilization = benchmark_utilization(f, a, "tmp", trace_file_name = "tmp_chrome_trace")
```
Args:
f: function to benchmark
input: input to :attr:`f`
trace_folder: name of the folder to store the chrome trace
optimize_ctx: the context in which f will run
trace_file_name: name of the dumped chrome trace file, defaults to "tmp_chrome_trace"
num_runs: number of times to run f, excluding the warm-up runs; defaults to 1.
Returns:
tuple: (GPU utilization, percentage of time spent on matmul and convolution)
"""
isExist = os.path.exists(trace_folder)
if not isExist:
os.makedirs(trace_folder)
print("create folder " + trace_folder)

if optimize_ctx is None:
optimize_ctx = NullContext()

chrome_trace_file_name = os.path.join(trace_folder, trace_file_name + ".json")
total_length = dump_chrome_trace(f, input, chrome_trace_file_name, optimize_ctx,
[ProfilerActivity.CUDA], num_runs=num_runs, devices="cuda")
utilization, mm_conv_utilization = compute_utilization(chrome_trace_file_name, total_length)

return utilization, mm_conv_utilization
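The utilization numbers hinge on `get_duration`, which merges overlapping GPU events so concurrent kernels are not double-counted. Below is a minimal sketch of that interval-union logic with synthetic events — the timestamps and durations are made up, and the import path assumes the `functorch._src` layout after this commit:

```python
from functorch._src.benchmark_utils import get_duration

# Synthetic events, already sorted by start timestamp (as
# get_sorted_gpu_events would return them); the values are made up.
# Two overlapping kernels (0-10 and 5-12) plus a disjoint one (20-25).
events = [
    {"ts": 0, "dur": 10},
    {"ts": 5, "dur": 7},
    {"ts": 20, "dur": 5},
]

# The union of busy time is 12 + 5 = 17, not the naive sum 10 + 7 + 5 = 22.
print(get_duration(events))  # 17
```

`compute_utilization` then divides these merged durations by the un-profiled runtime (converted to microseconds) to produce the two ratios the benchmark script prints.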
