
Commit 1025e82

ahmadsharif1 authored and facebook-github-bot committed
[torchcodec] Improve benchmark to test all combinations
Summary: Test all combinations of CPU+GPU for decode+preproc.

Reviewed By: kaiyuey
Differential Revision: D60692261
fbshipit-source-id: 791d7530fd9a3b10ee614c3744663ae7c95c057c
1 parent 0595e3c
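The gist of the change, as a minimal illustrative sketch (not part of the commit): the benchmark now sweeps every decode device against every resize mode and labels each combination. The resize list below is the new --resize_devices default from the diff; the decode-device list is an assumed example, since the --devices default is not shown here.

import itertools

decode_devices = ["cuda:0", "cpu"]  # assumed example; the --devices default is not shown in this diff
resize_devices = ["cuda:0", "cpu", "native", "none"]  # new --resize_devices default

for decode, resize in itertools.product(decode_devices, resize_devices):
    # Labels mirror the benchmark output: D=decode device, R=resize mode.
    # "native" resizes inside the decode step; "none" skips resizing entirely.
    decode_label = "cuda" if "cuda" in decode else decode
    resize_label = "cuda" if "cuda" in resize else resize
    print(f"D={decode_label},R={resize_label}")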


benchmarks/decoders/gpu_benchmark.py

Lines changed: 79 additions & 41 deletions
@@ -8,31 +8,57 @@
 import torchcodec
 from torchvision.transforms import Resize
 
+RESIZED_WIDTH = 256
+RESIZED_HEIGHT = 256
 
-def transfer_and_resize_frame(frame):
+
+def transfer_and_resize_frame(frame, resize_device_string):
     # This should be a no-op if the frame is already on the GPU.
-    frame = frame.to("cuda:0")
-    frame = Resize((256, 256))(frame)
+    frame = frame.to(resize_device_string)
+    frame = Resize((RESIZED_HEIGHT, RESIZED_WIDTH))(frame)
     return frame
 
 
-def decode_full_video(video_path, device_string, do_gpu_preproc):
-    decoder = torchcodec.decoders.SimpleVideoDecoder(
-        video_path, device=torch.device(device_string)
+def decode_full_video(video_path, decode_device_string, resize_device_string):
+    # We use the core API instead of SimpleVideoDecoder because the core API
+    # allows us to natively resize as part of the decode step.
+    print(f"{decode_device_string=} {resize_device_string=}")
+    decoder = torchcodec.decoders._core.create_from_file(video_path)
+    num_threads = None
+    if "cuda" in decode_device_string:
+        num_threads = 1
+    width = None
+    height = None
+    if "native" in resize_device_string:
+        width = RESIZED_WIDTH
+        height = RESIZED_HEIGHT
+    torchcodec.decoders._core.add_video_stream(
+        decoder,
+        stream_index=-1,
+        device_string=decode_device_string,
+        num_threads=num_threads,
+        width=width,
+        height=height,
     )
+
     start_time = time.time()
     frame_count = 0
-    for frame in decoder:
-        # You can do a resize to simulate extra preproc work that happens
-        # on the GPU by uncommenting the following line:
-        if do_gpu_preproc:
-            frame = transfer_and_resize_frame(frame)
-        frame_count += 1
+    while True:
+        try:
+            frame, *_ = torchcodec.decoders._core.get_next_frame(decoder)
+            if resize_device_string != "none" and "native" not in resize_device_string:
+                frame = transfer_and_resize_frame(frame, resize_device_string)
+
+            frame_count += 1
+        except Exception as e:
+            print("EXCEPTION", e)
+            break
+
     end_time = time.time()
     elapsed = end_time - start_time
     fps = frame_count / (end_time - start_time)
     print(
-        f"****** DECODED full video {device_string=} {frame_count=} {elapsed=} {fps=}"
+        f"****** DECODED full video {decode_device_string=} {frame_count=} {elapsed=} {fps=}"
     )
     return frame_count, end_time - start_time

@@ -45,6 +71,12 @@ def main():
         type=str,
         help="Comma-separated devices to test decoding on.",
     )
+    parser.add_argument(
+        "--resize_devices",
+        default="cuda:0,cpu,native,none",
+        type=str,
+        help="Comma-separated devices to test preproc (resize) on. Use 'none' to specify no resize.",
+    )
     parser.add_argument(
         "--video",
         type=str,
@@ -60,15 +92,6 @@ def main():
             "to measure the cold start time."
         ),
     )
-    parser.add_argument(
-        "--do_gpu_preproc",
-        action=argparse.BooleanOptionalAction,
-        default=True,
-        help=(
-            "Do a transfer to GPU and resize operation after the decode to "
-            "simulate a real-world transform."
-        ),
-    )
     args = parser.parse_args()
     video_path = args.video
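Taken together, the two argument-parser hunks swap the boolean --do_gpu_preproc switch for a --resize_devices list. A minimal sketch of the resulting CLI surface, assuming the surrounding parser setup (the --devices default shown here is an assumption; only that flag's help text appears in the diff):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--devices",
    default="cuda:0,cpu",  # assumed default; the diff only shows this flag's help text
    type=str,
    help="Comma-separated devices to test decoding on.",
)
parser.add_argument(
    "--resize_devices",
    default="cuda:0,cpu,native,none",
    type=str,
    help="Comma-separated devices to test preproc (resize) on. Use 'none' to specify no resize.",
)
args = parser.parse_args()
print(args.devices.split(","), args.resize_devices.split(","))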

@@ -78,29 +101,44 @@ def main():
             decode_full_video(video_path, device)
         return
 
-    label = "Decode"
-    if args.do_gpu_preproc:
-        label += " + GPU Preproc"
-    label += " Time"
+    resize_devices = args.resize_devices.split(",")
+    resize_devices = [d for d in resize_devices if d != ""]
+    if len(resize_devices) == 0:
+        resize_devices.append("none")
+
+    label = "Decode+Resize Time"
 
     results = []
-    for device in args.devices.split(","):
-        print("device", device)
-        t = benchmark.Timer(
-            stmt="decode_full_video(video_path, device, do_gpu_preproc)",
-            globals={
-                "device": device,
-                "video_path": video_path,
-                "decode_full_video": decode_full_video,
-                "do_gpu_preproc": args.do_gpu_preproc,
-            },
-            label=label,
-            sub_label=f"video={os.path.basename(video_path)}",
-            description=f"decode_device={device}",
-        ).blocked_autorange()
-        results.append(t)
+    for decode_device_string in args.devices.split(","):
+        for resize_device_string in resize_devices:
+            decode_label = decode_device_string
+            if "cuda" in decode_label:
+                # Shorten "cuda:0" to "cuda"
+                decode_label = "cuda"
+            resize_label = resize_device_string
+            if "cuda" in resize_device_string:
+                # Shorten "cuda:0" to "cuda"
+                resize_label = "cuda"
+            print("decode_device", decode_device_string)
+            print("resize_device", resize_device_string)
+            t = benchmark.Timer(
+                stmt="decode_full_video(video_path, decode_device_string, resize_device_string)",
+                globals={
+                    "decode_device_string": decode_device_string,
+                    "video_path": video_path,
+                    "decode_full_video": decode_full_video,
+                    "resize_device_string": resize_device_string,
+                },
+                label=label,
+                sub_label=f"video={os.path.basename(video_path)}",
+                description=f"D={decode_label},R={resize_label}",
+            ).blocked_autorange()
+            results.append(t)
     compare = benchmark.Compare(results)
     compare.print()
+    print("Key: D=Decode, R=Resize")
+    print("Native resize is done as part of the decode step")
+    print("none resize means there is no resize step -- native or otherwise")
 
 
 if __name__ == "__main__":
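
For readers who prefer straight code over a diff, here is the post-commit decode path assembled from the added lines above. It is an illustrative sketch rather than a verbatim copy of the final file: the per-frame prints are dropped and the fps computation is condensed, and it assumes the file's existing imports.

import time

import torchcodec
from torchvision.transforms import Resize

RESIZED_WIDTH = 256
RESIZED_HEIGHT = 256


def transfer_and_resize_frame(frame, resize_device_string):
    # This should be a no-op if the frame is already on the resize device.
    frame = frame.to(resize_device_string)
    frame = Resize((RESIZED_HEIGHT, RESIZED_WIDTH))(frame)
    return frame


def decode_full_video(video_path, decode_device_string, resize_device_string):
    # The core API is used instead of SimpleVideoDecoder because it can resize
    # natively as part of the decode step (the "native" resize mode).
    decoder = torchcodec.decoders._core.create_from_file(video_path)
    num_threads = None
    if "cuda" in decode_device_string:
        num_threads = 1
    width = None
    height = None
    if "native" in resize_device_string:
        # Ask the decoder itself to produce resized frames.
        width = RESIZED_WIDTH
        height = RESIZED_HEIGHT
    torchcodec.decoders._core.add_video_stream(
        decoder,
        stream_index=-1,
        device_string=decode_device_string,
        num_threads=num_threads,
        width=width,
        height=height,
    )

    start_time = time.time()
    frame_count = 0
    while True:
        try:
            frame, *_ = torchcodec.decoders._core.get_next_frame(decoder)
            # "none" skips resizing; "native" was already handled by the decoder.
            if resize_device_string != "none" and "native" not in resize_device_string:
                frame = transfer_and_resize_frame(frame, resize_device_string)
            frame_count += 1
        except Exception:
            # As in the diff, an exception from get_next_frame signals end of stream.
            break

    elapsed = time.time() - start_time
    return frame_count, elapsed

main() then times each (decode device, resize mode) pair with benchmark.Timer and prints a benchmark.Compare table whose descriptions look like D=cuda,R=native (presumably torch.utils.benchmark, given the Timer/Compare/blocked_autorange API used in the last hunk).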
