 import torchcodec
 from torchvision.transforms import Resize
 
-RESIZED_WIDTH = 256
-RESIZED_HEIGHT = 256
 
-
-def transfer_and_resize_frame(frame, resize_device_string):
+def transfer_and_resize_frame(frame):
     # This should be a no-op if the frame is already on the GPU.
-    frame = frame.to(resize_device_string)
-    frame = Resize((RESIZED_HEIGHT, RESIZED_WIDTH))(frame)
+    frame = frame.to("cuda:0")
+    frame = Resize((256, 256))(frame)
     return frame
 
 
-def decode_full_video(video_path, decode_device_string, resize_device_string):
-    # We use the core API instead of SimpleVideoDecoder because the core API
-    # allows us to natively resize as part of the decode step.
-    print(f"{decode_device_string=} {resize_device_string=}")
-    decoder = torchcodec.decoders._core.create_from_file(video_path)
-    num_threads = None
-    if "cuda" in decode_device_string:
-        num_threads = 1
-    width = None
-    height = None
-    if "native" in resize_device_string:
-        width = RESIZED_WIDTH
-        height = RESIZED_HEIGHT
-    torchcodec.decoders._core.add_video_stream(
-        decoder,
-        stream_index=-1,
-        device_string=decode_device_string,
-        num_threads=num_threads,
-        width=width,
-        height=height,
+def decode_full_video(video_path, device_string, do_gpu_preproc):
+    decoder = torchcodec.decoders.SimpleVideoDecoder(
+        video_path, device=torch.device(device_string)
     )
-
     start_time = time.time()
     frame_count = 0
-    while True:
-        try:
-            frame, *_ = torchcodec.decoders._core.get_next_frame(decoder)
-            if resize_device_string != "none" and "native" not in resize_device_string:
-                frame = transfer_and_resize_frame(frame, resize_device_string)
-
-            frame_count += 1
-        except Exception as e:
-            print("EXCEPTION", e)
-            break
-
+    for frame in decoder:
+        # Optionally transfer the frame to the GPU and resize it to simulate
+        # extra preproc work on the GPU (controlled by --do_gpu_preproc):
+        if do_gpu_preproc:
+            frame = transfer_and_resize_frame(frame)
+        frame_count += 1
     end_time = time.time()
     elapsed = end_time - start_time
     fps = frame_count / (end_time - start_time)
     print(
-        f"****** DECODED full video {decode_device_string=} {frame_count=} {elapsed=} {fps=}"
+        f"****** DECODED full video {device_string=} {frame_count=} {elapsed=} {fps=}"
     )
     return frame_count, end_time - start_time
 
@@ -70,12 +44,6 @@ def main():
         type=str,
         help="Comma-separated devices to test decoding on.",
     )
-    parser.add_argument(
-        "--resize_devices",
-        default="cuda:0,cpu,native,none",
-        type=str,
-        help="Comma-separated devices to test preroc (resize) on. Use 'none' to specify no resize.",
-    )
     parser.add_argument(
         "--video",
         type=str,
@@ -91,6 +59,15 @@ def main():
             "to measure the cold start time."
         ),
     )
+    parser.add_argument(
+        "--do_gpu_preproc",
+        action=argparse.BooleanOptionalAction,
+        default=True,
+        help=(
+            "Do a transfer to GPU and resize operation after the decode to "
+            "simulate a real-world transform."
+        ),
+    )
     args = parser.parse_args()
     video_path = args.video
 
@@ -100,44 +77,29 @@ def main():
             decode_full_video(video_path, device)
         return
 
-    resize_devices = args.resize_devices.split(",")
-    resize_devices = [d for d in resize_devices if d != ""]
-    if len(resize_devices) == 0:
-        resize_devices.append("none")
-
-    label = "Decode+Resize Time"
+    label = "Decode"
+    if args.do_gpu_preproc:
+        label += " + GPU Preproc"
+    label += " Time"
 
     results = []
-    for decode_device_string in args.devices.split(","):
-        for resize_device_string in resize_devices:
-            decode_label = decode_device_string
-            if "cuda" in decode_label:
-                # Shorten "cuda:0" to "cuda"
-                decode_label = "cuda"
-            resize_label = resize_device_string
-            if "cuda" in resize_device_string:
-                # Shorten "cuda:0" to "cuda"
-                resize_label = "cuda"
-            print("decode_device", decode_device_string)
-            print("resize_device", resize_device_string)
-            t = benchmark.Timer(
-                stmt="decode_full_video(video_path, decode_device_string, resize_device_string)",
-                globals={
-                    "decode_device_string": decode_device_string,
-                    "video_path": video_path,
-                    "decode_full_video": decode_full_video,
-                    "resize_device_string": resize_device_string,
-                },
-                label=label,
-                sub_label=f"video={os.path.basename(video_path)}",
-                description=f"D={decode_label},R={resize_label}",
-            ).blocked_autorange()
-            results.append(t)
+    for device in args.devices.split(","):
+        print("device", device)
+        t = benchmark.Timer(
+            stmt="decode_full_video(video_path, device, do_gpu_preproc)",
+            globals={
+                "device": device,
+                "video_path": video_path,
+                "decode_full_video": decode_full_video,
+                "do_gpu_preproc": args.do_gpu_preproc,
+            },
+            label=label,
+            sub_label=f"video={os.path.basename(video_path)}",
+            description=f"decode_device={device}",
+        ).blocked_autorange()
+        results.append(t)
     compare = benchmark.Compare(results)
     compare.print()
-    print("Key: D=Decode, R=Resize")
-    print("Native resize is done as part of the decode step")
-    print("none resize means there is no resize step -- native or otherwise")
 
 
 if __name__ == "__main__":
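
For reference, a hypothetical direct use of the new decode_full_video signature (the video path here is a placeholder; since --do_gpu_preproc is a BooleanOptionalAction, the preproc step can also be turned off on the command line with --no-do_gpu_preproc):

    # Sketch only: "test_video.mp4" is a placeholder path, and do_gpu_preproc
    # mirrors the new CLI flag.
    frame_count, elapsed = decode_full_video("test_video.mp4", "cuda:0", do_gpu_preproc=True)
    print(f"{frame_count=} {elapsed=}")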