 import torchcodec
 from torchvision.transforms import Resize

+RESIZED_WIDTH = 256
+RESIZED_HEIGHT = 256

-def transfer_and_resize_frame(frame):
+
+def transfer_and_resize_frame(frame, resize_device_string):
     # This should be a no-op if the frame is already on the GPU.
-    frame = frame.to("cuda:0")
-    frame = Resize((256, 256))(frame)
+    frame = frame.to(resize_device_string)
+    frame = Resize((RESIZED_HEIGHT, RESIZED_WIDTH))(frame)
     return frame


-def decode_full_video(video_path, device_string, do_gpu_preproc):
-    decoder = torchcodec.decoders.SimpleVideoDecoder(
-        video_path, device=torch.device(device_string)
+def decode_full_video(video_path, decode_device_string, resize_device_string):
+    # We use the core API instead of SimpleVideoDecoder because the core API
+    # allows us to natively resize as part of the decode step.
+    print(f"{decode_device_string=} {resize_device_string=}")
+    decoder = torchcodec.decoders._core.create_from_file(video_path)
+    num_threads = None
+    if "cuda" in decode_device_string:
+        num_threads = 1
+    width = None
+    height = None
+    if "native" in resize_device_string:
+        width = RESIZED_WIDTH
+        height = RESIZED_HEIGHT
+    torchcodec.decoders._core.add_video_stream(
+        decoder,
+        stream_index=-1,
+        device_string=decode_device_string,
+        num_threads=num_threads,
+        width=width,
+        height=height,
     )
+
     start_time = time.time()
     frame_count = 0
-    for frame in decoder:
-        # You can do a resize to simulate extra preproc work that happens
-        # on the GPU by uncommenting the following line:
-        if do_gpu_preproc:
-            frame = transfer_and_resize_frame(frame)
-        frame_count += 1
+    while True:
+        try:
+            frame, *_ = torchcodec.decoders._core.get_next_frame(decoder)
+            if resize_device_string != "none" and "native" not in resize_device_string:
+                frame = transfer_and_resize_frame(frame, resize_device_string)
+
+            frame_count += 1
+        except Exception as e:
+            print("EXCEPTION", e)
+            break
+
     end_time = time.time()
     elapsed = end_time - start_time
     fps = frame_count / (end_time - start_time)
     print(
-        f"****** DECODED full video {device_string=} {frame_count=} {elapsed=} {fps=}"
+        f"****** DECODED full video {decode_device_string=} {frame_count=} {elapsed=} {fps=}"
     )
     return frame_count, end_time - start_time

@@ -45,6 +71,12 @@ def main():
         type=str,
         help="Comma-separated devices to test decoding on.",
     )
+    parser.add_argument(
+        "--resize_devices",
+        default="cuda:0,cpu,native,none",
+        type=str,
+        help="Comma-separated devices to test preproc (resize) on. Use 'none' to specify no resize.",
+    )
     parser.add_argument(
         "--video",
         type=str,
@@ -60,15 +92,6 @@ def main():
6092 "to measure the cold start time."
6193 ),
6294 )
63- parser .add_argument (
64- "--do_gpu_preproc" ,
65- action = argparse .BooleanOptionalAction ,
66- default = True ,
67- help = (
68- "Do a transfer to GPU and resize operation after the decode to "
69- "simulate a real-world transform."
70- ),
71- )
7295 args = parser .parse_args ()
7396 video_path = args .video
7497
@@ -78,29 +101,44 @@ def main():
         decode_full_video(video_path, device)
         return

-    label = "Decode"
-    if args.do_gpu_preproc:
-        label += " + GPU Preproc"
-    label += " Time"
+    resize_devices = args.resize_devices.split(",")
+    resize_devices = [d for d in resize_devices if d != ""]
+    if len(resize_devices) == 0:
+        resize_devices.append("none")
+
+    label = "Decode+Resize Time"

     results = []
-    for device in args.devices.split(","):
-        print("device", device)
-        t = benchmark.Timer(
-            stmt="decode_full_video(video_path, device, do_gpu_preproc)",
-            globals={
-                "device": device,
-                "video_path": video_path,
-                "decode_full_video": decode_full_video,
-                "do_gpu_preproc": args.do_gpu_preproc,
-            },
-            label=label,
-            sub_label=f"video={os.path.basename(video_path)}",
-            description=f"decode_device={device}",
-        ).blocked_autorange()
-        results.append(t)
+    for decode_device_string in args.devices.split(","):
+        for resize_device_string in resize_devices:
+            decode_label = decode_device_string
+            if "cuda" in decode_label:
+                # Shorten "cuda:0" to "cuda"
+                decode_label = "cuda"
+            resize_label = resize_device_string
+            if "cuda" in resize_device_string:
+                # Shorten "cuda:0" to "cuda"
+                resize_label = "cuda"
+            print("decode_device", decode_device_string)
+            print("resize_device", resize_device_string)
+            t = benchmark.Timer(
+                stmt="decode_full_video(video_path, decode_device_string, resize_device_string)",
+                globals={
+                    "decode_device_string": decode_device_string,
+                    "video_path": video_path,
+                    "decode_full_video": decode_full_video,
+                    "resize_device_string": resize_device_string,
+                },
+                label=label,
+                sub_label=f"video={os.path.basename(video_path)}",
+                description=f"D={decode_label},R={resize_label}",
+            ).blocked_autorange()
+            results.append(t)
     compare = benchmark.Compare(results)
     compare.print()
+    print("Key: D=Decode, R=Resize")
+    print("Native resize is done as part of the decode step")
+    print("none resize means there is no resize step -- native or otherwise")


 if __name__ == "__main__":
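
For reference, a minimal sketch of how the three resize modes exercised by this diff differ when calling the new decode_full_video signature directly. The video path below is a placeholder and not taken from the repository:

    # Hypothetical usage sketch based on the diff above; the path is a placeholder.
    video = "/path/to/test_video.mp4"

    # "native": width/height are passed to add_video_stream, so the resize
    # happens inside the decode step itself.
    decode_full_video(video, "cuda:0", "native")

    # A device string such as "cuda:0" or "cpu": frames are resized eagerly
    # after decoding via transfer_and_resize_frame on that device.
    decode_full_video(video, "cuda:0", "cuda:0")

    # "none": frames are decoded and counted with no resize at all.
    decode_full_video(video, "cpu", "none")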