 import torchcodec
 from torchvision.transforms import Resize
 
-RESIZED_WIDTH = 256
-RESIZED_HEIGHT = 256
 
-
-def transfer_and_resize_frame(frame, resize_device_string):
+def transfer_and_resize_frame(frame):
     # This should be a no-op if the frame is already on the GPU.
-    frame = frame.to(resize_device_string)
-    frame = Resize((RESIZED_HEIGHT, RESIZED_WIDTH))(frame)
+    frame = frame.to("cuda:0")
+    frame = Resize((256, 256))(frame)
     return frame
 
 
-def decode_full_video(video_path, decode_device_string, resize_device_string):
-    # We use the core API instead of SimpleVideoDecoder because the core API
-    # allows us to natively resize as part of the decode step.
-    print(f"{decode_device_string=} {resize_device_string=}")
-    decoder = torchcodec.decoders._core.create_from_file(video_path)
-    num_threads = None
-    if "cuda" in decode_device_string:
-        num_threads = 1
-    width = None
-    height = None
-    if "native" in resize_device_string:
-        width = RESIZED_WIDTH
-        height = RESIZED_HEIGHT
-    torchcodec.decoders._core.add_video_stream(
-        decoder,
-        stream_index=-1,
-        device_string=decode_device_string,
-        num_threads=num_threads,
-        width=width,
-        height=height,
+def decode_full_video(video_path, device_string, do_gpu_preproc):
+    decoder = torchcodec.decoders.SimpleVideoDecoder(
+        video_path, device=torch.device(device_string)
     )
-
     start_time = time.time()
     frame_count = 0
-    while True:
-        try:
-            frame, *_ = torchcodec.decoders._core.get_next_frame(decoder)
-            if resize_device_string != "none" and "native" not in resize_device_string:
-                frame = transfer_and_resize_frame(frame, resize_device_string)
-
-            frame_count += 1
-        except Exception as e:
-            print("EXCEPTION", e)
-            break
-
+    for frame in decoder:
+        # Optionally transfer the frame to the GPU and resize it to simulate
+        # extra preproc work on the GPU (controlled by --do_gpu_preproc):
+        if do_gpu_preproc:
+            frame = transfer_and_resize_frame(frame)
+        frame_count += 1
     end_time = time.time()
     elapsed = end_time - start_time
     fps = frame_count / (end_time - start_time)
     print(
-        f"****** DECODED full video {decode_device_string=} {frame_count=} {elapsed=} {fps=}"
+        f"****** DECODED full video {device_string=} {frame_count=} {elapsed=} {fps=}"
     )
     return frame_count, end_time - start_time
 
@@ -70,12 +44,6 @@ def main():
         type=str,
         help="Comma-separated devices to test decoding on.",
     )
-    parser.add_argument(
-        "--resize_devices",
-        default="cuda:0,cpu,native,none",
-        type=str,
-        help="Comma-separated devices to test preroc (resize) on. Use 'none' to specify no resize.",
-    )
     parser.add_argument(
         "--video",
         type=str,
@@ -91,6 +59,15 @@ def main():
             "to measure the cold start time."
         ),
     )
+    parser.add_argument(
+        "--do_gpu_preproc",
+        action=argparse.BooleanOptionalAction,
+        default=True,
+        help=(
+            "Do a transfer to GPU and resize operation after the decode to "
+            "simulate a real-world transform."
+        ),
+    )
     args = parser.parse_args()
     video_path = args.video
 
@@ -100,44 +77,29 @@ def main():
             decode_full_video(video_path, device)
         return
 
-    resize_devices = args.resize_devices.split(",")
-    resize_devices = [d for d in resize_devices if d != ""]
-    if len(resize_devices) == 0:
-        resize_devices.append("none")
-
-    label = "Decode+Resize Time"
+    label = "Decode"
+    if args.do_gpu_preproc:
+        label += " + GPU Preproc"
+    label += " Time"
 
     results = []
-    for decode_device_string in args.devices.split(","):
-        for resize_device_string in resize_devices:
-            decode_label = decode_device_string
-            if "cuda" in decode_label:
-                # Shorten "cuda:0" to "cuda"
-                decode_label = "cuda"
-            resize_label = resize_device_string
-            if "cuda" in resize_device_string:
-                # Shorten "cuda:0" to "cuda"
-                resize_label = "cuda"
-            print("decode_device", decode_device_string)
-            print("resize_device", resize_device_string)
-            t = benchmark.Timer(
-                stmt="decode_full_video(video_path, decode_device_string, resize_device_string)",
-                globals={
-                    "decode_device_string": decode_device_string,
-                    "video_path": video_path,
-                    "decode_full_video": decode_full_video,
-                    "resize_device_string": resize_device_string,
-                },
-                label=label,
-                sub_label=f"video={os.path.basename(video_path)}",
-                description=f"D={decode_label},R={resize_label}",
-            ).blocked_autorange()
-            results.append(t)
+    for device in args.devices.split(","):
+        print("device", device)
+        t = benchmark.Timer(
+            stmt="decode_full_video(video_path, device, do_gpu_preproc)",
+            globals={
+                "device": device,
+                "video_path": video_path,
+                "decode_full_video": decode_full_video,
+                "do_gpu_preproc": args.do_gpu_preproc,
+            },
+            label=label,
+            sub_label=f"video={os.path.basename(video_path)}",
+            description=f"decode_device={device}",
+        ).blocked_autorange()
+        results.append(t)
     compare = benchmark.Compare(results)
     compare.print()
-    print("Key: D=Decode, R=Resize")
-    print("Native resize is done as part of the decode step")
-    print("none resize means there is no resize step -- native or otherwise")
 
 
 if __name__ == "__main__":
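
For reference, a hypothetical direct use of the new decode_full_video signature (the video path here is a placeholder; since --do_gpu_preproc is a BooleanOptionalAction, the preproc step can also be turned off on the command line with --no-do_gpu_preproc):

    # Sketch only: "test_video.mp4" is a placeholder path, and do_gpu_preproc
    # mirrors the new CLI flag.
    frame_count, elapsed = decode_full_video("test_video.mp4", "cuda:0", do_gpu_preproc=True)
    print(f"{frame_count=} {elapsed=}")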