@@ -149,15 +149,16 @@ def quantize_model(model, inp):
 # The PyTorch frontend has support for converting a quantized PyTorch model to
 # an equivalent Relay module enriched with quantization-aware operators.
 # We call this representation Relay QNN dialect.
-#
+input_name = "input"  # the input name can be arbitrary for the PyTorch frontend.
+input_shapes = [(input_name, (1, 3, 224, 224))]
+mod, params = relay.frontend.from_pytorch(script_module, input_shapes)
+
 # You can print the output from the frontend to see how quantized models are
 # represented.
 #
 # You would see operators specific to quantization such as
 # qnn.quantize, qnn.dequantize, qnn.requantize, and qnn.conv2d etc.
-input_name = "input"  # the input name can be arbitrary for the PyTorch frontend.
-input_shapes = [(input_name, (1, 3, 224, 224))]
-mod, params = relay.frontend.from_pytorch(script_module, input_shapes)
+#
 # print(mod)

 ##############################################################################
@@ -178,16 +179,15 @@ def quantize_model(model, inp):
 pt_top3_labels = np.argsort(pt_result[0])[::-1][:3]
 tvm_top3_labels = np.argsort(tvm_result[0])[::-1][:3]

-print("PyTorch top3 label:", [synset[label] for label in pt_top3_labels])
-print("TVM top3 label:", [synset[label] for label in tvm_top3_labels])
+print("PyTorch top3 labels:", [synset[label] for label in pt_top3_labels])
+print("TVM top3 labels:", [synset[label] for label in tvm_top3_labels])

 ###########################################################################################
 # However, due to the difference in numerics, in general the raw floating point
 # outputs are not expected to be identical. Here, we print how many floating point
 # output values are identical out of 1000 outputs from mobilenet v2.
 print("%d in 1000 raw floating outputs identical." % np.sum(tvm_result[0] == pt_result[0]))

-
 ##########################################################################
 # Measure performance
 # -------------------------
@@ -197,7 +197,7 @@ def quantize_model(model, inp):
 ftimer = rt_mod.module.time_evaluator("run", ctx, number=1,
                                       repeat=n_repeat)
 prof_res = np.array(ftimer().results) * 1e3
-print("Elapsed ms:", np.mean(prof_res))
+print("Elapsed average ms:", np.mean(prof_res))

 ######################################################################
 # .. note::
@@ -216,7 +216,7 @@ def quantize_model(model, inp):
 # not expected to be any faster than FP32 models. Without fast 8 bit instructions, TVM does
 # quantized convolution in 16 bit, even if the model itself is 8 bit.
 #
-# For x86, the best performance can be acheived on CPUs with AVX512 instructions set.
+# For x86, the best performance can be achieved on CPUs with AVX512 instructions set.
 # In this case, TVM utilizes the fastest available 8 bit instructions for the given target.
 # This includes support for the VNNI 8 bit dot product instruction (CascadeLake or newer).
 #
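To make the note above concrete, here is a minimal sketch (not part of this commit) of how one might compile the QNN module for an x86 target with AVX512/VNNI support, so TVM can pick the fast 8 bit instructions the note mentions. The "-mcpu=cascadelake" value and opt_level=3 are illustrative assumptions; mod and params are the outputs of relay.frontend.from_pytorch shown earlier in the tutorial.

import tvm
from tvm import relay

# Assumed target: an x86 CPU with AVX512/VNNI (e.g. Cascade Lake), so that
# qnn.conv2d can be lowered to fast 8 bit dot-product instructions.
target = "llvm -mcpu=cascadelake"

with tvm.transform.PassContext(opt_level=3):
    # mod and params come from relay.frontend.from_pytorch above.
    lib = relay.build(mod, target=target, params=params)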