Commit 9e3c5c0

type and nit fix
1 parent af88314 commit 9e3c5c0

File tree

2 files changed: +10 additions, -10 deletions


docs/dev/relay_pass_infra.rst

Lines changed: 1 addition & 1 deletion
@@ -612,7 +612,7 @@ sequential pass example could be like the following to enable IR dumping for
     seq = tvm.transform.Sequential([
         relay.transform.InferType(),
         relay.transform.FoldConstant(),
-        relay.transform.PrintIR(),
+        transform.PrintIR(),
         relay.transform.EliminateCommonSubexpr(),
         relay.transform.AlterOpLayout()
     ])
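
For context on this hunk: PrintIR lives in the generic tvm.transform namespace rather than relay.transform, which is what the docs fix reflects. A minimal runnable sketch of the corrected pipeline, assuming the TVM Python API of this era (the toy function is made up for illustration):

    import tvm
    from tvm import relay

    # A toy Relay function, f(x) = (x + x) * 2, just to have IR to transform.
    x = relay.var("x", shape=(1, 3), dtype="float32")
    func = relay.Function([x], relay.multiply(relay.add(x, x), relay.const(2.0)))
    mod = tvm.IRModule.from_expr(func)

    seq = tvm.transform.Sequential([
        relay.transform.InferType(),
        relay.transform.FoldConstant(),
        tvm.transform.PrintIR(),  # dumps the IR after FoldConstant has run
        relay.transform.EliminateCommonSubexpr(),
        relay.transform.AlterOpLayout(),
    ])

    # EliminateCommonSubexpr and AlterOpLayout are opt_level 3 passes, so
    # they only fire inside a PassContext with opt_level >= 3.
    with tvm.transform.PassContext(opt_level=3):
        mod = seq(mod)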

tutorials/frontend/deploy_prequantized.py

Lines changed: 9 additions & 9 deletions
@@ -149,15 +149,16 @@ def quantize_model(model, inp):
 # The PyTorch frontend has support for converting a quantized PyTorch model to
 # an equivalent Relay module enriched with quantization-aware operators.
 # We call this representation Relay QNN dialect.
-#
+input_name = "input"  # the input name can be be arbitrary for PyTorch frontend.
+input_shapes = [(input_name, (1, 3, 224, 224))]
+mod, params = relay.frontend.from_pytorch(script_module, input_shapes)
+
 # You can print the output from the frontend to see how quantized models are
 # represented.
 #
 # You would see operators specific to quantization such as
 # qnn.quantize, qnn.dequantize, qnn.requantize, and qnn.conv2d etc.
-input_name = "input"  # the input name can be be arbitrary for PyTorch frontend.
-input_shapes = [(input_name, (1, 3, 224, 224))]
-mod, params = relay.frontend.from_pytorch(script_module, input_shapes)
+#
 # print(mod)
 
 ##############################################################################
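
The script_module consumed above is a quantized TorchScript module. A hedged sketch of how such an input is typically produced before the from_pytorch call shown in the diff, assuming torchvision's prequantized mobilenet_v2 (the model choice and quantization route here are assumptions; the tutorial itself quantizes a float model via its quantize_model helper):

    import torch
    from torchvision.models.quantization import mobilenet_v2
    from tvm import relay

    # A torchvision model shipped with quantized weights.
    model = mobilenet_v2(pretrained=True, quantize=True).eval()

    inp = torch.rand(1, 3, 224, 224)
    # relay.frontend.from_pytorch consumes a traced TorchScript module.
    script_module = torch.jit.trace(model, inp).eval()

    input_name = "input"  # arbitrary; it only names the Relay input variable
    input_shapes = [(input_name, (1, 3, 224, 224))]
    mod, params = relay.frontend.from_pytorch(script_module, input_shapes)
    # print(mod)  # shows qnn.quantize, qnn.conv2d, qnn.requantize, etc.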
@@ -178,16 +179,15 @@ def quantize_model(model, inp):
 pt_top3_labels = np.argsort(pt_result[0])[::-1][:3]
 tvm_top3_labels = np.argsort(tvm_result[0])[::-1][:3]
 
-print("PyTorch top3 label:", [synset[label] for label in pt_top3_labels])
-print("TVM top3 label:", [synset[label] for label in tvm_top3_labels])
+print("PyTorch top3 labels:", [synset[label] for label in pt_top3_labels])
+print("TVM top3 labels:", [synset[label] for label in tvm_top3_labels])
 
 ###########################################################################################
 # However, due to the difference in numerics, in general the raw floating point
 # outputs are not expected to be identical. Here, we print how many floating point
 # output values are identical out of 1000 outputs from mobilenet v2.
 print("%d in 1000 raw floating outputs identical." % np.sum(tvm_result[0] == pt_result[0]))
 
-
 ##########################################################################
 # Measure performance
 # -------------------------
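
As a side note on the idiom in this hunk: np.argsort(...)[::-1][:3] picks the indices of the three largest scores, and np.sum(a == b) counts exactly-equal elements. A tiny illustration with made-up numbers:

    import numpy as np

    scores = np.array([0.1, 0.7, 0.05, 0.9, 0.3])
    top3 = np.argsort(scores)[::-1][:3]  # indices by descending score
    print(top3)  # [3 1 4]

    # Counting bitwise-identical floats, as done for the 1000 logits above:
    a = np.array([1.0, 2.0, 3.0])
    b = np.array([1.0, 2.5, 3.0])
    print(np.sum(a == b))  # 2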
@@ -197,7 +197,7 @@ def quantize_model(model, inp):
 ftimer = rt_mod.module.time_evaluator("run", ctx, number=1,
                                       repeat=n_repeat)
 prof_res = np.array(ftimer().results) * 1e3
-print("Elapsed ms:", np.mean(prof_res))
+print("Elapsed average ms:", np.mean(prof_res))
 
 ######################################################################
 # .. note::
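
The time_evaluator used in this hunk is TVM's standard micro-benchmark helper: it runs the "run" function number times per measurement, repeats that repeat times, and returns per-repeat times in seconds. A self-contained sketch against a trivial module, assuming the graph_runtime API of this era (the relu workload is made up just to have something to time):

    import numpy as np
    import tvm
    from tvm import relay
    from tvm.contrib import graph_runtime

    # Build a trivial Relay module so there is something to benchmark.
    x = relay.var("x", shape=(1, 3, 224, 224), dtype="float32")
    mod = tvm.IRModule.from_expr(relay.Function([x], relay.nn.relu(x)))
    with tvm.transform.PassContext(opt_level=3):
        graph, lib, params = relay.build(mod, target="llvm")

    ctx = tvm.cpu(0)
    rt_mod = graph_runtime.create(graph, lib, ctx)
    rt_mod.set_input("x", np.random.rand(1, 3, 224, 224).astype("float32"))

    n_repeat = 100
    ftimer = rt_mod.module.time_evaluator("run", ctx, number=1, repeat=n_repeat)
    prof_res = np.array(ftimer().results) * 1e3  # per-repeat times in ms
    print("Elapsed average ms:", np.mean(prof_res))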
@@ -216,7 +216,7 @@ def quantize_model(model, inp):
 # not expected to be any faster than FP32 models. Without fast 8 bit instructions, TVM does
 # quantized convolution in 16 bit, even if the model itself is 8 bit.
 #
-# For x86, the best performance can be acheived on CPUs with AVX512 instructions set.
+# For x86, the best performance can be achieved on CPUs with AVX512 instructions set.
 # In this case, TVM utilizes the fastest available 8 bit instructions for the given target.
 # This includes support for the VNNI 8 bit dot product instruction (CascadeLake or newer).
 #
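
To make the note concrete: which 8 bit instructions TVM can use is decided by the LLVM target string passed to relay.build. A hedged sketch (the -mcpu values are standard LLVM CPU names; whether VNNI is actually picked depends on your LLVM build):

    # Generic x86 target: quantized conv2d falls back to 16 bit arithmetic.
    target = "llvm"

    # AVX512-capable CPU, e.g. Skylake-X: fast 8 bit instructions are used.
    target = "llvm -mcpu=skylake-avx512"

    # CascadeLake or newer: enables the VNNI 8 bit dot product instruction.
    target = "llvm -mcpu=cascadelake"

    # The string is then passed to the build step, e.g.:
    # with tvm.transform.PassContext(opt_level=3):
    #     graph, lib, params = relay.build(mod, target=target, params=params)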
