pytorch · jerryzh168 · Sep 24, 2024 · Sep 24, 2024
diff --git a/tutorials/quantize_vit/run_vit_b_quant.py b/tutorials/quantize_vit/run_vit_b_quant.py
@@ -36,6 +36,9 @@
 if not TORCH_VERSION_AT_LEAST_2_5:
     unwrap_tensor_subclass(model)
 
+# temporary workaround to recover the perf with quantized model under torch.compile
+torch.backends.mha.set_fastpath_enabled(False)
+
 model = torch.compile(model, mode='max-autotune')
 
 # Must run with no_grad when optimizing for inference