Skip to content

Commit 4af687a

Browse files
authored
Revert "Qualcomm AI Engine Direct - Quantizer refine for qat (#6513)" (#6722)
1 parent 437168e commit 4af687a

File tree

13 files changed, with 584 additions and 790 deletions.

13 files changed

+584
-790
lines changed

backends/qualcomm/quantizer/custom_annotation.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,12 +6,12 @@
66
from typing import Sequence
77

88
import torch
9-
from executorch.backends.qualcomm.quantizer.annotators import QUANT_ANNOTATION_KEY
109
from executorch.backends.qualcomm.quantizer.quantizer import (
1110
get_16a8w_qnn_ptq_config,
12-
get_8a8w_qnn_ptq_config,
11+
get_default_8bit_qnn_ptq_config,
1312
QuantizationConfig,
1413
)
14+
from executorch.backends.qualcomm.quantizer.utils import QUANT_ANNOTATION_KEY
1515
from executorch.exir.dialects._ops import ops as exir_ops
1616
from torch.ao.quantization.quantizer import (
1717
QuantizationAnnotation,
@@ -110,7 +110,7 @@ def annotate_matmul_input1(node: Node, quantization_config: QuantizationConfig):
110110
# Annotate 16a8w for matmul op to get better performance
111111
quantization_config_16a8w = get_16a8w_qnn_ptq_config()
112112
# Annotate 8a8w for second input of matmul until past_kv_cache
113-
quantization_config_8a8w = get_8a8w_qnn_ptq_config(act_symmetric=True)
113+
quantization_config_8a8w = get_default_8bit_qnn_ptq_config(act_symmetric=True)
114114
for node in gm.graph.nodes:
115115
if node.op == "call_function" and node.target == torch.ops.aten.matmul.default:
116116
if "nn_module_stack" in node.meta:

backends/qualcomm/quantizer/observers/per_channel_param_observer.py

Lines changed: 0 additions & 104 deletions
This file was deleted.

0 commit comments

Comments (0)