#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
+ import importlib
+ from dataclasses import dataclass
from enum import IntEnum, unique
from functools import partial
- from typing import Callable, Optional, Sequence, Set
+ from typing import Callable, Dict, Optional, Sequence, Set

import torch
from executorch.backends.qualcomm._passes import (
@@ -66,7 +68,7 @@ class QuantDtype(IntEnum):
    use_8a8w = 3


- quant_config_dict = {
+ QUANT_CONFIG_DICT = {
    # PTQ
    (QuantDtype.use_16a16w, False): (
        get_16a16w_qnn_ptq_config,
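Note: the rename to `QUANT_CONFIG_DICT` marks this as a module-level constant. Each `(QuantDtype, is_qat)` key maps to a pair of factory callables, `(quant_config_func, per_channel_quant_config_func)`, both assumed to accept an optional `act_observer` keyword (which `ModuleQConfig.__post_init__` below relies on). A minimal lookup sketch, not part of the diff:

```python
# Hedged sketch: resolve the PTQ factories for 16-bit act / 16-bit weight.
quant_config_func, per_channel_quant_config_func = QUANT_CONFIG_DICT[
    (QuantDtype.use_16a16w, False)
]
quant_config = quant_config_func()  # built with the default activation observer
per_channel_quant_config = per_channel_quant_config_func()
```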
@@ -112,18 +114,60 @@ class QuantDtype(IntEnum):
}


+ @dataclass
+ class ModuleQConfig:
+     quant_dtype: QuantDtype = QuantDtype.use_8a8w
+     is_qat: bool = False
+     is_conv_per_channel: bool = False
+     is_linear_per_channel: bool = False
+     act_observer: Optional[
+         torch.ao.quantization.observer.UniformQuantizationObserverBase
+     ] = None
+
+     def __post_init__(self):
+         if (self.quant_dtype, self.is_qat) not in QUANT_CONFIG_DICT:
+             raise RuntimeError(
+                 f"the quant config, (quant_dtype: {self.quant_dtype}, is_qat: {self.is_qat}) is not supported"
+             )
+         quant_config_func, per_channel_quant_config_func = QUANT_CONFIG_DICT[
+             (self.quant_dtype, self.is_qat)
+         ]
+         self.quant_config = (
+             quant_config_func(act_observer=self.act_observer)
+             if self.act_observer
+             else quant_config_func()
+         )
+         self.per_channel_quant_config = (
+             per_channel_quant_config_func(act_observer=self.act_observer)
+             if self.act_observer
+             else per_channel_quant_config_func()
+         )
+         self.use_per_channel_weight_quant_ops = set()
+         if self.is_conv_per_channel:
+             self.use_per_channel_weight_quant_ops.update(
+                 {
+                     torch.ops.aten.conv1d.default,
+                     torch.ops.aten.conv2d.default,
+                     torch.ops.aten.conv_transpose2d.input,
+                 }
+             )
+         if self.is_linear_per_channel:
+             self.use_per_channel_weight_quant_ops.update(
+                 {
+                     torch.ops.aten.linear.default,
+                 }
+             )
+
+
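For reference, a minimal construction sketch (not part of the diff): `ModuleQConfig` validates the `(quant_dtype, is_qat)` pair against `QUANT_CONFIG_DICT` and derives the per-channel op set in `__post_init__`, so a misconfigured pair fails at construction time rather than during annotation.

```python
# Hedged example: a per-submodule config with per-channel conv weights.
qconfig = ModuleQConfig(
    quant_dtype=QuantDtype.use_8a8w,
    is_conv_per_channel=True,
)
# Derived fields populated by __post_init__:
assert torch.ops.aten.conv2d.default in qconfig.use_per_channel_weight_quant_ops
assert torch.ops.aten.linear.default not in qconfig.use_per_channel_weight_quant_ops
```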
class QnnQuantizer(Quantizer):
    SUPPORTED_OPS: Set = set(OP_ANNOTATOR.keys())

    def __init__(self):
        super().__init__()
        self.quant_ops: Set[OpOverload] = self.SUPPORTED_OPS.copy()

-         self.is_qat = False
-         self.quant_dtype = QuantDtype.use_8a8w
-         self.quant_config: QuantizationConfig = get_8a8w_qnn_ptq_config()
-         self.per_channel_quant_config = get_ptq_per_channel_quant_config()
-         self.use_per_channel_weight_quant_ops: Set[OpOverload] = set()
+         self.default_quant_config = ModuleQConfig()
+         self.module_qconfig_dict: Dict[torch.nn.Module, ModuleQConfig] = {}

        self.custom_quant_annotations: Sequence[Callable] = []
        self.discard_nodes: Set[str] = set()
@@ -133,37 +177,55 @@ def _annotate(self, gm: GraphModule) -> None:
            if node.name in self.discard_nodes:
                continue

-             quant_config = self._get_quant_config(node.target)
+             quant_config = self._get_quant_config(node)
            if quant_config:
                OP_ANNOTATOR[node.target](node, quant_config)

    def _annotate_custom_annotation(self, gm: GraphModule) -> None:
        for annotation_func in self.custom_quant_annotations:
            annotation_func(gm)

-     def _get_quant_config(self, op: str | OpOverload) -> Optional[QuantizationConfig]:
+     def _get_submodule(self, node: torch.fx.Node):
+         """
+         An example of nn_module_stack:
+         {
+             'L__self__': ('', 'executorch.backends.qualcomm.tests.models.SubModules'),
+             'L__self___add': ('add', 'executorch.backends.qualcomm.tests.models.Add')
+         }
+         """
+
+         nn_module_stack = node.meta.get("nn_module_stack")
+         if nn_module_stack:
+             module_source_str, module_str = list(nn_module_stack.values())[-1][
+                 -1
+             ].rsplit(".", 1)
+             module_source = importlib.import_module(module_source_str)
+             return getattr(module_source, module_str)
+         return None
+
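The last `nn_module_stack` entry corresponds to the innermost submodule: its dotted path is split into module path and class name, then resolved back to the class object, which serves as the lookup key into `module_qconfig_dict`. A standalone sketch of that resolution (not part of the diff), using the docstring's example entry:

```python
# Hedged sketch of the resolution step performed by _get_submodule.
import importlib

entry = ("add", "executorch.backends.qualcomm.tests.models.Add")
module_source_str, module_str = entry[-1].rsplit(".", 1)
# -> ("executorch.backends.qualcomm.tests.models", "Add")
submodule_cls = getattr(importlib.import_module(module_source_str), module_str)
```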
+     def _get_quant_config(self, node: torch.fx.Node) -> Optional[QuantizationConfig]:
        """
-         Priority:
-         1. is one of use_per_channel_weight_quant_ops
-         2. quant config
+         How to pick:
+         1. Choose the submodule-specific config if one is given.
+         2. Use the per-channel config if the op belongs to use_per_channel_weight_quant_ops.
+         3. Otherwise, use the normal quant config.
        """
+         op = node.target
        if isinstance(op, str):
            return

-         if op in self.use_per_channel_weight_quant_ops:
-             return self.per_channel_quant_config
+         config = self.module_qconfig_dict.get(
+             self._get_submodule(node), self.default_quant_config
+         )
+
+         if op in config.use_per_channel_weight_quant_ops:
+             return config.per_channel_quant_config

        if op in self.quant_ops:
-             return self.quant_config
+             return config.quant_config

        print(f"No quant config is implemented for op, {op}")

-     def _update_per_channel_weight_quant_ops(self, ops: Set[OpOverload], enable: bool):
-         if enable:
-             self.use_per_channel_weight_quant_ops.update(ops)
-         else:
-             self.use_per_channel_weight_quant_ops.difference_update(ops)
-
    def add_custom_quant_annotations(
        self, custom_quant_annotations: Sequence[Callable]
    ) -> None:
@@ -185,39 +247,29 @@ def annotate(self, model: GraphModule) -> GraphModule:
    def get_supported_ops(self) -> Set[OpOverload]:
        return self.SUPPORTED_OPS

-     def set_quant_config(
-         self, quant_dtype: QuantDtype, is_qat=False, act_observer=None
+     def set_default_quant_config(
+         self,
+         quant_dtype: QuantDtype,
+         is_qat=False,
+         is_conv_per_channel=False,
+         is_linear_per_channel=False,
+         act_observer=None,
    ) -> None:
-         self.quant_dtype = quant_dtype
-         self.is_qat = is_qat
-         if (quant_dtype, is_qat) not in quant_config_dict:
-             raise RuntimeError(
-                 f"the quant config, (quant_dtype: {quant_dtype}, is_qat: {is_qat}) is not support"
-             )
-
-         quant_config_fuc, per_channel_quant_config_fuc = quant_config_dict[
-             (quant_dtype, is_qat)
-         ]
-         self.quant_config = (
-             quant_config_fuc(act_observer=act_observer)
-             if act_observer
-             else quant_config_fuc()
-         )
-         self.per_channel_quant_config = (
-             per_channel_quant_config_fuc(act_observer=act_observer)
-             if act_observer
-             else per_channel_quant_config_fuc()
+         self.default_quant_config = ModuleQConfig(
+             quant_dtype,
+             is_qat,
+             is_conv_per_channel,
+             is_linear_per_channel,
+             act_observer,
        )

-     def set_per_channel_conv_quant(self, enable: bool) -> None:
-         conv_ops = {torch.ops.aten.conv1d.default, torch.ops.aten.conv2d.default}
-         self._update_per_channel_weight_quant_ops(conv_ops, enable)
-
-     def set_per_channel_linear_quant(self, enable: bool) -> None:
-         linear_ops = {
-             torch.ops.aten.linear.default,
-         }
-         self._update_per_channel_weight_quant_ops(linear_ops, enable)
+     def set_submodule_quant_config(
+         self, submodule: torch.nn.Module, module_qconfig: ModuleQConfig
+     ) -> None:
+         """
+         Set the quant config specific to a submodule.
+         """
+         self.module_qconfig_dict[submodule] = module_qconfig
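Taken together, callers now configure a default plus per-submodule overrides through these two entry points. A hedged end-to-end usage sketch (not from the diff); `Add` reuses the test-model class named in the `_get_submodule` docstring above:

```python
# Hedged usage sketch of the new API surface introduced in this diff.
from executorch.backends.qualcomm.tests.models import Add

quantizer = QnnQuantizer()
# Default config applied to any node without a submodule-specific entry.
quantizer.set_default_quant_config(
    QuantDtype.use_8a8w,
    is_conv_per_channel=True,
    is_linear_per_channel=True,
)
# Override: nodes traced from `Add` submodules use 16a16w PTQ instead.
quantizer.set_submodule_quant_config(
    Add, ModuleQConfig(quant_dtype=QuantDtype.use_16a16w)
)
```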

    def transform_for_annotation(self, model: GraphModule) -> GraphModule:
        model = ReduceDynamicRange()(model).graph_module