Qualcomm AI Engine Direct - Add QNN support for to_edge_transform_and_lower #9643

Merged
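For context, the flow this PR enables looks roughly like the sketch below: export a module with torch.export, then hand QnnPartitioner to to_edge_transform_and_lower. The chipset and compiler-spec arguments are illustrative assumptions based on the existing Qualcomm backend helpers (the import path for QcomChipset has moved between releases), not the PR's verbatim usage.

import torch
from executorch.backends.qualcomm.partition.qnn_partitioner import QnnPartitioner
from executorch.backends.qualcomm.serialization.qc_schema import QcomChipset
from executorch.backends.qualcomm.utils.utils import (
    generate_htp_compiler_spec,
    generate_qnn_executorch_compiler_spec,
)
from executorch.exir import to_edge_transform_and_lower

class Add(torch.nn.Module):
    def forward(self, x, y):
        return x + y

ep = torch.export.export(Add(), (torch.randn(2, 2), torch.randn(2, 2)))

# Build QNN compiler specs; SM8550 and use_fp16 are illustrative choices.
compiler_specs = generate_qnn_executorch_compiler_spec(
    soc_model=QcomChipset.SM8550,
    backend_options=generate_htp_compiler_spec(use_fp16=True),
)

# Partition and lower QNN-supported subgraphs in one step.
edge = to_edge_transform_and_lower(ep, partitioner=[QnnPartitioner(compiler_specs)])
executorch_program = edge.to_executorch()

The file-by-file changes follow.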
Changes from all commits
20 changes: 10 additions & 10 deletions backends/qualcomm/_passes/__init__.py
@@ -4,51 +4,51 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from .annotate_decomposed import AnnotateDecomposed
from .annotate_quant_attrs import AnnotateQuantAttrs
from .constant_i64_to_i32 import ConstantI64toI32
from .annotate_stack import AnnotateStack
from .annotate_unbind import AnnotateUnbind
from .convert_bmm_to_matmul import ConvertBmmToMatmul
from .convert_conv1d_to_conv2d import ConvertConv1dToConv2d
from .convert_to_linear import ConvertToLinear
from .decompose_any import DecomposeAny
from .decompose_einsum import DecomposeEinsum
from .decompose_expm1 import DecomposeExpM1
from .decompose_linalg_vector_norm import DecomposeLinalgVectorNorm
from .decompose_silu import DecomposeSilu
from .expand_broadcast_tensor_shape import ExpandBroadcastTensorShape
from .fixed_linear_keep_dim import FixedLinearKeepDim
from .fold_qdq import FoldQDQ
from .fuse_consecutive_transpose import FuseConsecutiveTranspose
from .i64_to_i32 import I64toI32
from .insert_io_qdq import InsertIOQDQ
from .insert_requantize import InsertRequantize
from .layout_transform import LayoutTransform
from .lift_constant_scalar_operands import LiftConstantScalarOperands
from .recompose_pixel_unshuffle import RecomposePixelUnshuffle
from .recompose_prelu import RecomposePReLU
from .recompose_rms_norm import RecomposeRmsNorm
from .reduce_dynamic_range import ReduceDynamicRange
from .remove_redundancy import RemoveRedundancy
from .replace_arange_args import ReplaceArangeArgs
from .replace_index_put_input import ReplaceIndexPutInput
from .replace_inf_values import ReplaceInfValues
from .tensor_i64_to_i32 import TensorI64toI32
from .tag_quant_io import TagQuantIO


__all__ = [
AnnotateDecomposed,
AnnotateQuantAttrs,
ConstantI64toI32,
AnnotateStack,
AnnotateUnbind,
ConvertBmmToMatmul,
ConvertConv1dToConv2d,
RecomposePReLU,
ConvertToLinear,
DecomposeAny,
DecomposeEinsum,
DecomposeExpM1,
DecomposeLinalgVectorNorm,
DecomposeSilu,
ExpandBroadcastTensorShape,
FixedLinearKeepDim,
FoldQDQ,
FuseConsecutiveTranspose,
I64toI32,
InsertIOQDQ,
InsertRequantize,
LayoutTransform,
@@ -60,5 +60,5 @@
ReplaceArangeArgs,
ReplaceIndexPutInput,
ReplaceInfValues,
TensorI64toI32,
TagQuantIO,
]
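Each entry in __all__ is an ExportPass; as the diffs below show, a pass is constructed from an ExportedProgram and invoked on its GraphModule. A minimal standalone run, assuming a toy module (everything except the pass itself is illustrative):

import torch
from executorch.backends.qualcomm._passes import AnnotateStack

class Stack(torch.nn.Module):
    def forward(self, x, y):
        return torch.stack([x, y])

ep = torch.export.export(Stack(), (torch.randn(3), torch.randn(3)))

# The constructor takes the ExportedProgram; call() runs on its GraphModule
# and returns a PassResult (see annotate_stack.py below).
result = AnnotateStack(ep).call(ep.graph_module)
print(result.graph_module.graph)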
backends/qualcomm/_passes/annotate_decomposed.py → backends/qualcomm/_passes/annotate_stack.py
@@ -8,31 +8,21 @@
from executorch.exir.pass_base import ExportPass, PassResult
from torch.fx.passes.utils.source_matcher_utils import get_source_partitions

from .utils import dq_ops, get_quant_attrs, q_ops
from .utils import get_quant_attrs, q_ops


class AnnotateDecomposed(ExportPass):
class AnnotateStack(ExportPass):
"""
Add "quant_attrs" to graph nodes' meta from the QDQ information
generated after quantization process.
"""

decomp_ops = [torch.ops.aten.stack.default, torch.ops.aten.unbind.int]
decomp_ops = [torch.ops.aten.unbind.int]

def __init__(self, edge_program: torch.export.ExportedProgram):
super(AnnotateDecomposed, self).__init__()
super(AnnotateStack, self).__init__()
self.edge_program = edge_program

def _annotate_unbind(self, graph_module: torch.fx.GraphModule):
partitions = get_source_partitions(graph_module.graph, [torch.unbind, "unbind"])
for _, src_partitions in partitions.items():
for src_partition in src_partitions:
if src_partition.input_nodes[0].target in dq_ops:
q_node = src_partition.input_nodes[0].args[0]
quant_attrs = get_quant_attrs(self.edge_program, q_node)
for n in src_partition.nodes:
n.meta[QCOM_QUANT_ATTRS] = quant_attrs.copy()

def _annotate_stack(self, graph_module: torch.fx.GraphModule):
partitions = get_source_partitions(graph_module.graph, [torch.stack, "stack"])
for _, src_partitions in partitions.items():
@@ -46,7 +36,6 @@ def _annotate_stack(self, graph_module: torch.fx.GraphModule):
n.meta[QCOM_QUANT_ATTRS] = quant_attrs.copy()

def call(self, graph_module: torch.fx.GraphModule):
self._annotate_unbind(graph_module)
self._annotate_stack(graph_module)
graph_module.recompile()
return PassResult(graph_module, True)
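Both passes lean on get_source_partitions to recover the aten nodes that a single torch.stack or torch.unbind call decomposed into. A small illustration of the lookup, using a hypothetical toy model:

import torch
from torch.fx.passes.utils.source_matcher_utils import get_source_partitions

class StackModel(torch.nn.Module):
    def forward(self, x, y):
        return torch.stack([x, y])

gm = torch.export.export(StackModel(), (torch.randn(3), torch.randn(3))).graph_module

# Maps source (e.g. torch.stack) -> list of SourcePartition, each carrying
# the decomposed nodes plus their input/output boundary nodes.
partitions = get_source_partitions(gm.graph, [torch.stack, "stack"])
for source, parts in partitions.items():
    for p in parts:
        print(source, [n.name for n in p.nodes], p.input_nodes, p.output_nodes)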
39 changes: 39 additions & 0 deletions backends/qualcomm/_passes/annotate_unbind.py
@@ -0,0 +1,39 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import torch
from executorch.backends.qualcomm.utils.constants import QCOM_QUANT_ATTRS
from executorch.exir.pass_base import ExportPass, PassResult
from torch.fx.passes.utils.source_matcher_utils import get_source_partitions

from .utils import dq_ops, get_quant_attrs


class AnnotateUnbind(ExportPass):
"""
Add "quant_attrs" to graph nodes' meta from the QDQ information
generated after quantization process.
"""

decomp_ops = [torch.ops.aten.unbind.int]

def __init__(self, edge_program: torch.export.ExportedProgram):
super(AnnotateUnbind, self).__init__()
self.edge_program = edge_program

def _annotate_unbind(self, graph_module: torch.fx.GraphModule):
partitions = get_source_partitions(graph_module.graph, [torch.unbind, "unbind"])
for _, src_partitions in partitions.items():
for src_partition in src_partitions:
if src_partition.input_nodes[0].target in dq_ops:
q_node = src_partition.input_nodes[0].args[0]
quant_attrs = get_quant_attrs(self.edge_program, q_node)
for n in src_partition.nodes:
n.meta[QCOM_QUANT_ATTRS] = quant_attrs.copy()

def call(self, graph_module: torch.fx.GraphModule):
self._annotate_unbind(graph_module)
graph_module.recompile()
return PassResult(graph_module, True)
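Note that the pass only fires when the unbind input is produced by a dequantize node (the dq_ops check). A self-contained float toy model makes the gate visible — a hypothetical example, not from the PR:

import torch
from executorch.backends.qualcomm._passes import AnnotateUnbind
from executorch.backends.qualcomm.utils.constants import QCOM_QUANT_ATTRS

class Unbind(torch.nn.Module):
    def forward(self, x):
        return torch.unbind(x, dim=0)

ep = torch.export.export(Unbind(), (torch.randn(2, 4),))

# On a float graph no dequantize feeds unbind, so the dq_ops gate above keeps
# the pass a no-op; after PT2E quantization the unbind partition's nodes
# would receive QCOM_QUANT_ATTRS instead.
result = AnnotateUnbind(ep).call(ep.graph_module)
annotated = [n.name for n in result.graph_module.graph.nodes if QCOM_QUANT_ATTRS in n.meta]
print(annotated)  # [] for this float toy model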
10 changes: 1 addition & 9 deletions backends/qualcomm/_passes/build_quant_io.py
@@ -27,25 +27,17 @@ def _make_spec(self, x):
return None

def _build(self, graph_module: torch.fx.GraphModule) -> torch.fx.GraphModule:
# forcely update delegate node's meta['spec'] to get correct output
# Forcedly update delegate node's meta['spec'] to get correct output
# tensor size in runtime
call_delegate = [
node
for node in graph_module.graph.nodes
if node.op == "call_function" and node.name == "executorch_call_delegate"
]
assert len(call_delegate) == 1
spec = []
for n in graph_module.graph.nodes:
if QCOM_QUANTIZED_IO in n.meta:
n.meta["val"] = n.meta["val"].to(dtype=n.meta[QCOM_QUANTIZED_IO])
if n.op == "call_function" and "getitem" in n.name:
fake_tensor = n.meta["val"]
if QCOM_QUANTIZED_IO in n.meta:
fake_tensor = fake_tensor.to(dtype=n.meta[QCOM_QUANTIZED_IO])
spec.append(self._make_spec(fake_tensor))

call_delegate[0].meta["spec"] = tuple(spec)

def call(self, graph_module: torch.fx.GraphModule):
self._build(graph_module)
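The meta['val'] rewrite above exists so the delegate's output specs report quantized tensor sizes at runtime. A toy illustration of the retagging, assuming QCOM_QUANTIZED_IO holds a target dtype such as torch.uint8:

import torch

fake = torch.empty(1, 3, 224, 224)      # stand-in for n.meta["val"] (a FakeTensor)
quantized_dtype = torch.uint8           # stand-in for n.meta[QCOM_QUANTIZED_IO]

# Mirrors n.meta["val"] = n.meta["val"].to(dtype=...): a uint8 spec is 4x
# smaller than float32, which is what the runtime must allocate.
fake = fake.to(dtype=quantized_dtype)
print(fake.dtype, fake.numel() * fake.element_size())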
81 changes: 0 additions & 81 deletions backends/qualcomm/_passes/constant_i64_to_i32.py

This file was deleted.
