Qualcomm AI Engine Direct - Add QNN support for to_edge_transform_and_lower #9643

Merged
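For context, the flow this PR enables looks roughly like the sketch below: export a module with torch.export, then hand QnnPartitioner to to_edge_transform_and_lower. The chipset and compiler-spec arguments are illustrative assumptions based on the existing Qualcomm backend helpers (the import path for QcomChipset has moved between releases), not the PR's verbatim usage.

import torch
from executorch.backends.qualcomm.partition.qnn_partitioner import QnnPartitioner
from executorch.backends.qualcomm.serialization.qc_schema import QcomChipset
from executorch.backends.qualcomm.utils.utils import (
    generate_htp_compiler_spec,
    generate_qnn_executorch_compiler_spec,
)
from executorch.exir import to_edge_transform_and_lower

class Add(torch.nn.Module):
    def forward(self, x, y):
        return x + y

ep = torch.export.export(Add(), (torch.randn(2, 2), torch.randn(2, 2)))

# Build QNN compiler specs; SM8550 and use_fp16 are illustrative choices.
compiler_specs = generate_qnn_executorch_compiler_spec(
    soc_model=QcomChipset.SM8550,
    backend_options=generate_htp_compiler_spec(use_fp16=True),
)

# Partition and lower QNN-supported subgraphs in one step.
edge = to_edge_transform_and_lower(ep, partitioner=[QnnPartitioner(compiler_specs)])
executorch_program = edge.to_executorch()

The file-by-file changes follow.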
Changes from all commits
20 changes: 10 additions & 10 deletions backends/qualcomm/_passes/__init__.py
@@ -4,51 +4,51 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from .annotate_decomposed import AnnotateDecomposed
from .annotate_quant_attrs import AnnotateQuantAttrs
from .constant_i64_to_i32 import ConstantI64toI32
from .annotate_stack import AnnotateStack
from .annotate_unbind import AnnotateUnbind
from .convert_bmm_to_matmul import ConvertBmmToMatmul
from .convert_conv1d_to_conv2d import ConvertConv1dToConv2d
from .convert_to_linear import ConvertToLinear
from .decompose_any import DecomposeAny
from .decompose_einsum import DecomposeEinsum
from .decompose_expm1 import DecomposeExpM1
from .decompose_linalg_vector_norm import DecomposeLinalgVectorNorm
from .decompose_silu import DecomposeSilu
from .expand_broadcast_tensor_shape import ExpandBroadcastTensorShape
from .fixed_linear_keep_dim import FixedLinearKeepDim
from .fold_qdq import FoldQDQ
from .fuse_consecutive_transpose import FuseConsecutiveTranspose
from .i64_to_i32 import I64toI32
from .insert_io_qdq import InsertIOQDQ
from .insert_requantize import InsertRequantize
from .layout_transform import LayoutTransform
from .lift_constant_scalar_operands import LiftConstantScalarOperands
from .recompose_pixel_unshuffle import RecomposePixelUnshuffle
from .recompose_prelu import RecomposePReLU
from .recompose_rms_norm import RecomposeRmsNorm
from .reduce_dynamic_range import ReduceDynamicRange
from .remove_redundancy import RemoveRedundancy
from .replace_arange_args import ReplaceArangeArgs
from .replace_index_put_input import ReplaceIndexPutInput
from .replace_inf_values import ReplaceInfValues
from .tensor_i64_to_i32 import TensorI64toI32
from .tag_quant_io import TagQuantIO


__all__ = [
AnnotateDecomposed,
AnnotateQuantAttrs,
ConstantI64toI32,
AnnotateStack,
AnnotateUnbind,
ConvertBmmToMatmul,
ConvertConv1dToConv2d,
RecomposePReLU,
ConvertToLinear,
DecomposeAny,
DecomposeEinsum,
DecomposeExpM1,
DecomposeLinalgVectorNorm,
DecomposeSilu,
ExpandBroadcastTensorShape,
FixedLinearKeepDim,
FoldQDQ,
FuseConsecutiveTranspose,
I64toI32,
InsertIOQDQ,
InsertRequantize,
LayoutTransform,
@@ -60,5 +60,5 @@
ReplaceArangeArgs,
ReplaceIndexPutInput,
ReplaceInfValues,
TensorI64toI32,
TagQuantIO,
]
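Each entry in __all__ is an ExportPass; as the diffs below show, a pass is constructed from an ExportedProgram and invoked on its GraphModule. A minimal standalone run, assuming a toy module (everything except the pass itself is illustrative):

import torch
from executorch.backends.qualcomm._passes import AnnotateStack

class Stack(torch.nn.Module):
    def forward(self, x, y):
        return torch.stack([x, y])

ep = torch.export.export(Stack(), (torch.randn(3), torch.randn(3)))

# The constructor takes the ExportedProgram; call() runs on its GraphModule
# and returns a PassResult (see annotate_stack.py below).
result = AnnotateStack(ep).call(ep.graph_module)
print(result.graph_module.graph)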
backends/qualcomm/_passes/annotate_decomposed.py → backends/qualcomm/_passes/annotate_stack.py
@@ -8,31 +8,21 @@
from executorch.exir.pass_base import ExportPass, PassResult
from torch.fx.passes.utils.source_matcher_utils import get_source_partitions

from .utils import dq_ops, get_quant_attrs, q_ops
from .utils import get_quant_attrs, q_ops


class AnnotateDecomposed(ExportPass):
class AnnotateStack(ExportPass):
"""
Add "quant_attrs" to graph nodes' meta from the QDQ information
generated after quantization process.
"""

decomp_ops = [torch.ops.aten.stack.default, torch.ops.aten.unbind.int]
decomp_ops = [torch.ops.aten.unbind.int]

def __init__(self, edge_program: torch.export.ExportedProgram):
super(AnnotateDecomposed, self).__init__()
super(AnnotateStack, self).__init__()
self.edge_program = edge_program

def _annotate_unbind(self, graph_module: torch.fx.GraphModule):
partitions = get_source_partitions(graph_module.graph, [torch.unbind, "unbind"])
for _, src_partitions in partitions.items():
for src_partition in src_partitions:
if src_partition.input_nodes[0].target in dq_ops:
q_node = src_partition.input_nodes[0].args[0]
quant_attrs = get_quant_attrs(self.edge_program, q_node)
for n in src_partition.nodes:
n.meta[QCOM_QUANT_ATTRS] = quant_attrs.copy()

def _annotate_stack(self, graph_module: torch.fx.GraphModule):
partitions = get_source_partitions(graph_module.graph, [torch.stack, "stack"])
for _, src_partitions in partitions.items():
@@ -46,7 +36,6 @@ def _annotate_stack(self, graph_module: torch.fx.GraphModule):
n.meta[QCOM_QUANT_ATTRS] = quant_attrs.copy()

def call(self, graph_module: torch.fx.GraphModule):
self._annotate_unbind(graph_module)
self._annotate_stack(graph_module)
graph_module.recompile()
return PassResult(graph_module, True)
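Both passes lean on get_source_partitions to recover the aten nodes that a single torch.stack or torch.unbind call decomposed into. A small illustration of the lookup, using a hypothetical toy model:

import torch
from torch.fx.passes.utils.source_matcher_utils import get_source_partitions

class StackModel(torch.nn.Module):
    def forward(self, x, y):
        return torch.stack([x, y])

gm = torch.export.export(StackModel(), (torch.randn(3), torch.randn(3))).graph_module

# Maps source (e.g. torch.stack) -> list of SourcePartition, each carrying
# the decomposed nodes plus their input/output boundary nodes.
partitions = get_source_partitions(gm.graph, [torch.stack, "stack"])
for source, parts in partitions.items():
    for p in parts:
        print(source, [n.name for n in p.nodes], p.input_nodes, p.output_nodes)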
39 changes: 39 additions & 0 deletions backends/qualcomm/_passes/annotate_unbind.py
@@ -0,0 +1,39 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import torch
from executorch.backends.qualcomm.utils.constants import QCOM_QUANT_ATTRS
from executorch.exir.pass_base import ExportPass, PassResult
from torch.fx.passes.utils.source_matcher_utils import get_source_partitions

from .utils import dq_ops, get_quant_attrs


class AnnotateUnbind(ExportPass):
"""
Add "quant_attrs" to graph nodes' meta from the QDQ information
generated after quantization process.
"""

decomp_ops = [torch.ops.aten.unbind.int]

def __init__(self, edge_program: torch.export.ExportedProgram):
super(AnnotateUnbind, self).__init__()
self.edge_program = edge_program

def _annotate_unbind(self, graph_module: torch.fx.GraphModule):
partitions = get_source_partitions(graph_module.graph, [torch.unbind, "unbind"])
for _, src_partitions in partitions.items():
for src_partition in src_partitions:
if src_partition.input_nodes[0].target in dq_ops:
q_node = src_partition.input_nodes[0].args[0]
quant_attrs = get_quant_attrs(self.edge_program, q_node)
for n in src_partition.nodes:
n.meta[QCOM_QUANT_ATTRS] = quant_attrs.copy()

def call(self, graph_module: torch.fx.GraphModule):
self._annotate_unbind(graph_module)
graph_module.recompile()
return PassResult(graph_module, True)
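Note that the pass only fires when the unbind input is produced by a dequantize node (the dq_ops check). A self-contained float toy model makes the gate visible — a hypothetical example, not from the PR:

import torch
from executorch.backends.qualcomm._passes import AnnotateUnbind
from executorch.backends.qualcomm.utils.constants import QCOM_QUANT_ATTRS

class Unbind(torch.nn.Module):
    def forward(self, x):
        return torch.unbind(x, dim=0)

ep = torch.export.export(Unbind(), (torch.randn(2, 4),))

# On a float graph no dequantize feeds unbind, so the dq_ops gate above keeps
# the pass a no-op; after PT2E quantization the unbind partition's nodes
# would receive QCOM_QUANT_ATTRS instead.
result = AnnotateUnbind(ep).call(ep.graph_module)
annotated = [n.name for n in result.graph_module.graph.nodes if QCOM_QUANT_ATTRS in n.meta]
print(annotated)  # [] for this float toy model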
10 changes: 1 addition & 9 deletions backends/qualcomm/_passes/build_quant_io.py
@@ -27,25 +27,17 @@ def _make_spec(self, x):
return None

def _build(self, graph_module: torch.fx.GraphModule) -> torch.fx.GraphModule:
# forcely update delegate node's meta['spec'] to get correct output
# Forcedly update delegate node's meta['spec'] to get correct output
# tensor size in runtime
call_delegate = [
node
for node in graph_module.graph.nodes
if node.op == "call_function" and node.name == "executorch_call_delegate"
]
assert len(call_delegate) == 1
spec = []
for n in graph_module.graph.nodes:
if QCOM_QUANTIZED_IO in n.meta:
n.meta["val"] = n.meta["val"].to(dtype=n.meta[QCOM_QUANTIZED_IO])
if n.op == "call_function" and "getitem" in n.name:
fake_tensor = n.meta["val"]
if QCOM_QUANTIZED_IO in n.meta:
fake_tensor = fake_tensor.to(dtype=n.meta[QCOM_QUANTIZED_IO])
spec.append(self._make_spec(fake_tensor))

call_delegate[0].meta["spec"] = tuple(spec)

def call(self, graph_module: torch.fx.GraphModule):
self._build(graph_module)
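The meta['val'] rewrite above exists so the delegate's output specs report quantized tensor sizes at runtime. A toy illustration of the retagging, assuming QCOM_QUANTIZED_IO holds a target dtype such as torch.uint8:

import torch

fake = torch.empty(1, 3, 224, 224)      # stand-in for n.meta["val"] (a FakeTensor)
quantized_dtype = torch.uint8           # stand-in for n.meta[QCOM_QUANTIZED_IO]

# Mirrors n.meta["val"] = n.meta["val"].to(dtype=...): a uint8 spec is 4x
# smaller than float32, which is what the runtime must allocate.
fake = fake.to(dtype=quantized_dtype)
print(fake.dtype, fake.numel() * fake.element_size())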
81 changes: 0 additions & 81 deletions backends/qualcomm/_passes/constant_i64_to_i32.py

This file was deleted.
