Skip to content

Commit 79506fc

Browse files
committed
Qualcomm AI Engine Direct - XR model enablement pipe_clean
Summary: support linalg_vector_norm and instance_norm; expand coverage of the quantization annotator; add test cases; small refactor of _passes importing.
1 parent 524ec78 commit 79506fc

21 files changed

+831
-105
lines changed

backends/qualcomm/_passes/__init__.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,54 @@
11
from .annotate_and_quant_scalar import AnnotateAndQuantScalar
22
from .annotate_decomposed import AnnotateDecomposed
33
from .annotate_quant_attrs import AnnotateQuantAttrs
4+
from .convert_binary_op_with_scalar import ConvertBinaryOpsWithScalar
45
from .convert_bmm_to_matmul import ConvertBmmToMatmul
56
from .convert_interpolate_with_upsample2d import ConvertInterpolateWithUpsample2D
67
from .convert_prelu import ConvertPReLU
78
from .convert_to_linear import ConvertToLinear
9+
from .decompose_any import DecomposeAny
10+
from .decompose_einsum import DecomposeEinsum
11+
from .decompose_linalg_vector_norm import DecomposeLinalgVectorNorm
12+
from .decompose_silu import DecomposeSilu
813
from .expand_broadcast_tensor_shape import ExpandBroadcastTensorShape
914
from .fold_qdq import FoldQDQ
15+
from .fuse_consecutive_transpose import FuseConsecutiveTranspose
1016
from .i64_to_i32 import I64toI32
17+
from .insert_io_qdq import InsertIOQDQ
18+
from .insert_requantize import InsertRequantize
1119
from .layout_transform import LayoutTransform
1220
from .recompose_pixel_unshuffle import RecomposePixelUnshuffle
1321
from .recompose_rms_norm import RecomposeRmsNorm
22+
from .reduce_dynamic_range import ReduceDynamicRange
1423
from .remove_redundancy import RemoveRedundancy
1524
from .replace_index_put_input import ReplaceIndexPutInput
25+
from .replace_inf_buffer import ReplaceInfBuffer
1626

1727

1828
# NOTE(review): `__all__` conventionally contains *strings*; here it holds the
# pass classes themselves, so `from ... import *` on this package would raise
# TypeError. Presumably downstream code iterates these entries as classes —
# confirm against callers before changing.
__all__ = [
    AnnotateAndQuantScalar,
    AnnotateDecomposed,
    AnnotateQuantAttrs,
    ConvertBmmToMatmul,
    ConvertBinaryOpsWithScalar,
    ConvertInterpolateWithUpsample2D,
    ConvertPReLU,
    ConvertToLinear,
    DecomposeAny,
    DecomposeEinsum,
    DecomposeLinalgVectorNorm,
    DecomposeSilu,
    ExpandBroadcastTensorShape,
    FoldQDQ,
    FuseConsecutiveTranspose,
    I64toI32,
    InsertIOQDQ,
    InsertRequantize,
    LayoutTransform,
    RecomposePixelUnshuffle,
    RecomposeRmsNorm,
    ReduceDynamicRange,
    RemoveRedundancy,
    ReplaceIndexPutInput,
    ReplaceInfBuffer,
]

backends/qualcomm/_passes/convert_to_linear.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ class ConvertToLinear(ExportPass):
3939
mm = exir_ops.edge.aten.mm.default
4040

4141
addmm_patterns = [
42+
{view_copy: 1, permute_copy: 1, addmm: 1},
4243
{view_copy: 2, permute_copy: 1, addmm: 1},
4344
{permute_copy: 1, addmm: 1},
4445
]
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# Copyright (c) Qualcomm Innovation Center, Inc.
2+
# All rights reserved
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
import torch
8+
from executorch.exir import to_edge
9+
from executorch.exir.pass_base import ExportPass, PassResult
10+
11+
12+
class Any(torch.nn.Module):
    """Math-equivalent reference module for `aten.any.dim`.

    Casts the input to int32, sums along `dim`, and compares the sum against
    zero — nonzero sum means at least one truthy element along that dim.

    Args:
        dim: dimension(s) to reduce over; a list is normalized to a tuple,
            and None means "reduce over all elements" (input is flattened).
        keepdim: whether the reduced dimension is retained with size 1.
    """

    def __init__(self, dim, keepdim):
        super().__init__()
        self.dim = tuple(dim) if isinstance(dim, list) else dim
        self.keepdim = keepdim

    def forward(self, x):
        # Use a local so forward() does not clobber self.dim: the original
        # wrote `self.dim = 0` here, permanently mutating module state on the
        # first call with dim=None.
        dim = self.dim
        if dim is None:
            x = torch.flatten(x)
            dim = 0

        x = x.to(torch.int32)
        x = torch.sum(x, dim=dim, keepdim=self.keepdim, dtype=torch.int32)
        # not_equal against an int32 zero tensor yields the boolean result.
        return torch.not_equal(x, torch.zeros(1, dtype=torch.int32))
26+
27+
28+
class DecomposeAny(ExportPass):
    """
    Decompose for math equivalent op.

    Replaces every `aten.any.dim` node with the traced graph of the `Any`
    module above (int32 cast + sum + not_equal), spliced inline, so the
    backend does not need a native `any` operator.
    """

    def __init__(self, quantization_capture=False) -> None:
        # quantization_capture: when True, export the replacement module
        # without lowering to edge dialect (used during quantization capture).
        super().__init__()
        self.quantization_capture = quantization_capture

    def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
        """Rewrite matching nodes in-place and return the modified module."""
        graph = graph_module.graph
        for node in graph.nodes:
            if "any.dim" in str(node.target):
                # aten.any.dim(input, dim, keepdim=False): trailing args are
                # optional, so fall back to their defaults when absent.
                dim = node.args[1] if len(node.args) > 1 else None
                keepdim = node.args[2] if len(node.args) > 2 else False
                model = Any(dim, keepdim)
                # Export the equivalent module on the node's sample value
                # (meta["val"]) to obtain a graph we can copy in.
                if self.quantization_capture:
                    decomposed_module = torch.export.export(
                        model, (node.args[0].meta["val"],)
                    ).module()
                else:
                    edge_mgr = to_edge(
                        torch.export.export(model, (node.args[0].meta["val"],))
                    )
                    decomposed_module = edge_mgr.exported_program()

                with graph.inserting_before(node):
                    # remap is used to map original node values to new node values,
                    # which ensures that references to nodes are correctly
                    # updated in the new graph
                    remap = {"x": node.args[0]}

                    for decomposed_node in decomposed_module.graph.nodes:
                        # no need to copy existent 'output'
                        if decomposed_node.op == "output":
                            for user in node.users.copy():
                                # remap
                                user.replace_input_with(
                                    node,
                                    remap[decomposed_node.args[0][0]],
                                )
                        # no need to copy existent placeholders
                        elif decomposed_node.op == "placeholder":
                            # replace node map from string to graph node
                            remap[decomposed_node] = remap.pop(decomposed_node.name)
                        else:
                            remap[decomposed_node] = graph.node_copy(
                                decomposed_node,
                                arg_transform=lambda x, remap=remap: remap[x],
                            )

                graph.erase_node(node)

        graph.eliminate_dead_code()
        graph_module.recompile()
        return PassResult(graph_module, True)
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# Copyright (c) Qualcomm Innovation Center, Inc.
2+
# All rights reserved
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
import torch
8+
from executorch.exir import to_edge
9+
from executorch.exir.pass_base import ExportPass, PassResult
10+
11+
12+
class LinalgVectorNorm(torch.nn.Module):
    """Math-equivalent reference module for `aten.linalg_vector_norm`.

    Computes sum(|x|^exp) ** (1/exp) along `dim` — the p-norm with p = exp.

    Args:
        exp: norm order p; must be nonzero and finite (0/inf norms are not
            decomposable this way).
        dim: dimension(s) to reduce over; a list/tuple is normalized to a
            tuple, an int is kept as-is, and None flattens the input.
        keepdim: whether the reduced dimension is retained with size 1.
    """

    def __init__(self, exp, dim, keepdim):
        super().__init__()
        self.exp = exp
        # Guard with isinstance (as the sibling `Any` module does): the
        # original `tuple(dim)` raised TypeError for a plain int dim.
        self.dim = tuple(dim) if isinstance(dim, (list, tuple)) else dim
        self.keepdim = keepdim

    def forward(self, x):
        # Use a local so forward() does not clobber self.dim: the original
        # wrote `self.dim = 0` here, permanently mutating module state on the
        # first call with dim=None.
        dim = self.dim
        if dim is None:
            x = torch.flatten(x)
            dim = 0

        x = torch.abs(x)
        x = torch.pow(x, self.exp)
        x = torch.sum(x, dim=dim, keepdim=self.keepdim)
        return torch.pow(x, 1.0 / self.exp)
28+
29+
30+
class DecomposeLinalgVectorNorm(ExportPass):
    """
    Decompose for math equivalent op.

    Replaces every `aten.linalg_vector_norm` node with the traced graph of
    the `LinalgVectorNorm` module above (abs + pow + sum + pow), spliced
    inline, so the backend does not need a native vector-norm operator.
    """

    def __init__(self, quantization_capture=False) -> None:
        # quantization_capture: when True, export the replacement module
        # without lowering to edge dialect (used during quantization capture).
        super().__init__()
        self.quantization_capture = quantization_capture

    def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
        """Rewrite matching nodes in-place and return the modified module."""
        graph = graph_module.graph
        for node in graph.nodes:
            if "linalg_vector_norm" in str(node.target):
                # linalg_vector_norm(input, ord=2, dim=None, keepdim=False):
                # trailing args are optional, so fall back to defaults.
                # NOTE(review): `ord` shadows the builtin of the same name.
                ord = node.args[1] if len(node.args) > 1 else 2.0
                dim = node.args[2] if len(node.args) > 2 else None
                keepdim = node.args[3] if len(node.args) > 3 else False
                model = LinalgVectorNorm(ord, dim, keepdim)
                # Export the equivalent module on the node's sample value
                # (meta["val"]) to obtain a graph we can copy in.
                if self.quantization_capture:
                    decomposed_module = torch.export.export(
                        model, (node.args[0].meta["val"],)
                    ).module()
                else:
                    edge_mgr = to_edge(
                        torch.export.export(model, (node.args[0].meta["val"],))
                    )
                    decomposed_module = edge_mgr.exported_program()

                with graph.inserting_before(node):
                    # remap is used to map original node values to new node values,
                    # which ensures that references to nodes are correctly
                    # updated in the new graph
                    remap = {"x": node.args[0]}

                    for decomposed_node in decomposed_module.graph.nodes:
                        # no need to copy existent 'output'
                        if decomposed_node.op == "output":
                            for user in node.users.copy():
                                # remap
                                user.replace_input_with(
                                    node,
                                    remap[decomposed_node.args[0][0]],
                                )
                        # no need to copy existent placeholders
                        elif decomposed_node.op == "placeholder":
                            # replace node map from string to graph node
                            remap[decomposed_node] = remap.pop(decomposed_node.name)
                        else:
                            remap[decomposed_node] = graph.node_copy(
                                decomposed_node,
                                arg_transform=lambda x, remap=remap: remap[x],
                            )

                graph.erase_node(node)

        graph.eliminate_dead_code()
        graph_module.recompile()
        return PassResult(graph_module, True)

backends/qualcomm/_passes/layout_transform.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,10 @@ class LayoutTransform(ExportPass):
3333
exir_ops.edge.aten.adaptive_avg_pool2d.default,
3434
exir_ops.edge.aten.avg_pool2d.default,
3535
exir_ops.edge.aten.convolution.default,
36+
exir_ops.edge.aten.instance_norm.default,
3637
exir_ops.edge.aten.max_pool2d_with_indices.default,
3738
exir_ops.edge.aten._native_batch_norm_legit_no_training.default,
39+
exir_ops.edge.aten._native_batch_norm_legit.no_stats,
3840
exir_ops.edge.aten.native_group_norm.default,
3941
exir_ops.edge.aten.pixel_shuffle.default,
4042
exir_ops.edge.aten.pixel_unshuffle.default,
@@ -55,6 +57,7 @@ class LayoutTransform(ExportPass):
5557
exir_ops.edge.aten.eq.Scalar,
5658
exir_ops.edge.aten.eq.Tensor,
5759
exir_ops.edge.aten.full.default,
60+
exir_ops.edge.aten.full_like.default,
5861
exir_ops.edge.aten.ge.Scalar,
5962
exir_ops.edge.aten.ge.Tensor,
6063
exir_ops.edge.aten.gelu.default,
@@ -75,6 +78,8 @@ class LayoutTransform(ExportPass):
7578
exir_ops.edge.aten.mean.dim,
7679
exir_ops.edge.aten.minimum.default,
7780
exir_ops.edge.aten.mul.Tensor,
81+
exir_ops.edge.aten.ne.Scalar,
82+
exir_ops.edge.aten.ne.Tensor,
7883
exir_ops.edge.aten.neg.default,
7984
exir_ops.edge.aten.pow.Tensor_Scalar,
8085
exir_ops.edge.aten.prelu.default,

backends/qualcomm/_passes/utils.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ def get_passes_dependency_for_capture_program():
6464
ConvertInterpolateWithUpsample2D,
6565
ConvertPReLU,
6666
ConvertToLinear,
67+
DecomposeAny,
68+
DecomposeLinalgVectorNorm,
6769
ExpandBroadcastTensorShape,
6870
FoldQDQ,
6971
I64toI32,
@@ -81,6 +83,8 @@ def get_passes_dependency_for_capture_program():
8183
ConvertPReLU: [RemoveRedundancy],
8284
ConvertBmmToMatmul: [ConvertToLinear],
8385
ConvertInterpolateWithUpsample2D: [RemoveRedundancy],
86+
DecomposeAny: [RemoveRedundancy],
87+
DecomposeLinalgVectorNorm: [RemoveRedundancy],
8488
I64toI32: [RemoveRedundancy],
8589
AnnotateQuantAttrs: [
8690
RecomposePixelUnshuffle,

backends/qualcomm/builders/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
op_embedding,
2626
op_eq,
2727
op_expand,
28+
op_full,
2829
op_full_like,
2930
op_ge,
3031
op_gelu,
@@ -35,6 +36,7 @@
3536
op_hardtanh,
3637
op_index,
3738
op_index_put,
39+
op_instance_norm,
3840
op_layer_norm,
3941
op_le,
4042
op_linear,
@@ -47,6 +49,7 @@
4749
op_mean_dim,
4850
op_min,
4951
op_mul,
52+
op_ne,
5053
op_neg,
5154
op_pad,
5255
op_pow,
@@ -99,6 +102,7 @@
99102
op_embedding,
100103
op_eq,
101104
op_expand,
105+
op_full,
102106
op_full_like,
103107
op_ge,
104108
op_gelu,
@@ -109,6 +113,7 @@
109113
op_hardsigmoid,
110114
op_index,
111115
op_index_put,
116+
op_instance_norm,
112117
op_layer_norm,
113118
op_le,
114119
op_linear,
@@ -122,6 +127,7 @@
122127
op_min,
123128
op_mul,
124129
op_neg,
130+
op_ne,
125131
op_pad,
126132
op_pow,
127133
op_prelu,

0 commit comments

Comments
 (0)