
Commit 6b3810f

jainapurva authored and facebook-github-bot committed
Updates to use torchao's updated choose_qparams_affine and quantize/dequantize_affine (#11070)
Summary: Updates to use torchao's updated choose_qparams_affine and quantize/dequantize_affine without the zero_point_domain arg

Pull Request resolved: #11070

Differential Revision: D75228037
1 parent 62a2509 commit 6b3810f

3 files changed: 1 addition, 7 deletions

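For context, the updated torchao primitives drop the trailing zero_point_domain argument entirely, so callers no longer pass "INT". Below is a minimal sketch of choosing qparams and quantizing with the new call shape, assuming the torchao.quantization.quant_primitives module layout; argument names and defaults differ across torchao releases, so treat this as illustrative rather than the exact ExecuTorch usage.

import torch
from torchao.quantization.quant_primitives import (
    MappingType,
    choose_qparams_affine,
    quantize_affine,
)

weight = torch.randn(8, 64)
block_size = (1, 32)  # per-channel-group quantization with group size 32

# No zero_point_domain argument anymore; only mapping/dtype/range information is passed.
scale, zero_point = choose_qparams_affine(
    weight,
    MappingType.ASYMMETRIC,
    block_size,
    torch.int8,     # target_dtype
    quant_min=-8,   # 4-bit range stored in int8, matching the embedding_4bit pattern below
    quant_max=7,
)
weight_q = quantize_affine(
    weight, block_size, scale, zero_point, torch.int8, quant_min=-8, quant_max=7
)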

backends/xnnpack/utils/quant_utils.py

Lines changed: 0 additions & 3 deletions
@@ -222,9 +222,6 @@ def extract_qdq_affine_op_args_for_decomposed_ops(node: torch.fx.Node):
 
     # add target_dtype_node after quant_min/quant_max
     args.append(target_dtype)
-    # zero_point_domain
-    if len(node.args) > 7 and node.args[7] != "INT":
-        return None, None
 
     if is_per_channel_group(node):
         block_sizes = cast(list[int], node.args[1])

exir/passes/_quant_patterns_and_replacements.py

Lines changed: 0 additions & 3 deletions
@@ -1017,7 +1017,6 @@ def embedding_byte_dtype_pattern(
         torch.int8,
         -128,
         127,
-        "INT",
         output_dtype,
     )
     return torch.ops.aten.embedding.default(dq, indices)
@@ -1062,7 +1061,6 @@ def embedding_2bit_dtype_pattern(
         torch.int8,
         -2,
         1,
-        "INT",
         output_dtype,
     )
     return torch.ops.aten.embedding.default(dq, indices)
@@ -1110,7 +1108,6 @@ def embedding_4bit_dtype_pattern(
         torch.int8,
         -8,
         7,
-        "INT",
         output_dtype,
     )
     return torch.ops.aten.embedding.default(dq, indices)
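The "INT" literal deleted from each pattern above was that same zero_point_domain, passed positionally between quant_max and output_dtype. A hypothetical standalone version of the 4-bit pattern body after the change is sketched below; the block_size, scales, and zero_points plumbing is illustrative, and only the stored dtype, quant range, and output_dtype come from the diff.

import torch
from torchao.quantization.quant_primitives import dequantize_affine

vocab, dim, group = 16, 64, 32
weight_q = torch.randint(-8, 8, (vocab, dim), dtype=torch.int8)  # 4-bit values stored in int8
scales = torch.rand(vocab, dim // group)
zero_points = torch.zeros(vocab, dim // group, dtype=torch.int32)
output_dtype = torch.float32

dq = dequantize_affine(
    weight_q,
    (1, group),   # per-channel-group block size
    scales,
    zero_points,
    torch.int8,
    -8,           # quant_min
    7,            # quant_max
    # "INT" (zero_point_domain) used to be passed here; the torchao update removed it
    output_dtype=output_dtype,
)
indices = torch.tensor([0, 3, 5])
out = torch.ops.aten.embedding.default(dq, indices)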
