import unittest

import torch
-from parameterized import param, parameterized
from torch.testing import FileCheck

from torchao.dtypes import PlainLayout
-from torchao.dtypes.affine_quantized_tensor import AffineQuantizedTensor
from torchao.experimental.packed_linear_int8_dynamic_activation_intx_weight_layout import (
    PackedLinearInt8DynamicActivationIntxWeightLayout,
)
from torchao.experimental.q_dq_layout import QDQLayout
-from torchao.experimental.quant_api import int8_dynamic_activation_intx_weight
-from torchao.quantization.granularity import PerGroup, PerRow
-from torchao.quantization.linear_activation_quantized_tensor import (
-    LinearActivationQuantizedTensor,
+from torchao.experimental.quant_api import (
+    int8_dynamic_activation_intx_weight,
+)
+from torchao.quantization.granularity import (
+    PerGroup,
+    PerRow,
)
from torchao.quantization.quant_api import quantize_
from torchao.utils import unwrap_tensor_subclass


-# def _truncate_weight_to_bf16(weight):
-#     assert isinstance(weight, AffineQuantizedTensor)
-#     assert isinstance(weight.tensor_impl.get_layout(), PlainLayout)
-#     assert weight.tensor_impl.scale.dtype == torch.float32
-#     print("BEFORE", weight.tensor_impl.scale[0][0])
-#     weight.tensor_impl.scale = weight.tensor_impl.scale.to(torch.bfloat16).to(
-#         torch.float32
-#     )
-#     print("AFTER", weight.tensor_impl.scale[0][0])
-#     print("CHANGE", weight.tensor_impl.scale.to(torch.bfloat16).to(torch.float32)[0][0])
-
-
-# def _truncate_model_to_bf16(model):
-#     for name, param in model.named_parameters():
-#         print(name, param.dtype)
-#         # print(param)
-#         if isinstance(param, LinearActivationQuantizedTensor):
-#             print("FOUND ONE")
-#             _truncate_weight_to_bf16(param.original_weight_tensor)
-
-
class TestInt8DynamicActivationIntxWeight(unittest.TestCase):
-    TEST_ACCURACY_CASES = [
-        param(
-            layout=layout,
-            weight_dtype=weight_dtype,
-            has_weight_zeros=has_weight_zeros,
-            granularity=granularity,
-        )
-        for layout in [
+    def test_accuracy(self):
+        """
+        Checks the accuracy of different layouts by comparing the results to PlainLayout()
+        """
+        m = 1
+        n = 1071
+        k = 4096
+        activations = torch.randn(m, k)
+        model = torch.nn.Sequential(*[torch.nn.Linear(k, n, bias=False)])
+
+        reference_layout = PlainLayout()
+        test_layouts = [
            PackedLinearInt8DynamicActivationIntxWeightLayout(),
            QDQLayout(),
        ]
-        for weight_dtype in [
+        test_weight_dtypes = [
            torch.int1,
            torch.int2,
            torch.int3,
@@ -71,68 +54,37 @@ class TestInt8DynamicActivationIntxWeight(unittest.TestCase):
            torch.int7,
            torch.int8,
        ]
-        for has_weight_zeros in [
-            True,
-            False,
-        ]
-        for granularity in [
-            PerGroup(128),
-            PerRow(),
-        ]
-    ]
-
-    @parameterized.expand(
-        TEST_ACCURACY_CASES,
-        name_func=lambda f, _, params: f.__name__ + f"_{params.kwargs}",
-    )
-    def test_accuracy(self, layout, weight_dtype, has_weight_zeros, granularity):
-        """
-        Checks the accuracy of different layouts by comparing the results to PlainLayout()
-        """
-        m = 1
-        n = 1071
-        k = 4096
-        activations = torch.randn(m, k)
-        model = torch.nn.Sequential(*[torch.nn.Linear(k, n, bias=False)])
-
-        reference_layout = PlainLayout()
-        quantized_model = copy.deepcopy(model)
-        quantize_(
-            quantized_model,
-            int8_dynamic_activation_intx_weight(
-                weight_dtype=weight_dtype,
-                granularity=granularity,
-                has_weight_zeros=has_weight_zeros,
-                layout=layout,
-            ),
-        )
-
-        quantized_model_reference = copy.deepcopy(model)
-        quantize_(
-            quantized_model_reference,
-            int8_dynamic_activation_intx_weight(
-                weight_dtype=weight_dtype,
-                granularity=granularity,
-                has_weight_zeros=has_weight_zeros,
-                layout=reference_layout,
-            ),
-        )
-
-        with torch.no_grad():
-            result = quantized_model(activations)
-            expected_result = quantized_model_reference(activations)
-
-        if (
-            isinstance(layout, PackedLinearInt8DynamicActivationIntxWeightLayout)
-            and weight_dtype == torch.int4
-            and not has_weight_zeros
+        test_has_weight_zeros = [True, False]
+        test_granularities = [PerGroup(128), PerRow()]
+        for layout, weight_dtype, has_weight_zeros, granularity in itertools.product(
+            test_layouts, test_weight_dtypes, test_has_weight_zeros, test_granularities
        ):
-            # Use relaxed MSE accuracy criteria for KleidiAI kernels
-            self.assertTrue(
-                torch.nn.functional.mse_loss(result, expected_result) <= 1e-5
+            quantized_model = copy.deepcopy(model)
+            quantize_(
+                quantized_model,
+                int8_dynamic_activation_intx_weight(
+                    weight_dtype=weight_dtype,
+                    granularity=granularity,
+                    has_weight_zeros=has_weight_zeros,
+                    layout=layout,
+                ),
+            )
+
+            quantized_model_reference = copy.deepcopy(model)
+            quantize_(
+                quantized_model_reference,
+                int8_dynamic_activation_intx_weight(
+                    weight_dtype=weight_dtype,
+                    granularity=granularity,
+                    has_weight_zeros=has_weight_zeros,
+                    layout=reference_layout,
+                ),
            )
-        else:
-            self.assertTrue(torch.allclose(result, expected_result, atol=1e-5))
+
+            with torch.no_grad():
+                result = quantized_model(activations)
+                expected_result = quantized_model_reference(activations)
+            self.assertTrue(torch.allclose(result, expected_result, atol=1e-6))

    def test_export_compile_aoti_PackedLinearInt8DynamicActivationIntxWeightLayout(
        self,
@@ -232,7 +184,3 @@ def test_export_QDQLayout(self):
            FileCheck().check_count(line, 1, exactly=True).run(
                exported.graph_module.code
            )
-
-
-if __name__ == "__main__":
-    unittest.main()
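
For readers less familiar with `itertools.product`: a minimal standalone sketch (not part of the diff, using stand-in strings and booleans rather than the real layout objects, dtypes, and granularities) showing that it enumerates the same Cartesian sweep the removed `parameterized` list comprehension produced, so the refactored loop covers an identical set of cases:

import itertools

# Stand-ins for two of the swept dimensions; the real test sweeps layouts,
# weight dtypes, has_weight_zeros, and granularities.
layouts = ["packed", "qdq"]
has_weight_zeros = [True, False]

# The removed code enumerated cases with a nested list comprehension ...
via_comprehension = [(l, z) for l in layouts for z in has_weight_zeros]
# ... while the new code iterates itertools.product inside the test body.
via_product = list(itertools.product(layouts, has_weight_zeros))

assert via_comprehension == via_product  # same 4 (layout, zeros) pairs, same order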