pytorch · jerryzh168 · Apr 22, 2025 · Apr 22, 2025
diff --git a/torchao/quantization/qat/embedding.py b/torchao/quantization/qat/embedding.py
@@ -196,7 +196,7 @@ def convert(
         """
         self._convert_helper(model)
         return model
-
+
     @staticmethod
     def quantize_weights(
         weight: torch.Tensor,
@@ -207,11 +207,12 @@ def quantize_weights(
         Helper function to quantize weights
         """
         (qmin, qmax) = _get_qmin_qmax(bit_width)
-        (s, zp) = get_group_qparams_symmetric(weight, bit_width, group_size)
+        (s, zp) = get_group_qparams_symmetric(
+            weight, bit_width, group_size
+        )
         from torchao._executorch_ops import (
             _quantized_decomposed_quantize_per_channel_group_wrapper,
         )
-
         q_weight = _quantized_decomposed_quantize_per_channel_group_wrapper(
             weight,
             s,
@@ -223,6 +224,7 @@ def quantize_weights(
         )
         return (q_weight, s, zp)
 
+
     def _convert_helper(self, module: torch.nn.Module):
         """
         Helper function to recursively swap `Int4WeightOnlyQATEmbedding`
@@ -253,9 +255,7 @@ def _convert_helper(self, module: torch.nn.Module):
                 )
                 setattr(module, name, quantized_embedding)
 
-                q_weight, s, zp = self.quantize_weights(
-                    child.weight, self.bit_width, group_size
-                )
+                q_weight, s, zp = self.quantize_weights(child.weight, self.bit_width, group_size)
                 # Load weights and qparams into quantized embedding
                 quantized_embedding.weight = q_weight
                 quantized_embedding.scale = s.to(scale_precision)

diff --git a/torchao/quantization/qat/linear.py b/torchao/quantization/qat/linear.py
@@ -197,7 +197,7 @@ def convert(
     ) -> torch.nn.Module:
         self._convert_qat_linear_8da4w(model)
         return model
-
+
     @staticmethod
     def quantize_weights(
         weight: torch.Tensor,
@@ -209,7 +209,9 @@ def quantize_weights(
         # Load weights and qparams into quantized linear
         n_bit = 4
         (qmin, qmax) = _get_qmin_qmax(n_bit)
-        (s, zp) = get_group_qparams_symmetric(weight, n_bit, group_size)
+        (s, zp) = get_group_qparams_symmetric(
+            weight, n_bit, group_size
+        )
         from torchao._executorch_ops import (
             _quantized_decomposed_quantize_per_channel_group_wrapper,
         )
@@ -225,6 +227,7 @@ def quantize_weights(
         )
         return (q_weight, s, zp)
 
+
     def _convert_qat_linear_8da4w(self, module: torch.nn.Module):
         """
         Replace all `Int8DynActInt4WeightQATLinear` with `Int8DynActInt4WeightLinear`.
@@ -242,9 +245,7 @@ def _convert_qat_linear_8da4w(self, module: torch.nn.Module):
                 )
                 setattr(module, name, quantized_linear)
 
-                q_weight, scales, zeros = self.quantize_weights(
-                    child.weight, config.group_size
-                )
+                q_weight, scales, zeros = self.quantize_weights(child.weight, config.group_size)
                 quantized_linear.weight = q_weight
                 quantized_linear.scales = scales
                 quantized_linear.zeros = zeros