Inline the awq_qweight_shape() and awq_group_count() helper methods into __init__

Qubitium · Qubitium · commit 70e4b0455965 · 2025-11-08T07:38:32.000Z
diff --git a/gptqmodel/nn_modules/qlinear/torch_fused_awq.py b/gptqmodel/nn_modules/qlinear/torch_fused_awq.py
@@ -72,10 +72,12 @@ def __init__(
             **kwargs,
         )
         if register_buffers:
-            qweight_shape = self.awq_qweight_shape()
+            # AWQ packs each input row into pack_factor-wide columns for int4 lanes.
+            pack_cols = max(1, self.out_features // self.pack_factor)
+            qweight_shape = (self.in_features, pack_cols)
             group_size = max(int(self.group_size), 1)
-            group_rows = self.awq_group_count()
-            pack_cols = qweight_shape[1]
+            # Each group holds group_size input rows; ceil ensures trailing rows are captured.
+            group_rows = max(1, math.ceil(self.in_features / group_size))
 
             self.register_buffer(
                 "qweight",
@@ -96,14 +98,6 @@ def __init__(
             else:
                 self.bias = None
 
-    def awq_qweight_shape(self):
-        pack_cols = max(1, self.out_features // self.pack_factor)
-        return self.in_features, pack_cols
-
-    def awq_group_count(self):
-        group_size = max(int(self.group_size), 1)
-        return max(1, math.ceil(self.in_features / group_size))
-
     def transform_cpu_awq(self, dtype):
         src_scales = self.scales
         if src_scales.dtype != torch.float16: