Skip to content

Commit

Permalink
Merge pull request AutoGPTQ#140 from geekinglcq/fix_issue95
Browse files Browse the repository at this point in the history
fix weights not being transposed for Conv1D/2D in qlinear_cuda_old
  • Loading branch information
PanQiWei authored Jun 6, 2023
2 parents bf521cb + 618a5f5 commit 2ea2329
Showing 1 changed file with 7 additions and 1 deletion.
8 changes: 7 additions & 1 deletion auto_gptq/nn_modules/qlinear/qlinear_cuda_old.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,12 @@ def __init__(
self.trainable = trainable

def pack(self, linear, scales, zeros, g_idx):
W = linear.weight.data.clone()
if isinstance(linear, nn.Conv2d):
W = W.flatten(1)
if isinstance(linear, transformers.pytorch_utils.Conv1D):
W = W.t()

scales = scales.t().contiguous()
zeros = zeros.t().contiguous()
scale_zeros = zeros * scales
Expand All @@ -101,7 +107,7 @@ def pack(self, linear, scales, zeros, g_idx):
g_idx = idx // self.group_size
intweight.append(
torch.round(
(linear.weight.data[:, idx] + scale_zeros[g_idx]) / self.scales[g_idx]
(W[:, idx] + scale_zeros[g_idx]) / self.scales[g_idx]
).to(torch.int)[:, None]
)
intweight = torch.cat(intweight, dim=1)
Expand Down

0 comments on commit 2ea2329

Please sign in to comment.