
Commit 903da49

fix awq padding issue (#1790)
Signed-off-by: Mengni Wang <mengni.wang@intel.com>
1 parent 4728fdc commit 903da49

1 file changed (+2 −1)

neural_compressor/adaptor/ox_utils/weight_only.py

Lines changed: 2 additions & 1 deletion
@@ -471,7 +471,7 @@ def apply_awq_scale(model, weight_config, absorb_pairs, output_dicts, num_bits,
     scales = np.clip(np.power(inp_scale, ratio) / np.power(w_scale, (1 - ratio)), 1e-4, None)
     scales = scales / np.sqrt(np.max(scales) * np.min(scales))
     weight = weight.T * scales
-    weight = pad_tensor(weight, group_size, (org_w_shape[0] + group_size - 1) // group_size).T
+    weight = pad_tensor(weight.T, group_size, (org_w_shape[0] + group_size - 1) // group_size)

     if (Version(ort.__version__) > ONNXRT1161_VERSION and num_bits == 4) or (
         Version(ort.__version__) >= ONNXRT116_VERSION and num_bits == 4 and group_size == 32
@@ -485,6 +485,7 @@ def apply_awq_scale(model, weight_config, absorb_pairs, output_dicts, num_bits,
     q_weight = qdq_tensor(weight, num_bits, group_size, scheme, "int") / np.expand_dims(scales, axis=-1)

+    q_weight = np.reshape(q_weight, (org_w_shape[1], -1))[:, : org_w_shape[0]]
     out = np.matmul(inp, q_weight.T)
     loss += np.mean(np.power((org_out - out), 2))

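The first hunk moves the transpose so that pad_tensor pads the weight along the axis that group_size actually blocks: dim 0 of the original org_w_shape, padded up to ceil(org_w_shape[0] / group_size) groups. Previously the already-transposed matrix was padded and then transposed back, which appears to put the zero rows on the wrong axis whenever org_w_shape[0] is not a multiple of group_size. The second hunk strips those zero rows back off after the quantize-dequantize step, so the reconstruction loss is computed against the original shape. Below is a minimal, self-contained sketch of that pad/unpad round-trip; pad_rows is a hypothetical stand-in for the repo's pad_tensor, and the shapes are invented for illustration:

    import numpy as np

    # Hypothetical stand-in for pad_tensor (assumed behavior): zero-pad
    # dim 0 of a 2-D array up to k_blocks * group_size rows.
    def pad_rows(x, group_size, k_blocks):
        pad_len = k_blocks * group_size - x.shape[0]
        return np.pad(x, ((0, max(pad_len, 0)), (0, 0)))

    K, N, group_size = 70, 8, 32                   # K deliberately not a multiple of 32
    w = np.arange(K * N, dtype=np.float32).reshape(K, N)

    k_blocks = (K + group_size - 1) // group_size  # ceil(70 / 32) == 3, as in the diff
    w_pad = pad_rows(w, group_size, k_blocks)      # (96, 8): 26 zero rows appended

    # Undo the padding the way the added line does: view the tensor with
    # one row per output channel, then keep only the first K input channels.
    q = w_pad.T                                    # (8, 96), output-channel major
    w_back = np.reshape(q, (N, -1))[:, :K]         # (8, 70)

    assert np.array_equal(w_back, w.T)             # round-trip is lossless

The assert passes because the appended rows are all zeros and the [:, :K] slice discards exactly those rows, so no real weight values are disturbed.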