From 5c598cc7ed981644372627a58957889d6ace0b28 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Wed, 14 Aug 2024 08:53:17 +0200 Subject: [PATCH] Fixing the other pathways. --- .../layers/gptq/exllamav2.py | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/server/text_generation_server/layers/gptq/exllamav2.py b/server/text_generation_server/layers/gptq/exllamav2.py index 4bd2f63f452..920a6adf4b1 100644 --- a/server/text_generation_server/layers/gptq/exllamav2.py +++ b/server/text_generation_server/layers/gptq/exllamav2.py @@ -115,13 +115,14 @@ def ext_make_q_matrix( w.qweight, extra.q_perm, extra.q_invperm, - none_tensor, - none_tensor, - none_tensor, - none_tensor, + none_tensor, # q_scale + none_tensor, # q_scale_max + none_tensor, # q_groups + none_tensor, # q_group_map w.qzeros, w.scales, w.g_idx.cpu(), + none_tensor, # bias temp_dq, max_dq_rows, ) @@ -129,15 +130,16 @@ def ext_make_q_matrix( else: return make_q_matrix( w.qweight, - none_tensor, - none_tensor, - none_tensor, - none_tensor, - none_tensor, - none_tensor, + none_tensor, # q_perm + none_tensor, # q_invperm + none_tensor, # q_scale + none_tensor, # q_scale_max + none_tensor, # q_groups + none_tensor, # q_group_map w.qzeros, w.scales, - none_tensor, + none_tensor, # g_idx + none_tensor, # bias temp_dq, max_dq_rows, )