
Commit

fix typo; add test
dsikka committed Sep 30, 2024
1 parent 3d12554 commit b54b633
Showing 2 changed files with 2 additions and 1 deletion.
1 change: 1 addition & 0 deletions tests/weight_loading/models-large.txt
@@ -2,3 +2,4 @@ compressed-tensors, nm-testing/Mixtral-8x7B-Instruct-v0.1-W4A16-quantized, main
compressed-tensors, nm-testing/Mixtral-8x7B-Instruct-v0.1-W4A16-channel-quantized, main
compressed-tensors, nm-testing/Mixtral-8x7B-Instruct-v0.1-W8A16-quantized, main
gptq_marlin, TheBloke/Mixtral-8x7B-v0.1-GPTQ, main
+awq_marlin, casperhansen/deepseek-coder-v2-instruct-awq, main
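
Each line of models-large.txt names one weight-loading test case in the form `quant_method, model_id, revision`; this commit adds an awq_marlin case. As a hedged illustration only (not vLLM's actual test harness), a minimal parser for this line format might look like:

```python
from typing import NamedTuple


class WeightLoadingCase(NamedTuple):
    """One row of models-large.txt: quantization method, HF model id, git revision."""
    quant_method: str
    model_id: str
    revision: str


def parse_cases(path: str) -> list[WeightLoadingCase]:
    """Parse 'method, model, revision' lines, skipping blank lines."""
    cases = []
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            quant_method, model_id, revision = (part.strip() for part in line.split(","))
            cases.append(WeightLoadingCase(quant_method, model_id, revision))
    return cases


# With this commit, parsing the file would also yield (illustrative call, hypothetical path):
# WeightLoadingCase("awq_marlin", "casperhansen/deepseek-coder-v2-instruct-awq", "main")
```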
2 changes: 1 addition & 1 deletion vllm/model_executor/layers/quantization/awq_marlin.py
@@ -293,7 +293,7 @@ def create_weights(self, layer: torch.nn.Module, num_experts: int,
"is_transposed":
True,
"quant_method":
-                    FusedMoeWeightScaleSupported.GROUP,
+                    FusedMoeWeightScaleSupported.GROUP.value,
})

w13_qweight = Parameter(torch.empty(num_experts,

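The one-line change in awq_marlin.py stores the enum's string value, rather than the enum member itself, in the extra weight attributes. Below is a minimal sketch of why that matters, assuming FusedMoeWeightScaleSupported is a string-valued enum (e.g. GROUP = "group") and that downstream weight-loading code compares the stored quant_method against those strings; the enum members and the comparison shown here are illustrative, not vLLM's exact internals.

```python
from enum import Enum


class FusedMoeWeightScaleSupported(Enum):
    # Assumed string-valued members; the real enum lives in vLLM's fused MoE layer code.
    TENSOR = "tensor"
    CHANNEL = "channel"
    GROUP = "group"


extra_weight_attrs = {
    "is_transposed": True,
    # Before the fix: the enum member itself was stored.
    "quant_method": FusedMoeWeightScaleSupported.GROUP,
}

# A downstream check comparing against the plain string fails (hypothetical check, for illustration):
print(extra_weight_attrs["quant_method"] == FusedMoeWeightScaleSupported.GROUP.value)  # False

# After the fix: store the string value, so string comparisons succeed.
extra_weight_attrs["quant_method"] = FusedMoeWeightScaleSupported.GROUP.value
print(extra_weight_attrs["quant_method"] == FusedMoeWeightScaleSupported.GROUP.value)  # True
```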