[FIX] allow auto_round lm_head quantization (#282)
* enable auto_round lm_head quantization

* Update base.py

---------

Co-authored-by: LRL-ModelCloud <lrl@modelcloud.ai>
Co-authored-by: Qubitium-ModelCloud <qubitium@modelcloud.ai>
3 people authored Jul 23, 2024
1 parent 9197738 commit 015a76f
Showing 1 changed file with 2 additions and 3 deletions.

gptqmodel/models/base.py
```diff
@@ -170,9 +170,8 @@ def quantize(
             logger.warning("According to the issue https://github.com/ModelCloud/GPTQModel/issues/278, transformers version 4.43.0 has broken batch_size. until the issue is resolved, hard set the batch_size to 1.")
             batch_size = 1
 
-        # TODO: lm_head quantization is yet ready but pending
-        if self.quantize_config.lm_head:
-            raise ValueError("lm_head quantization is currently inference only and not applicable for quantization. Please set `lm_head=False`.")
+        if self.quantize_config.lm_head and not isinstance(self.quantize_config, AutoRoundQuantizeConfig):
+            raise ValueError("`lm_head=True` quantization is only available with AutoRound quantizer. Please use `AutoRoundQuantizeConfig` instead of `QuantizeConfig` and set `lm_head=True` or set `lm_head=False`.")
 
         if len(calibration_dataset) == 0:
             raise ValueError("Calibration dataset must not be empty.")
```
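For context, here is a minimal usage sketch of the behavior this commit enables: requesting `lm_head` quantization is now accepted when the config is an `AutoRoundQuantizeConfig`, and still rejected for the plain `QuantizeConfig`. This sketch is not from the commit itself; the import paths, the `from_pretrained`/`quantize` call shape, the model id, and the calibration format are assumptions, so check the GPTQModel documentation for your installed version.

```python
# Minimal sketch based on the diff above. Import paths, parameter names,
# and the calibration format are assumptions, not a verified API.
from gptqmodel import GPTQModel  # assumed top-level export
from gptqmodel.quantization import AutoRoundQuantizeConfig  # assumed path

# After this commit, lm_head=True raises ValueError for a plain
# QuantizeConfig but is permitted for AutoRoundQuantizeConfig.
quantize_config = AutoRoundQuantizeConfig(
    bits=4,
    group_size=128,
    lm_head=True,  # allowed only with the AutoRound config
)

model = GPTQModel.from_pretrained("facebook/opt-125m", quantize_config)

# The diff also shows quantize() rejects an empty calibration set, so pass
# at least one example (raw text here; tokenized dicts may also be accepted).
calibration_dataset = ["GPTQModel is a toolkit for LLM quantization."]
model.quantize(calibration_dataset)
```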
