[FIX] allow auto_round lm_head quantization (#282)
* enable auto_round lm_head quantization

* Update base.py

---------

Co-authored-by: LRL-ModelCloud <lrl@modelcloud.ai>
Co-authored-by: Qubitium-ModelCloud <qubitium@modelcloud.ai>
3 people authored Jul 23, 2024
1 parent 9197738 commit 015a76f
Showing 1 changed file with 2 additions and 3 deletions.

gptqmodel/models/base.py
```diff
@@ -170,9 +170,8 @@ def quantize(
             logger.warning("According to the issue https://github.com/ModelCloud/GPTQModel/issues/278, transformers version 4.43.0 has broken batch_size. until the issue is resolved, hard set the batch_size to 1.")
             batch_size = 1
 
-        # TODO: lm_head quantization is yet ready but pending
-        if self.quantize_config.lm_head:
-            raise ValueError("lm_head quantization is currently inference only and not applicable for quantization. Please set `lm_head=False`.")
+        if self.quantize_config.lm_head and not isinstance(self.quantize_config, AutoRoundQuantizeConfig):
+            raise ValueError("`lm_head=True` quantization is only available with AutoRound quantizer. Please use `AutoRoundQuantizeConfig` instead of `QuantizeConfig` and set `lm_head=True` or set `lm_head=False`.")
 
         if len(calibration_dataset) == 0:
             raise ValueError("Calibration dataset must not be empty.")
```
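For context, here is a minimal usage sketch of the behavior this commit enables: requesting `lm_head` quantization is now accepted when the config is an `AutoRoundQuantizeConfig`, and still rejected for the plain `QuantizeConfig`. This sketch is not from the commit itself; the import paths, the `from_pretrained`/`quantize` call shape, the model id, and the calibration format are assumptions, so check the GPTQModel documentation for your installed version.

```python
# Minimal sketch based on the diff above. Import paths, parameter names,
# and the calibration format are assumptions, not a verified API.
from gptqmodel import GPTQModel  # assumed top-level export
from gptqmodel.quantization import AutoRoundQuantizeConfig  # assumed path

# After this commit, lm_head=True raises ValueError for a plain
# QuantizeConfig but is permitted for AutoRoundQuantizeConfig.
quantize_config = AutoRoundQuantizeConfig(
    bits=4,
    group_size=128,
    lm_head=True,  # allowed only with the AutoRound config
)

model = GPTQModel.from_pretrained("facebook/opt-125m", quantize_config)

# The diff also shows quantize() rejects an empty calibration set, so pass
# at least one example (raw text here; tokenized dicts may also be accepted).
calibration_dataset = ["GPTQModel is a toolkit for LLM quantization."]
model.quantize(calibration_dataset)
```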
