5 changes: 3 additions & 2 deletions vllm/model_executor/layers/quantization/awq_marlin.py

```diff
@@ -25,7 +25,7 @@ class AWQMarlinConfig(QuantizationConfig):
     def __init__(self, weight_bits: int, group_size: int, has_zp: bool,
                  lm_head_quantized: bool) -> None:
         self.weight_bits = weight_bits
-        self.pack_factor = 32 // self.weight_bits  # packed into int32
+        self.pack_factor = 32 // self.weight_bits  # packed into 32bits
         self.group_size = group_size
         self.has_zp = has_zp
         self.lm_head_quantized = lm_head_quantized
@@ -69,7 +69,8 @@ def from_config(cls, config: Dict[str, Any]) -> "AWQMarlinConfig":
     def override_quantization_method(cls, hf_quant_cfg,
                                      user_quant) -> Optional[str]:
         can_convert = cls.is_awq_marlin_compatible(hf_quant_cfg)
-        is_valid_user_quant = (user_quant is None or user_quant == "marlin")
+        is_valid_user_quant = (user_quant is None or user_quant == "marlin"
+                               or user_quant == "awq_marlin")
 
         if can_convert and is_valid_user_quant:
             msg = ("The model is convertible to {} during runtime."
```
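The point of the added clause, sketched outside vllm: before this change, a user who explicitly requested `awq_marlin` failed the validity check and the override fell through. A minimal standalone sketch of the updated predicate (`resolve_quant_method` and its arguments are hypothetical stand-ins for the classmethod above, not vllm's real config machinery):

```python
from typing import Optional


def resolve_quant_method(user_quant: Optional[str],
                         can_convert: bool = True) -> Optional[str]:
    # `user_quant` mirrors the value a user passes via --quantization;
    # `can_convert` stands in for is_awq_marlin_compatible(hf_quant_cfg).
    # New clause: an explicit "awq_marlin" request is now valid, so
    # naming the converted method directly no longer falls through.
    is_valid_user_quant = (user_quant is None or user_quant == "marlin"
                           or user_quant == "awq_marlin")
    if can_convert and is_valid_user_quant:
        return "awq_marlin"
    return None


# Before this change the last call returned None; now all three
# spellings resolve to the marlin-converted method.
assert resolve_quant_method(None) == "awq_marlin"
assert resolve_quant_method("marlin") == "awq_marlin"
assert resolve_quant_method("awq_marlin") == "awq_marlin"
```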
3 changes: 2 additions & 1 deletion vllm/model_executor/layers/quantization/gptq_marlin.py

```diff
@@ -79,7 +79,8 @@ def override_quantization_method(cls, hf_quant_cfg,
                                      user_quant) -> Optional[str]:
         can_convert = cls.is_gptq_marlin_compatible(hf_quant_cfg)
 
-        is_valid_user_quant = (user_quant is None or user_quant == "marlin")
+        is_valid_user_quant = (user_quant is None or user_quant == "marlin"
+                               or user_quant == "gptq_marlin")
 
         if can_convert and is_valid_user_quant:
             msg = ("The model is convertible to {} during runtime."
```
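On the usage side, the practical effect is that explicitly naming the converted method is now accepted as a user override. A hedged usage sketch, assuming the standard vllm offline entry point (the model name below is illustrative, not taken from this PR):

```python
# With this change, requesting the converted method by name is treated
# as a valid user override instead of being rejected by the check above.
from vllm import LLM

llm = LLM(model="TheBloke/Llama-2-7B-GPTQ",  # any GPTQ checkpoint
          quantization="gptq_marlin")        # now valid alongside
                                             # None and "marlin"
```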