
Commit 635eded

fix cuda CI backend issue, fix typo (#974)
1 parent 9ecfaa5 commit 635eded

3 files changed (+33, -14 lines)


auto_round/export/export_to_autogptq/export.py

Lines changed: 11 additions & 11 deletions
@@ -258,17 +258,17 @@ def save_quantized_as_autogptq(output_dir, inplace=True, backend="auto_gptq:exll

     all_to_quantized = True
     modules_in_block_to_quantize = []
-    if not dynamic:  # Only uniform precision
-        for block_names in all_blocks:
-            first_block = get_module(model, block_names[0])
-            for n, m in first_block.named_modules():
-                if m.tmp_name not in layer_config:
-                    continue
-                if not check_to_quantized(layer_config[m.tmp_name]):
-                    all_to_quantized = False
-                else:
-                    modules_in_block_to_quantize.append(n)
-        modules_in_block_to_quantize = [modules_in_block_to_quantize]
+    # for backward compatibility
+    for block_names in all_blocks:
+        first_block = get_module(model, block_names[0])
+        for n, m in first_block.named_modules():
+            if m.tmp_name not in layer_config:
+                continue
+            if not check_to_quantized(layer_config[m.tmp_name]):
+                all_to_quantized = False
+            else:
+                modules_in_block_to_quantize.append(n)
+    modules_in_block_to_quantize = [modules_in_block_to_quantize]

     if all_to_quantized:
         modules_in_block_to_quantize = None
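With the `if not dynamic:` guard removed, the per-block module list is collected for mixed-precision configs as well and only dropped (set to None) when every layer is quantized, as the context lines after the hunk show. A standalone sketch of that collection logic follows; the module names and `layer_config` contents are made up, and `check_to_quantized` is reduced to a simple bits check purely for illustration:

# Sketch of the hunk above in isolation; hypothetical names, not the real API.
def collect_modules_in_block(module_names, layer_config):
    all_to_quantized = True
    modules_in_block_to_quantize = []
    for name in module_names:                      # e.g. names from first_block.named_modules()
        cfg = layer_config.get(name)
        if cfg is None:
            continue
        if cfg.get("bits", 16) > 8:                # stand-in for check_to_quantized()
            all_to_quantized = False
        else:
            modules_in_block_to_quantize.append(name)
    # wrapped in an outer list, mirroring [modules_in_block_to_quantize] in the diff
    modules_in_block_to_quantize = [modules_in_block_to_quantize]
    # reset to None when nothing is excluded, as in the lines right after the hunk
    return None if all_to_quantized else modules_in_block_to_quantize

# Mixed-precision example: one layer stays in 16 bits, so the nested list is kept.
layer_config = {"self_attn.q_proj": {"bits": 4}, "mlp.gate_proj": {"bits": 16}}
print(collect_modules_in_block(["self_attn.q_proj", "mlp.gate_proj"], layer_config))
# [['self_attn.q_proj']]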

auto_round/inference/backend.py

Lines changed: 19 additions & 2 deletions
@@ -132,6 +132,20 @@ def feature_multiply_checker_group_size(
     )


+def feature_compatible_multiply_checker(
+    in_feature, out_feature, config, in_feature_multiplier, out_feature_multiplier=None
+):
+    group_size = config["group_size"]
+    if out_feature_multiplier is None:
+        out_feature_multiplier = in_feature_multiplier
+    compatible_flag = in_feature < group_size and (in_feature * out_feature) % group_size == 0
+    return (
+        in_feature % in_feature_multiplier == 0
+        and out_feature % out_feature_multiplier == 0
+        and (in_feature % group_size == 0 or compatible_flag)
+    )
+
+
 def in_feature_checker_group_size(in_feature, out_feature, config):
     group_size = config["group_size"]
     return in_feature % group_size == 0
@@ -148,6 +162,9 @@ def in_feature_checker_group_size(in_feature, out_feature, config):
 exllamav2_feature_checker = functools.partial(
     feature_multiply_checker_group_size, in_feature_multiplier=32, out_feature_multiplier=32
 )
+compatible_exllamav2_feature_checker = functools.partial(
+    feature_compatible_multiply_checker, in_feature_multiplier=32, out_feature_multiplier=32
+)

 gptqmodel_marlin_feature_checker = functools.partial(
     feature_multiply_checker_group_size, in_feature_multiplier=1, out_feature_multiplier=64
@@ -185,9 +202,9 @@ def fp8_static_scheme_checker(
     act_bits=WOQ_DEFAULT_ACT_BITS,
     # 16, 384,768 accuracy issue
     group_size=[-1, 32, 64, 128, 256, 512, 1024, 2048],
-    checkers=[exllamav2_feature_checker],
+    checkers=[compatible_exllamav2_feature_checker],
     alias=["gptq", "auto_gptq", "exllamav2", "gptq:exllamav2", "auto_gptq:exllamav2"],
-    requirements=["auto-gptq>=0.7.1"],
+    requirements=["torch<2.6.0", "auto-gptq>=0.7.1"],
 )

 BackendInfos["auto_gptq:tritonv2"] = BackendInfo(
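The relaxed rule is easiest to see with concrete numbers. The snippet below copies the new checker verbatim and probes it with a hypothetical group size of 128 and a few made-up layer shapes: an in_feature smaller than group_size now passes as long as in_feature * out_feature is still divisible by group_size, while the 32-alignment from the partial is still enforced.

# Checker body copied from the diff above; the driver config and shapes are made up.
def feature_compatible_multiply_checker(
    in_feature, out_feature, config, in_feature_multiplier, out_feature_multiplier=None
):
    group_size = config["group_size"]
    if out_feature_multiplier is None:
        out_feature_multiplier = in_feature_multiplier
    compatible_flag = in_feature < group_size and (in_feature * out_feature) % group_size == 0
    return (
        in_feature % in_feature_multiplier == 0
        and out_feature % out_feature_multiplier == 0
        and (in_feature % group_size == 0 or compatible_flag)
    )

cfg = {"group_size": 128}  # hypothetical quantization config
print(feature_compatible_multiply_checker(64, 256, cfg, 32))   # True: 64 < 128 and (64 * 256) % 128 == 0
print(feature_compatible_multiply_checker(512, 256, cfg, 32))  # True: the usual case, 512 % 128 == 0
print(feature_compatible_multiply_checker(48, 256, cfg, 32))   # False: 48 % 32 != 0, alignment still required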

auto_round/special_model_handler.py

Lines changed: 3 additions & 1 deletion
@@ -85,7 +85,9 @@ def _handle_moe_model(model, formats=None):
         parent = model.get_submodule(parent)
         setattr(parent, child, new_module)

-    logger.warning("Llama4 experts are converted, the quantized model can not run on transformers.")
+    logger.warning(
+        f"{model.config.model_type} experts are converted, the quantized model can not run on transformers."
+    )
     return model
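The warning now interpolates model.config.model_type instead of hard-coding Llama4, so whichever MoE architecture goes through _handle_moe_model is named in the message. A trivial illustration with a stand-in config object; the model type shown is only an example:

# Stand-in for model.config; "qwen3_moe" is a hypothetical example value.
class _Cfg:
    model_type = "qwen3_moe"

print(
    f"{_Cfg.model_type} experts are converted, the quantized model can not run on transformers."
)
# qwen3_moe experts are converted, the quantized model can not run on transformers.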
