Description
```
    return func(*args, **kwargs)
  File "/home/wenhuach/anaconda3/envs/autoround/lib/python3.10/site-packages/transformers/modeling_utils.py", line 5130, in from_pretrained
    hf_quantizer.preprocess_model(
  File "/home/wenhuach/anaconda3/envs/autoround/lib/python3.10/site-packages/transformers/quantizers/base.py", line 238, in preprocess_model
    return self._process_model_before_weight_loading(model, **kwargs)
  File "/home/wenhuach/anaconda3/envs/autoround/lib/python3.10/site-packages/transformers/quantizers/quantizer_auto_round.py", line 64, in _process_model_before_weight_loading
    model, used_backends = convert_hf_model(model, target_device)
  File "/home/wenhuach/auto-round-main/auto_round/inference/convert_model.py", line 590, in convert_hf_model
    used_backends = _replace_by_quant_layers(model, layer_configs, backend, target_device, orig_backend)
  File "/home/wenhuach/auto-round-main/auto_round/inference/convert_model.py", line 372, in _replace_by_quant_layers
    new_layer = _create_quant_layer(layer, layer_backend, config, in_features, out_features)
  File "/home/wenhuach/auto-round-main/auto_round/inference/convert_model.py", line 418, in _create_quant_layer
    QuantLinear = dynamic_import_inference_linear(layer_backend, config["bits"], config["group_size"], config["sym"])
  File "/home/wenhuach/auto-round-main/auto_round/inference/backend.py", line 486, in dynamic_import_inference_linear
    return get_gptqmodel_infer_linear(backend, bits, group_size, sym)
  File "/home/wenhuach/auto-round-main/auto_round/inference/backend.py", line 524, in get_gptqmodel_infer_linear
    import gptqmodel  # pylint: disable=E0401
  File "/home/wenhuach/anaconda3/envs/autoround/lib/python3.10/site-packages/gptqmodel/__init__.py", line 19, in <module>
    from .models import GPTQModel, get_best_device
  File "/home/wenhuach/anaconda3/envs/autoround/lib/python3.10/site-packages/gptqmodel/models/__init__.py", line 18, in <module>
    from .auto import MODEL_MAP, GPTQModel
  File "/home/wenhuach/anaconda3/envs/autoround/lib/python3.10/site-packages/gptqmodel/models/auto.py", line 62, in <module>
    from ..quantization import QUANT_CONFIG_FILENAME  # noqa: E402
  File "/home/wenhuach/anaconda3/envs/autoround/lib/python3.10/site-packages/gptqmodel/quantization/__init__.py", line 19, in <module>
    from .gptq import GPTQ
  File "/home/wenhuach/anaconda3/envs/autoround/lib/python3.10/site-packages/gptqmodel/quantization/gptq.py", line 47, in <module>
    torch.linalg.cholesky(tmp_eye)
RuntimeError: "cholesky_cusolver" not implemented for 'BFloat16'
```
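The failure also reproduces outside the import chain with a minimal snippet (the matrix size and the CUDA device here are illustrative assumptions; any bfloat16 matrix routed to the cuSOLVER backend triggers it):

```python
import torch

# Minimal reproduction: cuSOLVER has no bfloat16 Cholesky kernel, so this
# raises RuntimeError: "cholesky_cusolver" not implemented for 'BFloat16'.
# (Size 64 is arbitrary; a CUDA device is assumed.)
tmp_eye = torch.eye(64, dtype=torch.bfloat16, device="cuda")
torch.linalg.cholesky(tmp_eye)
```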
We couldn't identify the root cause of the issue, so the straightforward workaround is to create tmp_eye in float32, as sketched below. This change should have no impact on GPTQModel.
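For concreteness, a minimal sketch of the proposed workaround, assuming tmp_eye is the identity probe built just before the failing call in gptqmodel/quantization/gptq.py (the size and device are again illustrative):

```python
import torch

# Workaround sketch: build the probe in float32, for which a cuSOLVER
# Cholesky kernel exists. The identity matrix is positive-definite, so the
# decomposition succeeds. (Size 64 and the CUDA device are assumptions;
# per this report, the dtype change should have no impact on GPTQModel.)
tmp_eye = torch.eye(64, dtype=torch.float32, device="cuda")
torch.linalg.cholesky(tmp_eye)

# Alternative: keep the bfloat16 tensor and cast only for the decomposition.
# torch.linalg.cholesky(tmp_eye.to(torch.float32))
```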