diff --git a/vllm/model_executor/model_loader/loader.py b/vllm/model_executor/model_loader/loader.py index f697c3245f098..e9779878710ee 100644 --- a/vllm/model_executor/model_loader/loader.py +++ b/vllm/model_executor/model_loader/loader.py @@ -1076,8 +1076,8 @@ def _load_weights(self, model_config: ModelConfig, # weight tensor. So TP does not work with pre_quantized bnb models. if pre_quant and get_tensor_model_parallel_world_size() > 1: raise ValueError( - "Prequant BitsAndBytes models with TP is not supported." - "Please try with PP.") + "Prequant BitsAndBytes models with tensor parallelism is not " + "supported. Please try with pipeline parallelism.") load_8bit = False if pre_quant: