[Bug]: TransformersModel fails if model config does not have head_dim attr #14139

Closed
@nopperl

Description

Your current environment

vllm@cf069aa

🐛 Describe the bug

Running models using the transformers fallback fails if vllm_config.model_config.hf_config does not have a head_dim attribute. For example, with Qwen/Qwen2.5-0.5B-Instruct:

from vllm import LLM, SamplingParams

prompts = [
    "Hello, my name is",
    "The president of the United States is",
    "The capital of France is",
    "The future of AI is",
]
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct", model_impl="transformers")

outputs = llm.generate(prompts, sampling_params)

# Print the outputs.
for output in outputs:
    prompt = output.prompt
    generated_text = output.outputs[0].text
    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")

Error:

[rank0]: Traceback (most recent call last):
[rank0]:   File "/scratch/test_qwen.py", line 20, in <module>
[rank0]:     llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct", enable_prefix_caching=False, compilation_config=3, model_impl="transformers")
[rank0]:           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]:   File "/scratch/vllm/vllm/utils.py", line 1045, in inner
[rank0]:     return fn(*args, **kwargs)
[rank0]:            ^^^^^^^^^^^^^^^^^^^
[rank0]:   File "/scratch/vllm/vllm/entrypoints/llm.py", line 243, in __init__
[rank0]:     self.llm_engine = self.engine_class.from_engine_args(
[rank0]:                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]:   File "/scratch/vllm/vllm/engine/llm_engine.py", line 494, in from_engine_args
[rank0]:     engine = cls(
[rank0]:              ^^^^
[rank0]:   File "/scratch/vllm/vllm/engine/llm_engine.py", line 274, in __init__
[rank0]:     self.model_executor = executor_class(vllm_config=vllm_config, )
[rank0]:                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]:   File "/scratch/vllm/vllm/executor/executor_base.py", line 52, in __init__
[rank0]:     self._init_executor()
[rank0]:   File "/scratch/vllm/vllm/executor/uniproc_executor.py", line 47, in _init_executor
[rank0]:     self.collective_rpc("load_model")
[rank0]:   File "/scratch/vllm/vllm/executor/uniproc_executor.py", line 56, in collective_rpc
[rank0]:     answer = run_method(self.driver_worker, method, args, kwargs)
[rank0]:              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]:   File "/scratch/vllm/vllm/utils.py", line 2232, in run_method
[rank0]:     return func(*args, **kwargs)
[rank0]:            ^^^^^^^^^^^^^^^^^^^^^
[rank0]:   File "/scratch/vllm/vllm/worker/worker.py", line 183, in load_model
[rank0]:     self.model_runner.load_model()
[rank0]:   File "/scratch/vllm/vllm/worker/model_runner.py", line 1113, in load_model
[rank0]:     self.model = get_model(vllm_config=self.vllm_config)
[rank0]:                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]:   File "/scratch/vllm/vllm/model_executor/model_loader/__init__.py", line 14, in get_model
[rank0]:     return loader.load_model(vllm_config=vllm_config)
[rank0]:            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]:   File "/scratch/vllm/vllm/model_executor/model_loader/loader.py", line 416, in load_model
[rank0]:     model = _initialize_model(vllm_config=vllm_config)
[rank0]:             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]:   File "/scratch/vllm/vllm/model_executor/model_loader/loader.py", line 126, in _initialize_model
[rank0]:     return model_class(vllm_config=vllm_config, prefix=prefix)
[rank0]:            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]:   File "/scratch/vllm/vllm/model_executor/models/transformers.py", line 152, in __init__
[rank0]:     head_size=config.head_dim,
[rank0]:               ^^^^^^^^^^^^^^^
[rank0]:   File "/scratch/vllm/transformers/configuration_utils.py", line 214, in __getattribute__
[rank0]:     return super().__getattribute__(key)
[rank0]:            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: AttributeError: 'Qwen2Config' object has no attribute 'head_dim'

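For context, the failure comes down to Qwen2Config simply not exposing a head_dim attribute in this environment, while the transformers fallback reads config.head_dim unconditionally. A minimal check of this (a sketch, assuming only the standard Hugging Face config fields hidden_size and num_attention_heads):

from transformers import AutoConfig

# Load the HF config for the affected model.
config = AutoConfig.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")

# With the transformers version used in this report, Qwen2Config has no
# head_dim attribute, which is exactly what the traceback above trips over.
print(hasattr(config, "head_dim"))

# The head size can still be derived from the standard config fields.
print(config.hidden_size // config.num_attention_heads)
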
I think it's fine to calculate the head size if head_dim does not exist. With the following diff, the model works:

--- a/vllm/model_executor/models/transformers.py
+++ b/vllm/model_executor/models/transformers.py
@@ -146,13 +146,14 @@ class TransformersModel(nn.Module, SupportsQuant, SupportsLoRA):
 
         # Attention modifications (assumes 1 attention op per hidden layer)
         tp_size = get_tensor_model_parallel_world_size()
+        head_size = vllm_config.model_config.get_head_size()
         self.attention_instances = [
             Attention(
                 num_heads=divide(config.num_attention_heads, tp_size),
-                head_size=config.head_dim,
+                head_size=head_size,
                 # NOTE: We use Llama scale as default, if it's set by
                 # Transformers, it's updated in vllm_flash_attention_forward
-                scale=config.head_dim**-0.5,
+                scale=head_size**-0.5,
                 num_kv_heads=divide(config.num_key_value_heads, tp_size),
                 cache_config=cache_config,
                 quant_config=self.quant_config,

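vllm_config.model_config.get_head_size() already copes with a missing head_dim by deriving the value from the config, which is why the diff above works. If one instead wanted to keep reading from the HF config directly, a defensive fallback could look like the sketch below (resolve_head_size is a hypothetical helper, not part of vLLM, and it assumes hidden_size and num_attention_heads are present on the config):

# Hypothetical helper, not from vLLM: prefer head_dim when the HF config
# defines it, otherwise fall back to the computed head size.
def resolve_head_size(config) -> int:
    head_dim = getattr(config, "head_dim", None)
    if head_dim is not None:
        return head_dim
    # Assumption: standard HF fields are available on the config.
    return config.hidden_size // config.num_attention_heads

In the Attention(...) call above, head_size=resolve_head_size(config) and scale=resolve_head_size(config) ** -0.5 would then work for configs with and without head_dim.
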
Before submitting a new issue...

  • Make sure you already searched for relevant issues, and asked the chatbot living at the bottom right corner of the documentation page, which can answer lots of frequently asked questions.

Metadata

Labels

bug (Something isn't working)
