We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 013087c · commit 45ea3f1 — Copy full SHA for 45ea3f1
vllm/model_executor/models/llama.py
@@ -175,7 +175,8 @@ def __init__(
         self.self_attn = LlamaAttention(
             hidden_size=self.hidden_size,
             num_heads=config.num_attention_heads,
-            num_kv_heads=config.num_key_value_heads,
+            num_kv_heads=getattr(config, "num_key_value_heads",
+                                 config.num_attention_heads),
             rope_theta=rope_theta,
             rope_scaling=rope_scaling,
             max_position_embeddings=max_position_embeddings,
0 commit comments