[Bugfix] Fix KV head calculation for MPT models when using GQA (vllm-project#5142)
xjpang · Jul 24, 2024 · a16cecd · a16cecd
1 parent e00c6d4
commit a16cecd
Showing 1 changed file with 5 additions and 1 deletion.
diff --git a/vllm/config.py b/vllm/config.py
@@ -302,7 +302,11 @@ def get_total_num_kv_heads(self) -> int:
             return 1
 
         # For DBRX and MPT
-        if self.hf_config.model_type in ["dbrx", "mpt"]:
+        if self.hf_config.model_type == "mpt":
+            if "kv_n_heads" in self.hf_config.attn_config:
+                return self.hf_config.attn_config["kv_n_heads"]
+            return self.hf_config.num_attention_heads
+        if self.hf_config.model_type == "dbrx":
             return getattr(self.hf_config.attn_config, "kv_n_heads",
                            self.hf_config.num_attention_heads)