[Bugfix] Fix MiniCPM's LoRA bug (vllm-project#9286)

jeejeelee authored Oct 12, 2024
1 parent 2b184dd commit 250e26a
Showing 3 changed files with 40 additions and 17 deletions.

vllm/lora/models.py (5 additions, 1 deletion)

@@ -337,7 +337,11 @@ def __init__(
         self.packed_modules_mapping = copy.deepcopy(
             self.model.packed_modules_mapping)
         # Used to indicate whether the model is a multimodal model
-        self.supports_mm: bool = supports_multimodal(self.model)
+        self.supports_mm: bool = (
+            supports_multimodal(self.model)
+            # In case the model only supports LoRA for
+            # text modules (e.g. ChatGLM)
+            and hasattr(self.model, "get_mm_mapping"))
         self.packed_modules: Dict[str, List[str]] = {}
         self.modules: Dict[str, "BaseLayerWithLoRA"] = {}
         # Dict instead of a Set for compatibility with LRUCache.
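
The key change: supports_multimodal(self.model) alone no longer decides whether the LoRA manager treats a model as multimodal; the model must also expose get_mm_mapping(). The following minimal sketch uses hypothetical classes and an is_multimodal flag in place of vLLM's real interfaces to show how a model that only supports LoRA on its text modules (the ChatGLM case named in the comment) now falls through to the text-only path.

class TextOnlyLoRAModel:
    """Multimodal at inference time, but LoRA targets only text modules."""
    is_multimodal = True


class FullMultimodalLoRAModel:
    """Also exposes a module mapping, so multimodal LoRA handling applies."""
    is_multimodal = True

    def get_mm_mapping(self):
        # Hypothetical prefixes for connector / vision-tower submodules.
        return {"connector": "resampler", "tower_model": "vpm"}


def supports_mm_lora(model) -> bool:
    # Mirrors the fixed condition: both checks must pass.
    return getattr(model, "is_multimodal", False) and hasattr(
        model, "get_mm_mapping")


print(supports_mm_lora(TextOnlyLoRAModel()))        # False
print(supports_mm_lora(FullMultimodalLoRAModel()))  # True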

vllm/model_executor/models/minicpm.py (13 additions, 16 deletions)

@@ -474,17 +474,18 @@ def __init__(
         unpadded_vocab_size = config.vocab_size
         if lora_config:
             unpadded_vocab_size += lora_config.lora_extra_vocab_size
-        if not self.config.tie_word_embeddings:
-            self.lm_head = ParallelLMHead(
-                unpadded_vocab_size,
-                config.hidden_size,
-                org_num_embeddings=config.vocab_size,
-                padding_size=DEFAULT_VOCAB_PADDING_SIZE
-                # We need bigger padding if using lora for kernel
-                # compatibility
-                if not lora_config else lora_config.lora_vocab_padding_size,
-                quant_config=quant_config,
-            )
+        self.lm_head = ParallelLMHead(
+            unpadded_vocab_size,
+            config.hidden_size,
+            org_num_embeddings=config.vocab_size,
+            padding_size=DEFAULT_VOCAB_PADDING_SIZE
+            # We need bigger padding if using lora for kernel
+            # compatibility
+            if not lora_config else lora_config.lora_vocab_padding_size,
+            quant_config=quant_config,
+        )
+        if config.tie_word_embeddings:
+            self.lm_head = self.lm_head.tie_weights(self.model.embed_tokens)
         self.scale_width = self.config.hidden_size / self.config.dim_model_base

         self.logits_processor = LogitsProcessor(unpadded_vocab_size,
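
The restructured __init__ now always constructs self.lm_head and only afterwards ties its weight to self.model.embed_tokens when config.tie_word_embeddings is set, rather than skipping the head entirely. A plain-PyTorch sketch of that pattern (assumed sizes; nn.Embedding and nn.Linear standing in for vLLM's VocabParallelEmbedding and ParallelLMHead):

import torch.nn as nn

vocab_size, hidden_size = 32000, 2304  # illustrative sizes
tie_word_embeddings = True

embed_tokens = nn.Embedding(vocab_size, hidden_size)
lm_head = nn.Linear(hidden_size, vocab_size, bias=False)

if tie_word_embeddings:
    # Reuse the embedding weight as the output projection weight.
    lm_head.weight = embed_tokens.weight

# Both modules now reference the same parameter tensor.
assert lm_head.weight.data_ptr() == embed_tokens.weight.data_ptr()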

@@ -517,11 +518,7 @@ def compute_logits(
         sampling_metadata: SamplingMetadata,
     ) -> Optional[torch.Tensor]:
         hidden_states = hidden_states / self.scale_width
-        if self.config.tie_word_embeddings:
-            lm_head = self.model.embed_tokens
-        else:
-            lm_head = self.lm_head
-        logits = self.logits_processor(lm_head, hidden_states,
+        logits = self.logits_processor(self.lm_head, hidden_states,
                                        sampling_metadata)
         return logits
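
With the head always present, compute_logits no longer branches on tie_word_embeddings: it scales the hidden states by scale_width = hidden_size / dim_model_base and hands the single self.lm_head to the logits processor. A small sketch of that flow with illustrative sizes (not MiniCPM's actual configuration values):

import torch

hidden_size, dim_model_base, vocab_size = 2304, 256, 32000  # illustrative
scale_width = hidden_size / dim_model_base                  # 9.0 here

lm_head_weight = torch.randn(vocab_size, hidden_size)
hidden_states = torch.randn(2, hidden_size)

# MiniCPM divides hidden states by scale_width before projecting to logits.
hidden_states = hidden_states / scale_width
logits = hidden_states @ lm_head_weight.t()  # shape: (2, vocab_size)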

vllm/model_executor/models/minicpm3.py (22 additions, 0 deletions)

@@ -216,6 +216,28 @@ def _init_layers(


 class MiniCPM3ForCausalLM(MiniCPMForCausalLM):
+    packed_modules_mapping = {
+        "gate_up_proj": [
+            "gate_proj",
+            "up_proj",
+        ],
+    }
+
+    # LoRA specific attributes
+    supported_lora_modules = [
+        "kv_a_proj_with_mqa",
+        "q_a_proj",
+        "q_b_proj",
+        "kv_b_proj",
+        "o_proj",
+        "gate_up_proj",
+        "down_proj",
+        "embed_tokens",
+        "lm_head",
+    ]
+
+    # `embedding_modules` and `embedding_padding_modules`
+    # are inherited from MiniCPMForCausalLM
+
     def _init_model(self):
         self.model = MiniCPM3Model(config=self.config,
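
The new class attributes declare MiniCPM3's LoRA surface explicitly: packed_modules_mapping tells the LoRA machinery that gate_proj and up_proj adapters target the fused gate_up_proj layer, and supported_lora_modules lists the projections of MiniCPM3's latent attention (q_a_proj, q_b_proj, kv_a_proj_with_mqa, kv_b_proj) alongside the MLP, embedding, and lm_head modules. The sketch below (plain PyTorch with assumed shapes, not vLLM's packed-LoRA implementation) illustrates why the packing mapping is needed: per-module LoRA deltas trained against separate gate_proj/up_proj weights must be stacked to line up with the fused layer's output dimension.

import torch

hidden_size, intermediate_size, rank = 2560, 6400, 16  # illustrative sizes

# The model runs gate and up projections as one fused GEMM whose output
# rows are laid out as [gate | up].
gate_up_weight = torch.randn(2 * intermediate_size, hidden_size)

# LoRA deltas were trained against the unfused layout, one pair per module.
lora_a_gate = torch.randn(rank, hidden_size)
lora_b_gate = torch.randn(intermediate_size, rank)
lora_a_up = torch.randn(rank, hidden_size)
lora_b_up = torch.randn(intermediate_size, rank)

# Stack the per-module deltas in the same order as the fused weight so the
# merged result stays consistent with gate_up_proj.
delta = torch.cat([lora_b_gate @ lora_a_gate, lora_b_up @ lora_a_up], dim=0)
merged = gate_up_weight + delta
assert merged.shape == gate_up_weight.shape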
