Skip to content

Commit 845bf6b

Browse files
committed
[Core] More fixes to MultiModalEmbeddings type handling
This is a follow-up to PR vllm-project#19446. In that PR, get_multimodal_embeddings() was changed to return `MultiModalEmbeddings` instead of `Optional[MultiModalEmbeddings]`, because code in the model runner required that the result was not `None`. Several models needed tweaks to account for this, and many were missed because they were not tested in CI. This commit fixes the rest of the common changes needed that weren't caught by CI. Signed-off-by: Russell Bryant <rbryant@redhat.com>
1 parent 12575cf commit 845bf6b

33 files changed

+34
-34
lines changed

vllm/model_executor/models/aria.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -620,7 +620,7 @@ def get_input_embeddings(
620620
multimodal_embeddings: Optional[MultiModalEmbeddings] = None,
621621
) -> torch.Tensor:
622622
inputs_embeds = self.language_model.get_input_embeddings(input_ids)
623-
if multimodal_embeddings is not None:
623+
if multimodal_embeddings:
624624
inputs_embeds = merge_multimodal_embeddings(
625625
input_ids, inputs_embeds, multimodal_embeddings,
626626
self.config.image_token_index)

vllm/model_executor/models/aya_vision.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,7 @@ def get_input_embeddings(
430430
multimodal_embeddings: Optional[MultiModalEmbeddings] = None,
431431
) -> torch.Tensor:
432432
inputs_embeds = self.language_model.get_input_embeddings(input_ids)
433-
if multimodal_embeddings is not None:
433+
if multimodal_embeddings:
434434
inputs_embeds = merge_multimodal_embeddings(
435435
input_ids=input_ids,
436436
inputs_embeds=inputs_embeds,

vllm/model_executor/models/blip2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -641,7 +641,7 @@ def get_input_embeddings(
641641
multimodal_embeddings: Optional[MultiModalEmbeddings] = None,
642642
) -> torch.Tensor:
643643
inputs_embeds = self.language_model.get_input_embeddings(input_ids)
644-
if multimodal_embeddings is not None:
644+
if multimodal_embeddings:
645645
inputs_embeds = merge_multimodal_embeddings(
646646
input_ids, inputs_embeds, multimodal_embeddings,
647647
_IMAGE_TOKEN_ID)

vllm/model_executor/models/chameleon.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1005,7 +1005,7 @@ def get_input_embeddings(
10051005
) -> torch.Tensor:
10061006

10071007
inputs_embeds = self.model.get_input_embeddings(input_ids)
1008-
if multimodal_embeddings is not None:
1008+
if multimodal_embeddings:
10091009
inputs_embeds = merge_multimodal_embeddings(
10101010
input_ids, inputs_embeds, multimodal_embeddings,
10111011
self.model.vocabulary_mapping.image_token_id)

vllm/model_executor/models/deepseek_vl2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -600,7 +600,7 @@ def get_input_embeddings(
600600
multimodal_embeddings: Optional[MultiModalEmbeddings] = None,
601601
) -> torch.Tensor:
602602
inputs_embeds = self.language_model.get_input_embeddings(input_ids)
603-
if multimodal_embeddings is not None:
603+
if multimodal_embeddings:
604604
inputs_embeds = merge_multimodal_embeddings(
605605
input_ids, inputs_embeds, multimodal_embeddings,
606606
self.image_token_id)

vllm/model_executor/models/florence2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1046,7 +1046,7 @@ def get_input_embeddings(
10461046
multimodal_embeddings: Optional[MultiModalEmbeddings] = None,
10471047
) -> torch.Tensor:
10481048
inputs_embeds = self.language_model.get_input_embeddings(input_ids)
1049-
if multimodal_embeddings is not None:
1049+
if multimodal_embeddings:
10501050
inputs_embeds = merge_multimodal_embeddings(
10511051
input_ids, inputs_embeds, multimodal_embeddings,
10521052
self.pad_token_id)

vllm/model_executor/models/fuyu.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -345,7 +345,7 @@ def get_input_embeddings(
345345
multimodal_embeddings: Optional[MultiModalEmbeddings] = None,
346346
) -> torch.Tensor:
347347
inputs_embeds = self.language_model.get_input_embeddings(input_ids)
348-
if multimodal_embeddings is not None:
348+
if multimodal_embeddings:
349349
inputs_embeds = merge_multimodal_embeddings(
350350
input_ids,
351351
inputs_embeds,

vllm/model_executor/models/gemma3_mm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -592,7 +592,7 @@ def get_input_embeddings(
592592
multimodal_embeddings: Optional[MultiModalEmbeddings] = None,
593593
) -> torch.Tensor:
594594
inputs_embeds = self.language_model.get_input_embeddings(input_ids)
595-
if multimodal_embeddings is not None:
595+
if multimodal_embeddings:
596596
inputs_embeds = merge_multimodal_embeddings(
597597
input_ids,
598598
inputs_embeds,

vllm/model_executor/models/glm4v.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -609,7 +609,7 @@ def get_input_embeddings(
609609
) -> torch.Tensor:
610610
inputs_embeds = self.transformer.get_input_embeddings(input_ids)
611611

612-
if multimodal_embeddings is not None:
612+
if multimodal_embeddings:
613613
inputs_embeds = merge_multimodal_embeddings(
614614
input_ids=input_ids,
615615
inputs_embeds=inputs_embeds,

vllm/model_executor/models/granite_speech.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -721,7 +721,7 @@ def get_input_embeddings(
721721
multimodal_embeddings: Optional[MultiModalEmbeddings] = None,
722722
) -> torch.Tensor:
723723
"""Compute the merged LLM / audio embeddings."""
724-
if multimodal_embeddings is None:
724+
if not multimodal_embeddings:
725725
return self.language_model.get_input_embeddings(input_ids)
726726

727727
inputs_embeds = embed_multimodal(

0 commit comments

Comments (0)