
Commit 7357a97

hmellor authored and xuebwang-amd committed
Refactor Transformers backend to use mixins (vllm-project#26906)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Signed-off-by: xuebwang-amd <xuebwang@amd.com>
1 parent 06a091e commit 7357a97
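
The refactor replaces the separate `transformers*.py` model files with a `transformers` package whose variants are built by composing mixins: each class name is assembled from a base plus optional `MultiModal` and `MoE` parts and a task suffix (see the renamed architectures in the diffs below). A minimal sketch of what such mixin composition can look like; the mixin names and bodies here are illustrative assumptions, not the actual contents of `vllm/model_executor/models/transformers`:

```python
# Illustrative sketch only: the mixin names and bodies are hypothetical and
# are NOT the actual classes in vllm/model_executor/models/transformers.
import torch.nn as nn


class TransformersBase(nn.Module):
    """Shared wrapper around a Hugging Face model (hypothetical)."""

    def __init__(self, hf_model: nn.Module) -> None:
        super().__init__()
        self.model = hf_model


class MoEMixin:
    """Adds mixture-of-experts handling to a base wrapper (hypothetical)."""


class MultiModalMixin:
    """Adds multimodal embedding handling to a base wrapper (hypothetical)."""


# Variants become small compositions instead of duplicated subclasses,
# mirroring the composed class names introduced in this commit:
class ForCausalLM(TransformersBase): ...
class MoEForCausalLM(MoEMixin, TransformersBase): ...
class MultiModalMoEForCausalLM(MultiModalMixin, MoEMixin, TransformersBase): ...
```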

File tree

17 files changed (+1510 lines, -1248 lines)


.github/CODEOWNERS

Lines changed: 1 addition & 1 deletion
@@ -57,7 +57,7 @@ CMakeLists.txt @tlrmchlsmth @LucasWilkinson
 /tests/v1/offloading @ApostaC
 
 # Transformers backend
-/vllm/model_executor/models/transformers.py @hmellor
+/vllm/model_executor/models/transformers @hmellor
 /tests/models/test_transformers.py @hmellor
 
 # Docs

tests/models/registry.py

Lines changed: 6 additions & 2 deletions
@@ -912,11 +912,11 @@ def check_available_online(
     "TransformersForCausalLM": _HfExamplesInfo(
         "hmellor/Ilama-3.2-1B", trust_remote_code=True
     ),
-    "TransformersForMultimodalLM": _HfExamplesInfo("BAAI/Emu3-Chat-hf"),
+    "TransformersMultiModalForCausalLM": _HfExamplesInfo("BAAI/Emu3-Chat-hf"),
     "TransformersMoEForCausalLM": _HfExamplesInfo(
         "allenai/OLMoE-1B-7B-0924", min_transformers_version="4.57.0.dev0"
     ),
-    "TransformersMoEForMultimodalLM": _HfExamplesInfo(
+    "TransformersMultiModalMoEForCausalLM": _HfExamplesInfo(
         "Qwen/Qwen3-VL-30B-A3B-Instruct", min_transformers_version="4.57.0.dev0"
     ),
     "TransformersMoEEmbeddingModel": _HfExamplesInfo(
@@ -925,6 +925,10 @@ def check_available_online(
     "TransformersMoEForSequenceClassification": _HfExamplesInfo(
         "Qwen/Qwen3-30B-A3B", min_transformers_version="4.57.0.dev0"
     ),
+    "TransformersMultiModalEmbeddingModel": _HfExamplesInfo("google/gemma-3-4b-it"),
+    "TransformersMultiModalForSequenceClassification": _HfExamplesInfo(
+        "google/gemma-3-4b-it"
+    ),
 }
 
 _EXAMPLE_MODELS = {
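
These registry entries give each renamed architecture an example checkpoint for the tests. As a usage sketch, the multimodal class can be exercised by forcing the Transformers backend via `model_impl="transformers"` (the same flag used in `test_transformers.py` below); the prompt and sampling parameters here are illustrative:

```python
# Sketch: load one of the example checkpoints above through the
# Transformers backend; sampling parameters are illustrative.
from vllm import LLM, SamplingParams

llm = LLM(
    model="BAAI/Emu3-Chat-hf",
    model_impl="transformers",  # resolves to TransformersMultiModalForCausalLM
)
outputs = llm.generate(["Describe a sunset."], SamplingParams(max_tokens=32))
print(outputs[0].outputs[0].text)
```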

tests/models/test_initialization.py

Lines changed: 1 addition & 1 deletion
@@ -37,7 +37,7 @@
     "JinaVLForRanking",
     "InternVLChatModel",
     "InternLM2ForRewardModel",
-    "TransformersForMultimodalLM",
+    "TransformersMultiModalForCausalLM",
     "PrithviGeoSpatialMAE",
     "UltravoxModel",
     "DeepSeekMTPModel",

tests/models/test_transformers.py

Lines changed: 1 addition & 5 deletions
@@ -211,11 +211,7 @@ def test_embed_loading(vllm_runner, model):
 def test_pooling(hf_runner, vllm_runner, example_prompts, arch):
     model = get_model(arch)
 
-    vllm_kwargs = dict(
-        max_model_len=None,
-        model_impl="transformers",
-        compilation_config=dict(cudagraph_capture_sizes=[8]),
-    )
+    vllm_kwargs = dict(max_model_len=None, model_impl="transformers")
 
     hf_kwargs = dict()
     if arch == "TransformersEmbeddingModel":

vllm/config/model.py

Lines changed: 14 additions & 11 deletions
@@ -147,6 +147,10 @@ class ModelConfig:
     seed: int | None = None
     """Random seed for reproducibility. Initialized to None in V0, but
     initialized to 0 in V1."""
+    hf_config: PretrainedConfig = field(init=False)
+    """The Hugging Face config of the model."""
+    hf_text_config: PretrainedConfig = field(init=False)
+    """The Hugging Face config of the text model (same as hf_config for text models)."""
     hf_config_path: str | None = None
     """Name or path of the Hugging Face config to use. If unspecified, model
     name or path will be used."""
@@ -771,8 +775,10 @@ def validate_model_config_after(self: "ModelConfig") -> "ModelConfig":
     def _get_transformers_backend_cls(self) -> str:
         """Determine which Transformers backend class will be used if
         `model_impl` is set to `transformers` or `auto`."""
-        prefix = "Transformers"
-        prefix += "MoE" if self.get_num_experts() > 1 else ""
+        cls = "Transformers"
+        # If 'hf_config != hf_text_config' it's a nested config, i.e. multimodal
+        cls += "MultiModal" if self.hf_config != self.hf_text_config else ""
+        cls += "MoE" if self.get_num_experts() > 1 else ""
         # Check if the architecture we're wrapping has defaults
         runner = None
         convert = None
@@ -788,18 +794,15 @@ def _get_transformers_backend_cls(self) -> str:
             runner = "generate"
         if convert in {None, "none"}:
             convert = "embed"
-        # Resolve Transformers backend pooling classes
+        # Resolve Transformers backend task
         if runner == "pooling":
             if convert == "embed":
-                return prefix + "EmbeddingModel"
+                return cls + "EmbeddingModel"
             if convert == "classify":
-                return prefix + "ForSequenceClassification"
-        # Resolve Transformers backend generate classes
-        if self.hf_config != self.hf_text_config:
-            # If 'hf_text_config' is the same as 'hf_config'. If not, it is
-            # probably a composite config, i.e. multimodal
-            return prefix + "ForMultimodalLM"
-        return prefix + "ForCausalLM"
+                return cls + "ForSequenceClassification"
+        else:
+            cls += "ForCausalLM"
+        return cls
 
     def using_transformers_backend(self) -> bool:
         """Check if the model is using the Transformers backend class."""

vllm/model_executor/models/deepseek_vl2.py

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.quantization import QuantizationConfig
 from vllm.model_executor.model_loader.utils import set_default_torch_dtype
-from vllm.model_executor.models.transformers import replace_linear_class
+from vllm.model_executor.models.transformers.utils import replace_linear_class
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.inputs import (
     MultiModalDataDict,
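
Only the import path changes here: `replace_linear_class` now lives in the `transformers.utils` module of the new package. Conceptually it swaps a plain `nn.Linear` for one of vLLM's tensor-parallel linear layers; the sketch below illustrates that kind of substitution with hypothetical names rather than the real helper's signature:

```python
# Conceptual sketch only -- names and signature are hypothetical, not the
# real vllm.model_executor.models.transformers.utils.replace_linear_class API.
import torch.nn as nn


class ColumnParallelLinearStub(nn.Linear):
    """Stand-in for a tensor-parallel linear layer (illustrative only)."""


def swap_linear_for_parallel(module: nn.Module) -> None:
    """Recursively replace nn.Linear children with the parallel stand-in."""
    for name, child in module.named_children():
        if isinstance(child, nn.Linear):
            new = ColumnParallelLinearStub(
                child.in_features, child.out_features, bias=child.bias is not None
            )
            setattr(module, name, new)
        else:
            swap_linear_for_parallel(child)
```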

vllm/model_executor/models/registry.py

Lines changed: 26 additions & 14 deletions
@@ -401,32 +401,44 @@
     # Text generation models
     "SmolLM3ForCausalLM": ("transformers", "TransformersForCausalLM"),
     # Multimodal models
-    "Emu3ForConditionalGeneration": ("transformers", "TransformersForMultimodalLM"),  # noqa: E501
+    "Emu3ForConditionalGeneration": (
+        "transformers",
+        "TransformersMultiModalForCausalLM",
+    ),
 }
 
 _TRANSFORMERS_BACKEND_MODELS = {
+    # Text generation models
     "TransformersForCausalLM": ("transformers", "TransformersForCausalLM"),
-    "TransformersForMultimodalLM": ("transformers", "TransformersForMultimodalLM"),  # noqa: E501
-    "TransformersMoEForCausalLM": ("transformers_moe", "TransformersMoEForCausalLM"),  # noqa: E501
-    "TransformersMoEForMultimodalLM": (
-        "transformers_moe",
-        "TransformersMoEForMultimodalLM",
+    "TransformersMoEForCausalLM": ("transformers", "TransformersMoEForCausalLM"),
+    # Multimodal models
+    "TransformersMultiModalForCausalLM": (
+        "transformers",
+        "TransformersMultiModalForCausalLM",
+    ),
+    "TransformersMultiModalMoEForCausalLM": (
+        "transformers",
+        "TransformersMultiModalMoEForCausalLM",
     ),
-    "TransformersEmbeddingModel": (
-        "transformers_pooling",
-        "TransformersEmbeddingModel",
+    # Embedding models
+    "TransformersEmbeddingModel": ("transformers", "TransformersEmbeddingModel"),
+    "TransformersMoEEmbeddingModel": ("transformers", "TransformersMoEEmbeddingModel"),
+    "TransformersMultiModalEmbeddingModel": (
+        "transformers",
+        "TransformersMultiModalEmbeddingModel",
    ),
+    # Sequence classification models
     "TransformersForSequenceClassification": (
-        "transformers_pooling",
+        "transformers",
         "TransformersForSequenceClassification",
     ),
     "TransformersMoEForSequenceClassification": (
-        "transformers_pooling",
+        "transformers",
         "TransformersMoEForSequenceClassification",
     ),
-    "TransformersMoEEmbeddingModel": (
-        "transformers_pooling",
-        "TransformersMoEEmbeddingModel",
+    "TransformersMultiModalForSequenceClassification": (
+        "transformers",
+        "TransformersMultiModalForSequenceClassification",
     ),
 }
 
432444
