
Commit 3105f42

hmellor authored and lk-chen committed
Upgrade transformers to v4.50.3 (vllm-project#13905)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
1 parent 9e9c2be commit 3105f42

File tree

8 files changed (+68, -49 lines)


docs/source/models/supported_models.md
Lines changed: 1 addition & 1 deletion

@@ -73,7 +73,7 @@ The Transformers fallback explicitly supports the following features:
 
 - <project:#quantization-index> (except GGUF)
 - <project:#lora-adapter>
-- <project:#distributed-serving> (requires `transformers>=4.49.0`)
+- <project:#distributed-serving>
 
 #### Remote code
 

requirements/common.txt
Lines changed: 1 addition & 1 deletion

@@ -6,7 +6,7 @@ requests >= 2.26.0
 tqdm
 blake3
 py-cpuinfo
-transformers >= 4.48.2 # Required for Bamba model and Transformers backend.
+transformers >= 4.50.3
 tokenizers >= 0.19.1 # Required for Llama 3.
 protobuf # Required by LlamaTokenizer.
 fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint.
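The floor on `transformers` in requirements/common.txt moves from 4.48.2 to 4.50.3, and the comment tying the old pin to Bamba and the Transformers backend is dropped along with the bump. A minimal sketch, not part of the commit, of how an existing environment could be checked against the new floor using only the stdlib and the already-required `packaging` package:

from importlib.metadata import version

from packaging.version import Version

# Hypothetical pre-flight check; pip normally enforces this via requirements/common.txt.
if Version(version("transformers")) < Version("4.50.3"):
    raise RuntimeError("this vLLM checkout expects transformers >= 4.50.3")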

requirements/test.in
Lines changed: 1 addition & 1 deletion

@@ -30,7 +30,7 @@ matplotlib # required for qwen-vl test
 mistral_common[opencv] >= 1.5.4 # required for pixtral test
 datamodel_code_generator # required for minicpm3 test
 lm-eval[api]==0.4.4 # required for model evaluation test
-transformers==4.48.2
+transformers==4.50.3
 # quantization
 bitsandbytes>=0.45.3
 buildkite-test-collector==0.1.9

requirements/test.txt
Lines changed: 1 addition & 1 deletion

@@ -643,7 +643,7 @@ tqdm==4.66.6
     #   transformers
 tqdm-multiprocess==0.0.11
     # via lm-eval
-transformers==4.48.2
+transformers==4.50.3
     # via
     #   -r requirements/test.in
     #   genai-perf

tests/distributed/test_pipeline_parallel.py
Lines changed: 1 addition & 1 deletion

@@ -245,7 +245,7 @@ def iter_params(self, model_id: str):
     # [LANGUAGE GENERATION]
     "microsoft/Phi-3.5-MoE-instruct",
     "meta-llama/Llama-3.2-1B-Instruct",
-    # "ArthurZ/Ilama-3.2-1B", NOTE: Uncomment after #13905
+    "ArthurZ/Ilama-3.2-1B",
     "ibm/PowerLM-3b",
     # [LANGUAGE EMBEDDING]
     "intfloat/e5-mistral-7b-instruct",

tests/models/decoder_only/vision_language/test_models.py
Lines changed: 19 additions & 33 deletions

@@ -8,9 +8,7 @@
 from pathlib import PosixPath
 
 import pytest
-from packaging.version import Version
 from transformers import AutoModelForImageTextToText, AutoModelForVision2Seq
-from transformers import __version__ as TRANSFORMERS_VERSION
 
 from vllm.platforms import current_platform
 from vllm.utils import identity

@@ -126,25 +124,6 @@
         dtype="bfloat16",
         marks=[pytest.mark.skip(reason="vLLM does not support PrefixLM attention mask")],  # noqa: E501
     ),
-    # TODO(ywang96): Move Qwen2-VL out of core models in favor of Qwen2.5-VL
-    # once we upgraded to transformers>=4.49.0.
-    "qwen2_vl": VLMTestInfo(
-        models=["Qwen/Qwen2-VL-2B-Instruct"],
-        test_type=(
-            VLMTestType.IMAGE,
-            VLMTestType.MULTI_IMAGE,
-            VLMTestType.VIDEO
-        ),
-        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
-        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",  # noqa: E501
-        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",  # noqa: E501
-        max_model_len=4096,
-        max_num_seqs=2,
-        auto_cls=AutoModelForVision2Seq,
-        vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output,
-        image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
-        marks=[pytest.mark.core_model, pytest.mark.cpu_model],
-    ),
     "qwen2_5_vl": VLMTestInfo(
         models=["Qwen/Qwen2.5-VL-3B-Instruct"],
         test_type=(

@@ -218,12 +197,6 @@
         hf_output_post_proc=model_utils.deepseekvl2_trunc_hf_output,
         stop_str=["<|end▁of▁sentence|>", "<|begin▁of▁sentence|>"],  # noqa: E501
         image_size_factors=[(), (1.0, ), (1.0, 1.0, 1.0), (0.1, 0.5, 1.0)],
-        marks=[
-            pytest.mark.skipif(
-                Version(TRANSFORMERS_VERSION) >= Version("4.48"),
-                reason="HF model is not compatible with transformers>=4.48",
-            )
-        ],
     ),
     "fuyu": VLMTestInfo(
         models=["adept/fuyu-8b"],

@@ -336,6 +309,7 @@
         prompt_formatter=lambda vid_prompt: f"<|im_start|>user\n{vid_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
         num_video_frames=16,
         max_model_len=16384,
+        hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs("llava-hf/llava-onevision-qwen2-0.5b-ov-hf"),  # noqa: E501
         auto_cls=AutoModelForVision2Seq,
         vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output,
         custom_test_opts=[CustomTestOptions(

@@ -365,12 +339,6 @@
         auto_cls=AutoModelForImageTextToText,
         vllm_output_post_proc=model_utils.mantis_vllm_to_hf_output,
         patch_hf_runner=model_utils.mantis_patch_hf_runner,
-        marks=[
-            pytest.mark.skipif(
-                Version(TRANSFORMERS_VERSION) >= Version("4.48"),
-                reason="HF model is not compatible with transformers>=4.48",
-            )
-        ],
     ),
     "minicpmv_25": VLMTestInfo(
         models=["openbmb/MiniCPM-Llama3-V-2_5"],

@@ -450,6 +418,23 @@
         vllm_output_post_proc=model_utils.qwen_vllm_to_hf_output,
         prompt_path_encoder=model_utils.qwen_prompt_path_encoder,
     ),
+    "qwen2_vl": VLMTestInfo(
+        models=["Qwen/Qwen2-VL-2B-Instruct"],
+        test_type=(
+            VLMTestType.IMAGE,
+            VLMTestType.MULTI_IMAGE,
+            VLMTestType.VIDEO
+        ),
+        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
+        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",  # noqa: E501
+        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",  # noqa: E501
+        max_model_len=4096,
+        max_num_seqs=2,
+        auto_cls=AutoModelForVision2Seq,
+        vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output,
+        image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
+        marks=[pytest.mark.cpu_model],
+    ),
     "skywork_r1v": VLMTestInfo(
         models=["Skywork/Skywork-R1V-38B"],
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),

@@ -515,6 +500,7 @@
         max_model_len=16384,
         max_num_seqs=2,
         auto_cls=AutoModelForVision2Seq,
+        hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs("llava-hf/llava-onevision-qwen2-0.5b-ov-hf"),  # noqa: E501
         vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output,
         custom_test_opts=[CustomTestOptions(
             inputs=custom_inputs.multi_image_multi_aspect_ratio_inputs(
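With compatibility metadata now carried by the registry (see tests/models/registry.py below), the per-test `pytest.mark.skipif` gates and the `packaging`/`TRANSFORMERS_VERSION` imports are removed from this file, and `qwen2_vl` drops out of the core-model set (it keeps only `pytest.mark.cpu_model`). For reference, the version-gating pattern this diff deletes looked like the sketch below; anything along these lines is now expressed through the registry fields instead:

import pytest
from packaging.version import Version
from transformers import __version__ as TRANSFORMERS_VERSION

# Pattern removed by this commit: gate an individual VLM test on the
# installed transformers version via pytest marks.
marks = [
    pytest.mark.skipif(
        Version(TRANSFORMERS_VERSION) >= Version("4.48"),
        reason="HF model is not compatible with transformers>=4.48",
    )
]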

tests/models/decoder_only/vision_language/vlm_utils/model_utils.py
Lines changed: 7 additions & 0 deletions

@@ -104,6 +104,13 @@ def _llava_vllm_to_hf_output(vllm_output: RunnerOutput, model: str,
     return hf_output_ids, hf_output_str, out_logprobs
 
 
+def llava_onevision_hf_model_kwargs(model: str) -> dict:
+    """Workaround to fix the sliding window issue in llava_onevision."""
+    config = AutoConfig.from_pretrained(model)
+    config.text_config.sliding_window = None
+    return config.to_dict()
+
+
 def llava_onevision_vllm_to_hf_output(vllm_output: RunnerOutput,
                                       model: str) -> RunnerOutput:
     """Sanitize vllm output [llava-onevision] to compare with hf output."""

tests/models/registry.py
Lines changed: 37 additions & 11 deletions

@@ -34,6 +34,16 @@ class _HfExamplesInfo:
     The minimum version of HF Transformers that is required to run this model.
     """
 
+    max_transformers_version: Optional[str] = None
+    """
+    The maximum version of HF Transformers that this model runs on.
+    """
+
+    transformers_version_reason: Optional[str] = None
+    """
+    The reason for the minimum/maximum version requirement.
+    """
+
     is_available_online: bool = True
     """
     Set this to ``False`` if the name of this architecture no longer exists on

@@ -57,21 +67,28 @@ def check_transformers_version(
         If the installed transformers version does not meet the requirements,
         perform the given action.
         """
-        if self.min_transformers_version is None:
+        if (self.min_transformers_version is None
+                and self.max_transformers_version is None):
             return
 
         current_version = TRANSFORMERS_VERSION
-        required_version = self.min_transformers_version
-        if Version(current_version) < Version(required_version):
-            msg = (
-                f"You have `transformers=={current_version}` installed, but "
-                f"`transformers>={required_version}` is required to run this "
-                "model")
+        min_version = self.min_transformers_version
+        max_version = self.max_transformers_version
+        msg = f"`transformers=={current_version}` installed, but `transformers"
+        if min_version and Version(current_version) < Version(min_version):
+            msg += f">={min_version}` is required to run this model."
+        elif max_version and Version(current_version) > Version(max_version):
+            msg += f"<={max_version}` is required to run this model."
+        else:
+            return
 
-        if on_fail == "error":
-            raise RuntimeError(msg)
-        else:
-            pytest.skip(msg)
+        if self.transformers_version_reason:
+            msg += f" Reason: {self.transformers_version_reason}"
+
+        if on_fail == "error":
+            raise RuntimeError(msg)
+        else:
+            pytest.skip(msg)
 
     def check_available_online(
         self,

@@ -245,6 +262,9 @@ def check_available_online(
     "Blip2ForConditionalGeneration": _HfExamplesInfo("Salesforce/blip2-opt-2.7b"),  # noqa: E501
     "ChameleonForConditionalGeneration": _HfExamplesInfo("facebook/chameleon-7b"),  # noqa: E501
     "DeepseekVLV2ForCausalLM": _HfExamplesInfo("deepseek-ai/deepseek-vl2-tiny",  # noqa: E501
+                                               extras={"fork": "Isotr0py/deepseek-vl2-tiny"},  # noqa: E501
+                                               max_transformers_version="4.48",  # noqa: E501
+                                               transformers_version_reason="HF model is not compatible.",  # noqa: E501
                                                hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]}),  # noqa: E501
     "FuyuForCausalLM": _HfExamplesInfo("adept/fuyu-8b"),
     "Gemma3ForConditionalGeneration": _HfExamplesInfo("google/gemma-3-4b-it",

@@ -266,13 +286,19 @@ def check_available_online(
     "LlavaNextVideoForConditionalGeneration": _HfExamplesInfo("llava-hf/LLaVA-NeXT-Video-7B-hf"),  # noqa: E501
     "LlavaOnevisionForConditionalGeneration": _HfExamplesInfo("llava-hf/llava-onevision-qwen2-0.5b-ov-hf"),  # noqa: E501
     "MantisForConditionalGeneration": _HfExamplesInfo("TIGER-Lab/Mantis-8B-siglip-llama3",  # noqa: E501
+                                                      max_transformers_version="4.48",  # noqa: E501
+                                                      transformers_version_reason="HF model is not compatible.",  # noqa: E501
                                                       hf_overrides={"architectures": ["MantisForConditionalGeneration"]}),  # noqa: E501
     "MiniCPMO": _HfExamplesInfo("openbmb/MiniCPM-o-2_6",
+                                max_transformers_version="4.48",
+                                transformers_version_reason="Use of deprecated imports which have been removed.",  # noqa: E501
                                 trust_remote_code=True),
     "MiniCPMV": _HfExamplesInfo("openbmb/MiniCPM-Llama3-V-2_5",
                                 extras={"2.6": "openbmb/MiniCPM-V-2_6"},  # noqa: E501
                                 trust_remote_code=True),
     "MolmoForCausalLM": _HfExamplesInfo("allenai/Molmo-7B-D-0924",
+                                        max_transformers_version="4.48",
+                                        transformers_version_reason="Use of private method which no longer exists.",  # noqa: E501
                                         extras={"olmo": "allenai/Molmo-7B-O-0924"},  # noqa: E501
                                         trust_remote_code=True),
     "NVLM_D": _HfExamplesInfo("nvidia/NVLM-D-72B",
