 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from typing import NamedTuple

 import pytest
 import torch
+from packaging.version import Version
 from transformers import AutoConfig
+from transformers import __version__ as TRANSFORMERS_VERSION

 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.platforms import current_platform
@@ -15,6 +18,7 @@ def generate_test_data(num_tokens: int, num_q_heads: int, num_kv_heads: int,
                        head_size: int, max_position_embeddings: int,
                        dtype: torch.dtype, device: torch.device):
     """Generate test data for given configuration."""
+    current_platform.seed_everything(42)
     # Create 2D positions (3, num_tokens) for multimodal case
     positions = torch.randint(0,
                               max_position_embeddings // 4, (3, num_tokens),
@@ -33,43 +37,67 @@ def generate_test_data(num_tokens: int, num_q_heads: int, num_kv_heads: int,
     return positions, query, key


-def unroll_model_tp_dict(model_tp_dict):
-    return [(model_name, tp_size)
-            for model_name, tp_sizes in model_tp_dict.items()
-            for tp_size in tp_sizes]
-
-
-model_tp_dict = {
-    "Qwen/Qwen2-VL-7B-Instruct": [1, 2],
-    "Qwen/Qwen2-VL-72B-Instruct": [1, 2],
-    "Qwen/Qwen2.5-VL-72B-Instruct": [1, 2],
-    "zai-org/GLM-4.1V-9B-Thinking": [1, 2],
-}
-
-# https://github.com/pytorch/pytorch/blob/main/torch/testing/_comparison.py#L1317
-dtype_atol_rtol_list = [
-    [torch.bfloat16, 1e-2, 1.6e-2],
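+# Per-model test settings: comparison tolerances and optional pytest marks.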
+class MRoPETestInfo(NamedTuple):
+    model_name: str
+    # https://github.com/pytorch/pytorch/blob/main/torch/testing/_comparison.py#L1317
+    atol: float = 1e-2
+    rtol: float = 1.6e-2
+    marks: list[pytest.MarkDecorator] = []
+
+
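+# base_version drops any dev/rc suffix for stable version comparisons below.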
+TRANSFORMERS_BASE_VERSION = Version(TRANSFORMERS_VERSION).base_version
+
+MODELS_TO_TEST = [
+    MRoPETestInfo(model_name="zai-org/GLM-4.1V-9B-Thinking"),
+    MRoPETestInfo(model_name="Qwen/Qwen2-VL-7B-Instruct"),
+    MRoPETestInfo(model_name="Qwen/Qwen2-VL-72B-Instruct"),
+    MRoPETestInfo(model_name="Qwen/Qwen2.5-VL-72B-Instruct"),
+    MRoPETestInfo(
+        model_name="Qwen/Qwen3-VL-4B-Instruct",
+        marks=[
+            pytest.mark.skipif(
+                Version(TRANSFORMERS_BASE_VERSION) < Version("4.57.0"),
+                reason="Qwen3-VL only available after Transformers v4.57",
+            )
+        ]),
+    MRoPETestInfo(
+        model_name="Qwen/Qwen3-VL-30B-A3B-Instruct",
+        marks=[
+            pytest.mark.skipif(
+                Version(TRANSFORMERS_BASE_VERSION) < Version("4.57.0"),
+                reason="Qwen3-VL only available after Transformers v4.57",
+            )
+        ]),
 ]

 num_tokens_list = [11, 8192]


 @pytest.mark.skipif(not current_platform.is_cuda_alike(),
                     reason="Skipping CUDA/ROCm only tests.")
-@pytest.mark.parametrize("model_name, tp_size",
-                         unroll_model_tp_dict(model_tp_dict))
-@pytest.mark.parametrize("dtype, atol, rtol", dtype_atol_rtol_list)
+@pytest.mark.parametrize("model_info, model_name", [
+    pytest.param(test_config, test_config.model_name, marks=test_config.marks)
+    for test_config in MODELS_TO_TEST
+])
+@pytest.mark.parametrize("tp_size", [1, 2])
+@pytest.mark.parametrize("dtype", [torch.bfloat16])
 @pytest.mark.parametrize("num_tokens", num_tokens_list)
-def test_mrope(model_name, tp_size, dtype, atol, rtol, num_tokens):
+def test_mrope(model_name: str, model_info: MRoPETestInfo, tp_size: int,
+               dtype: torch.dtype, num_tokens: int):
+
+    atol = model_info.atol
+    rtol = model_info.rtol

     config = AutoConfig.from_pretrained(model_name)
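+    # Multimodal configs nest the LM settings; grab the text sub-config.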
+    config = config.get_text_config()

     # get the model config
     total_num_kv_heads = config.num_key_value_heads
     total_num_heads = config.num_attention_heads
     num_heads = total_num_heads // tp_size
     num_kv_heads = max(1, total_num_kv_heads // tp_size)
-    head_dim = config.hidden_size // total_num_heads
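+    # Use the config's head_dim if present; otherwise derive it.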
+    head_dim = (config.head_dim if hasattr(config, "head_dim") else
+                config.hidden_size // total_num_heads)
     is_neox_style = True

     rope_theta = config.rope_theta
@@ -111,24 +139,30 @@ def test_mrope(model_name, tp_size, dtype, atol, rtol, num_tokens):

 @pytest.mark.skipif(not current_platform.is_cuda_alike(),
                     reason="Skipping CUDA/ROCm only tests.")
-@pytest.mark.parametrize(
-    "model_name, tp_size",
-    unroll_model_tp_dict({
-        "Qwen/Qwen2-VL-7B-Instruct": [1, 2],
-        "zai-org/GLM-4.1V-9B-Thinking": [1, 2]
-    }))
-@pytest.mark.parametrize("dtype, atol, rtol", dtype_atol_rtol_list)
-@pytest.mark.parametrize("num_tokens", [4])
-def test_mrope_torch_compile_tracing(model_name, tp_size, dtype, atol, rtol,
-                                     num_tokens):
+@pytest.mark.parametrize("model_info, model_name", [
+    pytest.param(test_config, test_config.model_name, marks=test_config.marks)
+    for test_config in MODELS_TO_TEST
+])
+@pytest.mark.parametrize("tp_size", [1, 2])
+@pytest.mark.parametrize("dtype", [torch.bfloat16])
+@pytest.mark.parametrize("num_tokens", num_tokens_list)
+def test_mrope_torch_compile_tracing(model_name: str,
+                                     model_info: MRoPETestInfo, tp_size: int,
+                                     dtype: torch.dtype, num_tokens: int):
+
+    atol = model_info.atol
+    rtol = model_info.rtol
+
     config = AutoConfig.from_pretrained(model_name)
+    config = config.get_text_config()

     # get the model config
     total_num_kv_heads = config.num_key_value_heads
     total_num_heads = config.num_attention_heads
     num_heads = total_num_heads // tp_size
     num_kv_heads = max(1, total_num_kv_heads // tp_size)
-    head_dim = config.hidden_size // total_num_heads
+    head_dim = (config.head_dim if hasattr(config, "head_dim") else
+                config.hidden_size // total_num_heads)
     is_neox_style = True
     rope_theta = config.rope_theta
     max_position = config.max_position_embeddings