 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from typing import NamedTuple
 
 import pytest
 import torch
+from packaging.version import Version
 from transformers import AutoConfig
+from transformers import __version__ as TRANSFORMERS_VERSION
 
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.platforms import current_platform
@@ -15,6 +18,7 @@ def generate_test_data(num_tokens: int, num_q_heads: int, num_kv_heads: int,
                        head_size: int, max_position_embeddings: int,
                        dtype: torch.dtype, device: torch.device):
     """Generate test data for given configuration."""
+    current_platform.seed_everything(42)
     # Create 2D positions (3, num_tokens) for multimodal case
     positions = torch.randint(0,
                               max_position_embeddings // 4, (3, num_tokens),
@@ -33,43 +37,67 @@ def generate_test_data(num_tokens: int, num_q_heads: int, num_kv_heads: int,
     return positions, query, key
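
Note: the `current_platform.seed_everything(42)` call added above pins every RNG the test relies on, so the randomly generated positions/query/key are identical across runs. A minimal sketch of what a seed-everything helper typically does (assuming the usual random/numpy/torch trio; vLLM's actual helper may differ):

    import random
    import numpy as np
    import torch

    def seed_everything(seed: int) -> None:
        # Seed Python's, NumPy's, and torch's (CPU and all CUDA) generators.
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)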
 
 
-def unroll_model_tp_dict(model_tp_dict):
-    return [(model_name, tp_size)
-            for model_name, tp_sizes in model_tp_dict.items()
-            for tp_size in tp_sizes]
-
-
-model_tp_dict = {
-    "Qwen/Qwen2-VL-7B-Instruct": [1, 2],
-    "Qwen/Qwen2-VL-72B-Instruct": [1, 2],
-    "Qwen/Qwen2.5-VL-72B-Instruct": [1, 2],
-    "zai-org/GLM-4.1V-9B-Thinking": [1, 2],
-}
-
-# https://github.com/pytorch/pytorch/blob/main/torch/testing/_comparison.py#L1317
-dtype_atol_rtol_list = [
-    [torch.bfloat16, 1e-2, 1.6e-2],
+class MRoPETestInfo(NamedTuple):
+    model_name: str
+    # https://github.com/pytorch/pytorch/blob/main/torch/testing/_comparison.py#L1317
+    atol: float = 1e-2
+    rtol: float = 1.6e-2
+    marks: list[pytest.MarkDecorator] = []
+
+
+TRANSFORMERS_BASE_VERSION = Version(TRANSFORMERS_VERSION).base_version
+
+MODELS_TO_TEST = [
+    MRoPETestInfo(model_name="zai-org/GLM-4.1V-9B-Thinking"),
+    MRoPETestInfo(model_name="Qwen/Qwen2-VL-7B-Instruct"),
+    MRoPETestInfo(model_name="Qwen/Qwen2-VL-72B-Instruct"),
+    MRoPETestInfo(model_name="Qwen/Qwen2.5-VL-72B-Instruct"),
+    MRoPETestInfo(
+        model_name="Qwen/Qwen3-VL-4B-Instruct",
+        marks=[
+            pytest.mark.skipif(
+                Version(TRANSFORMERS_BASE_VERSION) < Version("4.57.0"),
+                reason="Qwen3-VL only available after Transformers v4.57",
+            )
+        ]),
+    MRoPETestInfo(
+        model_name="Qwen/Qwen3-VL-30B-A3B-Instruct",
+        marks=[
+            pytest.mark.skipif(
+                Version(TRANSFORMERS_BASE_VERSION) < Version("4.57.0"),
+                reason="Qwen3-VL only available after Transformers v4.57",
+            )
+        ]),
 ]
 
 num_tokens_list = [11, 8192]
 
 
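Note: the per-model `marks` field defined above is what lets individual entries opt out (e.g. the Qwen3-VL models that require Transformers >= 4.57) while sharing one parametrize list. The mechanism is pytest's `pytest.param`, which attaches marks to a single parameter set; a self-contained sketch of the pattern (hypothetical test, not from this file):

    import pytest

    CASES = [
        pytest.param("stable-model", id="stable"),
        pytest.param(
            "gated-model",
            marks=pytest.mark.skipif(True, reason="dependency too old"),
            id="gated"),
    ]

    @pytest.mark.parametrize("name", CASES)
    def test_models(name: str):
        # The "gated" case is skipped; the "stable" case runs normally.
        assert isinstance(name, str)
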
 @pytest.mark.skipif(not current_platform.is_cuda_alike(),
                     reason="Skipping CUDA/ROCm only tests.")
-@pytest.mark.parametrize("model_name, tp_size",
-                         unroll_model_tp_dict(model_tp_dict))
-@pytest.mark.parametrize("dtype, atol, rtol", dtype_atol_rtol_list)
+@pytest.mark.parametrize("model_info, model_name", [
+    pytest.param(test_config, test_config.model_name, marks=test_config.marks)
+    for test_config in MODELS_TO_TEST
+])
+@pytest.mark.parametrize("tp_size", [1, 2])
+@pytest.mark.parametrize("dtype", [torch.bfloat16])
 @pytest.mark.parametrize("num_tokens", num_tokens_list)
-def test_mrope(model_name, tp_size, dtype, atol, rtol, num_tokens):
+def test_mrope(model_name: str, model_info: MRoPETestInfo, tp_size: int,
+               dtype: torch.dtype, num_tokens: int):
+
+    atol = model_info.atol
+    rtol = model_info.rtol
 
     config = AutoConfig.from_pretrained(model_name)
+    config = config.get_text_config()
 
     # get the model config
     total_num_kv_heads = config.num_key_value_heads
     total_num_heads = config.num_attention_heads
     num_heads = total_num_heads // tp_size
     num_kv_heads = max(1, total_num_kv_heads // tp_size)
-    head_dim = config.hidden_size // total_num_heads
+    head_dim = (config.head_dim if hasattr(config, "head_dim") else
+                config.hidden_size // total_num_heads)
     is_neox_style = True
 
     rope_theta = config.rope_theta
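
Note: the `head_dim` change in this hunk matters for checkpoints whose head size is stored explicitly on the config rather than being derivable as `hidden_size // num_attention_heads`. A common alternative spelling of the same fallback (equivalent unless `head_dim` exists but is `None`, in which case this variant still falls back):

    head_dim = getattr(config, "head_dim", None) \
        or config.hidden_size // total_num_heads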
@@ -111,24 +139,30 @@ def test_mrope(model_name, tp_size, dtype, atol, rtol, num_tokens):
 
 @pytest.mark.skipif(not current_platform.is_cuda_alike(),
                     reason="Skipping CUDA/ROCm only tests.")
-@pytest.mark.parametrize(
-    "model_name, tp_size",
-    unroll_model_tp_dict({
-        "Qwen/Qwen2-VL-7B-Instruct": [1, 2],
-        "zai-org/GLM-4.1V-9B-Thinking": [1, 2]
-    }))
-@pytest.mark.parametrize("dtype, atol, rtol", dtype_atol_rtol_list)
-@pytest.mark.parametrize("num_tokens", [4])
-def test_mrope_torch_compile_tracing(model_name, tp_size, dtype, atol, rtol,
-                                     num_tokens):
+@pytest.mark.parametrize("model_info, model_name", [
+    pytest.param(test_config, test_config.model_name, marks=test_config.marks)
+    for test_config in MODELS_TO_TEST
+])
+@pytest.mark.parametrize("tp_size", [1, 2])
+@pytest.mark.parametrize("dtype", [torch.bfloat16])
+@pytest.mark.parametrize("num_tokens", num_tokens_list)
+def test_mrope_torch_compile_tracing(model_name: str,
+                                     model_info: MRoPETestInfo, tp_size: int,
+                                     dtype: torch.dtype, num_tokens: int):
+
+    atol = model_info.atol
+    rtol = model_info.rtol
+
     config = AutoConfig.from_pretrained(model_name)
+    config = config.get_text_config()
 
     # get the model config
     total_num_kv_heads = config.num_key_value_heads
     total_num_heads = config.num_attention_heads
     num_heads = total_num_heads // tp_size
     num_kv_heads = max(1, total_num_kv_heads // tp_size)
-    head_dim = config.hidden_size // total_num_heads
+    head_dim = (config.head_dim if hasattr(config, "head_dim") else
+                config.hidden_size // total_num_heads)
     is_neox_style = True
     rope_theta = config.rope_theta
     max_position = config.max_position_embeddings
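
Note: both tests now call `config.get_text_config()` right after loading. For composite multimodal configs this returns the nested language-model sub-config, which is where fields like `num_key_value_heads` and `rope_theta` live in newer Transformers releases; for text-only configs it simply returns the config itself. Rough usage sketch (model name taken from the list above):

    from transformers import AutoConfig

    config = AutoConfig.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
    text_config = config.get_text_config()
    print(text_config.num_attention_heads, text_config.rope_theta)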