 from vllm.config.compilation import CompilationMode
 from vllm.config.model import RunnerOption
 from vllm.logger import init_logger
+from vllm.utils import is_torch_equal_or_newer

 from ..models.registry import HF_EXAMPLE_MODELS
 from ..utils import compare_two_settings, create_new_process_for_each_test
@@ -159,6 +160,7 @@ def _compare_sp(
     runner: RunnerOption,
     test_options: SPTestOptions,
     num_gpus_available: int,
+    use_inductor_graph_partition: bool,
     *,
     method: Literal["generate", "encode"],
     is_multimodal: bool,
@@ -243,6 +245,7 @@ def _compare_sp(
             "enable_fusion": enable_fusion,
             "enable_noop": True,
         },
+        "use_inductor_graph_partition": use_inductor_graph_partition,
     }

     tp_sp_args = [
@@ -297,6 +300,7 @@ def _compare_sp(
         if model_id in SP_TEST_MODELS
     ],
 )
+@pytest.mark.parametrize("use_inductor_graph_partition", [True, False])
 @create_new_process_for_each_test()
 def test_tp_sp_generation(
     model_id: str,
@@ -305,14 +309,19 @@ def test_tp_sp_generation(
     runner: RunnerOption,
     test_options: SPTestOptions,
     num_gpus_available,
+    use_inductor_graph_partition: bool,
 ):
+    if use_inductor_graph_partition and not is_torch_equal_or_newer("2.9.0.dev"):
+        pytest.skip("inductor graph partition is only available in PyTorch 2.9+")
+
     _compare_sp(
         model_id,
         parallel_setup,
         distributed_backend,
         runner,
         test_options,
         num_gpus_available,
+        use_inductor_graph_partition,
         method="generate",
         is_multimodal=False,
     )