enable v1 by default

bigPYJ1151 · bigPYJ1151 · commit f7de05c899cd · 2025-06-03T08:29:49.000Z
Signed-off-by: jiang.li <jiang1.li@intel.com>
diff --git a/tests/models/language/generation/test_common.py b/tests/models/language/generation/test_common.py
@@ -86,7 +86,6 @@
         pytest.param("bigcode/starcoder2-3b"),  # starcoder2
         pytest.param(
             "TitanML/tiny-mixtral",  # mixtral
-            marks=[pytest.mark.cpu_model],
         )
     ])
 @pytest.mark.parametrize("max_tokens", [32])
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
@@ -1386,6 +1386,7 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
             "FLASHINFER",
             "FLASHINFER_VLLM_V1",
             "ROCM_AITER_MLA",
+            "TORCH_SDPA_VLLM_V1",
         ]
         if (envs.is_set("VLLM_ATTENTION_BACKEND")
                 and envs.VLLM_ATTENTION_BACKEND not in V1_BACKENDS):
@@ -1418,7 +1419,8 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
 
         # Non-[CUDA, TPU] may be supported on V1, but off by default for now.
         v0_hardware = not any(
-            (current_platform.is_cuda(), current_platform.is_tpu()))
+            (current_platform.is_cuda(), current_platform.is_tpu(),
+             current_platform.is_cpu()))
         if v0_hardware and _warn_or_fallback(  # noqa: SIM103
                 current_platform.device_name):
             return False

Original file line number	Diff line number	Diff line change
`@@ -86,7 +86,6 @@`
`86`	`86`	`pytest.param("bigcode/starcoder2-3b"), # starcoder2`
`87`	`87`	`pytest.param(`
`88`	`88`	`"TitanML/tiny-mixtral", # mixtral`
`89`		`- marks=[pytest.mark.cpu_model],`
`90`	`89`	`)`
`91`	`90`	`])`
`92`	`91`	`@pytest.mark.parametrize("max_tokens", [32])`