Skip to content

Commit f7de05c

Browse files
committed
enable v1 by default
Signed-off-by: jiang.li <jiang1.li@intel.com>
1 parent f44b619 commit f7de05c

File tree

2 files changed

+3
-2
lines changed

2 files changed

+3
-2
lines changed

tests/models/language/generation/test_common.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,6 @@
8686
pytest.param("bigcode/starcoder2-3b"), # starcoder2
8787
pytest.param(
8888
"TitanML/tiny-mixtral", # mixtral
89-
marks=[pytest.mark.cpu_model],
9089
)
9190
])
9291
@pytest.mark.parametrize("max_tokens", [32])

vllm/engine/arg_utils.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1386,6 +1386,7 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
13861386
"FLASHINFER",
13871387
"FLASHINFER_VLLM_V1",
13881388
"ROCM_AITER_MLA",
1389+
"TORCH_SDPA_VLLM_V1",
13891390
]
13901391
if (envs.is_set("VLLM_ATTENTION_BACKEND")
13911392
and envs.VLLM_ATTENTION_BACKEND not in V1_BACKENDS):
@@ -1418,7 +1419,8 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
14181419

14191420
# Non-[CUDA, TPU, CPU] may be supported on V1, but off by default for now.
14201421
v0_hardware = not any(
1421-
(current_platform.is_cuda(), current_platform.is_tpu()))
1422+
(current_platform.is_cuda(), current_platform.is_tpu(),
1423+
current_platform.is_cpu()))
14221424
if v0_hardware and _warn_or_fallback( # noqa: SIM103
14231425
current_platform.device_name):
14241426
return False

0 commit comments

Comments
 (0)