
Commit fabe89b

[Spec Decode] Don't fall back to V0 when spec decoding is enabled (#18265)
1 parent e73b7df commit fabe89b

File tree

1 file changed: +1 addition, -9 deletions

vllm/engine/arg_utils.py

Lines changed: 1 addition & 9 deletions
@@ -1325,7 +1325,7 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
                                recommend_to_remove=False)
             return False
 
-        # Only Ngram speculative decoding so far.
+        # V1 supports N-gram, Medusa, and Eagle speculative decoding.
         is_ngram_enabled = False
         is_eagle_enabled = False
         is_medusa_enabled = False
@@ -1390,14 +1390,6 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
             _raise_or_fallback(feature_name=name, recommend_to_remove=False)
             return False
 
-        # ngram is supported on V1, but off by default for now.
-        if is_ngram_enabled and _warn_or_fallback("ngram"):
-            return False
-
-        # Eagle is under development, so we don't support it yet.
-        if is_eagle_enabled and _warn_or_fallback("Eagle"):
-            return False
-
         # Non-[CUDA, TPU] may be supported on V1, but off by default for now.
         v0_hardware = not any(
             (current_platform.is_cuda(), current_platform.is_tpu()))
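
In effect, the commit removes the _warn_or_fallback checks so that configuring N-gram or Eagle speculative decoding no longer warns and drops the engine back to V0. A minimal usage sketch is below, assuming vLLM's speculative_config dict interface; the model name and the specific config keys are illustrative assumptions, not part of this commit.

# Minimal sketch (assumed interface): N-gram speculative decoding staying on the V1 engine.
# The model name and speculative_config keys below are illustrative assumptions.
from vllm import LLM, SamplingParams

llm = LLM(
    model="meta-llama/Llama-3.1-8B-Instruct",  # hypothetical example model
    speculative_config={
        "method": "ngram",            # draft tokens via n-gram lookup in the prompt
        "num_speculative_tokens": 5,  # tokens proposed per decoding step
        "prompt_lookup_max": 4,       # longest n-gram to match in the prompt
    },
)

# After this commit, _is_v1_supported_oracle keeps the V1 engine instead of
# warning and falling back to V0 when speculative decoding is configured.
outputs = llm.generate(["The capital of France is"],
                       SamplingParams(temperature=0.0, max_tokens=32))
print(outputs[0].outputs[0].text)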
