
Commit fabe89b

[Spec Decode] Don't fall back to V0 when spec decoding is enabled (#18265)
1 parent e73b7df commit fabe89b

File tree

1 file changed: +1 addition, -9 deletions

vllm/engine/arg_utils.py

Lines changed: 1 addition & 9 deletions
@@ -1325,7 +1325,7 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
                                recommend_to_remove=False)
             return False
 
-        # Only Ngram speculative decoding so far.
+        # V1 supports N-gram, Medusa, and Eagle speculative decoding.
         is_ngram_enabled = False
         is_eagle_enabled = False
         is_medusa_enabled = False
@@ -1390,14 +1390,6 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
             _raise_or_fallback(feature_name=name, recommend_to_remove=False)
             return False
 
-        # ngram is supported on V1, but off by default for now.
-        if is_ngram_enabled and _warn_or_fallback("ngram"):
-            return False
-
-        # Eagle is under development, so we don't support it yet.
-        if is_eagle_enabled and _warn_or_fallback("Eagle"):
-            return False
-
         # Non-[CUDA, TPU] may be supported on V1, but off by default for now.
         v0_hardware = not any(
             (current_platform.is_cuda(), current_platform.is_tpu()))
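
In effect, the commit removes the _warn_or_fallback checks so that configuring N-gram or Eagle speculative decoding no longer warns and drops the engine back to V0. A minimal usage sketch is below, assuming vLLM's speculative_config dict interface; the model name and the specific config keys are illustrative assumptions, not part of this commit.

# Minimal sketch (assumed interface): N-gram speculative decoding staying on the V1 engine.
# The model name and speculative_config keys below are illustrative assumptions.
from vllm import LLM, SamplingParams

llm = LLM(
    model="meta-llama/Llama-3.1-8B-Instruct",  # hypothetical example model
    speculative_config={
        "method": "ngram",            # draft tokens via n-gram lookup in the prompt
        "num_speculative_tokens": 5,  # tokens proposed per decoding step
        "prompt_lookup_max": 4,       # longest n-gram to match in the prompt
    },
)

# After this commit, _is_v1_supported_oracle keeps the V1 engine instead of
# warning and falling back to V0 when speculative decoding is configured.
outputs = llm.generate(["The capital of France is"],
                       SamplingParams(temperature=0.0, max_tokens=32))
print(outputs[0].outputs[0].text)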
