From a2327faf05c21a2bff26b05315625fafa46d8085 Mon Sep 17 00:00:00 2001
From: Thomas Parnell
Date: Fri, 28 Jun 2024 16:42:17 +0200
Subject: [PATCH] [Bugfix] Better error message for MLPSpeculator when
 `num_speculative_tokens` is set too high (#5894)

Signed-off-by: Thomas Parnell
---
 vllm/config.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index 31d30cfa73d1f..05bc570626a44 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -956,9 +956,9 @@ def maybe_create_spec_config(
                     # Verify provided value doesn't exceed the maximum
                     # supported by the draft model.
                     raise ValueError(
-                        "Expected both speculative_model and "
-                        "num_speculative_tokens to be provided, but found "
-                        f"{speculative_model=} and {num_speculative_tokens=}.")
+                        "This speculative model supports a maximum of "
+                        f"num_speculative_tokens={n_predict}, but "
+                        f"{num_speculative_tokens=} was provided.")

         draft_model_config.max_model_len = (
             SpeculativeConfig._maybe_override_draft_max_model_len(