
Commit 1025f93

NickLucche committed
raise error to client when per-request seed is set
Signed-off-by: NickLucche <nlucches@redhat.com>
1 parent e1dab88 commit 1025f93

File tree

.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh
tests/v1/tpu/test_sampler.py
vllm/v1/engine/processor.py
vllm/v1/worker/tpu_model_runner.py

4 files changed: +12 -10 lines

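In practical terms (a minimal client-side sketch; the TPU host and the model name are assumptions, not part of this commit): a request that sets a per-request seed now fails fast with a ValueError instead of being accepted and silently de-seeded.

# Hypothetical client-side view of the change; model name and TPU host are
# assumptions. Before this commit the seed was only warned about and ignored.
from vllm import LLM, SamplingParams

llm = LLM(model="Qwen/Qwen2.5-1.5B-Instruct")  # illustrative model

try:
    params = SamplingParams(temperature=0.8, seed=1234)  # per-request seed
    llm.generate(["Hello"], params)
except ValueError as err:
    # On TPU this now surfaces: "Torch XLA does not support per-request seed."
    print(err)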

.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh

Lines changed: 1 addition & 1 deletion
@@ -42,4 +42,4 @@ docker run --privileged --net host --shm-size=16G -it \
     && echo TEST_9 \
     && pytest -s -v /workspace/vllm/tests/v1/tpu/test_pallas.py \
     && echo TEST_10 \
-    && pytest -s -v /workspace/vllm/tests/tpu/test_custom_dispatcher.py" \
+    && pytest -s -v /workspace/vllm/tests/tpu/test_custom_dispatcher.py" \

tests/v1/tpu/test_sampler.py

Lines changed: 6 additions & 4 deletions
@@ -28,12 +28,14 @@ def test_sampler_different(model_name: str):
     prompts = [
         "Write a short story about a robot that dreams for the first time."
     ]
-    sampling_params = SamplingParams(temperature=0.9,
-                                     min_p=0.2,
-                                     max_tokens=64,
-                                     seed=42)
+    sampling_params = SamplingParams(temperature=0.9, min_p=0.2, max_tokens=64)
     output = llm.generate(prompts, sampling_params)
 
     sampling_params = SamplingParams(temperature=0.1, min_p=0.8, max_tokens=64)
     output2 = llm.generate(prompts, sampling_params)
     assert output[0].outputs[0].text != output2[0].outputs[0].text
+
+    with pytest.raises(ValueError):
+        # Unsupported `seed` param.
+        sampling_params = SamplingParams(temperature=0.3, seed=42)
+        output2 = llm.generate(prompts, sampling_params)
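The new test case relies on SamplingParams deriving its sampling_type from the seed: once seed is set, the request is classified as SamplingType.RANDOM_SEED, which is exactly what the processor now rejects on TPU. A rough sketch of that relationship (paraphrasing the upstream property, not part of this diff):

# Rough illustration of how `seed` maps to SamplingType; the real logic lives
# in vllm.sampling_params and may differ in detail (e.g. the greedy epsilon).
from vllm import SamplingParams
from vllm.sampling_params import SamplingType

assert SamplingParams(temperature=0.3, seed=42).sampling_type == SamplingType.RANDOM_SEED
assert SamplingParams(temperature=0.9).sampling_type == SamplingType.RANDOM
assert SamplingParams(temperature=0.0).sampling_type == SamplingType.GREEDY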

vllm/v1/engine/processor.py

Lines changed: 5 additions & 1 deletion
@@ -13,9 +13,10 @@
                              MultiModalRegistry)
 from vllm.multimodal.inputs import PlaceholderRange
 from vllm.multimodal.utils import merge_and_sort_multimodal_metadata
+from vllm.platforms import current_platform
 from vllm.pooling_params import PoolingParams
 from vllm.prompt_adapter.request import PromptAdapterRequest
-from vllm.sampling_params import SamplingParams
+from vllm.sampling_params import SamplingParams, SamplingType
 from vllm.transformers_utils.tokenizer_group import BaseTokenizerGroup
 from vllm.v1.engine import EngineCoreRequest
 from vllm.v1.structured_output.backend_guidance import (
@@ -73,6 +74,9 @@ def _validate_sampling_params(
         params: SamplingParams,
     ) -> None:
         self._validate_structured_output(params)
+        if (current_platform.is_tpu()
+                and params.sampling_type == SamplingType.RANDOM_SEED):
+            raise ValueError("Torch XLA does not support per-request seed.")
 
         if params.allowed_token_ids is None:
             return
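For readability, here is the added check in isolation (a paraphrase of Processor._validate_sampling_params; the free function below is hypothetical, not part of the codebase). Validating in the processor, before the request reaches the worker, is what turns the old runner-side warning into an error the client actually sees.

# Standalone paraphrase of the added validation; the real check lives inside
# Processor._validate_sampling_params.
from vllm.platforms import current_platform
from vllm.sampling_params import SamplingParams, SamplingType


def reject_seed_on_tpu(params: SamplingParams) -> None:
    # Torch XLA has no per-request RNG seeding, so fail at request-validation
    # time instead of silently ignoring the seed in the model runner.
    if (current_platform.is_tpu()
            and params.sampling_type == SamplingType.RANDOM_SEED):
        raise ValueError("Torch XLA does not support per-request seed.")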

vllm/v1/worker/tpu_model_runner.py

Lines changed: 0 additions & 4 deletions
@@ -23,7 +23,6 @@
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.inputs import MultiModalKwargs, PlaceholderRange
 from vllm.multimodal.utils import group_mm_inputs_by_modality
-from vllm.sampling_params import SamplingType
 from vllm.sequence import IntermediateTensors
 from vllm.utils import LayerBlockType, cdiv, is_pin_memory_available
 from vllm.v1.attention.backends.pallas import (PallasAttentionBackend,
@@ -265,9 +264,6 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> bool:
         for new_req_data in scheduler_output.scheduled_new_reqs:
             req_id = new_req_data.req_id
             sampling_params = new_req_data.sampling_params
-            if sampling_params.sampling_type == SamplingType.RANDOM_SEED:
-                logger.warning("Torch XLA does not support per-request seed."
-                               "Seed {sampling_params.seed} will be ignored")
 
             self.requests[req_id] = CachedRequestState(
                 req_id=req_id,
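Incidentally, the removed warning also carried a formatting bug: the second string literal was not an f-string, so "{sampling_params.seed}" would have been logged verbatim. If a runner-side log were ever reintroduced, lazy %-style formatting (the style vLLM's loggers generally use) avoids that pitfall; a tiny self-contained sketch:

import logging

logger = logging.getLogger(__name__)
seed = 42  # stand-in for sampling_params.seed

# Lazy %-style formatting interpolates the value only when the record is
# emitted, and sidesteps the missing f-prefix from the removed code.
logger.warning(
    "Torch XLA does not support per-request seed. Seed %s will be ignored.",
    seed)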
