Update new chunked prefill distributed test to include non-Ray

njhill · njhill · commit 56a1ad439350 · 2024-04-15T13:16:08.000-07:00
diff --git a/tests/distributed/test_chunked_prefill_distributed.py b/tests/distributed/test_chunked_prefill_distributed.py
@@ -27,6 +27,7 @@
 @pytest.mark.parametrize("dtype", ["half"])
 @pytest.mark.parametrize("max_tokens", [5])
 @pytest.mark.parametrize("chunked_prefill_token_size", [16])
+@pytest.mark.parametrize("worker_use_ray", [False, True])
 def test_models(
     hf_runner,
     vllm_runner,
@@ -35,6 +36,7 @@ def test_models(
     dtype: str,
     max_tokens: int,
     chunked_prefill_token_size: int,
+    worker_use_ray: bool,
 ) -> None:
     # Add a chunked prefill config.
     max_num_seqs = min(chunked_prefill_token_size, 256)
@@ -53,6 +55,7 @@ def test_models(
         max_num_seqs=max_num_seqs,
         enable_chunked_prefill=enable_chunked_prefill,
         max_num_batched_tokens=max_num_batched_tokens,
+        worker_use_ray=worker_use_ray,
     )
     vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens)
     del vllm_model