From 38acae6e97897d78ddcb7b3d3608c2c076e1d623 Mon Sep 17 00:00:00 2001 From: Sage Moore Date: Thu, 27 Feb 2025 12:31:47 -0800 Subject: [PATCH] [ROCm] Fix the Kernels, Core, and Prefix Caching AMD CI groups (#13970) Signed-off-by: Sage Moore --- .buildkite/run-amd-test.sh | 4 +++- .../core/block/e2e/test_correctness_sliding_window.py | 10 ++++++++++ tests/prefix_caching/test_prefix_caching.py | 10 ++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/.buildkite/run-amd-test.sh b/.buildkite/run-amd-test.sh index f8bf1c87603f4..35d2ba1f8bab4 100755 --- a/.buildkite/run-amd-test.sh +++ b/.buildkite/run-amd-test.sh @@ -92,7 +92,9 @@ if [[ $commands == *" kernels "* ]]; then --ignore=kernels/test_moe.py \ --ignore=kernels/test_prefix_prefill.py \ --ignore=kernels/test_rand.py \ - --ignore=kernels/test_sampler.py" + --ignore=kernels/test_sampler.py \ + --ignore=kernels/test_cascade_flash_attn.py \ + --ignore=kernels/test_mamba_mixer2.py" fi #ignore certain Entrypoints tests diff --git a/tests/core/block/e2e/test_correctness_sliding_window.py b/tests/core/block/e2e/test_correctness_sliding_window.py index c874608e40a23..a7dafcf8be875 100644 --- a/tests/core/block/e2e/test_correctness_sliding_window.py +++ b/tests/core/block/e2e/test_correctness_sliding_window.py @@ -7,6 +7,7 @@ from tests.kernels.utils import override_backend_env_variable from vllm import LLM, SamplingParams +from vllm.platforms import current_platform from .conftest import get_text_from_llm_generator @@ -42,6 +43,11 @@ def test_sliding_window_retrival(baseline_llm_generator, test_llm_generator, Additionally, we compare the results of the v1 and v2 managers. """ + if backend == "FLASHINFER" and current_platform.is_rocm(): + pytest.skip("Flashinfer does not support ROCm/HIP.") + if backend == "XFORMERS" and current_platform.is_rocm(): + pytest.skip("Xformers does not support ROCm/HIP.") + override_backend_env_variable(monkeypatch, backend) sampling_params = SamplingParams( @@ -101,6 +107,10 @@ def test_sliding_window_chunked_prefill(test_llm_generator, batch_size, seed, The results with and without chunked prefill are not the same due to numerical instabilities. """ + if backend == "FLASHINFER" and current_platform.is_rocm(): + pytest.skip("Flashinfer does not support ROCm/HIP.") + if backend == "XFORMERS" and current_platform.is_rocm(): + pytest.skip("Xformers does not support ROCm/HIP.") override_backend_env_variable(monkeypatch, backend) sampling_params = SamplingParams( diff --git a/tests/prefix_caching/test_prefix_caching.py b/tests/prefix_caching/test_prefix_caching.py index 2773d27a6813b..d7d84bdcf382a 100644 --- a/tests/prefix_caching/test_prefix_caching.py +++ b/tests/prefix_caching/test_prefix_caching.py @@ -12,6 +12,7 @@ from vllm import SamplingParams, TokensPrompt from vllm.core.scheduler import Scheduler from vllm.engine.llm_engine import LLMEngine +from vllm.platforms import current_platform from ..models.utils import check_outputs_equal @@ -53,6 +54,10 @@ def test_mixed_requests( and the others don't. The cached position determines where the sequence is at among the batch of prefills. """ + if backend == "FLASHINFER" and current_platform.is_rocm(): + pytest.skip("Flashinfer does not support ROCm/HIP.") + if backend == "XFORMERS" and current_platform.is_rocm(): + pytest.skip("Xformers does not support ROCm/HIP.") override_backend_env_variable(monkeypatch, backend) with hf_runner(model, dtype=dtype) as hf_model: @@ -103,6 +108,11 @@ def test_unstable_prompt_sequence( backend: str, monkeypatch, ) -> None: + + if backend == "FLASHINFER" and current_platform.is_rocm(): + pytest.skip("Flashinfer does not support ROCm/HIP.") + if backend == "XFORMERS" and current_platform.is_rocm(): + pytest.skip("Xformers does not support ROCm/HIP.") override_backend_env_variable(monkeypatch, backend) with vllm_runner(