From 38acae6e97897d78ddcb7b3d3608c2c076e1d623 Mon Sep 17 00:00:00 2001
From: Sage Moore <sage@neuralmagic.com>
Date: Thu, 27 Feb 2025 12:31:47 -0800
Subject: [PATCH] [ROCm] Fix the Kernels, Core, and Prefix Caching AMD CI
 groups (#13970)

Signed-off-by: Sage Moore <sage@neuralmagic.com>
---
 .buildkite/run-amd-test.sh                             |  4 +++-
 .../core/block/e2e/test_correctness_sliding_window.py  | 10 ++++++++++
 tests/prefix_caching/test_prefix_caching.py            | 10 ++++++++++
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/.buildkite/run-amd-test.sh b/.buildkite/run-amd-test.sh
index f8bf1c87603f4..35d2ba1f8bab4 100755
--- a/.buildkite/run-amd-test.sh
+++ b/.buildkite/run-amd-test.sh
@@ -92,7 +92,9 @@ if [[ $commands == *" kernels "* ]]; then
   --ignore=kernels/test_moe.py \
   --ignore=kernels/test_prefix_prefill.py \
   --ignore=kernels/test_rand.py \
-  --ignore=kernels/test_sampler.py"
+  --ignore=kernels/test_sampler.py \
+  --ignore=kernels/test_cascade_flash_attn.py \
+  --ignore=kernels/test_mamba_mixer2.py"
 fi
 
 #ignore certain Entrypoints tests
diff --git a/tests/core/block/e2e/test_correctness_sliding_window.py b/tests/core/block/e2e/test_correctness_sliding_window.py
index c874608e40a23..a7dafcf8be875 100644
--- a/tests/core/block/e2e/test_correctness_sliding_window.py
+++ b/tests/core/block/e2e/test_correctness_sliding_window.py
@@ -7,6 +7,7 @@
 
 from tests.kernels.utils import override_backend_env_variable
 from vllm import LLM, SamplingParams
+from vllm.platforms import current_platform
 
 from .conftest import get_text_from_llm_generator
 
@@ -42,6 +43,11 @@ def test_sliding_window_retrival(baseline_llm_generator, test_llm_generator,
 
     Additionally, we compare the results of the v1 and v2 managers.
     """
+    if backend == "FLASHINFER" and current_platform.is_rocm():
+        pytest.skip("Flashinfer does not support ROCm/HIP.")
+    if backend == "XFORMERS" and current_platform.is_rocm():
+        pytest.skip("Xformers does not support ROCm/HIP.")
+
     override_backend_env_variable(monkeypatch, backend)
 
     sampling_params = SamplingParams(
@@ -101,6 +107,10 @@ def test_sliding_window_chunked_prefill(test_llm_generator, batch_size, seed,
     The results with and without chunked prefill are not the same due to
     numerical instabilities.
     """
+    if backend == "FLASHINFER" and current_platform.is_rocm():
+        pytest.skip("Flashinfer does not support ROCm/HIP.")
+    if backend == "XFORMERS" and current_platform.is_rocm():
+        pytest.skip("Xformers does not support ROCm/HIP.")
     override_backend_env_variable(monkeypatch, backend)
 
     sampling_params = SamplingParams(
diff --git a/tests/prefix_caching/test_prefix_caching.py b/tests/prefix_caching/test_prefix_caching.py
index 2773d27a6813b..d7d84bdcf382a 100644
--- a/tests/prefix_caching/test_prefix_caching.py
+++ b/tests/prefix_caching/test_prefix_caching.py
@@ -12,6 +12,7 @@
 from vllm import SamplingParams, TokensPrompt
 from vllm.core.scheduler import Scheduler
 from vllm.engine.llm_engine import LLMEngine
+from vllm.platforms import current_platform
 
 from ..models.utils import check_outputs_equal
 
@@ -53,6 +54,10 @@ def test_mixed_requests(
     and the others don't. The cached position determines where
     the sequence is at among the batch of prefills.
     """
+    if backend == "FLASHINFER" and current_platform.is_rocm():
+        pytest.skip("Flashinfer does not support ROCm/HIP.")
+    if backend == "XFORMERS" and current_platform.is_rocm():
+        pytest.skip("Xformers does not support ROCm/HIP.")
     override_backend_env_variable(monkeypatch, backend)
 
     with hf_runner(model, dtype=dtype) as hf_model:
@@ -103,6 +108,11 @@ def test_unstable_prompt_sequence(
     backend: str,
     monkeypatch,
 ) -> None:
+
+    if backend == "FLASHINFER" and current_platform.is_rocm():
+        pytest.skip("Flashinfer does not support ROCm/HIP.")
+    if backend == "XFORMERS" and current_platform.is_rocm():
+        pytest.skip("Xformers does not support ROCm/HIP.")
     override_backend_env_variable(monkeypatch, backend)
 
     with vllm_runner(