
Commit 50b788a

zhewenli authored
[CI/Build] Fix AMD CI: test_cpu_gpu.py (#27388)
Signed-off-by: zhewenli <zhewenli@meta.com>
1 parent fc059c7 commit 50b788a

File tree

1 file changed: +13 -4 lines changed


tests/v1/kv_offload/test_cpu_gpu.py

Lines changed: 13 additions & 4 deletions
@@ -8,11 +8,20 @@

 from vllm.platforms import current_platform
 from vllm.v1.attention.backends.flash_attn import FlashAttentionBackend
-from vllm.v1.attention.backends.flashinfer import FlashInferBackend
-from vllm.v1.attention.backends.mla.flashattn_mla import FlashAttnMLABackend
 from vllm.v1.kv_offload.mediums import CPULoadStoreSpec, GPULoadStoreSpec
 from vllm.v1.kv_offload.worker.cpu_gpu import CpuGpuOffloadingHandler

+BACKENDS_TO_TEST = [FlashAttentionBackend]
+
+if not current_platform.is_rocm():
+    from vllm.v1.attention.backends.flashinfer import FlashInferBackend
+
+    BACKENDS_TO_TEST.append(FlashInferBackend)
+
+    from vllm.v1.attention.backends.mla.flashattn_mla import FlashAttnMLABackend
+
+    BACKENDS_TO_TEST.append(FlashAttnMLABackend)
+
 NUM_GPU_BLOCKS = [64]
 NUM_CPU_BLOCKS = [256]
 GPU_BLOCK_SIZES = [16]
@@ -55,8 +64,8 @@ def test_transfer(
 ) -> None:
     current_platform.seed_everything(seed)

-    # create per-layer GPU KV caches
-    attn_backends_list = [FlashAttentionBackend, FlashInferBackend, FlashAttnMLABackend]
+    # create per-layer GPU KV caches based on available attn_backends
+    attn_backends_list = BACKENDS_TO_TEST

     gpu_caches = {}
     attn_backends = {}
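The fix follows the usual pattern for platform-gated tests: move the CUDA-only imports inside a current_platform check so pytest collection on ROCm never evaluates them, then build the parametrization list from whatever imported successfully. Below is a minimal, self-contained sketch of that pattern, assuming only torch and pytest are installed; the is_rocm() helper and the string backend names are illustrative stand-ins for vLLM's current_platform and the real backend classes, not the actual test code.

import pytest
import torch


def is_rocm() -> bool:
    # Stand-in for vllm.platforms.current_platform.is_rocm():
    # torch.version.hip is a version string on ROCm builds, None on CUDA.
    return torch.version.hip is not None


# The always-available backend goes into the list unconditionally...
BACKENDS_TO_TEST = ["flash_attn"]

if not is_rocm():
    # ...while CUDA-only backends are appended (and, in the real test,
    # imported) only when the platform supports them. Importing them at
    # module top level would fail collection on ROCm runners.
    BACKENDS_TO_TEST.append("flashinfer")
    BACKENDS_TO_TEST.append("flashattn_mla")


@pytest.mark.parametrize("backend", BACKENDS_TO_TEST)
def test_transfer_sketch(backend: str) -> None:
    # Parametrization sees only the backends valid on this platform.
    assert isinstance(backend, str)

With the list built at import time this way, the second hunk of the commit reduces to swapping the hard-coded three-backend list for BACKENDS_TO_TEST, leaving the per-layer GPU KV cache setup unchanged.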
