Fix ROCm tests

vllm-project · simon-mo · Jul 20, 2024 · Jul 18, 2024 · Jul 18, 2024 · Jul 19, 2024
commit 93af20a16077d693da4a00b2f48658e24a1da331
diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
@@ -44,7 +44,8 @@ steps:
   mirror_hardwares: [amd]
   fast_check: true
   commands:
-  - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.8/flashinfer-0.0.8+cu121torch2.3-cp310-cp310-linux_x86_64.whl
+  # The flashinfer wheel is a CUDA (cu121) build, so this install fails on AMD ROCm; `|| true` makes it optional.
+  - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.8/flashinfer-0.0.8+cu121torch2.3-cp310-cp310-linux_x86_64.whl || true
   - pytest -v -s basic_correctness/test_basic_correctness.py
   - pytest -v -s basic_correctness/test_cpu_offload.py
   - VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_chunked_prefill.py

diff --git a/Dockerfile.rocm b/Dockerfile.rocm
@@ -43,7 +43,7 @@ RUN apt-get update && apt-get install -y \
 ARG APP_MOUNT=/vllm-workspace
 WORKDIR ${APP_MOUNT}
 
-RUN pip install --upgrade pip
+RUN python3 -m pip install --upgrade pip
 # Remove sccache so it doesn't interfere with ccache
 # TODO: implement sccache support across components
 RUN apt-get purge -y sccache; pip uninstall -y sccache; rm -f "$(which sccache)"
@@ -137,7 +137,7 @@ ENV TOKENIZERS_PARALLELISM=false
 
 RUN --mount=type=cache,target=${CCACHE_DIR} \
     --mount=type=cache,target=/root/.cache/pip \
-    pip install -U -r requirements-rocm.txt \
+    pip install -Ur requirements-rocm.txt \
     && case "$(ls /opt | grep -Po 'rocm-[0-9]\.[0-9]')" in \
         *"rocm-6.1"*) \
             # Bring in upgrades to HIP graph earlier than ROCm 6.2 for vLLM

diff --git a/requirements-rocm.txt b/requirements-rocm.txt
@@ -2,5 +2,9 @@
 -r requirements-common.txt
 
 # Dependencies for AMD GPUs
+awscli
+boto3
+botocore
 ray >= 2.10.0
+peft
 pytest-asyncio
diff --git a/vllm/spec_decode/draft_model_runner.py b/vllm/spec_decode/draft_model_runner.py
@@ -3,7 +3,14 @@
 import torch
 
 from vllm import _custom_ops as ops
-from vllm.attention.backends.flash_attn import FlashAttentionMetadata
+
+try:
+    from vllm.attention.backends.flash_attn import FlashAttentionMetadata
+except ModuleNotFoundError:
+    # vllm_flash_attn is not installed; use the identical ROCm FA metadata.
+    from vllm.attention.backends.rocm_flash_attn import (
+        ROCmFlashAttentionMetadata as FlashAttentionMetadata)
+
 from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig,
                          ModelConfig, MultiModalConfig, ParallelConfig,
                          PromptAdapterConfig, SchedulerConfig)