Skip to content
This repository was archived by the owner on Oct 11, 2024. It is now read-only.

Commit cb46cfe

Browse files
hongxiayang authored and Robert Shaw committed
[Bugfix][CI/Build][AMD][ROCm] Fixed the cmake build bug which generates garbage on certain devices (vllm-project#5641)
1 parent e4d2b6e commit cb46cfe

File tree

2 files changed

+12
-10
lines changed

2 files changed

+12
-10
lines changed

Dockerfile.rocm

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,8 @@ ARG BASE_IMAGE="rocm/pytorch:rocm6.0_ubuntu20.04_py3.9_pytorch_2.1.1"
77

88
RUN echo "Base image is $BASE_IMAGE"
99

10-
# BASE_IMAGE for ROCm_5.7: "rocm/pytorch:rocm5.7_ubuntu22.04_py3.10_pytorch_2.0.1"
11-
# BASE_IMAGE for ROCm_6.0: "rocm/pytorch:rocm6.0_ubuntu20.04_py3.9_pytorch_2.1.1"
12-
10+
ARG ROCm_5_7_BASE="rocm/pytorch:rocm5.7_ubuntu22.04_py3.10_pytorch_2.0.1" \
11+
ROCm_6_0_BASE="rocm/pytorch:rocm6.0_ubuntu20.04_py3.9_pytorch_2.1.1"
1312

1413
ARG FA_GFX_ARCHS="gfx90a;gfx942"
1514
RUN echo "FA_GFX_ARCHS is $FA_GFX_ARCHS"
@@ -68,15 +67,15 @@ RUN if [ "$BUILD_FA" = "1" ]; then \
6867
&& git checkout ${FA_BRANCH} \
6968
&& git submodule update --init \
7069
&& export GPU_ARCHS=${FA_GFX_ARCHS} \
71-
&& if [ "$BASE_IMAGE" = "rocm/pytorch:rocm5.7_ubuntu22.04_py3.10_pytorch_2.0.1" ]; then \
70+
&& if [ "$BASE_IMAGE" = "$ROCm_5_7_BASE" ]; then \
7271
patch /opt/conda/envs/py_3.10/lib/python3.10/site-packages/torch/utils/hipify/hipify_python.py hipify_patch.patch; fi \
7372
&& python3 setup.py install \
7473
&& cd ..; \
7574
fi
7675

7776
# Error related to odd state for numpy 1.20.3 where there is no METADATA etc, but an extra LICENSES_bundled.txt.
7877
# Manually removed it so that later steps of numpy upgrade can continue
79-
RUN if [ "$BASE_IMAGE" = "rocm/pytorch:rocm6.0_ubuntu20.04_py3.9_pytorch_2.1.1" ]; then \
78+
RUN if [ "$BASE_IMAGE" = "$ROCm_6_0_BASE" ]; then \
8079
rm -rf /opt/conda/envs/py_3.9/lib/python3.9/site-packages/numpy-1.20.3.dist-info/; fi
8180

8281
# build triton
@@ -107,11 +106,11 @@ ENV CCACHE_DIR=/root/.cache/ccache
107106
RUN --mount=type=cache,target=/root/.cache/ccache \
108107
--mount=type=cache,target=/root/.cache/pip \
109108
pip install -U -r requirements-rocm.txt \
110-
&& patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h ./rocm_patch/rocm_bf16.patch \
109+
&& if [ "$BASE_IMAGE" = "$ROCm_6_0_BASE" ]; then \
110+
patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h ./rocm_patch/rocm_bf16.patch; fi \
111111
&& python3 setup.py install \
112-
&& cp build/lib.linux-x86_64-cpython-39/vllm/_C.abi3.so vllm/ \
113-
&& cp build/lib.linux-x86_64-cpython-39/vllm/_punica_C.abi3.so vllm/ \
114-
&& cp build/lib.linux-x86_64-cpython-39/vllm/_moe_C.abi3.so vllm/ \
112+
&& export VLLM_PYTHON_VERSION=$(python -c "import sys; print(str(sys.version_info.major) + str(sys.version_info.minor))") \
113+
&& cp build/lib.linux-x86_64-cpython-${VLLM_PYTHON_VERSION}/vllm/*.so vllm/ \
115114
&& cd ..
116115

117116

cmake/utils.cmake

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,8 +155,11 @@ macro(override_gpu_arches GPU_ARCHES GPU_LANG GPU_SUPPORTED_ARCHES)
155155
# Find the intersection of the supported + detected architectures to
156156
# set the module architecture flags.
157157
#
158+
159+
set(VLLM_ROCM_SUPPORTED_ARCHS "gfx908;gfx90a;gfx942;gfx1100")
160+
158161
set(${GPU_ARCHES})
159-
foreach (_ARCH ${CMAKE_HIP_ARCHITECTURES})
162+
foreach (_ARCH ${VLLM_ROCM_SUPPORTED_ARCHS})
160163
if (_ARCH IN_LIST _GPU_SUPPORTED_ARCHES_LIST)
161164
list(APPEND ${GPU_ARCHES} ${_ARCH})
162165
endif()

0 commit comments

Comments
 (0)