-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile.rocm
189 lines (154 loc) · 6.05 KB
/
Dockerfile.rocm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# Base image for the build; override with --build-arg BASE_IMAGE=<image>.
ARG BASE_IMAGE="rocm/pytorch:rocm6.1_ubuntu20.04_py3.9_pytorch_2.1.2"
FROM ${BASE_IMAGE}

USER root

# An ARG declared before FROM is only usable by FROM itself, so it is
# redeclared here to make BASE_IMAGE visible to the RUN steps of this stage.
ARG BASE_IMAGE
RUN echo "Base image is ${BASE_IMAGE}"

# Target GPU architectures shared by the component builds in this file.
ENV PYTORCH_ROCM_ARCH="gfx90a;gfx942"
RUN echo "PYTORCH_ROCM_ARCH is ${PYTORCH_ROCM_ARCH}"
# Install some basic utilities: Python tooling plus sqlite/fmt/msgpack/suitesparse
# development packages.
# `-y` is mandatory here: a docker build has no terminal on which apt's
# confirmation prompt could be answered, so the install would abort without it.
# update + install share one layer so the package index is never stale.
RUN apt-get update && apt-get install -y \
        libfmt-dev \
        libmsgpack-dev \
        libsqlite3-dev \
        libsuitesparse-dev \
        python3 \
        python3-pip \
        sqlite3
### Mount Point ###
# When launching the container, mount the code directory to ${APP_MOUNT}
# (/app unless overridden with --build-arg APP_MOUNT=<path>).
ARG APP_MOUNT=/app
# Shell form on purpose: an unquoted `[ path ]` is not a valid JSON array, so
# Docker falls back to whitespace splitting and would treat the literal
# brackets as additional volume paths.
VOLUME ${APP_MOUNT}
WORKDIR ${APP_MOUNT}
# hipBLASLt source build. The steps below only run when BUILD_HIPBLASLT is "1".
ARG BUILD_HIPBLASLT="1"
# Revision of ROCm/hipBLASLt that gets checked out.
ARG HIPBLASLT_BRANCH="ee51a9d1"
RUN if [ "${BUILD_HIPBLASLT}" = "1" ]; then \
        echo "HIPBLASLT_BRANCH is ${HIPBLASLT_BRANCH}"; \
    fi

# Purge the packaged hipblaslt, build and install the pinned revision for
# PYTORCH_ROCM_ARCH, and delete the checkout afterwards.
# The two sed calls strip the ", hipblaslt-dev (...)" and ", hipblaslt (...)"
# entries from /var/lib/dpkg/status -- presumably so packages that list them as
# dependencies are not seen as broken after the purge (confirm before changing).
RUN if [ "${BUILD_HIPBLASLT}" = "1" ]; then \
        apt-get purge -y hipblaslt \
        && mkdir -p libs \
        && cd libs \
        && git clone https://github.com/ROCm/hipBLASLt \
        && cd hipBLASLt \
        && git checkout ${HIPBLASLT_BRANCH} \
        && SCCACHE_IDLE_TIMEOUT=1800 ./install.sh -i --architecture ${PYTORCH_ROCM_ARCH} \
        && cd .. \
        && rm -rf hipBLASLt \
        && sed -i 's/, hipblaslt-dev \(.*\), hipcub-dev/, hipcub-dev/g' /var/lib/dpkg/status \
        && sed -i 's/, hipblaslt \(.*\), hipfft/, hipfft/g' /var/lib/dpkg/status; \
    fi
# Upgrade pip, then remove the apt package index files under /var/lib/apt/lists.
RUN python3 -m pip install --upgrade pip \
    && rm -rf /var/lib/apt/lists/*
# RCCL source build. The steps below only run when BUILD_RCCL is "1".
ARG BUILD_RCCL="1"
# Revision of ROCm/rccl that gets checked out.
ARG RCCL_BRANCH="eeea3b6"
RUN if [ "${BUILD_RCCL}" = "1" ]; then \
        echo "RCCL_BRANCH is ${RCCL_BRANCH}"; \
    fi

# Clone RCCL into ./libs, install the pinned revision for PYTORCH_ROCM_ARCH,
# and delete the checkout once the install has finished.
RUN if [ "${BUILD_RCCL}" = "1" ]; then \
        mkdir -p libs \
        && cd libs \
        && git clone https://github.com/ROCm/rccl \
        && cd rccl \
        && git checkout ${RCCL_BRANCH} \
        && ./install.sh -i --amdgpu_targets ${PYTORCH_ROCM_ARCH} \
        && cd .. \
        && rm -r rccl; \
    fi
# Location of llvm-symbolizer inside the ROCm LLVM toolchain.
ENV LLVM_SYMBOLIZER_PATH=/opt/rocm/llvm/bin/llvm-symbolizer
# Append the ROCm install and the torch package of the py_3.9 conda env to the
# executable, shared-library and C++ include search paths.
# The lists deliberately carry no trailing ':' and, via ${VAR:+...}, no leading
# ':' when the variable was previously unset: an empty entry in these lists is
# interpreted as "the current working directory", which should not be searched
# implicitly.
ENV PATH=${PATH}:/opt/rocm/bin:/opt/conda/envs/py_3.9/lib/python3.9/site-packages/torch/bin
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/opt/rocm/lib/:/opt/conda/envs/py_3.9/lib/python3.9/site-packages/torch/lib
ENV CPLUS_INCLUDE_PATH=${CPLUS_INCLUDE_PATH:+${CPLUS_INCLUDE_PATH}:}/opt/conda/envs/py_3.9/lib/python3.9/site-packages/torch/include:/opt/conda/envs/py_3.9/lib/python3.9/site-packages/torch/include/torch/csrc/api/include/:/opt/rocm/include/
# flash-attention source build, performed only when BUILD_FA is "1".
# Set BUILD_FA=0 for gfx targets that flash-attention does not support; vLLM
# then has to use its Python reference attention implementation instead.
ARG BUILD_FA="1"
# Revision of ROCm/flash-attention that gets checked out.
ARG FA_BRANCH="ae7928c"
RUN if [ "${BUILD_FA}" = "1" ]; then \
        echo "FA_BRANCH is ${FA_BRANCH}"; \
    fi

# Clone ROCm flash-attention into ./libs, initialise its submodules, install it
# with GPU_ARCHS set to PYTORCH_ROCM_ARCH, and delete the checkout afterwards.
RUN if [ "${BUILD_FA}" = "1" ]; then \
        mkdir -p libs \
        && cd libs \
        && git clone https://github.com/ROCm/flash-attention.git \
        && cd flash-attention \
        && git checkout ${FA_BRANCH} \
        && git submodule update --init \
        && GPU_ARCHS=${PYTORCH_ROCM_ARCH} python3 setup.py install \
        && cd .. \
        && rm -rf flash-attention; \
    fi
# Workaround for the rocm6.1 base image: its numpy 1.20.3 install is in an odd
# state (no METADATA etc., but an extra LICENSES_bundled.txt). The dist-info
# directory is deleted by hand so that later numpy upgrade steps can continue.
RUN if [ "${BASE_IMAGE}" = "rocm/pytorch:rocm6.1_ubuntu20.04_py3.9_pytorch_2.1.2" ]; then \
        rm -rf /opt/conda/envs/py_3.9/lib/python3.9/site-packages/numpy-1.20.3.dist-info/; \
    fi
# Whether to build CuPy. 0.3.3 <= vLLM < 0.4.0 might need it for HIPgraph.
# Disabled by default; pass --build-arg BUILD_CUPY=1 to enable.
ARG BUILD_CUPY="0"
# Branch of ROCm/cupy to clone.
ARG CUPY_BRANCH="hipgraph_enablement"
RUN if [ "$BUILD_CUPY" = "1" ]; then \
        echo "CUPY_BRANCH is $CUPY_BRANCH"; \
    fi

# Build cupy: clone the requested branch (with submodules) into ./libs, install
# its Python prerequisites, build it against ROCm for PYTORCH_ROCM_ARCH, and
# delete the checkout afterwards.
# `-b` is required in front of the branch name; without it git takes the branch
# name as the repository to clone and the URL as the target directory.
RUN if [ "$BUILD_CUPY" = "1" ]; then \
        mkdir -p libs \
        && cd libs \
        && git clone -b $CUPY_BRANCH --recursive https://github.com/ROCm/cupy.git \
        && cd cupy \
        && pip install mpi4py-mpich scipy==1.9.3 cython==0.29.* \
        && CC=$MPI_HOME/bin/mpicc python -m pip install mpi4py \
        && CUPY_INSTALL_USE_HIP=1 ROCM_HOME=/opt/rocm HCC_AMDGPU_TARGET=${PYTORCH_ROCM_ARCH} pip install . \
        && cd .. \
        && rm -rf cupy \
        && cd ..; \
    fi
# Triton source build for ROCm, performed only when BUILD_TRITON is "1".
ARG BUILD_TRITON="1"
# Branch (or other revision) of the triton repository that gets checked out.
ARG TRITON_BRANCH="main"
RUN if [ "${BUILD_TRITON}" = "1" ]; then \
        echo "TRITON_BRANCH is ${TRITON_BRANCH}"; \
    fi

# Uninstall any existing triton package, clone the repository into ./libs,
# pip-install from its python/ subdirectory, and delete the checkout afterwards.
RUN if [ "${BUILD_TRITON}" = "1" ]; then \
        mkdir -p libs \
        && cd libs \
        && pip uninstall -y triton \
        && git clone https://github.com/OpenAI/triton.git \
        && cd triton \
        && git checkout ${TRITON_BRANCH} \
        && cd python \
        && pip install . \
        && cd ../.. \
        && rm -rf triton; \
    fi
# Copy the build context into /app/vllm.
COPY ./ /app/vllm

# HIP runtime fix for ROCm 6.1: on the rocm6.1 base image, overwrite
# libamdhip64.so.6 with the copy shipped in rocm_patch/.
RUN if [ "${BASE_IMAGE}" = "rocm/pytorch:rocm6.1_ubuntu20.04_py3.9_pytorch_2.1.2" ]; then \
        cp /app/vllm/rocm_patch/libamdhip64.so.6 /opt/rocm-6.1.0/lib/libamdhip64.so.6; \
    fi

# Upgrade pip and numba.
RUN python3 -m pip install --upgrade pip numba
# Install the pinned xformers release without pulling in its dependencies.
RUN python3 -m pip install --no-deps xformers==0.0.23
# Build and install vLLM from /app/vllm.
# VLLM_BUILD_MODE is the setup.py command that gets run. End users should keep
# "install"; developers might choose "develop", but that mode currently has
# issues with
#   ImportError: cannot import name '_custom_C' from 'vllm' (/app/vllm/vllm/__init__.py)
ARG VLLM_BUILD_MODE="install"
# Steps: install the ROCm requirements, apply the xformers patch script when
# flash-attention was built, apply the bf16 header patch on the rocm6.0 base
# image, then clean and run the selected setup.py command.
RUN cd /app/vllm \
    && pip install -U -r requirements-rocm.txt \
    && if [ "${BUILD_FA}" = "1" ]; then \
           bash patch_xformers.rocm.sh; \
       fi \
    && if [ "${BASE_IMAGE}" = "rocm/pytorch:rocm6.0_ubuntu20.04_py3.9_pytorch_2.1.1" ]; then \
           patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h /app/vllm/rocm_patch/rocm_bf16.patch; \
       fi \
    && python3 setup.py clean --all \
    && python3 setup.py ${VLLM_BUILD_MODE}
# pip-install gradlib from its directory inside the copied source tree.
RUN cd /app/vllm/gradlib \
    && pip install .
# Bring Ray up to at least 2.10.0 (all extras), without keeping pip's cache.
RUN python3 -m pip install --no-cache-dir "ray[all]>=2.10.0"
# Environment variable set so that Ray works on TP > 1.
ENV RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1

# HIPgraph performance environment variable.
ENV HIP_FORCE_DEV_KERNARG=1

# Start a bash shell by default.
CMD ["/bin/bash"]