Commit 6b7de1a

[ROCm] add support to ROCm 6.0 and MI300 (#2274)
1 parent 5265631 commit 6b7de1a

File tree (8 files changed: 96 additions, 13 deletions)

Dockerfile.rocm
README.md
csrc/cuda_utils.h
csrc/cuda_utils_kernels.cu
csrc/pybind.cpp
docs/source/getting_started/amd-installation.rst
setup.py
vllm/utils.py

Dockerfile.rocm

Lines changed: 31 additions & 5 deletions
@@ -1,4 +1,24 @@
-FROM rocm/pytorch:rocm5.7_ubuntu22.04_py3.10_pytorch_2.0.1
+# default base image
+ARG BASE_IMAGE="rocm/pytorch:rocm6.0_ubuntu20.04_py3.9_pytorch_2.1.1"
+
+FROM $BASE_IMAGE
+
+ARG BASE_IMAGE="rocm/pytorch:rocm6.0_ubuntu20.04_py3.9_pytorch_2.1.1"
+
+RUN echo "Base image is $BASE_IMAGE"
+
+# BASE_IMAGE for ROCm_5.7: "rocm/pytorch:rocm5.7_ubuntu22.04_py3.10_pytorch_2.0.1"
+# BASE_IMAGE for ROCm_6.0: "rocm/pytorch:rocm6.0_ubuntu20.04_py3.9_pytorch_2.1.1"
+
+# this does not always work for all rocm versions
+RUN LLVM_GFX_ARCH=$(/opt/rocm/llvm/bin/amdgpu-offload-arch) && \
+    echo "LLVM_GFX_ARCH is $LLVM_GFX_ARCH"
+
+ARG FA_GFX_ARCHS="gfx90a;gfx942"
+RUN echo "FA_GFX_ARCHS is $FA_GFX_ARCHS"
+
+ARG FA_BRANCH="3d2b6f5"
+RUN echo "FA_BRANCH is $FA_BRANCH"
 
 # Install some basic utilities
 RUN apt-get update && apt-get install python3 python3-pip -y
@@ -37,17 +57,23 @@ RUN mkdir libs \
     && cd libs \
     && git clone https://github.com/ROCmSoftwarePlatform/flash-attention.git \
     && cd flash-attention \
-    && git checkout 3d2b6f5 \
+    && git checkout ${FA_BRANCH} \
     && git submodule update --init \
-    && export GPU_ARCHS=$(/opt/rocm/llvm/bin/amdgpu-offload-arch) \
-    && patch /opt/conda/envs/py_3.10/lib/python3.10/site-packages/torch/utils/hipify/hipify_python.py hipify_patch.patch \
+    && export GPU_ARCHS=${FA_GFX_ARCHS} \
+    && if [ "$BASE_IMAGE" = "rocm/pytorch:rocm5.7_ubuntu22.04_py3.10_pytorch_2.0.1" ]; then \
+        patch /opt/conda/envs/py_3.10/lib/python3.10/site-packages/torch/utils/hipify/hipify_python.py hipify_patch.patch; fi \
     && python3 setup.py install \
     && cd ..
 
 COPY ./ /app/vllm
 
 RUN python3 -m pip install --upgrade pip
-RUN pip install xformers==0.0.23 --no-deps
+RUN python3 -m pip install xformers==0.0.23 --no-deps
+
+# Error related to odd state for numpy 1.20.3 where there is no METADATA etc, but an extra LICENSES_bundled.txt.
+# Manually removed it so that later steps of numpy upgrade can continue
+RUN if [ "$BASE_IMAGE" = "rocm/pytorch:rocm6.0_ubuntu20.04_py3.9_pytorch_2.1.1" ]; then \
+    rm -rf /opt/conda/envs/py_3.9/lib/python3.9/site-packages/numpy-1.20.3.dist-info/; fi
 
 RUN cd /app \
     && cd vllm \

README.md

Lines changed: 2 additions & 1 deletion
@@ -26,7 +26,8 @@ Please register [here](https://lu.ma/ygxbpzhl) and join us!
 ---
 
 *Latest News* 🔥
-- [2023/12] Added ROCm support to vLLM.
+- [2024/01] Added ROCm 6.0 support to vLLM.
+- [2023/12] Added ROCm 5.7 support to vLLM.
 - [2023/10] We hosted [the first vLLM meetup](https://lu.ma/first-vllm-meetup) in SF! Please find the meetup slides [here](https://docs.google.com/presentation/d/1QL-XPFXiFpDBh86DbEegFXBXFXjix4v032GhShbKf3s/edit?usp=sharing).
 - [2023/09] We created our [Discord server](https://discord.gg/jz7wjKhh6g)! Join us to discuss vLLM and LLM serving! We will also post the latest announcements and updates there.
 - [2023/09] We released our [PagedAttention paper](https://arxiv.org/abs/2309.06180) on arXiv!

csrc/cuda_utils.h

Lines changed: 3 additions & 0 deletions
@@ -5,3 +5,6 @@
 int get_device_attribute(
     int attribute,
     int device_id);
+
+int get_max_shared_memory_per_block_device_attribute(
+    int device_id);

csrc/cuda_utils_kernels.cu

Lines changed: 18 additions & 0 deletions
@@ -1,5 +1,6 @@
 #ifdef USE_ROCM
   #include <hip/hip_runtime.h>
+  #include <hip/hip_runtime_api.h>
 #endif
 int get_device_attribute(
     int attribute,
@@ -15,3 +16,20 @@ int get_device_attribute(
     cudaDeviceGetAttribute(&value, static_cast<cudaDeviceAttr>(attribute), device);
     return value;
 }
+
+
+int get_max_shared_memory_per_block_device_attribute(
+    int device_id)
+{
+    int attribute;
+    // https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html
+    // cudaDevAttrMaxSharedMemoryPerBlockOptin = 97 if not is_hip() else 74
+
+#ifdef USE_ROCM
+    attribute = hipDeviceAttributeMaxSharedMemoryPerBlock;
+#else
+    attribute = cudaDevAttrMaxSharedMemoryPerBlockOptin;
+#endif
+
+    return get_device_attribute(attribute, device_id);
+}
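For context, the hard-coded values in the comment above are the raw attribute enum IDs the Python side used before this commit. A minimal sketch of the removed selection logic, with the values taken from the deleted lines in the vllm/utils.py diff below:

    # Pre-commit behavior, reconstructed from the removed lines in vllm/utils.py.
    # See https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html
    def shared_mem_attribute_id(is_hip: bool) -> int:
        # cudaDevAttrMaxSharedMemoryPerBlockOptin == 97 on CUDA;
        # the HIP equivalent enum value is 74.
        return 74 if is_hip else 97

The new C++ helper hides these magic numbers behind a single compile-time #ifdef, so the Python layer no longer needs to know either value.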

csrc/pybind.cpp

Lines changed: 6 additions & 0 deletions
@@ -81,4 +81,10 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
     "get_device_attribute",
     &get_device_attribute,
     "Gets the specified device attribute.");
+
+  cuda_utils.def(
+    "get_max_shared_memory_per_block_device_attribute",
+    &get_max_shared_memory_per_block_device_attribute,
+    "Gets the maximum shared memory per block device attribute.");
+
 }
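Once the extension is rebuilt, the new binding is callable from Python. A minimal sketch, assuming a successful build of the `vllm._C` extension and at least one visible GPU (the device index 0 is illustrative):

    # Query the new binding directly; returns bytes of shared memory per block.
    # The HIP vs. CUDA attribute is chosen at compile time in the C++ helper.
    from vllm._C import cuda_utils

    shared_mem = cuda_utils.get_max_shared_memory_per_block_device_attribute(0)
    print(f"max shared memory per block: {shared_mem} bytes")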

docs/source/getting_started/amd-installation.rst

Lines changed: 30 additions & 3 deletions
@@ -11,10 +11,10 @@ Requirements
 ------------
 
 * OS: Linux
-* Python: 3.8 -- 3.11 (Verified on 3.10)
-* GPU: MI200s
+* Python: 3.8 -- 3.11
+* GPU: MI200s (gfx90a), MI300 (gfx942)
 * Pytorch 2.0.1/2.1.1/2.2
-* ROCm 5.7
+* ROCm 5.7 (verified on Python 3.10) or ROCm 6.0 (verified on Python 3.9)
 
 Installation options:
 
@@ -27,6 +27,8 @@ Installation options:
 (Recommended) Option 1: Quick start with vLLM pre-installed in Docker Image
 ---------------------------------------------------------------------------
 
+This option is for ROCm 5.7 only:
+
 .. code-block:: console
 
     $ docker pull embeddedllminfo/vllm-rocm:vllm-v0.2.4
@@ -50,6 +52,9 @@ Option 2: Build from source
 
 You can build and install vLLM from source:
 
+The instructions below are for ROCm 5.7 only.
+At the time of this documentation update, a PyTorch wheel for ROCm 6.0 is not yet available on the PyTorch website.
+
 0. Install prerequisites (skip if you are already in an environment/docker with the following installed):
 
 - `ROCm <https://rocm.docs.amd.com/en/latest/deploy/linux/index.html>`_
@@ -95,6 +100,23 @@ You can build and install vLLM from source:
 
 Build a docker image from `Dockerfile.rocm`, and launch a docker container.
 
+`Dockerfile.rocm` is designed to support ROCm 5.7, ROCm 6.0, and later versions. It provides flexibility to customize the docker image build with the following arguments:
+
+* `BASE_IMAGE`: specifies the base image used when running ``docker build``, specifically the PyTorch on ROCm base image. We have tested ROCm 5.7 and ROCm 6.0. The default is `rocm/pytorch:rocm6.0_ubuntu20.04_py3.9_pytorch_2.1.1`.
+* `FA_GFX_ARCHS`: specifies the GFX architectures used to build flash-attention, for example `gfx90a;gfx942` for MI200 and MI300. The default is `gfx90a;gfx942`.
+* `FA_BRANCH`: specifies the branch used to build flash-attention from the `ROCmSoftwarePlatform flash-attention repo <https://github.com/ROCmSoftwarePlatform/flash-attention>`_. The default is `3d2b6f5`.
+
+Their values can be passed in when running ``docker build`` with ``--build-arg`` options.
+
+For example, to build a docker image for vLLM on ROCm 5.7, you can run:
+
+.. code-block:: console
+
+    $ docker build --build-arg BASE_IMAGE="rocm/pytorch:rocm5.7_ubuntu22.04_py3.10_pytorch_2.0.1" \
+        -f Dockerfile.rocm -t vllm-rocm .
+
+To build vLLM on ROCm 6.0, you can use the default:
+
 .. code-block:: console
 
     $ docker build -f Dockerfile.rocm -t vllm-rocm .
@@ -142,3 +164,8 @@ Alternatively, if you plan to install vLLM-ROCm on a local machine or start from
     $ cd vllm
     $ pip install -U -r requirements-rocm.txt
     $ python setup.py install # This may take 5-10 minutes.
+
+.. note::
+
+    - You may need to turn on the ``--enforce-eager`` flag if you experience process hangs when running the `benchmark_throughput.py` script to test your installation.
+
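For reference, the behavior of the ``--enforce-eager`` flag mentioned in the note can also be requested through the Python API. A short sketch (the model name is illustrative; `enforce_eager=True` disables CUDA/HIP graph capture):

    # Equivalent of passing --enforce-eager to the benchmark scripts.
    from vllm import LLM

    llm = LLM(model="facebook/opt-125m", enforce_eager=True)
    outputs = llm.generate("Hello, my name is")
    print(outputs[0].outputs[0].text)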

setup.py

Lines changed: 2 additions & 0 deletions
@@ -51,6 +51,8 @@ def _is_cuda() -> bool:
         "Cannot find ROCM_HOME. ROCm must be available to build the package."
     )
     NVCC_FLAGS += ["-DUSE_ROCM"]
+    NVCC_FLAGS += [f"-U__HIP_NO_HALF_CONVERSIONS__"]
+    NVCC_FLAGS += [f"-U__HIP_NO_HALF_OPERATORS__"]
 
 if _is_cuda() and CUDA_HOME is None:
     raise RuntimeError(
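For context, when defined these HIP macros suppress `__half` conversions and operators; the `-U` flags undefine them again so the hipified kernels can use half-precision arithmetic. A minimal sketch of the intent (illustrative only; the flags are consumed by the compiler, not by Python):

    # Flags appended for ROCm builds, as in the diff above. -U undefines a
    # macro that an earlier -D (or the toolchain default) may have defined.
    NVCC_FLAGS = ["-DUSE_ROCM",
                  "-U__HIP_NO_HALF_CONVERSIONS__",
                  "-U__HIP_NO_HALF_OPERATORS__"]
    print("hipcc", *NVCC_FLAGS, "kernel.cu")  # illustrative compile line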

vllm/utils.py

Lines changed: 4 additions & 4 deletions
@@ -112,10 +112,10 @@ def get_max_shared_memory_bytes(gpu: int = 0) -> int:
     # the Neuron-X backend does not have the `cuda_utils` module.
     from vllm._C import cuda_utils
 
-    # https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html
-    cudaDevAttrMaxSharedMemoryPerBlockOptin = 97 if not is_hip() else 74
-    max_shared_mem = cuda_utils.get_device_attribute(
-        cudaDevAttrMaxSharedMemoryPerBlockOptin, gpu)
+    max_shared_mem = cuda_utils.get_max_shared_memory_per_block_device_attribute(
+        gpu)
+    # value 0 will cause MAX_SEQ_LEN become negative and test_attention.py will fail
+    assert max_shared_mem > 0, "max_shared_mem can not be zero"
     return int(max_shared_mem)
 
 
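The new assert guards a concrete failure mode: the attention tests derive their maximum sequence length from this value, so a zero result would silently produce a negative bound. A sketch of the dependency (the formula follows tests/kernels/test_attention.py; treat the exact constants as illustrative):

    # Why max_shared_mem == 0 would be fatal downstream.
    FLOAT32_BYTES = 4  # torch.finfo(torch.float32).bits // 8

    def max_seq_len_for_tests(max_shared_mem: int) -> int:
        return max_shared_mem // FLOAT32_BYTES - 512

    assert max_seq_len_for_tests(0) == -512       # zero attribute -> negative MAX_SEQ_LEN
    assert max_seq_len_for_tests(65536) == 15872  # e.g. 64 KiB of shared memory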