Apply rocm_patch/rocm_bf16.patch to ROCm's amd_hip_bf16.h during the image
build, just before `python3 setup.py install`. The patch redefines
__HOST_DEVICE__ so that it also carries `static` (HIPRTC builds) or
`static inline` (all other builds).
NOTE(review): presumably this gives the header's bf16 helpers internal
linkage to avoid duplicate-symbol errors at link time -- confirm.

diff --git a/Dockerfile.rocm b/Dockerfile.rocm
index 3c76305303037..f49b321372ed0 100644
--- a/Dockerfile.rocm
+++ b/Dockerfile.rocm
@@ -76,6 +76,7 @@ RUN cd /app \
     && cd vllm \
     && pip install -U -r requirements-rocm.txt \
     && bash patch_xformers.rocm.sh \
+    && patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h /app/vllm/rocm_patch/rocm_bf16.patch \
     && python3 setup.py install \
     && cd ..
 
diff --git a/rocm_patch/rocm_bf16.patch b/rocm_patch/rocm_bf16.patch
new file mode 100644
index 0000000000000..a0f07da2a3e2b
--- /dev/null
+++ b/rocm_patch/rocm_bf16.patch
@@ -0,0 +1,15 @@
+--- amd_hip_bf16.h	2024-02-06 18:28:58.268699142 +0000
++++ amd_hip_bf16.h.new	2024-02-06 18:28:31.988647133 +0000
+@@ -90,10 +90,10 @@
+ #include "math_fwd.h"              // ocml device functions
+ 
+ #if defined(__HIPCC_RTC__)
+-#define __HOST_DEVICE__ __device__
++#define __HOST_DEVICE__ __device__ static
+ #else
+ #include <climits>
+-#define __HOST_DEVICE__ __host__ __device__
++#define __HOST_DEVICE__ __host__ __device__ static inline
+ #endif
+ 
+ // Since we are using unsigned short to represent data in bfloat16, it can be of different sizes on