Skip to content

Commit

Permalink
[XPU] support XRE 5.0 (PaddlePaddle#64632)
Browse files Browse the repository at this point in the history
* support xre5

* bug fix

* support xre 5.0

* bug fix

* support xpu2

* support xre 4 and 5

* bug fix

* revert check scripts

* refine codegen cmake

* refine check scripts

* bug fix

* bug fix

* add paddle.version.xpu back
  • Loading branch information
lj970926 authored Jun 11, 2024
1 parent 5a53c68 commit 3733511
Show file tree
Hide file tree
Showing 10 changed files with 246 additions and 232 deletions.
17 changes: 8 additions & 9 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,7 @@ option(WITH_XPU "Compile PaddlePaddle with BAIDU KUNLUN XPU" OFF)
option(WITH_XPU_KP "Compile PaddlePaddle with BAIDU XPU compiler " OFF)
option(WITH_XPU_XFT "Compile PaddlePaddle with BAIDU XPU-XFT" OFF)
option(WITH_XPU_PLUGIN "Compile PaddlePaddle with BAIDU XPU plugin" OFF)
option(WITH_XPU_XHPC "Compile PaddlePaddle with BAIDU XPU-HPC library"
${WITH_XPU})
option(WITH_XPU_XRE5 "Compile PaddlePaddle with BAIDU XPU XRE 5" OFF)
option(WITH_WIN_DUMP_DBG "Compile with windows core dump debug mode" OFF)
option(WITH_ROCM "Compile PaddlePaddle with ROCM platform" OFF)
option(WITH_IPU "Compile PaddlePaddle with Graphcore IPU" OFF)
Expand Down Expand Up @@ -91,8 +90,8 @@ endif()
if(WITH_GPU AND WITH_XPU_XFT)
message(FATAL_ERROR "Error when compile GPU and XPU-XFT at the same time")
endif()
if(WITH_GPU AND WITH_XPU_XHPC)
message(FATAL_ERROR "Error when compile GPU and XPU-HPC at the same time")
if(WITH_GPU AND WITH_XPU_XRE5)
message(FATAL_ERROR "Error when compile GPU and XPU-XRE5 at the same time")
endif()
if(WITH_GPU AND WITH_ROCM)
message(FATAL_ERROR "Error when compile CUDA and ROCM at the same time")
Expand Down Expand Up @@ -468,12 +467,12 @@ if(NOT WITH_XPU AND WITH_XPU_BKCL)
CACHE STRING "Disable BKCL when compiling without XPU" FORCE)
endif()

if(NOT WITH_XPU AND WITH_XPU_XHPC)
message(
WARNING "Disable XHPC when compiling without XPU. Force WITH_XPU_XHPC=OFF.")
set(WITH_XPU_XHPC
if(NOT WITH_XPU AND WITH_XPU_XRE5)
# WARNING must be the first argument of message() so that CMake treats the
# text as a warning; it tells the user that WITH_XPU_XRE5 is being forced OFF
# because the build has no XPU support.
message(WARNING
"Disable XRE5 when compiling without XPU. Force WITH_XPU_XRE5=OFF")
set(WITH_XPU_XRE5
OFF
CACHE STRING "Disable XHPC when compiling without XPU" FORCE)
CACHE STRING "Disable XRE5 when compiling without XPU" FORCE)
endif()

if(WITH_NCCL)
Expand Down
108 changes: 48 additions & 60 deletions cmake/external/xpu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,14 @@ include(ExternalProject)
set(XPU_PROJECT "extern_xpu")
set(XPU_API_LIB_NAME "libxpuapi.so")
set(XPU_RT_LIB_NAME "libxpurt.so")
set(XPU_CUDA_LIB_NAME "libxpucuda.so")
set(XPU_XFT_LIB_NAME "libxft.so")
set(XPU_XPTI_LIB_NAME "libxpti.so")
set(XPU_XBLAS_LIB_NAME "libxpu_blas.so")
set(XPU_XFA_LIB_NAME "libxpu_flash_attention.so")

if(NOT DEFINED XPU_BASE_DATE)
set(XPU_BASE_DATE "20240104")
endif()
if(NOT DEFINED XPU_XDNN_BASE_DATE)
set(XPU_XDNN_BASE_DATE "20240327")
if(NOT DEFINED XPU_XRE_BASE_VERSION)
set(XPU_XRE_BASE_VERSION "4.32.0.1")
endif()
if(NOT DEFINED XPU_XHPC_BASE_DATE)
set(XPU_XHPC_BASE_DATE "20240515")
Expand All @@ -40,16 +38,8 @@ if(NOT DEFINED XPU_XFT_BASE_VERSION)
endif()
set(XPU_XPTI_BASE_VERSION "0.0.1")

if(NOT DEFINED XPU_BASE_URL)
set(XPU_BASE_URL_WITHOUT_DATE
"https://baidu-kunlun-product.su.bcebos.com/KL-SDK/klsdk-dev")
set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/${XPU_BASE_DATE}")
else()
set(XPU_BASE_URL "${XPU_BASE_URL}")
endif()

set(XPU_XDNN_BASE_URL
"https://klx-sdk-release-public.su.bcebos.com/xdnn/stable/${XPU_XDNN_BASE_DATE}"
set(XPU_XRE_BASE_URL
"https://klx-sdk-release-public.su.bcebos.com/xre/release/${XPU_XRE_BASE_VERSION}"
)

set(XPU_XCCL_BASE_URL
Expand All @@ -66,6 +56,14 @@ set(XPU_XPTI_BASE_URL
"https://klx-sdk-release-public.su.bcebos.com/xpti/dev/${XPU_XPTI_BASE_VERSION}"
)

# XRE 5 builds use their own pinned SDK versions: XRE 5.0.3.1, the XHPC
# package dated 20240601, and the "kl3-release" download path for XRE.
# NOTE(review): these set() calls are not guarded by if(NOT DEFINED ...), so
# inside this branch they replace any XPU_XRE_BASE_VERSION /
# XPU_XHPC_BASE_DATE value chosen earlier (including the defaults above and,
# presumably, values passed on the command line) -- confirm this is intended.
if(WITH_XPU_XRE5)
set(XPU_XRE_BASE_VERSION "5.0.3.1")
set(XPU_XHPC_BASE_DATE "20240601")
# Rebuild the base URL after the version change so it embeds the XRE 5 version.
set(XPU_XRE_BASE_URL
"https://klx-sdk-release-public.su.bcebos.com/xre/kl3-release/${XPU_XRE_BASE_VERSION}"
)
endif()

if(WITH_XCCL_RDMA)
set(XPU_XCCL_PREFIX "xccl_rdma")
else()
Expand All @@ -74,45 +72,42 @@ endif()

if(WITH_AARCH64)
set(XPU_XRE_DIR_NAME "xre-kylin_aarch64")
set(XPU_XDNN_DIR_NAME "xdnn-kylin_aarch64")
set(XPU_XCCL_DIR_NAME "${XPU_XCCL_PREFIX}-kylin_aarch64")
set(XPU_XFT_DIR_NAME "") # TODO: xft has no kylin output at now.
elseif(WITH_SUNWAY)
set(XPU_XRE_DIR_NAME "xre-deepin_sw6_64")
set(XPU_XDNN_DIR_NAME "xdnn-deepin_sw6_64")
set(XPU_XCCL_DIR_NAME "") # TODO: xccl has no deepin output at now.
set(XPU_XFT_DIR_NAME "") # TODO: xft has no deepin output at now.
elseif(WITH_BDCENTOS)
set(XPU_XRE_DIR_NAME "xre-bdcentos_x86_64")
set(XPU_XDNN_DIR_NAME "xdnn-bdcentos_x86_64")
if(WITH_XPU_XRE5)
set(XPU_XRE_DIR_NAME "xre-bdcentos-x86_64-${XPU_XRE_BASE_VERSION}")
set(XPU_XHPC_DIR_NAME "xhpc-bdcentos7_x86_64")
else()
set(XPU_XRE_DIR_NAME "xre-bdcentos_x86_64")
set(XPU_XHPC_DIR_NAME "xhpc-bdcentos_x86_64")
endif()
set(XPU_XCCL_DIR_NAME "${XPU_XCCL_PREFIX}-bdcentos_x86_64")
set(XPU_XFT_DIR_NAME "xft_bdcentos6u3_x86_64_gcc82")
set(XPU_XHPC_DIR_NAME "xhpc-bdcentos_x86_64")
elseif(WITH_UBUNTU)
set(XPU_XRE_DIR_NAME "xre-ubuntu_x86_64")
set(XPU_XDNN_DIR_NAME "xdnn-ubuntu_x86_64")
set(XPU_XCCL_DIR_NAME "${XPU_XCCL_PREFIX}-ubuntu_x86_64")
set(XPU_XFT_DIR_NAME "xft_ubuntu1604_x86_64")
set(XPU_XHPC_DIR_NAME "xhpc-ubuntu_x86_64")
elseif(WITH_CENTOS)
set(XPU_XRE_DIR_NAME "xre-centos7_x86_64")
set(XPU_XDNN_DIR_NAME "xdnn-centos7_x86_64")
set(XPU_XCCL_DIR_NAME "${XPU_XCCL_PREFIX}-bdcentos_x86_64")
set(XPU_XFT_DIR_NAME "xft_bdcentos6u3_x86_64_gcc82")
else()
set(XPU_XRE_DIR_NAME "xre-ubuntu_x86_64")
set(XPU_XDNN_DIR_NAME "xdnn-ubuntu_x86_64")
# Ubuntu as default
if(WITH_XPU_XRE5)
set(XPU_XRE_DIR_NAME "xre-ubuntu_2004-x86_64-${XPU_XRE_BASE_VERSION}")
set(XPU_XHPC_DIR_NAME "xhpc-ubuntu2004_x86_64")
else()
set(XPU_XRE_DIR_NAME "xre-ubuntu_1604_x86_64")
set(XPU_XHPC_DIR_NAME "xhpc-ubuntu_x86_64")
endif()
set(XPU_XCCL_DIR_NAME "${XPU_XCCL_PREFIX}-ubuntu_x86_64")
set(XPU_XFT_DIR_NAME "xft_ubuntu1604_x86_64")
set(XPU_XHPC_DIR_NAME "xhpc-ubuntu_x86_64")
endif()
set(XPU_XPTI_DIR_NAME "xpti")

set(XPU_XRE_URL
"${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz"
CACHE STRING "" FORCE)
set(XPU_XDNN_URL
"${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz"
"${XPU_XRE_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz"
CACHE STRING "" FORCE)
set(XPU_XCCL_URL
"${XPU_XCCL_BASE_URL}/${XPU_XCCL_DIR_NAME}.tar.gz"
Expand All @@ -125,10 +120,14 @@ set(XPU_XFT_GET_DEPENCE_URL
"https://baidu-kunlun-public.su.bcebos.com/paddle_depence/get_xft_dependence.sh"
CACHE STRING "" FORCE)

if(WITH_XPU_XHPC)
set(XPU_XHPC_URL
"https://klx-sdk-release-public.su.bcebos.com/xhpc/dev/${XPU_XHPC_BASE_DATE}/${XPU_XHPC_DIR_NAME}.tar.gz"
CACHE STRING "" FORCE)
set(XPU_XHPC_URL
"https://klx-sdk-release-public.su.bcebos.com/xhpc/dev/${XPU_XHPC_BASE_DATE}/${XPU_XHPC_DIR_NAME}.tar.gz"
CACHE STRING "" FORCE)

# Optional single-location override: when XPU_BASE_URL is defined, the XRE,
# XHPC and XCCL archives are all downloaded from
# "${XPU_BASE_URL}/<dir name>.tar.gz" rather than from the per-component URLs
# computed above.
# These are plain set() calls (no CACHE), unlike the CACHE ... FORCE
# assignments used for XPU_XRE_URL and XPU_XHPC_URL earlier in this file.
if(DEFINED XPU_BASE_URL)
set(XPU_XRE_URL "${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz")
set(XPU_XHPC_URL "${XPU_BASE_URL}/${XPU_XHPC_DIR_NAME}.tar.gz")
set(XPU_XCCL_URL "${XPU_BASE_URL}/${XPU_XCCL_DIR_NAME}.tar.gz")
endif()

set(SNAPPY_PREFIX_DIR "${THIRD_PARTY_PATH}/xpu")
Expand All @@ -140,6 +139,7 @@ set(XPU_LIB_DIR "${THIRD_PARTY_PATH}/install/xpu/lib")
set(XPU_API_LIB "${XPU_LIB_DIR}/${XPU_API_LIB_NAME}")
set(XPU_XBLAS_LIB "${XPU_LIB_DIR}/${XPU_XBLAS_LIB_NAME}")
set(XPU_RT_LIB "${XPU_LIB_DIR}/${XPU_RT_LIB_NAME}")
set(XPU_CUDA_LIB "${XPU_LIB_DIR}/${XPU_CUDA_LIB_NAME}")
set(XPU_XFA_LIB "${XPU_LIB_DIR}/${XPU_XFA_LIB_NAME}")

set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${XPU_INSTALL_DIR}/lib")
Expand All @@ -166,10 +166,8 @@ ExternalProject_Add(
PREFIX ${SNAPPY_PREFIX_DIR}
DOWNLOAD_DIR ${XPU_DOWNLOAD_DIR}
DOWNLOAD_COMMAND
bash ${CMAKE_SOURCE_DIR}/tools/xpu/check_xpu_dependence.sh ${XPU_BASE_URL}
${XPU_XCCL_BASE_URL} && WITH_XPU_XHPC=${WITH_XPU_XHPC} bash
${CMAKE_SOURCE_DIR}/tools/xpu/pack_paddle_depence.sh ${XPU_XRE_URL}
${XPU_XRE_DIR_NAME} ${XPU_XDNN_URL} ${XPU_XDNN_DIR_NAME} ${XPU_XCCL_URL}
bash ${CMAKE_SOURCE_DIR}/tools/xpu/pack_paddle_depence.sh ${XPU_XRE_URL}
${XPU_XRE_DIR_NAME} ${XPU_XHPC_URL} ${XPU_XHPC_DIR_NAME} ${XPU_XCCL_URL}
${XPU_XCCL_DIR_NAME} ${XPU_XHPC_URL} ${XPU_XHPC_DIR_NAME} && wget
${XPU_XFT_GET_DEPENCE_URL} && bash get_xft_dependence.sh ${XPU_XFT_URL}
${XPU_XFT_DIR_NAME} && WITH_XPTI=${WITH_XPTI} bash
Expand Down Expand Up @@ -203,18 +201,13 @@ if(WITH_XPU_XFT)
set(XPU_XFT_LIB "${XPU_LIB_DIR}/${XPU_XFT_LIB_NAME}")
endif()

if(WITH_XPU_XHPC)
message(STATUS "Compile with XPU XHPC!")
add_definitions(-DPADDLE_WITH_XPU_XHPC)
set(XPU_XHPC_INC_DIR "${XPU_INC_DIR}/xhpc")
include_directories(${XPU_XHPC_INC_DIR})
set(XPU_XBLAS_INC_DIR "${XPU_INC_DIR}/xhpc/xblas")
include_directories(${XPU_XBLAS_INC_DIR})

set(XPU_XHPC_INC_DIR "${XPU_INC_DIR}/xhpc")
include_directories(${XPU_XHPC_INC_DIR})
set(XPU_XBLAS_INC_DIR "${XPU_INC_DIR}/xhpc/xblas")
include_directories(${XPU_XBLAS_INC_DIR})

set(XPU_XFA_INC_DIR "${XPU_INC_DIR}/xhpc/xfa")
include_directories(${XPU_XFA_INC_DIR})
endif()
set(XPU_XFA_INC_DIR "${XPU_INC_DIR}/xhpc/xfa")
include_directories(${XPU_XFA_INC_DIR})

if(WITH_XPTI)
message(STATUS "Compile with XPU XPTI!")
Expand All @@ -228,17 +221,12 @@ if(WITH_XPU_PLUGIN)
include_directories(${CMAKE_SOURCE_DIR}/paddle/phi/kernels/xpu/plugin/include)
endif()

target_link_libraries(xpulib ${XPU_API_LIB} ${XPU_RT_LIB} ${XPU_BKCL_LIB}
${XPU_XFT_LIB})

if(WITH_XPTI)
target_link_libraries(xpulib ${XPU_XPTI_LIB})
endif()

if(WITH_XPU_XHPC)
target_link_libraries(xpulib ${XPU_RT_LIB} ${XPU_XBLAS_LIB} ${XPU_API_LIB}
${XPU_XFA_LIB})
endif()
target_link_libraries(xpulib ${XPU_RT_LIB} ${XPU_BKCL_LIB} ${XPU_XBLAS_LIB}
${XPU_API_LIB} ${XPU_XFA_LIB})

add_dependencies(xpulib ${XPU_PROJECT})

Expand Down
2 changes: 0 additions & 2 deletions paddle/phi/backends/xpu/xpu3_op_list.cc
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,6 @@ XPUOpMap& get_kl3_ops() {
phi::DataType::FLOAT64,
phi::DataType::FLOAT16,
phi::DataType::BFLOAT16})},
#ifdef PADDLE_WITH_XPU_XHPC
{"flash_attn_grad",
XPUKernelSet({phi::DataType::BFLOAT16,
phi::DataType::FLOAT32,
Expand All @@ -438,7 +437,6 @@ XPUOpMap& get_kl3_ops() {
XPUKernelSet({phi::DataType::BFLOAT16,
phi::DataType::FLOAT32,
phi::DataType::FLOAT16})},
#endif
{"flatten2_grad",
XPUKernelSet({phi::DataType::INT64,
phi::DataType::INT32,
Expand Down
8 changes: 0 additions & 8 deletions paddle/phi/kernels/xpu/flash_attn_grad_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"

#ifdef PADDLE_WITH_XPU_XHPC
#include "xfa/flash_api.h"
#endif

namespace phi {

Expand All @@ -38,8 +36,6 @@ void FlashAttnGradKernel(const Context& ctx,
DenseTensor* dq,
DenseTensor* dk,
DenseTensor* dv) {
#ifdef PADDLE_WITH_XPU_XHPC

ctx.template Alloc<T>(dq);
ctx.template Alloc<T>(dk);
ctx.template Alloc<T>(dv);
Expand Down Expand Up @@ -150,10 +146,6 @@ void FlashAttnGradKernel(const Context& ctx,
bias_data // bias
);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "mha_varlen_bwd");
#else
PADDLE_THROW(phi::errors::PreconditionNotMet(
"re-compile using -DWITH_XPU_XHPC=ON to use FlashAttnGradKernel"));
#endif
}

} // namespace phi
Expand Down
12 changes: 0 additions & 12 deletions paddle/phi/kernels/xpu/flash_attn_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@
#include "paddle/phi/common/memory_utils.h"
#include "paddle/phi/core/kernel_registry.h"

#ifdef PADDLE_WITH_XPU_XHPC
#include "xfa/flash_api.h"
#endif

namespace phi {

Expand All @@ -45,7 +43,6 @@ void FlashAttnUnpaddedKernel(
DenseTensor* softmax,
DenseTensor* softmax_lse,
DenseTensor* seed_offset) {
#ifdef PADDLE_WITH_XPU_XHPC
xpu::ctx_guard RAII_GUARD(ctx.x_context());
// q, k, v [batch_size * seq_len, num_heads, head_dim]
std::vector<int64_t> dims = common::vectorize(q.dims());
Expand Down Expand Up @@ -172,10 +169,6 @@ void FlashAttnUnpaddedKernel(
nullptr);
PADDLE_ENFORCE_EQ(r, 0, "xpu::qk_v_attention failed.");
}
#else
PADDLE_THROW(phi::errors::PreconditionNotMet(
"re-compile using -DWITH_XPU_XHPC=ON to use FlashAttnKernel"));
#endif
}

template <typename T, typename Context>
Expand All @@ -194,7 +187,6 @@ void FlashAttnKernel(const Context& ctx,
DenseTensor* softmax,
DenseTensor* softmax_lse,
DenseTensor* seed_offset) {
#ifdef PADDLE_WITH_XPU_XHPC
if (return_softmax == true) {
PADDLE_THROW(phi::errors::Unimplemented("return_softmax should be false"));
}
Expand Down Expand Up @@ -327,10 +319,6 @@ void FlashAttnKernel(const Context& ctx,
bias_data // bias
);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "mha_varlen_fwd");
#else
PADDLE_THROW(phi::errors::PreconditionNotMet(
"re-compile using -DWITH_XPU_XHPC=ON to use FlashAttnKernel"));
#endif
}

} // namespace phi
Expand Down
3 changes: 2 additions & 1 deletion python/env_dict.py.in
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,14 @@ env_dict={
'XPU_API_LIB_NAME':'@XPU_API_LIB_NAME@',
'XPU_RT_LIB':'@XPU_RT_LIB@',
'XPU_RT_LIB_NAME':'@XPU_RT_LIB_NAME@',
'XPU_CUDA_LIB':'@XPU_CUDA_LIB@',
'XPU_CUDA_LIB_NAME':'@XPU_CUDA_LIB_NAME@',
'WITH_XPU_BKCL':'@WITH_XPU_BKCL@',
'XPU_BKCL_LIB':'@XPU_BKCL_LIB@',
'XPU_BKCL_LIB_NAME':'@XPU_BKCL_LIB_NAME@',
'WITH_XPU_XFT':'@WITH_XPU_XFT@',
'XPU_XFT_LIB':'@XPU_XFT_LIB@',
'XPU_XFT_LIB_NAME':'@XPU_XFT_LIB_NAME@',
'WITH_XPU_XHPC':'@WITH_XPU_XHPC@',
'XPU_XBLAS_LIB':'@XPU_XBLAS_LIB@',
'XPU_XBLAS_LIB_NAME':'@XPU_XBLAS_LIB_NAME@',
'XPU_XFA_LIB':'@XPU_XFA_LIB@',
Expand Down
Loading

0 comments on commit 3733511

Please sign in to comment.