Skip to content

Commit

Permalink
Support HIP/ROCm backends for GPUs (#101)
Browse files Browse the repository at this point in the history
* Add hydrogen error handling mechanisms
* new cuda management infrastructure
* everything in rocm compiles i think. linker issues pending
* remove override decoration from Element/BlockMatrix functions
* patch for finding rocblas; not sure if this is strictly necessary any more
* forward kernel arguments by reference
* a few tweaks to the CMakeLists
* Make sure ROCm and CUDA aren't enabled at the same time.
* correct a discrepancy in hipMemcpy2DAsync semantics
* clean up HAVE_CUDA macro usage; streamline copy syntax
* use nonblocking stream; clean up the mempool
* straggler HAVE_CUDA use in include tree
* preprocessor macro cleanup in blaslike tests
* Remove debugging print statements
* add short-circuit returns to copy/fill routines when size is zero
* some cleanup
* fix some new rocm issues
* update aluminum version number
* update version number
* remove some unneeded CMake
* revert changes related to the hip override bug
* add support for hipCUB and generalize cublas tensor option
* fix annoying clang warnings (that GCC _should_ throw, too, but it doesn't)
* address some review comments
* fix use of streams that should have been SyncInfos
* Clean up device library functions
* cleanup timer nonsense in Gemm test
* fix some hipCUB linkage
* Apply suggestions from code review
Co-authored-by: Tim Moon <moon13@llnl.gov>
* Apply suggestions from code review
Co-authored-by: Tim Moon <moon13@llnl.gov>
* remove unneeded metafunction. DiHydrogen has a cleaner implementation anyway.
  • Loading branch information
benson31 authored Jun 5, 2020
1 parent 48de387 commit d2feee8
Show file tree
Hide file tree
Showing 179 changed files with 4,733 additions and 1,917 deletions.
133 changes: 98 additions & 35 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ endif (__GIT_EXECUTABLE)

# This must be set because version tags
set(HYDROGEN_VERSION_MAJOR 1)
set(HYDROGEN_VERSION_MINOR 3)
set(HYDROGEN_VERSION_PATCH 4)
set(HYDROGEN_VERSION_MINOR 4)
set(HYDROGEN_VERSION_PATCH 0)
set(HYDROGEN_VERSION_MAJOR_MINOR
"${HYDROGEN_VERSION_MAJOR}.${HYDROGEN_VERSION_MINOR}")
set(HYDROGEN_VERSION
Expand Down Expand Up @@ -154,20 +154,32 @@ option(Hydrogen_ENABLE_CUDA
"Search for CUDA support and enable related features if found."
OFF)

if (Hydrogen_ENABLE_CUDA)
option(Hydrogen_ENABLE_ROCM
"Search for ROCm/HIP support and enable related features if found."
OFF)

if (Hydrogen_ENABLE_CUDA OR Hydrogen_ENABLE_ROCM)
option(Hydrogen_ENABLE_CUB
"Search for CUB support and enable related features if found."
ON)

option(Hydrogen_ENABLE_CUBLAS_TENSOR_MATH
"Use the cuBLAS tensor operation math."
option(Hydrogen_ENABLE_GPU_TENSOR_MATH
"Use the GPU tensor operations when available."
OFF)

option(Hydrogen_ENABLE_GPU_FP16
"Enable FP16 arithmetic in GPU code."
ON)
endif ()

if (Hydrogen_ENABLE_ROCM AND Hydrogen_ENABLE_CUDA)
message(FATAL_ERROR
"ROCm and CUDA code paths are mutually exclusive. "
"Please enable the one that corresponds to your hardware. "
"If you have mixed hardware, please contact the Hydrogen developers "
"as this would be of great interest.")
endif ()

#
# MEMORY-RELATED OPTIONS
#
Expand Down Expand Up @@ -334,8 +346,8 @@ if (Hydrogen_ENABLE_CUDA)
find_package(CUDA REQUIRED) # Enable all the macros
find_package(NVML REQUIRED)

if (Hydrogen_ENABLE_CUBLAS_TENSOR_MATH)
set(HYDROGEN_CUBLAS_USE_TENSOR_OP_MATH TRUE)
if (Hydrogen_ENABLE_GPU_TENSOR_MATH)
set(HYDROGEN_GPU_USE_TENSOR_OP_MATH TRUE)
endif ()

if (Hydrogen_ENABLE_GPU_FP16)
Expand Down Expand Up @@ -387,38 +399,64 @@ if (Hydrogen_ENABLE_CUDA)
set(HYDROGEN_HAVE_CUDA FALSE)

endif ()

endif (Hydrogen_ENABLE_CUDA)

set(HYDROGEN_HAVE_GPU ${HYDROGEN_HAVE_CUDA})
# ROCm/HIP backend detection. Only runs when Hydrogen_ENABLE_ROCM is on.
if (Hydrogen_ENABLE_ROCM)
# Prepend the HIP CMake directory of a default-layout ROCm install so the
# find_package(HIP) call below can pick up a find module from there.
# NOTE(review): the /opt/rocm prefix is hard-coded -- confirm whether
# non-default ROCm install locations need to be supported.
set(CMAKE_MODULE_PATH "/opt/rocm/hip/cmake" ${CMAKE_MODULE_PATH})
find_package(HIP REQUIRED)

if (Hydrogen_ENABLE_CUB)
# hipCUB path: clear HIP_FOUND and look HIP up a second time, this time
# in config mode, before locating rocPRIM and hipCUB.
# NOTE(review): presumably the config-mode package provides imported
# targets that rocPRIM/hipCUB depend on -- confirm.
set(CMAKE_PREFIX_PATH "/opt/rocm/hip" ${CMAKE_PREFIX_PATH})
set(HIP_FOUND FALSE)
find_package(HIP CONFIG REQUIRED)
find_package(rocPRIM REQUIRED)
find_package(hipCUB REQUIRED)
set(HYDROGEN_HAVE_CUB TRUE)
else ()
set(HYDROGEN_HAVE_CUB FALSE)
endif ()

if (HIP_FOUND)
# Request standard C++ without compiler-specific extensions for targets
# created after this point.
set(CMAKE_CXX_EXTENSIONS FALSE)
find_package(ROCBLAS REQUIRED)
set(HYDROGEN_HAVE_ROCM TRUE)
message(STATUS "Found ROCm/HIP toolchain. Using HIP/ROCm.")
else ()
# NOTE(review): every HIP lookup above is REQUIRED, so this branch looks
# like a defensive fallback -- confirm it is actually reachable.
message(FATAL_ERROR "ROCm requested but not found.")
endif ()
endif (Hydrogen_ENABLE_ROCM)

if (HYDROGEN_HAVE_CUDA OR HYDROGEN_HAVE_ROCM)
set(HYDROGEN_HAVE_GPU TRUE)
endif ()

if (Hydrogen_ENABLE_ALUMINUM)
find_package(Aluminum 0.3.0 NO_MODULE
find_package(Aluminum 0.4.0 NO_MODULE
HINTS ${Aluminum_DIR} ${ALUMINUM_DIR} ${AL_DIR}
$ENV{Aluminum_DIR} $ENV{ALUMINUM_DIR} $ENV{AL_DIR}
PATH_SUFFIXES lib64/cmake/aluminum lib/cmake/aluminum
NO_DEFAULT_PATH)
if (NOT Aluminum_FOUND)
find_package(Aluminum 0.3.0 NO_MODULE)
find_package(Aluminum 0.4.0 NO_MODULE)
endif ()

if (Aluminum_FOUND)
set(HYDROGEN_HAVE_ALUMINUM TRUE)
message(STATUS "Found Aluminum: ${Aluminum_DIR}")

if (HYDROGEN_HAVE_CUDA AND AL_HAS_NCCL)
if (HYDROGEN_HAVE_GPU AND AL_HAS_NCCL)
set(HYDROGEN_HAVE_NCCL2 TRUE)
message(STATUS "Aluminum detected with NCCL2 backend support.")
else ()
set(HYDROGEN_HAVE_NCCL2 FALSE)
endif (HYDROGEN_HAVE_CUDA AND AL_HAS_NCCL)
endif (HYDROGEN_HAVE_GPU AND AL_HAS_NCCL)

if (HYDROGEN_HAVE_CUDA AND AL_HAS_MPI_CUDA)
if (HYDROGEN_HAVE_GPU AND AL_HAS_MPI_CUDA)
set(HYDROGEN_HAVE_AL_MPI_CUDA TRUE)
message(STATUS "Aluminum detected with MPI-CUDA backend support.")
else ()
set(HYDROGEN_HAVE_AL_MPI_CUDA FALSE)
endif (HYDROGEN_HAVE_CUDA AND AL_HAS_MPI_CUDA)
endif (HYDROGEN_HAVE_GPU AND AL_HAS_MPI_CUDA)
else ()
set(HYDROGEN_HAVE_ALUMINUM FALSE)
set(HYDROGEN_HAVE_NCCL2 FALSE)
Expand Down Expand Up @@ -497,7 +535,12 @@ configure_file("${PROJECT_SOURCE_DIR}/cmake/configure_files/hydrogen_config.h.in
configure_file("${PROJECT_SOURCE_DIR}/doxy/Doxyfile.in"
"${PROJECT_BINARY_DIR}/doxy/Doxyfile")

add_library(Hydrogen_CXX "${HYDROGEN_SOURCES}" "${HYDROGEN_HEADERS}")
if (HYDROGEN_HAVE_ROCM)
hip_add_library(Hydrogen_CXX "${HYDROGEN_SOURCES}" "${HYDROGEN_HEADERS}")
else ()
add_library(Hydrogen_CXX "${HYDROGEN_SOURCES}" "${HYDROGEN_HEADERS}")
endif ()

target_include_directories(Hydrogen_CXX PUBLIC
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include/El>
Expand All @@ -509,40 +552,60 @@ target_include_directories(Hydrogen_CXX PUBLIC
# be forced to build with that (even though they maybe should)...
target_compile_options(Hydrogen_CXX PRIVATE ${EXTRA_CXX_FLAGS})

target_link_libraries(Hydrogen_CXX PUBLIC ${Aluminum_LIBRARIES})
target_link_libraries(Hydrogen_CXX PUBLIC ${HALF_LIBRARIES})

if (TARGET OpenMP::OpenMP_CXX)
target_link_libraries(Hydrogen_CXX PUBLIC OpenMP::OpenMP_CXX)
endif ()
target_link_libraries(Hydrogen_CXX PUBLIC MPI::MPI_CXX)
target_link_libraries(Hydrogen_CXX PUBLIC LAPACK::lapack)
target_link_libraries(Hydrogen_CXX PUBLIC EP::extended_precision)

target_link_libraries(Hydrogen_CXX PUBLIC ${VTUNE_LIBRARIES})
target_link_libraries(Hydrogen_CXX PUBLIC ${NVTX_LIBRARIES})
if (HYDROGEN_HAVE_CUDA)
target_link_libraries(Hydrogen_CXX PUBLIC cuda::toolkit)
endif ()
target_link_libraries(
Hydrogen_CXX PUBLIC
${Aluminum_LIBRARIES}
${HALF_LIBRARIES}
${VTUNE_LIBRARIES}
${NVTX_LIBRARIES}
${ROCBLAS_LIBRARIES}
$<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX>
$<TARGET_NAME_IF_EXISTS:MPI::MPI_CXX>
$<TARGET_NAME_IF_EXISTS:LAPACK::lapack>
$<TARGET_NAME_IF_EXISTS:EP::extended_precision>
$<TARGET_NAME_IF_EXISTS:cuda::toolkit>
$<TARGET_NAME_IF_EXISTS:hip::rocprim_hip>
$<TARGET_NAME_IF_EXISTS:hip::hipcub>
)

# Add the CXX library to "Hydrogen"
set(HYDROGEN_LIBRARIES Hydrogen_CXX)

if (HYDROGEN_HAVE_CUDA)
add_library(Hydrogen_CUDA "${HYDROGEN_CUDA_SOURCES}")
add_library(Hydrogen_CUDA "${HYDROGEN_GPU_SOURCES}")
target_include_directories(Hydrogen_CUDA PUBLIC
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/include>
$<INSTALL_INTERFACE:include>)

target_link_libraries(Hydrogen_CUDA PUBLIC ${HALF_LIBRARIES})
target_link_libraries(Hydrogen_CUDA PUBLIC ${NVTX_LIBRARIES})
target_link_libraries(Hydrogen_CUDA PUBLIC cuda::toolkit)
target_link_libraries(
Hydrogen_CUDA PUBLIC
${HALF_LIBRARIES}
${NVTX_LIBRARIES}
$<TARGET_NAME_IF_EXISTS:cuda::toolkit>
)

target_link_libraries(Hydrogen_CXX PUBLIC Hydrogen_CUDA)
list(APPEND HYDROGEN_LIBRARIES Hydrogen_CUDA)
endif ()

if (HYDROGEN_HAVE_ROCM)
hip_add_library(Hydrogen_ROCM STATIC "${HYDROGEN_GPU_SOURCES}")
target_include_directories(Hydrogen_ROCM PUBLIC
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/include>
$<INSTALL_INTERFACE:include>
)

target_link_libraries(Hydrogen_ROCM PUBLIC
${HALF_LIBRARIES}
${ROCBLAS_LIBRARIES}
)

#set_target_properties(Hydrogen_ROCM PROPERTIES LINKER_LANGUAGE CXX)
list(APPEND HYDROGEN_LIBRARIES Hydrogen_ROCM)
endif ()

# Setup the tests
if (Hydrogen_ENABLE_TESTING OR Hydrogen_ENABLE_UNIT_TESTS)
include(CTest)
Expand Down
27 changes: 20 additions & 7 deletions cmake/configure_files/HydrogenConfig.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,14 @@ set(HYDROGEN_MPI_CXX_COMPILER "@MPI_CXX_COMPILER@")
set(MPI_CXX_COMPILER "${HYDROGEN_MPI_CXX_COMPILER}"
CACHE FILEPATH "The MPI CXX compiler.")

set(_OpenMP_DIR "@OpenMP_DIR@")
if (NOT OpenMP_DIR)
set(OpenMP_DIR "${_OpenMP_DIR}")
endif ()
include (FindAndVerifyOpenMP)
set(_HYDROGEN_HAVE_OPENMP @EL_HAVE_OPENMP@)
if (_HYDROGEN_HAVE_OPENMP)
set(_OpenMP_DIR "@OpenMP_DIR@")
if (NOT OpenMP_DIR)
set(OpenMP_DIR "${_OpenMP_DIR}")
endif ()
include (FindAndVerifyOpenMP)
endif (_HYDROGEN_HAVE_OPENMP)
# FIXME: I should do verification to make sure all found features are
# the same.
include (FindAndVerifyMPI)
Expand All @@ -33,14 +36,14 @@ set(_HYDROGEN_HAVE_NCCL2 @HYDROGEN_HAVE_NCCL2@)
set(_HYDROGEN_HAVE_AL_MPI_CUDA @HYDROGEN_HAVE_AL_MPI_CUDA@)
if (_HYDROGEN_HAVE_ALUMINUM)
if (NOT Aluminum_FOUND)
find_package(Aluminum 0.3.0 NO_MODULE QUIET
find_package(Aluminum 0.4.0 NO_MODULE QUIET
HINTS ${Aluminum_DIR} ${ALUMINUM_DIR} ${AL_DIR}
$ENV{Aluminum_DIR} $ENV{ALUMINUM_DIR} $ENV{AL_DIR}
PATH_SUFFIXES lib64/cmake/aluminum lib/cmake/aluminum
NO_DEFAULT_PATH)
if (NOT Aluminum_FOUND)
set(Aluminum_DIR "@Aluminum_DIR@")
find_package(Aluminum 0.3.0 NO_MODULE REQUIRED)
find_package(Aluminum 0.4.0 NO_MODULE REQUIRED)
endif ()
endif ()

Expand All @@ -56,6 +59,16 @@ if (_HYDROGEN_HAVE_ALUMINUM)
endif ()
endif (_HYDROGEN_HAVE_ALUMINUM)

# ROCm
# The @...@ placeholder is substituted when this template is configured, so
# the variable records whether Hydrogen itself was built with ROCm support.
set(_HYDROGEN_HAVE_ROCM @HYDROGEN_HAVE_ROCM@)
if (_HYDROGEN_HAVE_ROCM)
# A ROCm-enabled Hydrogen requires the consumer to find HIP and rocBLAS too.
find_package(HIP REQUIRED)
find_package(ROCBLAS REQUIRED)

# NOTE(review): this overwrites whatever CMAKE_CXX_EXTENSIONS the including
# project had set. Open question: should the previous value be queried
# beforehand so it can be restored afterwards?
set(CMAKE_CXX_EXTENSIONS FALSE)
endif (_HYDROGEN_HAVE_ROCM)

# CUDA!
set(_HYDROGEN_HAVE_CUDA @HYDROGEN_HAVE_CUDA@)
set(_HYDROGEN_HAVE_CUB @HYDROGEN_HAVE_CUB@)
Expand Down
6 changes: 4 additions & 2 deletions cmake/configure_files/HydrogenConfigVersion.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@
# [0.87 1.0.0)
# [1.0.0 1.1.0)
# [1.1.0 1.2.0)
# [1.2.0 ???)
# [1.2.0 1.3.0)
# [1.3.0 1.4.0)
# [1.4.0 ???)
#
# IMPORTANT: IF YOU MAKE A BREAKING CHANGE TO HYDROGEN, THE UPDATE
# MUST BE GIVEN A NEW VERSION NUMBER, WHICH THEN MUST BE APPENDED TO
# THIS LIST.

set(_version_compat_ranges 0.0.0 0.87.0 1.0.0 1.1.0 1.2.0)
set(_version_compat_ranges 0.0.0 0.87.0 1.0.0 1.1.0 1.2.0 1.3.0 1.4.0)

# This is the version that has been installed.
set(PACKAGE_VERSION "@HYDROGEN_VERSION@")
Expand Down
15 changes: 10 additions & 5 deletions cmake/configure_files/hydrogen_config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,18 @@
#cmakedefine HYDROGEN_HAVE_MKL
#cmakedefine HYDROGEN_HAVE_MKL_GEMMT

#cmakedefine HYDROGEN_HAVE_GPU

// CUDA stuff
#cmakedefine HYDROGEN_HAVE_CUDA
#cmakedefine HYDROGEN_HAVE_CUB
#cmakedefine HYDROGEN_CUBLAS_USE_TENSOR_OP_MATH

// ROCm stuff
#cmakedefine HYDROGEN_HAVE_ROCM

// General GPU stuff
#ifdef HYDROGEN_HAVE_CUDA
#define HYDROGEN_HAVE_GPU
#cmakedefine HYDROGEN_HAVE_CUB
#cmakedefine HYDROGEN_GPU_USE_TENSOR_OP_MATH
#cmakedefine HYDROGEN_GPU_USE_FP16
#endif // HYDROGEN_HAVE_CUDA

// Aluminum stuff
#cmakedefine HYDROGEN_HAVE_ALUMINUM
Expand All @@ -62,4 +64,7 @@

#cmakedefine HYDROGEN_DO_BOUNDS_CHECKING

#define H_RESTRICT __restrict__
#define H_PRETTY_FUNCTION __PRETTY_FUNCTION__

#endif /* HYDROGEN_CONFIG_H */
46 changes: 46 additions & 0 deletions cmake/modules/FindROCBLAS.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Find rocBLAS library and supporting header
#
#   rocBLAS_DIR or ROCBLAS_DIR[in]: The prefix for rocBLAS. Each name is
#     read both as a CMake variable and as an environment variable.
#
#   ROCBLAS_INCLUDE_PATH[out,cache]: The include path for rocBLAS
#   ROCBLAS_LIBRARY[out,cache]: The rocBLAS library
#
#   ROCBLAS_LIBRARIES[out]: The thing to link to for rocBLAS
#   ROCBLAS_FOUND[out]: Variable indicating whether rocBLAS has been found
#
#   rocblas::rocblas: Imported library for rocBLAS
#

# Look in the user-supplied prefixes first, ignoring the default search
# locations. The second, unrestricted call only performs a search when the
# first one left the cache variable unset/NOTFOUND.
find_path(ROCBLAS_INCLUDE_PATH rocblas.h
  HINTS ${rocBLAS_DIR} $ENV{rocBLAS_DIR} ${ROCBLAS_DIR} $ENV{ROCBLAS_DIR}
  PATH_SUFFIXES include
  NO_DEFAULT_PATH
  DOC "The rocBLAS include path.")
find_path(ROCBLAS_INCLUDE_PATH rocblas.h)

# Same two-stage search for the library itself.
find_library(ROCBLAS_LIBRARY rocblas
  HINTS ${rocBLAS_DIR} $ENV{rocBLAS_DIR} ${ROCBLAS_DIR} $ENV{ROCBLAS_DIR}
  PATH_SUFFIXES lib64 lib
  NO_DEFAULT_PATH
  DOC "The rocBLAS library.")
find_library(ROCBLAS_LIBRARY rocblas)

# Standard handling of the package arguments.
#
# The name passed here must match the name used in find_package(ROCBLAS ...):
# the helper reads <name>_FIND_REQUIRED and <name>_FIND_QUIETLY, so a
# differently-cased name makes REQUIRED and QUIET silently ineffective (and
# triggers a name-mismatch warning in newer CMake releases).
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(ROCBLAS
  REQUIRED_VARS ROCBLAS_LIBRARY ROCBLAS_INCLUDE_PATH)

# Compatibility: this module previously passed "Rocblas" to the helper above,
# which also defined Rocblas_FOUND. Keep that variable available.
set(Rocblas_FOUND "${ROCBLAS_FOUND}")

# The imported target is created even when the search failed so that
# ROCBLAS_LIBRARIES always names an existing target; its usage requirements
# are only filled in when both the header and the library were located.
if (NOT TARGET rocblas::rocblas)
  add_library(rocblas::rocblas INTERFACE IMPORTED)
endif ()

if (ROCBLAS_INCLUDE_PATH AND ROCBLAS_LIBRARY)
  # NOTE(review): the HSA and HIP include directories are hard-coded to the
  # default /opt/rocm layout -- confirm this is acceptable for non-default
  # ROCm installations.
  set_target_properties(rocblas::rocblas PROPERTIES
    INTERFACE_INCLUDE_DIRECTORIES
    "${ROCBLAS_INCLUDE_PATH};/opt/rocm/hsa/include;/opt/rocm/hip/include"
    INTERFACE_LINK_LIBRARIES "${ROCBLAS_LIBRARY}")
endif ()

set(ROCBLAS_LIBRARIES rocblas::rocblas)
mark_as_advanced(ROCBLAS_INCLUDE_PATH)
mark_as_advanced(ROCBLAS_LIBRARY)
4 changes: 2 additions & 2 deletions include/El/blas_like/level1/AllReduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,11 @@ void AllReduce(AbstractMatrix<T>& A, mpi::Comm const& comm, mpi::Op op)
case Device::CPU:
AllReduce(static_cast<Matrix<T,Device::CPU>&>(A), comm, op);
break;
#ifdef HYDROGEN_HAVE_CUDA
#ifdef HYDROGEN_HAVE_GPU
case Device::GPU:
AllReduce(static_cast<Matrix<T,Device::GPU>&>(A), comm, op);
break;
#endif // HYDROGEN_HAVE_CUDA
#endif // HYDROGEN_HAVE_GPU
default:
LogicError("AllReduce: Bad device!");
}
Expand Down
Loading

0 comments on commit d2feee8

Please sign in to comment.