Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support HIP/ROCm backends for GPUs #101

Merged
merged 36 commits into from
Jun 5, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
bae39e6
Add hydrogen error handling mechanisms
benson31 Jun 13, 2019
609a151
new cuda management infrastructure
benson31 Jun 13, 2019
ac94d61
everything in rocm compiles i think. linker issues pending
benson31 Jul 25, 2019
d0f9dd8
remove override decoration from Element/BlockMatrix functions
benson31 Jan 30, 2020
a2746b7
Merge branch 'hydrogen' of https://github.com/llnl/elemental into fea…
benson31 Jan 30, 2020
aa545c5
patch for finding rocblas; not sure if this is strictly necessary any…
benson31 Jan 30, 2020
6957e09
forward kernel arguments by reference
benson31 Jan 30, 2020
9a42bad
a few tweaks to the CMakeLists
benson31 Jan 30, 2020
4144df4
Make sure ROCm and CUDA aren't enabled at the same time.
benson31 Jan 30, 2020
b824bbb
correct a discrepancy in hipMemcpy2DAsync semantics
benson31 Jan 31, 2020
6512c9e
clean up HAVE_CUDA macro usage; streamline copy syntax
benson31 Jan 31, 2020
f654343
use nonblocking stream; clean up the mempool
benson31 Jan 31, 2020
e2a887f
straggler HAVE_CUDA use in include tree
benson31 Jan 31, 2020
bb4db99
preprocessor macro cleanup in blaslike tests
benson31 Jan 31, 2020
1887636
Remove debugging print statements
benson31 Jan 31, 2020
a4967af
add short-circuit returns to copy/fill routines when size is zero
benson31 Feb 3, 2020
bc2737a
some cleanup
benson31 Apr 16, 2020
a7d49d9
a variety of fixes
benson31 May 4, 2020
3e52a5d
fix some new rocm issues
benson31 May 5, 2020
0839f83
update aluminum version number
benson31 May 5, 2020
ebabe95
update version number
benson31 May 5, 2020
8982b49
remove some unneeded CMake
benson31 May 5, 2020
1bcad07
revert changes related to the hip override bug
benson31 May 6, 2020
9817919
add support for hipCUB and generalize cublas tensor option
benson31 May 6, 2020
dc8ea50
fix annoying clang warnings (that GCC _should_ throw, too, but it doe…
benson31 May 6, 2020
808a6cd
address some review comments
benson31 May 6, 2020
5f9d0fe
fix use of streams that should have been SyncInfos
benson31 May 6, 2020
b08feb4
Clean up device library functions
benson31 May 6, 2020
0080781
cleanup timer nonsense in Gemm test
benson31 May 6, 2020
caf00e8
fix some hipCUB linkage
benson31 May 7, 2020
8be571a
Apply suggestions from code review
benson31 Jun 4, 2020
0b96cd2
Merge branch 'hydrogen' of https://github.com/llnl/elemental into fea…
benson31 Jun 4, 2020
da98b67
Apply suggestions from code review
benson31 Jun 4, 2020
675d91e
Merge branch 'feature-rocm-port' of https://github.com/benson31/Eleme…
benson31 Jun 4, 2020
bae1cc7
remove unneeded metafunction. DiHydrogen has a cleaner implementation…
benson31 Jun 5, 2020
178601b
Merge branch 'hydrogen' of https://github.com/llnl/elemental into fea…
benson31 Jun 5, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 98 additions & 35 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ endif (__GIT_EXECUTABLE)

# This must be set because version tags
set(HYDROGEN_VERSION_MAJOR 1)
set(HYDROGEN_VERSION_MINOR 3)
set(HYDROGEN_VERSION_PATCH 4)
set(HYDROGEN_VERSION_MINOR 4)
set(HYDROGEN_VERSION_PATCH 0)
set(HYDROGEN_VERSION_MAJOR_MINOR
"${HYDROGEN_VERSION_MAJOR}.${HYDROGEN_VERSION_MINOR}")
set(HYDROGEN_VERSION
Expand Down Expand Up @@ -154,20 +154,32 @@ option(Hydrogen_ENABLE_CUDA
"Search for CUDA support and enable related features if found."
OFF)

if (Hydrogen_ENABLE_CUDA)
option(Hydrogen_ENABLE_ROCM
"Search for ROCm/HIP support and enable related features if found."
OFF)

if (Hydrogen_ENABLE_CUDA OR Hydrogen_ENABLE_ROCM)
option(Hydrogen_ENABLE_CUB
"Search for CUB support and enable related features if found."
ON)

option(Hydrogen_ENABLE_CUBLAS_TENSOR_MATH
"Use the cuBLAS tensor operation math."
option(Hydrogen_ENABLE_GPU_TENSOR_MATH
"Use the GPU tensor operations when available."
OFF)

option(Hydrogen_ENABLE_GPU_FP16
"Enable FP16 arithmetic in GPU code."
ON)
endif ()

# The CUDA and ROCm backends cannot be combined in a single build; abort the
# configure step right away if both options were switched on.
if (Hydrogen_ENABLE_CUDA AND Hydrogen_ENABLE_ROCM)
  message(
    FATAL_ERROR
    "ROCm and CUDA code paths are mutually exclusive. "
    "Please enable the one that corresponds to your hardware. "
    "If you have mixed hardware, please contact the Hydrogen developers "
    "as this would be of great interest."
  )
endif ()

#
# MEMORY-RELATED OPTIONS
#
Expand Down Expand Up @@ -334,8 +346,8 @@ if (Hydrogen_ENABLE_CUDA)
find_package(CUDA REQUIRED) # Enable all the macros
find_package(NVML REQUIRED)

if (Hydrogen_ENABLE_CUBLAS_TENSOR_MATH)
set(HYDROGEN_CUBLAS_USE_TENSOR_OP_MATH TRUE)
if (Hydrogen_ENABLE_GPU_TENSOR_MATH)
set(HYDROGEN_GPU_USE_TENSOR_OP_MATH TRUE)
endif ()

if (Hydrogen_ENABLE_GPU_FP16)
Expand Down Expand Up @@ -387,38 +399,64 @@ if (Hydrogen_ENABLE_CUDA)
set(HYDROGEN_HAVE_CUDA FALSE)

endif ()

endif (Hydrogen_ENABLE_CUDA)

set(HYDROGEN_HAVE_GPU ${HYDROGEN_HAVE_CUDA})
# ROCm/HIP backend detection. Runs only when Hydrogen_ENABLE_ROCM is set and
# ends with either HYDROGEN_HAVE_ROCM set to TRUE or a fatal configure error.
if (Hydrogen_ENABLE_ROCM)
# Prepend a fixed ROCm directory to the module search path ahead of the
# module-mode HIP lookup.
# NOTE(review): the path is hard-coded; presumably it is where a default
# ROCm install keeps its HIP find module -- confirm, and consider making the
# ROCm prefix configurable.
set(CMAKE_MODULE_PATH "/opt/rocm/hip/cmake" ${CMAKE_MODULE_PATH})
find_package(HIP REQUIRED)

# With CUB support requested, HIP is looked up a second time in config mode,
# followed by rocPRIM and hipCUB. HYDROGEN_HAVE_CUB records the outcome.
if (Hydrogen_ENABLE_CUB)
set(CMAKE_PREFIX_PATH "/opt/rocm/hip" ${CMAKE_PREFIX_PATH})
# NOTE(review): HIP_FOUND is cleared before the config-mode search,
# presumably so the earlier module-mode result does not mask it -- confirm.
set(HIP_FOUND FALSE)
find_package(HIP CONFIG REQUIRED)
find_package(rocPRIM REQUIRED)
find_package(hipCUB REQUIRED)
set(HYDROGEN_HAVE_CUB TRUE)
else ()
set(HYDROGEN_HAVE_CUB FALSE)
endif ()

# NOTE(review): every HIP lookup above is REQUIRED, so the else() branch
# below looks like a defensive fallback rather than an expected path.
if (HIP_FOUND)
# Turn off compiler-specific language extensions, then locate rocBLAS.
set(CMAKE_CXX_EXTENSIONS FALSE)
find_package(ROCBLAS REQUIRED)
set(HYDROGEN_HAVE_ROCM TRUE)
message(STATUS "Found ROCm/HIP toolchain. Using HIP/ROCm.")
else ()
message(FATAL_ERROR "ROCm requested but not found.")
endif ()
endif (Hydrogen_ENABLE_ROCM)

if (HYDROGEN_HAVE_CUDA OR HYDROGEN_HAVE_ROCM)
set(HYDROGEN_HAVE_GPU TRUE)
endif ()

if (Hydrogen_ENABLE_ALUMINUM)
find_package(Aluminum 0.3.0 NO_MODULE
find_package(Aluminum 0.4.0 NO_MODULE
HINTS ${Aluminum_DIR} ${ALUMINUM_DIR} ${AL_DIR}
$ENV{Aluminum_DIR} $ENV{ALUMINUM_DIR} $ENV{AL_DIR}
PATH_SUFFIXES lib64/cmake/aluminum lib/cmake/aluminum
NO_DEFAULT_PATH)
if (NOT Aluminum_FOUND)
find_package(Aluminum 0.3.0 NO_MODULE)
find_package(Aluminum 0.4.0 NO_MODULE)
endif ()

if (Aluminum_FOUND)
set(HYDROGEN_HAVE_ALUMINUM TRUE)
message(STATUS "Found Aluminum: ${Aluminum_DIR}")

if (HYDROGEN_HAVE_CUDA AND AL_HAS_NCCL)
if (HYDROGEN_HAVE_GPU AND AL_HAS_NCCL)
set(HYDROGEN_HAVE_NCCL2 TRUE)
message(STATUS "Aluminum detected with NCCL2 backend support.")
else ()
set(HYDROGEN_HAVE_NCCL2 FALSE)
endif (HYDROGEN_HAVE_CUDA AND AL_HAS_NCCL)
endif (HYDROGEN_HAVE_GPU AND AL_HAS_NCCL)

if (HYDROGEN_HAVE_CUDA AND AL_HAS_MPI_CUDA)
if (HYDROGEN_HAVE_GPU AND AL_HAS_MPI_CUDA)
set(HYDROGEN_HAVE_AL_MPI_CUDA TRUE)
message(STATUS "Aluminum detected with MPI-CUDA backend support.")
else ()
set(HYDROGEN_HAVE_AL_MPI_CUDA FALSE)
endif (HYDROGEN_HAVE_CUDA AND AL_HAS_MPI_CUDA)
endif (HYDROGEN_HAVE_GPU AND AL_HAS_MPI_CUDA)
else ()
set(HYDROGEN_HAVE_ALUMINUM FALSE)
set(HYDROGEN_HAVE_NCCL2 FALSE)
Expand Down Expand Up @@ -497,7 +535,12 @@ configure_file("${PROJECT_SOURCE_DIR}/cmake/configure_files/hydrogen_config.h.in
configure_file("${PROJECT_SOURCE_DIR}/doxy/Doxyfile.in"
"${PROJECT_BINARY_DIR}/doxy/Doxyfile")

add_library(Hydrogen_CXX "${HYDROGEN_SOURCES}" "${HYDROGEN_HEADERS}")
if (HYDROGEN_HAVE_ROCM)
hip_add_library(Hydrogen_CXX "${HYDROGEN_SOURCES}" "${HYDROGEN_HEADERS}")
else ()
add_library(Hydrogen_CXX "${HYDROGEN_SOURCES}" "${HYDROGEN_HEADERS}")
endif ()

target_include_directories(Hydrogen_CXX PUBLIC
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include/El>
Expand All @@ -509,40 +552,60 @@ target_include_directories(Hydrogen_CXX PUBLIC
# be forced to build with that (even though they maybe should)...
target_compile_options(Hydrogen_CXX PRIVATE ${EXTRA_CXX_FLAGS})

target_link_libraries(Hydrogen_CXX PUBLIC ${Aluminum_LIBRARIES})
target_link_libraries(Hydrogen_CXX PUBLIC ${HALF_LIBRARIES})

if (TARGET OpenMP::OpenMP_CXX)
target_link_libraries(Hydrogen_CXX PUBLIC OpenMP::OpenMP_CXX)
endif ()
target_link_libraries(Hydrogen_CXX PUBLIC MPI::MPI_CXX)
target_link_libraries(Hydrogen_CXX PUBLIC LAPACK::lapack)
target_link_libraries(Hydrogen_CXX PUBLIC EP::extended_precision)

target_link_libraries(Hydrogen_CXX PUBLIC ${VTUNE_LIBRARIES})
target_link_libraries(Hydrogen_CXX PUBLIC ${NVTX_LIBRARIES})
if (HYDROGEN_HAVE_CUDA)
target_link_libraries(Hydrogen_CXX PUBLIC cuda::toolkit)
endif ()
target_link_libraries(
Hydrogen_CXX PUBLIC
${Aluminum_LIBRARIES}
${HALF_LIBRARIES}
${VTUNE_LIBRARIES}
${NVTX_LIBRARIES}
${ROCBLAS_LIBRARIES}
$<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX>
$<TARGET_NAME_IF_EXISTS:MPI::MPI_CXX>
$<TARGET_NAME_IF_EXISTS:LAPACK::lapack>
$<TARGET_NAME_IF_EXISTS:EP::extended_precision>
$<TARGET_NAME_IF_EXISTS:cuda::toolkit>
$<TARGET_NAME_IF_EXISTS:hip::rocprim_hip>
$<TARGET_NAME_IF_EXISTS:hip::hipcub>
)

# Add the CXX library to "Hydrogen"
set(HYDROGEN_LIBRARIES Hydrogen_CXX)

if (HYDROGEN_HAVE_CUDA)
add_library(Hydrogen_CUDA "${HYDROGEN_CUDA_SOURCES}")
add_library(Hydrogen_CUDA "${HYDROGEN_GPU_SOURCES}")
target_include_directories(Hydrogen_CUDA PUBLIC
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/include>
$<INSTALL_INTERFACE:include>)

target_link_libraries(Hydrogen_CUDA PUBLIC ${HALF_LIBRARIES})
target_link_libraries(Hydrogen_CUDA PUBLIC ${NVTX_LIBRARIES})
target_link_libraries(Hydrogen_CUDA PUBLIC cuda::toolkit)
target_link_libraries(
Hydrogen_CUDA PUBLIC
${HALF_LIBRARIES}
${NVTX_LIBRARIES}
$<TARGET_NAME_IF_EXISTS:cuda::toolkit>
)

target_link_libraries(Hydrogen_CXX PUBLIC Hydrogen_CUDA)
list(APPEND HYDROGEN_LIBRARIES Hydrogen_CUDA)
endif ()

# ROCm build of the GPU sources: compile HYDROGEN_GPU_SOURCES into the static
# library Hydrogen_ROCM and register it in HYDROGEN_LIBRARIES.
if (HYDROGEN_HAVE_ROCM)
# NOTE(review): hip_add_library is not defined in this file; presumably it is
# provided by the HIP package found earlier -- confirm.
hip_add_library(Hydrogen_ROCM STATIC "${HYDROGEN_GPU_SOURCES}")
# Headers come from the source and binary trees while building and from the
# relative "include" directory once installed.
target_include_directories(Hydrogen_ROCM PUBLIC
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/include>
$<INSTALL_INTERFACE:include>
)

# PUBLIC: consumers of Hydrogen_ROCM inherit the half-precision and rocBLAS
# link dependencies.
target_link_libraries(Hydrogen_ROCM PUBLIC
${HALF_LIBRARIES}
${ROCBLAS_LIBRARIES}
)

# Disabled: explicitly forcing the linker language of Hydrogen_ROCM to CXX.
#set_target_properties(Hydrogen_ROCM PROPERTIES LINKER_LANGUAGE CXX)
# NOTE(review): unlike the CUDA branch, Hydrogen_CXX is not linked against
# this target here -- confirm that this is intentional.
list(APPEND HYDROGEN_LIBRARIES Hydrogen_ROCM)
endif ()

# Setup the tests
if (Hydrogen_ENABLE_TESTING OR Hydrogen_ENABLE_UNIT_TESTS)
include(CTest)
Expand Down
27 changes: 20 additions & 7 deletions cmake/configure_files/HydrogenConfig.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,14 @@ set(HYDROGEN_MPI_CXX_COMPILER "@MPI_CXX_COMPILER@")
set(MPI_CXX_COMPILER "${HYDROGEN_MPI_CXX_COMPILER}"
CACHE FILEPATH "The MPI CXX compiler.")

set(_OpenMP_DIR "@OpenMP_DIR@")
if (NOT OpenMP_DIR)
set(OpenMP_DIR "${_OpenMP_DIR}")
endif ()
include (FindAndVerifyOpenMP)
set(_HYDROGEN_HAVE_OPENMP @EL_HAVE_OPENMP@)
if (_HYDROGEN_HAVE_OPENMP)
set(_OpenMP_DIR "@OpenMP_DIR@")
if (NOT OpenMP_DIR)
set(OpenMP_DIR "${_OpenMP_DIR}")
endif ()
include (FindAndVerifyOpenMP)
endif (_HYDROGEN_HAVE_OPENMP)
# FIXME: I should do verification to make sure all found features are
# the same.
include (FindAndVerifyMPI)
Expand All @@ -33,14 +36,14 @@ set(_HYDROGEN_HAVE_NCCL2 @HYDROGEN_HAVE_NCCL2@)
set(_HYDROGEN_HAVE_AL_MPI_CUDA @HYDROGEN_HAVE_AL_MPI_CUDA@)
if (_HYDROGEN_HAVE_ALUMINUM)
if (NOT Aluminum_FOUND)
find_package(Aluminum 0.3.0 NO_MODULE QUIET
find_package(Aluminum 0.4.0 NO_MODULE QUIET
HINTS ${Aluminum_DIR} ${ALUMINUM_DIR} ${AL_DIR}
$ENV{Aluminum_DIR} $ENV{ALUMINUM_DIR} $ENV{AL_DIR}
PATH_SUFFIXES lib64/cmake/aluminum lib/cmake/aluminum
NO_DEFAULT_PATH)
if (NOT Aluminum_FOUND)
set(Aluminum_DIR "@Aluminum_DIR@")
find_package(Aluminum 0.3.0 NO_MODULE REQUIRED)
find_package(Aluminum 0.4.0 NO_MODULE REQUIRED)
endif ()
endif ()

Expand All @@ -56,6 +59,16 @@ if (_HYDROGEN_HAVE_ALUMINUM)
endif ()
endif (_HYDROGEN_HAVE_ALUMINUM)

# ROCm: @HYDROGEN_HAVE_ROCM@ is substituted when this file is configured. If
# Hydrogen was built with ROCm, the consuming project must also be able to
# find HIP and rocBLAS.
set(_HYDROGEN_HAVE_ROCM @HYDROGEN_HAVE_ROCM@)
if (_HYDROGEN_HAVE_ROCM)
find_package(HIP REQUIRED)
find_package(ROCBLAS REQUIRED)

# TODO: this overwrites the consumer's CMAKE_CXX_EXTENSIONS setting. Should
# the previous value be queried beforehand so that it can be restored?
set(CMAKE_CXX_EXTENSIONS FALSE)
endif (_HYDROGEN_HAVE_ROCM)

# CUDA!
set(_HYDROGEN_HAVE_CUDA @HYDROGEN_HAVE_CUDA@)
set(_HYDROGEN_HAVE_CUB @HYDROGEN_HAVE_CUB@)
Expand Down
6 changes: 4 additions & 2 deletions cmake/configure_files/HydrogenConfigVersion.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@
# [0.87 1.0.0)
# [1.0.0 1.1.0)
# [1.1.0 1.2.0)
# [1.2.0 ???)
# [1.2.0 1.3.0)
# [1.3.0 1.4.0)
# [1.4.0 ???)
#
# IMPORTANT: IF YOU MAKE A BREAKING CHANGE TO HYDROGEN, THE UPDATE
# MUST BE GIVEN A NEW VERSION NUMBER, WHICH THEN MUST BE APPENDED TO
# THIS LIST.

set(_version_compat_ranges 0.0.0 0.87.0 1.0.0 1.1.0 1.2.0)
set(_version_compat_ranges 0.0.0 0.87.0 1.0.0 1.1.0 1.2.0 1.3.0 1.4.0)

# This is the version that has been installed.
set(PACKAGE_VERSION "@HYDROGEN_VERSION@")
Expand Down
15 changes: 10 additions & 5 deletions cmake/configure_files/hydrogen_config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,18 @@
#cmakedefine HYDROGEN_HAVE_MKL
#cmakedefine HYDROGEN_HAVE_MKL_GEMMT

#cmakedefine HYDROGEN_HAVE_GPU

// CUDA stuff
#cmakedefine HYDROGEN_HAVE_CUDA
#cmakedefine HYDROGEN_HAVE_CUB
#cmakedefine HYDROGEN_CUBLAS_USE_TENSOR_OP_MATH

// ROCm stuff
#cmakedefine HYDROGEN_HAVE_ROCM

// General GPU stuff
#ifdef HYDROGEN_HAVE_CUDA
#define HYDROGEN_HAVE_GPU
#cmakedefine HYDROGEN_HAVE_CUB
#cmakedefine HYDROGEN_GPU_USE_TENSOR_OP_MATH
#cmakedefine HYDROGEN_GPU_USE_FP16
#endif // HYDROGEN_HAVE_CUDA

// Aluminum stuff
#cmakedefine HYDROGEN_HAVE_ALUMINUM
Expand All @@ -62,4 +64,7 @@

#cmakedefine HYDROGEN_DO_BOUNDS_CHECKING

#define H_RESTRICT __restrict__
#define H_PRETTY_FUNCTION __PRETTY_FUNCTION__

#endif /* HYDROGEN_CONFIG_H */
46 changes: 46 additions & 0 deletions cmake/modules/FindROCBLAS.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Find rocBLAS library and supporting header
#
#   rocBLAS_DIR or ROCBLAS_DIR[in]: The prefix for rocBLAS. Either name may
#     be given as a CMake variable or as an environment variable.
#
#   ROCBLAS_INCLUDE_PATH[out,cache]: The include path for rocBLAS
#   ROCBLAS_LIBRARY[out,cache]: The rocBLAS library
#
#   ROCBLAS_LIBRARIES[out]: The thing to link to for rocBLAS
#   ROCBLAS_FOUND[out]: Variable indicating whether rocBLAS has been found
#
#   rocblas::rocblas: Imported library for rocBLAS
#

# Look in the user-supplied prefixes first (NO_DEFAULT_PATH). The second,
# unrestricted call only searches if the first one left the cache entry unset.
find_path(ROCBLAS_INCLUDE_PATH rocblas.h
  HINTS ${rocBLAS_DIR} $ENV{rocBLAS_DIR} ${ROCBLAS_DIR} $ENV{ROCBLAS_DIR}
  PATH_SUFFIXES include
  NO_DEFAULT_PATH
  DOC "The rocBLAS include path.")
find_path(ROCBLAS_INCLUDE_PATH rocblas.h)

# Same two-step search for the library itself.
find_library(ROCBLAS_LIBRARY rocblas
  HINTS ${rocBLAS_DIR} $ENV{rocBLAS_DIR} ${ROCBLAS_DIR} $ENV{ROCBLAS_DIR}
  PATH_SUFFIXES lib64 lib
  NO_DEFAULT_PATH
  DOC "The rocBLAS library.")
find_library(ROCBLAS_LIBRARY rocblas)

# Standard handling of the package arguments. The name passed here must match
# the name used by callers, i.e. find_package(ROCBLAS ...): the helper reads
# <name>_FIND_REQUIRED and <name>_FIND_QUIETLY, so a differently-cased name
# would make it ignore the REQUIRED and QUIET options.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(ROCBLAS
  REQUIRED_VARS ROCBLAS_LIBRARY ROCBLAS_INCLUDE_PATH)

# Earlier revisions of this module passed "Rocblas" above and therefore also
# defined Rocblas_FOUND; keep it defined for any caller that relied on it.
set(Rocblas_FOUND "${ROCBLAS_FOUND}")

if (NOT TARGET rocblas::rocblas)
  add_library(rocblas::rocblas INTERFACE IMPORTED)
endif ()

# Attach usage requirements only when both pieces were located.
# NOTE(review): the two /opt/rocm include directories are hard-coded and
# assume a default ROCm install location -- confirm or make configurable.
if (ROCBLAS_INCLUDE_PATH AND ROCBLAS_LIBRARY)
  set_target_properties(rocblas::rocblas PROPERTIES
    INTERFACE_INCLUDE_DIRECTORIES
    "${ROCBLAS_INCLUDE_PATH};/opt/rocm/hsa/include;/opt/rocm/hip/include"
    INTERFACE_LINK_LIBRARIES "${ROCBLAS_LIBRARY}")
endif ()

set(ROCBLAS_LIBRARIES rocblas::rocblas)
mark_as_advanced(ROCBLAS_INCLUDE_PATH)
mark_as_advanced(ROCBLAS_LIBRARY)
4 changes: 2 additions & 2 deletions include/El/blas_like/level1/AllReduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,11 @@ void AllReduce(AbstractMatrix<T>& A, mpi::Comm const& comm, mpi::Op op)
case Device::CPU:
AllReduce(static_cast<Matrix<T,Device::CPU>&>(A), comm, op);
break;
#ifdef HYDROGEN_HAVE_CUDA
#ifdef HYDROGEN_HAVE_GPU
case Device::GPU:
AllReduce(static_cast<Matrix<T,Device::GPU>&>(A), comm, op);
break;
#endif // HYDROGEN_HAVE_CUDA
#endif // HYDROGEN_HAVE_GPU
default:
LogicError("AllReduce: Bad device!");
}
Expand Down
Loading