Skip to content

Commit

Permalink
Merge pull request CompFUSE#326 from PDoakORNL/fix_hip_mishap
Browse files Browse the repository at this point in the history
Fix hip mishap
  • Loading branch information
PDoakORNL authored Apr 1, 2024
2 parents 8ae4b4b + e3d97e4 commit aae08d8
Show file tree
Hide file tree
Showing 99 changed files with 929 additions and 783 deletions.
13 changes: 7 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ set(DCA_LIBS
signals
coarsegraining
${DCA_CONCURRENCY_LIB}
parallel_no_concurrency
${DCA_THREADING_LIBS}
lapack
models
Expand All @@ -195,12 +196,12 @@ if (DCA_HAVE_GPU)
)
endif()

if (DCA_WITH_ADIOS2 AND DCA_HAVE_ADIOS2)
list(APPEND DCA_LIBS
dca_adios2 adios2::adios2
)
message("-- Add dca_adios2 to targets")
endif()
# if (DCA_WITH_ADIOS2 AND DCA_HAVE_ADIOS2)
# list(APPEND DCA_LIBS
# dca_adios2 adios2::adios2
# )
# message("-- Add dca_adios2 to targets")
# endif()

# The BLAS and LAPACK libraries in DCA_EXTERNAL_LIBS should be linked after MAGMA.
list(APPEND DCA_LIBS lapack ${DCA_EXTERNAL_LIBS})
Expand Down
2 changes: 1 addition & 1 deletion applications/analysis/chi_q_omega.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ int main(int argc, char** argv) {
if (dca::io::stringToIOType(parameters.get_output_format()) == dca::io::IOType::ADIOS2) {
int rank = concurrency.id();
std::cout << "\nProcessor " << concurrency.id() << " is writing data." << std::endl;
dca::io::Writer writer(adios, concurrency, parameters.get_output_format(), true);
dca::io::Writer writer(concurrency, parameters.get_output_format(), true);
std::string filename_bse(parameters.get_directory() + parameters.getAppropriateFilenameAnalysis());
writer.open_file(filename_bse);

Expand Down
8 changes: 2 additions & 6 deletions applications/analysis/main_analysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,16 +60,13 @@ int main(int argc, char** argv) {
// Create and initialize the DCA data object and read the output of the DCA(+) calculation.
DcaDataType dca_data(parameters);
dca_data.initialize();
#ifdef DCA_HAVE_ADIOS2
adios2::ADIOS adios;

if (dca::io::stringToIOType(parameters.get_output_format()) == dca::io::IOType::ADIOS2) {
std::cout << "\nProcessor " << concurrency.id() << " is writing data." << std::endl;
dca::io::Writer writer(adios, concurrency, parameters.get_output_format(), true);
dca::io::Writer writer(concurrency, parameters.get_output_format(), true);
std::string filename_bse(parameters.get_directory() + parameters.getAppropriateFilenameAnalysis());
writer.open_file(filename_bse);

dca_data.read(adios, parameters.get_directory() + parameters.get_filename_dca());
dca_data.read(parameters.get_directory() + parameters.get_filename_dca());
BseSolverType bse_solver(parameters, dca_data);
bse_solver.calculateSusceptibilities();

Expand All @@ -82,7 +79,6 @@ int main(int argc, char** argv) {
}
}
else
#endif
{
dca_data.read(parameters.get_directory() + parameters.get_filename_dca());
BseSolverType bse_solver(parameters, dca_data);
Expand Down
4 changes: 2 additions & 2 deletions applications/dca/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ if (DCA_BUILD_DCA)
target_include_directories(main_dca PRIVATE ${DCA_INCLUDE_DIRS})

if (DCA_HAVE_GPU)
target_link_libraries(main_dca PRIVATE ${DCA_GPU_LIBS} g0_interpolation ${DCA_KERNEL_LIBS})
target_link_libraries(main_dca PRIVATE ${DCA_KERNEL_LIBS})
endif()

target_link_libraries(main_dca PUBLIC FFTW::Double signals ${DCA_LIBS})
target_link_libraries(main_dca PUBLIC FFTW::Double signals ${DCA_LIBS} dca_io)
endif()
2 changes: 2 additions & 0 deletions applications/dca/main_dca.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#include "dca/config/dca.hpp"
#include "dca/application/dca_loop_dispatch.hpp"
#include "dca/config/cmake_options.hpp"
#include "dca/config/haves_defines.hpp"

// Defines Concurrency, Threading, ParametersType, DcaData, DcaLoop, and Profiler.
#include "dca/io/json/json_reader.hpp"
#include "dca/util/git_version.hpp"
Expand Down
40 changes: 40 additions & 0 deletions build-aux/frontier_rocm6_build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@

# Configures a HIP (ROCm 6, gfx90a) build of DCA++ with the Ninja generator.
#
# The source directory is passed as `..`, so run this from a build directory
# located directly inside the source tree.
#
# Expects FFTW_PATH, ROCM_PATH, MAGMA_ROOT, OPENBLAS_ROOT, INST (install prefix)
# and CMAKE_PREFIX_PATH to be set in the environment.
cmake -DDCA_WITH_CUDA=off -DDCA_WITH_HIP=ON \
      -DFFTW_ROOT="${FFTW_PATH}" \
      -DDCA_FIX_BROKEN_MPICH=ON \
      -DROCM_ROOT="${ROCM_PATH}" \
      -DMAGMA_ROOT="${MAGMA_ROOT}" \
      -DLAPACK_ROOT="${OPENBLAS_ROOT}" \
      -DBLAS_ROOT="${OPENBLAS_ROOT}" \
      -DDCA_WITH_TESTS_FAST=ON \
      -DTEST_RUNNER="srun" \
      -DGPU_TARGETS=gfx90a \
      -DAMDGPU_TARGETS=gfx90a \
      -DCMAKE_C_COMPILER=mpicc \
      -DCMAKE_CXX_COMPILER=mpic++ \
      -DCMAKE_HIP_COMPILER=/opt/rocm-6.0.0/llvm/bin/clang++ \
      -DCMAKE_INSTALL_PREFIX="${INST}" \
      -DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}" \
      -GNinja \
      ..

# Reference copy of the invocation above, kept commented out. Every line needs
# its own leading '#': a trailing backslash does NOT continue a shell comment,
# so uncommented continuation lines would be executed as a separate command.
#cmake -DDCA_WITH_CUDA=off -DDCA_WITH_HIP=ON \
#      -DFFTW_ROOT=$FFTW_PATH \
#      -DDCA_FIX_BROKEN_MPICH=ON \
#      -DROCM_ROOT=${ROCM_PATH} \
#      -DMAGMA_ROOT=${MAGMA_ROOT} \
#      -DLAPACK_ROOT=${OPENBLAS_ROOT} \
#      -DBLAS_ROOT=${OPENBLAS_ROOT} \
#      -DDCA_WITH_TESTS_FAST=ON \
#      -DTEST_RUNNER="srun" \
#      -DGPU_TARGETS=gfx90a \
#      -DAMDGPU_TARGETS=gfx90a \
#      -DCMAKE_C_COMPILER=mpicc \
#      -DCMAKE_CXX_COMPILER=mpic++ \
#      -DCMAKE_HIP_COMPILER=/opt/rocm-6.0.0/llvm/bin/clang++ \
#      -DCMAKE_INSTALL_PREFIX=$INST \
#      -DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}" \
#      -GNinja \
#      ..

# Same invocation on a single line, convenient for copy/paste:
# cmake -DDCA_WITH_CUDA=off -DDCA_WITH_HIP=ON -DFFTW_ROOT=$FFTW_PATH -DDCA_FIX_BROKEN_MPICH=ON -DROCM_ROOT=${ROCM_PATH} -DMAGMA_ROOT=${MAGMA_ROOT} -DLAPACK_ROOT=${OPENBLAS_ROOT} -DBLAS_ROOT=${OPENBLAS_ROOT} -DDCA_WITH_TESTS_FAST=ON -DTEST_RUNNER="srun" -DGPU_TARGETS=gfx90a -DAMDGPU_TARGETS=gfx90a -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpic++ -DCMAKE_HIP_COMPILER=/opt/rocm-6.0.0/llvm/bin/clang++ -DCMAKE_INSTALL_PREFIX=$INST -DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}" -GNinja ..
7 changes: 7 additions & 0 deletions build-aux/frontier_rocm6_just_exports.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Sets the compiler variables (CC/CXX) and the install prefixes of OpenBLAS,
# HDF5, MAGMA and FFTW. No modules or spack packages are loaded here.
CC=mpicc
CXX=mpicxx
export CC CXX

# Install prefixes under the shared spack tree.
OPENBLAS_ROOT=/lustre/orion/cph102/proj-shared/epd/spack/opt/spack/linux-sles15-zen3/gcc-11.2.0/openblas-0.3.25-scaywvuh5zsm5u7smg54plj2oyf7nekv
HDF5_ROOT=/lustre/orion/cph102/proj-shared/epd/spack/opt/spack/linux-sles15-zen3/rocmcc-6.0.0/hdf5-1.12.1-ajskwiaabdvgc36ozb6hzqnrwu2becha
MAGMA_ROOT=/lustre/orion/cph102/proj-shared/epd/spack/opt/spack/linux-sles15-zen3/rocmcc-6.0.0/magma-master-rizw3ajkhfcq5cjutoykgkkv5hexftoz
FFTW_PATH=/lustre/orion/cph102/proj-shared/epd/spack/opt/spack/linux-sles15-zen3/rocmcc-6.0.0/fftw-3.3.10-2mykijticsr5rfbyunax4zrwhhzcb7qm
export OPENBLAS_ROOT HDF5_ROOT MAGMA_ROOT FFTW_PATH
26 changes: 26 additions & 0 deletions build-aux/frontier_rocm6_load_modules.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/bash
#
# Development-only environment setup for building DCA++ with ROCm 6 on ORNL's Frontier.
# Starts with `module reset` to return to the default programming environment,
# then loads the ROCm module and the spack packages used by the build, and
# finally exports the compiler variables and dependency install prefixes.
#
# Usage: source frontier_rocm6_load_modules.sh

# Start from the default programming environment.
module reset
module load amd-mixed/6.0.0

# Build tools and libraries from spack.
spack load cmake%gcc@11.2.0
spack load ninja%gcc@11.2.0
spack load magma@master amdgpu_target=gfx90a
spack load hdf5@1.12.1 +cxx ~mpi api=v112 %rocmcc@6.0.0
spack load fftw ~mpi %rocmcc@6.0.0
spack load openblas@0.3.25 %gcc@11.2.0

# Compiler variables.
CC=mpicc
CXX=mpicxx
export CC CXX

# Install prefixes under the shared spack tree.
OPENBLAS_ROOT=/lustre/orion/cph102/proj-shared/epd/spack/opt/spack/linux-sles15-zen3/gcc-11.2.0/openblas-0.3.25-scaywvuh5zsm5u7smg54plj2oyf7nekv
HDF5_ROOT=/lustre/orion/cph102/proj-shared/epd/spack/opt/spack/linux-sles15-zen3/rocmcc-6.0.0/hdf5-1.12.1-ajskwiaabdvgc36ozb6hzqnrwu2becha
MAGMA_ROOT=/lustre/orion/cph102/proj-shared/epd/spack/opt/spack/linux-sles15-zen3/rocmcc-6.0.0/magma-master-rizw3ajkhfcq5cjutoykgkkv5hexftoz
FFTW_PATH=/lustre/orion/cph102/proj-shared/epd/spack/opt/spack/linux-sles15-zen3/rocmcc-6.0.0/fftw-3.3.10-2mykijticsr5rfbyunax4zrwhhzcb7qm
export OPENBLAS_ROOT HDF5_ROOT MAGMA_ROOT FFTW_PATH
# Disabled: optional tcmalloc preload.
#export LD_PRELOAD=/opt/cray/pe/lib64/cce/libtcmalloc_minimal.so.1
2 changes: 1 addition & 1 deletion cmake/dca_cuda.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ if (CMAKE_CUDA_COMPILER)
dca_add_haves_define(DCA_HAVE_CUDA)
dca_add_haves_define(DCA_HAVE_GPU)

list(APPEND DCA_GPU_LIBS CUDA::cudart CUDA::cublas)
list(APPEND DCA_GPU_LIBS CUDA::cudart CUDA::cublas CUDA::cusparse)
set(DCA_CUDA_PROPERTIES "CMAKE_CUDA_ARCHITECTURES 70")
list(APPEND CUDAFLAGS "--expt-relaxed-constexpr" ${DCA_CUDA_OPTIONS})
set(CMAKE_CUDA_STANDARD 17)
Expand Down
2 changes: 1 addition & 1 deletion cmake/dca_external_libs.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ list(APPEND DCA_EXTERNAL_LIBS ${BLAS_LIBRARIES})
# HDF5

if (NOT HDF5_LIBRARIES)
set(HDF5_NO_FIND_PACKAGE_CONFIG_FILE true)
set(HDF5_NO_FIND_PACKAGE_CONFIG_FILE false)
set(HDF5_PREFER_PARALLEL false)
find_package(HDF5 REQUIRED COMPONENTS C CXX)
message("HDF5: ${HDF5_FOUND} ${HDF5_LIBRARIES}")
Expand Down
15 changes: 11 additions & 4 deletions cmake/dca_hip.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,19 @@ if(DCA_WITH_HIP)
#-------------------------------------------------------------------
# set up HIP compiler options
#-------------------------------------------------------------------
set(CMAKE_MODULE_PATH "${ROCM_ROOT}/hip/cmake" ${CMAKE_MODULE_PATH})
set(CMAKE_MODULE_PATH "${ROCM_ROOT}/hip/cmake" "${ROCM_ROOT}/lib/cmake/hip" "${ROCM_ROOT}/lib/cmake/hipblas" "${ROCM_ROOT}/lib/cmake/rocthrust" ${CMAKE_MODULE_PATH})
find_package(HIP REQUIRED)
find_package(hipblas REQUIRED)
find_package(hipsparse REQUIRED)
find_package(rocsolver REQUIRED)
find_package(rocthrust REQUIRED)

endif(DCA_WITH_HIP)

get_property(hipblas_include_dirs TARGET roc::hipblas PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
message("hipblas includes: ${hipblas_include_dirs}")


#set(CUDA_ARCHITECTURES "sm_60" CACHE STRING "Name of the real architecture to build for.")
set(MAGMA_ROOT "" CACHE PATH "Path to the MAGMA installation directory. Hint for CMake to find MAGMA.")

Expand All @@ -66,11 +71,12 @@ if (CMAKE_HIP_COMPILER)
dca_add_haves_define(DCA_HAVE_GPU)
dca_add_haves_define(__HIP_PLATFORM_AMD__)
list(APPEND DCA_GPU_LIBS hip::host roc::hipblas roc::hipsparse)
set(DCA_HIP_PROPERTIES "CMAKE_HIP_ARCHITECTURES gfx906,gfx908")
set(DCA_HIP_PROPERTIES "CMAKE_HIP_ARCHITECTURES gfx908,gfx90a")
set(CMAKE_HIP_STANDARD 17)
list(APPEND HIP_HIPCC_FLAGS "-fPIC")
# doesn't appear to work
set(CMAKE_HIP_SOURCE_FILE_EXTENSIONS cu)
message("Enabled HIP as a language")
# NOTE: this is solved by dca_linking.cmake: dca_gpu_device_link()
# alternative method (same issue)
#file(GLOB_RECURSE CUDA_KERNELS_SRC ${PROJECT_SOURCE_DIR} *.cu)
Expand Down Expand Up @@ -106,6 +112,7 @@ if (MAGMA_LIBRARY AND MAGMA_INCLUDE_DIR)
# I have built magma without openmp for
# CI. But if you naively use a random system's
# magma, expect to have a link error.
target_link_libraries(magma::sparse INTERFACE magma::magma roc::hipblas roc::hipsparse)
list(APPEND DCA_GPU_LIBS ${MAGMA_LIBRARY} roc::hipsparse)
target_link_libraries(magma::magma INTERFACE roc::hipblas roc::hipsparse LAPACK::LAPACK BLAS::BLAS)
target_link_libraries(magma::sparse INTERFACE magma::magma)
list(APPEND DCA_GPU_LIBS ${MAGMA_LIBRARY} roc::hipsparse roc::hipblas)
endif()
6 changes: 3 additions & 3 deletions cmake/dca_linking.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#link the correct gpu runtime library
function(dca_gpu_runtime_link target_name)
if(DCA_HAVE_HIP)
target_link_libraries(${target_name} PUBLIC hip::host)
target_link_libraries(${target_name} PUBLIC hip::host roc::hipblas roc::hipsparse)
message("linking target ${target_name} to hip::host")
elseif(DCA_HAVE_CUDA)
target_link_libraries(${target_name} PUBLIC CUDA::cudart)
Expand All @@ -12,7 +12,7 @@ endfunction()
#link the correct gpu blas library
function(dca_gpu_blas_link target_name)
if(DCA_HAVE_HIP)
target_link_libraries(${target_name} PUBLIC roc::hipblas)
target_link_libraries(${target_name} PUBLIC roc::hipblas roc::hipsparse)
message("linking target ${target_name} to roc::hipblas")
elseif(DCA_HAVE_CUDA)
target_link_libraries(${target_name} PUBLIC CUDA::cublas)
Expand All @@ -26,7 +26,7 @@ function(dca_gpu_device_link target_name)
PROPERTIES HIP_SEPARABLE_COMPILATION ON)
set_target_properties( ${target_name}
PROPERTIES HIP_RESOLVE_DEVICE_SYMBOLS ON)
target_link_libraries(${target_name} PRIVATE hip::device)
target_link_libraries(${target_name} PRIVATE hip::device roc::hipblas roc::hipsparse roc::rocthrust)
get_target_property(_srcs ${target_name} SOURCES)
get_target_property(_src_dir ${target_name} SOURCE_DIR)
#
Expand Down
6 changes: 5 additions & 1 deletion cmake/dca_testing.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ endif()
# MPI or CUDA may be given to indicate that the test requires these libraries. MPI_NUMPROC is the
# number of MPI processes to use for a test with MPI, the default value is 1.
function(dca_add_gtest name)
set(options FAST EXTENSIVE STOCHASTIC PERFORMANCE GTEST_MAIN THREADED MPI CUDA CUDA_MPI)
set(options FAST EXTENSIVE STOCHASTIC PERFORMANCE GTEST_MAIN GTEST_MPI_MAIN THREADED MPI CUDA CUDA_MPI)
set(oneValueArgs MPI_NUMPROC)
set(multiValueArgs INCLUDE_DIRS SOURCES LIBS)
cmake_parse_arguments(DCA_ADD_GTEST "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
Expand Down Expand Up @@ -108,6 +108,10 @@ function(dca_add_gtest name)
set(DCA_ADD_GTEST_SOURCES ${PROJECT_SOURCE_DIR}/test/dca_gtest_main.cpp ${DCA_ADD_GTEST_SOURCES})
endif()

if (DCA_ADD_GTEST_GTEST_MPI_MAIN)
set(DCA_ADD_GTEST_SOURCES ${PROJECT_SOURCE_DIR}/test/dca_gtest_main_mpi.cpp ${DCA_ADD_GTEST_SOURCES})
endif()

add_executable(${name} ${name}.cpp ${DCA_ADD_GTEST_SOURCES})

target_compile_definitions(${name} PRIVATE DCA_SOURCE_DIR=\"${PROJECT_SOURCE_DIR}\")
Expand Down
10 changes: 5 additions & 5 deletions include/dca/function/function.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,7 @@ function<scalartype, domain, DT>::function(const std::string& name)
std::cerr << "large functions need names give yourself a chance.\n";
}
// will zero real or complex values
fnc_values_.resize(dmn.get_size(), {});
fnc_values_.resize(dmn.get_size());
}

/** copy constructor
Expand Down Expand Up @@ -512,7 +512,7 @@ function<scalartype, domain, DT>::function(std::initializer_list<scalartype> ini
Nb_sbdms(dmn.get_leaf_domain_sizes().size()) {
start_ = 0;
end_ = dmn.get_size();
fnc_values_.resize(dmn.get_size(), {});
fnc_values_.resize(dmn.get_size());
std::copy_n(init_list.begin(), init_list.size(), fnc_values_.begin());
}

Expand Down Expand Up @@ -657,7 +657,7 @@ function<scalartype, domain, DT>& function<scalartype, domain, DT>::operator=(
Nb_sbdms = other.dmn.get_leaf_domain_sizes().size();
start_ = other.start_;
end_ = other.end_;
fnc_values_.resize(other.size(), {});
fnc_values_.resize(other.size());
}
fnc_values_ = other.fnc_values_;
}
Expand All @@ -679,7 +679,7 @@ inline function<Scalar, domain, DT>& function<Scalar, domain, DT>::operator=(
Nb_sbdms = other.dmn.get_leaf_domain_sizes().size();
start_ = other.start_;
end_ = other.end_;
fnc_values_.resize(other.size(), {});
fnc_values_.resize(other.size());
}
fnc_values_ = other.fnc_values_;
}
Expand All @@ -694,7 +694,7 @@ inline function<Scalar, domain, DT>& function<Scalar, domain, DT>::operator=(
Nb_sbdms = other.dmn.get_leaf_domain_sizes().size();
start_ = other.start_;
end_ = other.end_;
fnc_values_.resize(other.size(), {});
fnc_values_.resize(other.size());
}
auto kConvert = [](auto& kvec) -> std::vector<Scalar> {
std::vector<Scalar> k_converted(kvec.size());
Expand Down
2 changes: 1 addition & 1 deletion include/dca/io/adios2/adios2_global.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
// ADIOS2 global object


/** For testing only, in main_dca adios2::ADIOS is a member of dca_loop
/** For testing only; in main_dca, adios2::ADIOS is a member of concurrency, the concurrency context owned by main.
*/
class GlobalAdios
{
Expand Down
Loading

0 comments on commit aae08d8

Please sign in to comment.