diff --git a/CMakeLists.txt b/CMakeLists.txt index 9d0d32bc1..abea4d695 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -171,6 +171,7 @@ set(DCA_LIBS signals coarsegraining ${DCA_CONCURRENCY_LIB} + parallel_no_concurrency ${DCA_THREADING_LIBS} lapack models @@ -195,12 +196,12 @@ if (DCA_HAVE_GPU) ) endif() -if (DCA_WITH_ADIOS2 AND DCA_HAVE_ADIOS2) - list(APPEND DCA_LIBS - dca_adios2 adios2::adios2 - ) - message("-- Add dca_adios2 to targets") -endif() +# if (DCA_WITH_ADIOS2 AND DCA_HAVE_ADIOS2) +# list(APPEND DCA_LIBS +# dca_adios2 adios2::adios2 +# ) +# message("-- Add dca_adios2 to targets") +# endif() # The BLAS and LAPACK libraries in DCA_EXTERNAL_LIBS should be linked after MAGMA. list(APPEND DCA_LIBS lapack ${DCA_EXTERNAL_LIBS}) diff --git a/applications/analysis/chi_q_omega.cpp b/applications/analysis/chi_q_omega.cpp index dbd655783..8c31370d9 100644 --- a/applications/analysis/chi_q_omega.cpp +++ b/applications/analysis/chi_q_omega.cpp @@ -77,7 +77,7 @@ int main(int argc, char** argv) { if (dca::io::stringToIOType(parameters.get_output_format()) == dca::io::IOType::ADIOS2) { int rank = concurrency.id(); std::cout << "\nProcessor " << concurrency.id() << " is writing data." << std::endl; - dca::io::Writer writer(adios, concurrency, parameters.get_output_format(), true); + dca::io::Writer writer(concurrency, parameters.get_output_format(), true); std::string filename_bse(parameters.get_directory() + parameters.getAppropriateFilenameAnalysis()); writer.open_file(filename_bse); diff --git a/applications/analysis/main_analysis.cpp b/applications/analysis/main_analysis.cpp index e3cbf939a..617ff415c 100644 --- a/applications/analysis/main_analysis.cpp +++ b/applications/analysis/main_analysis.cpp @@ -60,16 +60,13 @@ int main(int argc, char** argv) { // Create and initialize the DCA data object and read the output of the DCA(+) calculation. DcaDataType dca_data(parameters); dca_data.initialize(); -#ifdef DCA_HAVE_ADIOS2 - adios2::ADIOS adios; - if (dca::io::stringToIOType(parameters.get_output_format()) == dca::io::IOType::ADIOS2) { std::cout << "\nProcessor " << concurrency.id() << " is writing data." << std::endl; - dca::io::Writer writer(adios, concurrency, parameters.get_output_format(), true); + dca::io::Writer writer(concurrency, parameters.get_output_format(), true); std::string filename_bse(parameters.get_directory() + parameters.getAppropriateFilenameAnalysis()); writer.open_file(filename_bse); - dca_data.read(adios, parameters.get_directory() + parameters.get_filename_dca()); + dca_data.read(parameters.get_directory() + parameters.get_filename_dca()); BseSolverType bse_solver(parameters, dca_data); bse_solver.calculateSusceptibilities(); @@ -82,7 +79,6 @@ int main(int argc, char** argv) { } } else -#endif { dca_data.read(parameters.get_directory() + parameters.get_filename_dca()); BseSolverType bse_solver(parameters, dca_data); diff --git a/applications/dca/CMakeLists.txt b/applications/dca/CMakeLists.txt index 6bc3d37e6..aac9a9cf6 100644 --- a/applications/dca/CMakeLists.txt +++ b/applications/dca/CMakeLists.txt @@ -5,8 +5,8 @@ if (DCA_BUILD_DCA) target_include_directories(main_dca PRIVATE ${DCA_INCLUDE_DIRS}) if (DCA_HAVE_GPU) - target_link_libraries(main_dca PRIVATE ${DCA_GPU_LIBS} g0_interpolation ${DCA_KERNEL_LIBS}) + target_link_libraries(main_dca PRIVATE ${DCA_KERNEL_LIBS}) endif() - target_link_libraries(main_dca PUBLIC FFTW::Double signals ${DCA_LIBS}) + target_link_libraries(main_dca PUBLIC FFTW::Double signals ${DCA_LIBS} dca_io) endif() diff --git a/applications/dca/main_dca.cpp b/applications/dca/main_dca.cpp index dcb5fbe23..835561e18 100644 --- a/applications/dca/main_dca.cpp +++ b/applications/dca/main_dca.cpp @@ -17,6 +17,8 @@ #include "dca/config/dca.hpp" #include "dca/application/dca_loop_dispatch.hpp" #include "dca/config/cmake_options.hpp" +#include "dca/config/haves_defines.hpp" + // Defines Concurrency, Threading, ParametersType, DcaData, DcaLoop, and Profiler. #include "dca/io/json/json_reader.hpp" #include "dca/util/git_version.hpp" diff --git a/build-aux/frontier_rocm6_build.sh b/build-aux/frontier_rocm6_build.sh new file mode 100644 index 000000000..cf68705ed --- /dev/null +++ b/build-aux/frontier_rocm6_build.sh @@ -0,0 +1,40 @@ + +cmake -DDCA_WITH_CUDA=off -DDCA_WITH_HIP=ON \ + -DFFTW_ROOT=$FFTW_PATH \ + -DDCA_FIX_BROKEN_MPICH=ON \ + -DROCM_ROOT=${ROCM_PATH} \ + -DMAGMA_ROOT=${MAGMA_ROOT} \ + -DLAPACK_ROOT=${OPENBLAS_ROOT} \ + -DBLAS_ROOT=${OPENBLAS_ROOT} \ + -DDCA_WITH_TESTS_FAST=ON \ + -DTEST_RUNNER="srun" \ + -DGPU_TARGETS=gfx90a \ + -DAMDGPU_TARGETS=gfx90a \ + -DCMAKE_C_COMPILER=mpicc \ + -DCMAKE_CXX_COMPILER=mpic++ \ + -DCMAKE_HIP_COMPILER=/opt/rocm-6.0.0/llvm/bin/clang++ \ + -DCMAKE_INSTALL_PREFIX=$INST \ + -DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}" \ + -GNinja \ + .. + +#cmake -DDCA_WITH_CUDA=off -DDCA_WITH_HIP=ON \ + -DFFTW_ROOT=$FFTW_PATH \ + -DDCA_FIX_BROKEN_MPICH=ON \ + -DROCM_ROOT=${ROCM_PATH} \ + -DMAGMA_ROOT=${MAGMA_ROOT} \ + -DLAPACK_ROOT=${OPENBLAS_ROOT} \ + -DBLAS_ROOT=${OPENBLAS_ROOT} \ + -DDCA_WITH_TESTS_FAST=ON \ + -DTEST_RUNNER="srun" \ + -DGPU_TARGETS=gfx90a \ + -DAMDGPU_TARGETS=gfx90a \ + -DCMAKE_C_COMPILER=mpicc \ + -DCMAKE_CXX_COMPILER=mpic++ \ + -DCMAKE_HIP_COMPILER=/opt/rocm-6.0.0/llvm/bin/clang++ \ + -DCMAKE_INSTALL_PREFIX=$INST \ + -DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}" \ + -GNinja \ + .. +# cmake -DDCA_WITH_CUDA=off -DDCA_WITH_HIP=ON -DFFTW_ROOT=$FFTW_PATH -DDCA_FIX_BROKEN_MPICH=ON -DROCM_ROOT=${ROCM_PATH} -DMAGMA_ROOT=${MAGMA_ROOT} -DLAPACK_ROOT=${OPENBLAS_ROOT} -DBLAS_ROOT=${OPENBLAS_ROOT} -DDCA_WITH_TESTS_FAST=ON -DTEST_RUNNER="srun" -DGPU_TARGETS=gfx90a -DAMDGPU_TARGETS=gfx90a -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpic++ -DCMAKE_HIP_COMPILER=/opt/rocm-6.0.0/llvm/bin/clang++ -DCMAKE_INSTALL_PREFIX=$INST -DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}" -GNinja .. + .. diff --git a/build-aux/frontier_rocm6_just_exports.sh b/build-aux/frontier_rocm6_just_exports.sh new file mode 100644 index 000000000..059c296f3 --- /dev/null +++ b/build-aux/frontier_rocm6_just_exports.sh @@ -0,0 +1,7 @@ +export CC=mpicc +export CXX=mpicxx + +export OPENBLAS_ROOT=/lustre/orion/cph102/proj-shared/epd/spack/opt/spack/linux-sles15-zen3/gcc-11.2.0/openblas-0.3.25-scaywvuh5zsm5u7smg54plj2oyf7nekv +export HDF5_ROOT=/lustre/orion/cph102/proj-shared/epd/spack/opt/spack/linux-sles15-zen3/rocmcc-6.0.0/hdf5-1.12.1-ajskwiaabdvgc36ozb6hzqnrwu2becha +export MAGMA_ROOT=/lustre/orion/cph102/proj-shared/epd/spack/opt/spack/linux-sles15-zen3/rocmcc-6.0.0/magma-master-rizw3ajkhfcq5cjutoykgkkv5hexftoz +export FFTW_PATH=/lustre/orion/cph102/proj-shared/epd/spack/opt/spack/linux-sles15-zen3/rocmcc-6.0.0/fftw-3.3.10-2mykijticsr5rfbyunax4zrwhhzcb7qm diff --git a/build-aux/frontier_rocm6_load_modules.sh b/build-aux/frontier_rocm6_load_modules.sh new file mode 100644 index 000000000..44951217c --- /dev/null +++ b/build-aux/frontier_rocm6_load_modules.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# +# Loads all modules that are required to build DCA++ on ORNL's Frontier. +# A reset is done at the beginning to restore to the default programming environment on Frontier. +# This is for development only at this point. +# +# Usage: source frontier_load_modules.sh + + +module reset +module load amd-mixed/6.0.0 +spack load cmake%gcc@11.2.0 +spack load ninja%gcc@11.2.0 +spack load magma@master amdgpu_target=gfx90a +spack load hdf5@1.12.1 +cxx ~mpi api=v112 %rocmcc@6.0.0 +spack load fftw ~mpi %rocmcc@6.0.0 +spack load openblas@0.3.25 %gcc@11.2.0 + +export CC=mpicc +export CXX=mpicxx + +export OPENBLAS_ROOT=/lustre/orion/cph102/proj-shared/epd/spack/opt/spack/linux-sles15-zen3/gcc-11.2.0/openblas-0.3.25-scaywvuh5zsm5u7smg54plj2oyf7nekv +export HDF5_ROOT=/lustre/orion/cph102/proj-shared/epd/spack/opt/spack/linux-sles15-zen3/rocmcc-6.0.0/hdf5-1.12.1-ajskwiaabdvgc36ozb6hzqnrwu2becha +export MAGMA_ROOT=/lustre/orion/cph102/proj-shared/epd/spack/opt/spack/linux-sles15-zen3/rocmcc-6.0.0/magma-master-rizw3ajkhfcq5cjutoykgkkv5hexftoz +export FFTW_PATH=/lustre/orion/cph102/proj-shared/epd/spack/opt/spack/linux-sles15-zen3/rocmcc-6.0.0/fftw-3.3.10-2mykijticsr5rfbyunax4zrwhhzcb7qm +#export LD_PRELOAD=/opt/cray/pe/lib64/cce/libtcmalloc_minimal.so.1 diff --git a/cmake/dca_cuda.cmake b/cmake/dca_cuda.cmake index 54a2295b2..746eace27 100644 --- a/cmake/dca_cuda.cmake +++ b/cmake/dca_cuda.cmake @@ -31,7 +31,7 @@ if (CMAKE_CUDA_COMPILER) dca_add_haves_define(DCA_HAVE_CUDA) dca_add_haves_define(DCA_HAVE_GPU) - list(APPEND DCA_GPU_LIBS CUDA::cudart CUDA::cublas) + list(APPEND DCA_GPU_LIBS CUDA::cudart CUDA::cublas CUDA::cusparse) set(DCA_CUDA_PROPERTIES "CMAKE_CUDA_ARCHITECTURES 70") list(APPEND CUDAFLAGS "--expt-relaxed-constexpr" ${DCA_CUDA_OPTIONS}) set(CMAKE_CUDA_STANDARD 17) diff --git a/cmake/dca_external_libs.cmake b/cmake/dca_external_libs.cmake index f2b01f519..29af1285f 100644 --- a/cmake/dca_external_libs.cmake +++ b/cmake/dca_external_libs.cmake @@ -41,7 +41,7 @@ list(APPEND DCA_EXTERNAL_LIBS ${BLAS_LIBRARIES}) # HDF5 if (NOT HDF5_LIBRARIES) -set(HDF5_NO_FIND_PACKAGE_CONFIG_FILE true) +set(HDF5_NO_FIND_PACKAGE_CONFIG_FILE false) set(HDF5_PREFER_PARALLEL false) find_package(HDF5 REQUIRED COMPONENTS C CXX) message("HDF5: ${HDF5_FOUND} ${HDF5_LIBRARIES}") diff --git a/cmake/dca_hip.cmake b/cmake/dca_hip.cmake index bc19fc9ec..2d3e0ebb3 100644 --- a/cmake/dca_hip.cmake +++ b/cmake/dca_hip.cmake @@ -39,14 +39,19 @@ if(DCA_WITH_HIP) #------------------------------------------------------------------- # set up HIP compiler options #------------------------------------------------------------------- - set(CMAKE_MODULE_PATH "${ROCM_ROOT}/hip/cmake" ${CMAKE_MODULE_PATH}) + set(CMAKE_MODULE_PATH "${ROCM_ROOT}/hip/cmake" "${ROCM_ROOT}/lib/cmake/hip" "${ROCM_ROOT}/lib/cmake/hipblas" "${ROCM_ROOT}/lib/cmake/rocthrust" ${CMAKE_MODULE_PATH}) find_package(HIP REQUIRED) find_package(hipblas REQUIRED) find_package(hipsparse REQUIRED) find_package(rocsolver REQUIRED) + find_package(rocthrust REQUIRED) endif(DCA_WITH_HIP) +get_property(hipblas_include_dirs TARGET roc::hipblas PROPERTY INTERFACE_INCLUDE_DIRECTORIES) +message("hipblas includes: ${hipblas_include_dirs}") + + #set(CUDA_ARCHITECTURES "sm_60" CACHE STRING "Name of the real architecture to build for.") set(MAGMA_ROOT "" CACHE PATH "Path to the MAGMA installation directory. Hint for CMake to find MAGMA.") @@ -66,11 +71,12 @@ if (CMAKE_HIP_COMPILER) dca_add_haves_define(DCA_HAVE_GPU) dca_add_haves_define(__HIP_PLATFORM_AMD__) list(APPEND DCA_GPU_LIBS hip::host roc::hipblas roc::hipsparse) - set(DCA_HIP_PROPERTIES "CMAKE_HIP_ARCHITECTURES gfx906,gfx908") + set(DCA_HIP_PROPERTIES "CMAKE_HIP_ARCHITECTURES gfx908,gfx90a") set(CMAKE_HIP_STANDARD 17) list(APPEND HIP_HIPCC_FLAGS "-fPIC") # doesn't appear to work set(CMAKE_HIP_SOURCE_FILE_EXTENSIONS cu) + message("Enabled HIP as a language") # NOTE: this is solved by dca_linking.cmake: dca_gpu_device_link() # alternative method (same issue) #file(GLOB_RECURSE CUDA_KERNELS_SRC ${PROJECT_SOURCE_DIR} *.cu) @@ -106,6 +112,7 @@ if (MAGMA_LIBRARY AND MAGMA_INCLUDE_DIR) # I have built magma without openmp for # CI. But if you naively use a random systems # magma expect to have a link error. - target_link_libraries(magma::sparse INTERFACE magma::magma roc::hipblas roc::hipsparse) - list(APPEND DCA_GPU_LIBS ${MAGMA_LIBRARY} roc::hipsparse) + target_link_libraries(magma::magma INTERFACE roc::hipblas roc::hipsparse LAPACK::LAPACK BLAS::BLAS) + target_link_libraries(magma::sparse INTERFACE magma::magma) + list(APPEND DCA_GPU_LIBS ${MAGMA_LIBRARY} roc::hipsparse roc::hipblas) endif() diff --git a/cmake/dca_linking.cmake b/cmake/dca_linking.cmake index 040e0bada..9fd39f2b7 100644 --- a/cmake/dca_linking.cmake +++ b/cmake/dca_linking.cmake @@ -2,7 +2,7 @@ #link the correct gpu runtime library function(dca_gpu_runtime_link target_name) if(DCA_HAVE_HIP) - target_link_libraries(${target_name} PUBLIC hip::host) + target_link_libraries(${target_name} PUBLIC hip::host roc::hipblas roc::hipsparse) message("linking target ${target_name} to hip::host") elseif(DCA_HAVE_CUDA) target_link_libraries(${target_name} PUBLIC CUDA::cudart) @@ -12,7 +12,7 @@ endfunction() #link the correct gpu runtime library function(dca_gpu_blas_link target_name) if(DCA_HAVE_HIP) - target_link_libraries(${target_name} PUBLIC roc::hipblas) + target_link_libraries(${target_name} PUBLIC roc::hipblas roc::hipsparse) message("linking target ${target_name} to roc::hipblas") elseif(DCA_HAVE_CUDA) target_link_libraries(${target_name} PUBLIC CUDA::cublas) @@ -26,7 +26,7 @@ function(dca_gpu_device_link target_name) PROPERTIES HIP_SEPARABLE_COMPILATION ON) set_target_properties( ${target_name} PROPERTIES HIP_RESOLVE_DEVICE_SYMBOLS ON) - target_link_libraries(${target_name} PRIVATE hip::device) + target_link_libraries(${target_name} PRIVATE hip::device roc::hipblas roc::hipsparse roc::rocthrust) get_target_property(_srcs ${target_name} SOURCES) get_target_property(_src_dir ${target_name} SOURCE_DIR) # diff --git a/cmake/dca_testing.cmake b/cmake/dca_testing.cmake index 9fa851207..cc597e4e4 100644 --- a/cmake/dca_testing.cmake +++ b/cmake/dca_testing.cmake @@ -34,7 +34,7 @@ endif() # MPI or CUDA may be given to indicate that the test requires these libraries. MPI_NUMPROC is the # number of MPI processes to use for a test with MPI, the default value is 1. function(dca_add_gtest name) - set(options FAST EXTENSIVE STOCHASTIC PERFORMANCE GTEST_MAIN THREADED MPI CUDA CUDA_MPI) + set(options FAST EXTENSIVE STOCHASTIC PERFORMANCE GTEST_MAIN GTEST_MPI_MAIN THREADED MPI CUDA CUDA_MPI) set(oneValueArgs MPI_NUMPROC) set(multiValueArgs INCLUDE_DIRS SOURCES LIBS) cmake_parse_arguments(DCA_ADD_GTEST "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) @@ -108,6 +108,10 @@ function(dca_add_gtest name) set(DCA_ADD_GTEST_SOURCES ${PROJECT_SOURCE_DIR}/test/dca_gtest_main.cpp ${DCA_ADD_GTEST_SOURCES}) endif() + if (DCA_ADD_GTEST_GTEST_MPI_MAIN) + set(DCA_ADD_GTEST_SOURCES ${PROJECT_SOURCE_DIR}/test/dca_gtest_main_mpi.cpp ${DCA_ADD_GTEST_SOURCES}) + endif() + add_executable(${name} ${name}.cpp ${DCA_ADD_GTEST_SOURCES}) target_compile_definitions(${name} PRIVATE DCA_SOURCE_DIR=\"${PROJECT_SOURCE_DIR}\") diff --git a/include/dca/function/function.hpp b/include/dca/function/function.hpp index 487786a3a..359402f1e 100644 --- a/include/dca/function/function.hpp +++ b/include/dca/function/function.hpp @@ -436,7 +436,7 @@ function::function(const std::string& name) std::cerr << "large functions need names give yourself a chance.\n"; } // will zero real or complex values - fnc_values_.resize(dmn.get_size(), {}); + fnc_values_.resize(dmn.get_size()); } /** copy constructor @@ -512,7 +512,7 @@ function::function(std::initializer_list ini Nb_sbdms(dmn.get_leaf_domain_sizes().size()) { start_ = 0; end_ = dmn.get_size(); - fnc_values_.resize(dmn.get_size(), {}); + fnc_values_.resize(dmn.get_size()); std::copy_n(init_list.begin(), init_list.size(), fnc_values_.begin()); } @@ -657,7 +657,7 @@ function& function::operator=( Nb_sbdms = other.dmn.get_leaf_domain_sizes().size(); start_ = other.start_; end_ = other.end_; - fnc_values_.resize(other.size(), {}); + fnc_values_.resize(other.size()); } fnc_values_ = other.fnc_values_; } @@ -679,7 +679,7 @@ inline function& function::operator=( Nb_sbdms = other.dmn.get_leaf_domain_sizes().size(); start_ = other.start_; end_ = other.end_; - fnc_values_.resize(other.size(), {}); + fnc_values_.resize(other.size()); } fnc_values_ = other.fnc_values_; } @@ -694,7 +694,7 @@ inline function& function::operator=( Nb_sbdms = other.dmn.get_leaf_domain_sizes().size(); start_ = other.start_; end_ = other.end_; - fnc_values_.resize(other.size(), {}); + fnc_values_.resize(other.size()); } auto kConvert = [](auto& kvec) -> std::vector { std::vector k_converted(kvec.size()); diff --git a/include/dca/io/adios2/adios2_global.hpp b/include/dca/io/adios2/adios2_global.hpp index 0e2772596..6f06144fc 100644 --- a/include/dca/io/adios2/adios2_global.hpp +++ b/include/dca/io/adios2/adios2_global.hpp @@ -10,7 +10,7 @@ // ADIOS2 global object -/** For testing only, in main_dca adios2::ADIOS is a member of dca_loop +/** For testing only, in main_dca adios2::ADIOS is a member of concurrency the concurrency context owned by main. */ class GlobalAdios { diff --git a/include/dca/io/adios2/adios2_reader.hpp b/include/dca/io/adios2/adios2_reader.hpp index b6b7e63fa..8e5e08a96 100644 --- a/include/dca/io/adios2/adios2_reader.hpp +++ b/include/dca/io/adios2/adios2_reader.hpp @@ -43,12 +43,8 @@ class ADIOS2Reader { typedef adios2::ADIOS file_type; public: - // In: verbose. If true, the reader outputs a short log whenever it is executed. - ADIOS2Reader(adios2::ADIOS& adios, const CT* concurrency, - bool verbose = false); - - /** maybe this one uses a singleton adios2::ADIOS accessor */ - ADIOS2Reader(const CT* concurrency, + ADIOS2Reader() = delete; + ADIOS2Reader(CT& concurrency, bool verbose = false); ~ADIOS2Reader(); @@ -150,9 +146,10 @@ class ADIOS2Reader { template std::string VectorToString(const std::vector& v); + // concurrency_ must be before the adios reference + const CT& concurrency_; adios2::ADIOS& adios_; const bool verbose_; - const CT* concurrency_; adios2::IO io_; std::string io_name_; @@ -394,7 +391,7 @@ bool ADIOS2Reader::execute(const std::string& name, func::function::execute(const std::string& name, func::function::execute(const std::string& name, func::function::execute(const std::string& name, func::function::VectorToString(const std::vector& v) { } extern template class ADIOS2Reader; +extern template class ADIOS2Reader; #ifdef DCA_HAVE_MPI extern template class ADIOS2Reader; +//this has become required for some reason. Const correctness for the ADIOS2Reader has gone wrong +extern template class ADIOS2Reader; #endif } // namespace io } // namespace dca #endif // DCA_IO_ADIOS2_ADIOS2_READER_HPP + diff --git a/include/dca/io/adios2/adios2_writer.hpp b/include/dca/io/adios2/adios2_writer.hpp index 9c3f9ed77..e1ff3481a 100644 --- a/include/dca/io/adios2/adios2_writer.hpp +++ b/include/dca/io/adios2/adios2_writer.hpp @@ -48,6 +48,8 @@ class ADIOS2Writer { public: ADIOS2Writer() = delete; // In: verbose. If true, the writer outputs a short log whenever it is executed. + // Concurrency objects now carry an adios pointer if DCA_HAVE_ADIOS2. + ADIOS2Writer(const CT* concurrency, bool verbose = false); ADIOS2Writer(adios2::ADIOS& adios, const CT* concurrency, bool verbose = false); ~ADIOS2Writer(); @@ -171,9 +173,9 @@ class ADIOS2Writer { } private: + const CT* concurrency_; adios2::ADIOS& adios_; bool verbose_; - const CT* concurrency_; /** For the case of sample it is necessary to explicitly write a rank "local" variable * of this type. diff --git a/include/dca/io/hdf5/hdf5_reader.hpp b/include/dca/io/hdf5/hdf5_reader.hpp index d8147f351..efe60c060 100644 --- a/include/dca/io/hdf5/hdf5_reader.hpp +++ b/include/dca/io/hdf5/hdf5_reader.hpp @@ -185,7 +185,7 @@ bool HDF5Reader::execute(const std::string& name, std::vector data(size); - H5::DataSet dataset = file_->openDataSet(name.c_str()); + H5::DataSet dataset = file_->openDataSet(full_name.c_str()); dataset.read(data.data(), type); value.resize(size); diff --git a/include/dca/io/reader.hpp b/include/dca/io/reader.hpp index ba9251641..930cb4f6d 100644 --- a/include/dca/io/reader.hpp +++ b/include/dca/io/reader.hpp @@ -34,18 +34,20 @@ namespace dca::io { template class Reader { public: - using DCAReaderVariant = std::variant + io::ADIOS2Reader, #endif - >; + io::HDF5Reader, io::JSONReader>; /** * \param[in] concureency reference to current concurrency env * \param[in] format format as IOType * \param[in] verbose If true, reader does some logging */ - Reader(const Concurrency& concurrency, const IOType format, bool verbose = true) + Reader(Concurrency& concurrency, const std::string& format_tag, bool verbose = true) + : Reader(concurrency, stringToIOType(format_tag), verbose) {} + + Reader(Concurrency& concurrency, IOType format, bool verbose = true) : concurrency_(concurrency) { switch (format) { case IOType::HDF5: @@ -56,81 +58,126 @@ class Reader { break; case IOType::ADIOS2: #ifdef DCA_HAVE_ADIOS2 - reader_.template emplace>(&concurrency, verbose); + reader_.template emplace>(concurrency, verbose); #endif break; } } - /** DEPRECATED -- Support for format type as string - * \param[in] format string representation of format, since parameters still use string - * - * \todo remove need for this constructor store IO format as enum class type. - */ - Reader(const Concurrency& concurrency, const std::string& format, bool verbose = true) - : Reader(concurrency, stringToIOType(format), verbose) {} - -#ifdef DCA_HAVE_ADIOS2 - Reader(adios2::ADIOS& adios, const Concurrency& concurrency, const std::string& format, - bool verbose = true) - : concurrency_(concurrency) { - if (format == "HDF5") { - throw(std::logic_error("ADIOS2 reference not an argument for hdf5 reader")); - } - else if (format == "JSON") { - throw(std::logic_error("ADIOS2 reference not an argument for json reader")); - } - else if (format == "ADIOS2") { - reader_.template emplace>(adios, &concurrency, verbose); - } - else { - throw(std::logic_error("Invalid input format")); - } - } -#endif - + constexpr static bool is_reader = true; constexpr static bool is_writer = false; void open_file(const std::string& file_name) { - std::visit([&](auto& var) { var.open_file(file_name); }, reader_); + std::visit( + [&](auto& var) { + if constexpr (std::is_same_v) + throw std::runtime_error( + "No operations should ever occur on monostate in reader variant"); + else + var.open_file(file_name); + }, + reader_); } void close_file() { - std::visit([&](auto& var) { var.close_file(); }, reader_); + std::visit( + [&](auto& var) { + if constexpr (std::is_same_v) + throw std::runtime_error( + "No operations should ever occur on monostate in reader variant"); + else + var.close_file(); + }, + reader_); } /** For reading input there is great utility in knowing if a group is present. * It isn't an exceptional circumstance if a group is not present. */ bool open_group(const std::string& new_path) { - return std::visit([&](auto& var) -> bool { return var.open_group(new_path); }, reader_); + return std::visit( + [&](auto& var) -> bool { + if constexpr (std::is_same_v) + throw std::runtime_error( + "No operations should ever occur on monostate in reader variant"); + else + return var.open_group(new_path); + }, + reader_); } void close_group() { - std::visit([&](auto& var) { var.close_group(); }, reader_); + std::visit( + [&](auto& var) { + if constexpr (std::is_same_v) + throw std::runtime_error( + "No operations should ever occur on monostate in reader variant"); + else + var.close_group(); + }, + reader_); } long getStepCount() { - return std::visit([&](auto& var) ->std::size_t { return var.getStepCount(); }, reader_); + return std::visit( + [&](auto& var) -> std::size_t { + if constexpr (std::is_same_v) + throw std::runtime_error( + "No operations should ever occur on monostate in reader variant"); + else + return var.getStepCount(); + }, + reader_); } void begin_step() { - std::visit([&](auto& var) { var.begin_step(); }, reader_); + std::visit( + [&](auto& var) { + if constexpr (std::is_same_v) + throw std::runtime_error( + "No operations should ever occur on monostate in reader variant"); + else + var.begin_step(); + }, + reader_); } void end_step() { - std::visit([&](auto& var) { var.end_step(); }, reader_); + std::visit( + [&](auto& var) { + if constexpr (std::is_same_v) + throw std::runtime_error( + "No operations should ever occur on monostate in reader variant"); + else + var.end_step(); + }, + reader_); } std::string get_path() { - return std::visit([&](auto& var) -> std::string { return var.get_path(); }, reader_); + return std::visit( + [&](auto& var) -> std::string { + if constexpr (std::is_same_v) + throw std::runtime_error( + "No operations should ever occur on monostate in reader variant"); + else + return var.get_path(); + }, + reader_); } template bool execute(Args&&... args) noexcept { - return std::visit([&](auto& var) -> bool { return var.execute(std::forward(args)...); }, - reader_); + return std::visit( + [&](auto& var) -> bool { + if constexpr (std::is_same_v) + throw std::runtime_error( + "No operations should ever occur on monostate in reader variant"); + else + return var.execute(std::forward(args)...); + }, + reader_); } DCAReaderVariant& getUnderlying() { @@ -139,7 +186,7 @@ class Reader { private: DCAReaderVariant reader_; - const Concurrency& concurrency_; + Concurrency& concurrency_; }; extern template class Reader; diff --git a/include/dca/io/writer.hpp b/include/dca/io/writer.hpp index ec79f8484..3a1767f06 100644 --- a/include/dca/io/writer.hpp +++ b/include/dca/io/writer.hpp @@ -38,7 +38,6 @@ class Writer { #endif >; /** Constructor for writer, pretty tortured due to optional Adios2. - * \param [in] adios if build with ADIOS2 support adios2 env reference. * \param [in] concurrency reference to the concurrency environment * \param [in] format output format: ADIOS2, HDF5 or JSON. * \param [in] verbose If true, the writer outputs a short log whenever it is executed. @@ -47,13 +46,10 @@ class Writer { * used by the writer. */ Writer( -#ifdef DCA_HAVE_ADIOS2 - adios2::ADIOS& adios, -#endif Concurrency& concurrency, const std::string& format, bool verbose = true) : #ifdef DCA_HAVE_ADIOS2 - adios_(adios), + adios_(concurrency.get_adios()), #endif concurrency_(concurrency) { if (format == "HDF5") { @@ -64,7 +60,7 @@ class Writer { } #ifdef DCA_HAVE_ADIOS2 else if (format == "ADIOS2") { - writer_.template emplace>(adios_, &concurrency_, verbose); + writer_.template emplace>(&concurrency_, verbose); } #endif else { diff --git a/include/dca/linalg/matrix.hpp b/include/dca/linalg/matrix.hpp index 138eff146..e55181138 100644 --- a/include/dca/linalg/matrix.hpp +++ b/include/dca/linalg/matrix.hpp @@ -306,7 +306,7 @@ Matrix::Matrix(const Matrix::Matrix(const std::string& name, std::pai assert(capacity.first >= size_.first && capacity.second >= size_.second); assert(capacity_.first >= capacity.first && capacity_.second >= capacity.second); - data_ = Allocator::allocate(nrElements(capacity_)); + data_ = ALLOC::allocate(nrElements(capacity_)); util::Memory::setToZero(data_, nrElements(capacity_)); } diff --git a/include/dca/linalg/matrixop.hpp b/include/dca/linalg/matrixop.hpp index 2fb4a4feb..87c650d25 100644 --- a/include/dca/linalg/matrixop.hpp +++ b/include/dca/linalg/matrixop.hpp @@ -68,7 +68,8 @@ inline void copyMatrixToArray(const Matrix& mat, Scalar* a, // Copies the m by n matrix stored in a to the matrix mat. // Preconditions: lda >= m. template -inline void copyArrayToMatrix(int m, int n, const Scalar* a, int lda, Matrix& mat) { +inline void copyArrayToMatrix(int m, int n, const Scalar* a, int lda, + Matrix& mat) { assert(lda >= m); mat.resizeNoCopy(std::make_pair(m, n)); lapack::lacpy("A", mat.nrRows(), mat.nrCols(), a, lda, mat.ptr(), mat.leadingDimension()); @@ -95,8 +96,9 @@ inline void copyCol(const Matrix& mat_x, int jx, // 0 <= j_x[i] < mat_x.nrCols() for 0 <= i < j_x.size(), // 0 <= j_y[i] < mat_y.nrCols() for 0 <= i < j_x.size(). template -inline void copyCols(const Matrix& mat_x, const Vec& j_x, Matrix& mat_y, - const Vec& j_y, int /*thread_id*/ = 0, int /*stream_id*/ = 0) { +inline void copyCols(const Matrix& mat_x, const Vec& j_x, + Matrix& mat_y, const Vec& j_y, int /*thread_id*/ = 0, + int /*stream_id*/ = 0) { assert(j_x.size() <= j_y.size()); for (int ind_j = 0; ind_j < j_x.size(); ++ind_j) @@ -127,7 +129,6 @@ inline void copyCols(const Matrix& mat_x, const Vector& j blas::copyCols(mat_x.nrRows(), j_x.size(), j_x.ptr(), mat_x.ptr(), mat_x.leadingDimension(), mat_y.ptr(), mat_y.leadingDimension(), thread_id, stream_id); checkErrorsCudaDebug(); - } #endif // DCA_HAVE_GPU @@ -153,8 +154,9 @@ inline void copyRow(const Matrix& mat_x, int ix, // 0 <= i_x[i] < mat_x.nrRows() for 0 <= i < i_x.size(), // 0 <= i_y[i] < mat_y.nrRows() for 0 <= i < i_x.size(). template -inline void copyRows(const Matrix& mat_x, const Vec& i_x, Matrix& mat_y, - const Vec& i_y, const int /*thread_id*/ = 0, const int /*stream_id*/ = 0) { +inline void copyRows(const Matrix& mat_x, const Vec& i_x, + Matrix& mat_y, const Vec& i_y, const int /*thread_id*/ = 0, + const int /*stream_id*/ = 0) { assert(i_x.size() <= i_y.size()); assert(mat_x.nrCols() == mat_y.nrCols()); @@ -200,7 +202,6 @@ auto difference(const Matrix& a, const Matrix& a, const Matrix +template auto difference(const Matrix& a, const Matrix& b, double diff_threshold = 1e-3) { Matrix cp_a(a); @@ -295,8 +296,9 @@ void insertRow(Matrix& mat, int i) { // Preconditions: mat is a square matrix. // Postconditions: ipiv and work are resized to the needed dimension. // \todo consider doing inverse at full precision reguardless of incoming Scalar precision - template class MatrixType> - void inverse(MatrixType& mat, Vector& ipiv, +template class ALLOC, + template class MatrixType> +void inverse(MatrixType>& mat, Vector& ipiv, Vector& work) { assert(mat.is_square()); @@ -312,15 +314,17 @@ void insertRow(Matrix& mat, int i) { work.ptr(), lwork); } - template class MatrixType> - void inverse(MatrixType& mat) { + template class ALLOC, + template class MatrixType> +void inverse(MatrixType>& mat) { Vector ipiv; Vector work; inverse(mat, ipiv, work); } template -void smallInverse(Matrix& m_inv, Vector& ipiv, Vector& work) { +void smallInverse(Matrix& m_inv, Vector& ipiv, + Vector& work) { assert(m_inv.is_square()); switch (m_inv.nrCols()) { case 1: @@ -631,8 +635,8 @@ inline void swapRowAndCol(Matrix& mat, int i1, int i2, int // a.nrRows() == y.size() if transa == 'N', a.nrCols() == y.size() otherwise, // a.nrCols() == x.size() if transa == 'N', a.nrRows() == x.size() otherwise. template -void gemv(char transa, Scalar alpha, const Matrix& a, const Vector& x, - Scalar beta, Vector& y) { +void gemv(char transa, Scalar alpha, const Matrix& a, + const Vector& x, Scalar beta, Vector& y) { if (transa == 'N') { assert(a.nrRows() == y.size()); assert(a.nrCols() == x.size()); @@ -669,11 +673,12 @@ void gemv(char transa, const Matrix& a, const Vector class MatrixA, - template class MatrixB, template class MatrixC, class ALLOC1, class ALLOC2, class ALLOC3> - void gemm(char transa, char transb, Scalar alpha, const MatrixA& a, - const MatrixB& b, Scalar beta, MatrixC& c, - int thread_id = 0, int stream_id = 0) { +template class MatrixA, + template class MatrixB, + template class MatrixC, class ALLOC1, class ALLOC2, class ALLOC3> +void gemm(char transa, char transb, Scalar alpha, const MatrixA& a, + const MatrixB& b, Scalar beta, + MatrixC& c, int thread_id = 0, int stream_id = 0) { int m = c.nrRows(); int n = c.nrCols(); int k; @@ -707,21 +712,26 @@ void gemv(char transa, const Matrix& a, const Vector class MatrixA, - template class MatrixB, template class MatrixC> - inline void gemm(const MatrixA& a, const MatrixB& b, - MatrixC& c, int thread_id = 0, int stream_id = 0) { +template class MatrixA, + template class MatrixB, + template class MatrixC> +inline void gemm(const MatrixA& a, + const MatrixB& b, + MatrixC& c, int thread_id = 0, int stream_id = 0) { gemm('N', 'N', 1., a, b, 0., c, thread_id, stream_id); } // Performs the matrix-matrix multiplication c <- alpha * a * b + beta * c, // In/Out: c ('In' only if beta != 0) // Preconditions: a.nrRows() == c.nrRows(), b.nrCols() == c.nrCols() and a.nrCols() == b.nrRows() - template class MatrixA, - template class MatrixB, template class MatrixC> - inline void gemm(Scalar alpha, const MatrixA& a, - const MatrixB& b, Scalar beta, - MatrixC& c, int thread_id = 0, int stream_id = 0) { +template class MatrixA, + template class MatrixB, + template class MatrixC> +inline void gemm(Scalar alpha, const MatrixA& a, + const MatrixB& b, Scalar beta, + MatrixC& c, int thread_id = 0, int stream_id = 0) { gemm('N', 'N', alpha, a, b, beta, c, thread_id, stream_id); } @@ -734,7 +744,7 @@ void gemv(char transa, const Matrix& a, const Vector +template inline void gemm(char transa, char transb, const Matrix& a, const Matrix& b, Matrix& c, int thread_id = 0, int stream_id = 0) { @@ -774,8 +784,8 @@ void trsm(char uplo, char diag, const Matrix& a, // ka == kb, where ka = a.nrCols() if transa == 'N', ka = a.nrRows() otherwise and // kb = b.nrRows() if transb == 'N', kb = b.nrCols() otherwise. template -void gemm(char transa, char transb, Matrix& a, Matrix, CPU, ALLOC>& b, - Matrix, CPU, ALLOC>& c) { +void gemm(char transa, char transb, Matrix& a, + Matrix, CPU, ALLOC>& b, Matrix, CPU, ALLOC>& c) { Matrix b_part(b.size()); Matrix c_re(c.size()); Matrix c_im(c.size()); @@ -858,7 +868,8 @@ static void gemm(char transa, char transb, Matrix, CPU, ALL // and kb = b[0].nrRows() if transb == 'N', kb = b[0].nrCols() otherwise. template void multiply(char transa, char transb, const std::array, 2>& a, - const std::array, 2>& b, std::array, 2>& c, + const std::array, 2>& b, + std::array, 2>& c, std::array, 5>& work) { assert(a[0].size() == a[1].size()); assert(b[0].size() == b[1].size()); @@ -895,7 +906,8 @@ void multiply(char transa, char transb, const std::array void multiply(const std::array, 2>& a, - const std::array, 2>& b, std::array, 2>& c, + const std::array, 2>& b, + std::array, 2>& c, std::array, 5>& work) { multiply('N', 'N', a, b, c, work); } @@ -1018,8 +1030,9 @@ inline void multiplyDiagonalRight(const Matrix& a, const Vector -void eigensolver(char jobvl, char jobvr, const Matrix& a, Vector& lambda_re, - Vector& lambda_im, Matrix& vl, Matrix& vr) { +void eigensolver(char jobvl, char jobvr, const Matrix& a, + Vector& lambda_re, Vector& lambda_im, + Matrix& vl, Matrix& vr) { assert(a.is_square()); Matrix a_copy(a); @@ -1055,8 +1068,9 @@ void eigensolver(char jobvl, char jobvr, const Matrix& a, Ve // a is a square matrix. // Postcondition: lambda, is resized, vl if jobvl == 'V', vr if jobvr == 'V' are resized. template -void eigensolver(char jobvl, char jobvr, const Matrix, CPU>& a, - Vector, CPU>& lambda, Matrix, CPU, ALLOC>& vl, +void eigensolver(char jobvl, char jobvr, const Matrix, CPU, ALLOC>& a, + Vector, CPU>& lambda, + Matrix, CPU, ALLOC>& vl, Matrix, CPU, ALLOC>& vr) { assert(a.is_square()); @@ -1145,7 +1159,8 @@ void eigensolverHermitian(char jobv, char uplo, const Matrix -void eigensolverGreensFunctionMatrix(char jobv, char uplo, const Matrix, CPU, ALLOC>& a, +void eigensolverGreensFunctionMatrix(char jobv, char uplo, + const Matrix, CPU, ALLOC>& a, Vector& lambda, Matrix, CPU, ALLOC>& v) { assert(a.is_square()); @@ -1174,7 +1189,8 @@ void eigensolverGreensFunctionMatrix(char jobv, char uplo, const Matrix -void pseudoInverse(const Matrix& a, Matrix& a_inv, double eps = 1.e-6) { +void pseudoInverse(const Matrix& a, Matrix& a_inv, + double eps = 1.e-6) { int m = a.nrRows(); int n = a.nrCols(); a_inv.resizeNoCopy(std::make_pair(n, m)); @@ -1236,7 +1252,7 @@ void pseudoInverse(const Matrix& a, Matrix class MatrixType, typename Scalar, class ALLOC> +template