Commit

Merge pull request CompFUSE#317 from PDoakORNL/warning_fixes
Fix many compilation Warnings
PDoakORNL authored Jan 5, 2024
2 parents a0502d7 + 1371cce commit b60e876
Showing 17 changed files with 47 additions and 56 deletions.
include/dca/linalg/reshapable_matrix.hpp (2 changes: 1 addition & 1 deletion)
@@ -395,7 +395,7 @@ void ReshapableMatrix<ScalarType, device_name, Allocator>::setAsync(

template <typename ScalarType, DeviceType device_name, class Allocator>
void ReshapableMatrix<ScalarType, device_name, Allocator>::setToZero(cudaStream_t stream) {
-  cudaMemsetAsync(data_, 0, leadingDimension() * nrCols() * sizeof(ScalarType), stream);
+  checkRC(cudaMemsetAsync(data_, 0, leadingDimension() * nrCols() * sizeof(ScalarType), stream));
}

#else // DCA_HAVE_GPU
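The recurring change in this PR wraps each CUDA runtime call in checkRC(...) so the returned cudaError_t is actually inspected instead of silently dropped, which is what quiets the warnings about ignored return values. The helper itself is not part of this diff; a minimal sketch of such a return-code check, assuming a throwing failure policy (the real DCA++ helper may differ in name, namespace, and behavior):

// Hypothetical sketch only, inferred from the diff; not DCA++'s actual code.
#include <cuda_runtime.h>
#include <stdexcept>
#include <string>

inline void checkRC(cudaError_t rc) {
  if (rc != cudaSuccess)
    throw std::runtime_error(std::string("CUDA runtime error: ") + cudaGetErrorString(rc));
}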
include/dca/linalg/util/allocators/managed_allocator.hpp (2 changes: 1 addition & 1 deletion)
@@ -40,7 +40,7 @@ class ManagedAllocator {
}

if (stream_)
-    cudaStreamAttachMemAsync(stream_, reinterpret_cast<void**>(&ptr_));
+    checkRC(cudaStreamAttachMemAsync(stream_, reinterpret_cast<void**>(&ptr_)));

return ptr_;
}
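For context on the wrapped call: cudaStreamAttachMemAsync associates managed memory with a single stream. A sketch of how an allocator like this one typically pairs allocation with attachment, reusing the hypothetical checkRC above (allocateManaged is an invented name, not this file's API):

// Assumed shape of the allocate path, condensed from the hunk above.
#include <cuda_runtime.h>
#include <cstddef>

void* allocateManaged(std::size_t bytes, cudaStream_t stream) {
  void* ptr = nullptr;
  checkRC(cudaMallocManaged(&ptr, bytes, cudaMemAttachGlobal));
  if (stream)
    checkRC(cudaStreamAttachMemAsync(stream, ptr));  // pages migrate with this stream
  return ptr;
}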
include/dca/linalg/util/memory.hpp (10 changes: 5 additions & 5 deletions)
@@ -87,24 +87,24 @@ struct Memory<GPU> {
/// Specialization for float2, double2, cuComplex, cuDoubleComplex
template <typename ScalarType>
static std::enable_if_t<dca::util::IsCUDAComplex_t<ScalarType>::value == true, void> setToZero(ScalarType ptr, size_t size) {
-  cudaMemset(ptr, 0, size * sizeof(ScalarType));
+  checkRC(cudaMemset(ptr, 0, size * sizeof(ScalarType)));
}

template <typename ScalarType>
static std::enable_if_t<std::is_arithmetic<ScalarType>::value == true, void> setToZero(
ScalarType* ptr, size_t size) {
-  cudaMemset(ptr, 0, size * sizeof(ScalarType));
+  checkRC(cudaMemset(ptr, 0, size * sizeof(ScalarType)));
}
template <typename ScalarType>
static std::enable_if_t<std::is_arithmetic<ScalarType>::value == true, void> setToZero(
std::complex<ScalarType>* ptr, size_t size) {
-  cudaMemset(ptr, 0, size * sizeof(std::complex<ScalarType>));
+  checkRC(cudaMemset(ptr, 0, size * sizeof(std::complex<ScalarType>)));
}

template <typename Scalar>
static std::enable_if_t<dca::util::IsCUDAComplex_t<Scalar>::value == true, void> setToZero(
Scalar* ptr, size_t size) {
-  cudaMemset(ptr, 0, size * sizeof(Scalar));
+  checkRC(cudaMemset(ptr, 0, size * sizeof(Scalar)));
}


@@ -117,7 +117,7 @@ struct Memory<GPU> {

template <typename ScalarType>
static void setToZeroAsync(ScalarType* ptr, size_t size, const GpuStream& stream) {
-  cudaMemsetAsync(ptr, 0, size * sizeof(ScalarType), stream);
+  checkRC(cudaMemsetAsync(ptr, 0, size * sizeof(ScalarType), stream));
}

template <typename ScalarType>
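The setToZeroAsync overload above passes a const GpuStream& straight to cudaMemsetAsync, which implies GpuStream converts implicitly to cudaStream_t. A minimal stand-in for that inferred behavior (not DCA++'s actual class):

// Illustrative stand-in only: the conversion operator is inferred from usage.
#include <cuda_runtime.h>

class GpuStream {
public:
  GpuStream() { checkRC(cudaStreamCreate(&stream_)); }
  ~GpuStream() { cudaStreamDestroy(stream_); }  // no checkRC: destructors must not throw
  GpuStream(const GpuStream&) = delete;
  GpuStream& operator=(const GpuStream&) = delete;
  operator cudaStream_t() const { return stream_; }  // enables the direct pass-through
private:
  cudaStream_t stream_ = nullptr;
};

The stream_.streamActually() calls in the next file suggest the real class also exposes the raw handle through an explicit accessor.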
include/dca/math/nfft/dnfft_1d_gpu.hpp (20 changes: 10 additions & 10 deletions)
@@ -152,11 +152,11 @@ Dnfft1DGpu<Scalar, WDmn, RDmn, oversampling, CUBIC>::Dnfft1DGpu(const double bet
template <typename Scalar, typename WDmn, typename RDmn, int oversampling>
void Dnfft1DGpu<Scalar, WDmn, RDmn, oversampling, CUBIC>::getDeviceData(
const linalg::Matrix<Scalar, linalg::GPU>& data) {
-  cudaMemcpy2DAsync(f_tau_.values(), f_tau_[0] * sizeof(Scalar),
+  checkRC(cudaMemcpy2DAsync(f_tau_.values(), f_tau_[0] * sizeof(Scalar),
                    data.ptr(), data.leadingDimension() * sizeof(Scalar),
                    data.nrRows() * sizeof(Scalar), data.nrCols(), cudaMemcpyDeviceToHost,
-                   stream_);
-  cudaStreamSynchronize(stream_.streamActually());
+                   stream_));
+  checkRC(cudaStreamSynchronize(stream_.streamActually()));
}

template <typename Scalar, typename WDmn, typename RDmn, int oversampling>
@@ -174,8 +174,8 @@ void Dnfft1DGpu<Scalar, WDmn, RDmn, oversampling, CUBIC>::initializeDeviceCoeffi
const auto& host_coeff = BaseClass::get_cubic_convolution_matrices();
auto& dev_coeff = get_device_cubic_coeff();
dev_coeff.resizeNoCopy(host_coeff.size());
-  cudaMemcpy(dev_coeff.ptr(), host_coeff.values(), host_coeff.size() * sizeof(Real),
-             cudaMemcpyHostToDevice);
+  checkRC(cudaMemcpy(dev_coeff.ptr(), host_coeff.values(), host_coeff.size() * sizeof(Real),
+                     cudaMemcpyHostToDevice));

const auto& sub_matrix = RDmn::parameter_type::get_subtract_matrix();
const auto& add_matrix = RDmn::parameter_type::get_add_matrix();
@@ -237,10 +237,10 @@ template <typename OtherScalar>
void Dnfft1DGpu<Scalar, WDmn, RDmn, oversampling, CUBIC>::finalize(
func::function<std::complex<OtherScalar>, func::dmn_variadic<WDmn, PDmn>>& f_w, bool get_square) {
auto get_device_data = [&](const linalg::Matrix<Scalar, linalg::GPU>& data) {
-    cudaMemcpy2DAsync(f_tau_.values(), f_tau_[0] * sizeof(Scalar), data.ptr(),
+    checkRC(cudaMemcpy2DAsync(f_tau_.values(), f_tau_[0] * sizeof(Scalar), data.ptr(),
                       data.leadingDimension() * sizeof(Scalar), data.nrRows() * sizeof(Scalar),
-                      data.nrCols(), cudaMemcpyDeviceToHost, stream_);
-    cudaStreamSynchronize(stream_);
+                      data.nrCols(), cudaMemcpyDeviceToHost, stream_));
+    checkRC(cudaStreamSynchronize(stream_));
};

if (!get_square)
@@ -254,7 +254,7 @@ void Dnfft1DGpu<Scalar, WDmn, RDmn, oversampling, CUBIC>::finalize(
template <typename Scalar, typename WDmn, typename RDmn, int oversampling>
Dnfft1DGpu<Scalar, WDmn, RDmn, oversampling, CUBIC>& Dnfft1DGpu<Scalar, WDmn, RDmn, oversampling,
CUBIC>::operator+=(ThisType& other) {
-  cudaStreamSynchronize(other.stream_);
+  checkRC(cudaStreamSynchronize(other.stream_));

details::sum(other.accumulation_matrix_.ptr(), other.accumulation_matrix_.leadingDimension(),
accumulation_matrix_.ptr(), accumulation_matrix_.leadingDimension(),
@@ -265,7 +265,7 @@ Dnfft1DGpu<Scalar, WDmn, RDmn, oversampling, CUBIC>& Dnfft1DGpu<Scalar, WDmn, RD
accumulation_matrix_sqr_.leadingDimension(), accumulation_matrix_sqr_.nrRows(),
accumulation_matrix_sqr_.nrCols(), stream_);

-  cudaStreamSynchronize(stream_);
+  checkRC(cudaStreamSynchronize(stream_));
return *this;
}

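The 2-D copies in this file move one matrix column per transfer row, since the matrices are column-major: width is nrRows() * sizeof(Scalar) bytes, height is nrCols(), and both pitches are leading dimensions in bytes. Schematically, with invented names (copyMatrixToHost and its parameters are placeholders, not DCA++ API):

// cudaMemcpy2DAsync(dst, dpitch, src, spitch, widthInBytes, height, kind, stream)
// copies `height` rows of `widthInBytes` bytes, advancing each side by its pitch.
#include <cuda_runtime.h>

template <typename Scalar>
void copyMatrixToHost(Scalar* host_ptr, int host_ld, const Scalar* dev_ptr, int dev_ld,
                      int n_rows, int n_cols, cudaStream_t stream) {
  checkRC(cudaMemcpy2DAsync(host_ptr, host_ld * sizeof(Scalar),   // dst, dpitch
                            dev_ptr, dev_ld * sizeof(Scalar),     // src, spitch
                            n_rows * sizeof(Scalar), n_cols,      // width, height
                            cudaMemcpyDeviceToHost, stream));
  checkRC(cudaStreamSynchronize(stream));  // the copy is asynchronous: block before reading
}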
include/dca/phys/dca_loop/dca_loop.hpp (3 changes: 2 additions & 1 deletion)
@@ -207,9 +207,9 @@ void DcaLoop<ParametersType, DDT, MCIntegratorType, DIST>::initialize() {
static_assert(std::is_same<DDT, dca::phys::DcaData<ParametersType, DIST>>::value);
int last_completed = -1;
auto& autoresume_filename = parameters.get_autoresume_filename();
-  io::IOType iotype = io::extensionToIOType(autoresume_filename);
if (parameters.autoresume()) {
#ifdef DCA_HAVE_ADIOS2
+    io::IOType iotype = io::extensionToIOType(autoresume_filename);
if (iotype == io::IOType::ADIOS2)
last_completed = DCA_info_struct.readData(autoresume_filename, parameters.get_output_format(),
concurrency, concurrency.get_adios());
@@ -236,6 +236,7 @@ void DcaLoop<ParametersType, DDT, MCIntegratorType, DIST>::initialize() {
}
else if (parameters.get_initial_self_energy() != "zero") {
#ifdef DCA_HAVE_ADIOS2
+    io::IOType iotype = io::extensionToIOType(autoresume_filename);
if (io::extensionToIOType(parameters.get_initial_self_energy()) == io::IOType::ADIOS2)
MOMS.initializeSigma(concurrency.get_adios(), parameters.get_initial_self_energy());
else
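The warning fixed here: iotype was computed unconditionally but consumed only inside the DCA_HAVE_ADIOS2 blocks, so builds without ADIOS2 flagged an unused variable. Moving the declaration inside the #ifdef removes it from those builds entirely. In miniature (load and readWithAdios are invented names):

void load(const std::string& filename) {
#ifdef DCA_HAVE_ADIOS2
  // Declared only where it is used, so -Wunused-variable stays quiet otherwise.
  io::IOType iotype = io::extensionToIOType(filename);
  if (iotype == io::IOType::ADIOS2)
    readWithAdios(filename);
#endif
}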
@@ -151,8 +151,8 @@ template <class Parameters, DistType DIST>
void CtintWalkerSubmatrixGpu<Parameters, DIST>::synchronize() const {
Profiler profiler(__FUNCTION__, "CT-INT GPU walker", __LINE__, thread_id_);

-  cudaStreamSynchronize(get_stream(0));
-  cudaStreamSynchronize(get_stream(1));
+  checkRC(cudaStreamSynchronize(get_stream(0)));
+  checkRC(cudaStreamSynchronize(get_stream(1)));
}

template <class Parameters, DistType DIST>
include/dca/platform/dca_gpu_complex.h (3 changes: 3 additions & 0 deletions)
@@ -27,7 +27,10 @@

#if defined(DCA_HAVE_HIP)
// hipComplex types are faulty so we use the magma complex types and operators
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
#include <magma_operators.h>
+#pragma GCC diagnostic pop
#include "dca/util/cuda2hip.h"

namespace dca {
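The push/ignored/pop sandwich silences a known warning for one third-party include without weakening diagnostics for the rest of the translation unit; clang honors the #pragma GCC spelling as well. Generic form (the header name is a placeholder):

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#include <noisy_third_party.h>  // only this header's warnings are suppressed
#pragma GCC diagnostic pop      // previous diagnostic state restored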
libs/googletest-1.8.0/include/gtest/gtest.h (6 changes: 6 additions & 0 deletions)
@@ -55,6 +55,10 @@
#include <ostream>
#include <vector>

+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wsuggest-override"
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+
#include "gtest/internal/gtest-internal.h"
#include "gtest/internal/gtest-string.h"
#include "gtest/gtest-death-test.h"
@@ -65,6 +69,8 @@
#include "gtest/gtest-test-part.h"
#include "gtest/gtest-typed-test.h"

+#pragma GCC diagnostic pop
+
// Depending on the platform, different string classes are available.
// On Linux, in addition to ::std::string, Google also makes use of
// class ::string, which has the same interface as ::std::string, but
src/io/io_types.cpp (2 changes: 2 additions & 0 deletions)
@@ -35,6 +35,7 @@ std::string toString(const IOType type) {
case IOType::ADIOS2:
return "ADIOS2";
}
return "UNKNOWN";
}

IOType extensionToIOType(const std::string& file_name) {
@@ -61,6 +62,7 @@ std::string extensionFromIOType(const IOType type) {
case IOType::ADIOS2:
return ".bp";
}
return ".txt";
}


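Both added returns address the same -Wreturn-type complaint: even when a switch covers every enumerator, the compiler assumes an out-of-range enum value can fall off the end of a value-returning function. Reduced sketch (enumerators other than ADIOS2 are assumptions for illustration):

#include <string>

enum class IOType { JSON, HDF5, ADIOS2 };  // JSON/HDF5 assumed, not taken from the diff

std::string toString(IOType type) {
  switch (type) {
    case IOType::JSON:   return "JSON";
    case IOType::HDF5:   return "HDF5";
    case IOType::ADIOS2: return "ADIOS2";
  }
  return "UNKNOWN";  // unreachable for valid values, but satisfies -Wreturn-type
}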
test/unit/linalg/matrix_gpu_test.cpp (2 changes: 1 addition & 1 deletion)
@@ -636,7 +636,7 @@ TEST(MatrixGPUTest, setTo) {
dca::linalg::Matrix<std::complex<double>, dca::linalg::GPU> mat_dev(name, size);
dca::linalg::Matrix<std::complex<double>, dca::linalg::CPU> mat_host_ret(name, size);

-  auto el_value = [](int i, int j) ->std::complex<double> { return {3 * i - 2 * j, 2*i - 3*j}; };
+  auto el_value = [](int i, int j) ->std::complex<double> { return {static_cast<double>(3 * i - 2 * j), static_cast<double>(2*i - 3*j)}; };
for (int j = 0; j < mat_host.nrCols(); ++j)
for (int i = 0; i < mat_host.nrRows(); ++i)
mat_host(i,j) = el_value(i,j);
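The casts fix a narrowing diagnostic: brace-initialization rejects implicit int-to-double conversion unless the value is a compile-time constant, so the integer expressions must be widened explicitly. Reduced sketch of the same fix:

#include <complex>

std::complex<double> element(int i, int j) {
  // return {3 * i - 2 * j, 2 * i - 3 * j};     // ill-formed: int -> double narrows in list-init
  return {static_cast<double>(3 * i - 2 * j),   // explicit widening, as in the change above
          static_cast<double>(2 * i - 3 * j)};
}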
test/unit/linalg/reshapable_matrix_gpu_test.cpp (2 changes: 1 addition & 1 deletion)
@@ -127,7 +127,7 @@ TEST(MatrixCPUTest, SetAsync) {

mat_gpu.setAsync(mat_cpu, stream);
mat_cpu_out.setAsync(mat_gpu, stream);
-  cudaStreamSynchronize(stream);
+  checkRC(cudaStreamSynchronize(stream));

EXPECT_EQ(mat_cpu, mat_cpu_out);
}
@@ -150,7 +150,7 @@ TEST_F(CtauxWalkerTest, InsertAndRemoveVertex) {
dca::linalg::Matrix<Scalar, dca::linalg::CPU> G0_dn_GPU;
G0_up_GPU.setAsync(gpu_walker.getG0Up(), 0, 0);
G0_dn_GPU.setAsync(gpu_walker.getG0Dn(), 0, 0);
-  cudaStreamSynchronize(0);
+  checkRC(cudaStreamSynchronize(0));

EXPECT_TRUE(G0_up_GPU == cpu_walker.getG0Up());
EXPECT_TRUE(G0_dn_GPU == cpu_walker.getG0Dn())
@@ -160,7 +160,7 @@ TEST_F(CtauxWalkerTest, InsertAndRemoveVertex) {
dca::linalg::Matrix<Scalar, dca::linalg::CPU> G_dn_GPU;
G_up_GPU.setAsync(gpu_walker.getGUp(), 0, 0);
G_dn_GPU.setAsync(gpu_walker.getGDn(), 0, 0);
-  cudaStreamSynchronize(0);
+  checkRC(cudaStreamSynchronize(0));

EXPECT_TRUE(G_up_GPU == cpu_walker.getGUp());
EXPECT_TRUE(G_dn_GPU == cpu_walker.getGDn())
@@ -170,7 +170,7 @@ TEST_F(CtauxWalkerTest, InsertAndRemoveVertex) {
dca::linalg::Matrix<Scalar, dca::linalg::CPU> N_dn_GPU;
N_up_GPU.setAsync(gpu_walker.getNUp(), 0, 0);
N_dn_GPU.setAsync(gpu_walker.getNDn(), 0, 0);
-  cudaStreamSynchronize(0);
+  checkRC(cudaStreamSynchronize(0));

EXPECT_TRUE(N_up_GPU == cpu_walker.getNUp());
EXPECT_TRUE(N_dn_GPU == cpu_walker.getNDn())
@@ -186,21 +186,21 @@ TEST_F(CtauxWalkerTest, InsertAndRemoveVertex) {
dca::linalg::Vector<Scalar, dca::linalg::CPU> expV_GPU;
expV_CPU.setAsync(cpu_walker.getExpV(), 0, 0);
expV_GPU.setAsync(gpu_walker.getExpV(), 0, 0);
-  cudaStreamSynchronize(0);
+  checkRC(cudaStreamSynchronize(0));
EXPECT_EQ(expV_CPU, expV_GPU);

dca::linalg::Vector<Scalar, dca::linalg::CPU> expDeltaV_CPU;
dca::linalg::Vector<Scalar, dca::linalg::CPU> expDeltaV_GPU;
expV_CPU.setAsync(cpu_walker.getExpDeltaV(), 0, 0);
expV_GPU.setAsync(gpu_walker.getExpDeltaV(), 0, 0);
-  cudaStreamSynchronize(0);
+  checkRC(cudaStreamSynchronize(0));
EXPECT_EQ(expDeltaV_CPU, expDeltaV_GPU);

dca::linalg::Vector<int, dca::linalg::CPU> vertex_ind_CPU;
dca::linalg::Vector<int, dca::linalg::CPU> vertex_ind_GPU;
vertex_ind_CPU.setAsync(cpu_walker.getVertexInd(), 0, 0);
vertex_ind_GPU.setAsync(gpu_walker.getVertexInd(), 0, 0);
-  cudaStreamSynchronize(0);
+  checkRC(cudaStreamSynchronize(0));
EXPECT_EQ(vertex_ind_CPU, vertex_ind_GPU);

cpu_walker.read_Gamma_matrices(dca::phys::e_UP);
@@ -213,18 +213,18 @@

expV_CPU.setAsync(cpu_walker.getExpV(), 0, 0);
expV_GPU.setAsync(gpu_walker.getExpV(), 0, 0);
-  cudaStreamSynchronize(0);
+  checkRC(cudaStreamSynchronize(0));

EXPECT_EQ(expV_CPU, expV_GPU);

expV_CPU.setAsync(cpu_walker.getExpDeltaV(), 0, 0);
expV_GPU.setAsync(gpu_walker.getExpDeltaV(), 0, 0);
-  cudaStreamSynchronize(0);
+  checkRC(cudaStreamSynchronize(0));
EXPECT_EQ(expDeltaV_CPU, expDeltaV_GPU);

vertex_ind_CPU.setAsync(cpu_walker.getVertexInd(), 0, 0);
vertex_ind_GPU.setAsync(gpu_walker.getVertexInd(), 0, 0);
-  cudaStreamSynchronize(0);
+  checkRC(cudaStreamSynchronize(0));
EXPECT_EQ(vertex_ind_CPU, vertex_ind_GPU);

cpu_walker.read_Gamma_matrices(dca::phys::e_DN);
@@ -261,7 +261,7 @@ TEST_F(CtauxWalkerTest, InsertAndRemoveVertex) {
cpu_walker.compute_Gamma_matrices();
gpu_walker.compute_Gamma_matrices();

-  cudaStreamSynchronize(0);
+  checkRC(cudaStreamSynchronize(0));
auto expected_size = std::pair<int, int>{0, 0};
EXPECT_TRUE(Gamma_up_CPU.size() == expected_size);
EXPECT_EQ(Gamma_up_CPU.size(), Gamma_up_GPU.size());
@@ -106,7 +106,7 @@ TYPED_TEST(DMatrixBuilderGpuTest, RemoveAndInstertVertex) {

builder_cpu.computeG0(G0, configuration.getSector(s), n_init, size, right_sector);
builder.computeG0(G0_dev, device_config.getDeviceData(s), n_init, right_sector, stream.streamActually());
-  cudaStreamSynchronize(stream.streamActually());
+  checkRC(cudaStreamSynchronize(stream.streamActually()));

Matrix<Scalar, CPU> G0_dev_copy(G0_dev);
constexpr RealAlias<Scalar> tolerance = 100 * std::numeric_limits<RealAlias<Scalar>>::epsilon();
@@ -78,7 +78,7 @@ struct WalkerWrapperSubmatrix : public WalkerSelector<Parameters, device_t, DIST

BaseClass::computeM(M);
#ifdef DCA_HAVE_GPU
-  cudaDeviceSynchronize();
+  checkRC(cudaDeviceSynchronize());
#endif

std::array<dca::linalg::Matrix<Scalar, CPU>, 2> M_copy{M[0], M[1]};
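Note the scope difference from the stream waits used elsewhere in this PR; schematically:

checkRC(cudaStreamSynchronize(stream));  // waits for one stream's pending work
checkRC(cudaDeviceSynchronize());        // waits for all outstanding work on the device

For a test wrapper the device-wide wait is the blunt but safe choice.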
@@ -96,7 +96,7 @@ double computeWithFastNDFT(const typename CachedNdftGpuTest<Real>::Configuration

dca::profiling::WallTime start_time;
nft_obj.execute(config, M_dev, result_device);
-  cudaStreamSynchronize(nft_obj.get_stream());
+  checkRC(cudaStreamSynchronize(nft_obj.get_stream()));
dca::profiling::WallTime end_time;

dca::linalg::ReshapableMatrix<std::complex<Real>, dca::linalg::CPU> result_host(result_device);
@@ -106,7 +106,6 @@ double computeWithFastNDFT(const typename CachedNdftGpuTest<Real>::Configuration
const int nb = BDmn::dmn_size();
const int nr = RDmn::dmn_size();
const int n_w = PosFreqDmn::dmn_size();
-  auto invert_w = [=](const int w) { return n_w - 1 - w; };
for (int b2 = 0; b2 < nb; ++b2)
for (int b1 = 0; b1 < nb; ++b1)
for (int r2 = 0; r2 < nr; ++r2)
@@ -121,24 +121,6 @@ TYPED_TEST(TpAccumulatorComplexG0GpuTest, Accumulate) {
this->host_setup.parameters_.set_four_point_channels(four_point_channels);
this->gpu_setup.parameters_.set_four_point_channels(four_point_channels);

-  // this->host_setup.data_->initializeSigma("zero");
-  // this->gpu_setup.data_->initializeSigma("zero"); //this->gpu_setup.parameters_.get_initial_self_energy());
-
-  using ParametersHost = typename decltype(this->host_setup)::Parameters;
-  using ParametersGPU = typename decltype(this->gpu_setup)::Parameters;
-
-  // LatticeMapSpType<ParametersHost,
-  //                  k_DCA<ParametersHost>,
-  //                  k_HOST<ParametersHost>> lattice_mapping_obj_host(this->host_setup.parameters_);
-  // auto& host_data = this->host_setup.data_;
-  // lattice_mapping_obj_host.execute(host_data->Sigma, host_data->Sigma_lattice_interpolated,
-  //                                  host_data->Sigma_lattice_coarsegrained, host_data->Sigma_lattice);
-
-  // LatticeMapSpType<ParametersGPU, k_DCA<ParametersGPU>, k_HOST<ParametersGPU>>
-  //     lattice_mapping_obj_gpu(this->gpu_setup.parameters_); auto& gpu_data = this->gpu_setup.data_;
-  // lattice_mapping_obj_gpu.execute(gpu_data->Sigma, gpu_data->Sigma_lattice_interpolated,
-  //                                 gpu_data->Sigma_lattice_coarsegrained, gpu_data->Sigma_lattice);
-
dca::phys::solver::accumulator::TpAccumulator<decltype(this->host_setup.parameters_),
dca::DistType::NONE, dca::linalg::CPU>
accumulatorHost(this->host_setup.data_->G0_k_w_cluster_excluded, this->host_setup.parameters_);
test/unit/phys/domains/cluster/cluster_domain_test.cpp (2 changes: 0 additions & 2 deletions)
@@ -43,7 +43,6 @@ class ClusterDomainsTest : public ::testing::Test {

TEST(ClusterDomainsTest, initializeKDmn) {
dca::io::JSONReader reader;
-  const int dimension = 2;
using DomainsParameters = dca::phys::params::DomainsParameters;
using AnalysisParameters = dca::phys::params::AnalysisParameters;
using Model = ClusterDomainsTest::Model;
@@ -59,7 +58,6 @@ TEST(ClusterDomainsTest, initializeKDmn) {
};

ClusterDomainsTestParameters pars;
-  //DmnParameters pars(dimension);

reader.open_file(DCA_SOURCE_DIR
"/test/unit/phys/domains/cluster/input.json");
