Skip to content

Commit f8aab65

Browse files
committed
SPD solver only works with CUDA for now; disable it for SYCL and HIP
1 parent 1c0eef4 commit f8aab65

File tree

4 files changed

+33
-38
lines changed

4 files changed

+33
-38
lines changed

src/sparse/fronts/CMakeLists.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,6 @@ if(STRUMPACK_USE_CUDA OR STRUMPACK_USE_HIP OR STRUMPACK_USE_SYCL)
3636
PRIVATE
3737
${CMAKE_CURRENT_LIST_DIR}/FrontalMatrixGPU.cpp
3838
${CMAKE_CURRENT_LIST_DIR}/FrontalMatrixGPU.hpp
39-
${CMAKE_CURRENT_LIST_DIR}/FrontGPUSPD.cpp
40-
${CMAKE_CURRENT_LIST_DIR}/FrontGPUSPD.hpp
4139
${CMAKE_CURRENT_LIST_DIR}/FrontalMatrixGPUKernels.hpp)
4240

4341
endif()
@@ -51,7 +49,9 @@ endif()
5149
if(STRUMPACK_USE_CUDA)
5250
target_sources(strumpack PRIVATE
5351
${CMAKE_CURRENT_LIST_DIR}/FrontalMatrixCUDA.cu
54-
${CMAKE_CURRENT_LIST_DIR}/FrontGPUSPD.cu)
52+
${CMAKE_CURRENT_LIST_DIR}/FrontGPUSPD.cu
53+
${CMAKE_CURRENT_LIST_DIR}/FrontGPUSPD.cpp
54+
${CMAKE_CURRENT_LIST_DIR}/FrontGPUSPD.hpp)
5555
endif()
5656

5757
if(STRUMPACK_USE_SYCL)

src/sparse/fronts/FrontFactory.cpp

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@
5151
#endif
5252
#if defined(STRUMPACK_USE_GPU)
5353
#include "FrontalMatrixGPU.hpp"
54+
#endif
55+
#if defined(STRUMPACK_USE_CUDA)
5456
#include "FrontGPUSPD.hpp"
5557
#endif
5658
#if defined(STRUMPACK_USE_ZFP)
@@ -142,18 +144,18 @@ namespace strumpack {
142144
};
143145
if (front) return front;
144146
if (is_GPU(opts)) {
145-
#if defined(STRUMPACK_USE_MAGMA)
146-
front = std::make_unique<FrontMAGMA<scalar_t,integer_t>>
147-
(s, sbegin, send, upd);
148-
#else
149-
#if defined(STRUMPACK_USE_GPU)
147+
#if defined(STRUMPACK_USE_CUDA)
150148
if (is_symmetric(opts) && is_positive_definite(opts))
151-
front.reset
152-
(new FrontGPUSPD<scalar_t,integer_t>(s, sbegin, send, upd));
149+
front = std::make_unique<FrontGPUSPD<scalar_t,integer_t>>
150+
(s, sbegin, send, upd);
153151
else
154-
front.reset
155-
(new FrontalMatrixGPU<scalar_t,integer_t>(s, sbegin, send, upd));
156152
#endif
153+
#if defined(STRUMPACK_USE_MAGMA)
154+
front = std::make_unique<FrontMAGMA<scalar_t,integer_t>>
155+
(s, sbegin, send, upd);
156+
#else
157+
front = std::make_unique<FrontalMatrixGPU<scalar_t,integer_t>>
158+
(s, sbegin, send, upd);
157159
#endif
158160
if (root) fc.dense++;
159161
}

src/sparse/fronts/FrontGPUSPD.cpp

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
*
3131
*/
3232
#include <array>
33+
#include <cstring>
3334

3435
#include "FrontGPUSPD.hpp"
3536
#include "FrontalMatrixGPUKernels.hpp"
@@ -655,11 +656,13 @@ namespace strumpack {
655656
old_work = work_mem;
656657

657658
// default stream
658-
gpu_check(cudaDeviceSynchronize());
659+
// gpu_check(cudaDeviceSynchronize());
660+
gpu::synchronize_default_stream();
659661
front_assembly(A, L, hea_mem, dea_mem);
660662
gpu::Event e_assemble;
661663
e_assemble.record();
662-
gpu_check(cudaDeviceSynchronize());
664+
gpu::synchronize_default_stream();
665+
// gpu_check(cudaDeviceSynchronize());
663666

664667
// default stream
665668
factor_small_fronts(L, fdata, L.dev_getrf_err, opts);
@@ -726,8 +729,8 @@ namespace strumpack {
726729
// for (std::size_t i=0; i<L.factors_small; i++)
727730
// host_factors[i] = pinned[i];
728731
// host_factors += L.factors_small;
729-
memcpy(host_factors_diagonal, pinned, L.factors_diagonal_small * sizeof(scalar_t));
730-
memcpy(host_factors_off_diagonal, pinned + L.factors_diagonal_small, L.factors_off_diagonal_small * sizeof(scalar_t));
732+
std::memcpy(host_factors_diagonal, pinned, L.factors_diagonal_small * sizeof(scalar_t));
733+
std::memcpy(host_factors_off_diagonal, pinned + L.factors_diagonal_small, L.factors_off_diagonal_small * sizeof(scalar_t));
731734
host_factors_diagonal += L.factors_diagonal_small;
732735
host_factors_off_diagonal += L.factors_off_diagonal_small;
733736

@@ -754,10 +757,10 @@ namespace strumpack {
754757
// host_factors += fc;
755758
auto fdc = factors_diagonal_chunk[c-1];
756759
auto fodc = factors_off_diagonal_chunk[c-1];
757-
memcpy(host_factors_diagonal,
758-
pin[(c-1) % 2], fdc * sizeof(scalar_t));
759-
memcpy(host_factors_off_diagonal,
760-
pin[(c-1) % 2] + fdc, fodc * sizeof(scalar_t));
760+
std::memcpy(host_factors_diagonal,
761+
pin[(c-1) % 2], fdc * sizeof(scalar_t));
762+
std::memcpy(host_factors_off_diagonal,
763+
pin[(c-1) % 2] + fdc, fodc * sizeof(scalar_t));
761764
host_factors_diagonal += fdc;
762765
host_factors_off_diagonal += fodc;
763766
}
@@ -809,8 +812,8 @@ namespace strumpack {
809812
// host_factors[i] = pin[(chunks.size()-1) % 2][i];
810813
auto fdc = factors_diagonal_chunk.back();
811814
auto fodc = factors_off_diagonal_chunk.back();
812-
memcpy(host_factors_diagonal, pin[(chunks.size()-1) % 2], fdc * sizeof(scalar_t));
813-
memcpy(host_factors_off_diagonal, pin[(chunks.size()-1) % 2] + fdc, fodc * sizeof(scalar_t));
815+
std::memcpy(host_factors_diagonal, pin[(chunks.size()-1) % 2], fdc * sizeof(scalar_t));
816+
std::memcpy(host_factors_off_diagonal, pin[(chunks.size()-1) % 2] + fdc, fodc * sizeof(scalar_t));
814817
}
815818

816819
// L.f[0]->pivot_mem_.resize(L.piv_size);
@@ -862,11 +865,11 @@ namespace strumpack {
862865
(const SpMat_t& A, const SPOptions<scalar_t>& opts,
863866
int etree_level, int task_depth) {
864867
if (!A.symm_sparse()) {
865-
std::cerr << "The Matrix is not symmetric, please unable_symmetric in option settings" << std::endl;
866-
exit(EXIT_FAILURE); // stop
867-
}else{
868-
return multifrontal_factorization_symmetric(A, opts, etree_level, task_depth);
868+
std::cerr << "The Matrix is not symmetric, please use enable_symmetric in option settings" << std::endl;
869+
// TODO return something?
870+
exit(EXIT_FAILURE);
869871
}
872+
return multifrontal_factorization_symmetric(A, opts, etree_level, task_depth);
870873
}
871874

872875
template<typename scalar_t,typename integer_t> void

src/sparse/fronts/FrontGPUSPD.hpp

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,7 @@
55
#pragma once
66

77
#include "FrontalMatrixDense.hpp"
8-
9-
#if defined(STRUMPACK_USE_CUDA)
10-
#include "dense/CUDAWrapper.hpp"
11-
#endif
12-
#if defined(STRUMPACK_USE_HIP)
13-
#include "dense/HIPWrapper.hpp"
14-
#endif
15-
#if defined(STRUMPACK_USE_SYCL)
16-
#include "dense/DPCPPWrapper.hpp"
17-
#endif
8+
#include "dense/GPUWrapper.hpp"
189

1910
namespace strumpack {
2011

@@ -25,7 +16,6 @@ namespace strumpack {
2516
// template<typename scalar_t> struct FwdSolveData;
2617
}
2718

28-
2919
template<typename scalar_t,typename integer_t> class FrontGPUSPD
3020
: public FrontalMatrix<scalar_t,integer_t> {
3121
using F_t = FrontalMatrix<scalar_t,integer_t>;
@@ -55,7 +45,7 @@ namespace strumpack {
5545
ReturnCode multifrontal_factorization_symmetric(const SpMat_t& A,
5646
const SPOptions<scalar_t>& opts,
5747
int etree_level=0,
58-
int task_depth=0);
48+
int task_depth=0) override;
5949

6050
void extract_CB_sub_matrix(const std::vector<std::size_t>& I,
6151
const std::vector<std::size_t>& J,

0 commit comments

Comments
 (0)