30
30
*
31
31
*/
32
32
#include < array>
33
+ #include < cstring>
33
34
34
35
#include " FrontGPUSPD.hpp"
35
36
#include " FrontalMatrixGPUKernels.hpp"
@@ -655,11 +656,13 @@ namespace strumpack {
655
656
old_work = work_mem;
656
657
657
658
// default stream
658
- gpu_check (cudaDeviceSynchronize ());
659
+ // gpu_check(cudaDeviceSynchronize());
660
+ gpu::synchronize_default_stream ();
659
661
front_assembly (A, L, hea_mem, dea_mem);
660
662
gpu::Event e_assemble;
661
663
e_assemble.record ();
662
- gpu_check (cudaDeviceSynchronize ());
664
+ gpu::synchronize_default_stream ();
665
+ // gpu_check(cudaDeviceSynchronize());
663
666
664
667
// default stream
665
668
factor_small_fronts (L, fdata, L.dev_getrf_err , opts);
@@ -726,8 +729,8 @@ namespace strumpack {
726
729
// for (std::size_t i=0; i<L.factors_small; i++)
727
730
// host_factors[i] = pinned[i];
728
731
// host_factors += L.factors_small;
729
- memcpy (host_factors_diagonal, pinned, L.factors_diagonal_small * sizeof (scalar_t ));
730
- memcpy (host_factors_off_diagonal, pinned + L.factors_diagonal_small , L.factors_off_diagonal_small * sizeof (scalar_t ));
732
+ std:: memcpy (host_factors_diagonal, pinned, L.factors_diagonal_small * sizeof (scalar_t ));
733
+ std:: memcpy (host_factors_off_diagonal, pinned + L.factors_diagonal_small , L.factors_off_diagonal_small * sizeof (scalar_t ));
731
734
host_factors_diagonal += L.factors_diagonal_small ;
732
735
host_factors_off_diagonal += L.factors_off_diagonal_small ;
733
736
@@ -754,10 +757,10 @@ namespace strumpack {
754
757
// host_factors += fc;
755
758
auto fdc = factors_diagonal_chunk[c-1 ];
756
759
auto fodc = factors_off_diagonal_chunk[c-1 ];
757
- memcpy (host_factors_diagonal,
758
- pin[(c-1 ) % 2 ], fdc * sizeof (scalar_t ));
759
- memcpy (host_factors_off_diagonal,
760
- pin[(c-1 ) % 2 ] + fdc, fodc * sizeof (scalar_t ));
760
+ std:: memcpy (host_factors_diagonal,
761
+ pin[(c-1 ) % 2 ], fdc * sizeof (scalar_t ));
762
+ std:: memcpy (host_factors_off_diagonal,
763
+ pin[(c-1 ) % 2 ] + fdc, fodc * sizeof (scalar_t ));
761
764
host_factors_diagonal += fdc;
762
765
host_factors_off_diagonal += fodc;
763
766
}
@@ -809,8 +812,8 @@ namespace strumpack {
809
812
// host_factors[i] = pin[(chunks.size()-1) % 2][i];
810
813
auto fdc = factors_diagonal_chunk.back ();
811
814
auto fodc = factors_off_diagonal_chunk.back ();
812
- memcpy (host_factors_diagonal, pin[(chunks.size ()-1 ) % 2 ], fdc * sizeof (scalar_t ));
813
- memcpy (host_factors_off_diagonal, pin[(chunks.size ()-1 ) % 2 ] + fdc, fodc * sizeof (scalar_t ));
815
+ std:: memcpy (host_factors_diagonal, pin[(chunks.size ()-1 ) % 2 ], fdc * sizeof (scalar_t ));
816
+ std:: memcpy (host_factors_off_diagonal, pin[(chunks.size ()-1 ) % 2 ] + fdc, fodc * sizeof (scalar_t ));
814
817
}
815
818
816
819
// L.f[0]->pivot_mem_.resize(L.piv_size);
@@ -862,11 +865,11 @@ namespace strumpack {
862
865
(const SpMat_t& A, const SPOptions<scalar_t >& opts,
863
866
int etree_level, int task_depth) {
864
867
if (!A.symm_sparse ()) {
865
- std::cerr << " The Matrix is not symmetric, please unable_symmetric in option settings" << std::endl;
866
- exit (EXIT_FAILURE); // stop
867
- }else {
868
- return multifrontal_factorization_symmetric (A, opts, etree_level, task_depth);
868
+ std::cerr << " The Matrix is not symmetric, please use enable_symmetric in option settings" << std::endl;
869
+ // TODO return something?
870
+ exit (EXIT_FAILURE);
869
871
}
872
+ return multifrontal_factorization_symmetric (A, opts, etree_level, task_depth);
870
873
}
871
874
872
875
template <typename scalar_t ,typename integer_t > void
0 commit comments