Skip to content

Commit 3462e8c

Browse files
committed
OpenMP fixes for NVHPC
1 parent 7b0fbee commit 3462e8c

File tree

4 files changed

+20 -8 lines changed

4 files changed

+20 -8 lines changed

src/BLR/BLRMatrix.GPU.cpp

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -216,15 +216,21 @@ namespace strumpack {
216216
b3.run(scalar_t(-1.), scalar_t(1.), comp_stream, handle);
217217
}
218218
if (i > 0) {
219+
#if defined(STRUMPACK_USE_OPENMP_TASKLOOP)
219220
#pragma omp taskloop
221+
#endif
220222
for (std::size_t j=0; j<rb; j++)
221223
B11.tile(j, i-1).move_to_cpu
222224
(copy_stream, pinned+B11.tileroff(j)*B11.tilecols(i-1));
225+
#if defined(STRUMPACK_USE_OPENMP_TASKLOOP)
223226
#pragma omp taskloop
227+
#endif
224228
for (std::size_t j=0; j<rb2; j++)
225229
B12.tile(i-1, j).move_to_cpu
226230
(copy_stream, pinned+B12.tilecoff(j)*B12.tilerows(i-1));
231+
#if defined(STRUMPACK_USE_OPENMP_TASKLOOP)
227232
#pragma omp taskloop
233+
#endif
228234
for (std::size_t j=0; j<rb2; j++)
229235
B21.tile(j, i-1).move_to_cpu
230236
(copy_stream, pinned+B21.tileroff(j)*B21.tilecols(i-1));

src/BLR/BLRMatrix.cpp

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1401,7 +1401,7 @@ namespace strumpack {
14011401
#endif
14021402
{
14031403
for (std::size_t i=0; i<rb; i+=CP) { // F11 and F21
1404-
#pragma omp taskwait
1404+
// #pragma omp taskwait
14051405
#if defined(STRUMPACK_USE_OPENMP_TASK_DEPEND)
14061406
[[maybe_unused]] std::size_t ifirst = lrb*i;
14071407
#pragma omp task default(shared) firstprivate(i,ifirst) \
@@ -1534,9 +1534,10 @@ namespace strumpack {
15341534
}
15351535
}
15361536
}
1537-
}
1538-
for (std::size_t i=0; i<rb2; i+=CP) { // F12 and F22
15391537
#pragma omp taskwait
1538+
}
1539+
for (std::size_t i=0; i<rb2; i+=CP) { // F12 and F22
1540+
// #pragma omp taskwait
15401541
#if defined(STRUMPACK_USE_OPENMP_TASK_DEPEND)
15411542
[[maybe_unused]] std::size_t ifirst = lrb*(i+rb);
15421543
#pragma omp task default(shared) firstprivate(i,ifirst) \
@@ -1603,7 +1604,8 @@ namespace strumpack {
16031604
}
16041605
}
16051606
}
1606-
}
1607+
#pragma omp taskwait
1608+
}
16071609
}
16081610
}
16091611
for (std::size_t i=0; i<rb; i++)

src/dense/DenseMatrix.hpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1032,7 +1032,7 @@ namespace strumpack {
10321032
* \param ld leading dimension of matrix allocated at D. ld >= m
10331033
*/
10341034
DenseMatrixWrapper(std::size_t m, std::size_t n,
1035-
scalar_t* D, std::size_t ld) {
1035+
scalar_t* D, std::size_t ld) noexcept {
10361036
this->data_ = D; this->rows_ = m; this->cols_ = n;
10371037
this->ld_ = std::max(std::size_t(1), ld);
10381038
}
@@ -1051,13 +1051,13 @@ namespace strumpack {
10511051
* submatrix
10521052
*/
10531053
DenseMatrixWrapper(std::size_t m, std::size_t n, DenseMatrix<scalar_t>& D,
1054-
std::size_t i, std::size_t j)
1054+
std::size_t i, std::size_t j) noexcept
10551055
: DenseMatrixWrapper<scalar_t>(m, n, &D(i, j), D.ld()) {
10561056
assert(i+m <= D.rows());
10571057
assert(j+n <= D.cols());
10581058
}
10591059

1060-
DenseMatrixWrapper(DenseMatrixWrapper<scalar_t>& D) {
1060+
DenseMatrixWrapper(DenseMatrixWrapper<scalar_t>& D) noexcept {
10611061
this->data_ = D.data();
10621062
this->rows_ = D.rows();
10631063
this->cols_ = D.cols();
@@ -1067,7 +1067,7 @@ namespace strumpack {
10671067
/**
10681068
* Default move constructor.
10691069
*/
1070-
DenseMatrixWrapper(DenseMatrixWrapper<scalar_t>&& D) {
1070+
DenseMatrixWrapper(DenseMatrixWrapper<scalar_t>&& D) noexcept {
10711071
this->data_ = D.data();
10721072
this->rows_ = D.rows();
10731073
this->cols_ = D.cols();

src/structured/StructuredMatrix.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -225,7 +225,9 @@ namespace strumpack {
225225
std::vector<std::size_t> J(n);
226226
std::iota(J.begin(), J.end(), c);
227227
DenseMW_t Rrsub(n, Rr.cols(), Rr, c, 0);
228+
#if defined(STRUMPACK_USE_OPENMP_TASKLOOP)
228229
#pragma omp taskloop firstprivate(c) shared(Rrsub)
230+
#endif
229231
for (int r=0; r<rows; r+=B) {
230232
int m = std::min(B, rows - r);
231233
DenseM_t Asub(m, n);
@@ -242,7 +244,9 @@ namespace strumpack {
242244
std::vector<std::size_t> I(m);
243245
std::iota(I.begin(), I.end(), r);
244246
DenseMW_t Rcsub(m, Rc.cols(), Rc, r, 0);
247+
#if defined(STRUMPACK_USE_OPENMP_TASKLOOP)
245248
#pragma omp taskloop firstprivate(r) shared(Rcsub)
249+
#endif
246250
for (int c=0; c<cols; c+=B) {
247251
int n = std::min(B, cols - c);
248252
DenseM_t Asub(m, n);

0 commit comments

Comments
 (0)