Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/GenerateCoarseProblem.cu
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,8 @@ void GenerateCoarseProblem(const SparseMatrix & Af) {
local_int_t* d_f2cOperator;
local_int_t* d_c2fOperator;

GPU_CHECK(gpuMalloc((void**)&d_f2cOperator, sizeof(local_int_t) * localNumberOfRows));
GPU_CHECK(gpuMalloc((void**)&d_c2fOperator, sizeof(local_int_t) * nxf * nyf * nzf));
GPU_CHECK(deviceMalloc((void**)&d_f2cOperator, sizeof(local_int_t) * localNumberOfRows));
GPU_CHECK(deviceMalloc((void**)&d_c2fOperator, sizeof(local_int_t) * nxf * nyf * nzf));

GPU_CHECK(gpuMemset(d_c2fOperator, -1, sizeof(local_int_t) * nxf * nyf * nzf));

Expand Down
18 changes: 9 additions & 9 deletions src/GenerateProblem.cu
Original file line number Diff line number Diff line change
Expand Up @@ -406,13 +406,13 @@ void GenerateProblem(SparseMatrix & A, Vector * b, Vector * x, Vector * xexact)
if(xexact != NULL) HIPInitializeVector(*xexact, localNumberOfRows);

// Allocate structures
GPU_CHECK(gpuMalloc((void**)&A.d_mtxIndG, std::max(sizeof(double), sizeof(global_int_t)) * localNumberOfRows * numberOfNonzerosPerRow));
GPU_CHECK(gpuMalloc((void**)&A.d_matrixValues, sizeof(double) * localNumberOfRows * numberOfNonzerosPerRow));
GPU_CHECK(gpuMalloc((void**)&A.d_mtxIndL, sizeof(local_int_t) * localNumberOfRows * numberOfNonzerosPerRow));
GPU_CHECK(gpuMalloc((void**)&A.d_nonzerosInRow, sizeof(char) * localNumberOfRows));
GPU_CHECK(gpuMalloc((void**)&A.d_matrixDiagonal, sizeof(local_int_t) * localNumberOfRows));
GPU_CHECK(gpuMalloc((void**)&A.d_rowHash, sizeof(local_int_t) * localNumberOfRows));
GPU_CHECK(gpuMalloc((void**)&A.d_localToGlobalMap, sizeof(global_int_t) * localNumberOfRows));
GPU_CHECK(deviceMalloc((void**)&A.d_mtxIndG, std::max(sizeof(double), sizeof(global_int_t)) * localNumberOfRows * numberOfNonzerosPerRow));
GPU_CHECK(deviceMalloc((void**)&A.d_matrixValues, sizeof(double) * localNumberOfRows * numberOfNonzerosPerRow));
GPU_CHECK(deviceMalloc((void**)&A.d_mtxIndL, sizeof(local_int_t) * localNumberOfRows * numberOfNonzerosPerRow));
GPU_CHECK(deviceMalloc((void**)&A.d_nonzerosInRow, sizeof(char) * localNumberOfRows));
GPU_CHECK(deviceMalloc((void**)&A.d_matrixDiagonal, sizeof(local_int_t) * localNumberOfRows));
GPU_CHECK(deviceMalloc((void**)&A.d_rowHash, sizeof(local_int_t) * localNumberOfRows));
GPU_CHECK(deviceMalloc((void**)&A.d_localToGlobalMap, sizeof(global_int_t) * localNumberOfRows));

// Determine blocksize
unsigned int blocksize = 512 / numberOfNonzerosPerRow;
Expand Down Expand Up @@ -511,8 +511,8 @@ void CopyProblemToHost(SparseMatrix& A, Vector* b, Vector* x, Vector* xexact)
GPU_CHECK(gpuMemcpy(mtxDiag, A.d_matrixDiagonal, sizeof(local_int_t) * A.localNumberOfRows, gpuMemcpyDeviceToHost));
GPU_CHECK(gpuMemcpy(A.localToGlobalMap.data(), A.d_localToGlobalMap, sizeof(global_int_t) * A.localNumberOfRows, gpuMemcpyDeviceToHost));

GPU_CHECK(gpuFree(A.d_nonzerosInRow));
GPU_CHECK(gpuFree(A.d_matrixDiagonal));
GPU_CHECK(deviceFree(A.d_nonzerosInRow));
GPU_CHECK(deviceFree(A.d_matrixDiagonal));

// Initialize pointers
A.matrixDiagonal[0] = A.matrixValues[0] + mtxDiag[0];
Expand Down
20 changes: 10 additions & 10 deletions src/MultiColoring.cu
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ void JPLColoring(SparseMatrix& A)
{
local_int_t m = A.localNumberOfRows;

GPU_CHECK(gpuMalloc((void**)&A.perm, sizeof(local_int_t) * m));
GPU_CHECK(deviceMalloc((void**)&A.perm, sizeof(local_int_t) * m));
GPU_CHECK(gpuMemset(A.perm, -1, sizeof(local_int_t) * m));

A.nblocks = 0;
Expand Down Expand Up @@ -321,15 +321,15 @@ void JPLColoring(SparseMatrix& A)

A.ublocks = A.nblocks - 1;

GPU_CHECK(gpuFree(A.d_rowHash));
GPU_CHECK(deviceFree(A.d_rowHash));

local_int_t* tmp_color;
local_int_t* tmp_perm;
local_int_t* perm;

GPU_CHECK(gpuMalloc((void**)&tmp_color, sizeof(local_int_t) * m));
GPU_CHECK(gpuMalloc((void**)&tmp_perm, sizeof(local_int_t) * m));
GPU_CHECK(gpuMalloc((void**)&perm, sizeof(local_int_t) * m));
GPU_CHECK(deviceMalloc((void**)&tmp_color, sizeof(local_int_t) * m));
GPU_CHECK(deviceMalloc((void**)&tmp_perm, sizeof(local_int_t) * m));
GPU_CHECK(deviceMalloc((void**)&perm, sizeof(local_int_t) * m));

kernel_identity<1024><<<(m - 1) / 1024 + 1, 1024>>>(m, perm);

Expand All @@ -344,9 +344,9 @@ void JPLColoring(SparseMatrix& A)
int endbit = 32 - __builtin_clz(A.nblocks);

GPU_CHECK(rocprim::radix_sort_pairs(buf, size, keys, vals, m, startbit, endbit));
GPU_CHECK(gpuMalloc(&buf, size));
GPU_CHECK(deviceMalloc(&buf, size));
GPU_CHECK(rocprim::radix_sort_pairs(buf, size, keys, vals, m, startbit, endbit));
GPU_CHECK(gpuFree(buf));
GPU_CHECK(deviceFree(buf));

kernel_create_perm<1024><<<(m - 1) / 1024 + 1, 1024>>>(m, vals.current(), A.perm);
#else
Expand All @@ -367,9 +367,9 @@ void JPLColoring(SparseMatrix& A)
kernel_create_perm<1024><<<(m - 1) / 1024 + 1, 1024>>>(m, perm, A.perm);
#endif

GPU_CHECK(gpuFree(tmp_color));
GPU_CHECK(gpuFree(tmp_perm));
GPU_CHECK(gpuFree(perm));
GPU_CHECK(deviceFree(tmp_color));
GPU_CHECK(deviceFree(tmp_perm));
GPU_CHECK(deviceFree(perm));

#ifndef HPCG_REFERENCE
--A.ublocks;
Expand Down
4 changes: 2 additions & 2 deletions src/Permute.cu
Original file line number Diff line number Diff line change
Expand Up @@ -237,10 +237,10 @@ __global__ void kernel_permute(local_int_t size,
// Swaps v.d_values for a freshly allocated buffer that is handed to
// kernel_permute together with the old values and perm.
//
// size - element count passed to kernel_permute; also determines the grid size
// v    - vector whose d_values pointer is replaced; v.localLength sizes the
//        new buffer
// perm - index array forwarded unchanged to kernel_permute
//        (presumably device memory, since the kernel receives it -- confirm)
//
// NOTE(review): this listing comes from a diff rendering (the file header
// reports "2 additions & 2 deletions"), so each gpu*/device* pair below
// appears to be the removed and added form of one statement rather than two
// consecutive calls -- confirm against the actual source file.
void PermuteVector(local_int_t size, Vector& v, const local_int_t* perm)
{
double* buffer;
// New storage for v.localLength doubles.
GPU_CHECK(gpuMalloc((void**)&buffer, sizeof(double) * v.localLength));
GPU_CHECK(deviceMalloc((void**)&buffer, sizeof(double) * v.localLength));

// 1024 threads per block, (size - 1) / 1024 + 1 blocks.
// The kernel receives the old array and the new buffer; presumably it writes
// the permuted values into buffer -- confirm against kernel_permute.
// NOTE(review): buffer is sized by v.localLength while the launch is driven by
// size; assumes callers pass size <= v.localLength -- TODO confirm.
// NOTE(review): unlike the neighbouring calls, the launch is not wrapped in
// GPU_CHECK.
kernel_permute<1024><<<(size - 1) / 1024 + 1, 1024>>>(size, perm, v.d_values, buffer);

// Release the old storage, then make v point at the new buffer.
GPU_CHECK(gpuFree(v.d_values));
GPU_CHECK(deviceFree(v.d_values));
v.d_values = buffer;
}
6 changes: 3 additions & 3 deletions src/SparseMatrix.cu
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ void ConvertToELL(SparseMatrix& A)
else LAUNCH_TO_ELL_COL(27, 4)

// Free old matrix indices
GPU_CHECK(gpuFree(A.d_mtxIndL));
GPU_CHECK(deviceFree(A.d_mtxIndL));

#ifndef HPCG_NO_MPI
GPU_CHECK(gpuMemcpy(&A.halo_rows, d_halo_rows, sizeof(local_int_t), gpuMemcpyDeviceToHost));
Expand Down Expand Up @@ -408,8 +408,8 @@ void ExtractDiagonal(SparseMatrix& A)
local_int_t m = A.localNumberOfRows;

// Allocate memory to extract diagonal entries
GPU_CHECK(gpuMalloc((void**)&A.diag_idx, sizeof(local_int_t) * m));
GPU_CHECK(gpuMalloc((void**)&A.inv_diag, sizeof(double) * m));
GPU_CHECK(deviceMalloc((void**)&A.diag_idx, sizeof(local_int_t) * m));
GPU_CHECK(deviceMalloc((void**)&A.inv_diag, sizeof(double) * m));

// Extract diagonal entries
kernel_extract_diag_index<1024><<<(m - 1) / 1024 + 1, 1024>>>(
Expand Down