Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,15 @@ set_csr_data(sycl::queue &queue, matrix_handle_t handle, intType num_rows, intTy
intType nnz, index_base index, intType *row_ptr, intType *col_ind, fpType *val,
const std::vector<sycl::event> &dependencies = {});

// Optimize the sparse matrix handle for subsequent gemm operations; only the
// operation applied to the sparse matrix A is provided. Returns an event that
// completes once the optimization work (after `dependencies`) has finished.
ONEMKL_EXPORT sycl::event optimize_gemm(sycl::queue &queue, transpose transpose_A,
                                        matrix_handle_t handle,
                                        const std::vector<sycl::event> &dependencies = {});

// Overload that additionally provides the dense-side operation, the dense
// matrix layout and the number of columns of the dense matrices.
ONEMKL_EXPORT sycl::event optimize_gemm(sycl::queue &queue, transpose transpose_A,
                                        transpose transpose_B, layout dense_matrix_layout,
                                        const std::int64_t columns, matrix_handle_t handle,
                                        const std::vector<sycl::event> &dependencies = {});

// Optimize the sparse matrix handle for subsequent gemv operations with the
// given operation applied to the sparse matrix.
ONEMKL_EXPORT sycl::event optimize_gemv(sycl::queue &queue, transpose transpose_val,
                                        matrix_handle_t handle,
                                        const std::vector<sycl::event> &dependencies = {});
Expand Down
14 changes: 14 additions & 0 deletions include/oneapi/mkl/sparse_blas/detail/sparse_blas_ct.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,20 @@ std::enable_if_t<detail::are_fp_int_supported_v<fpType, intType>, sycl::event> s
row_ptr, col_ind, val, dependencies);
}

// Forward the sparse-side-only optimize_gemm hint to the backend selected at
// compile time, using the queue carried by the selector.
inline sycl::event optimize_gemm(backend_selector<backend::BACKEND> selector, transpose transpose_A,
                                 matrix_handle_t handle,
                                 const std::vector<sycl::event> &dependencies = {}) {
    auto &&backend_queue = selector.get_queue();
    return BACKEND::optimize_gemm(backend_queue, transpose_A, handle, dependencies);
}

// Forward the fully specified optimize_gemm hint (sparse and dense operations,
// layout and column count) to the backend selected at compile time.
inline sycl::event optimize_gemm(backend_selector<backend::BACKEND> selector, transpose transpose_A,
                                 transpose transpose_B, layout dense_matrix_layout,
                                 const std::int64_t columns, matrix_handle_t handle,
                                 const std::vector<sycl::event> &dependencies = {}) {
    auto &&backend_queue = selector.get_queue();
    return BACKEND::optimize_gemm(backend_queue, transpose_A, transpose_B, dense_matrix_layout,
                                  columns, handle, dependencies);
}

inline sycl::event optimize_gemv(backend_selector<backend::BACKEND> selector,
transpose transpose_val, matrix_handle_t handle,
const std::vector<sycl::event> &dependencies = {}) {
Expand Down
8 changes: 8 additions & 0 deletions include/oneapi/mkl/sparse_blas/detail/sparse_blas_rt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,14 @@ std::enable_if_t<detail::are_fp_int_supported_v<fpType, intType>, sycl::event> s
index_base index, intType *row_ptr, intType *col_ind, fpType *val,
const std::vector<sycl::event> &dependencies = {});

// Optimize the sparse matrix handle for subsequent gemm operations; only the
// operation applied to the sparse matrix A is provided.
sycl::event optimize_gemm(sycl::queue &queue, transpose transpose_A, matrix_handle_t handle,
                          const std::vector<sycl::event> &dependencies = {});

// Overload that additionally provides the dense-side operation, the dense
// matrix layout and the number of columns of the dense matrices.
sycl::event optimize_gemm(sycl::queue &queue, transpose transpose_A, transpose transpose_B,
                          layout dense_matrix_layout, const std::int64_t columns,
                          matrix_handle_t handle,
                          const std::vector<sycl::event> &dependencies = {});

// Optimize the sparse matrix handle for subsequent gemv operations.
sycl::event optimize_gemv(sycl::queue &queue, transpose transpose_val, matrix_handle_t handle,
                          const std::vector<sycl::event> &dependencies = {});

Expand Down
2 changes: 2 additions & 0 deletions src/sparse_blas/backends/backend_wrappers.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ oneapi::mkl::sparse::BACKEND::set_csr_data,
oneapi::mkl::sparse::BACKEND::set_csr_data,
oneapi::mkl::sparse::BACKEND::set_csr_data,
oneapi::mkl::sparse::BACKEND::set_csr_data,
oneapi::mkl::sparse::BACKEND::optimize_gemm,
oneapi::mkl::sparse::BACKEND::optimize_gemm,
oneapi::mkl::sparse::BACKEND::optimize_gemv,
oneapi::mkl::sparse::BACKEND::optimize_trsv,
oneapi::mkl::sparse::BACKEND::gemv,
Expand Down
48 changes: 37 additions & 11 deletions src/sparse_blas/backends/mkl_common/mkl_operations.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,29 @@
* SPDX-License-Identifier: Apache-2.0
*******************************************************************************/

sycl::event optimize_gemm(sycl::queue& queue, transpose /*transpose_A*/,
                          detail::matrix_handle* /*handle*/,
                          const std::vector<sycl::event>& dependencies) {
    // TODO: Call to optimize_gemm with 2024.1 oneMKL release
    // Nothing to optimize yet: submit an empty host task so the caller still
    // receives an event that completes after all the given dependencies.
    return queue.submit([deps = dependencies](sycl::handler& cgh) {
        cgh.depends_on(deps);
        cgh.host_task([]() { /* Empty kernel */ });
    });
}

sycl::event optimize_gemm(sycl::queue& queue, transpose /*transpose_A*/, transpose /*transpose_B*/,
                          layout /*dense_matrix_layout*/, const std::int64_t /*columns*/,
                          detail::matrix_handle* /*handle*/,
                          const std::vector<sycl::event>& dependencies) {
    // TODO: Call to optimize_gemm with 2024.1 oneMKL release
    // Nothing to optimize yet: submit an empty host task so the caller still
    // receives an event that completes after all the given dependencies.
    return queue.submit([deps = dependencies](sycl::handler& cgh) {
        cgh.depends_on(deps);
        cgh.host_task([]() { /* Empty kernel */ });
    });
}

sycl::event optimize_gemv(sycl::queue& queue, transpose transpose_val,
detail::matrix_handle* handle,
const std::vector<sycl::event>& dependencies) {
Expand Down Expand Up @@ -67,21 +90,24 @@ std::enable_if_t<detail::is_fp_supported_v<fpType>, sycl::event> trsv(

template <typename fpType>
std::enable_if_t<detail::is_fp_supported_v<fpType>> gemm(
sycl::queue& /*queue*/, layout /*dense_matrix_layout*/, transpose /*transpose_A*/,
transpose /*transpose_B*/, const fpType /*alpha*/, detail::matrix_handle* /*A_handle*/,
sycl::buffer<fpType, 1>& /*B*/, const std::int64_t /*columns*/, const std::int64_t /*ldb*/,
const fpType /*beta*/, sycl::buffer<fpType, 1>& /*C*/, const std::int64_t /*ldc*/) {
throw unimplemented("SPARSE_BLAS", "gemm");
sycl::queue& queue, layout dense_matrix_layout, transpose transpose_A, transpose transpose_B,
const fpType alpha, detail::matrix_handle* A_handle, sycl::buffer<fpType, 1>& B,
const std::int64_t columns, const std::int64_t ldb, const fpType beta,
sycl::buffer<fpType, 1>& C, const std::int64_t ldc) {
oneapi::mkl::sparse::gemm(queue, dense_matrix_layout, transpose_A, transpose_B, alpha,
detail::get_handle(A_handle), B, columns, ldb, beta, C, ldc);
}

template <typename fpType>
std::enable_if_t<detail::is_fp_supported_v<fpType>, sycl::event> gemm(
sycl::queue& /*queue*/, layout /*dense_matrix_layout*/, transpose /*transpose_A*/,
transpose /*transpose_B*/, const fpType /*alpha*/, detail::matrix_handle* /*A_handle*/,
const fpType* /*B*/, const std::int64_t /*columns*/, const std::int64_t /*ldb*/,
const fpType /*beta*/, fpType* /*C*/, const std::int64_t /*ldc*/,
const std::vector<sycl::event>& /*dependencies*/) {
throw unimplemented("SPARSE_BLAS", "gemm");
sycl::queue& queue, layout dense_matrix_layout, transpose transpose_A, transpose transpose_B,
const fpType alpha, detail::matrix_handle* A_handle, const fpType* B,
const std::int64_t columns, const std::int64_t ldb, const fpType beta, fpType* C,
const std::int64_t ldc, const std::vector<sycl::event>& dependencies) {
// TODO: Remove const_cast in future oneMKL release
return oneapi::mkl::sparse::gemm(queue, dense_matrix_layout, transpose_A, transpose_B, alpha,
detail::get_handle(A_handle), const_cast<fpType*>(B), columns,
ldb, beta, C, ldc, dependencies);
}

#define INSTANTIATE_GEMV(FP_TYPE) \
Expand Down
9 changes: 9 additions & 0 deletions src/sparse_blas/function_table.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,15 @@ typedef struct {
FOR_EACH_FP_AND_INT_TYPE(DEFINE_SET_CSR_DATA);

// optimize_*
sycl::event (*optimize_gemm_v1)(sycl::queue &queue, oneapi::mkl::transpose transpose_A,
oneapi::mkl::sparse::matrix_handle_t handle,
const std::vector<sycl::event> &dependencies);
sycl::event (*optimize_gemm_v2)(sycl::queue &queue, oneapi::mkl::transpose transpose_A,
oneapi::mkl::transpose transpose_B,
oneapi::mkl::layout dense_matrix_layout,
const std::int64_t columns,
oneapi::mkl::sparse::matrix_handle_t handle,
const std::vector<sycl::event> &dependencies);
sycl::event (*optimize_gemv)(sycl::queue &queue, oneapi::mkl::transpose transpose_val,
oneapi::mkl::sparse::matrix_handle_t handle,
const std::vector<sycl::event> &dependencies);
Expand Down
14 changes: 14 additions & 0 deletions src/sparse_blas/sparse_blas_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,20 @@ sycl::event release_matrix_handle(sycl::queue &queue, matrix_handle_t *p_handle,
FOR_EACH_FP_AND_INT_TYPE(DEFINE_SET_CSR_DATA)
#undef DEFINE_SET_CSR_DATA

// Runtime dispatch: route the sparse-side-only optimize_gemm overload to the
// backend library table selected by the queue's device.
sycl::event optimize_gemm(sycl::queue &queue, transpose transpose_A, matrix_handle_t handle,
                          const std::vector<sycl::event> &dependencies) {
    return function_tables[get_device_id(queue)].optimize_gemm_v1(queue, transpose_A, handle,
                                                                  dependencies);
}

// Runtime dispatch: route the fully specified optimize_gemm overload to the
// backend library table selected by the queue's device.
sycl::event optimize_gemm(sycl::queue &queue, transpose transpose_A, transpose transpose_B,
                          layout dense_matrix_layout, const std::int64_t columns,
                          matrix_handle_t handle, const std::vector<sycl::event> &dependencies) {
    const auto libkey = get_device_id(queue);
    return function_tables[libkey].optimize_gemm_v2(queue, transpose_A, transpose_B,
                                                    dense_matrix_layout, columns, handle,
                                                    dependencies);
}

sycl::event optimize_gemv(sycl::queue &queue, transpose transpose_val, matrix_handle_t handle,
const std::vector<sycl::event> &dependencies) {
auto libkey = get_device_id(queue);
Expand Down
118 changes: 102 additions & 16 deletions tests/unit_tests/sparse_blas/include/sparse_reference.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
#define _SPARSE_REFERENCE_HPP__

#include <stdexcept>
#include <string>
#include <tuple>

#include "oneapi/mkl.hpp"

Expand Down Expand Up @@ -99,35 +101,62 @@ void do_csr_transpose(const oneapi::mkl::transpose opA, intType *ia_t, intType *
ia_t[0] = a_ind;
}

// Transpose the given sparse matrix if needed
template <typename fpType, typename intType>
void prepare_reference_gemv_data(const intType *ia, const intType *ja, const fpType *a,
intType a_nrows, intType a_ncols, intType a_nnz, intType a_ind,
oneapi::mkl::transpose opA, fpType alpha, fpType beta,
const fpType *x, fpType *y_ref) {
std::size_t opa_nrows =
static_cast<std::size_t>((opA == oneapi::mkl::transpose::nontrans) ? a_nrows : a_ncols);
const std::size_t nnz = static_cast<std::size_t>(a_nnz);

// prepare op(A) locally
auto sparse_transpose_if_needed(const intType *ia, const intType *ja, const fpType *a,
intType a_nrows, intType a_ncols, std::size_t nnz, intType a_ind,
oneapi::mkl::transpose transpose_val) {
std::vector<intType> iopa;
std::vector<intType> jopa;
std::vector<fpType> opa;
if (opA == oneapi::mkl::transpose::nontrans) {
if (transpose_val == oneapi::mkl::transpose::nontrans) {
iopa.assign(ia, ia + a_nrows + 1);
jopa.assign(ja, ja + nnz);
opa.assign(a, a + nnz);
}
else if (opA == oneapi::mkl::transpose::trans || opA == oneapi::mkl::transpose::conjtrans) {
iopa.resize(opa_nrows + 1);
else if (transpose_val == oneapi::mkl::transpose::trans ||
transpose_val == oneapi::mkl::transpose::conjtrans) {
iopa.resize(static_cast<std::size_t>(a_ncols + 1));
jopa.resize(nnz);
opa.resize(nnz);
do_csr_transpose(opA, iopa.data(), jopa.data(), opa.data(), a_nrows, a_ncols, a_ind, ia, ja,
a);
do_csr_transpose(transpose_val, iopa.data(), jopa.data(), opa.data(), a_nrows, a_ncols,
a_ind, ia, ja, a);
}
else {
throw std::runtime_error("unsupported transpose_val=" +
std::to_string(static_cast<char>(transpose_val)));
}
return std::make_tuple(iopa, jopa, opa);
}

// Return a copy of the dense matrix x, transposed unless transpose_val is
// nontrans. x holds outer_size slices of ld elements each, with inner_size
// meaningful entries per slice; the result always has outer_size * ld
// elements. NOTE(review): conjtrans falls into the transpose branch without
// conjugation — confirm callers only rely on this for real data or handle
// conjugation elsewhere.
// (Reconstructed: the scraped diff interleaved a removed `throw` from the old
// gemv reference with this added helper; this is the coherent post-merge
// definition.)
template <typename fpType>
auto dense_transpose_if_needed(const fpType *x, std::size_t outer_size, std::size_t inner_size,
                               std::size_t ld, oneapi::mkl::transpose transpose_val) {
    std::vector<fpType> opx;
    if (transpose_val == oneapi::mkl::transpose::nontrans) {
        // No transpose: copy the whole storage, padding included.
        opx.assign(x, x + outer_size * ld);
    }
    else {
        // Transpose element (i, j) into position (j, i) of the output.
        opx.resize(outer_size * ld);
        for (std::size_t i = 0; i < outer_size; ++i) {
            for (std::size_t j = 0; j < inner_size; ++j) {
                opx[i + j * ld] = x[i * ld + j];
            }
        }
    }
    return opx;
}

template <typename fpType, typename intType>
void prepare_reference_gemv_data(const intType *ia, const intType *ja, const fpType *a,
intType a_nrows, intType a_ncols, intType a_nnz, intType a_ind,
oneapi::mkl::transpose opA, fpType alpha, fpType beta,
const fpType *x, fpType *y_ref) {
std::size_t opa_nrows =
static_cast<std::size_t>((opA == oneapi::mkl::transpose::nontrans) ? a_nrows : a_ncols);
const std::size_t nnz = static_cast<std::size_t>(a_nnz);
auto [iopa, jopa, opa] =
sparse_transpose_if_needed(ia, ja, a, a_nrows, a_ncols, nnz, a_ind, opA);

//
// do GEMV operation
Expand All @@ -146,4 +175,61 @@ void prepare_reference_gemv_data(const intType *ia, const intType *ja, const fpT
}
}

// Compute the reference result of C <- alpha * op(A) * op(B) + beta * C where
// A is a CSR sparse matrix (ia/ja/a with index base a_ind) and B, C are dense
// matrices stored with the given layout and leading dimensions ldb/ldc.
// The result is accumulated into c_ref in place.
template <typename fpType, typename intType>
void prepare_reference_gemm_data(const intType *ia, const intType *ja, const fpType *a,
                                 intType a_nrows, intType a_ncols, intType c_ncols, intType a_nnz,
                                 intType a_ind, oneapi::mkl::layout dense_matrix_layout,
                                 oneapi::mkl::transpose opA, oneapi::mkl::transpose opB,
                                 fpType alpha, fpType beta, intType ldb, intType ldc,
                                 const fpType *b, fpType *c_ref) {
    // Dimensions of op(A): transposing swaps rows and columns.
    std::size_t opa_nrows =
        static_cast<std::size_t>((opA == oneapi::mkl::transpose::nontrans) ? a_nrows : a_ncols);
    std::size_t opa_ncols =
        static_cast<std::size_t>((opA == oneapi::mkl::transpose::nontrans) ? a_ncols : a_nrows);
    const std::size_t nnz = static_cast<std::size_t>(a_nnz);
    const std::size_t ldb_u = static_cast<std::size_t>(ldb);
    const std::size_t ldc_u = static_cast<std::size_t>(ldc);
    // Materialize op(A) as plain CSR arrays so the loops below never have to
    // deal with a transposed sparse operand.
    auto [iopa, jopa, opa] =
        sparse_transpose_if_needed(ia, ja, a, a_nrows, a_ncols, nnz, a_ind, opA);

    // op(B) is logically (opa_ncols x c_ncols); outer/inner describe how that
    // shape is laid out in memory, so they swap for column-major storage.
    std::size_t b_outer_size = static_cast<std::size_t>(opa_ncols);
    std::size_t b_inner_size = static_cast<std::size_t>(c_ncols);
    if (dense_matrix_layout == oneapi::mkl::layout::col_major) {
        std::swap(b_outer_size, b_inner_size);
    }
    auto opb = dense_transpose_if_needed(b, b_outer_size, b_inner_size, ldb_u, opB);

    //
    // do GEMM operation
    //
    //  C <- alpha * op(A) * op(B) + beta * C
    //
    if (dense_matrix_layout == oneapi::mkl::layout::row_major) {
        for (std::size_t row = 0; row < opa_nrows; row++) {
            for (std::size_t col = 0; col < static_cast<std::size_t>(c_ncols); col++) {
                fpType tmp = 0;
                // Dot product of sparse row `row` of op(A) with dense column
                // `col` of op(B); a_ind is the index base subtracted from the
                // stored row pointers and column indices.
                for (std::size_t i = static_cast<std::size_t>(iopa[row] - a_ind);
                     i < static_cast<std::size_t>(iopa[row + 1] - a_ind); i++) {
                    tmp += opa[i] * opb[static_cast<std::size_t>(jopa[i] - a_ind) * ldb_u + col];
                }
                fpType &c = c_ref[row * ldc_u + col];
                c = alpha * tmp + beta * c;
            }
        }
    }
    else {
        // Column-major: identical computation with column-strided indexing
        // into op(B) and C.
        for (std::size_t col = 0; col < static_cast<std::size_t>(c_ncols); col++) {
            for (std::size_t row = 0; row < opa_nrows; row++) {
                fpType tmp = 0;
                for (std::size_t i = static_cast<std::size_t>(iopa[row] - a_ind);
                     i < static_cast<std::size_t>(iopa[row + 1] - a_ind); i++) {
                    tmp += opa[i] * opb[static_cast<std::size_t>(jopa[i] - a_ind) + col * ldb_u];
                }
                fpType &c = c_ref[row + col * ldc_u];
                c = alpha * tmp + beta * c;
            }
        }
    }
}

#endif // _SPARSE_REFERENCE_HPP__
54 changes: 40 additions & 14 deletions tests/unit_tests/sparse_blas/include/test_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,20 @@ sycl::buffer<typename vec::value_type, 1> make_buffer(const vec &v) {
return buf;
}

// Build a value of fpType from a (real, imag) pair.
// The generic version is for real scalar types and discards the imaginary
// part.
template <typename fpType>
struct set_fp_value {
    inline fpType operator()(fpType real, fpType imag) {
        static_cast<void>(imag); // real types have no imaginary component
        return real;
    }
};

// Specialization for std::complex: combine both parts into one value.
template <typename scalarType>
struct set_fp_value<std::complex<scalarType>> {
    inline std::complex<scalarType> operator()(scalarType real, scalarType imag) {
        return std::complex<scalarType>{ real, imag };
    }
};

template <typename fpType>
struct rand_scalar {
inline fpType operator()(double min, double max) {
Expand All @@ -113,6 +127,28 @@ void rand_vector(std::vector<fpType> &v, std::size_t n) {
}
}

// Fill m with a random dense matrix of the given layout: nrows x ncols values
// in (-0.5, 0.5), with the remaining entries of each leading-dimension slice
// set to -1.
template <typename fpType>
void rand_matrix(std::vector<fpType> &m, oneapi::mkl::layout layout_val, std::size_t nrows,
                 std::size_t ncols, std::size_t ld) {
    using fpRealType = typename complex_info<fpType>::real_type;
    // Row-major stores nrows slices of ld entries; col-major stores ncols.
    const bool col_major = (layout_val == oneapi::mkl::layout::col_major);
    const std::size_t outer_size = col_major ? ncols : nrows;
    const std::size_t inner_size = col_major ? nrows : ncols;
    m.resize(outer_size * ld);
    rand_scalar<fpType> rand;
    for (std::size_t i = 0; i < outer_size; ++i) {
        // Random values inside the matrix proper...
        for (std::size_t j = 0; j < inner_size; ++j) {
            m[i * ld + j] = rand(fpRealType(-0.5), fpRealType(0.5));
        }
        // ...and -1 in the padding up to the leading dimension.
        for (std::size_t j = inner_size; j < ld; ++j) {
            m[i * ld + j] = set_fp_value<fpType>()(-1.f, 0.f);
        }
    }
}

// Creating the 3arrays CSR representation (ia, ja, values)
// of general random sparse matrix
// with density (0 < density <= 1.0)
Expand Down Expand Up @@ -161,20 +197,6 @@ void shuffle_data(const intType *ia, intType *ja, fpType *a, const std::size_t n
}
}

template <typename fpType>
struct set_fp_value {
inline fpType operator()(fpType real, fpType /*imag*/) {
return real;
}
};

template <typename scalarType>
struct set_fp_value<std::complex<scalarType>> {
inline auto operator()(scalarType real, scalarType imag) {
return std::complex<scalarType>(real, imag);
}
};

inline void wait_and_free(sycl::queue &main_queue, oneapi::mkl::sparse::matrix_handle_t *p_handle) {
main_queue.wait();
sycl::event ev_release;
Expand Down Expand Up @@ -213,6 +235,10 @@ bool check_equal_vector(const vecType1 &v, const vecType2 &v_ref, double abs_err
out << "Mismatching size got " << n << " expected " << v_ref.size() << "\n";
return false;
}
if (n == 0) {
return true;
}

auto max_norm_ref =
*std::max_element(std::begin(v_ref), std::end(v_ref),
[](const T &a, const T &b) { return std::abs(a) < std::abs(b); });
Expand Down
Loading