From 7a56304b346ef44a7d37f3cf4382ded9eccaf0bf Mon Sep 17 00:00:00 2001 From: vlad-perevezentsev Date: Mon, 29 Jan 2024 13:16:44 +0100 Subject: [PATCH] Update dpnp.linalg.inv() function (#1665) * Impl dpnp.linalg.inv for 2d array * Remove an old impl of dpnp_inv * Add batch implementation of dpnp.linalg.inv func * Add cupy tests for dpnp.linalg.inf * Add dpnp tests for dpnp.linalg.inv * Add check_lapack_dev_info func * Add dev_info size check for getri_batch and getrf_batch * Add size check dev_info and error_matrices_ids * Remove dpnp_inv_ext_c * Rename check_lapack_dev_info to _check_lapack_dev_info * Skip test_inv in TestInvInvalid --------- Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com> --- dpnp/backend/extensions/lapack/CMakeLists.txt | 1 + .../backend/extensions/lapack/getrf_batch.cpp | 17 + dpnp/backend/extensions/lapack/getri.hpp | 56 ++++ .../backend/extensions/lapack/getri_batch.cpp | 293 ++++++++++++++++++ dpnp/backend/extensions/lapack/lapack_py.cpp | 10 + .../extensions/lapack/types_matrix.hpp | 26 ++ dpnp/backend/include/dpnp_iface_fptr.hpp | 2 - dpnp/backend/kernels/dpnp_krnl_linalg.cpp | 30 -- dpnp/dpnp_algo/dpnp_algo.pxd | 2 - dpnp/linalg/dpnp_algo_linalg.pyx | 41 --- dpnp/linalg/dpnp_iface_linalg.py | 56 +++- dpnp/linalg/dpnp_utils_linalg.py | 167 ++++++++++ tests/test_linalg.py | 134 +++++++- tests/test_sycl_queue.py | 37 ++- tests/test_usm_type.py | 30 ++ .../cupy/linalg_tests/test_solve.py | 84 ++++- 16 files changed, 871 insertions(+), 115 deletions(-) create mode 100644 dpnp/backend/extensions/lapack/getri.hpp create mode 100644 dpnp/backend/extensions/lapack/getri_batch.cpp diff --git a/dpnp/backend/extensions/lapack/CMakeLists.txt b/dpnp/backend/extensions/lapack/CMakeLists.txt index 0cfdc1a1677..626615e3e53 100644 --- a/dpnp/backend/extensions/lapack/CMakeLists.txt +++ b/dpnp/backend/extensions/lapack/CMakeLists.txt @@ -30,6 +30,7 @@ set(_module_src ${CMAKE_CURRENT_SOURCE_DIR}/gesv.cpp ${CMAKE_CURRENT_SOURCE_DIR}/getrf.cpp ${CMAKE_CURRENT_SOURCE_DIR}/getrf_batch.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/getri_batch.cpp ${CMAKE_CURRENT_SOURCE_DIR}/heevd.cpp ${CMAKE_CURRENT_SOURCE_DIR}/potrf.cpp ${CMAKE_CURRENT_SOURCE_DIR}/potrf_batch.cpp diff --git a/dpnp/backend/extensions/lapack/getrf_batch.cpp b/dpnp/backend/extensions/lapack/getrf_batch.cpp index 76977bf6628..a8993121809 100644 --- a/dpnp/backend/extensions/lapack/getrf_batch.cpp +++ b/dpnp/backend/extensions/lapack/getrf_batch.cpp @@ -116,6 +116,15 @@ static sycl::event getrf_batch_impl(sycl::queue exec_q, // Get the indices of the first zero diagonal elements of these matrices auto error_info = be.exceptions(); + auto error_matrices_ids_size = error_matrices_ids.size(); + auto dev_info_size = static_cast(py::len(dev_info)); + if (error_matrices_ids_size != dev_info_size) { + throw py::value_error("The size of `dev_info` must be equal to" + + std::to_string(error_matrices_ids_size) + + ", but currently it is " + + std::to_string(dev_info_size) + "."); + } + for (size_t i = 0; i < error_matrices_ids.size(); ++i) { // Assign the index of the first zero diagonal element in each // error matrix to the corresponding index in 'dev_info' @@ -190,6 +199,14 @@ std::pair ", but a 2-dimensional array is expected."); } + const int dev_info_size = py::len(dev_info); + if (dev_info_size != batch_size) { + throw py::value_error("The size of 'dev_info' (" + + std::to_string(dev_info_size) + + ") does not match the expected batch size (" + + std::to_string(batch_size) + ")."); + } + // check compatibility of execution queue and allocation queue if (!dpctl::utils::queues_are_compatible(exec_q, {a_array, ipiv_array})) { throw py::value_error( diff --git a/dpnp/backend/extensions/lapack/getri.hpp b/dpnp/backend/extensions/lapack/getri.hpp new file mode 100644 index 00000000000..75e9b16d4ef --- /dev/null +++ b/dpnp/backend/extensions/lapack/getri.hpp @@ -0,0 +1,56 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include +#include + +#include + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ +extern std::pair + getri_batch(sycl::queue exec_q, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray ipiv_array, + py::list dev_info, + std::int64_t n, + std::int64_t stride_a, + std::int64_t stride_ipiv, + std::int64_t batch_size, + const std::vector &depends = {}); + +extern void init_getri_batch_dispatch_vector(void); +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/lapack/getri_batch.cpp b/dpnp/backend/extensions/lapack/getri_batch.cpp new file mode 100644 index 00000000000..c6315e29427 --- /dev/null +++ b/dpnp/backend/extensions/lapack/getri_batch.cpp @@ -0,0 +1,293 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/type_utils.hpp" + +#include "getri.hpp" +#include "types_matrix.hpp" + +#include "dpnp_utils.hpp" + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ +namespace mkl_lapack = oneapi::mkl::lapack; +namespace py = pybind11; +namespace type_utils = dpctl::tensor::type_utils; + +typedef sycl::event (*getri_batch_impl_fn_ptr_t)( + sycl::queue, + std::int64_t, + char *, + std::int64_t, + std::int64_t, + std::int64_t *, + std::int64_t, + std::int64_t, + py::list, + std::vector &, + const std::vector &); + +static getri_batch_impl_fn_ptr_t + getri_batch_dispatch_vector[dpctl_td_ns::num_types]; + +template +static sycl::event getri_batch_impl(sycl::queue exec_q, + std::int64_t n, + char *in_a, + std::int64_t lda, + std::int64_t stride_a, + std::int64_t *ipiv, + std::int64_t stride_ipiv, + std::int64_t batch_size, + py::list dev_info, + std::vector &host_task_events, + const std::vector &depends) +{ + type_utils::validate_type_for_device(exec_q); + + T *a = reinterpret_cast(in_a); + + const std::int64_t scratchpad_size = + mkl_lapack::getri_batch_scratchpad_size(exec_q, n, lda, stride_a, + stride_ipiv, batch_size); + T *scratchpad = nullptr; + + std::stringstream error_msg; + std::int64_t info = 0; + bool is_exception_caught = false; + + sycl::event getri_batch_event; + try { + scratchpad = sycl::malloc_device(scratchpad_size, exec_q); + + getri_batch_event = mkl_lapack::getri_batch( + exec_q, + n, // The order of each square matrix in the batch; (0 ≤ n). + // It must be a non-negative integer. + a, // Pointer to the batch of square matrices, each of size (n x n). + lda, // The leading dimension of each matrix in the batch. + stride_a, // Stride between consecutive matrices in the batch. + ipiv, // Pointer to the array of pivot indices for each matrix in + // the batch. + stride_ipiv, // Stride between pivot indices: Spacing between pivot + // arrays in 'ipiv'. + batch_size, // Total number of matrices in the batch. + scratchpad, // Pointer to scratchpad memory to be used by MKL + // routine for storing intermediate results. + scratchpad_size, depends); + } catch (mkl_lapack::batch_error const &be) { + // Get the indices of matrices within the batch that encountered an + // error + auto error_matrices_ids = be.ids(); + // Get the indices of the first zero diagonal elements of these matrices + auto error_info = be.exceptions(); + + auto error_matrices_ids_size = error_matrices_ids.size(); + auto dev_info_size = static_cast(py::len(dev_info)); + if (error_matrices_ids_size != dev_info_size) { + throw py::value_error("The size of `dev_info` must be equal to" + + std::to_string(error_matrices_ids_size) + + ", but currently it is " + + std::to_string(dev_info_size) + "."); + } + + for (size_t i = 0; i < error_matrices_ids.size(); ++i) { + // Assign the index of the first zero diagonal element in each + // error matrix to the corresponding index in 'dev_info' + dev_info[error_matrices_ids[i]] = error_info[i]; + } + } catch (mkl_lapack::exception const &e) { + is_exception_caught = true; + info = e.info(); + + if (info < 0) { + error_msg << "Parameter number " << -info + << " had an illegal value."; + } + else if (info == scratchpad_size && e.detail() != 0) { + error_msg + << "Insufficient scratchpad size. Required size is at least " + << e.detail(); + } + else { + error_msg << "Unexpected MKL exception caught during getri_batch() " + "call:\nreason: " + << e.what() << "\ninfo: " << e.info(); + } + } catch (sycl::exception const &e) { + is_exception_caught = true; + error_msg + << "Unexpected SYCL exception caught during getri_batch() call:\n" + << e.what(); + } + + if (is_exception_caught) // an unexpected error occurs + { + if (scratchpad != nullptr) { + sycl::free(scratchpad, exec_q); + } + + throw std::runtime_error(error_msg.str()); + } + + sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(getri_batch_event); + auto ctx = exec_q.get_context(); + cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); }); + }); + host_task_events.push_back(clean_up_event); + return getri_batch_event; +} + +std::pair + getri_batch(sycl::queue exec_q, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray ipiv_array, + py::list dev_info, + std::int64_t n, + std::int64_t stride_a, + std::int64_t stride_ipiv, + std::int64_t batch_size, + const std::vector &depends) +{ + const int a_array_nd = a_array.get_ndim(); + const int ipiv_array_nd = ipiv_array.get_ndim(); + + if (a_array_nd < 3) { + throw py::value_error( + "The input array has ndim=" + std::to_string(a_array_nd) + + ", but an array with ndim >= 3 is expected."); + } + + if (ipiv_array_nd != 2) { + throw py::value_error("The array of pivot indices has ndim=" + + std::to_string(ipiv_array_nd) + + ", but a 2-dimensional array is expected."); + } + + const int dev_info_size = py::len(dev_info); + if (dev_info_size != batch_size) { + throw py::value_error("The size of 'dev_info' (" + + std::to_string(dev_info_size) + + ") does not match the expected batch size (" + + std::to_string(batch_size) + ")."); + } + + // check compatibility of execution queue and allocation queue + if (!dpctl::utils::queues_are_compatible(exec_q, {a_array, ipiv_array})) { + throw py::value_error( + "Execution queue is not compatible with allocation queues"); + } + + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(a_array, ipiv_array)) { + throw py::value_error("The input array and the array of pivot indices " + "are overlapping segments of memory"); + } + + bool is_a_array_c_contig = a_array.is_c_contiguous(); + bool is_ipiv_array_c_contig = ipiv_array.is_c_contiguous(); + if (!is_a_array_c_contig) { + throw py::value_error("The input array " + "must be C-contiguous"); + } + if (!is_ipiv_array_c_contig) { + throw py::value_error("The array of pivot indices " + "must be C-contiguous"); + } + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int a_array_type_id = + array_types.typenum_to_lookup_id(a_array.get_typenum()); + + getri_batch_impl_fn_ptr_t getri_batch_fn = + getri_batch_dispatch_vector[a_array_type_id]; + if (getri_batch_fn == nullptr) { + throw py::value_error( + "No getri_batch implementation defined for the provided type " + "of the input matrix."); + } + + auto ipiv_types = dpctl_td_ns::usm_ndarray_types(); + int ipiv_array_type_id = + ipiv_types.typenum_to_lookup_id(ipiv_array.get_typenum()); + + if (ipiv_array_type_id != static_cast(dpctl_td_ns::typenum_t::INT64)) { + throw py::value_error("The type of 'ipiv_array' must be int64."); + } + + char *a_array_data = a_array.get_data(); + const std::int64_t lda = std::max(1UL, n); + + char *ipiv_array_data = ipiv_array.get_data(); + std::int64_t *d_ipiv = reinterpret_cast(ipiv_array_data); + + std::vector host_task_events; + sycl::event getri_batch_ev = getri_batch_fn( + exec_q, n, a_array_data, lda, stride_a, d_ipiv, stride_ipiv, batch_size, + dev_info, host_task_events, depends); + + sycl::event args_ev = dpctl::utils::keep_args_alive( + exec_q, {a_array, ipiv_array}, host_task_events); + + return std::make_pair(args_ev, getri_batch_ev); +} + +template +struct GetriBatchContigFactory +{ + fnT get() + { + if constexpr (types::GetriBatchTypePairSupportFactory::is_defined) { + return getri_batch_impl; + } + else { + return nullptr; + } + } +}; + +void init_getri_batch_dispatch_vector(void) +{ + dpctl_td_ns::DispatchVectorBuilder + contig; + contig.populate_dispatch_vector(getri_batch_dispatch_vector); +} +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/lapack/lapack_py.cpp b/dpnp/backend/extensions/lapack/lapack_py.cpp index f97a0dc4a43..71991be3652 100644 --- a/dpnp/backend/extensions/lapack/lapack_py.cpp +++ b/dpnp/backend/extensions/lapack/lapack_py.cpp @@ -32,6 +32,7 @@ #include "gesv.hpp" #include "getrf.hpp" +#include "getri.hpp" #include "heevd.hpp" #include "linalg_exceptions.hpp" #include "potrf.hpp" @@ -46,6 +47,7 @@ void init_dispatch_vectors(void) lapack_ext::init_gesv_dispatch_vector(); lapack_ext::init_getrf_batch_dispatch_vector(); lapack_ext::init_getrf_dispatch_vector(); + lapack_ext::init_getri_batch_dispatch_vector(); lapack_ext::init_potrf_batch_dispatch_vector(); lapack_ext::init_potrf_dispatch_vector(); lapack_ext::init_syevd_dispatch_vector(); @@ -88,6 +90,14 @@ PYBIND11_MODULE(_lapack_impl, m) py::arg("stride_ipiv"), py::arg("batch_size"), py::arg("depends") = py::list()); + m.def("_getri_batch", &lapack_ext::getri_batch, + "Call `getri_batch` from OneMKL LAPACK library to return " + "the inverses of a batch of LU-factored matrices", + py::arg("sycl_queue"), py::arg("a_array"), py::arg("ipiv_array"), + py::arg("dev_info"), py::arg("n"), py::arg("stride_a"), + py::arg("stride_ipiv"), py::arg("batch_size"), + py::arg("depends") = py::list()); + m.def("_heevd", &lapack_ext::heevd, "Call `heevd` from OneMKL LAPACK library to return " "the eigenvalues and eigenvectors of a complex Hermitian matrix", diff --git a/dpnp/backend/extensions/lapack/types_matrix.hpp b/dpnp/backend/extensions/lapack/types_matrix.hpp index 0277e630738..7e5413b84c8 100644 --- a/dpnp/backend/extensions/lapack/types_matrix.hpp +++ b/dpnp/backend/extensions/lapack/types_matrix.hpp @@ -122,6 +122,32 @@ struct GetrfBatchTypePairSupportFactory dpctl_td_ns::NotDefinedEntry>::is_defined; }; +/** + * @brief A factory to define pairs of supported types for which + * MKL LAPACK library provides support in oneapi::mkl::lapack::getri_batch + * function. + * + * @tparam T Type of array containing input matrix (LU-factored form), + * as well as the output array for storing the inverse of the matrix. + */ +template +struct GetriBatchTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + T, + std::complex>, + dpctl_td_ns::TypePairDefinedEntry, + T, + std::complex>, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; + /** * @brief A factory to define pairs of supported types for which * MKL LAPACK library provides support in oneapi::mkl::lapack::heevd diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp index 87826cc3bb9..2e2ce5ab144 100644 --- a/dpnp/backend/include/dpnp_iface_fptr.hpp +++ b/dpnp/backend/include/dpnp_iface_fptr.hpp @@ -174,8 +174,6 @@ enum class DPNPFuncName : size_t DPNP_FN_INITVAL_EXT, /**< Used in numpy ones, ones_like, zeros, zeros_like impls */ DPNP_FN_INV, /**< Used in numpy.linalg.inv() impl */ - DPNP_FN_INV_EXT, /**< Used in numpy.linalg.inv() impl, requires extra - parameters */ DPNP_FN_INVERT, /**< Used in numpy.invert() impl */ DPNP_FN_KRON, /**< Used in numpy.kron() impl */ DPNP_FN_KRON_EXT, /**< Used in numpy.kron() impl, requires extra parameters diff --git a/dpnp/backend/kernels/dpnp_krnl_linalg.cpp b/dpnp/backend/kernels/dpnp_krnl_linalg.cpp index 297f963ba28..e0b6de5b1b6 100644 --- a/dpnp/backend/kernels/dpnp_krnl_linalg.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_linalg.cpp @@ -378,15 +378,6 @@ template void (*dpnp_inv_default_c)(void *, void *, shape_elem_type *, size_t) = dpnp_inv_c<_DataType, _ResultType>; -template -DPCTLSyclEventRef (*dpnp_inv_ext_c)(DPCTLSyclQueueRef, - void *, - void *, - shape_elem_type *, - size_t, - const DPCTLEventVectorRef) = - dpnp_inv_c<_DataType, _ResultType>; - template class dpnp_kron_c_kernel; @@ -869,27 +860,6 @@ void func_map_init_linalg_func(func_map_t &fmap) fmap[DPNPFuncName::DPNP_FN_INV][eft_DBL][eft_DBL] = { eft_DBL, (void *)dpnp_inv_default_c}; - fmap[DPNPFuncName::DPNP_FN_INV_EXT][eft_INT][eft_INT] = { - get_default_floating_type(), - (void *)dpnp_inv_ext_c< - int32_t, func_type_map_t::find_type>, - get_default_floating_type(), - (void *)dpnp_inv_ext_c< - int32_t, func_type_map_t::find_type< - get_default_floating_type()>>}; - fmap[DPNPFuncName::DPNP_FN_INV_EXT][eft_LNG][eft_LNG] = { - get_default_floating_type(), - (void *)dpnp_inv_ext_c< - int64_t, func_type_map_t::find_type>, - get_default_floating_type(), - (void *)dpnp_inv_ext_c< - int64_t, func_type_map_t::find_type< - get_default_floating_type()>>}; - fmap[DPNPFuncName::DPNP_FN_INV_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_inv_ext_c}; - fmap[DPNPFuncName::DPNP_FN_INV_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_inv_ext_c}; - fmap[DPNPFuncName::DPNP_FN_KRON][eft_INT][eft_INT] = { eft_INT, (void *)dpnp_kron_default_c}; fmap[DPNPFuncName::DPNP_FN_KRON][eft_INT][eft_LNG] = { diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index 3fcb67aac9e..895b393aeff 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -78,8 +78,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_FMOD_EXT DPNP_FN_FULL DPNP_FN_FULL_LIKE - DPNP_FN_INV - DPNP_FN_INV_EXT DPNP_FN_KRON DPNP_FN_KRON_EXT DPNP_FN_MATRIX_RANK diff --git a/dpnp/linalg/dpnp_algo_linalg.pyx b/dpnp/linalg/dpnp_algo_linalg.pyx index 6cf3bd3578f..1d94a893fff 100644 --- a/dpnp/linalg/dpnp_algo_linalg.pyx +++ b/dpnp/linalg/dpnp_algo_linalg.pyx @@ -48,7 +48,6 @@ __all__ = [ "dpnp_cond", "dpnp_eig", "dpnp_eigvals", - "dpnp_inv", "dpnp_matrix_rank", "dpnp_norm", "dpnp_qr", @@ -186,46 +185,6 @@ cpdef utils.dpnp_descriptor dpnp_eigvals(utils.dpnp_descriptor input): return res_val -cpdef utils.dpnp_descriptor dpnp_inv(utils.dpnp_descriptor input): - cdef shape_type_c input_shape = input.shape - - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype) - - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_INV_EXT, param1_type, param1_type) - - input_obj = input.get_array() - - cdef (DPNPFuncType, void *) ret_type_and_func = utils.get_ret_type_and_func(kernel_data, - input_obj.sycl_device.has_aspect_fp64) - cdef DPNPFuncType return_type = ret_type_and_func[0] - cdef custom_linalg_1in_1out_func_ptr_t func = < custom_linalg_1in_1out_func_ptr_t > ret_type_and_func[1] - - # create result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(input_shape, - return_type, - None, - device=input_obj.sycl_device, - usm_type=input_obj.usm_type, - sycl_queue=input_obj.sycl_queue) - - result_sycl_queue = result.get_array().sycl_queue - - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, - input.get_data(), - result.get_data(), - input_shape.data(), - input.ndim, - NULL) # dep_events_ref - - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) - c_dpctl.DPCTLEvent_Delete(event_ref) - - return result - - cpdef utils.dpnp_descriptor dpnp_matrix_rank(utils.dpnp_descriptor input): cdef shape_type_c input_shape = input.shape cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype) diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index 2f7420d1329..800aa8de1bb 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -50,6 +50,7 @@ dpnp_cholesky, dpnp_det, dpnp_eigh, + dpnp_inv, dpnp_slogdet, dpnp_solve, ) @@ -316,30 +317,51 @@ def eigvals(input): return call_origin(numpy.linalg.eigvals, input) -def inv(input): +def inv(a): """ - Divide arguments element-wise. + Compute the (multiplicative) inverse of a matrix. + + Given a square matrix a, return the matrix ainv + satisfying ``dot(a, ainv) = dot(ainv, a) = eye(a.shape[0])``. For full documentation refer to :obj:`numpy.linalg.inv`. - Limitations - ----------- - Input array is supported as :obj:`dpnp.ndarray`. - Dimension of input array is supported to be equal to ``2``. - Shape of input array is limited by ``input.shape[0] == input.shape[1]``, ``input.shape[0] >= 2``. - Otherwise the function will be executed sequentially on CPU. + Parameters + ---------- + a : (..., M, M) {dpnp.ndarray, usm_ndarray} + Matrix to be inverted. + + Returns + ------- + out : (..., M, M) dpnp.ndarray + (Multiplicative) inverse of the matrix a. + + Examples + -------- + >>> import dpnp as np + >>> a = np.array([[1., 2.], [3., 4.]]) + >>> ainv = np.linalg.inv(a) + >>> np.allclose(np.dot(a, ainv), np.eye(2)) + array([ True]) + >>> np.allclose(np.dot(ainv, a), np.eye(2)) + array([ True]) + + Inverses of several matrices can be computed at once: + >>> a = np.array([[[1., 2.], [3., 4.]], [[1, 3], [3, 5]]]) + >>> np.linalg.inv(a) + array([[[-2. , 1. ], + [ 1.5 , -0.5 ]], + + [[-1.25, 0.75], + [ 0.75, -0.25]]]) + """ - x1_desc = dpnp.get_dpnp_descriptor(input, copy_when_nondefault_queue=False) - if x1_desc: - if ( - x1_desc.ndim == 2 - and x1_desc.shape[0] == x1_desc.shape[1] - and x1_desc.shape[0] >= 2 - ): - return dpnp_inv(x1_desc).get_pyobj() + dpnp.check_supported_arrays_type(a) + check_stacked_2d(a) + check_stacked_square(a) - return call_origin(numpy.linalg.inv, input) + return dpnp_inv(a) def matrix_power(input, count): diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 40159ac02bc..f2632b5b6a4 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -37,6 +37,7 @@ "dpnp_cholesky", "dpnp_det", "dpnp_eigh", + "dpnp_inv", "dpnp_slogdet", "dpnp_solve", ] @@ -96,6 +97,33 @@ def _calculate_determinant_sign(ipiv, diag, res_type, n): return sign.astype(res_type) +def _check_lapack_dev_info(dev_info, error_msg=None): + """ + Check `dev_info` from OneMKL LAPACK routines, raising an error for failures. + + Parameters + ---------- + dev_info : list of ints + Each element of the list indicates the status of OneMKL LAPACK routine calls. + A non-zero value signifies a failure. + + error_message : str, optional + Custom error message for detected LAPACK errors. + Default: `Singular matrix` + + Raises + ------ + dpnp.linalg.LinAlgError + On non-zero elements in dev_info, indicating LAPACK errors. + + """ + + if any(dev_info): + error_msg = error_msg or "Singular matrix" + + raise dpnp.linalg.LinAlgError(error_msg) + + def _real_type(dtype, device=None): """ Returns the real data type corresponding to a given dpnp data type. @@ -740,6 +768,145 @@ def dpnp_eigh(a, UPLO): return w, out_v +def dpnp_inv_batched(a, res_type): + """ + dpnp_inv_batched(a, res_type) + + Return the inverses of each matrix in a batch of matrices `a`. + + The inverse of a matrix is such that if it is multiplied by the original matrix, + it results in the identity matrix. This function computes the inverses of a batch + of square matrices. + """ + + orig_shape = a.shape + # get 3d input arrays by reshape + a = a.reshape(-1, orig_shape[-2], orig_shape[-1]) + batch_size = a.shape[0] + a_usm_arr = dpnp.get_usm_ndarray(a) + a_sycl_queue = a.sycl_queue + a_usm_type = a.usm_type + n = a.shape[1] + + # oneMKL LAPACK getri_batch overwrites `a` + a_h = dpnp.empty_like(a, order="C", dtype=res_type, usm_type=a_usm_type) + ipiv_h = dpnp.empty( + (batch_size, n), + dtype=dpnp.int64, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + dev_info = [0] * batch_size + + # use DPCTL tensor function to fill the matrix array + # with content from the input array `a` + a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a.sycl_queue + ) + + ipiv_stride = n + a_stride = a_h.strides[0] + + # Call the LAPACK extension function _getrf_batch + # to perform LU decomposition of a batch of general matrices + ht_getrf_ev, getrf_ev = li._getrf_batch( + a_sycl_queue, + a_h.get_array(), + ipiv_h.get_array(), + dev_info, + n, + a_stride, + ipiv_stride, + batch_size, + [a_copy_ev], + ) + + _check_lapack_dev_info(dev_info) + + # Call the LAPACK extension function _getri_batch + # to compute the inverse of a batch of matrices using the results + # from the LU decomposition performed by _getrf_batch + ht_getri_ev, _ = li._getri_batch( + a_sycl_queue, + a_h.get_array(), + ipiv_h.get_array(), + dev_info, + n, + a_stride, + ipiv_stride, + batch_size, + [getrf_ev], + ) + + _check_lapack_dev_info(dev_info) + + ht_getri_ev.wait() + ht_getrf_ev.wait() + a_ht_copy_ev.wait() + + return a_h.reshape(orig_shape) + + +def dpnp_inv(a): + """ + dpnp_inv(a) + + Return the inverse of `a` matrix. + + The inverse of a matrix is such that if it is multiplied by the original matrix, + it results in the identity matrix. This function computes the inverse of a single + square matrix. + + """ + + res_type = _common_type(a) + if a.size == 0: + return dpnp.empty_like(a, dtype=res_type) + + if a.ndim >= 3: + return dpnp_inv_batched(a, res_type) + + a_usm_arr = dpnp.get_usm_ndarray(a) + a_sycl_queue = a.sycl_queue + a_usm_type = a.usm_type + + a_order = "C" if a.flags.c_contiguous else "F" + a_shape = a.shape + + # oneMKL LAPACK gesv overwrites `a` and assumes fortran-like array as input. + # To use C-contiguous arrays, we transpose them before passing to gesv. + # This transposition is effective because the input array `a` is square. + a_f = dpnp.empty_like(a, order=a_order, dtype=res_type) + + # use DPCTL tensor function to fill the coefficient matrix array + # with content from the input array `a` + a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, dst=a_f.get_array(), sycl_queue=a_sycl_queue + ) + + b_f = dpnp.eye( + a_shape[0], + dtype=res_type, + order=a_order, + sycl_queue=a_sycl_queue, + usm_type=a_usm_type, + ) + + if a_order == "F": + ht_lapack_ev, _ = li._gesv( + a_sycl_queue, a_f.get_array(), b_f.get_array(), [a_copy_ev] + ) + else: + ht_lapack_ev, _ = li._gesv( + a_sycl_queue, a_f.T.get_array(), b_f.T.get_array(), [a_copy_ev] + ) + + ht_lapack_ev.wait() + a_ht_copy_ev.wait() + + return b_f + + def dpnp_solve(a, b): """ dpnp_solve(a, b) diff --git a/tests/test_linalg.py b/tests/test_linalg.py index 5f38421c6ec..5ea536c2887 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -435,18 +435,128 @@ def test_eigvals(type): assert_allclose(expected, result, atol=0.5) -@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) -@pytest.mark.parametrize( - "array", - [[[1.0, 2.0], [3.0, 4.0]], [[0, 1, 2], [3, 2, -1], [4, -2, 3]]], - ids=["[[1., 2.], [3., 4.]]", "[[0, 1, 2], [3, 2, -1], [4, -2, 3]]"], -) -def test_inv(type, array): - a = numpy.array(array, dtype=type) - ia = inp.array(a) - result = inp.linalg.inv(ia) - expected = numpy.linalg.inv(a) - assert_allclose(expected, result, rtol=1e-06) +class TestInv: + @pytest.mark.parametrize( + "array", + [ + [[1, 2], [3, 4]], + [[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]], + [ + [[[1, 2], [3, 4]], [[1, 2], [2, 1]]], + [[[1, 3], [3, 1]], [[0, 1], [1, 3]]], + ], + ], + ids=[ + "2D_array", + "3D_array", + "4D_array", + ], + ) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_inv(self, array, dtype): + a = numpy.array(array, dtype=dtype) + ia = inp.array(a) + result = inp.linalg.inv(ia) + expected = numpy.linalg.inv(a) + assert_dtype_allclose(result, expected) + + def test_inv_strides(self): + a_np = numpy.array( + [ + [2, 3, 1, 4, 5], + [5, 6, 7, 8, 9], + [9, 7, 7, 2, 3], + [1, 4, 5, 1, 8], + [8, 9, 8, 5, 3], + ] + ) + + a_dp = inp.array(a_np) + + # positive strides + expected = numpy.linalg.inv(a_np[::2, ::2]) + result = inp.linalg.inv(a_dp[::2, ::2]) + assert_allclose(expected, result, rtol=1e-3, atol=1e-4) + + # negative strides + expected = numpy.linalg.inv(a_np[::-2, ::-2]) + result = inp.linalg.inv(a_dp[::-2, ::-2]) + assert_allclose(expected, result, rtol=1e-3, atol=1e-4) + + @pytest.mark.parametrize( + "shape", + [ + (0, 0), + (3, 0, 0), + (0, 2, 2), + ], + ids=[ + "(0, 0)", + "(3, 0, 0)", + "(0, 2, 2)", + ], + ) + def test_inv_empty(self, shape): + a = numpy.empty(shape) + ia = inp.array(a) + result = inp.linalg.inv(ia) + expected = numpy.linalg.inv(a) + assert_dtype_allclose(result, expected) + + # TODO: remove skipif when MKLD-16626 is resolved + @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") + @pytest.mark.parametrize( + "matrix", + [ + [[1, 2], [2, 4]], + [[0, 0], [0, 0]], + [[1, 1], [1, 1]], + [[2, 4], [1, 2]], + [[1, 2], [0, 0]], + [[1, 0], [2, 0]], + ], + ids=[ + "Linearly dependent rows", + "Zero matrix", + "Identical rows", + "Linearly dependent columns", + "Zero row", + "Zero column", + ], + ) + def test_inv_singular_matrix(self, matrix): + a_np = numpy.array(matrix, dtype="float32") + a_dp = inp.array(a_np) + + assert_raises(numpy.linalg.LinAlgError, numpy.linalg.inv, a_np) + assert_raises(inp.linalg.LinAlgError, inp.linalg.inv, a_dp) + + # TODO: remove skipif when MKLD-16626 is resolved + # _getrf_batch does not raise an error with singular matrices. + @pytest.mark.skip("MKLD-16626") + def test_inv_singular_matrix_3D(self): + a_np = numpy.array( + [[[1, 2], [3, 4]], [[1, 2], [1, 2]], [[1, 3], [3, 1]]] + ) + a_dp = inp.array(a_np) + + assert_raises(numpy.linalg.LinAlgError, numpy.linalg.inv, a_np) + assert_raises(inp.linalg.LinAlgError, inp.linalg.inv, a_dp) + + def test_inv_errors(self): + a_dp = inp.array([[1, 2], [2, 5]], dtype="float32") + + # unsupported type + a_np = inp.asnumpy(a_dp) + assert_raises(TypeError, inp.linalg.inv, a_np) + + # a.ndim < 2 + a_dp_ndim_1 = a_dp.flatten() + assert_raises(inp.linalg.LinAlgError, inp.linalg.inv, a_dp_ndim_1) + + # a is not square + a_dp = inp.ones((2, 3)) + assert_raises(inp.linalg.LinAlgError, inp.linalg.inv, a_dp) @pytest.mark.parametrize( diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index ee247aa5414..4b0d21eae40 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -1136,21 +1136,42 @@ def test_eigvals(device): assert_sycl_queue_equal(result_queue, expected_queue) +@pytest.mark.parametrize( + "shape, is_empty", + [ + ((2, 2), False), + ((3, 2, 2), False), + ((0, 0), True), + ((0, 2, 2), True), + ], + ids=[ + "(2, 2)", + "(3, 2, 2)", + "(0, 0)", + "(0, 2, 2)", + ], +) @pytest.mark.parametrize( "device", valid_devices, ids=[device.filter_string for device in valid_devices], ) -def test_inv(device): - data = [[1.0, 2.0], [3.0, 4.0]] - numpy_data = numpy.array(data) - dpnp_data = dpnp.array(data, device=device) +def test_inv(shape, is_empty, device): + if is_empty: + numpy_x = numpy.empty(shape, dtype=dpnp.default_float_type(device)) + else: + count_elem = numpy.prod(shape) + numpy_x = numpy.arange( + 1, count_elem + 1, dtype=dpnp.default_float_type() + ).reshape(shape) - result = dpnp.linalg.inv(dpnp_data) - expected = numpy.linalg.inv(numpy_data) - assert_allclose(expected, result) + dpnp_x = dpnp.array(numpy_x, device=device) - expected_queue = dpnp_data.get_array().sycl_queue + result = dpnp.linalg.inv(dpnp_x) + expected = numpy.linalg.inv(numpy_x) + assert_dtype_allclose(result, expected) + + expected_queue = dpnp_x.get_array().sycl_queue result_queue = result.get_array().sycl_queue assert_sycl_queue_equal(result_queue, expected_queue) diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index 554f939b208..ada68ebfa6c 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -710,3 +710,33 @@ def test_det(shape, is_empty, usm_type): det = dp.linalg.det(x) assert x.usm_type == det.usm_type + + +@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize( + "shape, is_empty", + [ + ((2, 2), False), + ((3, 2, 2), False), + ((0, 0), True), + ((0, 2, 2), True), + ], + ids=[ + "(2, 2)", + "(3, 2, 2)", + "(0, 0)", + "(0, 2, 2)", + ], +) +def test_inv(shape, is_empty, usm_type): + if is_empty: + x = dp.empty(shape, dtype=dp.default_float_type(), usm_type=usm_type) + else: + count_elem = numpy.prod(shape) + x = dp.arange( + 1, count_elem + 1, dtype=dp.default_float_type(), usm_type=usm_type + ).reshape(shape) + + result = dp.linalg.inv(x) + + assert x.usm_type == result.usm_type diff --git a/tests/third_party/cupy/linalg_tests/test_solve.py b/tests/third_party/cupy/linalg_tests/test_solve.py index 6194cf6b8ac..b31082c8e84 100644 --- a/tests/third_party/cupy/linalg_tests/test_solve.py +++ b/tests/third_party/cupy/linalg_tests/test_solve.py @@ -4,8 +4,13 @@ import pytest import dpnp as cupy -from tests.helper import has_support_aspect64 +from tests.helper import ( + assert_dtype_allclose, + has_support_aspect64, + is_cpu_device, +) from tests.third_party.cupy import testing +from tests.third_party.cupy.testing import condition @testing.parameterize( @@ -38,8 +43,8 @@ def check_x(self, a_shape, b_shape, xp, dtype): a_copy = a.copy() b_copy = b.copy() result = xp.linalg.solve(a, b) - numpy.testing.assert_array_equal(a_copy, a) - numpy.testing.assert_array_equal(b_copy, b) + testing.assert_array_equal(a_copy, a) + testing.assert_array_equal(b_copy, b) return result def test_solve(self): @@ -88,3 +93,76 @@ def test_invalid_shape(self): self.check_shape((2, 3, 3), (3,), value_errors) self.check_shape((3, 3), (0,), value_errors) self.check_shape((0, 3, 4), (3,), linalg_errors) + + +@testing.parameterize( + *testing.product( + { + "order": ["C", "F"], + } + ) +) +class TestInv(unittest.TestCase): + @testing.for_dtypes("ifdFD") + @condition.retry(10) + def check_x(self, a_shape, dtype): + a_cpu = numpy.random.randint(0, 10, size=a_shape) + a_cpu = a_cpu.astype(dtype, order=self.order) + a_gpu = cupy.asarray(a_cpu, order=self.order) + a_gpu_copy = a_gpu.copy() + result_cpu = numpy.linalg.inv(a_cpu) + result_gpu = cupy.linalg.inv(a_gpu) + + assert_dtype_allclose(result_gpu, result_cpu) + testing.assert_array_equal(a_gpu_copy, a_gpu) + + def check_shape(self, a_shape): + a = cupy.random.rand(*a_shape) + with self.assertRaises( + (numpy.linalg.LinAlgError, cupy.linalg.LinAlgError) + ): + cupy.linalg.inv(a) + + def test_inv(self): + self.check_x((3, 3)) + self.check_x((4, 4)) + self.check_x((5, 5)) + self.check_x((2, 5, 5)) + self.check_x((3, 4, 4)) + self.check_x((4, 2, 3, 3)) + self.check_x((0, 0)) + self.check_x((3, 0, 0)) + self.check_x((2, 0, 3, 4, 4)) + + def test_invalid_shape(self): + self.check_shape((2, 3)) + self.check_shape((4, 1)) + self.check_shape((4, 3, 2)) + self.check_shape((2, 4, 3)) + self.check_shape((2, 0)) + self.check_shape((0, 2, 3)) + + +class TestInvInvalid(unittest.TestCase): + # TODO: remove skipif when MKLD-16626 is resolved + # _gesv does not raise an error with singular matrices on CPU. + @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") + @testing.for_dtypes("ifdFD") + def test_inv(self, dtype): + for xp in (numpy, cupy): + a = xp.array([[1, 2], [2, 4]]).astype(dtype) + with pytest.raises( + (numpy.linalg.LinAlgError, cupy.linalg.LinAlgError) + ): + xp.linalg.inv(a) + + # TODO: remove skipif when MKLD-16626 is resolved + # _getrf_batch does not raise an error with singular matrices. + @pytest.mark.skip("MKLD-16626") + @testing.for_dtypes("ifdFD") + def test_batched_inv(self, dtype): + for xp in (numpy, cupy): + a = xp.array([[[1, 2], [2, 4]]]).astype(dtype) + assert a.ndim >= 3 # CuPy internally uses a batched function. + with pytest.raises(xp.linalg.LinAlgError): + xp.linalg.inv(a)