Update dpnp.linalg.qr() function (IntelPython#1673)

* Impl dpnp.linalg.qr for 2d array * Add cupy tests for dpnp.linalg.qr * Add batch implementation of dpnp.linalg.qr * Remove an old impl of dpnp_qr * Update test_qr in test_sycl_queue * Add test_qr in test_usm_type * Use _real_type for _orgqr * Use _real_type for _orgqr_batch * Update dpnp tests for dpnp.linalg.qr * Pass scratchpad_size to the error message test * Add additional checks * Extend error handler for mkl batch funcs * Add ungqr mkl extension to support complex dtype * Update tau array size check for orgqr * Add ungqr_batch mkl extension to support complex dtype * Add arrays type check * Fix test_det_singular_matrix * Expand tests for dpnp.linalg.qr with complex types * Update examples * Remove astype for output arrays * Use empty_like instead of empty * Use ht_list_ev with dpctl.SyclEvent.wait_for * Add _triu_inplace func * Use copy_usm for a_t array overwritten by geqrf/geqrf_batch --------- Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com>
antonwolfy · Feb 8, 2024 · 1e86753 · 1e86753
1 parent d45bb24
commit 1e86753
Show file tree

Hide file tree

Showing 22 changed files with 2,767 additions and 246 deletions.
diff --git a/dpnp/backend/extensions/lapack/CMakeLists.txt b/dpnp/backend/extensions/lapack/CMakeLists.txt
@@ -27,15 +27,21 @@
 set(python_module_name _lapack_impl)
 set(_module_src
     ${CMAKE_CURRENT_SOURCE_DIR}/lapack_py.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/geqrf.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/geqrf_batch.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/gesv.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/gesvd.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/getrf.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/getrf_batch.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/getri_batch.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/heevd.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/orgqr.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/orgqr_batch.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/potrf.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/potrf_batch.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/syevd.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/ungqr.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/ungqr_batch.cpp
 )
 
 pybind11_add_module(${python_module_name} MODULE ${_module_src})

diff --git a/dpnp/backend/extensions/lapack/geqrf.cpp b/dpnp/backend/extensions/lapack/geqrf.cpp
@@ -0,0 +1,262 @@
+//*****************************************************************************
+// Copyright (c) 2024, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+#include <pybind11/pybind11.h>
+
+// dpctl tensor headers
+#include "utils/memory_overlap.hpp"
+#include "utils/type_utils.hpp"
+
+#include "geqrf.hpp"
+#include "types_matrix.hpp"
+
+#include "dpnp_utils.hpp"
+
+namespace dpnp
+{
+namespace backend
+{
+namespace ext
+{
+namespace lapack
+{
+namespace mkl_lapack = oneapi::mkl::lapack;
+namespace py = pybind11;
+namespace type_utils = dpctl::tensor::type_utils;
+
+typedef sycl::event (*geqrf_impl_fn_ptr_t)(sycl::queue,
+                                           const std::int64_t,
+                                           const std::int64_t,
+                                           char *,
+                                           std::int64_t,
+                                           char *,
+                                           std::vector<sycl::event> &,
+                                           const std::vector<sycl::event> &);
+
+static geqrf_impl_fn_ptr_t geqrf_dispatch_vector[dpctl_td_ns::num_types];
+
+template <typename T>
+static sycl::event geqrf_impl(sycl::queue exec_q,
+                              const std::int64_t m,
+                              const std::int64_t n,
+                              char *in_a,
+                              std::int64_t lda,
+                              char *in_tau,
+                              std::vector<sycl::event> &host_task_events,
+                              const std::vector<sycl::event> &depends)
+{
+    type_utils::validate_type_for_device<T>(exec_q);
+
+    T *a = reinterpret_cast<T *>(in_a);
+    T *tau = reinterpret_cast<T *>(in_tau);
+
+    const std::int64_t scratchpad_size =
+        mkl_lapack::geqrf_scratchpad_size<T>(exec_q, m, n, lda);
+    T *scratchpad = nullptr;
+
+    std::stringstream error_msg;
+    std::int64_t info = 0;
+    bool is_exception_caught = false;
+
+    sycl::event geqrf_event;
+    try {
+        scratchpad = sycl::malloc_device<T>(scratchpad_size, exec_q);
+
+        geqrf_event = mkl_lapack::geqrf(
+            exec_q,
+            m,          // The number of rows in the matrix; (0 ≤ m).
+            n,          // The number of columns in the matrix; (0 ≤ n).
+            a,          // Pointer to the m-by-n matrix.
+            lda,        // The leading dimension of `a`; (1 ≤ m).
+            tau,        // Pointer to the array of scalar factors of the
+                        // elementary reflectors.
+            scratchpad, // Pointer to scratchpad memory to be used by MKL
+                        // routine for storing intermediate results.
+            scratchpad_size, depends);
+    } catch (mkl_lapack::exception const &e) {
+        is_exception_caught = true;
+        info = e.info();
+
+        if (info < 0) {
+            error_msg << "Parameter number " << -info
+                      << " had an illegal value.";
+        }
+        else if (info == scratchpad_size && e.detail() != 0) {
+            error_msg
+                << "Insufficient scratchpad size. Required size is at least "
+                << e.detail() << ", but current size is " << scratchpad_size
+                << ".";
+        }
+        else {
+            error_msg << "Unexpected MKL exception caught during geqrf() "
+                         "call:\nreason: "
+                      << e.what() << "\ninfo: " << info;
+        }
+    } catch (sycl::exception const &e) {
+        is_exception_caught = true;
+        error_msg << "Unexpected SYCL exception caught during geqrf() call:\n"
+                  << e.what();
+    }
+
+    if (is_exception_caught) // an unexpected error occurs
+    {
+        if (scratchpad != nullptr) {
+            sycl::free(scratchpad, exec_q);
+        }
+        throw std::runtime_error(error_msg.str());
+    }
+
+    sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) {
+        cgh.depends_on(geqrf_event);
+        auto ctx = exec_q.get_context();
+        cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); });
+    });
+    host_task_events.push_back(clean_up_event);
+
+    return geqrf_event;
+}
+
+std::pair<sycl::event, sycl::event>
+    geqrf(sycl::queue q,
+          dpctl::tensor::usm_ndarray a_array,
+          dpctl::tensor::usm_ndarray tau_array,
+          const std::vector<sycl::event> &depends)
+{
+    const int a_array_nd = a_array.get_ndim();
+    const int tau_array_nd = tau_array.get_ndim();
+
+    if (a_array_nd != 2) {
+        throw py::value_error(
+            "The input array has ndim=" + std::to_string(a_array_nd) +
+            ", but a 2-dimensional array is expected.");
+    }
+
+    if (tau_array_nd != 1) {
+        throw py::value_error("The array of Householder scalars has ndim=" +
+                              std::to_string(tau_array_nd) +
+                              ", but a 1-dimensional array is expected.");
+    }
+
+    // check compatibility of execution queue and allocation queue
+    if (!dpctl::utils::queues_are_compatible(q, {a_array, tau_array})) {
+        throw py::value_error(
+            "Execution queue is not compatible with allocation queues");
+    }
+
+    auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
+    if (overlap(a_array, tau_array)) {
+        throw py::value_error(
+            "The input array and the array of Householder scalars "
+            "are overlapping segments of memory");
+    }
+
+    bool is_a_array_c_contig = a_array.is_c_contiguous();
+    if (!is_a_array_c_contig) {
+        throw py::value_error("The input array "
+                              "must be C-contiguous");
+    }
+
+    bool is_tau_array_c_contig = tau_array.is_c_contiguous();
+    bool is_tau_array_f_contig = tau_array.is_f_contiguous();
+
+    if (!is_tau_array_c_contig || !is_tau_array_f_contig) {
+        throw py::value_error("The array of Householder scalars "
+                              "must be contiguous");
+    }
+
+    auto array_types = dpctl_td_ns::usm_ndarray_types();
+    int a_array_type_id =
+        array_types.typenum_to_lookup_id(a_array.get_typenum());
+    int tau_array_type_id =
+        array_types.typenum_to_lookup_id(tau_array.get_typenum());
+
+    if (a_array_type_id != tau_array_type_id) {
+        throw py::value_error(
+            "The types of the input array and "
+            "the array of Householder scalars are mismatched");
+    }
+
+    geqrf_impl_fn_ptr_t geqrf_fn = geqrf_dispatch_vector[a_array_type_id];
+    if (geqrf_fn == nullptr) {
+        throw py::value_error(
+            "No geqrf implementation defined for the provided type "
+            "of the input matrix.");
+    }
+
+    char *a_array_data = a_array.get_data();
+    char *tau_array_data = tau_array.get_data();
+
+    const py::ssize_t *a_array_shape = a_array.get_shape_raw();
+
+    // The input array is transponded
+    // Change the order of getting m, n
+    const std::int64_t m = a_array_shape[1];
+    const std::int64_t n = a_array_shape[0];
+    const std::int64_t lda = std::max<size_t>(1UL, m);
+
+    const size_t tau_array_size = tau_array.get_size();
+    const size_t min_m_n = std::max<size_t>(1UL, std::min<size_t>(m, n));
+
+    if (tau_array_size != min_m_n) {
+        throw py::value_error("The array of Householder scalars has size=" +
+                              std::to_string(tau_array_size) + ", but a size=" +
+                              std::to_string(min_m_n) + " array is expected.");
+    }
+
+    std::vector<sycl::event> host_task_events;
+    sycl::event geqrf_ev = geqrf_fn(q, m, n, a_array_data, lda, tau_array_data,
+                                    host_task_events, depends);
+
+    sycl::event args_ev = dpctl::utils::keep_args_alive(q, {a_array, tau_array},
+                                                        host_task_events);
+
+    return std::make_pair(args_ev, geqrf_ev);
+}
+
+template <typename fnT, typename T>
+struct GeqrfContigFactory
+{
+    fnT get()
+    {
+        if constexpr (types::GeqrfTypePairSupportFactory<T>::is_defined) {
+            return geqrf_impl<T>;
+        }
+        else {
+            return nullptr;
+        }
+    }
+};
+
+void init_geqrf_dispatch_vector(void)
+{
+    dpctl_td_ns::DispatchVectorBuilder<geqrf_impl_fn_ptr_t, GeqrfContigFactory,
+                                       dpctl_td_ns::num_types>
+        contig;
+    contig.populate_dispatch_vector(geqrf_dispatch_vector);
+}
+} // namespace lapack
+} // namespace ext
+} // namespace backend
+} // namespace dpnp
diff --git a/dpnp/backend/extensions/lapack/geqrf.hpp b/dpnp/backend/extensions/lapack/geqrf.hpp
@@ -0,0 +1,63 @@
+//*****************************************************************************
+// Copyright (c) 2024, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+#pragma once
+
+#include <CL/sycl.hpp>
+#include <oneapi/mkl.hpp>
+
+#include <dpctl4pybind11.hpp>
+
+namespace dpnp
+{
+namespace backend
+{
+namespace ext
+{
+namespace lapack
+{
+extern std::pair<sycl::event, sycl::event>
+    geqrf(sycl::queue exec_q,
+          dpctl::tensor::usm_ndarray a_array,
+          dpctl::tensor::usm_ndarray tau_array,
+          const std::vector<sycl::event> &depends = {});
+
+extern std::pair<sycl::event, sycl::event>
+    geqrf_batch(sycl::queue exec_q,
+                dpctl::tensor::usm_ndarray a_array,
+                dpctl::tensor::usm_ndarray tau_array,
+                std::int64_t m,
+                std::int64_t n,
+                std::int64_t stride_a,
+                std::int64_t stride_tau,
+                std::int64_t batch_size,
+                const std::vector<sycl::event> &depends = {});
+
+extern void init_geqrf_batch_dispatch_vector(void);
+extern void init_geqrf_dispatch_vector(void);
+} // namespace lapack
+} // namespace ext
+} // namespace backend
+} // namespace dpnp