Skip to content

Commit

Permalink
Implement dpnp.cov() through existing dpnp methods (#1396)
Browse files Browse the repository at this point in the history
* Implement dpnp.cov() through existing dpnp methods

* Applied review comments

* Clean up the code to get rid of todo

* use dpnp.mean()
  • Loading branch information
antonwolfy authored Jun 15, 2023
1 parent 5a3f438 commit f5e6425
Show file tree
Hide file tree
Showing 9 changed files with 233 additions and 125 deletions.
1 change: 0 additions & 1 deletion dpnp/backend/include/dpnp_iface_fptr.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,6 @@ enum class DPNPFuncName : size_t
DPNP_FN_COUNT_NONZERO, /**< Used in numpy.count_nonzero() impl */
DPNP_FN_COUNT_NONZERO_EXT, /**< Used in numpy.count_nonzero() impl, requires extra parameters */
DPNP_FN_COV, /**< Used in numpy.cov() impl */
DPNP_FN_COV_EXT, /**< Used in numpy.cov() impl, requires extra parameters */
DPNP_FN_CROSS, /**< Used in numpy.cross() impl */
DPNP_FN_CROSS_EXT, /**< Used in numpy.cross() impl, requires extra parameters */
DPNP_FN_CUMPROD, /**< Used in numpy.cumprod() impl */
Expand Down
13 changes: 0 additions & 13 deletions dpnp/backend/kernels/dpnp_krnl_statistics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,14 +243,6 @@ void dpnp_cov_c(void* array1_in, void* result1, size_t nrows, size_t ncols)
// Function-pointer variable template bound to dpnp_cov_c<_DataType> through the
// signature (void*, void*, size_t, size_t) returning void, i.e. the variant that
// takes no queue or dependent-event arguments.
template <typename _DataType>
void (*dpnp_cov_default_c)(void*, void*, size_t, size_t) = dpnp_cov_c<_DataType>;

// Function-pointer variable template bound to dpnp_cov_c<_DataType> through the
// extended signature: it takes a DPCTLSyclQueueRef first and a
// DPCTLEventVectorRef last, and returns a DPCTLSyclEventRef.
template <typename _DataType>
DPCTLSyclEventRef (*dpnp_cov_ext_c)(DPCTLSyclQueueRef,
void*,
void*,
size_t,
size_t,
const DPCTLEventVectorRef) = dpnp_cov_c<_DataType>;

template <typename _DataType_input, typename _DataType_output>
DPCTLSyclEventRef dpnp_count_nonzero_c(DPCTLSyclQueueRef q_ref,
void* array1_in,
Expand Down Expand Up @@ -1373,11 +1365,6 @@ void func_map_init_statistics(func_map_t& fmap)
fmap[DPNPFuncName::DPNP_FN_COV][eft_FLT][eft_FLT] = {eft_DBL, (void*)dpnp_cov_default_c<double>};
fmap[DPNPFuncName::DPNP_FN_COV][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_cov_default_c<double>};

fmap[DPNPFuncName::DPNP_FN_COV_EXT][eft_INT][eft_INT] = {eft_DBL, (void*)dpnp_cov_ext_c<double>};
fmap[DPNPFuncName::DPNP_FN_COV_EXT][eft_LNG][eft_LNG] = {eft_DBL, (void*)dpnp_cov_ext_c<double>};
fmap[DPNPFuncName::DPNP_FN_COV_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_cov_ext_c<float>};
fmap[DPNPFuncName::DPNP_FN_COV_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_cov_ext_c<double>};

fmap[DPNPFuncName::DPNP_FN_MAX][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_max_default_c<int32_t>};
fmap[DPNPFuncName::DPNP_FN_MAX][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_max_default_c<int64_t>};
fmap[DPNPFuncName::DPNP_FN_MAX][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_max_default_c<float>};
Expand Down
3 changes: 0 additions & 3 deletions dpnp/dpnp_algo/dpnp_algo.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na
DPNP_FN_COS_EXT
DPNP_FN_COSH
DPNP_FN_COSH_EXT
DPNP_FN_COV
DPNP_FN_COV_EXT
DPNP_FN_COUNT_NONZERO
DPNP_FN_COUNT_NONZERO_EXT
DPNP_FN_CROSS
Expand Down Expand Up @@ -538,7 +536,6 @@ cpdef dpnp_descriptor dpnp_repeat(dpnp_descriptor array1, repeats, axes=*)
"""
Statistics functions
"""
cpdef dpnp_descriptor dpnp_cov(dpnp_descriptor array1)
cpdef dpnp_descriptor dpnp_min(dpnp_descriptor a, axis)


Expand Down
44 changes: 0 additions & 44 deletions dpnp/dpnp_algo/dpnp_algo_statistics.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ and the rest of the library
__all__ += [
"dpnp_average",
"dpnp_correlate",
"dpnp_cov",
"dpnp_max",
"dpnp_median",
"dpnp_min",
Expand Down Expand Up @@ -178,49 +177,6 @@ cpdef utils.dpnp_descriptor dpnp_correlate(utils.dpnp_descriptor x1, utils.dpnp_
return result


# Compute a covariance matrix of `array1` by dispatching to the backend kernel
# registered under DPNP_FN_COV_EXT for the input dtype.
#
# A 1-D input is treated as a single row: its shape is extended to (1, n).
# The result is a new descriptor of shape (input_shape[0], input_shape[0]) whose
# element type is the return type reported by the kernel lookup, allocated on
# the same device / USM type / SYCL queue as the input array.
cpdef utils.dpnp_descriptor dpnp_cov(utils.dpnp_descriptor array1):
cdef shape_type_c input_shape = array1.shape

# promote a 1-D input to 2-D by prepending a leading dimension of size 1
if array1.ndim == 1:
input_shape.insert(input_shape.begin(), 1)

# convert string type names (array.dtype) to C enum DPNPFuncType
cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(array1.dtype)

# get the FPTR data structure
cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_COV_EXT, param1_type, param1_type)

array1_obj = array1.get_array()

# create result array with type given by FPTR data
cdef shape_type_c result_shape = (input_shape[0], input_shape[0])
cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
kernel_data.return_type,
None,
device=array1_obj.sycl_device,
usm_type=array1_obj.usm_type,
sycl_queue=array1_obj.sycl_queue)

# the kernel is submitted to the queue the result array was allocated on
result_sycl_queue = result.get_array().sycl_queue

cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()

cdef fptr_custom_cov_1in_1out_t func = <fptr_custom_cov_1in_1out_t > kernel_data.ptr
# call FPTR function
cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
array1.get_data(),
result.get_data(),
input_shape[0],
input_shape[1],
NULL) # dep_events_ref

# wait for the returned event with the GIL released, then delete the event reference
with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
c_dpctl.DPCTLEvent_Delete(event_ref)

return result


cdef utils.dpnp_descriptor _dpnp_max(utils.dpnp_descriptor x1, _axis_, shape_type_c result_shape):
cdef shape_type_c x1_shape = x1.shape
cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype)
Expand Down
35 changes: 18 additions & 17 deletions dpnp/dpnp_iface_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@
from numpy.core.numeric import normalize_axis_tuple
from dpnp.dpnp_algo import *
from dpnp.dpnp_utils import *
from dpnp.dpnp_utils.dpnp_utils_statistics import (
dpnp_cov
)
from dpnp.dpnp_array import dpnp_array
import dpnp

Expand Down Expand Up @@ -238,13 +241,18 @@ def correlate(x1, x2, mode='valid'):
return call_origin(numpy.correlate, x1, x2, mode=mode)


def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None):
"""cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None):
def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None, *, dtype=None):
"""cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None, *, dtype=None):
Estimate a covariance matrix, given data and weights.
For full documentation refer to :obj:`numpy.cov`.
Returns
-------
out : dpnp.ndarray
The covariance matrix of the variables.
Limitations
-----------
Input array ``m`` is supported as :obj:`dpnp.ndarray`.
Expand All @@ -258,7 +266,9 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=
Otherwise the function will be executed sequentially on CPU.
Input array data types are limited by supported DPNP :ref:`Data types`.
.. see also:: :obj:`dpnp.corrcoef` normalized covariance matrix.
See Also
--------
:obj:`dpnp.corrcoef` : Normalized covariance matrix
Examples
--------
Expand All @@ -275,11 +285,10 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=
[1.0, -1.0, -1.0, 1.0]
"""
if not isinstance(x1, (dpnp_array, dpt.usm_ndarray)):
pass
elif x1.ndim > 2:

if not isinstance(m, (dpnp_array, dpt.usm_ndarray)):
pass
elif y is not None:
elif m.ndim > 2:
pass
elif bias:
pass
Expand All @@ -290,17 +299,9 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=
elif aweights is not None:
pass
else:
if not rowvar and x1.shape[0] != 1:
x1 = x1.T

if not x1.dtype in (dpnp.float32, dpnp.float64):
x1 = dpnp.astype(x1, dpnp.default_float_type(sycl_queue=x1.sycl_queue))

x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
if x1_desc:
return dpnp_cov(x1_desc).get_pyobj()
return dpnp_cov(m, y=y, rowvar=rowvar, dtype=dtype)

return call_origin(numpy.cov, x1, y, rowvar, bias, ddof, fweights, aweights)
return call_origin(numpy.cov, m, y, rowvar, bias, ddof, fweights, aweights, dtype=dtype)


def histogram(a, bins=10, range=None, density=None, weights=None):
Expand Down
117 changes: 117 additions & 0 deletions dpnp/dpnp_utils/dpnp_utils_statistics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# cython: language_level=3
# distutils: language = c++
# -*- coding: utf-8 -*-
# *****************************************************************************
# Copyright (c) 2023, Intel Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# - Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
# THE POSSIBILITY OF SUCH DAMAGE.
# *****************************************************************************


import dpnp
from dpnp.dpnp_array import dpnp_array
from dpnp.dpnp_utils import (
get_usm_allocations
)

import dpctl
import dpctl.tensor as dpt
import dpctl.tensor._tensor_impl as ti


__all__ = [
"dpnp_cov"
]

def dpnp_cov(m, y=None, rowvar=True, dtype=None):
    """
    Estimate a covariance matrix based on passed data.

    No support for given weights is provided now.

    The implementation is done through existing dpnp and dpctl methods
    instead of separate function call of dpnp backend.

    The input arrays are never modified: mean-centering is performed
    out of place.

    Parameters
    ----------
    m : array
        Input data. 0-d and 1-d inputs are promoted to 2-d.
    y : array, optional
        Additional data which is stacked after `m` along axis 0 (after
        both have been brought to the 2-d form) before the covariance
        is computed.
    rowvar : bool, optional
        When False, a prepared input whose first dimension is not 1
        is transposed.
    dtype : dtype, optional
        Data type the inputs are cast to. When None, it is the result type
        of the dtype of `m`, the default floating type for the common queue
        and, if given, the dtype of `y`.

    Returns
    -------
    The product ``X @ X.T.conj()`` of the mean-centered data scaled by
    ``1 / (X.shape[1] - 1)`` (or by NaN when that divisor is zero),
    passed through ``dpnp.squeeze``.

    Raises
    ------
    ValueError
        If `y` is given and, after preparation, `m` and `y` differ in number
        of dimensions or in any dimension other than the first one.
    """

    def _get_2dmin_array(x, dtype):
        """
        Transform an input array to a form required for building a covariance matrix.

        If applicable, it reshapes the input array to have 2 dimensions or greater.
        If applicable, it transposes the input array when 'rowvar' is False.
        It casts to another dtype, if the input array differs from requested one.

        NOTE: the returned array may be the input array itself or a view
        of it, so the caller must not modify it in place.
        """

        if x.ndim == 0:
            x = x.reshape((1, 1))
        elif x.ndim == 1:
            x = x[dpnp.newaxis, :]

        if not rowvar and x.shape[0] != 1:
            x = x.T

        if x.dtype != dtype:
            x = dpnp.astype(x, dtype)
        return x

    # input arrays must follow the compute-follows-data (CFD) paradigm
    usm_type, queue = get_usm_allocations((m, ) if y is None else (m, y))

    # calculate a type of result array if not passed explicitly
    if dtype is None:
        dtypes = [m.dtype, dpnp.default_float_type(sycl_queue=queue)]
        if y is not None:
            dtypes.append(y.dtype)
        dtype = dpt.result_type(*dtypes)

    X = _get_2dmin_array(m, dtype)
    if y is not None:
        y = _get_2dmin_array(y, dtype)

        # TODO: replace with dpnp.concatenate((X, y), axis=0) once dpctl implementation is ready
        if X.ndim != y.ndim:
            raise ValueError("all the input arrays must have same number of dimensions")

        if X.shape[1:] != y.shape[1:]:
            raise ValueError("all the input array dimensions for the concatenation axis must match exactly")

        # only the first dimension grows; the remaining ones were checked to match
        res_shape = (X.shape[0] + y.shape[0],) + tuple(X.shape[1:])
        res_usm = dpt.empty(res_shape, dtype=dtype, usm_type=usm_type, sycl_queue=queue)

        # concatenate input arrays 'm' and 'y' into a single array along axis 0
        hev1, _ = ti._copy_usm_ndarray_into_usm_ndarray(src=X.get_array(), dst=res_usm[:X.shape[0]], sycl_queue=queue)
        hev2, _ = ti._copy_usm_ndarray_into_usm_ndarray(src=y.get_array(), dst=res_usm[X.shape[0]:], sycl_queue=queue)
        dpctl.SyclEvent.wait_for([hev1, hev2])

        X = dpnp_array._create_from_usm_ndarray(res_usm)

    avg = X.mean(axis=1)

    fact = X.shape[1] - 1

    # Center the data out of place: X may still be the caller's array (or a view
    # of it), so an in-place subtraction would overwrite the input data.
    X = X - avg[:, None]

    c = dpnp.dot(X, X.T.conj())
    c *= 1 / fact if fact != 0 else dpnp.nan

    return dpnp.squeeze(c)
2 changes: 0 additions & 2 deletions tests/skipped_tests_gpu.tbl
Original file line number Diff line number Diff line change
Expand Up @@ -252,8 +252,6 @@ tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsMult
tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsMultivariateNormal_param_2_{d=4, shape=(4, 3, 2)}::test_normal
tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsMultivariateNormal_param_3_{d=4, shape=(3, 2)}::test_normal

tests/third_party/cupy/statistics_tests/test_correlation.py::TestCov::test_cov_empty

tests/third_party/intel/test_zero_copy_test1.py::test_dpnp_interaction_with_dpctl_memory
tests/test_arraymanipulation.py::TestHstack::test_generator
tests/test_arraymanipulation.py::TestVstack::test_generator
Expand Down
Loading

0 comments on commit f5e6425

Please sign in to comment.