Align with dpctl changes for DLPack v1.0 support #1980

Merged: 6 commits, Aug 22, 2024

50 changes: 42 additions & 8 deletions dpnp/dpnp_array.py
@@ -184,27 +184,61 @@ def __copy__(self):
# '__divmod__',
# '__doc__',

    def __dlpack__(
        self, *, stream=None, max_version=None, dl_device=None, copy=None
    ):
"""
        Produces a DLPack capsule.

Parameters
----------
stream : {:class:`dpctl.SyclQueue`, None}, optional
            Execution queue to synchronize with. If ``None``, synchronization
            is not performed.
Default: ``None``.
        max_version : {tuple of ints, None}, optional
The maximum DLPack version the consumer (caller of ``__dlpack__``)
supports. As ``__dlpack__`` may not always return a DLPack capsule
with version `max_version`, the consumer must verify the version
even if this argument is passed.
Default: ``None``.
        dl_device : {tuple, None}, optional
            The device the returned DLPack capsule will be placed on. The
            device must be a 2-tuple matching the format of the output of
            the ``__dlpack_device__`` method: an integer enumerator
            representing the device type followed by an integer representing
            the index of the device.
Default: ``None``.
        copy : {bool, None}, optional
Boolean indicating whether or not to copy the input.

* If `copy` is ``True``, the input will always be copied.
* If ``False``, a ``BufferError`` will be raised if a copy is
deemed necessary.
* If ``None``, a copy will be made only if deemed necessary,
otherwise, the existing memory buffer will be reused.

Default: ``None``.

Raises
------
        MemoryError:
            when host memory cannot be allocated.
        DLPackCreationError:
            when array is allocated on a partitioned SYCL device, or with
            a non-default context.
        BufferError:
            when a copy is deemed necessary but `copy` is ``False`` or when
            the provided `dl_device` cannot be handled.

"""

        return self._array_obj.__dlpack__(
            stream=stream,
            max_version=max_version,
            dl_device=dl_device,
            copy=copy,
        )

def __dlpack_device__(self):
"""
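For context, a minimal consumer-side sketch of the extended ``__dlpack__`` signature (``x`` is illustrative and not part of the change):

import dpnp

x = dpnp.arange(5)

# Request a capsule no newer than DLPack v1.0; per the docstring above,
# the producer may still return an older version, so a consumer must
# verify the version of the capsule it actually receives.
capsule = x.__dlpack__(max_version=(1, 0))

# copy=None (the default) reuses the existing buffer when possible,
# while copy=False raises BufferError if a copy were unavoidable.
capsule_nocopy = x.__dlpack__(copy=False)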
35 changes: 32 additions & 3 deletions dpnp/dpnp_iface.py
@@ -464,7 +464,7 @@ def default_float_type(device=None, sycl_queue=None):
return map_dtype_to_device(float64, _sycl_queue.sycl_device)


def from_dlpack(obj, /, *, device=None, copy=None):
"""
Create a dpnp array from a Python object implementing the ``__dlpack__``
protocol.
@@ -476,17 +476,46 @@ def from_dlpack(obj, /):
obj : object
A Python object representing an array that implements the ``__dlpack__``
and ``__dlpack_device__`` methods.
device : {:class:`dpctl.SyclDevice`, :class:`dpctl.SyclQueue`,
:class:`dpctl.tensor.Device`, tuple, None}, optional
Array API concept of a device where the output array is to be placed.
        ``device`` can be ``None``, a oneAPI filter selector string,
an instance of :class:`dpctl.SyclDevice` corresponding to
a non-partitioned SYCL device, an instance of :class:`dpctl.SyclQueue`,
a :class:`dpctl.tensor.Device` object returned by
:attr:`dpctl.tensor.usm_ndarray.device`, or a 2-tuple matching
the format of the output of the ``__dlpack_device__`` method,
an integer enumerator representing the device type followed by
an integer representing the index of the device.
Default: ``None``.
    copy : {bool, None}, optional
Boolean indicating whether or not to copy the input.

        * If `copy` is ``True``, the input will always be copied.
* If ``False``, a ``BufferError`` will be raised if a copy is deemed
necessary.
* If ``None``, a copy will be made only if deemed necessary, otherwise,
the existing memory buffer will be reused.

Default: ``None``.

Returns
-------
out : dpnp_array
        A new dpnp array containing the data from `obj`. The array is
        placed on the device specified by `device`, or on the same device
        as `obj` when `device` is ``None``.

Raises
------
    TypeError:
        if `obj` does not implement the ``__dlpack__`` method.
    ValueError:
        if the input array resides on an unsupported device.

"""

    usm_res = dpt.from_dlpack(obj, device=device, copy=copy)
    return dpnp_array._create_from_usm_ndarray(usm_res)


def get_dpnp_descriptor(
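A short usage sketch of the extended ``from_dlpack``, mirroring the ``test_device`` case in the new tests below (names are illustrative):

import dpnp

x = dpnp.arange(5)

# Import onto the device reported by the producer; copy=None reuses the
# existing buffer when possible, while copy=False would raise BufferError
# if a copy were required.
y = dpnp.from_dlpack(x, device=x.__dlpack_device__(), copy=None)
assert x.device == y.device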
74 changes: 74 additions & 0 deletions tests/test_dlpack.py
@@ -0,0 +1,74 @@
import numpy
import pytest
from numpy.testing import assert_array_equal

import dpnp

from .helper import (
get_all_dtypes,
)

device_oneAPI = 14 # DLDeviceType.kDLOneAPI


class TestDLPack:
@pytest.mark.parametrize("stream", [None, 1])
def test_stream(self, stream):
x = dpnp.arange(5)
x.__dlpack__(stream=stream)

@pytest.mark.parametrize("copy", [True, None, False])
def test_copy(self, copy):
x = dpnp.arange(5)
x.__dlpack__(copy=copy)

def test_wrong_copy(self):
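        # smoke test: a non-boolean ``copy`` value is exercised without
        # asserting on the outcome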
x = dpnp.arange(5)
x.__dlpack__(copy=dpnp.array([1, 2, 3]))

@pytest.mark.parametrize("xp", [dpnp, numpy])
@pytest.mark.parametrize("dt", get_all_dtypes(no_none=True))
def test_dtype_passthrough(self, xp, dt):
x = xp.arange(5).astype(dt)
y = xp.from_dlpack(x)

assert y.dtype == x.dtype
assert_array_equal(x, y)

@pytest.mark.parametrize("xp", [dpnp, numpy])
def test_non_contiguous(self, xp):
x = xp.arange(25).reshape((5, 5))

y1 = x[0]
assert_array_equal(y1, xp.from_dlpack(y1))

y2 = x[:, 0]
assert_array_equal(y2, xp.from_dlpack(y2))

y3 = x[1, :]
assert_array_equal(y3, xp.from_dlpack(y3))

y4 = x[1]
assert_array_equal(y4, xp.from_dlpack(y4))

y5 = xp.diagonal(x).copy()
assert_array_equal(y5, xp.from_dlpack(y5))

def test_device(self):
x = dpnp.arange(5)
assert x.__dlpack_device__()[0] == device_oneAPI
y = dpnp.from_dlpack(x)
assert y.__dlpack_device__()[0] == device_oneAPI
z = y[::2]
assert z.__dlpack_device__()[0] == device_oneAPI

def test_ndim0(self):
x = dpnp.array(1.0)
y = dpnp.from_dlpack(x)
assert_array_equal(x, y)

    def test_from_dlpack_device(self):
x = dpnp.arange(5)
y = dpnp.from_dlpack(x, device=x.__dlpack_device__())
assert x.device == y.device
assert x.get_array()._pointer == y.get_array()._pointer
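For reference, the ``device_oneAPI`` constant above corresponds to ``DLDeviceType.kDLOneAPI``; a quick illustrative sketch of the 2-tuple returned by ``__dlpack_device__``:

import dpnp

x = dpnp.arange(5)
dev_type, dev_id = x.__dlpack_device__()
assert dev_type == 14  # DLDeviceType.kDLOneAPI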
120 changes: 120 additions & 0 deletions tests/third_party/cupy/core_tests/test_dlpack.py
@@ -0,0 +1,120 @@
import unittest

import dpctl
import dpctl.tensor._dlpack as dlp
import numpy
import pytest

import dpnp as cupy
from tests.third_party.cupy import testing


def _gen_array(dtype, alloc_q=None):
if cupy.issubdtype(dtype, numpy.unsignedinteger):
array = cupy.random.randint(
0, 10, size=(2, 3), sycl_queue=alloc_q
).astype(dtype)
elif cupy.issubdtype(dtype, cupy.integer):
array = cupy.random.randint(
-10, 10, size=(2, 3), sycl_queue=alloc_q
).astype(dtype)
elif cupy.issubdtype(dtype, cupy.floating):
array = cupy.random.rand(2, 3, sycl_queue=alloc_q).astype(dtype)
elif cupy.issubdtype(dtype, cupy.complexfloating):
array = cupy.random.random((2, 3), sycl_queue=alloc_q).astype(dtype)
elif dtype == cupy.bool_:
array = cupy.random.randint(
0, 2, size=(2, 3), sycl_queue=alloc_q
).astype(cupy.bool_)
else:
assert False, f"unrecognized dtype: {dtype}"
return array


class TestDLPackConversion(unittest.TestCase):
@testing.for_all_dtypes(no_bool=False)
def test_conversion(self, dtype):
orig_array = _gen_array(dtype)
tensor = orig_array.__dlpack__()
out_array = dlp.from_dlpack_capsule(tensor)
testing.assert_array_equal(orig_array, out_array)
assert orig_array.get_array()._pointer == out_array._pointer


@testing.parameterize(*testing.product({"memory": ("device", "managed")}))
class TestNewDLPackConversion(unittest.TestCase):
    def _get_stream(self, stream_name):
        # unlike CuPy's null vs. new stream distinction, every stream name
        # maps to a fresh SYCL queue here
        return dpctl.SyclQueue()

@testing.for_all_dtypes(no_bool=False)
def test_conversion(self, dtype):
orig_array = _gen_array(dtype)
out_array = cupy.from_dlpack(orig_array)
testing.assert_array_equal(orig_array, out_array)
assert orig_array.get_array()._pointer == out_array.get_array()._pointer

def test_stream(self):
allowed_streams = ["null", True]

# stream order is automatically established via DLPack protocol
for src_s in [self._get_stream(s) for s in allowed_streams]:
for dst_s in [self._get_stream(s) for s in allowed_streams]:
orig_array = _gen_array(cupy.float32, alloc_q=src_s)
                dltensor = orig_array.__dlpack__(stream=dst_s)

out_array = dlp.from_dlpack_capsule(dltensor)
out_array = cupy.from_dlpack(out_array, device=dst_s)
testing.assert_array_equal(orig_array, out_array)
assert (
orig_array.get_array()._pointer
== out_array.get_array()._pointer
)


class TestDLTensorMemory(unittest.TestCase):
# def setUp(self):
# self.old_pool = cupy.get_default_memory_pool()
# self.pool = cupy.cuda.MemoryPool()
# cupy.cuda.set_allocator(self.pool.malloc)

# def tearDown(self):
# self.pool.free_all_blocks()
# cupy.cuda.set_allocator(self.old_pool.malloc)

def test_deleter(self):
# memory is freed when tensor is deleted, as it's not consumed
array = cupy.empty(10)
tensor = array.__dlpack__()
# str(tensor): <capsule object "dltensor" at 0x7f7c4c835330>
assert '"dltensor"' in str(tensor)
# assert self.pool.n_free_blocks() == 0
# del array
# assert self.pool.n_free_blocks() == 0
# del tensor
# assert self.pool.n_free_blocks() == 1

def test_deleter2(self):
# memory is freed when array2 is deleted, as tensor is consumed
array = cupy.empty(10)
tensor = array.__dlpack__()
assert '"dltensor"' in str(tensor)
array2 = dlp.from_dlpack_capsule(tensor)
assert '"used_dltensor"' in str(tensor)
# assert self.pool.n_free_blocks() == 0
# del array
# assert self.pool.n_free_blocks() == 0
# del array2
# assert self.pool.n_free_blocks() == 1
# del tensor
# assert self.pool.n_free_blocks() == 1

def test_multiple_consumption_error(self):
# Prevent segfault, see #3611
array = cupy.empty(10)
tensor = array.__dlpack__()
array2 = dlp.from_dlpack_capsule(tensor)
with pytest.raises(ValueError) as e:
array3 = dlp.from_dlpack_capsule(tensor)
assert "consumed multiple times" in str(e.value)