ht.array() default to copy=None (i.e., copy only if necessary) #1119

Merged: 29 commits, Jun 19, 2023
Changes from all commits
Commits (29)
bb1e4db
Default to copy=None in `ht.array()` as per array-API
ClaudiaComito Mar 6, 2023
8f4e34b
remove unnecessary copy if split not None
ClaudiaComito Mar 6, 2023
fc59df1
return contiguous local tensor after torch.diagonal
ClaudiaComito Mar 7, 2023
0139c65
set default copy=None for asarray
ClaudiaComito Mar 7, 2023
eae8256
Update documentation
ClaudiaComito Mar 15, 2023
dafa614
allow copy for lists and tuples
ClaudiaComito Mar 15, 2023
aafa774
expand tests
ClaudiaComito Mar 15, 2023
c65eecd
Merge branch 'main' into features/#1117-array-copy-None
ClaudiaComito Mar 29, 2023
52e2c48
Merge branch 'main' into features/#1117-array-copy-None
ClaudiaComito Mar 30, 2023
f2c49fc
interpret numpy.ndarray.dtype
ClaudiaComito Mar 30, 2023
66069b5
Merge branch 'main' into features/#1117-array-copy-None
ClaudiaComito Apr 17, 2023
f5107b8
Merge branch 'main' into features/#1117-array-copy-None
ClaudiaComito Apr 17, 2023
729b9f3
expand tests
ClaudiaComito Apr 17, 2023
d56d8b8
fix dtype in numpy obj test
ClaudiaComito Apr 18, 2023
04b9259
Merge branch 'main' into features/#1117-array-copy-None
ClaudiaComito Apr 24, 2023
25a4cb9
Merge branch 'main' into features/#1117-array-copy-None
ClaudiaComito Apr 27, 2023
ac2bb29
Merge branch 'main' into features/#1117-array-copy-None
ClaudiaComito May 22, 2023
a64eaa2
improve docs after review
ClaudiaComito May 22, 2023
ca868ea
Merge branch 'main' into features/#1117-array-copy-None
ClaudiaComito May 31, 2023
e882c56
Merge branch 'main' into features/#1117-array-copy-None
ClaudiaComito Jun 6, 2023
c9ba402
Edit documentation
ClaudiaComito Jun 6, 2023
8514070
reduce memory footprint ht.array()
ClaudiaComito Jun 12, 2023
d3fb45b
skip tests on arm64 architecture
ClaudiaComito Jun 13, 2023
ef6b86a
assign correct split on 1 process as well
ClaudiaComito Jun 13, 2023
0312f86
add copy kwarg to asarray()
ClaudiaComito Jun 13, 2023
d6ebd8d
edit asarray docs
ClaudiaComito Jun 13, 2023
7b2a267
Merge branch 'main' into features/#1117-array-copy-None
ClaudiaComito Jun 19, 2023
49197b3
skip split mismatch test on 1 process
ClaudiaComito Jun 19, 2023
17ca24f
skip split mismatch test on 1 process
ClaudiaComito Jun 19, 2023
102 changes: 64 additions & 38 deletions heat/core/factories.py
@@ -151,7 +151,7 @@ def arange(
def array(
obj: Iterable,
dtype: Optional[Type[datatype]] = None,
copy: bool = True,
copy: Optional[bool] = None,
ndmin: int = 0,
order: str = "C",
split: Optional[int] = None,
@@ -172,8 +172,9 @@ def array(
to hold the objects in the sequence. This argument can only be used to ‘upcast’ the array. For downcasting, use
the :func:`~heat.core.dndarray.astype` method.
copy : bool, optional
If ``True`` (default), then the object is copied. Otherwise, a copy will only be made if obj is a nested
sequence or if a copy is needed to satisfy any of the other requirements, e.g. ``dtype``.
If ``True``, the input object is copied.
If ``False``, input which supports the buffer protocol is never copied.
If ``None`` (default), the function reuses the existing memory buffer if possible, and copies otherwise.
ndmin : int, optional
Specifies the minimum number of dimensions that the resulting array should have. Ones will, if needed, be
attached to the shape if ``ndim > 0`` and prefaced in case of ``ndim < 0`` to meet the requirement.
@@ -198,6 +199,10 @@
------
NotImplementedError
If order is one of the NumPy options ``'K'`` or ``'A'``.
ValueError
If ``copy`` is False but a copy is necessary to satisfy other requirements (e.g. different dtype, device, etc.).
TypeError
If the input object cannot be converted to a torch.Tensor, hence it cannot be converted to a :class:`~heat.core.dndarray.DNDarray`.

Examples
--------
@@ -285,18 +290,16 @@ def array(
[torch.LongStorage of size 6]
"""
# array already exists; no copy
if (
isinstance(obj, DNDarray)
and not copy
and (dtype is None or dtype == obj.dtype)
and (split is None or split == obj.split)
and (is_split is None or is_split == obj.split)
and (device is None or device == obj.device)
):
return obj

# extract the internal tensor in case of a heat tensor
if isinstance(obj, DNDarray):
if not copy:
if (
(dtype is None or dtype == obj.dtype)
and (split is None or split == obj.split)
and (is_split is None or is_split == obj.split)
and (device is None or device == obj.device)
):
return obj
# extract the internal tensor
obj = obj.larray

# sanitize the data type
@@ -324,13 +327,28 @@
except RuntimeError:
raise TypeError("invalid data of type {}".format(type(obj)))
else:
if not isinstance(obj, DNDarray):
if copy is False and not np.isscalar(obj) and not isinstance(obj, (Tuple, List)):
# Python array-API compliance, cf. https://data-apis.org/array-api/2022.12/API_specification/generated/array_api.asarray.html
if not (
(dtype is None or dtype == types.canonical_heat_type(obj.dtype))
and (
device is None
or device.torch_device
== str(getattr(obj, "device", devices.get_device().torch_device))
)
):
raise ValueError(
"argument `copy` is set to False, but copy of input object is necessary. \n Set copy=None to reuse the memory buffer whenever possible and allow for copies otherwise."
)
try:
obj = torch.as_tensor(
obj,
device=device.torch_device
if device is not None
else devices.get_device().torch_device,
)
except RuntimeError:
raise TypeError("invalid data of type {}".format(type(obj)))

# infer dtype from obj if not explicitly given
if dtype is None:
@@ -376,10 +394,18 @@
balanced = True

# content shall be split, chunk the passed data object up
if split is not None:
_, _, slices = comm.chunk(gshape, split)
obj = obj[slices].clone()
if comm.size == 1 or split is None and is_split is None:
obj = sanitize_memory_layout(obj, order=order)
split = is_split if is_split is not None else split

elif split is not None:
# only keep local slice
_, _, slices = comm.chunk(gshape, split)
_ = obj[slices].clone()
del obj

obj = sanitize_memory_layout(_, order=order)

# check with the neighboring rank whether the local shape would fit into a global shape
elif is_split is not None:
obj = sanitize_memory_layout(obj, order=order)
@@ -396,51 +422,47 @@
status = MPI.Status()
comm.Probe(source=comm.rank - 1, status=status)
length = status.Get_count() // lshape.dtype.itemsize
del status
# the number of shape elements does not match with the 'left' rank
if length != len(lshape):
discard_buffer = np.empty(length)
comm.Recv(discard_buffer, source=comm.rank - 1)
neighbour_shape[is_split] = np.iinfo(neighbour_shape.dtype).min
lshape[is_split] = np.iinfo(lshape.dtype).min
else:
# check whether the individual shape elements match
comm.Recv(neighbour_shape, source=comm.rank - 1)
for i in range(length):
if i == is_split:
continue
elif lshape[i] != neighbour_shape[i]:
neighbour_shape[is_split] = np.iinfo(neighbour_shape.dtype).min
if i != is_split:
if lshape[i] != neighbour_shape[i]:
lshape[is_split] = np.iinfo(lshape.dtype).min
del neighbour_shape

# sum up the elements along the split dimension
reduction_buffer = np.array(neighbour_shape[is_split])
reduction_buffer = np.array(lshape[is_split])
comm.Allreduce(MPI.IN_PLACE, reduction_buffer, MPI.MIN)
if reduction_buffer < 0:
raise ValueError(
"Unable to construct DNDarray. Local data slices have inconsistent shapes or dimensions."
)
ttl_shape = np.array(obj.shape)
ttl_shape[is_split] = lshape[is_split]
comm.Allreduce(MPI.IN_PLACE, ttl_shape, MPI.SUM)
gshape[is_split] = ttl_shape[is_split]

total_split_shape = np.array(lshape[is_split])
comm.Allreduce(MPI.IN_PLACE, total_split_shape, MPI.SUM)
gshape[is_split] = total_split_shape.item()
split = is_split
# compare to calculated balanced lshape (cf. dndarray.is_balanced())
gshape = tuple(int(ele) for ele in gshape)
lshape = tuple(int(ele) for ele in lshape)
_, _, chk = comm.chunk(gshape, split)
test_lshape = tuple([x.stop - x.start for x in chk])
match = 1 if test_lshape == lshape else 0
_, test_lshape, _ = comm.chunk(gshape, split)
match = (test_lshape == lshape).all().astype(int)
gmatch = comm.allreduce(match, MPI.SUM)
if gmatch != comm.size:
balanced = False

elif split is None and is_split is None:
obj = sanitize_memory_layout(obj, order=order)

return DNDarray(obj, tuple(gshape), dtype, split, device, comm, balanced)


def asarray(
obj: Iterable,
dtype: Optional[Type[datatype]] = None,
copy: Optional[bool] = None,
order: str = "C",
is_split: Optional[bool] = None,
device: Optional[Union[str, Device]] = None,
@@ -456,10 +478,14 @@
tuples of tuples, tuples of lists and ndarrays.
dtype : dtype, optional
By default, the data-type is inferred from the input data.
copy : bool, optional
If ``True``, then the object is copied. If ``False``, the object is not copied and a ``ValueError`` is
raised in the case a copy would be necessary. If ``None``, a copy will only be made if `obj` is a nested
sequence or if a copy is needed to satisfy any of the other requirements, e.g. ``dtype``.
order: str, optional
Whether to use row-major (C-style) or column-major (Fortran-style) memory representation. Defaults to ‘C’.
is_split : None or int, optional
Specifies the axis along which the local data portions, passed in obj, are split across all machines. Useful for
Specifies the axis along which the local data portions, passed in obj, are split across all MPI processes. Useful for
interfacing with other HPC code. The shape of the global tensor is automatically inferred.
device : str, ht.Device or None, optional
Specifies the device the tensor shall be allocated on. By default, it is inferred from the input data.
@@ -489,7 +515,7 @@
>>> ht.asarray(a, dtype=ht.float64) is a
False
"""
return array(obj, dtype=dtype, copy=False, order=order, is_split=is_split, device=device)
return array(obj, dtype=dtype, copy=copy, order=order, is_split=is_split, device=device)


def empty(
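Taken together, the `factories.py` changes give `ht.array()` and `ht.asarray()` array-API-style copy semantics. A minimal usage sketch of the three modes, assuming a single process and the default CPU device (the exception raised is the `ValueError` added above):

```python
import numpy as np
import torch
import heat as ht

t = torch.arange(4)

a = ht.array(t, copy=True)    # always copies the input buffer
b = ht.array(t)               # copy=None (new default): reuse the buffer when dtype/device/split match

# copy=False: never copy; a required conversion (here int64 -> float32) raises instead
try:
    ht.array(np.arange(4), dtype=ht.float32, copy=False)
except ValueError as err:
    print(err)

# asarray() forwards `copy`; with matching attributes it hands back the input DNDarray itself
x = ht.array([1, 2, 3])
assert ht.asarray(x) is x
```

With `copy=None`, a copy still happens whenever a dtype, device, or split requirement cannot be satisfied by the existing buffer.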
6 changes: 4 additions & 2 deletions heat/core/manipulations.py
@@ -805,11 +805,13 @@ def diagonal(a: DNDarray, offset: int = 0, dim1: int = 0, dim2: int = 1) -> DNDarray:
split = len(shape) - 1

if a.split is None or a.split not in (dim1, dim2):
result = torch.diagonal(a.larray, offset=offset, dim1=dim1, dim2=dim2)
result = torch.diagonal(a.larray, offset=offset, dim1=dim1, dim2=dim2).contiguous()
else:
vz = 1 if a.split == dim1 else -1
off, _, _ = a.comm.chunk(a.shape, a.split)
result = torch.diagonal(a.larray, offset=offset + vz * off, dim1=dim1, dim2=dim2)
result = torch.diagonal(
a.larray, offset=offset + vz * off, dim1=dim1, dim2=dim2
).contiguous()
return factories.array(result, dtype=a.dtype, is_split=split, device=a.device, comm=a.comm)


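The added `.contiguous()` calls address the fact that `torch.diagonal` returns a strided view rather than a compact tensor; a short illustration in plain PyTorch:

```python
import torch

m = torch.arange(9).reshape(3, 3)      # row-major, strides (3, 1)
d = torch.diagonal(m)                  # view with stride (4,): one step skips a full row plus one column

print(d.is_contiguous())               # False
print(d.contiguous().is_contiguous())  # True -- a compact copy that can safely back a local DNDarray chunk
```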
13 changes: 8 additions & 5 deletions heat/core/memory.py
@@ -58,17 +58,19 @@ def sanitize_memory_layout(x: torch.Tensor, order: str = "C") -> torch.Tensor:
if x.ndim < 2 or x.numel() == 0:
# do nothing
return x
dims = list(range(x.ndim))
stride = torch.tensor(x.stride())
stride_diff = torch.as_tensor(x.stride())
# since strides can get a bit wonky with operations like transpose
# we should assume that the tensors are row major or are distributed the default way
sdiff = stride[1:] - stride[:-1]
column_major = all(sdiff >= 0)
stride_diff = stride_diff[1:] - stride_diff[:-1]
column_major = torch.all(stride_diff >= 0)
row_major = True if not column_major else False
del stride_diff
if (order == "C" and row_major) or (order == "F" and column_major):
# do nothing
del column_major, row_major
return x
elif (order == "C" and column_major) or (order == "F" and row_major):
if (order == "C" and column_major) or (order == "F" and row_major):
dims = list(range(x.ndim))
dims = tuple(reversed(dims))
y = torch.empty_like(x)
permutation = x.permute(dims).contiguous()
@@ -78,6 +80,7 @@
x.shape,
tuple(reversed(permutation.stride())),
)
del permutation, dims, column_major, row_major, x
return y
else:
raise ValueError(
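For reference, the row-/column-major classification above boils down to the sign of the stride differences; a small self-contained sketch of that check (the helper name is illustrative, not part of Heat's API):

```python
import torch

def looks_column_major(t: torch.Tensor) -> bool:
    """Treat non-decreasing strides as column-major (Fortran-like) layout."""
    stride = torch.as_tensor(t.stride())
    return bool(torch.all(stride[1:] - stride[:-1] >= 0))

x = torch.empty(3, 4)          # strides (4, 1): row-major
y = x.t()                      # strides (1, 4): column-major view of the same storage

print(looks_column_major(x))   # False
print(looks_column_major(y))   # True
```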
28 changes: 18 additions & 10 deletions heat/core/tests/test_factories.py
@@ -184,18 +184,19 @@ def test_array(self):
).all()
)

# distributed array, chunk local data (split)
tensor_2d = ht.array([[1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], split=0)
self.assertIsInstance(tensor_2d, ht.DNDarray)
self.assertEqual(tensor_2d.dtype, ht.float32)
self.assertEqual(tensor_2d.gshape, (3, 3))
self.assertEqual(len(tensor_2d.lshape), 2)
self.assertLessEqual(tensor_2d.lshape[0], 3)
self.assertEqual(tensor_2d.lshape[1], 3)
self.assertEqual(tensor_2d.split, 0)
# distributed array, chunk local data (split), copy True
array_2d = np.array([[1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0]])
dndarray_2d = ht.array(array_2d, split=0, copy=True)
self.assertIsInstance(dndarray_2d, ht.DNDarray)
self.assertEqual(dndarray_2d.dtype, ht.float64)
self.assertEqual(dndarray_2d.gshape, (3, 3))
self.assertEqual(len(dndarray_2d.lshape), 2)
self.assertLessEqual(dndarray_2d.lshape[0], 3)
self.assertEqual(dndarray_2d.lshape[1], 3)
self.assertEqual(dndarray_2d.split, 0)
self.assertTrue(
(
tensor_2d.larray == torch.tensor([1.0, 2.0, 3.0], device=self.device.torch_device)
dndarray_2d.larray == torch.tensor([1.0, 2.0, 3.0], device=self.device.torch_device)
).all()
)

@@ -292,6 +293,9 @@ def test_array(self):
# iterable, but unsuitable type
with self.assertRaises(TypeError):
ht.array("abc")
# iterable, but unsuitable type, with copy=True
with self.assertRaises(TypeError):
ht.array("abc", copy=True)
# unknown dtype
with self.assertRaises(TypeError):
ht.array((4,), dtype="a")
@@ -307,6 +311,10 @@
# invalid communicator
with self.assertRaises(TypeError):
ht.array((4,), comm={})
# copy=False but copy is necessary
data = np.arange(10)
with self.assertRaises(ValueError):
ht.array(data, dtype=ht.int32, copy=False)

# data already distributed but don't match in shape
if self.get_size() > 1:
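The new `copy=False` test case works because `np.arange` produces int64 data on most platforms, so casting to `ht.int32` cannot reuse the buffer; roughly:

```python
import numpy as np
import heat as ht

data = np.arange(10)                        # typically int64
try:
    ht.array(data, dtype=ht.int32, copy=False)
except ValueError:
    print("copy=False refuses the implicit int64 -> int32 cast")

# the default copy=None makes the copy silently instead
assert ht.array(data, dtype=ht.int32).dtype == ht.int32
```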
13 changes: 8 additions & 5 deletions heat/core/tests/test_operations.py
@@ -1,4 +1,5 @@
import torch
import platform

import heat as ht
import numpy as np
@@ -91,15 +92,17 @@ def test___binary_bit_op_broadcast(self):
self.assertTrue(ht.equal(a[0:1] * b, b))
self.assertTrue(ht.equal(b * a[0:1], b))

c = ht.array([1, 2, 3, 4], comm=ht.MPI_SELF)
if ht.MPI_WORLD.size > 1:
c = ht.array([1, 2, 3, 4], comm=ht.MPI_SELF)
with self.assertRaises(NotImplementedError):
b + c
with self.assertRaises(NotImplementedError):
a.resplit(1) * b
with self.assertRaises(TypeError):
ht.minimum(a, np.float128(1))
with self.assertRaises(TypeError):
ht.minimum(np.float128(1), a)
# skip tests on arm64 architecture
if platform.machine() != "arm64":
with self.assertRaises(TypeError):
ht.minimum(a, np.float128(1))
with self.assertRaises(TypeError):
ht.minimum(np.float128(1), a)
with self.assertRaises(ValueError):
a[2:] * b
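The `platform.machine()` guard reflects that extended-precision `np.float128` is generally not exposed by NumPy builds on arm64 (e.g. Apple Silicon); a quick local check, not part of the test suite:

```python
import platform
import numpy as np

print(platform.machine())        # e.g. "arm64" on Apple Silicon, "x86_64" elsewhere
print(hasattr(np, "float128"))   # usually False on arm64, True on x86_64 builds
```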
3 changes: 3 additions & 0 deletions heat/core/types.py
@@ -514,6 +514,9 @@ def canonical_heat_type(a_type: Union[str, Type[datatype], Any]) -> Type[datatype]:
except TypeError:
pass

# extract type of numpy.dtype
a_type = getattr(a_type, "type", a_type)

# try to look the corresponding type up
try:
return __type_mappings[a_type]
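The added `getattr` line unwraps `numpy.dtype` objects (such as `numpy.ndarray.dtype`) to their scalar type before the mapping lookup; a small sketch of that step:

```python
import numpy as np

a_type = np.dtype("float32")              # what numpy.ndarray.dtype returns
print(getattr(a_type, "type", a_type))    # <class 'numpy.float32'> -- the value used for the lookup

plain = float                             # inputs without a .type attribute pass through unchanged
print(getattr(plain, "type", plain))      # <class 'float'>
```

This is what lets the updated factories test infer `ht.float64` directly from a NumPy `float64` array.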