ht.array() default to copy=None (i.e., copy only if necessary) #1119

Merged: 29 commits, Jun 19, 2023
Changes from all commits
Commits (29)
bb1e4db
Default to copy=None in `ht.array()` as per array-API
ClaudiaComito Mar 6, 2023
8f4e34b
remove unnecessary copy if split not None
ClaudiaComito Mar 6, 2023
fc59df1
return contiguous local tensor after torch.diagonal
ClaudiaComito Mar 7, 2023
0139c65
set default copy=None for asarray
ClaudiaComito Mar 7, 2023
eae8256
Update documentation
ClaudiaComito Mar 15, 2023
dafa614
allow copy for lists and tuples
ClaudiaComito Mar 15, 2023
aafa774
expand tests
ClaudiaComito Mar 15, 2023
c65eecd
Merge branch 'main' into features/#1117-array-copy-None
ClaudiaComito Mar 29, 2023
52e2c48
Merge branch 'main' into features/#1117-array-copy-None
ClaudiaComito Mar 30, 2023
f2c49fc
interpret numpy.ndarray.dtype
ClaudiaComito Mar 30, 2023
66069b5
Merge branch 'main' into features/#1117-array-copy-None
ClaudiaComito Apr 17, 2023
f5107b8
Merge branch 'main' into features/#1117-array-copy-None
ClaudiaComito Apr 17, 2023
729b9f3
expand tests
ClaudiaComito Apr 17, 2023
d56d8b8
fix dtype in numpy obj test
ClaudiaComito Apr 18, 2023
04b9259
Merge branch 'main' into features/#1117-array-copy-None
ClaudiaComito Apr 24, 2023
25a4cb9
Merge branch 'main' into features/#1117-array-copy-None
ClaudiaComito Apr 27, 2023
ac2bb29
Merge branch 'main' into features/#1117-array-copy-None
ClaudiaComito May 22, 2023
a64eaa2
improve docs after review
ClaudiaComito May 22, 2023
ca868ea
Merge branch 'main' into features/#1117-array-copy-None
ClaudiaComito May 31, 2023
e882c56
Merge branch 'main' into features/#1117-array-copy-None
ClaudiaComito Jun 6, 2023
c9ba402
Edit documentation
ClaudiaComito Jun 6, 2023
8514070
reduce memory footprint ht.array()
ClaudiaComito Jun 12, 2023
d3fb45b
skip tests on arm64 architecture
ClaudiaComito Jun 13, 2023
ef6b86a
assign correct split on 1 process as well
ClaudiaComito Jun 13, 2023
0312f86
add copy kwarg to asarray()
ClaudiaComito Jun 13, 2023
d6ebd8d
edit asarray docs
ClaudiaComito Jun 13, 2023
7b2a267
Merge branch 'main' into features/#1117-array-copy-None
ClaudiaComito Jun 19, 2023
49197b3
skip split mismatch test on 1 process
ClaudiaComito Jun 19, 2023
17ca24f
skip split mismatch test on 1 process
ClaudiaComito Jun 19, 2023
102 changes: 64 additions & 38 deletions heat/core/factories.py
@@ -151,7 +151,7 @@ def arange(
def array(
obj: Iterable,
dtype: Optional[Type[datatype]] = None,
copy: bool = True,
copy: Optional[bool] = None,
ndmin: int = 0,
order: str = "C",
split: Optional[int] = None,
@@ -172,8 +172,9 @@ def array(
to hold the objects in the sequence. This argument can only be used to ‘upcast’ the array. For downcasting, use
the :func:`~heat.core.dndarray.astype` method.
copy : bool, optional
If ``True`` (default), then the object is copied. Otherwise, a copy will only be made if obj is a nested
sequence or if a copy is needed to satisfy any of the other requirements, e.g. ``dtype``.
If ``True``, the input object is copied.
If ``False``, input which supports the buffer protocol is never copied.
If ``None`` (default), the function reuses the existing memory buffer if possible, and copies otherwise.
ndmin : int, optional
Specifies the minimum number of dimensions that the resulting array should have. Ones will, if needed, be
attached to the shape if ``ndim > 0`` and prefaced in case of ``ndim < 0`` to meet the requirement.
@@ -198,6 +199,10 @@
------
NotImplementedError
If order is one of the NumPy options ``'K'`` or ``'A'``.
ValueError
If ``copy`` is False but a copy is necessary to satisfy other requirements (e.g. different dtype, device, etc.).
TypeError
If the input object cannot be converted to a torch.Tensor, hence it cannot be converted to a :class:`~heat.core.dndarray.DNDarray`.

Examples
--------
@@ -285,18 +290,16 @@ def array(
[torch.LongStorage of size 6]
"""
# array already exists; no copy
if (
isinstance(obj, DNDarray)
and not copy
and (dtype is None or dtype == obj.dtype)
and (split is None or split == obj.split)
and (is_split is None or is_split == obj.split)
and (device is None or device == obj.device)
):
return obj

# extract the internal tensor in case of a heat tensor
if isinstance(obj, DNDarray):
if not copy:
if (
(dtype is None or dtype == obj.dtype)
and (split is None or split == obj.split)
and (is_split is None or is_split == obj.split)
and (device is None or device == obj.device)
):
return obj
# extract the internal tensor
obj = obj.larray

# sanitize the data type
@@ -324,13 +327,28 @@
except RuntimeError:
raise TypeError("invalid data of type {}".format(type(obj)))
else:
if not isinstance(obj, DNDarray):
if copy is False and not np.isscalar(obj) and not isinstance(obj, (Tuple, List)):
# Python array-API compliance, cf. https://data-apis.org/array-api/2022.12/API_specification/generated/array_api.asarray.html
if not (
(dtype is None or dtype == types.canonical_heat_type(obj.dtype))
and (
device is None
or device.torch_device
== str(getattr(obj, "device", devices.get_device().torch_device))
)
):
raise ValueError(
"argument `copy` is set to False, but copy of input object is necessary. \n Set copy=None to reuse the memory buffer whenever possible and allow for copies otherwise."
)
try:
obj = torch.as_tensor(
obj,
device=device.torch_device
if device is not None
else devices.get_device().torch_device,
)
except RuntimeError:
raise TypeError("invalid data of type {}".format(type(obj)))

# infer dtype from obj if not explicitly given
if dtype is None:
@@ -376,10 +394,18 @@
balanced = True

# content shall be split, chunk the passed data object up
if split is not None:
_, _, slices = comm.chunk(gshape, split)
obj = obj[slices].clone()
if comm.size == 1 or split is None and is_split is None:
obj = sanitize_memory_layout(obj, order=order)
split = is_split if is_split is not None else split

elif split is not None:
# only keep local slice
_, _, slices = comm.chunk(gshape, split)
_ = obj[slices].clone()
del obj

obj = sanitize_memory_layout(_, order=order)

# check with the neighboring rank whether the local shape would fit into a global shape
elif is_split is not None:
obj = sanitize_memory_layout(obj, order=order)
@@ -396,51 +422,47 @@
status = MPI.Status()
comm.Probe(source=comm.rank - 1, status=status)
length = status.Get_count() // lshape.dtype.itemsize
del status
# the number of shape elements does not match with the 'left' rank
if length != len(lshape):
discard_buffer = np.empty(length)
comm.Recv(discard_buffer, source=comm.rank - 1)
neighbour_shape[is_split] = np.iinfo(neighbour_shape.dtype).min
lshape[is_split] = np.iinfo(lshape.dtype).min
else:
# check whether the individual shape elements match
comm.Recv(neighbour_shape, source=comm.rank - 1)
for i in range(length):
if i == is_split:
continue
elif lshape[i] != neighbour_shape[i]:
neighbour_shape[is_split] = np.iinfo(neighbour_shape.dtype).min
if i != is_split:
if lshape[i] != neighbour_shape[i]:
lshape[is_split] = np.iinfo(lshape.dtype).min
del neighbour_shape

# sum up the elements along the split dimension
reduction_buffer = np.array(neighbour_shape[is_split])
reduction_buffer = np.array(lshape[is_split])
comm.Allreduce(MPI.IN_PLACE, reduction_buffer, MPI.MIN)
if reduction_buffer < 0:
raise ValueError(
"Unable to construct DNDarray. Local data slices have inconsistent shapes or dimensions."
)
ttl_shape = np.array(obj.shape)
ttl_shape[is_split] = lshape[is_split]
comm.Allreduce(MPI.IN_PLACE, ttl_shape, MPI.SUM)
gshape[is_split] = ttl_shape[is_split]

total_split_shape = np.array(lshape[is_split])
comm.Allreduce(MPI.IN_PLACE, total_split_shape, MPI.SUM)
gshape[is_split] = total_split_shape.item()
split = is_split
# compare to calculated balanced lshape (cf. dndarray.is_balanced())
gshape = tuple(int(ele) for ele in gshape)
lshape = tuple(int(ele) for ele in lshape)
_, _, chk = comm.chunk(gshape, split)
test_lshape = tuple([x.stop - x.start for x in chk])
match = 1 if test_lshape == lshape else 0
_, test_lshape, _ = comm.chunk(gshape, split)
match = (test_lshape == lshape).all().astype(int)
gmatch = comm.allreduce(match, MPI.SUM)
if gmatch != comm.size:
balanced = False

elif split is None and is_split is None:
obj = sanitize_memory_layout(obj, order=order)

return DNDarray(obj, tuple(gshape), dtype, split, device, comm, balanced)


def asarray(
obj: Iterable,
dtype: Optional[Type[datatype]] = None,
copy: Optional[bool] = None,
order: str = "C",
is_split: Optional[bool] = None,
device: Optional[Union[str, Device]] = None,
@@ -456,10 +478,14 @@
tuples of tuples, tuples of lists and ndarrays.
dtype : dtype, optional
By default, the data-type is inferred from the input data.
copy : bool, optional
If ``True``, then the object is copied. If ``False``, the object is not copied and a ``ValueError`` is
raised in the case a copy would be necessary. If ``None``, a copy will only be made if `obj` is a nested
sequence or if a copy is needed to satisfy any of the other requirements, e.g. ``dtype``.
order: str, optional
Whether to use row-major (C-style) or column-major (Fortran-style) memory representation. Defaults to ‘C’.
is_split : None or int, optional
Specifies the axis along which the local data portions, passed in obj, are split across all machines. Useful for
Specifies the axis along which the local data portions, passed in obj, are split across all MPI processes. Useful for
interfacing with other HPC code. The shape of the global tensor is automatically inferred.
device : str, ht.Device or None, optional
Specifies the device the tensor shall be allocated on. By default, it is inferred from the input data.
@@ -489,7 +515,7 @@
>>> ht.asarray(a, dtype=ht.float64) is a
False
"""
return array(obj, dtype=dtype, copy=False, order=order, is_split=is_split, device=device)
return array(obj, dtype=dtype, copy=copy, order=order, is_split=is_split, device=device)


def empty(
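Taken together, the `factories.py` changes give `ht.array()` and `ht.asarray()` array-API-style copy semantics. A minimal usage sketch of the three modes, assuming a single process and the default CPU device (the exception raised is the `ValueError` added above):

```python
import numpy as np
import torch
import heat as ht

t = torch.arange(4)

a = ht.array(t, copy=True)    # always copies the input buffer
b = ht.array(t)               # copy=None (new default): reuse the buffer when dtype/device/split match

# copy=False: never copy; a required conversion (here int64 -> float32) raises instead
try:
    ht.array(np.arange(4), dtype=ht.float32, copy=False)
except ValueError as err:
    print(err)

# asarray() forwards `copy`; with matching attributes it hands back the input DNDarray itself
x = ht.array([1, 2, 3])
assert ht.asarray(x) is x
```

With `copy=None`, a copy still happens whenever a dtype, device, or split requirement cannot be satisfied by the existing buffer.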
6 changes: 4 additions & 2 deletions heat/core/manipulations.py
@@ -805,11 +805,13 @@ def diagonal(a: DNDarray, offset: int = 0, dim1: int = 0, dim2: int = 1) -> DNDarray:
split = len(shape) - 1

if a.split is None or a.split not in (dim1, dim2):
result = torch.diagonal(a.larray, offset=offset, dim1=dim1, dim2=dim2)
result = torch.diagonal(a.larray, offset=offset, dim1=dim1, dim2=dim2).contiguous()
else:
vz = 1 if a.split == dim1 else -1
off, _, _ = a.comm.chunk(a.shape, a.split)
result = torch.diagonal(a.larray, offset=offset + vz * off, dim1=dim1, dim2=dim2)
result = torch.diagonal(
a.larray, offset=offset + vz * off, dim1=dim1, dim2=dim2
).contiguous()
return factories.array(result, dtype=a.dtype, is_split=split, device=a.device, comm=a.comm)


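The added `.contiguous()` calls address the fact that `torch.diagonal` returns a strided view rather than a compact tensor; a short illustration in plain PyTorch:

```python
import torch

m = torch.arange(9).reshape(3, 3)      # row-major, strides (3, 1)
d = torch.diagonal(m)                  # view with stride (4,): one step skips a full row plus one column

print(d.is_contiguous())               # False
print(d.contiguous().is_contiguous())  # True -- a compact copy that can safely back a local DNDarray chunk
```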
13 changes: 8 additions & 5 deletions heat/core/memory.py
@@ -58,17 +58,19 @@ def sanitize_memory_layout(x: torch.Tensor, order: str = "C") -> torch.Tensor:
if x.ndim < 2 or x.numel() == 0:
# do nothing
return x
dims = list(range(x.ndim))
stride = torch.tensor(x.stride())
stride_diff = torch.as_tensor(x.stride())
# since strides can get a bit wonky with operations like transpose
# we should assume that the tensors are row major or are distributed the default way
sdiff = stride[1:] - stride[:-1]
column_major = all(sdiff >= 0)
stride_diff = stride_diff[1:] - stride_diff[:-1]
column_major = torch.all(stride_diff >= 0)
row_major = True if not column_major else False
del stride_diff
if (order == "C" and row_major) or (order == "F" and column_major):
# do nothing
del column_major, row_major
return x
elif (order == "C" and column_major) or (order == "F" and row_major):
if (order == "C" and column_major) or (order == "F" and row_major):
dims = list(range(x.ndim))
dims = tuple(reversed(dims))
y = torch.empty_like(x)
permutation = x.permute(dims).contiguous()
@@ -78,6 +80,7 @@
x.shape,
tuple(reversed(permutation.stride())),
)
del permutation, dims, column_major, row_major, x
return y
else:
raise ValueError(
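For reference, the row-/column-major classification above boils down to the sign of the stride differences; a small self-contained sketch of that check (the helper name is illustrative, not part of Heat's API):

```python
import torch

def looks_column_major(t: torch.Tensor) -> bool:
    """Treat non-decreasing strides as column-major (Fortran-like) layout."""
    stride = torch.as_tensor(t.stride())
    return bool(torch.all(stride[1:] - stride[:-1] >= 0))

x = torch.empty(3, 4)          # strides (4, 1): row-major
y = x.t()                      # strides (1, 4): column-major view of the same storage

print(looks_column_major(x))   # False
print(looks_column_major(y))   # True
```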
28 changes: 18 additions & 10 deletions heat/core/tests/test_factories.py
@@ -184,18 +184,19 @@ def test_array(self):
).all()
)

# distributed array, chunk local data (split)
tensor_2d = ht.array([[1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0]], split=0)
self.assertIsInstance(tensor_2d, ht.DNDarray)
self.assertEqual(tensor_2d.dtype, ht.float32)
self.assertEqual(tensor_2d.gshape, (3, 3))
self.assertEqual(len(tensor_2d.lshape), 2)
self.assertLessEqual(tensor_2d.lshape[0], 3)
self.assertEqual(tensor_2d.lshape[1], 3)
self.assertEqual(tensor_2d.split, 0)
# distributed array, chunk local data (split), copy True
array_2d = np.array([[1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0]])
dndarray_2d = ht.array(array_2d, split=0, copy=True)
self.assertIsInstance(dndarray_2d, ht.DNDarray)
self.assertEqual(dndarray_2d.dtype, ht.float64)
self.assertEqual(dndarray_2d.gshape, (3, 3))
self.assertEqual(len(dndarray_2d.lshape), 2)
self.assertLessEqual(dndarray_2d.lshape[0], 3)
self.assertEqual(dndarray_2d.lshape[1], 3)
self.assertEqual(dndarray_2d.split, 0)
self.assertTrue(
(
tensor_2d.larray == torch.tensor([1.0, 2.0, 3.0], device=self.device.torch_device)
dndarray_2d.larray == torch.tensor([1.0, 2.0, 3.0], device=self.device.torch_device)
).all()
)

@@ -292,6 +293,9 @@ def test_array(self):
# iterable, but unsuitable type
with self.assertRaises(TypeError):
ht.array("abc")
# iterable, but unsuitable type, with copy=True
with self.assertRaises(TypeError):
ht.array("abc", copy=True)
# unknown dtype
with self.assertRaises(TypeError):
ht.array((4,), dtype="a")
@@ -307,6 +311,10 @@
# invalid communicator
with self.assertRaises(TypeError):
ht.array((4,), comm={})
# copy=False but copy is necessary
data = np.arange(10)
with self.assertRaises(ValueError):
ht.array(data, dtype=ht.int32, copy=False)

# data already distributed but don't match in shape
if self.get_size() > 1:
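The new `copy=False` test case works because `np.arange` produces int64 data on most platforms, so casting to `ht.int32` cannot reuse the buffer; roughly:

```python
import numpy as np
import heat as ht

data = np.arange(10)                        # typically int64
try:
    ht.array(data, dtype=ht.int32, copy=False)
except ValueError:
    print("copy=False refuses the implicit int64 -> int32 cast")

# the default copy=None makes the copy silently instead
assert ht.array(data, dtype=ht.int32).dtype == ht.int32
```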
13 changes: 8 additions & 5 deletions heat/core/tests/test_operations.py
@@ -1,4 +1,5 @@
import torch
import platform

import heat as ht
import numpy as np
@@ -91,15 +92,17 @@ def test___binary_bit_op_broadcast(self):
self.assertTrue(ht.equal(a[0:1] * b, b))
self.assertTrue(ht.equal(b * a[0:1], b))

c = ht.array([1, 2, 3, 4], comm=ht.MPI_SELF)
if ht.MPI_WORLD.size > 1:
c = ht.array([1, 2, 3, 4], comm=ht.MPI_SELF)
with self.assertRaises(NotImplementedError):
b + c
with self.assertRaises(NotImplementedError):
a.resplit(1) * b
with self.assertRaises(TypeError):
ht.minimum(a, np.float128(1))
with self.assertRaises(TypeError):
ht.minimum(np.float128(1), a)
# skip tests on arm64 architecture
if platform.machine() != "arm64":
with self.assertRaises(TypeError):
ht.minimum(a, np.float128(1))
with self.assertRaises(TypeError):
ht.minimum(np.float128(1), a)
with self.assertRaises(ValueError):
a[2:] * b
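The `platform.machine()` guard reflects that extended-precision `np.float128` is generally not exposed by NumPy builds on arm64 (e.g. Apple Silicon); a quick local check, not part of the test suite:

```python
import platform
import numpy as np

print(platform.machine())        # e.g. "arm64" on Apple Silicon, "x86_64" elsewhere
print(hasattr(np, "float128"))   # usually False on arm64, True on x86_64 builds
```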
3 changes: 3 additions & 0 deletions heat/core/types.py
@@ -514,6 +514,9 @@ def canonical_heat_type(a_type: Union[str, Type[datatype], Any]) -> Type[datatype]:
except TypeError:
pass

# extract type of numpy.dtype
a_type = getattr(a_type, "type", a_type)

# try to look the corresponding type up
try:
return __type_mappings[a_type]
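The added `getattr` line unwraps `numpy.dtype` objects (such as `numpy.ndarray.dtype`) to their scalar type before the mapping lookup; a small sketch of that step:

```python
import numpy as np

a_type = np.dtype("float32")              # what numpy.ndarray.dtype returns
print(getattr(a_type, "type", a_type))    # <class 'numpy.float32'> -- the value used for the lookup

plain = float                             # inputs without a .type attribute pass through unchanged
print(getattr(plain, "type", plain))      # <class 'float'>
```

This is what lets the updated factories test infer `ht.float64` directly from a NumPy `float64` array.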