Skip to content

Commit aef47ac

Browse files
authored
[v2 / v3 compat] add Group.array and data kwarg to array creation (#2042)
* add deprecated Group.array method, and start filling in group unit tests * add errors module * memory store listdir fix, and a type annotation for a method on the test class * Use ContainsArrayError when a path contains an array; restore auto-chunking; restore data kwarg to array creation * use ContainsGroupError and contains_group for group routines * style changes to store tests * add a lot of tests, remove redundant decorators * add data kwarg to group.array * add newlines to end of docstrings * docstrings * put long type annotation on one line * port guess chunks to v3 * harden semantics for existing arrays / groups * fix exception name in docs
1 parent df4c25f commit aef47ac

File tree

11 files changed

+948
-103
lines changed

11 files changed

+948
-103
lines changed

src/zarr/array.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from zarr.abc.store import set_or_delete
2222
from zarr.attributes import Attributes
2323
from zarr.buffer import BufferPrototype, NDArrayLike, NDBuffer, default_buffer_prototype
24-
from zarr.chunk_grids import RegularChunkGrid
24+
from zarr.chunk_grids import RegularChunkGrid, _guess_chunks
2525
from zarr.chunk_key_encodings import ChunkKeyEncoding, DefaultChunkKeyEncoding, V2ChunkKeyEncoding
2626
from zarr.codecs import BytesCodec
2727
from zarr.codecs._v2 import V2Compressor, V2Filters
@@ -62,6 +62,9 @@
6262
)
6363
from zarr.metadata import ArrayMetadata, ArrayV2Metadata, ArrayV3Metadata
6464
from zarr.store import StoreLike, StorePath, make_store_path
65+
from zarr.store.core import (
66+
ensure_no_existing_node,
67+
)
6568
from zarr.sync import sync
6669

6770

@@ -137,12 +140,13 @@ async def create(
137140
compressor: dict[str, JSON] | None = None,
138141
# runtime
139142
exists_ok: bool = False,
143+
data: npt.ArrayLike | None = None,
140144
) -> AsyncArray:
141145
store_path = make_store_path(store)
142146

143147
if chunk_shape is None:
144148
if chunks is None:
145-
raise ValueError("Either chunk_shape or chunks needs to be provided.")
149+
chunk_shape = chunks = _guess_chunks(shape=shape, typesize=np.dtype(dtype).itemsize)
146150
chunk_shape = chunks
147151
elif chunks is not None:
148152
raise ValueError("Only one of chunk_shape or chunks must be provided.")
@@ -164,7 +168,7 @@ async def create(
164168
raise ValueError(
165169
"compressor cannot be used for arrays with version 3. Use bytes-to-bytes codecs instead."
166170
)
167-
return await cls._create_v3(
171+
result = await cls._create_v3(
168172
store_path,
169173
shape=shape,
170174
dtype=dtype,
@@ -187,7 +191,7 @@ async def create(
187191
)
188192
if dimension_names is not None:
189193
raise ValueError("dimension_names cannot be used for arrays with version 2.")
190-
return await cls._create_v2(
194+
result = await cls._create_v2(
191195
store_path,
192196
shape=shape,
193197
dtype=dtype,
@@ -203,6 +207,12 @@ async def create(
203207
else:
204208
raise ValueError(f"Unsupported zarr_format. Got: {zarr_format}")
205209

210+
if data is not None:
211+
# insert user-provided data
212+
await result.setitem(..., data)
213+
214+
return result
215+
206216
@classmethod
207217
async def _create_v3(
208218
cls,
@@ -224,7 +234,7 @@ async def _create_v3(
224234
exists_ok: bool = False,
225235
) -> AsyncArray:
226236
if not exists_ok:
227-
assert not await (store_path / ZARR_JSON).exists()
237+
await ensure_no_existing_node(store_path, zarr_format=3)
228238

229239
codecs = list(codecs) if codecs is not None else [BytesCodec()]
230240

@@ -280,8 +290,7 @@ async def _create_v2(
280290
import numcodecs
281291

282292
if not exists_ok:
283-
assert not await (store_path / ZARRAY_JSON).exists()
284-
293+
await ensure_no_existing_node(store_path, zarr_format=2)
285294
if order is None:
286295
order = "C"
287296

src/zarr/chunk_grids.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
from __future__ import annotations
22

33
import itertools
4+
import math
45
import operator
56
from abc import abstractmethod
67
from collections.abc import Iterator
78
from dataclasses import dataclass
89
from functools import reduce
910
from typing import TYPE_CHECKING
1011

12+
import numpy as np
13+
1114
from zarr.abc.metadata import Metadata
1215
from zarr.common import (
1316
JSON,
@@ -22,6 +25,75 @@
2225
from typing_extensions import Self
2326

2427

28+
def _guess_chunks(
29+
shape: ChunkCoords,
30+
typesize: int,
31+
*,
32+
increment_bytes: int = 256 * 1024,
33+
min_bytes: int = 128 * 1024,
34+
max_bytes: int = 64 * 1024 * 1024,
35+
) -> ChunkCoords:
36+
"""
37+
Iteratively guess an appropriate chunk layout for an array, given its shape and
38+
the size of each element in bytes, and size constraints expressed in bytes. This logic is
39+
adapted from h5py.
40+
41+
Parameters
42+
----------
43+
shape: ChunkCoords
44+
The chunk shape.
45+
typesize: int
46+
The size, in bytes, of each element of the chunk.
47+
increment_bytes: int = 256 * 1024
48+
The number of bytes used to increment or decrement the target chunk size in bytes.
49+
min_bytes: int = 128 * 1024
50+
The soft lower bound on the final chunk size in bytes.
51+
max_bytes: int = 64 * 1024 * 1024
52+
The hard upper bound on the final chunk size in bytes.
53+
54+
Returns
55+
-------
56+
ChunkCoords
57+
58+
"""
59+
60+
ndims = len(shape)
61+
# require chunks to have non-zero length for all dimensions
62+
chunks = np.maximum(np.array(shape, dtype="=f8"), 1)
63+
64+
# Determine the optimal chunk size in bytes using a PyTables expression.
65+
# This is kept as a float.
66+
dset_size = np.prod(chunks) * typesize
67+
target_size = increment_bytes * (2 ** np.log10(dset_size / (1024.0 * 1024)))
68+
69+
if target_size > max_bytes:
70+
target_size = max_bytes
71+
elif target_size < min_bytes:
72+
target_size = min_bytes
73+
74+
idx = 0
75+
while True:
76+
# Repeatedly loop over the axes, dividing them by 2. Stop when:
77+
# 1a. We're smaller than the target chunk size, OR
78+
# 1b. We're within 50% of the target chunk size, AND
79+
# 2. The chunk is smaller than the maximum chunk size
80+
81+
chunk_bytes = np.prod(chunks) * typesize
82+
83+
if (
84+
chunk_bytes < target_size or abs(chunk_bytes - target_size) / target_size < 0.5
85+
) and chunk_bytes < max_bytes:
86+
break
87+
88+
if np.prod(chunks) == 1:
89+
break # Element size larger than max_bytes
90+
91+
chunks[idx % ndims] = math.ceil(chunks[idx % ndims] / 2.0)
92+
idx += 1
93+
94+
return tuple(int(x) for x in chunks)
95+
96+
2597
@dataclass(frozen=True)
2698
class ChunkGrid(Metadata):
2799
@classmethod

src/zarr/errors.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
from typing import Any


class _BaseZarrError(ValueError):
    """
    Base class for Zarr errors raised when a store path already contains a node.

    Subclasses define ``_msg``, a ``str.format`` template; the positional
    arguments passed to the constructor fill the template.
    """

    _msg = ""

    def __init__(self, *args: Any) -> None:
        super().__init__(self._msg.format(*args))


class ContainsGroupError(_BaseZarrError):
    """Raised when a group exists at the path where a node would be created."""

    _msg = "A group exists in store {0!r} at path {1!r}."


class ContainsArrayError(_BaseZarrError):
    """Raised when an array exists at the path where a node would be created."""

    _msg = "An array exists in store {0!r} at path {1!r}."


class ContainsArrayAndGroupError(_BaseZarrError):
    """Raised when both array and group metadata exist at the same path."""

    # Note the trailing space after "{1!r}. " — without it the concatenated
    # message ran the two sentences together ("...path 'p'.Only one...").
    _msg = (
        "Array and group metadata documents (.zarray and .zgroup) were both found in store "
        "{0!r} at path {1!r}. "
        "Only one of these files may be present in a given directory / prefix. "
        "Remove the .zarray file, or the .zgroup file, or both."
    )

0 commit comments

Comments
 (0)