Skip to content

Commit

Permalink
Use zarr v3 dimension_names (#9669)
Browse files Browse the repository at this point in the history
* use zarr v3 dimension_names

* Update xarray/backends/zarr.py

Co-authored-by: Stephan Hoyer <shoyer@google.com>

* Update xarray/backends/zarr.py

Co-authored-by: Joe Hamman <jhamman1@gmail.com>

---------

Co-authored-by: Stephan Hoyer <shoyer@google.com>
Co-authored-by: Joe Hamman <joe@earthmover.io>
Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>
Co-authored-by: Joe Hamman <jhamman1@gmail.com>
  • Loading branch information
5 people authored Oct 24, 2024
1 parent 5b2e6f1 commit 519f05e
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 1 deletion.
18 changes: 17 additions & 1 deletion xarray/backends/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,18 @@ def _determine_zarr_chunks(


def _get_zarr_dims_and_attrs(zarr_obj, dimension_key, try_nczarr):
# Zarr V3 explicitly stores the dimension names in the metadata
try:
# if this exists, we are looking at a Zarr V3 array
# convert None to empty tuple
dimensions = zarr_obj.metadata.dimension_names or ()
except AttributeError:
# continue to old code path
pass
else:
attributes = dict(zarr_obj.attrs)
return dimensions, attributes

# Zarr arrays do not have dimensions. To get around this problem, we add
# an attribute that specifies the dimension. We have to hide this attribute
# when we send the attributes to the user.
Expand Down Expand Up @@ -919,6 +931,7 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No
import zarr

existing_keys = tuple(self.zarr_group.array_keys())
is_zarr_v3_format = _zarr_v3() and self.zarr_group.metadata.zarr_format == 3

for vn, v in variables.items():
name = _encode_variable_name(vn)
Expand Down Expand Up @@ -1022,7 +1035,10 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No
# new variable
encoded_attrs = {}
# the magic for storing the hidden dimension data
encoded_attrs[DIMENSION_KEY] = dims
if is_zarr_v3_format:
encoding["dimension_names"] = dims
else:
encoded_attrs[DIMENSION_KEY] = dims
for k2, v2 in attrs.items():
encoded_attrs[k2] = self.encode_attribute(v2)

Expand Down
12 changes: 12 additions & 0 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -2559,6 +2559,8 @@ def test_drop_encoding(self):
ds.to_zarr(store, encoding=encodings)

def test_hidden_zarr_keys(self) -> None:
skip_if_zarr_format_3("This test is unnecessary; no hidden Zarr keys")

expected = create_test_data()
with self.create_store() as store:
expected.dump_to_store(store)
Expand Down Expand Up @@ -2586,6 +2588,16 @@ def test_hidden_zarr_keys(self) -> None:
with xr.decode_cf(store):
pass

def test_dimension_names(self) -> None:
    """Check that each stored array's metadata carries the variable's dims.

    The test data is dumped into the store, and then every member of the
    underlying Zarr group is compared: the ``dims`` of the source variable
    must equal the ``dimension_names`` recorded in that array's metadata.
    """
    skip_if_zarr_format_2("No dimension names in V2")

    dataset = create_test_data()
    with self.create_store() as store:
        dataset.dump_to_store(store)
        group = store.ds
        for name in group:
            # Look up the source dims first, then the stored names, so the
            # evaluation order matches a direct side-by-side comparison.
            source_dims = dataset[name].dims
            stored_names = group[name].metadata.dimension_names
            assert source_dims == stored_names

@pytest.mark.parametrize("group", [None, "group1"])
def test_write_persistence_modes(self, group) -> None:
original = create_test_data()
Expand Down

0 comments on commit 519f05e

Please sign in to comment.