Skip to content

Commit c93b31a

Browse files
rabernat, dcherian, andersy005
authored
Add mode='a-': Do not overwrite coordinates when appending to Zarr with append_dim (#8428)
Co-authored-by: Deepak Cherian <deepak@cherian.net> Co-authored-by: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com>
1 parent 1715ed3 commit c93b31a

File tree

7 files changed

+78
-27
lines changed

7 files changed

+78
-27
lines changed

doc/whats-new.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ New Features
2525

2626
- Use a concise format when plotting datetime arrays. (:pull:`8449`).
2727
By `Jimmy Westling <https://github.com/illviljan>`_.
28-
29-
28+
- Avoid overwriting unchanged existing coordinate variables when appending to a Zarr store with ``append_dim`` by setting ``mode='a-'``. (:pull:`8428`).
29+
By `Ryan Abernathey <https://github.com/rabernat>`_ and `Deepak Cherian <https://github.com/dcherian>`_.
3030
- :py:meth:`~xarray.DataArray.rank` now operates on dask-backed arrays, assuming
3131
the core dim has exactly one chunk. (:pull:`8475`).
3232
By `Maximilian Roos <https://github.com/max-sixty>`_.

xarray/backends/api.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
from xarray.core.dataset import Dataset, _get_chunk, _maybe_chunk
4040
from xarray.core.indexes import Index
4141
from xarray.core.parallelcompat import guess_chunkmanager
42+
from xarray.core.types import ZarrWriteModes
4243
from xarray.core.utils import is_remote_uri
4344

4445
if TYPE_CHECKING:
@@ -69,7 +70,6 @@
6970
"NETCDF4", "NETCDF4_CLASSIC", "NETCDF3_64BIT", "NETCDF3_CLASSIC"
7071
]
7172

72-
7373
DATAARRAY_NAME = "__xarray_dataarray_name__"
7474
DATAARRAY_VARIABLE = "__xarray_dataarray_variable__"
7575

@@ -1577,7 +1577,7 @@ def to_zarr(
15771577
dataset: Dataset,
15781578
store: MutableMapping | str | os.PathLike[str] | None = None,
15791579
chunk_store: MutableMapping | str | os.PathLike | None = None,
1580-
mode: Literal["w", "w-", "a", "r+", None] = None,
1580+
mode: ZarrWriteModes | None = None,
15811581
synchronizer=None,
15821582
group: str | None = None,
15831583
encoding: Mapping | None = None,
@@ -1601,7 +1601,7 @@ def to_zarr(
16011601
dataset: Dataset,
16021602
store: MutableMapping | str | os.PathLike[str] | None = None,
16031603
chunk_store: MutableMapping | str | os.PathLike | None = None,
1604-
mode: Literal["w", "w-", "a", "r+", None] = None,
1604+
mode: ZarrWriteModes | None = None,
16051605
synchronizer=None,
16061606
group: str | None = None,
16071607
encoding: Mapping | None = None,
@@ -1623,7 +1623,7 @@ def to_zarr(
16231623
dataset: Dataset,
16241624
store: MutableMapping | str | os.PathLike[str] | None = None,
16251625
chunk_store: MutableMapping | str | os.PathLike | None = None,
1626-
mode: Literal["w", "w-", "a", "r+", None] = None,
1626+
mode: ZarrWriteModes | None = None,
16271627
synchronizer=None,
16281628
group: str | None = None,
16291629
encoding: Mapping | None = None,
@@ -1680,16 +1680,18 @@ def to_zarr(
16801680
else:
16811681
mode = "w-"
16821682

1683-
if mode != "a" and append_dim is not None:
1683+
if mode not in ["a", "a-"] and append_dim is not None:
16841684
raise ValueError("cannot set append_dim unless mode='a' or mode=None")
16851685

1686-
if mode not in ["a", "r+"] and region is not None:
1687-
raise ValueError("cannot set region unless mode='a', mode='r+' or mode=None")
1686+
if mode not in ["a", "a-", "r+"] and region is not None:
1687+
raise ValueError(
1688+
"cannot set region unless mode='a', mode='a-', mode='r+' or mode=None"
1689+
)
16881690

1689-
if mode not in ["w", "w-", "a", "r+"]:
1691+
if mode not in ["w", "w-", "a", "a-", "r+"]:
16901692
raise ValueError(
16911693
"The only supported options for mode are 'w', "
1692-
f"'w-', 'a' and 'r+', but mode={mode!r}"
1694+
f"'w-', 'a', 'a-', and 'r+', but mode={mode!r}"
16931695
)
16941696

16951697
# validate Dataset keys, DataArray names
@@ -1745,7 +1747,7 @@ def to_zarr(
17451747
write_empty=write_empty_chunks,
17461748
)
17471749

1748-
if mode in ["a", "r+"]:
1750+
if mode in ["a", "a-", "r+"]:
17491751
_validate_datatypes_for_zarr_append(zstore, dataset)
17501752
if append_dim is not None:
17511753
existing_dims = zstore.get_dimensions()

xarray/backends/zarr.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from xarray.core import indexing
2222
from xarray.core.parallelcompat import guess_chunkmanager
2323
from xarray.core.pycompat import integer_types
24+
from xarray.core.types import ZarrWriteModes
2425
from xarray.core.utils import (
2526
FrozenDict,
2627
HiddenKeyDict,
@@ -385,7 +386,7 @@ class ZarrStore(AbstractWritableDataStore):
385386
def open_group(
386387
cls,
387388
store,
388-
mode="r",
389+
mode: ZarrWriteModes = "r",
389390
synchronizer=None,
390391
group=None,
391392
consolidated=False,
@@ -410,7 +411,8 @@ def open_group(
410411
zarr_version = getattr(store, "_store_version", 2)
411412

412413
open_kwargs = dict(
413-
mode=mode,
414+
# mode='a-' is a handcrafted xarray specialty
415+
mode="a" if mode == "a-" else mode,
414416
synchronizer=synchronizer,
415417
path=group,
416418
)
@@ -639,8 +641,21 @@ def store(
639641
self.set_attributes(attributes)
640642
self.set_dimensions(variables_encoded, unlimited_dims=unlimited_dims)
641643

644+
# if we are appending to an append_dim, only write either
645+
# - new variables not already present, OR
646+
# - variables with the append_dim in their dimensions
647+
# We do NOT overwrite other variables.
648+
if self._mode == "a-" and self._append_dim is not None:
649+
variables_to_set = {
650+
k: v
651+
for k, v in variables_encoded.items()
652+
if (k not in existing_variable_names) or (self._append_dim in v.dims)
653+
}
654+
else:
655+
variables_to_set = variables_encoded
656+
642657
self.set_variables(
643-
variables_encoded, check_encoding_set, writer, unlimited_dims=unlimited_dims
658+
variables_to_set, check_encoding_set, writer, unlimited_dims=unlimited_dims
644659
)
645660
if self._consolidate_on_close:
646661
zarr.consolidate_metadata(self.zarr_group.store)

xarray/core/dataarray.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,12 @@
4949
from xarray.core.indexing import is_fancy_indexer, map_index_queries
5050
from xarray.core.merge import PANDAS_TYPES, MergeError
5151
from xarray.core.options import OPTIONS, _get_keep_attrs
52-
from xarray.core.types import DaCompatible, T_DataArray, T_DataArrayOrSet
52+
from xarray.core.types import (
53+
DaCompatible,
54+
T_DataArray,
55+
T_DataArrayOrSet,
56+
ZarrWriteModes,
57+
)
5358
from xarray.core.utils import (
5459
Default,
5560
HybridMappingProxy,
@@ -4074,7 +4079,7 @@ def to_zarr(
40744079
self,
40754080
store: MutableMapping | str | PathLike[str] | None = None,
40764081
chunk_store: MutableMapping | str | PathLike | None = None,
4077-
mode: Literal["w", "w-", "a", "r+", None] = None,
4082+
mode: ZarrWriteModes | None = None,
40784083
synchronizer=None,
40794084
group: str | None = None,
40804085
*,
@@ -4095,7 +4100,7 @@ def to_zarr(
40954100
self,
40964101
store: MutableMapping | str | PathLike[str] | None = None,
40974102
chunk_store: MutableMapping | str | PathLike | None = None,
4098-
mode: Literal["w", "w-", "a", "r+", None] = None,
4103+
mode: ZarrWriteModes | None = None,
40994104
synchronizer=None,
41004105
group: str | None = None,
41014106
encoding: Mapping | None = None,
@@ -4114,7 +4119,7 @@ def to_zarr(
41144119
self,
41154120
store: MutableMapping | str | PathLike[str] | None = None,
41164121
chunk_store: MutableMapping | str | PathLike | None = None,
4117-
mode: Literal["w", "w-", "a", "r+", None] = None,
4122+
mode: ZarrWriteModes | None = None,
41184123
synchronizer=None,
41194124
group: str | None = None,
41204125
encoding: Mapping | None = None,
@@ -4150,10 +4155,11 @@ def to_zarr(
41504155
chunk_store : MutableMapping, str or path-like, optional
41514156
Store or path to directory in local or remote file system only for Zarr
41524157
array chunks. Requires zarr-python v2.4.0 or later.
4153-
mode : {"w", "w-", "a", "r+", None}, optional
4158+
mode : {"w", "w-", "a", "a-", "r+", None}, optional
41544159
Persistence mode: "w" means create (overwrite if exists);
41554160
"w-" means create (fail if exists);
4156-
"a" means override existing variables (create if does not exist);
4161+
"a" means override all existing variables including dimension coordinates (create if does not exist);
4162+
"a-" means only append those variables that have ``append_dim`` (variables not yet in the store are still written; other existing variables are left untouched);
41574163
"r+" means modify existing array *values* only (raise an error if
41584164
any metadata or shapes would change).
41594165
The default mode is "a" if ``append_dim`` is set. Otherwise, it is

xarray/core/dataset.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@
100100
T_Chunks,
101101
T_DataArrayOrSet,
102102
T_Dataset,
103+
ZarrWriteModes,
103104
)
104105
from xarray.core.utils import (
105106
Default,
@@ -2305,7 +2306,7 @@ def to_zarr(
23052306
self,
23062307
store: MutableMapping | str | PathLike[str] | None = None,
23072308
chunk_store: MutableMapping | str | PathLike | None = None,
2308-
mode: Literal["w", "w-", "a", "r+", None] = None,
2309+
mode: ZarrWriteModes | None = None,
23092310
synchronizer=None,
23102311
group: str | None = None,
23112312
encoding: Mapping | None = None,
@@ -2328,7 +2329,7 @@ def to_zarr(
23282329
self,
23292330
store: MutableMapping | str | PathLike[str] | None = None,
23302331
chunk_store: MutableMapping | str | PathLike | None = None,
2331-
mode: Literal["w", "w-", "a", "r+", None] = None,
2332+
mode: ZarrWriteModes | None = None,
23322333
synchronizer=None,
23332334
group: str | None = None,
23342335
encoding: Mapping | None = None,
@@ -2349,7 +2350,7 @@ def to_zarr(
23492350
self,
23502351
store: MutableMapping | str | PathLike[str] | None = None,
23512352
chunk_store: MutableMapping | str | PathLike | None = None,
2352-
mode: Literal["w", "w-", "a", "r+", None] = None,
2353+
mode: ZarrWriteModes | None = None,
23532354
synchronizer=None,
23542355
group: str | None = None,
23552356
encoding: Mapping | None = None,
@@ -2387,10 +2388,11 @@ def to_zarr(
23872388
chunk_store : MutableMapping, str or path-like, optional
23882389
Store or path to directory in local or remote file system only for Zarr
23892390
array chunks. Requires zarr-python v2.4.0 or later.
2390-
mode : {"w", "w-", "a", "r+", None}, optional
2391+
mode : {"w", "w-", "a", "a-", "r+", None}, optional
23912392
Persistence mode: "w" means create (overwrite if exists);
23922393
"w-" means create (fail if exists);
2393-
"a" means override existing variables (create if does not exist);
2394+
"a" means override all existing variables including dimension coordinates (create if does not exist);
2395+
"a-" means only append those variables that have ``append_dim`` (variables not yet in the store are still written; other existing variables are left untouched);
23942396
"r+" means modify existing array *values* only (raise an error if
23952397
any metadata or shapes would change).
23962398
The default mode is "a" if ``append_dim`` is set. Otherwise, it is

xarray/core/types.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,3 +282,6 @@ def copy(
282282
"midpoint",
283283
"nearest",
284284
]
285+
286+
287+
ZarrWriteModes = Literal["w", "w-", "a", "a-", "r+", "r"]

xarray/tests/test_backends.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2390,6 +2390,29 @@ def test_append_with_new_variable(self) -> None:
23902390
xr.open_dataset(store_target, engine="zarr", **self.version_kwargs),
23912391
)
23922392

2393+
def test_append_with_append_dim_no_overwrite(self) -> None:
2394+
ds, ds_to_append, _ = create_append_test_data()
2395+
with self.create_zarr_target() as store_target:
2396+
ds.to_zarr(store_target, mode="w", **self.version_kwargs)
2397+
original = xr.concat([ds, ds_to_append], dim="time")
2398+
original2 = xr.concat([original, ds_to_append], dim="time")
2399+
2400+
# overwrite a coordinate;
2401+
# for mode='a-', this will not get written to the store
2402+
# because it does not have the append_dim as a dim
2403+
ds_to_append.lon.data[:] = -999
2404+
ds_to_append.to_zarr(
2405+
store_target, mode="a-", append_dim="time", **self.version_kwargs
2406+
)
2407+
actual = xr.open_dataset(store_target, engine="zarr", **self.version_kwargs)
2408+
assert_identical(original, actual)
2409+
2410+
# by default, mode="a" will overwrite all coordinates.
2411+
ds_to_append.to_zarr(store_target, append_dim="time", **self.version_kwargs)
2412+
actual = xr.open_dataset(store_target, engine="zarr", **self.version_kwargs)
2413+
original2.lon.data[:] = -999
2414+
assert_identical(original2, actual)
2415+
23932416
@requires_dask
23942417
def test_to_zarr_compute_false_roundtrip(self) -> None:
23952418
from dask.delayed import Delayed
@@ -2586,7 +2609,7 @@ def setup_and_verify_store(expected=data):
25862609
with pytest.raises(
25872610
ValueError,
25882611
match=re.escape(
2589-
"cannot set region unless mode='a', mode='r+' or mode=None"
2612+
"cannot set region unless mode='a', mode='a-', mode='r+' or mode=None"
25902613
),
25912614
):
25922615
data.to_zarr(

0 commit comments

Comments
 (0)