From 3cbd21aa8fd3a57c0dd324f2a276d83829518331 Mon Sep 17 00:00:00 2001 From: keewis Date: Sat, 3 Apr 2021 17:43:13 +0200 Subject: [PATCH] add a combine_attrs option to open_mfdataset (#4971) * expose combine_attrs to open_mfdataset * add tests for passing combine_attrs to open_mfdataset * don't override the main dataset attrs * switch the default to "override" which seems closer to current behavior * update whats-new.rst [skip-ci] --- doc/whats-new.rst | 2 ++ xarray/backends/api.py | 7 ++--- xarray/tests/test_backends.py | 54 +++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index fa8fd49e80d..bfbf94e67c1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -64,6 +64,8 @@ New Features :py:class:`~core.groupby.DataArrayGroupBy`, inspired by pandas' :py:meth:`~pandas.core.groupby.GroupBy.get_group`. By `Deepak Cherian <https://github.com/dcherian>`_. +- Add a ``combine_attrs`` parameter to :py:func:`open_mfdataset` (:pull:`4971`). + By `Justus Magin <https://github.com/keewis>`_. - Disable the `cfgrib` backend if the `eccodes` library is not installed (:pull:`5083`). By `Baudouin Raoult <https://github.com/b8raoult>`_. - Added :py:meth:`DataArray.curvefit` and :py:meth:`Dataset.curvefit` for general curve fitting applications. (:issue:`4300`, :pull:`4849`) By `Sam Levang <https://github.com/slevang>`_. diff --git a/xarray/backends/api.py b/xarray/backends/api.py index b2bb928cb90..ab6ea3b1631 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -717,6 +717,7 @@ def open_mfdataset( parallel=False, join="outer", attrs_file=None, + combine_attrs="override", **kwargs, ): """Open multiple files as a single dataset. 
@@ -931,7 +932,7 @@ def open_mfdataset( coords=coords, ids=ids, join=join, - combine_attrs="drop", + combine_attrs=combine_attrs, ) elif combine == "by_coords": # Redo ordering from coordinates, ignoring how they were ordered @@ -942,7 +943,7 @@ def open_mfdataset( data_vars=data_vars, coords=coords, join=join, - combine_attrs="drop", + combine_attrs=combine_attrs, ) else: raise ValueError( @@ -965,8 +966,6 @@ def multi_file_closer(): if isinstance(attrs_file, Path): attrs_file = str(attrs_file) combined.attrs = datasets[paths.index(attrs_file)].attrs - else: - combined.attrs = datasets[0].attrs return combined diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index f6c00a2a9a9..33de0a98edb 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3004,6 +3004,60 @@ def test_open_mfdataset_does_same_as_concat(self, combine, opt, join): ds_expect = xr.concat([ds1, ds2], data_vars=opt, dim="t", join=join) assert_identical(ds, ds_expect) + @pytest.mark.parametrize( + ["combine_attrs", "attrs", "expected", "expect_error"], + ( + pytest.param("drop", [{"a": 1}, {"a": 2}], {}, False, id="drop"), + pytest.param( + "override", [{"a": 1}, {"a": 2}], {"a": 1}, False, id="override" + ), + pytest.param( + "no_conflicts", [{"a": 1}, {"a": 2}], None, True, id="no_conflicts" + ), + pytest.param( + "identical", + [{"a": 1, "b": 2}, {"a": 1, "c": 3}], + None, + True, + id="identical", + ), + pytest.param( + "drop_conflicts", + [{"a": 1, "b": 2}, {"b": -1, "c": 3}], + {"a": 1, "c": 3}, + False, + id="drop_conflicts", + ), + ), + ) + def test_open_mfdataset_dataset_combine_attrs( + self, combine_attrs, attrs, expected, expect_error + ): + with self.setup_files_and_datasets() as (files, [ds1, ds2]): + # Give the files an inconsistent attribute + for i, f in enumerate(files): + ds = open_dataset(f).load() + ds.attrs = attrs[i] + ds.close() + ds.to_netcdf(f) + + if expect_error: + with pytest.raises(xr.MergeError): + xr.open_mfdataset( + 
files, + combine="by_coords", + concat_dim="t", + combine_attrs=combine_attrs, + ) + else: + with xr.open_mfdataset( + files, + combine="by_coords", + concat_dim="t", + combine_attrs=combine_attrs, + ) as ds: + assert ds.attrs == expected + def test_open_mfdataset_dataset_attr_by_coords(self): """ Case when an attribute differs across the multiple files