add a combine_attrs option to open_mfdataset (pydata#4971)
* expose combine_attrs to open_mfdataset

* add tests for passing combine_attrs to open_mfdataset

* don't override the main dataset attrs

* switch the default to "override" which seems closer to current behavior

* update whats-new.rst [skip-ci]
keewis authored Apr 3, 2021
1 parent c1272b5 commit 3cbd21a
Showing 3 changed files with 59 additions and 4 deletions.
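
A minimal usage sketch of the new parameter (file names and attribute contents are hypothetical; the value is passed straight through to the underlying combine step):

import xarray as xr

# default: combine_attrs="override" keeps the attrs of the first dataset,
# which is close to the previous behavior of copying datasets[0].attrs
ds = xr.open_mfdataset(["part1.nc", "part2.nc"], combine="by_coords")

# drop attributes that conflict between the files, keep the rest
ds = xr.open_mfdataset(
    ["part1.nc", "part2.nc"],
    combine="by_coords",
    combine_attrs="drop_conflicts",
)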
2 changes: 2 additions & 0 deletions doc/whats-new.rst
@@ -64,6 +64,8 @@ New Features
:py:class:`~core.groupby.DataArrayGroupBy`, inspired by pandas'
:py:meth:`~pandas.core.groupby.GroupBy.get_group`.
By `Deepak Cherian <https://github.com/dcherian>`_.
- Add a ``combine_attrs`` parameter to :py:func:`open_mfdataset` (:pull:`4971`).
By `Justus Magin <https://github.com/keewis>`_.
- Disable the `cfgrib` backend if the `eccodes` library is not installed (:pull:`5083`). By `Baudouin Raoult <https://github.com/b8raoult>`_.
- Added :py:meth:`DataArray.curvefit` and :py:meth:`Dataset.curvefit` for general curve fitting applications. (:issue:`4300`, :pull:`4849`)
By `Sam Levang <https://github.com/slevang>`_.
7 changes: 3 additions & 4 deletions xarray/backends/api.py
@@ -717,6 +717,7 @@ def open_mfdataset(
parallel=False,
join="outer",
attrs_file=None,
combine_attrs="override",
**kwargs,
):
"""Open multiple files as a single dataset.
@@ -931,7 +932,7 @@
coords=coords,
ids=ids,
join=join,
combine_attrs="drop",
combine_attrs=combine_attrs,
)
elif combine == "by_coords":
# Redo ordering from coordinates, ignoring how they were ordered
@@ -942,7 +943,7 @@
data_vars=data_vars,
coords=coords,
join=join,
combine_attrs="drop",
combine_attrs=combine_attrs,
)
else:
raise ValueError(
@@ -965,8 +966,6 @@ def multi_file_closer():
if isinstance(attrs_file, Path):
attrs_file = str(attrs_file)
combined.attrs = datasets[paths.index(attrs_file)].attrs
- else:
- combined.attrs = datasets[0].attrs

return combined

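For context, a sketch of the attrs handling that results from this hunk (file names hypothetical): combine_attrs now determines the attrs of the combined dataset, an explicit attrs_file still takes precedence, and the removed else branch is redundant because the new default "override" already keeps the first dataset's attrs.

import xarray as xr

# ds.attrs follows combine_attrs; with the default "override" this matches
# the old datasets[0].attrs fallback that the hunk above removes
ds = xr.open_mfdataset(["part1.nc", "part2.nc"], combine="nested", concat_dim="t")

# an explicit attrs_file still overrides whatever combine_attrs produced
ds = xr.open_mfdataset(
    ["part1.nc", "part2.nc"],
    combine="nested",
    concat_dim="t",
    attrs_file="part2.nc",
)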
54 changes: 54 additions & 0 deletions xarray/tests/test_backends.py
@@ -3004,6 +3004,60 @@ def test_open_mfdataset_does_same_as_concat(self, combine, opt, join):
ds_expect = xr.concat([ds1, ds2], data_vars=opt, dim="t", join=join)
assert_identical(ds, ds_expect)

@pytest.mark.parametrize(
["combine_attrs", "attrs", "expected", "expect_error"],
(
pytest.param("drop", [{"a": 1}, {"a": 2}], {}, False, id="drop"),
pytest.param(
"override", [{"a": 1}, {"a": 2}], {"a": 1}, False, id="override"
),
pytest.param(
"no_conflicts", [{"a": 1}, {"a": 2}], None, True, id="no_conflicts"
),
pytest.param(
"identical",
[{"a": 1, "b": 2}, {"a": 1, "c": 3}],
None,
True,
id="identical",
),
pytest.param(
"drop_conflicts",
[{"a": 1, "b": 2}, {"b": -1, "c": 3}],
{"a": 1, "c": 3},
False,
id="drop_conflicts",
),
),
)
def test_open_mfdataset_dataset_combine_attrs(
self, combine_attrs, attrs, expected, expect_error
):
with self.setup_files_and_datasets() as (files, [ds1, ds2]):
# Give the files an inconsistent attribute
for i, f in enumerate(files):
ds = open_dataset(f).load()
ds.attrs = attrs[i]
ds.close()
ds.to_netcdf(f)

if expect_error:
with pytest.raises(xr.MergeError):
xr.open_mfdataset(
files,
combine="by_coords",
concat_dim="t",
combine_attrs=combine_attrs,
)
else:
with xr.open_mfdataset(
files,
combine="by_coords",
concat_dim="t",
combine_attrs=combine_attrs,
) as ds:
assert ds.attrs == expected

def test_open_mfdataset_dataset_attr_by_coords(self):
"""
Case when an attribute differs across the multiple files
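Taken together, the parametrized cases in the new test pin down the semantics of each combine_attrs value as seen through open_mfdataset; a condensed restatement (attribute dicts copied from the test parameters, file names hypothetical):

import xarray as xr

# file 1 attrs       file 2 attrs        combine_attrs      combined attrs
# {"a": 1}           {"a": 2}            "drop"             {}
# {"a": 1}           {"a": 2}            "override"         {"a": 1}  (default)
# {"a": 1}           {"a": 2}            "no_conflicts"     raises MergeError
# {"a": 1, "b": 2}   {"a": 1, "c": 3}    "identical"        raises MergeError
# {"a": 1, "b": 2}   {"b": -1, "c": 3}   "drop_conflicts"   {"a": 1, "c": 3}
ds = xr.open_mfdataset(
    ["part1.nc", "part2.nc"],
    combine="by_coords",
    combine_attrs="drop_conflicts",
)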
