Skip to content

Commit

Permalink
Make concat more forgiving with variables that are being merged. (#3364)
Browse files Browse the repository at this point in the history
* Make concat more forgiving with variables that are being merged.

* rename test.

* simplify test.

* make diff smaller.
  • Loading branch information
dcherian authored and Joe Hamman committed Oct 14, 2019
1 parent ae1d8c7 commit 4f5ca73
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 18 deletions.
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ Bug fixes
- Line plots with the ``x`` or ``y`` argument set to a 1D non-dimensional coord
now plot the correct data for 2D DataArrays
(:issue:`3334`). By `Tom Nicholas <http://github.com/TomNicholas>`_.
- Make :py:func:`~xarray.concat` more robust when merging variables present in some datasets but
not others (:issue:`508`). By `Deepak Cherian <http://github.com/dcherian>`_.
- The default behaviour of reducing across all dimensions for
:py:class:`~xarray.core.groupby.DataArrayGroupBy` objects has now been properly removed
as was done for :py:class:`~xarray.core.groupby.DatasetGroupBy` in 0.13.0 (:issue:`3337`).
Expand Down
10 changes: 2 additions & 8 deletions xarray/core/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,15 +312,9 @@ def _dataset_concat(
to_merge = {var: [] for var in variables_to_merge}

for ds in datasets:
absent_merge_vars = variables_to_merge - set(ds.variables)
if absent_merge_vars:
raise ValueError(
"variables %r are present in some datasets but not others. "
% absent_merge_vars
)

for var in variables_to_merge:
to_merge[var].append(ds.variables[var])
if var in ds:
to_merge[var].append(ds.variables[var])

for var in variables_to_merge:
result_vars[var] = unique_variable(
Expand Down
9 changes: 4 additions & 5 deletions xarray/tests/test_combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -782,12 +782,11 @@ def test_auto_combine_previously_failed(self):
actual = auto_combine(datasets, concat_dim="t")
assert_identical(expected, actual)

def test_auto_combine_still_fails(self):
# concat can't handle new variables (yet):
# https://github.com/pydata/xarray/issues/508
def test_auto_combine_with_new_variables(self):
datasets = [Dataset({"x": 0}, {"y": 0}), Dataset({"x": 1}, {"y": 1, "z": 1})]
with pytest.raises(ValueError):
auto_combine(datasets, "y")
actual = auto_combine(datasets, "y")
expected = Dataset({"x": ("y", [0, 1])}, {"y": [0, 1], "z": 1})
assert_identical(expected, actual)

def test_auto_combine_no_concat(self):
objs = [Dataset({"x": 0}), Dataset({"y": 1})]
Expand Down
21 changes: 16 additions & 5 deletions xarray/tests/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,22 @@ def test_concat_simple(self, data, dim, coords):
datasets = [g for _, g in data.groupby(dim, squeeze=False)]
assert_identical(data, concat(datasets, dim, coords=coords))

def test_concat_merge_variables_present_in_some_datasets(self, data):
"""concat() must merge (not raise on) variables that appear in only a
subset of the input datasets; the result carries every such variable.
NOTE(review): relies on the suite's ``data`` fixture — assumed to be a
Dataset with a ``dim1`` dimension of length > 3; confirm against conftest."""
# coordinates present in some datasets but not others
# ds1 has scalar coord "x" only; ds2 has scalar coord "z" only.
ds1 = Dataset(data_vars={"a": ("y", [0.1])}, coords={"x": 0.1})
ds2 = Dataset(data_vars={"a": ("y", [0.2])}, coords={"z": 0.2})
actual = concat([ds1, ds2], dim="y", coords="minimal")
# both one-sided coords survive in the concatenated result
expected = Dataset({"a": ("y", [0.1, 0.2])}, coords={"x": 0.1, "z": 0.2})
assert_identical(expected, actual)

# data variables present in some datasets but not others
split_data = [data.isel(dim1=slice(3)), data.isel(dim1=slice(3, None))]
data0, data1 = deepcopy(split_data)
# "foo" exists only in the second half; concat should still succeed
data1["foo"] = ("bar", np.random.randn(10))
actual = concat([data0, data1], "dim1")
# result equals the original data plus the one-sided "foo" variable
expected = data.copy().assign(foo=data1.foo)
assert_identical(expected, actual)

def test_concat_2(self, data):
dim = "dim2"
datasets = [g for _, g in data.groupby(dim, squeeze=True)]
Expand Down Expand Up @@ -190,11 +206,6 @@ def test_concat_errors(self):
concat([data0, data1], "dim1", compat="identical")
assert_identical(data, concat([data0, data1], "dim1", compat="equals"))

with raises_regex(ValueError, "present in some datasets"):
data0, data1 = deepcopy(split_data)
data1["foo"] = ("bar", np.random.randn(10))
concat([data0, data1], "dim1")

with raises_regex(ValueError, "compat.* invalid"):
concat(split_data, "dim1", compat="foobar")

Expand Down

0 comments on commit 4f5ca73

Please sign in to comment.