Skip to content

Commit

Permalink
also apply combine_attrs to the attrs of the variables (pydata#4902)
Browse files Browse the repository at this point in the history
  • Loading branch information
keewis authored May 5, 2021
1 parent 7432032 commit bd4650c
Show file tree
Hide file tree
Showing 8 changed files with 264 additions and 50 deletions.
8 changes: 7 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ v0.17.1 (unreleased)

New Features
~~~~~~~~~~~~

- apply ``combine_attrs`` on data variables and coordinate variables when concatenating
and merging datasets and dataarrays (:pull:`4902`).
By `Justus Magin <https://github.com/keewis>`_.
- Add :py:meth:`Dataset.to_pandas` (:pull:`5247`)
By `Giacomo Caria <https://github.com/gcaria>`_.
- Add :py:meth:`DataArray.plot.surface` which wraps matplotlib's `plot_surface` to make
Expand Down Expand Up @@ -144,6 +146,10 @@ Breaking changes
``ds.coarsen(...).mean(keep_attrs=False)`` instead of ``ds.coarsen(..., keep_attrs=False).mean()``.
Further, coarsen now keeps attributes per default (:pull:`5227`).
By `Mathias Hauser <https://github.com/mathause>`_.
- switch the default of the :py:func:`merge` ``combine_attrs`` parameter to
``"override"``. This will keep the current behavior for merging the ``attrs`` of
variables but stop dropping the ``attrs`` of the main objects (:pull:`4902`).
By `Justus Magin <https://github.com/keewis>`_.

Deprecations
~~~~~~~~~~~~
Expand Down
4 changes: 2 additions & 2 deletions xarray/core/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,7 @@ def ensure_common_dims(vars):
vars = ensure_common_dims([ds[k].variable for ds in datasets])
except KeyError:
raise ValueError("%r is not present in all datasets." % k)
combined = concat_vars(vars, dim, positions)
combined = concat_vars(vars, dim, positions, combine_attrs=combine_attrs)
assert isinstance(combined, Variable)
result_vars[k] = combined
elif k in result_vars:
Expand Down Expand Up @@ -572,7 +572,7 @@ def _dataarray_concat(
positions,
fill_value=fill_value,
join=join,
combine_attrs="drop",
combine_attrs=combine_attrs,
)

merged_attrs = merge_attrs([da.attrs for da in arrays], combine_attrs)
Expand Down
16 changes: 13 additions & 3 deletions xarray/core/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ def merge_collected(
grouped: Dict[Hashable, List[MergeElement]],
prioritized: Mapping[Hashable, MergeElement] = None,
compat: str = "minimal",
combine_attrs="override",
) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, pd.Index]]:
"""Merge dicts of variables, while resolving conflicts appropriately.
Expand Down Expand Up @@ -222,11 +223,18 @@ def merge_collected(
% (name, variable.attrs, other_variable.attrs)
)
merged_vars[name] = variable
merged_vars[name].attrs = merge_attrs(
[var.attrs for var, _ in indexed_elements],
combine_attrs=combine_attrs,
)
merged_indexes[name] = index
else:
variables = [variable for variable, _ in elements_list]
try:
merged_vars[name] = unique_variable(name, variables, compat)
merged_vars[name].attrs = merge_attrs(
[var.attrs for var in variables], combine_attrs=combine_attrs
)
except MergeError:
if compat != "minimal":
# we need more than "minimal" compatibility (for which
Expand Down Expand Up @@ -613,7 +621,9 @@ def merge_core(
collected = collect_variables_and_indexes(aligned)

prioritized = _get_priority_vars_and_indexes(aligned, priority_arg, compat=compat)
variables, out_indexes = merge_collected(collected, prioritized, compat=compat)
variables, out_indexes = merge_collected(
collected, prioritized, compat=compat, combine_attrs=combine_attrs
)
assert_unique_multiindex_level_names(variables)

dims = calculate_dimensions(variables)
Expand Down Expand Up @@ -649,7 +659,7 @@ def merge(
compat: str = "no_conflicts",
join: str = "outer",
fill_value: object = dtypes.NA,
combine_attrs: str = "drop",
combine_attrs: str = "override",
) -> "Dataset":
"""Merge any number of xarray objects into a single Dataset as variables.
Expand Down Expand Up @@ -688,7 +698,7 @@ def merge(
variable names to fill values. Use a data array's name to
refer to its values.
combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \
"override"}, default: "drop"
"override"}, default: "override"
String indicating how to combine attrs of the objects being merged:
- "drop": empty attrs on returned Dataset.
Expand Down
67 changes: 59 additions & 8 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -1771,7 +1771,14 @@ def reduce(
return Variable(dims, data, attrs=attrs)

@classmethod
def concat(cls, variables, dim="concat_dim", positions=None, shortcut=False):
def concat(
cls,
variables,
dim="concat_dim",
positions=None,
shortcut=False,
combine_attrs="override",
):
"""Concatenate variables along a new or existing dimension.
Parameters
Expand All @@ -1794,13 +1801,27 @@ def concat(cls, variables, dim="concat_dim", positions=None, shortcut=False):
This option is used internally to speed-up groupby operations.
If `shortcut` is True, some checks of internal consistency between
arrays to concatenate are skipped.
combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \
"override"}, default: "override"
String indicating how to combine attrs of the objects being merged:
- "drop": empty attrs on returned Dataset.
- "identical": all attrs must be the same on every object.
- "no_conflicts": attrs from all objects are combined, any that have
the same name must also have the same value.
- "drop_conflicts": attrs from all objects are combined, any that have
the same name but different values are dropped.
- "override": skip comparing and copy attrs from the first dataset to
the result.
Returns
-------
stacked : Variable
Concatenated Variable formed by stacking all the supplied variables
along the given dimension.
"""
from .merge import merge_attrs

if not isinstance(dim, str):
(dim,) = dim.dims

Expand All @@ -1825,7 +1846,9 @@ def concat(cls, variables, dim="concat_dim", positions=None, shortcut=False):
dims = (dim,) + first_var.dims
data = duck_array_ops.stack(arrays, axis=axis)

attrs = dict(first_var.attrs)
attrs = merge_attrs(
[var.attrs for var in variables], combine_attrs=combine_attrs
)
encoding = dict(first_var.encoding)
if not shortcut:
for var in variables:
Expand Down Expand Up @@ -2581,12 +2604,21 @@ def __setitem__(self, key, value):
raise TypeError("%s values cannot be modified" % type(self).__name__)

@classmethod
def concat(cls, variables, dim="concat_dim", positions=None, shortcut=False):
def concat(
cls,
variables,
dim="concat_dim",
positions=None,
shortcut=False,
combine_attrs="override",
):
"""Specialized version of Variable.concat for IndexVariable objects.
This exists because we want to avoid converting Index objects to NumPy
arrays, if possible.
"""
from .merge import merge_attrs

if not isinstance(dim, str):
(dim,) = dim.dims

Expand All @@ -2613,12 +2645,13 @@ def concat(cls, variables, dim="concat_dim", positions=None, shortcut=False):
# keep as str if possible as pandas.Index uses object (converts to numpy array)
data = maybe_coerce_to_str(data, variables)

attrs = dict(first_var.attrs)
attrs = merge_attrs(
[var.attrs for var in variables], combine_attrs=combine_attrs
)
if not shortcut:
for var in variables:
if var.dims != first_var.dims:
raise ValueError("inconsistent dimensions")
utils.remove_incompatible_items(attrs, var.attrs)

return cls(first_var.dims, data, attrs)

Expand Down Expand Up @@ -2792,7 +2825,13 @@ def _broadcast_compat_data(self, other):
return self_data, other_data, dims


def concat(variables, dim="concat_dim", positions=None, shortcut=False):
def concat(
variables,
dim="concat_dim",
positions=None,
shortcut=False,
combine_attrs="override",
):
"""Concatenate variables along a new or existing dimension.
Parameters
Expand All @@ -2815,6 +2854,18 @@ def concat(variables, dim="concat_dim", positions=None, shortcut=False):
This option is used internally to speed-up groupby operations.
If `shortcut` is True, some checks of internal consistency between
arrays to concatenate are skipped.
combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \
"override"}, default: "override"
String indicating how to combine attrs of the objects being merged:
- "drop": empty attrs on returned Dataset.
- "identical": all attrs must be the same on every object.
- "no_conflicts": attrs from all objects are combined, any that have
the same name must also have the same value.
- "drop_conflicts": attrs from all objects are combined, any that have
the same name but different values are dropped.
- "override": skip comparing and copy attrs from the first dataset to
the result.
Returns
-------
Expand All @@ -2824,9 +2875,9 @@ def concat(variables, dim="concat_dim", positions=None, shortcut=False):
"""
variables = list(variables)
if all(isinstance(v, IndexVariable) for v in variables):
return IndexVariable.concat(variables, dim, positions, shortcut)
return IndexVariable.concat(variables, dim, positions, shortcut, combine_attrs)
else:
return Variable.concat(variables, dim, positions, shortcut)
return Variable.concat(variables, dim, positions, shortcut, combine_attrs)


def assert_unique_multiindex_level_names(variables):
Expand Down
Loading

0 comments on commit bd4650c

Please sign in to comment.