diff --git a/doc/faq.rst b/doc/faq.rst index 9313481f50a..44bc021024b 100644 --- a/doc/faq.rst +++ b/doc/faq.rst @@ -119,7 +119,8 @@ conventions`_. (An exception is serialization to and from netCDF files.) An implication of this choice is that we do not propagate ``attrs`` through most operations unless explicitly flagged (some methods have a ``keep_attrs`` -option). Similarly, xarray does not check for conflicts between ``attrs`` when +option, and there is a global flag for setting this to be always True or +False). Similarly, xarray does not check for conflicts between ``attrs`` when combining arrays and datasets, unless explicitly requested with the option ``compat='identical'``. The guiding principle is that metadata should not be allowed to get in the way. diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e02076120a6..67a10a709db 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -33,11 +33,14 @@ v0.11.0 (unreleased) Breaking changes ~~~~~~~~~~~~~~~~ -- ``Dataset.T`` has been removed as a shortcut for :py:meth:`Dataset.transpose`. - Call :py:meth:`Dataset.transpose` directly instead. -- Iterating over a ``Dataset`` now includes only data variables, not coordinates. - Similarily, calling ``len`` and ``bool`` on a ``Dataset`` now - includes only data variables +- Finished deprecation cycles: + - ``Dataset.T`` has been removed as a shortcut for :py:meth:`Dataset.transpose`. + Call :py:meth:`Dataset.transpose` directly instead. + - Iterating over a ``Dataset`` now includes only data variables, not coordinates. + Similarly, calling ``len`` and ``bool`` on a ``Dataset`` now + includes only data variables. + - ``DataArray.__contains__`` (used by Python's ``in`` operator) now checks + array data, not coordinates. - Xarray's storage backends now automatically open and close files when necessary, rather than requiring opening a file with ``autoclose=True``. 
A global least-recently-used cache is used to store open files; the default @@ -82,7 +85,12 @@ Enhancements :py:meth:`~xarray.Dataset.differentiate`, :py:meth:`~xarray.DataArray.interp`, and :py:meth:`~xarray.Dataset.interp`. - By `Spencer Clark `_. + By `Spencer Clark `_ +- There is now a global option to either always keep or always discard + dataset and dataarray attrs upon operations. The option is set with + ``xarray.set_options(keep_attrs=True)``, and the default is to use the old + behaviour. + By `Tom Nicholas `_. - Added a new backend for the GRIB file format based on ECMWF *cfgrib* python driver and *ecCodes* C-library. (:issue:`2475`) By `Alessandro Amici `_, @@ -126,6 +134,10 @@ Bug fixes By `Spencer Clark `_. - Avoid use of Dask's deprecated ``get=`` parameter in tests by `Matthew Rocklin `_. +- An ``OverflowError`` is now accurately raised and caught during the + encoding process if a reference date is used that is so distant that + the dates must be encoded using cftime rather than NumPy (:issue:`2272`). + By `Spencer Clark `_. .. _whats-new.0.10.9: diff --git a/xarray/coding/times.py b/xarray/coding/times.py index dff7e75bdcf..16380976def 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -361,7 +361,12 @@ def encode_cf_datetime(dates, units=None, calendar=None): delta_units = _netcdf_to_numpy_timeunit(delta) time_delta = np.timedelta64(1, delta_units).astype('timedelta64[ns]') ref_date = np.datetime64(pd.Timestamp(ref_date)) - num = (dates - ref_date) / time_delta + + # Wrap the dates in a DatetimeIndex to do the subtraction to ensure + # an OverflowError is raised if the ref_date is too far away from + # dates to be encoded (GH 2272). 
+ num = (pd.DatetimeIndex(dates.ravel()) - ref_date) / time_delta + num = num.values.reshape(dates.shape) except (OutOfBoundsDatetime, OverflowError): num = _encode_datetime_with_cftime(dates, units, calendar) diff --git a/xarray/core/common.py b/xarray/core/common.py index 6c03775a5dd..e303c485523 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -11,6 +11,7 @@ from .arithmetic import SupportsArithmetic from .pycompat import OrderedDict, basestring, dask_array_type, suppress from .utils import Frozen, ReprObject, SortedKeysDict, either_dict_or_kwargs +from .options import _get_keep_attrs # Used as a sentinel value to indicate a all dimensions ALL_DIMS = ReprObject('') @@ -21,13 +22,13 @@ class ImplementsArrayReduce(object): def _reduce_method(cls, func, include_skipna, numeric_only): if include_skipna: def wrapped_func(self, dim=None, axis=None, skipna=None, - keep_attrs=False, **kwargs): - return self.reduce(func, dim, axis, keep_attrs=keep_attrs, + **kwargs): + return self.reduce(func, dim, axis, skipna=skipna, allow_lazy=True, **kwargs) else: - def wrapped_func(self, dim=None, axis=None, keep_attrs=False, + def wrapped_func(self, dim=None, axis=None, **kwargs): - return self.reduce(func, dim, axis, keep_attrs=keep_attrs, + return self.reduce(func, dim, axis, allow_lazy=True, **kwargs) return wrapped_func @@ -51,14 +52,14 @@ class ImplementsDatasetReduce(object): @classmethod def _reduce_method(cls, func, include_skipna, numeric_only): if include_skipna: - def wrapped_func(self, dim=None, keep_attrs=False, skipna=None, + def wrapped_func(self, dim=None, skipna=None, **kwargs): - return self.reduce(func, dim, keep_attrs, skipna=skipna, + return self.reduce(func, dim, skipna=skipna, numeric_only=numeric_only, allow_lazy=True, **kwargs) else: - def wrapped_func(self, dim=None, keep_attrs=False, **kwargs): - return self.reduce(func, dim, keep_attrs, + def wrapped_func(self, dim=None, **kwargs): + return self.reduce(func, dim, 
numeric_only=numeric_only, allow_lazy=True, **kwargs) return wrapped_func @@ -591,7 +592,7 @@ def rolling(self, dim=None, min_periods=None, center=False, **dim_kwargs): center=center) def resample(self, freq=None, dim=None, how=None, skipna=None, - closed=None, label=None, base=0, keep_attrs=False, **indexer): + closed=None, label=None, base=0, keep_attrs=None, **indexer): """Returns a Resample object for performing resampling operations. Handles both downsampling and upsampling. If any intervals contain no @@ -659,6 +660,9 @@ def resample(self, freq=None, dim=None, how=None, skipna=None, from .dataarray import DataArray from .resample import RESAMPLE_DIM + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + if dim is not None: if how is None: how = 'mean' diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index d1e0516d756..17af3cf2cd1 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -16,7 +16,7 @@ assert_coordinate_consistent, remap_label_indexers) from .dataset import Dataset, merge_indexes, split_indexes from .formatting import format_item -from .options import OPTIONS +from .options import OPTIONS, _get_keep_attrs from .pycompat import OrderedDict, basestring, iteritems, range, zip from .utils import ( _check_inplace, decode_numpy_dict_values, either_dict_or_kwargs, @@ -504,11 +504,7 @@ def _item_sources(self): LevelCoordinatesSource(self)] def __contains__(self, key): - warnings.warn( - 'xarray.DataArray.__contains__ currently checks membership in ' - 'DataArray.coords, but in xarray v0.11 will change to check ' - 'membership in array values.', FutureWarning, stacklevel=2) - return key in self._coords + return key in self.data @property def loc(self): @@ -1564,7 +1560,7 @@ def combine_first(self, other): """ return ops.fillna(self, other, join="outer") - def reduce(self, func, dim=None, axis=None, keep_attrs=False, **kwargs): + def reduce(self, func, dim=None, axis=None, keep_attrs=None, **kwargs): 
"""Reduce this array by applying `func` along some dimension(s). Parameters @@ -1593,6 +1589,7 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=False, **kwargs): DataArray with this object's array replaced with an array with summarized data and the indicated dimension(s) removed. """ + var = self.variable.reduce(func, dim, axis, keep_attrs, **kwargs) return self._replace_maybe_drop_dims(var) @@ -2275,7 +2272,7 @@ def sortby(self, variables, ascending=True): ds = self._to_temp_dataset().sortby(variables, ascending=ascending) return self._from_temp_dataset(ds) - def quantile(self, q, dim=None, interpolation='linear', keep_attrs=False): + def quantile(self, q, dim=None, interpolation='linear', keep_attrs=None): """Compute the qth quantile of the data along the specified dimension. Returns the qth quantiles(s) of the array elements. @@ -2321,7 +2318,7 @@ def quantile(self, q, dim=None, interpolation='linear', keep_attrs=False): q, dim=dim, keep_attrs=keep_attrs, interpolation=interpolation) return self._from_temp_dataset(ds) - def rank(self, dim, pct=False, keep_attrs=False): + def rank(self, dim, pct=False, keep_attrs=None): """Ranks the data. 
Equal values are assigned a rank that is the average of the ranks that @@ -2357,6 +2354,7 @@ def rank(self, dim, pct=False, keep_attrs=False): array([ 1., 2., 3.]) Dimensions without coordinates: x """ + ds = self._to_temp_dataset().rank(dim, pct=pct, keep_attrs=keep_attrs) return self._from_temp_dataset(ds) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 35f3077a6d4..1f88ebaef70 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -28,7 +28,7 @@ from .merge import ( dataset_merge_method, dataset_update_method, merge_data_and_coords, merge_variables) -from .options import OPTIONS +from .options import OPTIONS, _get_keep_attrs from .pycompat import ( OrderedDict, basestring, dask_array_type, integer_types, iteritems, range) from .utils import ( @@ -2851,7 +2851,7 @@ def combine_first(self, other): out = ops.fillna(self, other, join="outer", dataset_join="outer") return out - def reduce(self, func, dim=None, keep_attrs=False, numeric_only=False, + def reduce(self, func, dim=None, keep_attrs=None, numeric_only=False, allow_lazy=False, **kwargs): """Reduce this dataset by applying `func` along some dimension(s). @@ -2893,6 +2893,9 @@ def reduce(self, func, dim=None, keep_attrs=False, numeric_only=False, raise ValueError('Dataset does not contain the dimensions: %s' % missing_dimensions) + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + variables = OrderedDict() for name, var in iteritems(self._variables): reduce_dims = [dim for dim in var.dims if dim in dims] @@ -2921,7 +2924,7 @@ def reduce(self, func, dim=None, keep_attrs=False, numeric_only=False, attrs = self.attrs if keep_attrs else None return self._replace_vars_and_dims(variables, coord_names, attrs=attrs) - def apply(self, func, keep_attrs=False, args=(), **kwargs): + def apply(self, func, keep_attrs=None, args=(), **kwargs): """Apply a function over the data variables in this dataset. 
Parameters @@ -2966,6 +2969,8 @@ def apply(self, func, keep_attrs=False, args=(), **kwargs): variables = OrderedDict( (k, maybe_wrap_array(v, func(v, *args, **kwargs))) for k, v in iteritems(self.data_vars)) + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) attrs = self.attrs if keep_attrs else None return type(self)(variables, attrs=attrs) @@ -3630,7 +3635,7 @@ def sortby(self, variables, ascending=True): return aligned_self.isel(**indices) def quantile(self, q, dim=None, interpolation='linear', - numeric_only=False, keep_attrs=False): + numeric_only=False, keep_attrs=None): """Compute the qth quantile of the data along the specified dimension. Returns the qth quantiles(s) of the array elements for each variable @@ -3708,6 +3713,8 @@ def quantile(self, q, dim=None, interpolation='linear', # construct the new dataset coord_names = set(k for k in self.coords if k in variables) + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) attrs = self.attrs if keep_attrs else None new = self._replace_vars_and_dims(variables, coord_names, attrs=attrs) if 'quantile' in new.dims: @@ -3716,7 +3723,7 @@ def quantile(self, q, dim=None, interpolation='linear', new.coords['quantile'] = q return new - def rank(self, dim, pct=False, keep_attrs=False): + def rank(self, dim, pct=False, keep_attrs=None): """Ranks the data. 
Equal values are assigned a rank that is the average of the ranks that @@ -3756,6 +3763,8 @@ def rank(self, dim, pct=False, keep_attrs=False): variables[name] = var coord_names = set(self.coords) + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) attrs = self.attrs if keep_attrs else None return self._replace_vars_and_dims(variables, coord_names, attrs=attrs) @@ -3819,11 +3828,13 @@ def differentiate(self, coord, edge_order=1, datetime_unit=None): @property def real(self): - return self._unary_op(lambda x: x.real, keep_attrs=True)(self) + return self._unary_op(lambda x: x.real, + keep_attrs=True)(self) @property def imag(self): - return self._unary_op(lambda x: x.imag, keep_attrs=True)(self) + return self._unary_op(lambda x: x.imag, + keep_attrs=True)(self) def filter_by_attrs(self, **kwargs): """Returns a ``Dataset`` with variables that match specific conditions. diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index dc23eae8b76..defe72ab3ee 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -13,6 +13,7 @@ from .pycompat import integer_types, range, zip from .utils import hashable, maybe_wrap_array, peek_at, safe_cast_to_index from .variable import IndexVariable, Variable, as_variable +from .options import _get_keep_attrs def unique_value_groups(ar, sort=True): @@ -404,15 +405,17 @@ def _first_or_last(self, op, skipna, keep_attrs): # NB. 
this is currently only used for reductions along an existing # dimension return self._obj + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=True) return self.reduce(op, self._group_dim, skipna=skipna, keep_attrs=keep_attrs, allow_lazy=True) - def first(self, skipna=None, keep_attrs=True): + def first(self, skipna=None, keep_attrs=None): """Return the first element of each group along the group dimension """ return self._first_or_last(duck_array_ops.first, skipna, keep_attrs) - def last(self, skipna=None, keep_attrs=True): + def last(self, skipna=None, keep_attrs=None): """Return the last element of each group along the group dimension """ return self._first_or_last(duck_array_ops.last, skipna, keep_attrs) @@ -539,8 +542,8 @@ def _combine(self, applied, shortcut=False): combined = self._maybe_unstack(combined) return combined - def reduce(self, func, dim=None, axis=None, keep_attrs=False, - shortcut=True, **kwargs): + def reduce(self, func, dim=None, axis=None, + keep_attrs=None, shortcut=True, **kwargs): """Reduce the items in this group by applying `func` along some dimension(s). 
@@ -580,6 +583,9 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=False, "warning, pass dim=xarray.ALL_DIMS explicitly.", FutureWarning, stacklevel=2) + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + def reduce_array(ar): return ar.reduce(func, dim, axis, keep_attrs=keep_attrs, **kwargs) return self.apply(reduce_array, shortcut=shortcut) @@ -590,12 +596,12 @@ def reduce_array(ar): def _reduce_method(cls, func, include_skipna, numeric_only): if include_skipna: def wrapped_func(self, dim=DEFAULT_DIMS, axis=None, skipna=None, - keep_attrs=False, **kwargs): + keep_attrs=None, **kwargs): return self.reduce(func, dim, axis, keep_attrs=keep_attrs, skipna=skipna, allow_lazy=True, **kwargs) else: def wrapped_func(self, dim=DEFAULT_DIMS, axis=None, - keep_attrs=False, **kwargs): + keep_attrs=None, **kwargs): return self.reduce(func, dim, axis, keep_attrs=keep_attrs, allow_lazy=True, **kwargs) return wrapped_func @@ -651,7 +657,7 @@ def _combine(self, applied): combined = self._maybe_unstack(combined) return combined - def reduce(self, func, dim=None, keep_attrs=False, **kwargs): + def reduce(self, func, dim=None, keep_attrs=None, **kwargs): """Reduce the items in this group by applying `func` along some dimension(s). 
@@ -692,6 +698,9 @@ def reduce(self, func, dim=None, keep_attrs=False, **kwargs): elif dim is None: dim = self._group_dim + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + def reduce_dataset(ds): return ds.reduce(func, dim, keep_attrs, **kwargs) return self.apply(reduce_dataset) @@ -701,15 +710,15 @@ def reduce_dataset(ds): @classmethod def _reduce_method(cls, func, include_skipna, numeric_only): if include_skipna: - def wrapped_func(self, dim=DEFAULT_DIMS, keep_attrs=False, + def wrapped_func(self, dim=DEFAULT_DIMS, skipna=None, **kwargs): - return self.reduce(func, dim, keep_attrs, skipna=skipna, - numeric_only=numeric_only, allow_lazy=True, - **kwargs) + return self.reduce(func, dim, + skipna=skipna, numeric_only=numeric_only, + allow_lazy=True, **kwargs) else: - def wrapped_func(self, dim=DEFAULT_DIMS, keep_attrs=False, + def wrapped_func(self, dim=DEFAULT_DIMS, **kwargs): - return self.reduce(func, dim, keep_attrs, + return self.reduce(func, dim, numeric_only=numeric_only, allow_lazy=True, **kwargs) return wrapped_func diff --git a/xarray/core/options.py b/xarray/core/options.py index 04ea0be7172..eb3013d5233 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -6,6 +6,8 @@ FILE_CACHE_MAXSIZE = 'file_cache_maxsize' CMAP_SEQUENTIAL = 'cmap_sequential' CMAP_DIVERGENT = 'cmap_divergent' +KEEP_ATTRS = 'keep_attrs' + OPTIONS = { DISPLAY_WIDTH: 80, @@ -14,6 +16,7 @@ FILE_CACHE_MAXSIZE: 128, CMAP_SEQUENTIAL: 'viridis', CMAP_DIVERGENT: 'RdBu_r', + KEEP_ATTRS: 'default' } _JOIN_OPTIONS = frozenset(['inner', 'outer', 'left', 'right', 'exact']) @@ -28,6 +31,7 @@ def _positive_integer(value): ARITHMETIC_JOIN: _JOIN_OPTIONS.__contains__, ENABLE_CFTIMEINDEX: lambda value: isinstance(value, bool), FILE_CACHE_MAXSIZE: _positive_integer, + KEEP_ATTRS: lambda choice: choice in [True, False, 'default'] } @@ -41,6 +45,17 @@ def _set_file_cache_maxsize(value): } +def _get_keep_attrs(default): + global_choice = OPTIONS['keep_attrs'] + + if 
global_choice == 'default': # compare by value, not identity + return default + elif global_choice in [True, False]: + return global_choice + else: + raise ValueError("The global option keep_attrs must be one of True, False or 'default'.") + + class set_options(object): """Set options for xarray in a controlled context. @@ -63,8 +78,13 @@ class set_options(object): - ``cmap_divergent``: colormap to use for divergent data plots. Default: ``RdBu_r``. If string, must be matplotlib built-in colormap. Can also be a Colormap object (e.g. mpl.cm.magma) + - ``keep_attrs``: rule for whether to keep attributes on xarray + Datasets/dataarrays after operations. Either ``True`` to always keep + attrs, ``False`` to always discard them, or ``'default'`` to use original + logic that attrs should only be kept in unambiguous circumstances. + Default: ``'default'``. -f You can use ``set_options`` either as a context manager: + You can use ``set_options`` either as a context manager: >>> ds = xr.Dataset({'x': np.arange(1000)}) >>> with xr.set_options(display_width=40): diff --git a/xarray/core/resample.py b/xarray/core/resample.py index bd84e04487e..edf7dfc3d41 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -273,7 +273,7 @@ def apply(self, func, **kwargs): return combined.rename({self._resample_dim: self._dim}) - def reduce(self, func, dim=None, keep_attrs=False, **kwargs): + def reduce(self, func, dim=None, keep_attrs=None, **kwargs): """Reduce the items in this group by applying `func` along the pre-defined resampling dimension. 
diff --git a/xarray/core/variable.py b/xarray/core/variable.py index fefd48b449c..184d10b39b1 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -18,6 +18,7 @@ from .pycompat import ( OrderedDict, basestring, dask_array_type, integer_types, zip) from .utils import OrderedSet, either_dict_or_kwargs +from .options import _get_keep_attrs try: import dask.array as da @@ -1303,8 +1304,8 @@ def fillna(self, value): def where(self, cond, other=dtypes.NA): return ops.where_method(self, cond, other) - def reduce(self, func, dim=None, axis=None, keep_attrs=False, - allow_lazy=False, **kwargs): + def reduce(self, func, dim=None, axis=None, + keep_attrs=None, allow_lazy=False, **kwargs): """Reduce this array by applying `func` along some dimension(s). Parameters @@ -1351,6 +1352,8 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=False, dims = [adim for n, adim in enumerate(self.dims) if n not in removed_axes] + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) attrs = self._attrs if keep_attrs else None return Variable(dims, data, attrs=attrs) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 10a1a956b27..f76b8c3ceab 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -8,7 +8,8 @@ import pytest from xarray import DataArray, Variable, coding, decode_cf, set_options -from xarray.coding.times import _import_cftime +from xarray.coding.times import (_import_cftime, decode_cf_datetime, + encode_cf_datetime) from xarray.coding.variables import SerializationWarning from xarray.core.common import contains_cftime_datetimes @@ -575,28 +576,24 @@ def test_infer_datetime_units(dates, expected): assert expected == coding.times.infer_datetime_units(dates) +_CFTIME_DATETIME_UNITS_TESTS = [ + ([(1900, 1, 1), (1900, 1, 1)], 'days since 1900-01-01 00:00:00.000000'), + ([(1900, 1, 1), (1900, 1, 2), (1900, 1, 2, 0, 0, 1)], + 'seconds since 1900-01-01 00:00:00.000000'), + 
([(1900, 1, 1), (1900, 1, 8), (1900, 1, 16)], + 'days since 1900-01-01 00:00:00.000000') +] + + @pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') -def test_infer_cftime_datetime_units(): - date_types = _all_cftime_date_types() - for date_type in date_types.values(): - for dates, expected in [ - ([date_type(1900, 1, 1), - date_type(1900, 1, 2)], - 'days since 1900-01-01 00:00:00.000000'), - ([date_type(1900, 1, 1, 12), - date_type(1900, 1, 1, 13)], - 'seconds since 1900-01-01 12:00:00.000000'), - ([date_type(1900, 1, 1), - date_type(1900, 1, 2), - date_type(1900, 1, 2, 0, 0, 1)], - 'seconds since 1900-01-01 00:00:00.000000'), - ([date_type(1900, 1, 1), - date_type(1900, 1, 2, 0, 0, 0, 5)], - 'days since 1900-01-01 00:00:00.000000'), - ([date_type(1900, 1, 1), date_type(1900, 1, 8), - date_type(1900, 1, 16)], - 'days since 1900-01-01 00:00:00.000000')]: - assert expected == coding.times.infer_datetime_units(dates) +@pytest.mark.parametrize( + 'calendar', _NON_STANDARD_CALENDARS + ['gregorian', 'proleptic_gregorian']) +@pytest.mark.parametrize(('date_args', 'expected'), + _CFTIME_DATETIME_UNITS_TESTS) +def test_infer_cftime_datetime_units(calendar, date_args, expected): + date_type = _all_cftime_date_types()[calendar] + dates = [date_type(*args) for args in date_args] + assert expected == coding.times.infer_datetime_units(dates) @pytest.mark.parametrize( @@ -763,3 +760,16 @@ def test_contains_cftime_datetimes_non_cftimes(non_cftime_data): @pytest.mark.parametrize('non_cftime_data', [DataArray([]), DataArray([1, 2])]) def test_contains_cftime_datetimes_non_cftimes_dask(non_cftime_data): assert not contains_cftime_datetimes(non_cftime_data.chunk()) + + +@pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') +@pytest.mark.parametrize('shape', [(24,), (8, 3), (2, 4, 3)]) +def test_encode_datetime_overflow(shape): + # Test for fix to GH 2272 + dates = pd.date_range('2100', periods=24).values.reshape(shape) + units = 'days 
since 1800-01-01' + calendar = 'standard' + + num, _, _ = encode_cf_datetime(dates, units, calendar) + roundtrip = decode_cf_datetime(num, units, calendar) + np.testing.assert_array_equal(dates, roundtrip) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 3cb4e5d6cea..48524bf38c5 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -619,9 +619,9 @@ def get_data(): da[dict(x=ind)] = value # should not raise def test_contains(self): - data_array = DataArray(1, coords={'x': 2}) - with pytest.warns(FutureWarning): - assert 'x' in data_array + data_array = DataArray([1, 2]) + assert 1 in data_array + assert 3 not in data_array def test_attr_sources_multiindex(self): # make sure attr-style access for multi-index levels @@ -2534,6 +2534,7 @@ def test_upsample_interpolate_regression_1605(self): assert_allclose(actual, expected, rtol=1e-16) @requires_dask + @requires_scipy def test_upsample_interpolate_dask(self): import dask.array as da diff --git a/xarray/tests/test_options.py b/xarray/tests/test_options.py index 4441375a1b1..a21ea3e6b64 100644 --- a/xarray/tests/test_options.py +++ b/xarray/tests/test_options.py @@ -3,8 +3,10 @@ import pytest import xarray -from xarray.core.options import OPTIONS +from xarray.core.options import OPTIONS, _get_keep_attrs from xarray.backends.file_manager import FILE_CACHE +from xarray.tests.test_dataset import create_test_data +from xarray import concat, merge def test_invalid_option_raises(): @@ -44,6 +46,18 @@ def test_file_cache_maxsize(): assert FILE_CACHE.maxsize == original_size +def test_keep_attrs(): + with pytest.raises(ValueError): + xarray.set_options(keep_attrs='invalid_str') + with xarray.set_options(keep_attrs=True): + assert OPTIONS['keep_attrs'] + with xarray.set_options(keep_attrs=False): + assert not OPTIONS['keep_attrs'] + with xarray.set_options(keep_attrs='default'): + assert _get_keep_attrs(default=True) + assert not _get_keep_attrs(default=False) + + 
def test_nested_options(): original = OPTIONS['display_width'] with xarray.set_options(display_width=1): @@ -52,3 +66,105 @@ def test_nested_options(): assert OPTIONS['display_width'] == 2 assert OPTIONS['display_width'] == 1 assert OPTIONS['display_width'] == original + + +def create_test_dataset_attrs(seed=0): + ds = create_test_data(seed) + ds.attrs = {'attr1': 5, 'attr2': 'history', + 'attr3': {'nested': 'more_info'}} + return ds + + +def create_test_dataarray_attrs(seed=0, var='var1'): + da = create_test_data(seed)[var] + da.attrs = {'attr1': 5, 'attr2': 'history', + 'attr3': {'nested': 'more_info'}} + return da + + +class TestAttrRetention(object): + def test_dataset_attr_retention(self): + # Use .mean() for all tests: a typical reduction operation + ds = create_test_dataset_attrs() + original_attrs = ds.attrs + + # Test default behaviour + result = ds.mean() + assert result.attrs == {} + with xarray.set_options(keep_attrs='default'): + result = ds.mean() + assert result.attrs == {} + + with xarray.set_options(keep_attrs=True): + result = ds.mean() + assert result.attrs == original_attrs + + with xarray.set_options(keep_attrs=False): + result = ds.mean() + assert result.attrs == {} + + def test_dataarray_attr_retention(self): + # Use .mean() for all tests: a typical reduction operation + da = create_test_dataarray_attrs() + original_attrs = da.attrs + + # Test default behaviour + result = da.mean() + assert result.attrs == {} + with xarray.set_options(keep_attrs='default'): + result = da.mean() + assert result.attrs == {} + + with xarray.set_options(keep_attrs=True): + result = da.mean() + assert result.attrs == original_attrs + + with xarray.set_options(keep_attrs=False): + result = da.mean() + assert result.attrs == {} + + def test_groupby_attr_retention(self): + da = xarray.DataArray([1, 2, 3], [('x', [1, 1, 2])]) + da.attrs = {'attr1': 5, 'attr2': 'history', + 'attr3': {'nested': 'more_info'}} + original_attrs = da.attrs + + # Test default behaviour + 
result = da.groupby('x').sum(keep_attrs=True) + assert result.attrs == original_attrs + with xarray.set_options(keep_attrs='default'): + result = da.groupby('x').sum(keep_attrs=True) + assert result.attrs == original_attrs + + with xarray.set_options(keep_attrs=True): + result1 = da.groupby('x') + result = result1.sum() + assert result.attrs == original_attrs + + with xarray.set_options(keep_attrs=False): + result = da.groupby('x').sum() + assert result.attrs == {} + + def test_concat_attr_retention(self): + ds1 = create_test_dataset_attrs() + ds2 = create_test_dataset_attrs() + ds2.attrs = {'wrong': 'attributes'} + original_attrs = ds1.attrs + + # Test default behaviour of keeping the attrs of the first + # dataset in the supplied list + # global keep_attrs option currently doesn't affect concat + result = concat([ds1, ds2], dim='dim1') + assert result.attrs == original_attrs + + @pytest.mark.xfail + def test_merge_attr_retention(self): + da1 = create_test_dataarray_attrs(var='var1') + da2 = create_test_dataarray_attrs(var='var2') + da2.attrs = {'wrong': 'attributes'} + original_attrs = da1.attrs + + # merge currently discards attrs, and the global keep_attrs + # option doesn't affect this + result = merge([da1, da2]) + assert result.attrs == original_attrs