Skip to content
forked from pydata/xarray

Commit

Permalink
Merge branch 'master' into deprecate/inplace
Browse files Browse the repository at this point in the history
* master:
  Global option to always keep/discard attrs on operations (pydata#2482)
  Remove tests where answers change in cftime 1.0.2.1 (pydata#2522)
  Finish deprecation cycle for DataArray.__contains__ checking array values (pydata#2520)
  Fix bug where OverflowError is not being raised (pydata#2519)
  • Loading branch information
dcherian committed Oct 30, 2018
2 parents 66d3cea + 6d55f99 commit 4359403
Show file tree
Hide file tree
Showing 13 changed files with 266 additions and 76 deletions.
3 changes: 2 additions & 1 deletion doc/faq.rst
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,8 @@ conventions`_. (An exception is serialization to and from netCDF files.)

An implication of this choice is that we do not propagate ``attrs`` through
most operations unless explicitly flagged (some methods have a ``keep_attrs``
option). Similarly, xarray does not check for conflicts between ``attrs`` when
option, and there is a global flag for setting this to be always True or
False). Similarly, xarray does not check for conflicts between ``attrs`` when
combining arrays and datasets, unless explicitly requested with the option
``compat='identical'``. The guiding principle is that metadata should not be
allowed to get in the way.
Expand Down
24 changes: 18 additions & 6 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,14 @@ v0.11.0 (unreleased)
Breaking changes
~~~~~~~~~~~~~~~~

- ``Dataset.T`` has been removed as a shortcut for :py:meth:`Dataset.transpose`.
Call :py:meth:`Dataset.transpose` directly instead.
- Iterating over a ``Dataset`` now includes only data variables, not coordinates.
  Similarly, calling ``len`` and ``bool`` on a ``Dataset`` now
includes only data variables
- Finished deprecation cycles:
- ``Dataset.T`` has been removed as a shortcut for :py:meth:`Dataset.transpose`.
Call :py:meth:`Dataset.transpose` directly instead.
- Iterating over a ``Dataset`` now includes only data variables, not coordinates.
    Similarily, calling ``len`` and ``bool`` on a ``Dataset`` now (fix: Similarly)
includes only data variables.
- ``DataArray.__contains__`` (used by Python's ``in`` operator) now checks
array data, not coordinates.
- Xarray's storage backends now automatically open and close files when
necessary, rather than requiring opening a file with ``autoclose=True``. A
global least-recently-used cache is used to store open files; the default
Expand Down Expand Up @@ -82,7 +85,12 @@ Enhancements
:py:meth:`~xarray.Dataset.differentiate`,
:py:meth:`~xarray.DataArray.interp`, and
:py:meth:`~xarray.Dataset.interp`.
By `Spencer Clark <https://github.com/spencerkclark>`_.
By `Spencer Clark <https://github.com/spencerkclark>`_
- There is now a global option to either always keep or always discard
dataset and dataarray attrs upon operations. The option is set with
``xarray.set_options(keep_attrs=True)``, and the default is to use the old
behaviour.
By `Tom Nicholas <http://github.com/TomNicholas>`_.
- Added a new backend for the GRIB file format based on ECMWF *cfgrib*
python driver and *ecCodes* C-library. (:issue:`2475`)
By `Alessandro Amici <https://github.com/alexamici>`_,
Expand Down Expand Up @@ -126,6 +134,10 @@ Bug fixes
By `Spencer Clark <https://github.com/spencerkclark>`_.
- Avoid use of Dask's deprecated ``get=`` parameter in tests
by `Matthew Rocklin <https://github.com/mrocklin/>`_.
- An ``OverflowError`` is now accurately raised and caught during the
encoding process if a reference date is used that is so distant that
the dates must be encoded using cftime rather than NumPy (:issue:`2272`).
By `Spencer Clark <https://github.com/spencerkclark>`_.

.. _whats-new.0.10.9:

Expand Down
7 changes: 6 additions & 1 deletion xarray/coding/times.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,12 @@ def encode_cf_datetime(dates, units=None, calendar=None):
delta_units = _netcdf_to_numpy_timeunit(delta)
time_delta = np.timedelta64(1, delta_units).astype('timedelta64[ns]')
ref_date = np.datetime64(pd.Timestamp(ref_date))
num = (dates - ref_date) / time_delta

# Wrap the dates in a DatetimeIndex to do the subtraction to ensure
# an OverflowError is raised if the ref_date is too far away from
# dates to be encoded (GH 2272).
num = (pd.DatetimeIndex(dates.ravel()) - ref_date) / time_delta
num = num.values.reshape(dates.shape)

except (OutOfBoundsDatetime, OverflowError):
num = _encode_datetime_with_cftime(dates, units, calendar)
Expand Down
22 changes: 13 additions & 9 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from .arithmetic import SupportsArithmetic
from .pycompat import OrderedDict, basestring, dask_array_type, suppress
from .utils import Frozen, ReprObject, SortedKeysDict, either_dict_or_kwargs
from .options import _get_keep_attrs

# Used as a sentinel value to indicate all dimensions
ALL_DIMS = ReprObject('<all-dims>')
Expand All @@ -21,13 +22,13 @@ class ImplementsArrayReduce(object):
def _reduce_method(cls, func, include_skipna, numeric_only):
if include_skipna:
def wrapped_func(self, dim=None, axis=None, skipna=None,
keep_attrs=False, **kwargs):
return self.reduce(func, dim, axis, keep_attrs=keep_attrs,
**kwargs):
return self.reduce(func, dim, axis,
skipna=skipna, allow_lazy=True, **kwargs)
else:
def wrapped_func(self, dim=None, axis=None, keep_attrs=False,
def wrapped_func(self, dim=None, axis=None,
**kwargs):
return self.reduce(func, dim, axis, keep_attrs=keep_attrs,
return self.reduce(func, dim, axis,
allow_lazy=True, **kwargs)
return wrapped_func

Expand All @@ -51,14 +52,14 @@ class ImplementsDatasetReduce(object):
@classmethod
def _reduce_method(cls, func, include_skipna, numeric_only):
if include_skipna:
def wrapped_func(self, dim=None, keep_attrs=False, skipna=None,
def wrapped_func(self, dim=None, skipna=None,
**kwargs):
return self.reduce(func, dim, keep_attrs, skipna=skipna,
return self.reduce(func, dim, skipna=skipna,
numeric_only=numeric_only, allow_lazy=True,
**kwargs)
else:
def wrapped_func(self, dim=None, keep_attrs=False, **kwargs):
return self.reduce(func, dim, keep_attrs,
def wrapped_func(self, dim=None, **kwargs):
return self.reduce(func, dim,
numeric_only=numeric_only, allow_lazy=True,
**kwargs)
return wrapped_func
Expand Down Expand Up @@ -591,7 +592,7 @@ def rolling(self, dim=None, min_periods=None, center=False, **dim_kwargs):
center=center)

def resample(self, freq=None, dim=None, how=None, skipna=None,
closed=None, label=None, base=0, keep_attrs=False, **indexer):
closed=None, label=None, base=0, keep_attrs=None, **indexer):
"""Returns a Resample object for performing resampling operations.
Handles both downsampling and upsampling. If any intervals contain no
Expand Down Expand Up @@ -659,6 +660,9 @@ def resample(self, freq=None, dim=None, how=None, skipna=None,
from .dataarray import DataArray
from .resample import RESAMPLE_DIM

if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)

if dim is not None:
if how is None:
how = 'mean'
Expand Down
16 changes: 7 additions & 9 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
assert_coordinate_consistent, remap_label_indexers)
from .dataset import Dataset, merge_indexes, split_indexes
from .formatting import format_item
from .options import OPTIONS
from .options import OPTIONS, _get_keep_attrs
from .pycompat import OrderedDict, basestring, iteritems, range, zip
from .utils import (
_check_inplace, decode_numpy_dict_values, either_dict_or_kwargs,
Expand Down Expand Up @@ -504,11 +504,7 @@ def _item_sources(self):
LevelCoordinatesSource(self)]

def __contains__(self, key):
warnings.warn(
'xarray.DataArray.__contains__ currently checks membership in '
'DataArray.coords, but in xarray v0.11 will change to check '
'membership in array values.', FutureWarning, stacklevel=2)
return key in self._coords
return key in self.data

@property
def loc(self):
Expand Down Expand Up @@ -1564,7 +1560,7 @@ def combine_first(self, other):
"""
return ops.fillna(self, other, join="outer")

def reduce(self, func, dim=None, axis=None, keep_attrs=False, **kwargs):
def reduce(self, func, dim=None, axis=None, keep_attrs=None, **kwargs):
"""Reduce this array by applying `func` along some dimension(s).
Parameters
Expand Down Expand Up @@ -1593,6 +1589,7 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=False, **kwargs):
DataArray with this object's array replaced with an array with
summarized data and the indicated dimension(s) removed.
"""

var = self.variable.reduce(func, dim, axis, keep_attrs, **kwargs)
return self._replace_maybe_drop_dims(var)

Expand Down Expand Up @@ -2275,7 +2272,7 @@ def sortby(self, variables, ascending=True):
ds = self._to_temp_dataset().sortby(variables, ascending=ascending)
return self._from_temp_dataset(ds)

def quantile(self, q, dim=None, interpolation='linear', keep_attrs=False):
def quantile(self, q, dim=None, interpolation='linear', keep_attrs=None):
"""Compute the qth quantile of the data along the specified dimension.
Returns the qth quantiles(s) of the array elements.
Expand Down Expand Up @@ -2321,7 +2318,7 @@ def quantile(self, q, dim=None, interpolation='linear', keep_attrs=False):
q, dim=dim, keep_attrs=keep_attrs, interpolation=interpolation)
return self._from_temp_dataset(ds)

def rank(self, dim, pct=False, keep_attrs=False):
def rank(self, dim, pct=False, keep_attrs=None):
"""Ranks the data.
Equal values are assigned a rank that is the average of the ranks that
Expand Down Expand Up @@ -2357,6 +2354,7 @@ def rank(self, dim, pct=False, keep_attrs=False):
array([ 1., 2., 3.])
Dimensions without coordinates: x
"""

ds = self._to_temp_dataset().rank(dim, pct=pct, keep_attrs=keep_attrs)
return self._from_temp_dataset(ds)

Expand Down
25 changes: 18 additions & 7 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from .merge import (
dataset_merge_method, dataset_update_method, merge_data_and_coords,
merge_variables)
from .options import OPTIONS
from .options import OPTIONS, _get_keep_attrs
from .pycompat import (
OrderedDict, basestring, dask_array_type, integer_types, iteritems, range)
from .utils import (
Expand Down Expand Up @@ -2851,7 +2851,7 @@ def combine_first(self, other):
out = ops.fillna(self, other, join="outer", dataset_join="outer")
return out

def reduce(self, func, dim=None, keep_attrs=False, numeric_only=False,
def reduce(self, func, dim=None, keep_attrs=None, numeric_only=False,
allow_lazy=False, **kwargs):
"""Reduce this dataset by applying `func` along some dimension(s).
Expand Down Expand Up @@ -2893,6 +2893,9 @@ def reduce(self, func, dim=None, keep_attrs=False, numeric_only=False,
raise ValueError('Dataset does not contain the dimensions: %s'
% missing_dimensions)

if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)

variables = OrderedDict()
for name, var in iteritems(self._variables):
reduce_dims = [dim for dim in var.dims if dim in dims]
Expand Down Expand Up @@ -2921,7 +2924,7 @@ def reduce(self, func, dim=None, keep_attrs=False, numeric_only=False,
attrs = self.attrs if keep_attrs else None
return self._replace_vars_and_dims(variables, coord_names, attrs=attrs)

def apply(self, func, keep_attrs=False, args=(), **kwargs):
def apply(self, func, keep_attrs=None, args=(), **kwargs):
"""Apply a function over the data variables in this dataset.
Parameters
Expand Down Expand Up @@ -2966,6 +2969,8 @@ def apply(self, func, keep_attrs=False, args=(), **kwargs):
variables = OrderedDict(
(k, maybe_wrap_array(v, func(v, *args, **kwargs)))
for k, v in iteritems(self.data_vars))
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)
attrs = self.attrs if keep_attrs else None
return type(self)(variables, attrs=attrs)

Expand Down Expand Up @@ -3630,7 +3635,7 @@ def sortby(self, variables, ascending=True):
return aligned_self.isel(**indices)

def quantile(self, q, dim=None, interpolation='linear',
numeric_only=False, keep_attrs=False):
numeric_only=False, keep_attrs=None):
"""Compute the qth quantile of the data along the specified dimension.
Returns the qth quantiles(s) of the array elements for each variable
Expand Down Expand Up @@ -3708,6 +3713,8 @@ def quantile(self, q, dim=None, interpolation='linear',

# construct the new dataset
coord_names = set(k for k in self.coords if k in variables)
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)
attrs = self.attrs if keep_attrs else None
new = self._replace_vars_and_dims(variables, coord_names, attrs=attrs)
if 'quantile' in new.dims:
Expand All @@ -3716,7 +3723,7 @@ def quantile(self, q, dim=None, interpolation='linear',
new.coords['quantile'] = q
return new

def rank(self, dim, pct=False, keep_attrs=False):
def rank(self, dim, pct=False, keep_attrs=None):
"""Ranks the data.
Equal values are assigned a rank that is the average of the ranks that
Expand Down Expand Up @@ -3756,6 +3763,8 @@ def rank(self, dim, pct=False, keep_attrs=False):
variables[name] = var

coord_names = set(self.coords)
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)
attrs = self.attrs if keep_attrs else None
return self._replace_vars_and_dims(variables, coord_names, attrs=attrs)

Expand Down Expand Up @@ -3819,11 +3828,13 @@ def differentiate(self, coord, edge_order=1, datetime_unit=None):

@property
def real(self):
return self._unary_op(lambda x: x.real, keep_attrs=True)(self)
return self._unary_op(lambda x: x.real,
keep_attrs=True)(self)

@property
def imag(self):
return self._unary_op(lambda x: x.imag, keep_attrs=True)(self)
return self._unary_op(lambda x: x.imag,
keep_attrs=True)(self)

def filter_by_attrs(self, **kwargs):
"""Returns a ``Dataset`` with variables that match specific conditions.
Expand Down
Loading

0 comments on commit 4359403

Please sign in to comment.