Ability to force float64 instead of float32 (issue #2304) #2751

Open · wants to merge 17 commits into main
1 change: 1 addition & 0 deletions .gitignore
@@ -69,3 +69,4 @@ xarray/version.py
Icon*

.ipynb_checkpoints
+.nfs*
17 changes: 17 additions & 0 deletions doc/whats-new.rst
@@ -119,6 +119,23 @@ Other enhancements
See :ref:`compute.using_coordinates` for the detail.
(:issue:`1332`)
By `Keisuke Fujii <https://github.com/fujiisoup>`_.
- :py:meth:`pandas.Series.dropna` is now supported for a
:py:class:`pandas.Series` indexed by a :py:class:`~xarray.CFTimeIndex`
(:issue:`2688`). By `Spencer Clark <https://github.com/spencerkclark>`_.
- Variables are now unpacked using the dtypes of ``scale_factor`` and ``add_offset``
  when these attributes are present in a dataset, following the
  `CF conventions <http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/build/ch08.html>`_.
  By `Daoud Jahdou <https://github.com/daoudjahdou>`_.
- :py:meth:`~xarray.cftime_range` now supports QuarterBegin and QuarterEnd offsets (:issue:`2663`).
By `Jwen Fai Low <https://github.com/jwenfai>`_.
- :py:meth:`~xarray.open_dataset` now accepts a ``use_cftime`` argument, which
can be used to require that ``cftime.datetime`` objects are always used, or
never used when decoding dates encoded with a standard calendar. This can be
used to ensure consistent date types are returned when using
:py:meth:`~xarray.open_mfdataset` (:issue:`1263`) and/or to silence
serialization warnings raised if dates from a standard calendar are found to
be outside the :py:class:`pandas.Timestamp`-valid range (:issue:`2754`). By
`Spencer Clark <https://github.com/spencerkclark>`_.

- Added :py:meth:`~xarray.Dataset.drop_dims` (:issue:`1949`).
By `Kevin Squire <https://github.com/kmsquire>`_.

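(Illustration, not part of the diff: the ``use_cftime`` entry above would be
exercised roughly as follows; the file name is hypothetical.)

    import xarray as xr

    # use_cftime=True asks for cftime.datetime objects even when dates from a
    # standard calendar would fit in the pandas.Timestamp-valid range;
    # use_cftime=False asks for np.datetime64 values instead.
    ds = xr.open_dataset('example.nc', use_cftime=True)
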
52 changes: 48 additions & 4 deletions xarray/coding/variables.py
@@ -189,8 +189,49 @@ def _scale_offset_decoding(data, scale_factor, add_offset, dtype):
    return data


-def _choose_float_dtype(dtype, has_offset):
+def _choose_decoding_float_dtype(dtype, scale_factor, add_offset):
"""Return a float dtype according to cf-convention"""
# Implementing cf-convention
# http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/build/ch08.html:
# Detail:
# If the scale_factor and add_offset attributes are of the same
# data type as the
# associated variable, the unpacked data is assumed to be of the same
# data type as the packed data. However, if the scale_factor
# and add_offset attributes are of a different data type
# from the variable (containing the packed data)
# then the unpacked data should match
# the type of these attributes, which must both be of type float or both
# be of type double. An additional restriction in this case is that
[Review comment by shoyer (Member)]
What should we do if add_offset/scale_factor are not floats?

Right now in your implementation, if scale_factor is mistakenly np.int64(10)
then the data would get decoded as int64, which seems like a bad idea (e.g.,
if the data is some float type). I think we should probably ignore the type
of scale_factor/add_offset in these cases instead.

[Review comment by @magau, Feb 21, 2019]
Hi @shoyer! Sorry for my delayed reply. First of all I'd like to thank and
congratulate you for your efficiency, and say that I'm very glad to see that
you've taken my considerations about this topic into account.

About the comment above: since CFScaleOffsetCoder will always multiply the raw
data by the scale_factor, if the scale_factor is an integer the decoded values
will always be integers too (unless the raw data dtype is float, as you've
mentioned). Even in these cases I'd return the decoded values with the same
dtype as the scale_factor / add_offset, which is the expected behavior (data
providers must be aware...). Users can always use open_dataset with
decode_cf=False and call the decode_cf_variable function later, after fixing
the variable.attrs['scale_factor' / 'add_offset'] dtypes.
A possible solution is to add scale_factor and add_offset as arguments of the
open_dataset function, to overwrite the original ones; these would receive
labeled values like scale_factor={'var1': whatever, 'var2': whatever},
add_offset={'var1': whatever, 'var2': whatever}. This would also resolve the
#2304 issue (with a better approach, in my opinion).

By the way, I've also noted that CFScaleOffsetCoder.encode changes the
values.dtype before applying the inverse operation (i.e. dividing by the
scale_factor), which will result in truncated values. I've faced this
situation with my own netCDF encoding processes, and would advise you to only
change the dtype after applying the transformation, and to use the numpy.round
function (for integer encoding) to avoid truncation artifacts.

Once again, sorry for my extensive comments and for not opening an issue, as I
should, but my intention is that you evaluate my considerations, take your own
decisions with your colleagues, and keep up the nice work you all have been
doing. 👍

Cheers!
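
(A minimal sketch of the round-then-cast encoding magau describes above,
assuming int16 packing; the helper name and values are illustrative, not from
this PR.)

    import numpy as np

    def encode_scaled(data, scale_factor=0.01, add_offset=10.0,
                      dtype=np.int16):
        # Transform in floating point first, then round, then cast;
        # casting before rounding would truncate toward zero.
        packed = np.round((data - add_offset) / scale_factor)
        return packed.astype(dtype)

    # Example: 10.016 packs to 2 (rounded) rather than 1 (truncated).
    print(encode_scaled(np.array([10.016])))  # [2]
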

    # the variable containing the packed data must be of type byte, short or
    # int. It is not advised to unpack an int into a float as there is a
    # potential precision loss.

    if scale_factor or add_offset:
[Review comment (Member)]
It's safer to use is not None for checking default values:

Suggested change:
-    if scale_factor or add_offset:
+    if scale_factor is not None or add_offset is not None:

        types = (np.dtype(type(scale_factor)),
                 np.dtype(type(add_offset)),
                 np.dtype(dtype))

        if add_offset is None:
            types = (np.dtype(type(scale_factor)),
                     np.dtype(dtype))
[Review comment (Member)]
It would be nice if these five lines creating types handled add_offset and
scale_factor in a symmetric way, e.g.,

types = [np.dtype(dtype)]
if scale_factor is not None:
    types.append(np.dtype(type(scale_factor)))
if add_offset is not None:
    types.append(np.dtype(type(add_offset)))

        # scaled_dtype should be the largest type we find
        scaled_dtype = dtypes.result_type(*types)

        # We return it only if it's a float32 or a float64
        if (scaled_dtype.itemsize >= 4
                and np.issubdtype(scaled_dtype, np.floating)):
            return scaled_dtype

    return _choose_encoding_float_dtype(dtype, add_offset is not None)
[Review comment (Member)]
Potentially we could use the add_offset value here -- 0 and None are
equivalent as far as picking an automatic float dtype is concerned, both
indicating "no offset":

Suggested change:
-    return _choose_encoding_float_dtype(dtype, add_offset is not None)
+    return _choose_encoding_float_dtype(dtype, add_offset)


def _choose_encoding_float_dtype(dtype, has_offset):
    """Return a float dtype that can losslessly represent `dtype` values."""

    # Keep float32 as-is. Upcast half-precision to single-precision,
    # because float16 is "intended for storage but not computation"
    if dtype.itemsize <= 4 and np.issubdtype(dtype, np.floating):
@@ -217,9 +258,9 @@ class CFScaleOffsetCoder(VariableCoder):

    def encode(self, variable, name=None):
        dims, data, attrs, encoding = unpack_for_encoding(variable)

        if 'scale_factor' in encoding or 'add_offset' in encoding:
[Review comment (Member)]
Alternative logic for this clause that I think might work:

if 'scale_factor' in encoding or 'add_offset' in encoding:
    scale_factor = encoding.pop('scale_factor', 1)
    add_offset = encoding.pop('add_offset', 0)
    if 'dtype' in encoding:
        dtype = pop_to(encoding, attrs, 'dtype', name=name)
    else:
        dtype = _choose_encoding_float_dtype(data.dtype, add_offset != 0)

    # save scale_factor and add_offset with dtype matching the decoded
    # data, per CF conventions
    safe_setitem(attrs, 'scale_factor', data.dtype.type(scale_factor), name)
    safe_setitem(attrs, 'add_offset', data.dtype.type(add_offset), name)

    # apply scale/offset encoding
    data = data.astype(dtype=dtype, copy=True)
    if add_offset:
        data -= add_offset
    if scale_factor != 1:
        # multiplication is faster than division
        data *= 1 / scale_factor

The idea here is to always write the scale_factor and add_offset attributes
according to CF conventions, which ensures that data always gets read out the
right way. This does remove some user flexibility, but xarray is OK with being
opinionated about how it writes metadata -- it is not a general purpose tool
for writing completely flexible netCDF files.

-            dtype = _choose_float_dtype(data.dtype, 'add_offset' in encoding)
+            dtype = _choose_encoding_float_dtype(data.dtype,
+                                                 'add_offset' in encoding)
            data = data.astype(dtype=dtype, copy=True)
            if 'add_offset' in encoding:
[Review comment (Member)]
This line (and the line below for scale_factor) needs to be updated to use the
add_offset variable if it isn't None, rather than checking for it in encoding
again. (pop_to removes an item from the dict in which it's found.)

                data -= pop_to(encoding, attrs, 'add_offset', name=name)
@@ -232,9 +273,12 @@ def decode(self, variable, name=None):
        dims, data, attrs, encoding = unpack_for_decoding(variable)

        if 'scale_factor' in attrs or 'add_offset' in attrs:

            scale_factor = pop_to(attrs, encoding, 'scale_factor', name=name)
            add_offset = pop_to(attrs, encoding, 'add_offset', name=name)
-            dtype = _choose_float_dtype(data.dtype, 'add_offset' in attrs)
+            dtype = _choose_decoding_float_dtype(data.dtype,
+                                                 scale_factor, add_offset)

            transform = partial(_scale_offset_decoding,
                                scale_factor=scale_factor,
                                add_offset=add_offset,
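
(Illustration, not part of the diff: the decoding rule in
_choose_decoding_float_dtype reduces to NumPy type promotion over the packed
dtype and the attribute dtypes. A small sketch of that arithmetic, using
np.result_type as a stand-in for xarray's internal dtypes.result_type.)

    import numpy as np

    # int16 packed data with a float64 scale_factor: promote to float64.
    print(np.result_type(np.dtype('int16'), np.dtype('float64')))  # float64

    # int16 packed data with float32 attributes: float32 already suffices.
    print(np.result_type(np.dtype('int16'), np.dtype('float32')))  # float32

    # int32 packed data with float32 attributes: promoted to float64,
    # since float32 cannot represent every int32 exactly.
    print(np.result_type(np.dtype('int32'), np.dtype('float32')))  # float64
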
1 change: 0 additions & 1 deletion xarray/conventions.py
@@ -222,7 +222,6 @@ def encode_cf_variable(var, needs_copy=True, name=None):
        A variable which has been encoded as described above.
    """
    ensure_not_multiindex(var, name=name)
-
    for coder in [times.CFDatetimeCoder(),
                  times.CFTimedeltaCoder(),
                  variables.CFScaleOffsetCoder(),
1 change: 0 additions & 1 deletion xarray/core/dtypes.py
@@ -30,7 +30,6 @@ def __eq__(self, other):
INF = AlwaysGreaterThan()
NINF = AlwaysLessThan()

-
# Pairs of types that, if both found, should be promoted to object dtype
# instead of following NumPy's own type-promotion rules. These type promotion
# rules match pandas instead. For reference, see the NumPy type hierarchy:
34 changes: 29 additions & 5 deletions xarray/tests/test_backends.py
@@ -685,7 +685,9 @@ def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn):
        with self.roundtrip(decoded) as actual:
            for k in decoded.variables:
                assert (decoded.variables[k].dtype
-                       == actual.variables[k].dtype)
+                       == actual.variables[k].dtype
+                       or (decoded.variables[k].dtype == np.float32 and
+                           actual.variables[k].dtype == np.float64))
            assert_allclose(decoded, actual, decode_bytes=False)

        with self.roundtrip(decoded,
@@ -711,7 +713,9 @@ def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn):
        with self.roundtrip(encoded) as actual:
            for k in decoded.variables:
                assert (decoded.variables[k].dtype ==
-                       actual.variables[k].dtype)
+                       actual.variables[k].dtype or
+                       (decoded.variables[k].dtype == np.float32 and
+                        actual.variables[k].dtype == np.float64))
            assert_allclose(decoded, actual, decode_bytes=False)

    def test_coordinates_encoding(self):
@@ -1156,15 +1160,16 @@ def test_mask_and_scale(self):
                nc.createVariable('x', 'int16', ('t',), fill_value=-1)
                v = nc.variables['x']
                v.set_auto_maskandscale(False)
-               v.add_offset = 10
-               v.scale_factor = 0.1
+               v.add_offset = np.float32(10)
+               v.scale_factor = np.float32(0.1)
                v[:] = np.array([-1, -1, 0, 1, 2])

            # first make sure netCDF4 reads the masked and scaled data
            # correctly
            with nc4.Dataset(tmp_file, mode='r') as nc:
                expected = np.ma.array([-1, -1, 10, 10.1, 10.2],
-                                      mask=[True, True, False, False, False])
+                                      mask=[True, True, False, False, False],
+                                      dtype=np.float32)
                actual = nc.variables['x'][:]
                assert_array_equal(expected, actual)

@@ -1173,6 +1178,25 @@
            expected = create_masked_and_scaled_data()
            assert_identical(expected, ds)

    def test_mask_and_scale_with_float64_scale_factor(self):

[Review comment (Member)]
It would be nice to test all of the edge cases just using CFScaleOffsetCoder,
e.g.,

  • what happens if add_offset and scale_factor have different types?
  • what if they're integers instead of floats?

See the tests in tests/test_coding.py for examples. (A sketch of such a test
appears after this file's diff, below.)

        with create_tmp_file() as tmp_file:
            with nc4.Dataset(tmp_file, mode='w') as nc:
                nc.createDimension('t', 5)
                nc.createVariable('x', 'int16', ('t',), fill_value=-1)
                v = nc.variables['x']
                v.scale_factor = 0.01
                v.add_offset = 10
                v[:] = np.array([-1123, -1123, 123, 1123, 2123])
            # Read back the newly created netCDF file, both with netCDF4
            # and with xarray's open_dataset
            with nc4.Dataset(tmp_file, mode='r') as nc:
                with open_dataset(tmp_file) as ds:
                    # Both sets of values should be equal, and both arrays
                    # should have float64 dtype
                    dsv = ds['x'].values
                    ncv = nc.variables['x'][:]
                    np.testing.assert_array_almost_equal(dsv, ncv, 15)

    def test_0dimensional_variable(self):
        # This fix verifies our work-around to this netCDF4-python bug:
        # https://github.com/Unidata/netcdf4-python/pull/220
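
(A hedged sketch of the coder-level edge-case test requested in the review
comment above: mismatched attribute dtypes should decode to the largest float
type under this PR's rule. The test is illustrative and assumes this branch of
xarray; it is not part of the diff.)

    import numpy as np
    import xarray as xr
    from xarray.coding import variables

    def test_decode_with_mismatched_attribute_types():
        # scale_factor (float32) and add_offset (float64) disagree; decoding
        # should promote to float64, the largest type involved.
        encoded = xr.Variable(('x',), np.arange(10, dtype='int16'),
                              attrs={'scale_factor': np.float32(0.1),
                                     'add_offset': np.float64(10)})
        coder = variables.CFScaleOffsetCoder()
        decoded = coder.decode(encoded)
        assert decoded.dtype == np.float64
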
143 changes: 141 additions & 2 deletions xarray/tests/test_coding.py
@@ -12,6 +12,10 @@
import dask.array as da


def reverse_list_of_tuple(plist):
    return [tuple(reversed(t)) for t in plist]


def test_CFMaskCoder_decode():
    original = xr.Variable(('x',), [0, -1, 1], {'_FillValue': -1})
    expected = xr.Variable(('x',), [0, np.nan, 1])
@@ -43,10 +47,145 @@ def test_coder_roundtrip():
@pytest.mark.parametrize('dtype', 'u1 u2 i1 i2 f2 f4'.split())
def test_scaling_converts_to_float32(dtype):
    original = xr.Variable(('x',), np.arange(10, dtype=dtype),
-                          encoding=dict(scale_factor=10))
+                          encoding=dict(scale_factor=np.float32(10)))
    coder = variables.CFScaleOffsetCoder()
    encoded = coder.encode(original)
    assert encoded.dtype == np.float32
    roundtripped = coder.decode(encoded)
-   assert_identical(original, roundtripped)
+   assert roundtripped.dtype == np.float32
+   assert_identical(original, roundtripped)
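
(Why scale_factor changed from 10 to np.float32(10) here: a plain Python int
resolves to a 64-bit integer dtype on most platforms, and under the new
decoding rule its promotion with the float32-encoded data would yield float64,
breaking the float32 roundtrip assertion. A quick check, platform caveats
aside.)

    import numpy as np

    print(np.result_type(np.dtype('float32'),
                         np.dtype(type(10))))              # float64
    print(np.result_type(np.dtype('float32'),
                         np.dtype(type(np.float32(10)))))  # float32
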


all_possible_types = [np.uint8(8),
                      np.uint16(16),
                      np.uint32(32),
                      np.uint64(64),
                      np.int8(80),
                      np.int16(160),
                      np.int32(320),
                      np.int64(640),
                      1,
                      0.01,
                      np.float16(1600),
                      np.float32(3200),
                      np.float64(6400)]

# In all cases encoding returns either np.float32 or np.float64.
# Encoding only cares about the existence of add_offset when the variable
# is an integer:
# - if the variable is a float, the encoded dtype is np.float32;
# - if the variable is an integer with add_offset, the encoded dtype is
#   np.float64;
# - if the variable is an integer with no add_offset, the encoded dtype is
#   np.float32;
# - in all other cases the encoded dtype is np.float64.
# Decoding is the equivalent of the unpacking described in the CF
# conventions: in all cases decoding starts from the encoded dtype
# (np.float32 or np.float64), and the decoded dtype is the largest of
# scale_factor, add_offset and the encoded (not the original) dtype.

#############################
# Case 1: variable has offset
#############################

# Case 1.1: variable is float.
# Encoded dtype should be np.float32.
# If (scale_factor, add_offset) is in the following list, the decoded
# dtype should be np.float32; otherwise it should be np.float64.
combinations_for_float32 = [
    (np.uint8(8), np.uint16(16)),
    (np.uint8(8), np.int8(80)),
    (np.uint8(8), np.int16(160)),
    (np.uint8(8), np.float16(1600)),
    (np.uint8(8), np.float32(3200)),
    (np.uint16(16), np.float16(1600)),
    (np.uint16(16), np.float32(3200)),
    (np.int8(80), np.int16(160)),
    (np.int8(80), np.float16(1600)),
    (np.int8(80), np.float32(3200)),
    (np.int16(160), np.float16(1600)),
    (np.int16(160), np.float32(3200)),
    (np.float16(1600), np.float32(3200)),
    (np.float32(3200), np.float32(3200)),
    (np.float16(1600), np.float16(1600)),
    (np.int16(160), np.int16(160)),
    (np.int8(80), np.int8(80)),
    (np.uint16(16), np.uint16(16)),
    (np.uint8(8), np.uint8(8)),
]
combinations_for_float32.extend(
    reverse_list_of_tuple(combinations_for_float32))


@pytest.mark.parametrize('dtype', 'f2 f4'.split())
@pytest.mark.parametrize('scale_factor', all_possible_types)
@pytest.mark.parametrize('add_offset', all_possible_types)
def test_cfscaleoffset_case_1_float_var(dtype, scale_factor, add_offset):
    original = xr.Variable(('x',), np.arange(10, dtype=dtype),
                           encoding=dict(scale_factor=scale_factor,
                                         add_offset=add_offset))

    coder = variables.CFScaleOffsetCoder()
    encoded = coder.encode(original)
    assert encoded.dtype == np.float32
    roundtripped = coder.decode(encoded)

    if (scale_factor, add_offset) in combinations_for_float32:
        assert roundtripped.dtype == np.float32
    else:
        assert roundtripped.dtype == np.float64


# Case 1.2: variable is integer.
# Encoded dtype should be np.float64, since an add_offset is present.
# Decoded dtype should always be np.float64.
@pytest.mark.parametrize('dtype', 'u1 u2 i1 i2'.split())
@pytest.mark.parametrize('scale_factor', all_possible_types)
@pytest.mark.parametrize('add_offset', all_possible_types)
def test_cfscaleoffset_case_1_int_var(dtype, scale_factor, add_offset):
    original = xr.Variable(('x',), np.arange(10, dtype=dtype),
                           encoding=dict(scale_factor=scale_factor,
                                         add_offset=add_offset))

    coder = variables.CFScaleOffsetCoder()
    encoded = coder.encode(original)
    assert encoded.dtype == np.float64
    roundtripped = coder.decode(encoded)
    assert roundtripped.dtype == np.float64


####################################
# Case 2: variable has no add_offset
####################################

# Case 2.1: for any variable dtype, the encoded dtype should be
# np.float32. If scale_factor is in the following list, the decoded
# dtype should be np.float32; otherwise it should be np.float64.
types_for_float32 = [np.uint8(8),
                     np.uint16(16),
                     np.int8(80),
                     np.int16(160),
                     np.float16(1600),
                     np.float32(3200)]


@pytest.mark.parametrize('dtype', 'u1 u2 i1 i2 f2 f4'.split())
@pytest.mark.parametrize('scale_factor', all_possible_types)
def test_cfscaleoffset_case_2(dtype, scale_factor):
    original = xr.Variable(('x',), np.arange(10, dtype=dtype),
                           encoding=dict(scale_factor=scale_factor))

    coder = variables.CFScaleOffsetCoder()
    encoded = coder.encode(original)
    assert encoded.dtype == np.float32
    roundtripped = coder.decode(encoded)
    if scale_factor in types_for_float32:
        assert roundtripped.dtype == np.float32
    else:
        assert roundtripped.dtype == np.float64