From b6d4bf72fd56014c0bd142bc438d6204d8dfd1ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Sun, 17 Sep 2023 10:15:25 +0200 Subject: [PATCH] Preserve nanosecond resolution when encoding/decoding times (#7827) * preserve nanosecond resolution when encoding/decoding times. * Apply suggestions from code review Co-authored-by: Spencer Clark * use emit_user_level_warning * move time alignment for nc3 to encode_nc3_variable * fix test for encode_cf_timedelta * fix CFMaskCoder for time-like (also allow timedelta64), add first tests * rename to _unpack_time_units_and_ref_date as suggested in review * refactor delta -> time_units as suggested in review * refactor out function _time_units_to_timedelta64, reorder flow and remove unneeded checks, apply filterwarnings, adapt tests * import _is_time_like from coding.variables * adapt tests, add _numpy_to_netcdf_timeunit-conversion function * adapt tests, add _numpy_to_netcdf_timeunit-conversion function * adapt test as per review, remove arm_xfail for backend test * add whats-new.rst entry * Update doc/whats-new.rst Co-authored-by: Spencer Clark * Update doc/whats-new.rst Co-authored-by: Spencer Clark * fix whats-new.rst --------- Co-authored-by: Spencer Clark --- doc/whats-new.rst | 6 ++ xarray/backends/netcdf3.py | 19 +++- xarray/coding/times.py | 139 +++++++++++++++++++------- xarray/coding/variables.py | 38 +++++++- xarray/tests/test_backends.py | 2 - xarray/tests/test_coding_times.py | 156 +++++++++++++++++++++++++++++- 6 files changed, 315 insertions(+), 45 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 863451b6bd0..9ac58ea6534 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -76,6 +76,10 @@ Bug fixes - Calling plot with kwargs ``col``, ``row`` or ``hue`` no longer squeezes dimensions passed via these arguments (:issue:`7552`, :pull:`8174`). By `Wiktor Kraśnicki `_. 
+- Fixed a bug where casting from ``float`` to ``int64`` (undefined for ``NaN``) led to varying + issues (:issue:`7817`, :issue:`7942`, :issue:`7790`, :issue:`6191`, :issue:`7096`, + :issue:`1064`, :pull:`7827`). + By `Kai Mühlbauer `_. Documentation ~~~~~~~~~~~~~ @@ -86,6 +90,8 @@ Internal Changes - Many error messages related to invalid dimensions or coordinates now always show the list of valid dims/coords (:pull:`8079`). By `András Gunyhó `_. +- Refactor of encoding and decoding times/timedeltas to preserve nanosecond resolution in arrays that contain missing values (:pull:`7827`). + By `Kai Mühlbauer `_. .. _whats-new.2023.08.0: diff --git a/xarray/backends/netcdf3.py b/xarray/backends/netcdf3.py index ef389eefc90..db00ef1972b 100644 --- a/xarray/backends/netcdf3.py +++ b/xarray/backends/netcdf3.py @@ -88,13 +88,30 @@ def encode_nc3_attrs(attrs): return {k: encode_nc3_attr_value(v) for k, v in attrs.items()} +def _maybe_prepare_times(var): + # checks for integer-based time-like and + # replaces np.iinfo(np.int64).min with _FillValue or np.nan + # this keeps backwards compatibility + + data = var.data + if data.dtype.kind in "iu": + units = var.attrs.get("units", None) + if units is not None: + if coding.variables._is_time_like(units): + mask = data == np.iinfo(np.int64).min + if mask.any(): + data = np.where(mask, var.attrs.get("_FillValue", np.nan), data) + return data + + def encode_nc3_variable(var): for coder in [ coding.strings.EncodedStringCoder(allows_unicode=False), coding.strings.CharacterArrayCoder(), ]: var = coder.encode(var) - data = coerce_nc3_dtype(var.data) + data = _maybe_prepare_times(var) + data = coerce_nc3_dtype(data) attrs = encode_nc3_attrs(var.attrs) return Variable(var.dims, data, attrs, var.encoding) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index b531dc97d0c..79efbecfb7c 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -25,6 +25,7 @@ from xarray.core.formatting import first_n_items, 
format_timestamp, last_item from xarray.core.pdcompat import nanosecond_precision_timestamp from xarray.core.pycompat import is_duck_dask_array +from xarray.core.utils import emit_user_level_warning from xarray.core.variable import Variable try: @@ -122,6 +123,18 @@ def _netcdf_to_numpy_timeunit(units: str) -> str: }[units] +def _numpy_to_netcdf_timeunit(units: str) -> str: + return { + "ns": "nanoseconds", + "us": "microseconds", + "ms": "milliseconds", + "s": "seconds", + "m": "minutes", + "h": "hours", + "D": "days", + }[units] + + def _ensure_padded_year(ref_date: str) -> str: # Reference dates without a padded year (e.g. since 1-1-1 or since 2-3-4) # are ambiguous (is it YMD or DMY?). This can lead to some very odd @@ -171,6 +184,20 @@ def _unpack_netcdf_time_units(units: str) -> tuple[str, str]: return delta_units, ref_date +def _unpack_time_units_and_ref_date(units: str) -> tuple[str, pd.Timestamp]: + # same as _unpack_netcdf_time_units but finalizes ref_date for + # processing in encode_cf_datetime + time_units, _ref_date = _unpack_netcdf_time_units(units) + # TODO: the strict enforcement of nanosecond precision Timestamps can be + # relaxed when addressing GitHub issue #7493. + ref_date = nanosecond_precision_timestamp(_ref_date) + # If the ref_date Timestamp is timezone-aware, convert to UTC and + # make it timezone-naive (GH 2649). + if ref_date.tz is not None: + ref_date = ref_date.tz_convert(None) + return time_units, ref_date + + def _decode_cf_datetime_dtype( data, units: str, calendar: str, use_cftime: bool | None ) -> np.dtype: @@ -222,8 +249,8 @@ def _decode_datetime_with_pandas( "pandas." ) - delta, ref_date = _unpack_netcdf_time_units(units) - delta = _netcdf_to_numpy_timeunit(delta) + time_units, ref_date = _unpack_netcdf_time_units(units) + time_units = _netcdf_to_numpy_timeunit(time_units) try: # TODO: the strict enforcement of nanosecond precision Timestamps can be # relaxed when addressing GitHub issue #7493. 
@@ -237,8 +264,8 @@ def _decode_datetime_with_pandas( warnings.filterwarnings("ignore", "invalid value encountered", RuntimeWarning) if flat_num_dates.size > 0: # avoid size 0 datetimes GH1329 - pd.to_timedelta(flat_num_dates.min(), delta) + ref_date - pd.to_timedelta(flat_num_dates.max(), delta) + ref_date + pd.to_timedelta(flat_num_dates.min(), time_units) + ref_date + pd.to_timedelta(flat_num_dates.max(), time_units) + ref_date # To avoid integer overflow when converting to nanosecond units for integer # dtypes smaller than np.int64 cast all integer and unsigned integer dtype @@ -251,9 +278,12 @@ def _decode_datetime_with_pandas( # Cast input ordinals to integers of nanoseconds because pd.to_timedelta # works much faster when dealing with integers (GH 1399). - flat_num_dates_ns_int = (flat_num_dates * _NS_PER_TIME_DELTA[delta]).astype( - np.int64 - ) + # properly handle NaN/NaT to prevent casting NaN to int + nan = np.isnan(flat_num_dates) | (flat_num_dates == np.iinfo(np.int64).min) + flat_num_dates = flat_num_dates * _NS_PER_TIME_DELTA[time_units] + flat_num_dates_ns_int = np.zeros_like(flat_num_dates, dtype=np.int64) + flat_num_dates_ns_int[nan] = np.iinfo(np.int64).min + flat_num_dates_ns_int[~nan] = flat_num_dates[~nan].astype(np.int64) # Use pd.to_timedelta to safely cast integer values to timedeltas, # and add those to a Timestamp to safely produce a DatetimeIndex. 
This @@ -364,6 +394,10 @@ def _infer_time_units_from_diff(unique_timedeltas) -> str: return "seconds" +def _time_units_to_timedelta64(units: str) -> np.timedelta64: + return np.timedelta64(1, _netcdf_to_numpy_timeunit(units)).astype("timedelta64[ns]") + + def infer_calendar_name(dates) -> CFCalendar: """Given an array of datetimes, infer the CF calendar name""" if is_np_datetime_like(dates.dtype): @@ -572,9 +606,12 @@ def _should_cftime_be_used( def _cleanup_netcdf_time_units(units: str) -> str: - delta, ref_date = _unpack_netcdf_time_units(units) + time_units, ref_date = _unpack_netcdf_time_units(units) + time_units = time_units.lower() + if not time_units.endswith("s"): + time_units = f"{time_units}s" try: - units = f"{delta} since {format_timestamp(ref_date)}" + units = f"{time_units} since {format_timestamp(ref_date)}" except (OutOfBoundsDatetime, ValueError): # don't worry about reifying the units if they're out of bounds or # formatted badly @@ -633,32 +670,32 @@ def encode_cf_datetime( """ dates = np.asarray(dates) + data_units = infer_datetime_units(dates) + if units is None: - units = infer_datetime_units(dates) + units = data_units else: units = _cleanup_netcdf_time_units(units) if calendar is None: calendar = infer_calendar_name(dates) - delta, _ref_date = _unpack_netcdf_time_units(units) try: if not _is_standard_calendar(calendar) or dates.dtype.kind == "O": # parse with cftime instead raise OutOfBoundsDatetime assert dates.dtype == "datetime64[ns]" - delta_units = _netcdf_to_numpy_timeunit(delta) - time_delta = np.timedelta64(1, delta_units).astype("timedelta64[ns]") - - # TODO: the strict enforcement of nanosecond precision Timestamps can be - # relaxed when addressing GitHub issue #7493. 
- ref_date = nanosecond_precision_timestamp(_ref_date) + time_units, ref_date = _unpack_time_units_and_ref_date(units) + time_delta = _time_units_to_timedelta64(time_units) - # If the ref_date Timestamp is timezone-aware, convert to UTC and - # make it timezone-naive (GH 2649). - if ref_date.tz is not None: - ref_date = ref_date.tz_convert(None) + # retrieve needed units to faithfully encode to int64 + needed_units, data_ref_date = _unpack_time_units_and_ref_date(data_units) + if data_units != units: + # this accounts for differences in the reference times + ref_delta = abs(data_ref_date - ref_date).to_timedelta64() + if ref_delta > np.timedelta64(0, "ns"): + needed_units = _infer_time_units_from_diff(ref_delta) # Wrap the dates in a DatetimeIndex to do the subtraction to ensure # an OverflowError is raised if the ref_date is too far away from @@ -666,29 +703,60 @@ def encode_cf_datetime( dates_as_index = pd.DatetimeIndex(dates.ravel()) time_deltas = dates_as_index - ref_date - # Use floor division if time_delta evenly divides all differences - # to preserve integer dtype if possible (GH 4045). - if np.all(time_deltas % time_delta == np.timedelta64(0, "ns")): - num = time_deltas // time_delta + # needed time delta to encode faithfully to int64 + needed_time_delta = _time_units_to_timedelta64(needed_units) + if time_delta <= needed_time_delta: + # calculate int64 floor division + # to preserve integer dtype if possible (GH 4045, GH7817). + num = time_deltas // time_delta.astype(np.int64) + num = num.astype(np.int64, copy=False) else: + emit_user_level_warning( + f"Times can't be serialized faithfully with requested units {units!r}. " + f"Resolution of {needed_units!r} needed. " + f"Serializing timeseries to floating point." 
+ ) num = time_deltas / time_delta num = num.values.reshape(dates.shape) except (OutOfBoundsDatetime, OverflowError, ValueError): num = _encode_datetime_with_cftime(dates, units, calendar) + # do it now only for cftime-based flow + # we already covered for this in pandas-based flow + num = cast_to_int_if_safe(num) - num = cast_to_int_if_safe(num) return (num, units, calendar) def encode_cf_timedelta(timedeltas, units: str | None = None) -> tuple[np.ndarray, str]: - if units is None: - units = infer_timedelta_units(timedeltas) + data_units = infer_timedelta_units(timedeltas) - np_unit = _netcdf_to_numpy_timeunit(units) - num = 1.0 * timedeltas / np.timedelta64(1, np_unit) - num = np.where(pd.isnull(timedeltas), np.nan, num) - num = cast_to_int_if_safe(num) + if units is None: + units = data_units + + time_delta = _time_units_to_timedelta64(units) + time_deltas = pd.TimedeltaIndex(timedeltas.ravel()) + + # retrieve needed units to faithfully encode to int64 + needed_units = data_units + if data_units != units: + needed_units = _infer_time_units_from_diff(np.unique(time_deltas.dropna())) + + # needed time delta to encode faithfully to int64 + needed_time_delta = _time_units_to_timedelta64(needed_units) + if time_delta <= needed_time_delta: + # calculate int64 floor division + # to preserve integer dtype if possible + num = time_deltas // time_delta.astype(np.int64) + num = num.astype(np.int64, copy=False) + else: + emit_user_level_warning( + f"Timedeltas can't be serialized faithfully with requested units {units!r}. " + f"Resolution of {needed_units!r} needed. " + f"Serializing timedeltas to floating point." 
+ ) + num = time_deltas / time_delta + num = num.values.reshape(timedeltas.shape) return (num, units) @@ -702,9 +770,10 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: ) or contains_cftime_datetimes(variable): dims, data, attrs, encoding = unpack_for_encoding(variable) - (data, units, calendar) = encode_cf_datetime( - data, encoding.pop("units", None), encoding.pop("calendar", None) - ) + units = encoding.pop("units", None) + calendar = encoding.pop("calendar", None) + (data, units, calendar) = encode_cf_datetime(data, units, calendar) + safe_setitem(attrs, "units", units, name=name) safe_setitem(attrs, "calendar", calendar, name=name) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 58c4739b810..d694c531b15 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -215,6 +215,21 @@ def _apply_mask( return np.where(condition, decoded_fill_value, data) +def _is_time_like(units): + # test for time-like + time_strings = [ + "since", + "days", + "hours", + "minutes", + "seconds", + "milliseconds", + "microseconds", + "nanoseconds", + ] + return any(tstr in str(units) for tstr in time_strings) + + class CFMaskCoder(VariableCoder): """Mask or unmask fill values according to CF conventions.""" @@ -236,19 +251,32 @@ def encode(self, variable: Variable, name: T_Name = None): f"Variable {name!r} has conflicting _FillValue ({fv}) and missing_value ({mv}). Cannot encode data." 
) + # special case DateTime to properly handle NaT + is_time_like = _is_time_like(attrs.get("units")) + if fv_exists: # Ensure _FillValue is cast to same dtype as data's encoding["_FillValue"] = dtype.type(fv) fill_value = pop_to(encoding, attrs, "_FillValue", name=name) if not pd.isnull(fill_value): - data = duck_array_ops.fillna(data, fill_value) + if is_time_like and data.dtype.kind in "iu": + data = duck_array_ops.where( + data != np.iinfo(np.int64).min, data, fill_value + ) + else: + data = duck_array_ops.fillna(data, fill_value) if mv_exists: # Ensure missing_value is cast to same dtype as data's encoding["missing_value"] = dtype.type(mv) fill_value = pop_to(encoding, attrs, "missing_value", name=name) if not pd.isnull(fill_value) and not fv_exists: - data = duck_array_ops.fillna(data, fill_value) + if is_time_like and data.dtype.kind in "iu": + data = duck_array_ops.where( + data != np.iinfo(np.int64).min, data, fill_value + ) + else: + data = duck_array_ops.fillna(data, fill_value) return Variable(dims, data, attrs, encoding, fastpath=True) @@ -275,7 +303,11 @@ def decode(self, variable: Variable, name: T_Name = None): stacklevel=3, ) - dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype) + # special case DateTime to properly handle NaT + if _is_time_like(attrs.get("units")) and data.dtype.kind in "iu": + dtype, decoded_fill_value = np.int64, np.iinfo(np.int64).min + else: + dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype) if encoded_fill_values: transform = partial( diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index ea088b36a9a..5bd517098f1 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -54,7 +54,6 @@ from xarray.core.options import set_options from xarray.core.pycompat import array_type from xarray.tests import ( - arm_xfail, assert_allclose, assert_array_equal, assert_equal, @@ -526,7 +525,6 @@ def test_roundtrip_string_encoded_characters(self) -> None: 
assert_identical(expected, actual) assert actual["x"].encoding["_Encoding"] == "ascii" - @arm_xfail def test_roundtrip_numpy_datetime_data(self) -> None: times = pd.to_datetime(["2000-01-01", "2000-01-02", "NaT"]) expected = Dataset({"t": ("t", times), "t0": times[0]}) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 580de878fe6..079e432b565 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -20,6 +20,7 @@ ) from xarray.coding.times import ( _encode_datetime_with_cftime, + _numpy_to_netcdf_timeunit, _should_cftime_be_used, cftime_to_nptime, decode_cf_datetime, @@ -29,7 +30,7 @@ from xarray.coding.variables import SerializationWarning from xarray.conventions import _update_bounds_attributes, cf_encoder from xarray.core.common import contains_cftime_datetimes -from xarray.testing import assert_equal, assert_identical +from xarray.testing import assert_allclose, assert_equal, assert_identical from xarray.tests import ( FirstElementAccessibleArray, arm_xfail, @@ -110,6 +111,7 @@ def _all_cftime_date_types(): @requires_cftime @pytest.mark.filterwarnings("ignore:Ambiguous reference date string") +@pytest.mark.filterwarnings("ignore:Times can't be serialized faithfully") @pytest.mark.parametrize(["num_dates", "units", "calendar"], _CF_DATETIME_TESTS) def test_cf_datetime(num_dates, units, calendar) -> None: import cftime @@ -567,6 +569,7 @@ def test_infer_cftime_datetime_units(calendar, date_args, expected) -> None: assert expected == coding.times.infer_datetime_units(dates) +@pytest.mark.filterwarnings("ignore:Timedeltas can't be serialized faithfully") @pytest.mark.parametrize( ["timedeltas", "units", "numbers"], [ @@ -576,10 +579,10 @@ def test_infer_cftime_datetime_units(calendar, date_args, expected) -> None: ("1ms", "milliseconds", np.int64(1)), ("1us", "microseconds", np.int64(1)), ("1ns", "nanoseconds", np.int64(1)), - (["NaT", "0s", "1s"], None, [np.nan, 0, 1]), + (["NaT", "0s", "1s"], 
None, [np.iinfo(np.int64).min, 0, 1]), (["30m", "60m"], "hours", [0.5, 1.0]), - ("NaT", "days", np.nan), - (["NaT", "NaT"], "days", [np.nan, np.nan]), + ("NaT", "days", np.iinfo(np.int64).min), + (["NaT", "NaT"], "days", [np.iinfo(np.int64).min, np.iinfo(np.int64).min]), ], ) def test_cf_timedelta(timedeltas, units, numbers) -> None: @@ -1020,6 +1023,7 @@ def test_decode_ambiguous_time_warns(calendar) -> None: np.testing.assert_array_equal(result, expected) +@pytest.mark.filterwarnings("ignore:Times can't be serialized faithfully") @pytest.mark.parametrize("encoding_units", FREQUENCIES_TO_ENCODING_UNITS.values()) @pytest.mark.parametrize("freq", FREQUENCIES_TO_ENCODING_UNITS.keys()) @pytest.mark.parametrize("date_range", [pd.date_range, cftime_range]) @@ -1191,3 +1195,147 @@ def test_contains_cftime_lazy() -> None: ) array = FirstElementAccessibleArray(times) assert _contains_cftime_datetimes(array) + + +@pytest.mark.parametrize( + "timestr, timeunit, dtype, fill_value, use_encoding", + [ + ("1677-09-21T00:12:43.145224193", "ns", np.int64, 20, True), + ("1970-09-21T00:12:44.145224808", "ns", np.float64, 1e30, True), + ( + "1677-09-21T00:12:43.145225216", + "ns", + np.float64, + -9.223372036854776e18, + True, + ), + ("1677-09-21T00:12:43.145224193", "ns", np.int64, None, False), + ("1677-09-21T00:12:43.145225", "us", np.int64, None, False), + ("1970-01-01T00:00:01.000001", "us", np.int64, None, False), + ], +) +def test_roundtrip_datetime64_nanosecond_precision( + timestr: str, + timeunit: str, + dtype: np.typing.DTypeLike, + fill_value: int | float | None, + use_encoding: bool, +) -> None: + # test for GH7817 + time = np.datetime64(timestr, timeunit) + times = [np.datetime64("1970-01-01T00:00:00", timeunit), np.datetime64("NaT"), time] + + if use_encoding: + encoding = dict(dtype=dtype, _FillValue=fill_value) + else: + encoding = {} + + var = Variable(["time"], times, encoding=encoding) + assert var.dtype == np.dtype(" None: + # test warning if times can't be 
serialized faithfully + times = [ + np.datetime64("1970-01-01T00:01:00", "ns"), + np.datetime64("NaT"), + np.datetime64("1970-01-02T00:01:00", "ns"), + ] + units = "days since 1970-01-10T01:01:00" + needed_units = "hours" + encoding = dict(_FillValue=20, units=units) + var = Variable(["time"], times, encoding=encoding) + wmsg = ( + f"Times can't be serialized faithfully with requested units {units!r}. " + f"Resolution of {needed_units!r} needed. " + ) + with pytest.warns(UserWarning, match=wmsg): + encoded_var = conventions.encode_cf_variable(var) + + decoded_var = conventions.decode_cf_variable("foo", encoded_var) + assert_identical(var, decoded_var) + + +@pytest.mark.parametrize( + "dtype, fill_value", + [(np.int64, 20), (np.int64, np.iinfo(np.int64).min), (np.float64, 1e30)], +) +def test_roundtrip_timedelta64_nanosecond_precision( + dtype: np.typing.DTypeLike, fill_value: int | float +) -> None: + # test for GH7942 + one_day = np.timedelta64(1, "ns") + nat = np.timedelta64("nat", "ns") + timedelta_values = (np.arange(5) * one_day).astype("timedelta64[ns]") + timedelta_values[2] = nat + timedelta_values[4] = nat + + encoding = dict(dtype=dtype, _FillValue=fill_value) + var = Variable(["time"], timedelta_values, encoding=encoding) + + encoded_var = conventions.encode_cf_variable(var) + decoded_var = conventions.decode_cf_variable("foo", encoded_var) + + assert_identical(var, decoded_var) + + +def test_roundtrip_timedelta64_nanosecond_precision_warning() -> None: + # test warning if timedeltas can't be serialized faithfully + one_day = np.timedelta64(1, "D") + nat = np.timedelta64("nat", "ns") + timedelta_values = (np.arange(5) * one_day).astype("timedelta64[ns]") + timedelta_values[2] = nat + timedelta_values[4] = np.timedelta64(12, "h").astype("timedelta64[ns]") + + units = "days" + needed_units = "hours" + wmsg = ( + f"Timedeltas can't be serialized faithfully with requested units {units!r}. " + f"Resolution of {needed_units!r} needed. 
" + ) + encoding = dict(_FillValue=20, units=units) + var = Variable(["time"], timedelta_values, encoding=encoding) + with pytest.warns(UserWarning, match=wmsg): + encoded_var = conventions.encode_cf_variable(var) + decoded_var = conventions.decode_cf_variable("foo", encoded_var) + assert_allclose(var, decoded_var) + + +def test_roundtrip_float_times() -> None: + fill_value = 20.0 + times = [np.datetime64("2000-01-01 12:00:00", "ns"), np.datetime64("NaT", "ns")] + + units = "days since 2000-01-01" + var = Variable( + ["time"], + times, + encoding=dict(dtype=np.float64, _FillValue=fill_value, units=units), + ) + + encoded_var = conventions.encode_cf_variable(var) + np.testing.assert_array_equal(encoded_var, np.array([0.5, 20.0])) + assert encoded_var.attrs["units"] == units + assert encoded_var.attrs["_FillValue"] == fill_value + + decoded_var = conventions.decode_cf_variable("foo", encoded_var) + assert_identical(var, decoded_var) + assert decoded_var.encoding["units"] == units + assert decoded_var.encoding["_FillValue"] == fill_value