From 45d25b9fac7598ba824b5aeaa058aa732bd40126 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Mon, 22 Jul 2024 12:26:13 -0500 Subject: [PATCH] Restore ability to specify _FillValue as Python native integers (#9258) --- xarray/coding/variables.py | 16 +++++++++++++--- xarray/tests/test_backends.py | 4 ++-- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index d19f285d2b9..b9343bb0f0a 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -520,9 +520,19 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: # trying to get it from encoding, resort to an int with the same precision as data.dtype if not available signed_dtype = np.dtype(encoding.get("dtype", f"i{data.dtype.itemsize}")) if "_FillValue" in attrs: - new_fill = np.array(attrs["_FillValue"]) - # use view here to prevent OverflowError - attrs["_FillValue"] = new_fill.view(signed_dtype).item() + try: + # user provided the on-disk signed fill + new_fill = signed_dtype.type(attrs["_FillValue"]) + except OverflowError: + # user provided the in-memory unsigned fill, convert to signed type + unsigned_dtype = np.dtype(f"u{signed_dtype.itemsize}") + # use view here to prevent OverflowError + new_fill = ( + np.array(attrs["_FillValue"], dtype=unsigned_dtype) + .view(signed_dtype) + .item() + ) + attrs["_FillValue"] = new_fill data = duck_array_ops.astype(duck_array_ops.around(data), signed_dtype) return Variable(dims, data, attrs, encoding, fastpath=True) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 152a9ec40e9..a80f9d2c692 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -166,7 +166,7 @@ def create_encoded_masked_and_scaled_data(dtype: np.dtype) -> Dataset: def create_unsigned_masked_scaled_data(dtype: np.dtype) -> Dataset: encoding = { - "_FillValue": np.int8(-1), + "_FillValue": 255, "_Unsigned": "true", "dtype": "i1", "add_offset": dtype.type(10), @@ -925,7 +925,7 @@ def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn, dtype) -> None: assert decoded.variables[k].dtype == actual.variables[k].dtype assert_allclose(decoded, actual, decode_bytes=False) - @pytest.mark.parametrize("fillvalue", [np.int8(-1), np.uint8(255)]) + @pytest.mark.parametrize("fillvalue", [np.int8(-1), np.uint8(255), -1, 255]) def test_roundtrip_unsigned(self, fillvalue): # regression/numpy2 test for encoding = {