Skip to content

Commit 5381962

Browse files
authored
Refactor out coarsen tests (#5474)
1 parent 11f74f1 commit 5381962

File tree

4 files changed

+303
-291
lines changed

4 files changed

+303
-291
lines changed

setup.cfg

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,8 @@ ignore =
152152
E501 # line too long - let black worry about that
153153
E731 # do not assign a lambda expression, use a def
154154
W503 # line break before binary operator
155+
per-file-ignores =
156+
xarray/tests/*.py:F401,F811
155157
exclude=
156158
.eggs
157159
doc

xarray/tests/test_coarsen.py

Lines changed: 301 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,301 @@
1+
import numpy as np
2+
import pandas as pd
3+
import pytest
4+
5+
import xarray as xr
6+
from xarray import DataArray, Dataset, set_options
7+
8+
from . import assert_allclose, assert_equal, has_dask, requires_cftime
9+
from .test_dataarray import da
10+
from .test_dataset import ds
11+
12+
13+
def test_coarsen_absent_dims_error(ds):
    """Coarsening over a dimension missing from the Dataset raises ValueError."""
    # "foo" is expected to be absent from the fixture dataset's dimensions.
    windows = {"foo": 2}
    with pytest.raises(ValueError, match=r"not found in Dataset."):
        ds.coarsen(**windows)
16+
17+
18+
@pytest.mark.parametrize("dask", [True, False])
@pytest.mark.parametrize(("boundary", "side"), [("trim", "left"), ("pad", "right")])
def test_coarsen_dataset(ds, dask, boundary, side):
    """Dataset.coarsen(...).max() agrees with coarsening its members individually.

    Runs for each (boundary, side) pair, and on a chunked dataset when the
    ``dask`` parameter is set and dask is available.
    """
    if dask and has_dask:
        ds = ds.chunk({"x": 4})

    # The same edge handling is applied to the dataset and to the references.
    edge_kwargs = {"boundary": boundary, "side": side}

    actual = ds.coarsen(time=2, x=3, **edge_kwargs).max()

    # The data variable is compared against coarsening it along "x" only.
    expected_z1 = ds["z1"].coarsen(x=3, **edge_kwargs).max()
    assert_equal(actual["z1"], expected_z1)

    # Coordinates are reduced with the mean by default, even though the data
    # variables were reduced with max.
    expected_time = ds["time"].coarsen(time=2, **edge_kwargs).mean()
    assert_equal(actual["time"], expected_time)
32+
33+
34+
@pytest.mark.parametrize("dask", [True, False])
def test_coarsen_coords(ds, dask):
    """Check ``coord_func`` handling, the window-size error, and datetime coords.

    Runs on a chunked dataset when the ``dask`` parameter is set and dask is
    available.
    """
    if dask and has_dask:
        ds = ds.chunk({"x": 4})

    # check if coord_func works: "time" is reduced with max instead of the
    # default coordinate reduction
    actual = ds.coarsen(time=2, x=3, boundary="trim", coord_func={"time": "max"}).max()
    assert_equal(actual["z1"], ds["z1"].coarsen(x=3, boundary="trim").max())
    assert_equal(actual["time"], ds["time"].coarsen(time=2, boundary="trim").max())

    # With no boundary argument the coarsen is expected to raise
    # (presumably because len(x) is not a multiple of 3 in the fixture).
    with pytest.raises(ValueError):
        ds.coarsen(x=3).mean()
    # Trimming "x" to a multiple of the window size first must not raise.
    ds.isel(x=slice(0, 3 * (len(ds["x"]) // 3))).coarsen(x=3).mean()

    # Smoke test with a pandas datetime coordinate. The start date is written
    # in ISO format so parsing does not depend on pandas' day-first fallback
    # for the ambiguous "15/12/1999" spelling. The local is deliberately not
    # called ``da`` to avoid shadowing the imported fixture of that name.
    daily = xr.DataArray(
        np.linspace(0, 365, num=364),
        dims="time",
        coords={"time": pd.date_range("1999-12-15", periods=364)},
    )
    # Only checks that coarsening does not raise; the result is not inspected.
    daily.coarsen(time=2).mean()
57+
58+
59+
@requires_cftime
def test_coarsen_coords_cftime():
    """Coarsening a cftime-indexed DataArray yields the expected time coordinate."""
    source_times = xr.cftime_range("2000", periods=6)
    arr = xr.DataArray(range(6), [("time", source_times)])

    coarsened = arr.coarsen(time=3).mean()

    # Expected coordinate: two timestamps starting at 2000-01-02, 3 days apart.
    wanted_times = xr.cftime_range("2000-01-02", freq="3D", periods=2)
    np.testing.assert_array_equal(coarsened.time, wanted_times)
66+
67+
68+
@pytest.mark.parametrize(
    "funcname, argument",
    [
        ("reduce", (np.mean,)),
        ("mean", ()),
    ],
)
def test_coarsen_keep_attrs(funcname, argument):
    """Check how ``keep_attrs`` (keyword and global option) affects Dataset coarsen.

    Exercised through both ``reduce(np.mean)`` and the ``mean`` shortcut.
    """
    global_attrs = {"units": "test", "long_name": "testing"}
    da_attrs = {"da_attr": "test"}
    attrs_coords = {"attrs_coords": "test"}
    da_not_coarsend_attrs = {"da_not_coarsend_attr": "test"}

    data = np.linspace(10, 15, 100)
    coords = np.linspace(1, 10, 100)

    ds = Dataset(
        data_vars={
            "da": ("coord", data, da_attrs),
            "da_not_coarsend": ("no_coord", data, da_not_coarsend_attrs),
        },
        coords={"coord": ("coord", coords, attrs_coords)},
        attrs=global_attrs,
    )

    def make_reducer():
        # A fresh coarsen object per scenario, always created outside any
        # set_options context.
        return getattr(ds.coarsen(dim={"coord": 5}), funcname)

    def assert_names_unchanged(result):
        assert result.da.name == "da"
        assert result.da_not_coarsend.name == "da_not_coarsend"

    def assert_attrs_kept(result):
        assert result.attrs == global_attrs
        assert result.da.attrs == da_attrs
        assert result.da_not_coarsend.attrs == da_not_coarsend_attrs
        assert result.coord.attrs == attrs_coords
        assert_names_unchanged(result)

    def assert_attrs_dropped(result):
        assert result.attrs == {}
        assert result.da.attrs == {}
        assert result.da_not_coarsend.attrs == {}
        assert result.coord.attrs == {}
        assert_names_unchanged(result)

    # attrs are kept by default
    reducer = make_reducer()
    assert_attrs_kept(reducer(*argument))

    # attrs are discarded on explicit request
    reducer = make_reducer()
    assert_attrs_dropped(reducer(*argument, keep_attrs=False))

    # attrs are discarded via the global option
    reducer = make_reducer()
    with set_options(keep_attrs=False):
        outcome = reducer(*argument)
    assert_attrs_dropped(outcome)

    # the keyword takes precedence over the global option (keep wins)
    reducer = make_reducer()
    with set_options(keep_attrs=False):
        outcome = reducer(*argument, keep_attrs=True)
    assert_attrs_kept(outcome)

    # the keyword takes precedence over the global option (drop wins)
    reducer = make_reducer()
    with set_options(keep_attrs=True):
        outcome = reducer(*argument, keep_attrs=False)
    assert_attrs_dropped(outcome)
147+
148+
149+
def test_coarsen_keep_attrs_deprecated():
    """Passing ``keep_attrs`` to ``Dataset.coarsen`` warns and is still honoured."""
    global_attrs = {"units": "test", "long_name": "testing"}
    attrs_da = {"da_attr": "test"}

    data = np.linspace(10, 15, 100)
    coords = np.linspace(1, 10, 100)

    ds = Dataset(
        data_vars={"da": ("coord", data)},
        coords={"coord": coords},
        attrs=global_attrs,
    )
    ds.da.attrs = attrs_da

    deprecation_msg = "Passing ``keep_attrs`` to ``coarsen`` is deprecated"

    # Each case is (keep_attrs given to coarsen, kwargs given to mean):
    #   1. the deprecated option on its own discards attrs
    #   2. keep_attrs on the reduction takes precedence over the deprecated one
    scenarios = [
        (False, {}),
        (True, {"keep_attrs": False}),
    ]
    for coarsen_keep_attrs, mean_kwargs in scenarios:
        with pytest.warns(FutureWarning, match=deprecation_msg):
            result = ds.coarsen(dim={"coord": 5}, keep_attrs=coarsen_keep_attrs).mean(
                **mean_kwargs
            )

        assert result.attrs == {}
        assert result.da.attrs == {}
180+
181+
182+
@pytest.mark.slow
@pytest.mark.parametrize("ds", (1, 2), indirect=True)
@pytest.mark.parametrize("window", (1, 2, 3, 4))
@pytest.mark.parametrize("name", ("sum", "mean", "std", "var", "min", "max", "median"))
def test_coarsen_reduce(ds, window, name):
    """``coarsen.reduce(np.nan<name>)`` matches the built-in ``coarsen.<name>()``."""
    # boundary="trim" lets every window size in the parametrization be applied.
    coarsened = ds.coarsen(time=window, boundary="trim")

    # The nan-prefixed numpy function is used as the reference reduction; per
    # the original note this gives behavior similar to bottleneck.
    nan_reducer = getattr(np, f"nan{name}")
    actual = coarsened.reduce(nan_reducer)
    builtin_reduction = getattr(coarsened, name)
    expected = builtin_reduction()
    assert_allclose(actual, expected)

    # The order of the data variables must be unchanged.
    assert list(ds.data_vars) == list(actual.data_vars)

    # Every variable must come back with its original dimension order.
    for var_name, original in ds.data_vars.items():
        assert original.dims == actual[var_name].dims
201+
202+
203+
@pytest.mark.parametrize(
    "funcname, argument",
    [
        ("reduce", (np.mean,)),
        ("mean", ()),
    ],
)
def test_coarsen_da_keep_attrs(funcname, argument):
    """Check how ``keep_attrs`` (keyword and global option) affects DataArray coarsen.

    Exercised through both ``reduce(np.mean)`` and the ``mean`` shortcut.

    The coordinate-attribute checks were previously bare comparisons on the
    *input* array (``da.coord.attrs == ...`` without ``assert``) and therefore
    verified nothing. They are now real assertions on the *result*, with the
    expectations mirroring the equivalent Dataset test.
    """
    attrs_da = {"da_attr": "test"}
    attrs_coords = {"attrs_coords": "test"}

    data = np.linspace(10, 15, 100)
    coords = np.linspace(1, 10, 100)

    da = DataArray(
        data,
        dims=("coord"),
        coords={"coord": ("coord", coords, attrs_coords)},
        attrs=attrs_da,
        name="name",
    )

    # attrs are now kept per default
    func = getattr(da.coarsen(dim={"coord": 5}), funcname)
    result = func(*argument)
    assert result.attrs == attrs_da
    assert result.coord.attrs == attrs_coords
    assert result.name == "name"

    # discard attrs
    func = getattr(da.coarsen(dim={"coord": 5}), funcname)
    result = func(*argument, keep_attrs=False)
    assert result.attrs == {}
    assert result.coord.attrs == {}
    assert result.name == "name"

    # test discard attrs using global option
    func = getattr(da.coarsen(dim={"coord": 5}), funcname)
    with set_options(keep_attrs=False):
        result = func(*argument)
    assert result.attrs == {}
    assert result.coord.attrs == {}
    assert result.name == "name"

    # keyword takes precedence over global option: keep_attrs=True wins, so
    # the coordinate attrs must be kept as well (the old no-op line compared
    # against {} here, which contradicts the kept array attrs).
    func = getattr(da.coarsen(dim={"coord": 5}), funcname)
    with set_options(keep_attrs=False):
        result = func(*argument, keep_attrs=True)
    assert result.attrs == attrs_da
    assert result.coord.attrs == attrs_coords
    assert result.name == "name"

    # keyword takes precedence over global option: keep_attrs=False wins
    func = getattr(da.coarsen(dim={"coord": 5}), funcname)
    with set_options(keep_attrs=True):
        result = func(*argument, keep_attrs=False)
    assert result.attrs == {}
    assert result.coord.attrs == {}
    assert result.name == "name"
261+
262+
263+
def test_coarsen_da_keep_attrs_deprecated():
    """Passing ``keep_attrs`` to ``DataArray.coarsen`` warns and is still honoured."""
    attrs_da = {"da_attr": "test"}

    data = np.linspace(10, 15, 100)
    coords = np.linspace(1, 10, 100)

    da = DataArray(data, dims=("coord"), coords={"coord": coords}, attrs=attrs_da)

    deprecation_msg = "Passing ``keep_attrs`` to ``coarsen`` is deprecated"

    # Each case is (keep_attrs given to coarsen, kwargs given to mean):
    #   1. the deprecated option on its own discards attrs
    #   2. keep_attrs on the reduction takes precedence over the deprecated one
    scenarios = [
        (False, {}),
        (True, {"keep_attrs": False}),
    ]
    for coarsen_keep_attrs, mean_kwargs in scenarios:
        with pytest.warns(FutureWarning, match=deprecation_msg):
            result = da.coarsen(dim={"coord": 5}, keep_attrs=coarsen_keep_attrs).mean(
                **mean_kwargs
            )

        assert result.attrs == {}
286+
287+
288+
@pytest.mark.parametrize("da", (1, 2), indirect=True)
@pytest.mark.parametrize("window", (1, 2, 3, 4))
@pytest.mark.parametrize("name", ("sum", "mean", "std", "max"))
def test_coarsen_da_reduce(da, window, name):
    """``coarsen.reduce(np.nan<name>)`` matches the built-in ``coarsen.<name>()``."""
    has_several_nans = da.isnull().sum() > 1
    if has_several_nans and window == 1:
        pytest.skip("These parameters lead to all-NaN slices")

    # boundary="trim" lets every window size in the parametrization be applied.
    coarsened = da.coarsen(time=window, boundary="trim")

    # The nan-prefixed numpy function is used as the reference reduction; per
    # the original note this gives behavior similar to bottleneck.
    nan_reducer = getattr(np, f"nan{name}")
    actual = coarsened.reduce(nan_reducer)
    builtin_reduction = getattr(coarsened, name)
    expected = builtin_reduction()
    assert_allclose(actual, expected)

0 commit comments

Comments
 (0)