1 | 1 | import operator
2 | 2 | import pickle
| 3 | +import sys
3 | 4 | from contextlib import suppress
4 | 5 | from distutils.version import LooseVersion
5 | 6 | from textwrap import dedent
23 | 24 |     raises_regex,
24 | 25 | )
25 | 26 | from ..core.duck_array_ops import lazy_array_equiv
| 27 | +from .test_backends import create_tmp_file
26 | 28 |
27 | 29 | dask = pytest.importorskip("dask")
28 | 30 | da = pytest.importorskip("dask.array")
29 | 31 | dd = pytest.importorskip("dask.dataframe")
30 | 32 |
| 33 | +ON_WINDOWS = sys.platform == "win32"
| 34 | +
31 | 35 |
32 | 36 | class CountingScheduler:
33 | 37 |     """ Simple dask scheduler counting the number of computes.
@@ -1221,3 +1225,57 @@ def test_lazy_array_equiv():
1221 | 1225 |         "no_conflicts",
1222 | 1226 |     ]:
1223 | 1227 |         xr.merge([lons1, lons2], compat=compat)
| 1228 | +
| 1229 | +
| 1230 | +@pytest.mark.parametrize("obj", [make_da(), make_ds()])
| 1231 | +@pytest.mark.parametrize(
| 1232 | +    "transform",
| 1233 | +    [
| 1234 | +        lambda x: x.reset_coords(),
| 1235 | +        lambda x: x.reset_coords(drop=True),
| 1236 | +        lambda x: x.isel(x=1),
| 1237 | +        lambda x: x.attrs.update(new_attrs=1),
| 1238 | +        lambda x: x.assign_coords(cxy=1),
| 1239 | +        lambda x: x.rename({"x": "xnew"}),
| 1240 | +        lambda x: x.rename({"cxy": "cxynew"}),
| 1241 | +    ],
| 1242 | +)
| 1243 | +def test_normalize_token_not_identical(obj, transform):
| 1244 | +    with raise_if_dask_computes():
| 1245 | +        assert not dask.base.tokenize(obj) == dask.base.tokenize(transform(obj))
| 1246 | +        assert not dask.base.tokenize(obj.compute()) == dask.base.tokenize(
| 1247 | +            transform(obj.compute())
| 1248 | +        )
| 1249 | +
| 1250 | +
| 1251 | +@pytest.mark.parametrize("transform", [lambda x: x, lambda x: x.compute()])
| 1252 | +def test_normalize_differently_when_data_changes(transform):
| 1253 | +    obj = transform(make_ds())
| 1254 | +    new = obj.copy(deep=True)
| 1255 | +    new["a"] *= 2
| 1256 | +    with raise_if_dask_computes():
| 1257 | +        assert not dask.base.tokenize(obj) == dask.base.tokenize(new)
| 1258 | +
| 1259 | +    obj = transform(make_da())
| 1260 | +    new = obj.copy(deep=True)
| 1261 | +    new *= 2
| 1262 | +    with raise_if_dask_computes():
| 1263 | +        assert not dask.base.tokenize(obj) == dask.base.tokenize(new)
| 1264 | +
| 1265 | +
| 1266 | +@pytest.mark.parametrize(
| 1267 | +    "transform", [lambda x: x, lambda x: x.copy(), lambda x: x.copy(deep=True)]
| 1268 | +)
| 1269 | +@pytest.mark.parametrize(
| 1270 | +    "obj", [make_da(), make_ds(), make_da().indexes["x"], make_ds().variables["a"]]
| 1271 | +)
| 1272 | +def test_normalize_token_identical(obj, transform):
| 1273 | +    with raise_if_dask_computes():
| 1274 | +        assert dask.base.tokenize(obj) == dask.base.tokenize(transform(obj))
| 1275 | +
| 1276 | +
| 1277 | +def test_normalize_token_netcdf_backend(map_ds):
| 1278 | +    with create_tmp_file(allow_cleanup_failure=ON_WINDOWS) as tmp_file:
| 1279 | +        map_ds.to_netcdf(tmp_file)
| 1280 | +        read = xr.open_dataset(tmp_file)
| 1281 | +        assert not dask.base.tokenize(map_ds) == dask.base.tokenize(read)
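For reference, the behaviour these new tests pin down can be illustrated with a small standalone sketch (assuming an xarray build that includes the deterministic tokenization support exercised here, plus dask and numpy installed; the object names are illustrative only): identical objects produce identical tokens, while any change to the underlying data changes the token.

```python
import dask.base
import numpy as np
import xarray as xr

# A tiny DataArray and an identical deep copy (mirrors the make_da()/copy(deep=True)
# pattern used by the tests above).
a = xr.DataArray(np.arange(4.0), dims="x", name="a")
b = a.copy(deep=True)

# Identical content -> identical token, as test_normalize_token_identical asserts.
assert dask.base.tokenize(a) == dask.base.tokenize(b)

# Mutated data -> different token, as test_normalize_differently_when_data_changes asserts.
b *= 2
assert dask.base.tokenize(a) != dask.base.tokenize(b)
```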