Closed
Description
Just making a new issue for #3598
The tests for #3584 fail on dask == 2.8.1 with this interesting bug. Here's a reproducible example:
import dask
import xarray as xr
ds = xr.Dataset({'x': (('y',), dask.array.ones(10, chunks=(3,)))})
mapped = ds.map_blocks(lambda x: x)
mapped.compute() # works
xr.testing.assert_equal(mapped, ds) # does not work
xr.testing.assert_equal(mapped, ds.compute()) # works
xr.testing.assert_equal(mapped.compute(), ds) # works
xr.testing.assert_equal(mapped.compute(), ds.compute()) # works
The traceback is
~/miniconda3/envs/dcpy/lib/python3.7/site-packages/dask/array/optimization.py in optimize(dsk, keys, fuse_keys, fast_functions, inline_functions_fast_functions, rename_fused_keys, **kwargs)
41 if isinstance(dsk, HighLevelGraph):
42 dsk = optimize_blockwise(dsk, keys=keys)
---> 43 dsk = fuse_roots(dsk, keys=keys)
44
45 # Low level task optimizations
~/miniconda3/envs/dcpy/lib/python3.7/site-packages/dask/blockwise.py in fuse_roots(graph, keys)
819 isinstance(layer, Blockwise)
820 and len(deps) > 1
--> 821 and not any(dependencies[dep] for dep in deps) # no need to fuse if 0 or 1
822 and all(len(dependents[dep]) == 1 for dep in deps)
823 ):
~/miniconda3/envs/dcpy/lib/python3.7/site-packages/dask/blockwise.py in <genexpr>(.0)
819 isinstance(layer, Blockwise)
820 and len(deps) > 1
--> 821 and not any(dependencies[dep] for dep in deps) # no need to fuse if 0 or 1
822 and all(len(dependents[dep]) == 1 for dep in deps)
823 ):
KeyError: 'lambda-6720ab0e3639d5c63fc06dfc66a3ce47-x'
This key is not in `dependencies`. From https://github.com/dask/dask/blob/67fb5363009c583c175cb577776a4f2f4e811410/dask/blockwise.py#L816-L826:
for name, layer in graph.layers.items():
deps = graph.dependencies[name]
if (
isinstance(layer, Blockwise)
and len(deps) > 1
and not any(dependencies[dep] for dep in deps) # no need to fuse if 0 or 1
and all(len(dependents[dep]) == 1 for dep in deps)
):
new = toolz.merge(layer, *[layers[dep] for dep in deps])
new, _ = fuse(new, keys, ave_width=len(deps))
I'm not sure whether this is a bug in `fuse_roots`, in `HighLevelGraph.from_collections`, or in how `map_blocks` calls `HighLevelGraph.from_collections` here:
xarray/xarray/core/parallel.py
Line 315 in 69c85b8
Metadata
Metadata
Assignees
Labels
No labels