Fix optimize for chunked DataArray

TomAugspurger · TomAugspurger · commit 20195ca2112b · 2020-09-17T15:15:11.000-05:00
Previously we generated an invalid Dask task graph, because the lines removed here dropped keys that were referenced elsewhere in the task graph. The original implementation had a comment indicating that this filtering was meant to cull the graph (see https://github.com/pydata/xarray/blame/502a988ad5b87b9f3aeec3033bf55c71272e1053/xarray/core/variable.py#L384 ). Just spot-checking things, I think we're OK here though. Something like `dask.visualize(arr[[0]], optimize_graph=True)` indicates that we're OK. Closes #3698
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -84,6 +84,7 @@ Bug fixes
 - Fix `KeyError` when doing linear interpolation to an nd `DataArray`
   that contains NaNs (:pull:`4233`).
   By `Jens Svensmark <https://github.com/jenssss>`_
+- Fix ``dask.optimize`` on ``DataArray`` producing an invalid Dask task graph (:issue:`3698`).
 
 Documentation
 ~~~~~~~~~~~~~
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
@@ -501,9 +501,6 @@ def __dask_postpersist__(self):
 
     @staticmethod
     def _dask_finalize(results, array_func, array_args, dims, attrs, encoding):
-        if isinstance(results, dict):  # persist case
-            name = array_args[0]
-            results = {k: v for k, v in results.items() if k[0] == name}
         data = array_func(results, *array_args)
         return Variable(dims, data, attrs=attrs, encoding=encoding)
 
diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py
@@ -1607,3 +1607,10 @@ def test_more_transforms_pass_lazy_array_equiv(map_da, map_ds):
         assert_equal(map_da._from_temp_dataset(map_da._to_temp_dataset()), map_da)
         assert_equal(map_da.astype(map_da.dtype), map_da)
         assert_equal(map_da.transpose("y", "x", transpose_coords=False).cxy, map_da.cxy)
+
+
+def test_optimize():
+    a = dask.array.ones((10, 5), chunks=(1, 3))
+    arr = xr.DataArray(a).chunk(5)
+    (arr2,) = dask.optimize(arr)
+    arr2.compute()