
combine_first by using apply_ufunc in ops.fillna #1204

Merged (18 commits, Jan 23, 2017)
Changes from 6 commits
33 changes: 33 additions & 0 deletions doc/combining.rst
@@ -13,6 +13,7 @@ Combining data

* For combining datasets or data arrays along a dimension, see concatenate_.
* For combining datasets with different variables, see merge_.
* For combining datasets or data arrays with outer-join alignment, see combine_.
Member:

Maybe: For combining datasets with different coordinates or missing values


.. _concatenate:

@@ -116,6 +117,38 @@ used in the :py:class:`~xarray.Dataset` constructor:

xr.Dataset({'a': arr[:-1], 'b': arr[1:]})

.. _combine:

Combine
~~~~~~~

The instance method ``combine_first`` combines two datasets/data arrays,
preferring the non-null values of the calling object and filling holes with
values from the passed object. The resulting coordinates are the union of the
two sets of coordinate labels; cells left vacant by the outer join are filled
with ``NaN``.

This mimics the behavior of ``pandas.DataFrame.combine_first``.

For data arrays:

.. ipython:: python

ar0 = xr.DataArray([[0, 0], [0, 0]], [('x', ['a', 'b']), ('y', [-1, 0])])
ar1 = xr.DataArray([[1, 1], [1, 1]], [('x', ['b', 'c']), ('y', [0, 1])])
ar2 = xr.DataArray([2], [('x', ['d'])])
ar0.combine_first(ar1)
ar1.combine_first(ar0)
ar0.combine_first(ar2)

For datasets, ``ds0.combine_first(ds1)`` works just like ``xr.merge([ds0, ds1])``
Member:

Again, this isn't quite true -- it succeeds in cases where merge will raise an error. It would be nice to highlight the difference here.
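
A minimal sketch of the difference this comment points out (the shared variable name 'a' is chosen here purely for illustration): when both datasets define the same variable with conflicting non-null values, ``combine_first`` keeps the caller's values, while ``xr.merge`` raises a merge error.

import xarray as xr

# both datasets define variable 'a', with conflicting values at x='b'
ds0 = xr.DataArray([0, 0], [('x', ['a', 'b'])]).to_dataset(name='a')
ds1 = xr.DataArray([1, 1], [('x', ['b', 'c'])]).to_dataset(name='a')

ds0.combine_first(ds1)   # 'a' -> [0, 0, 1] on x=['a', 'b', 'c']; the caller wins at 'b'
xr.merge([ds0, ds1])     # raises a merge error because the values at x='b' conflict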


.. ipython:: python

dsx0 = xr.DataArray([0, 0], [('x', ['a', 'b'])]).to_dataset(name='dsx0')
dsx1 = xr.DataArray([1, 1], [('x', ['b', 'c'])]).to_dataset(name='dsx1')
dsx0.combine_first(dsx1)
xr.merge([dsx1, dsx0])

.. _update:

Update
7 changes: 7 additions & 0 deletions doc/whats-new.rst
@@ -106,6 +106,13 @@ Deprecations

Enhancements
~~~~~~~~~~~~

- Added the xarray equivalent of `pandas.DataFrame.combine_first` as an instance
Member:

just a note -- this will need to go in a new section for 0.9.1, since 0.9.0 will probably be released first.

method to DataArray/Dataset objects, facilitated by the new `ops.fillna`,
which uses `apply_ufunc` to support different `join` options.
Member:

I would drop implementation details like the fact that this uses apply_ufunc from the user facing release notes.

(see :ref:`combine`)
By `Chun-Wei Yuan <https://github.com/chunweiyuan>`_.

- Added the ability to change default automatic alignment (arithmetic_join="inner")
for binary operations via :py:func:`~xarray.set_options()`
(see :ref:`automatic alignment`).
1 change: 1 addition & 0 deletions xarray/core/computation.py
@@ -343,6 +343,7 @@ def apply_dataset_ufunc(func, *args, **kwargs):
list_of_coords = build_output_coords(args, signature, exclude_dims)

args = [getattr(arg, 'data_vars', arg) for arg in args]

result_vars = apply_dict_of_variables_ufunc(
func, *args, signature=signature, join=join, fill_value=fill_value)

22 changes: 20 additions & 2 deletions xarray/core/dataarray.py
@@ -1075,7 +1075,7 @@ def dropna(self, dim, how='any', thresh=None):
ds = self._to_temp_dataset().dropna(dim, how=how, thresh=thresh)
return self._from_temp_dataset(ds)

def fillna(self, value):
def fillna(self, value, join="left"):
Member:

add join to the docstring
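
A sketch of what the docstring entry might look like (numpydoc style; the listed options mirror xarray's align joins and are an assumption here):

join : {'outer', 'inner', 'left', 'right'}, optional
    Method used to join the indexes of this object and ``value`` before
    filling. Defaults to ``'left'``, i.e. keep this object's index.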

"""Fill missing values in this object.

This operation follows the normal broadcasting and alignment rules that
@@ -1097,10 +1097,28 @@ def fillna(value):
if utils.is_dict_like(value):
raise TypeError('cannot provide fill value as a dictionary with '
'fillna on a DataArray')
out = self._fillna(value)
out = ops.fillna(self, value, join=join)
out.attrs = self.attrs
return out

def combine_first(self, other):
Member:

I'm not sure it's worth making a separate method here to save a few characters.

Contributor Author:

I think either we have combine_first for both Dataset and DataArray, or we lump everything into fillna. The use of combine_first is just that there's this concrete direct analogy from pandas. What's the decision here?

Member:

The fact that data_vars_join could sensibly differ for combine_first (as you have implemented it) suggests that a second method is reasonable. Consistency with pandas is also a nice factor. So I guess I've come around!
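
For reference, a quick sketch of the pandas behavior being mirrored (values made up):

import numpy as np
import pandas as pd

s0 = pd.Series([0.0, np.nan], index=['a', 'b'])
s1 = pd.Series([1.0, 1.0], index=['b', 'c'])
# the caller's non-null values win; the result index is the union of both
s0.combine_first(s1)   # a: 0.0, b: 1.0, c: 1.0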

"""Combine two DataArray objects, with union of coordinates.

This operation follows the normal broadcasting and alignment rules of
``join='outer'``. It defaults to the non-null values of the array calling
the method; vacant cells left after alignment are filled with np.nan.

Parameters
----------
other : DataArray
Used to fill all matching missing values in this array.

Returns
-------
DataArray
"""
return self.fillna(other, join="outer")

def reduce(self, func, dim=None, axis=None, keep_attrs=False, **kwargs):
"""Reduce this array by applying `func` along some dimension(s).

30 changes: 29 additions & 1 deletion xarray/core/dataset.py
@@ -1955,7 +1955,35 @@ def fillna(self, value):
-------
Dataset
"""
out = self._fillna(value)
if utils.is_dict_like(value):
value_keys = value.data_vars.keys() if isinstance(value, Dataset)\
Member:

maybe slightly cleaner to use duck typing, e.g., value_keys = getattr(value, 'data_vars', value).keys()

else value.keys()
if not set(value_keys) <= set(self.data_vars.keys()):
Member:

I think we will need to separate the join argument to apply_ufunc into two parts:

  1. The join used for aligning indexes.
  2. The join used for aligning data variables between datasets.

The latter should probably be renamed to something like data_vars_join
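
A small sketch of the two kinds of alignment being distinguished, using a plain binary op (the data is made up for illustration):

import xarray as xr

ds1 = xr.Dataset({'foo': ('x', [1, 2]), 'bar': ('x', [1, 2])}, coords={'x': [0, 1]})
ds2 = xr.Dataset({'bar': ('x', [10, 20]), 'baz': ('x', [10, 20])}, coords={'x': [1, 2]})

# index alignment decides which 'x' labels survive; data-variable alignment
# decides which of foo/bar/baz the result keeps. With the defaults both are
# inner joins, so only 'bar' at the shared label x=1 remains:
ds1 + ds2   # Dataset with bar = [12] on x=[1]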

raise ValueError('all variables in the argument to `fillna` '
'must be contained in the original dataset')
out = ops.fillna(self, value, join="left")
Member:

join should be a parameter for Dataset.fillna, too.
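
A sketch of how the method might read with that change (and the duck-typing suggestion above) applied; it is not standalone and reuses the helpers already visible in this diff:

def fillna(self, value, join="left"):
    """Fill missing values in this object; ``join`` controls index alignment."""
    if utils.is_dict_like(value):
        value_keys = getattr(value, 'data_vars', value).keys()
        if not set(value_keys) <= set(self.data_vars.keys()):
            raise ValueError('all variables in the argument to `fillna` '
                             'must be contained in the original dataset')
    out = ops.fillna(self, value, join=join)
    out._copy_attrs_from(self)
    return out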

out._copy_attrs_from(self)
return out
Member:

This would be a good time to remove all the fillna specific logic from Dataset._calculate_binary_op that is no longer used


def combine_first(self, other):
"""Combine two Datasets, default to data_vars of self.

The new coordinates follow the normal broadcasting and alignment rules
of ``join='outer'``. Vacant cells in the expanded coordinates are
filled with np.nan.

Renders the same result as xr.merge([self, other]).
Member:

This is not quite right -- combine_first/fillna are OK with conflicting values in self and other, but merge will raise an error.


Parameters
----------
other : Dataset
Used to fill all matching missing values in this dataset.

Returns
-------
Dataset
"""
out = ops.fillna(self, other, join="outer")
out._copy_attrs_from(self)
return out

10 changes: 9 additions & 1 deletion xarray/core/groupby.py
@@ -382,7 +382,15 @@ def fillna(self, value):
Dataset.fillna
DataArray.fillna
"""
return self._fillna(value)
def _yield_applied(this, other):
Member:

I think apply_ufunc should even work on groupby objects, so you shouldn't need this helper function. I would be interested to know I'm wrong about that, though.

Contributor Author:

So, if I replace the whole method with just return apply_ufunc(ops.fillna, self, value), I'd fail line 2507 of test_dataset.py, where self.assertEqual(actual.attrs, ds.attrs) fails, because the attrs aren't copied over.

Contributor Author:

One solution is perhaps to add something into apply_groupby_ufunc to make sure the attrs are copied over, similar to what's in _copy_attrs_from in dataset.py/dataarray.py.

Member:

> One solution is perhaps to add something into apply_groupby_ufunc to make sure the attrs are copied over, similar to what's in _copy_attrs_from in dataset.py/dataarray.py.

I like this -- it would be useful for other functions in apply_ufunc, too. For consistency, I would call the new argument keep_attrs.

Or if you don't want to bother with that, could just use _copy_attrs_from in this method:

out = ops.fillna(self, other)
out._copy_attrs_from(self._obj)
return out

Contributor Author:

I copped out and took your simple approach. It's mainly because I'm not sure how to deal with more than 2 objects in apply_ufunc when keep_attrs=True. Do I only take the first object's attributes? Maybe best left for another PR.
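
For reference, a small usage sketch of the group-wise fill this method implements (the data and group labels here are made up):

import numpy as np
import xarray as xr

ds = xr.Dataset({'a': ('x', [np.nan, 1.0, np.nan, 3.0])},
                coords={'x': [0, 1, 2, 3], 'g': ('x', ['p', 'p', 'q', 'q'])})
fill = xr.DataArray([10.0, 20.0], coords={'g': ['p', 'q']}, dims='g')

# each group's NaNs are filled from the value selected by its group label
ds.groupby('g').fillna(fill)   # a -> [10.0, 1.0, 20.0, 3.0]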

"""apply fillna to each individual groupby ds"""
for group_value, obj in this:
other_sel = other.sel(**{this._group.name: group_value})
yield obj.fillna(other_sel)

datasets = _yield_applied(self, value)
combined = self._combine(datasets)
return combined

def where(self, cond):
"""Return an object of the same shape with all entries where cond is
16 changes: 9 additions & 7 deletions xarray/core/ops.py
@@ -279,11 +279,18 @@ def count(data, axis=None):
return sum(~isnull(data), axis=axis)


def fillna(data, other):
def fillna(data, other, join="left"):
"""Fill missing values in this object with data from the other object.
Follows normal broadcasting and alignment rules.
"""
return where(isnull(data), other, data)
from .computation import apply_ufunc

def _fillna(data, other):
left, right = np.broadcast_arrays(data, other)
result = left.copy() # view must be copied before being written
result[isnull(result)] = right[isnull(result)]
Member:

An advantage of using where(isnull(data), other, data) instead of all the logic in this function is that it works on dask arrays, too.

return result
return apply_ufunc(_fillna, data, other, join=join)
Member:

should have dask_array='allowed'

Member:

It might be helpful to separate the join argument to apply_ufunc into two parts:

  1. The join used for aligning indexes.
  2. The join used for aligning data variables between datasets.

For fillna, I think we would always want data_vars_join='left', though I guess it's equivalent to data_vars_join='left' when we constrain value not to contain new keys.
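
A standalone sketch of the where-based kernel suggested above (fillna_kernel is an illustrative name; the module version would use ops.where/ops.isnull, which also dispatch to dask arrays, rather than the NumPy calls used here):

import numpy as np

def fillna_kernel(data, other):
    # take `other` where `data` is null, otherwise keep `data`; there is no
    # in-place assignment, so the same expression also works lazily on dask
    # arrays when spelled with ops.where/ops.isnull
    return np.where(np.isnan(data), other, data)

fillna_kernel(np.array([1.0, np.nan, 3.0]), np.array([9.0, 9.0, 9.0]))
# -> array([1., 9., 3.])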



def where_method(data, cond, other=np.nan):
@@ -445,11 +452,6 @@ def inject_binary_ops(cls, inplace=False):
for name, f in [('eq', array_eq), ('ne', array_ne)]:
setattr(cls, op_str(name), cls._binary_op(f))

# patch in fillna
f = _func_slash_method_wrapper(fillna)
method = cls._binary_op(f, join='left', fillna=True)
setattr(cls, '_fillna', method)

# patch in where
f = _func_slash_method_wrapper(where_method, 'where')
setattr(cls, '_where', cls._binary_op(f))
2 changes: 1 addition & 1 deletion xarray/core/variable.py
@@ -855,7 +855,7 @@ def unstack(self, **dimensions):
return result

def fillna(self, value):
return self._fillna(value)
return ops.fillna(self, value)

def where(self, cond):
return self._where(cond)
21 changes: 21 additions & 0 deletions xarray/test/test_dataarray.py
@@ -2370,6 +2370,27 @@ def test_binary_op_join_setting(self):
expected = xr.DataArray([np.nan, 2, 4, np.nan], [(dim, [0, 1, 2, 3])])
self.assertDataArrayEqual(actual, expected)

def test_combine_first(self):
ar0 = DataArray([[0, 0], [0, 0]], [('x', ['a', 'b']), ('y', [-1, 0])])
ar1 = DataArray([[1, 1], [1, 1]], [('x', ['b', 'c']), ('y', [0, 1])])
ar2 = DataArray([2], [('x', ['d'])])

actual = ar0.combine_first(ar1)
expected = DataArray([[0, 0, np.nan], [0, 0, 1], [np.nan, 1, 1]],
[('x', ['a', 'b', 'c']), ('y', [-1, 0, 1])])
self.assertDataArrayEqual(actual, expected)

actual = ar1.combine_first(ar0)
expected = DataArray([[0, 0, np.nan], [0, 1, 1], [np.nan, 1, 1]],
[('x', ['a', 'b', 'c']), ('y', [-1, 0, 1])])
self.assertDataArrayEqual(actual, expected)

actual = ar0.combine_first(ar2)
expected = DataArray([[0, 0], [0, 0], [2, 2]],
[('x', ['a', 'b', 'd']), ('y', [-1, 0])])
self.assertDataArrayEqual(actual, expected)

Member:

nit: two line breaks should separate code blocks, not three



@pytest.fixture(params=[1])
def da(request):
27 changes: 22 additions & 5 deletions xarray/test/test_dataset.py
@@ -3132,17 +3132,17 @@ def test_binary_op_join_setting(self):

def test_binary_op_join_setting(self):
# arithmetic_join applies to data array coordinates
missing_2 = xr.Dataset({'x':[0, 1]})
missing_0 = xr.Dataset({'x':[1, 2]})
missing_2 = xr.Dataset({'x': [0, 1]})
missing_0 = xr.Dataset({'x': [1, 2]})
with xr.set_options(arithmetic_join='outer'):
actual = missing_2 + missing_0
expected = xr.Dataset({'x':[0, 1, 2]})
expected = xr.Dataset({'x': [0, 1, 2]})
self.assertDatasetEqual(actual, expected)

# arithmetic join also applies to data_vars
ds1 = xr.Dataset({'foo': 1, 'bar': 2})
ds2 = xr.Dataset({'bar': 2, 'baz': 3})
expected = xr.Dataset({'bar': 4}) # default is inner joining
expected = xr.Dataset({'bar': 4}) # default is inner joining
actual = ds1 + ds2
self.assertDatasetEqual(actual, expected)

Expand All @@ -3165,7 +3165,7 @@ def test_full_like(self):
# For more thorough tests, see test_variable.py
# Note: testing data_vars with mismatched dtypes
ds = Dataset({
'd1': DataArray([1,2,3], dims=['x'], coords={'x': [10,20,30]}),
'd1': DataArray([1,2,3], dims=['x'], coords={'x': [10, 20, 30]}),
'd2': DataArray([1.1, 2.2, 3.3], dims=['y'])
}, attrs={'foo': 'bar'})
actual = full_like(ds, 2)
@@ -3186,6 +3186,23 @@ def test_full_like(self):
self.assertEqual(expect['d2'].dtype, bool)
self.assertDatasetIdentical(expect, actual)

def test_combine_first(self): # works just like xr.merge([self, other])
Member:

I would move this comment below, to just above the code block where you compare to the result of using merge

dsx0 = DataArray([0, 0], [('x', ['a', 'b'])]).to_dataset(name='dsx0')
dsx1 = DataArray([1, 1], [('x', ['b', 'c'])]).to_dataset(name='dsx1')

actual = dsx0.combine_first(dsx1)
expected = Dataset({'dsx0': ('x', [0, 0, np.nan]),
'dsx1': ('x', [np.nan, 1, 1])},
coords={'x': ['a', 'b', 'c']})
self.assertDatasetEqual(actual, expected)
self.assertDatasetEqual(actual, xr.merge([dsx0, dsx1]))

dsy2 = DataArray([2, 2, 2], [('x', ['b', 'c', 'd'])]).\
Member:

Per PEP8, please avoid writing \. Either split this on two lines or use parentheses for the implicit line continuation.
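
For example, the same statement using implicit continuation inside parentheses:

dsy2 = (DataArray([2, 2, 2], [('x', ['b', 'c', 'd'])])
        .to_dataset(name='dsy2'))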

to_dataset(name='dsy2')
actual = dsx0.combine_first(dsy2)
expected = xr.merge([dsy2, dsx0])
self.assertDatasetEqual(actual, expected)

### Py.test tests

