
xarray.merge function and major refactor for merge logic #857

Merged: 9 commits, Jul 28, 2016
1 change: 1 addition & 0 deletions doc/api.rst
@@ -17,6 +17,7 @@ Top-level functions
align
broadcast
concat
merge
set_options

Dataset
30 changes: 22 additions & 8 deletions doc/combining.rst
@@ -75,14 +75,15 @@ expensive if you are manipulating your dataset lazily using :ref:`dask`.
Merge
~~~~~

To combine variables and coordinates between multiple Datasets, you can use the
:py:meth:`~xarray.Dataset.merge` and :py:meth:`~xarray.Dataset.update` methods.
Merge checks for conflicting variables before merging and by default it returns
a new Dataset:
To combine variables and coordinates between multiple ``DataArray`` and/or
``Dataset`` objects, use :py:func:`~xarray.merge`. It can merge a list of
``Dataset``, ``DataArray`` or dictionaries of objects convertible to
``DataArray`` objects:

.. ipython:: python

ds.merge({'hello': ('space', np.arange(3) + 10)})
xr.merge([ds, ds.rename({'foo': 'bar'})])
xr.merge([xr.DataArray(n, name='var%d' % n) for n in range(5)])

If you merge another dataset (or a dictionary including data array objects), by
default the resulting dataset will be aligned on the **union** of all index
@@ -91,9 +92,22 @@ coordinates:
.. ipython:: python

other = xr.Dataset({'bar': ('x', [1, 2, 3, 4]), 'x': list('abcd')})
ds.merge(other)

This ensures that the ``merge`` is non-destructive.
xr.merge([ds, other])

This ensures that ``merge`` is non-destructive. ``xarray.MergeError`` is raised
if you attempt to merge two variables with the same name but different values:

.. ipython::

@verbatim
In [1]: xr.merge([ds, ds + 1])
MergeError: conflicting values for variable 'foo' on objects to be combined:
first value: <xarray.Variable (x: 2, y: 3)>
array([[ 0.4691123 , -0.28286334, -1.5090585 ],
[-1.13563237, 1.21211203, -0.17321465]])
second value: <xarray.Variable (x: 2, y: 3)>
array([[ 1.4691123 , 0.71713666, -0.5090585 ],
[-0.13563237, 2.21211203, 0.82678535]])
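
The rule this ``MergeError`` example illustrates can be sketched in plain Python: variables that share a name must have equal values, otherwise the merge fails. This is a simplified illustration using dicts, not xarray's actual implementation (which also aligns indexes and compares arrays element-wise):

```python
class MergeConflict(ValueError):
    """Raised when two inputs define the same name with different values."""


def merge_dicts(objects):
    """Merge a sequence of dicts, rejecting conflicting values for a key."""
    result = {}
    for obj in objects:
        for key, value in obj.items():
            if key in result and result[key] != value:
                raise MergeConflict(
                    "conflicting values for variable %r" % key)
            result[key] = value
    return result


# Non-conflicting names combine; identical duplicates are tolerated.
merged = merge_dicts([{'foo': 1}, {'bar': 2}, {'foo': 1}])
```

As in xarray, merging the same variable twice with identical values succeeds; only differing values for the same name raise.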
Review comment from a member:
I'm a little confused by this MergeError example. Is the error raised because this merge is trying to merge two datasets that have the same variables which have the same coordinates?

Reply from the author:
Yes, that's what this is attempting to indicate. Let me know if this is clearer....


The same non-destructive merging between ``DataArray`` index coordinates is
used in the :py:class:`~xarray.Dataset` constructor:
2 changes: 1 addition & 1 deletion doc/internals.rst
@@ -81,7 +81,7 @@ xarray:
This achieves the same result as if the ``Dataset`` class had a cached property
defined that returns an instance of your class:

.. python::
.. code-block:: python

class Dataset:
...
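
The cached-property behavior described above can be sketched with a small descriptor in plain Python. This is a hedged illustration of the general pattern, not xarray's real accessor machinery; the names ``CachedAccessor`` and ``GeoAccessor`` are hypothetical:

```python
class CachedAccessor:
    """Descriptor that builds an accessor object on first access and caches it."""

    def __init__(self, name, accessor_cls):
        self._name = name
        self._accessor_cls = accessor_cls

    def __get__(self, obj, cls):
        if obj is None:
            # accessed on the class itself, e.g. Dataset.geo
            return self._accessor_cls
        try:
            return obj.__dict__[self._name]
        except KeyError:
            accessor = self._accessor_cls(obj)
            # store in the instance dict so later lookups skip __get__
            obj.__dict__[self._name] = accessor
            return accessor


class GeoAccessor:
    def __init__(self, dataset):
        self.dataset = dataset


class Dataset:
    geo = CachedAccessor('geo', GeoAccessor)


ds = Dataset()
assert ds.geo is ds.geo  # repeated access reuses the cached instance
```

Because ``CachedAccessor`` defines only ``__get__``, it is a non-data descriptor, so the cached value in the instance ``__dict__`` shadows it after the first access.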
4 changes: 4 additions & 0 deletions doc/whats-new.rst
@@ -44,6 +44,10 @@ Enhancements
option that clips coordinate elements that are fully masked. By
`Phillip J. Wolfram <https://github.com/pwolfram>`_.

- New top level :py:func:`merge` function allows for combining variables from
any number of ``Dataset`` and/or ``DataArray`` objects. See :ref:`merge`
for more details. By `Stephan Hoyer <https://github.com/shoyer>`_.

- DataArray and Dataset method :py:meth:`resample` now supports the
``keep_attrs=False`` option that determines whether variable and dataset
attributes are retained in the resampled object. By
1 change: 1 addition & 0 deletions xarray/__init__.py
@@ -5,6 +5,7 @@
from .core.variable import Variable, Coordinate
from .core.dataset import Dataset
from .core.dataarray import DataArray
from .core.merge import merge, MergeError
from .core.options import set_options

from .backends.api import open_dataset, open_mfdataset, save_mfdataset
92 changes: 64 additions & 28 deletions xarray/core/alignment.py
@@ -8,7 +8,7 @@
from . import ops, utils
from .common import _maybe_promote
from .pycompat import iteritems, OrderedDict
from .utils import is_full_slice
from .utils import is_full_slice, is_dict_like
from .variable import Variable, Coordinate, broadcast_variables


@@ -77,13 +77,33 @@ def align(*objects, **kwargs):
aligned : same as *objects
Tuple of objects with aligned coordinates.
"""
return partial_align(*objects, exclude=None, **kwargs)


def partial_align(*objects, **kwargs):
"""partial_align(*objects, join='inner', copy=True, indexes=None,
exclude=set())

Like align, but don't align along dimensions in exclude. Any indexes
explicitly provided with the `indexes` argument should be used in preference
to the aligned indexes.

Not public API.
"""
join = kwargs.pop('join', 'inner')
copy = kwargs.pop('copy', True)
indexes = kwargs.pop('indexes', None)
exclude = kwargs.pop('exclude', None)
if exclude is None:
exclude = set()
if kwargs:
raise TypeError('align() got unexpected keyword arguments: %s'
% list(kwargs))

joined_indexes = _join_indexes(join, objects)
joined_indexes = _join_indexes(join, objects, exclude=exclude)
if indexes is not None:
joined_indexes.update(indexes)

result = []
for obj in objects:
valid_indexers = dict((k, v) for k, v in joined_indexes.items()
@@ -92,36 +112,52 @@ def align(*objects, **kwargs):
return tuple(result)


def partial_align(*objects, **kwargs):
"""partial_align(*objects, join='inner', copy=True, exclude=set()
def is_alignable(obj):
return hasattr(obj, 'indexes') and hasattr(obj, 'reindex')


Like align, but don't align along dimensions in exclude. Not public API.
def deep_align(list_of_variable_maps, join='outer', copy=True, indexes=None):
"""Align objects, recursing into dictionary values.
"""
join = kwargs.pop('join', 'inner')
copy = kwargs.pop('copy', True)
exclude = kwargs.pop('exclude', set())
assert not kwargs
joined_indexes = _join_indexes(join, objects, exclude=exclude)
return tuple(obj.reindex(copy=copy, **joined_indexes) for obj in objects)
if indexes is None:
indexes = {}

# We use keys to identify arguments to align. Integers indicate single
# arguments, while (int, variable_name) pairs indicate variables in ordered
# dictionaries.
keys = []
out = []
targets = []
sentinel = object()
for n, variables in enumerate(list_of_variable_maps):
if is_alignable(variables):
keys.append(n)
targets.append(variables)
out.append(sentinel)
elif is_dict_like(variables):
for k, v in variables.items():
if is_alignable(v) and k not in indexes:
# don't align dict-like variables that are already fixed
# indexes: we might be overwriting these index variables
keys.append((n, k))
targets.append(v)
out.append(OrderedDict(variables))
else:
out.append(variables)

aligned = partial_align(*targets, join=join, copy=copy, indexes=indexes)

def align_variables(variables, join='outer', copy=False):
"""Align all DataArrays in the provided dict, leaving other values alone.
"""
from .dataarray import DataArray
from pandas import Series, DataFrame, Panel

new_variables = OrderedDict(variables)
# if an item is a Series / DataFrame / Panel, try and wrap it in a DataArray constructor
new_variables.update((
(k, DataArray(v)) for k, v in variables.items()
if isinstance(v, (Series, DataFrame, Panel))
))

alignable = [k for k, v in new_variables.items() if hasattr(v, 'indexes')]
aligned = align(*[new_variables[a] for a in alignable], join=join, copy=copy)
new_variables.update(zip(alignable, aligned))
return new_variables
for key, aligned_obj in zip(keys, aligned):
if isinstance(key, tuple):
n, k = key
out[n][k] = aligned_obj
else:
out[key] = aligned_obj

# something went wrong: we should have replaced all sentinel values
assert all(arg is not sentinel for arg in out)

return out


def reindex_variables(variables, indexes, indexers, method=None,
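
The bookkeeping pattern that ``deep_align`` introduces above — collect alignable values from a mixed list of bare objects and dicts, process them in one batch, then scatter results back into their original positions using integer keys for top-level entries and ``(int, name)`` pairs for dict entries — can be sketched generically. This is a simplified pure-Python illustration, not xarray's actual code; ``batch_apply`` is a hypothetical name:

```python
def batch_apply(items, func, is_target):
    """Apply ``func`` to all targets found in ``items`` (bare or inside dicts),
    in a single batch call, preserving every value's original position."""
    keys, targets, out = [], [], []
    sentinel = object()
    for n, item in enumerate(items):
        if is_target(item):
            keys.append(n)            # integer key: top-level entry
            targets.append(item)
            out.append(sentinel)      # placeholder, filled in below
        elif isinstance(item, dict):
            new = dict(item)
            for k, v in item.items():
                if is_target(v):
                    keys.append((n, k))  # pair key: entry inside a dict
                    targets.append(v)
            out.append(new)
        else:
            out.append(item)          # pass through untouched values

    results = func(targets)  # one batch call over all collected targets

    for key, res in zip(keys, results):
        if isinstance(key, tuple):
            n, k = key
            out[n][k] = res
        else:
            out[key] = res

    # every sentinel placeholder must have been replaced
    assert all(x is not sentinel for x in out)
    return out
```

For example, multiplying only the integers in a mixed structure: ``batch_apply([1, {'a': 2, 'b': 'x'}, 'y'], lambda ts: [t * 10 for t in ts], lambda v: isinstance(v, int))`` returns ``[10, {'a': 20, 'b': 'x'}, 'y']``. In ``deep_align`` the batch step is ``partial_align`` and the targets are xarray objects.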
8 changes: 5 additions & 3 deletions xarray/core/combine.py
@@ -3,7 +3,8 @@
import pandas as pd

from . import utils
from .pycompat import iteritems, reduce, OrderedDict, basestring
from .merge import merge
from .pycompat import iteritems, OrderedDict, basestring
from .variable import Variable, as_variable, Coordinate, concat as concat_vars


@@ -69,6 +70,7 @@ def concat(objs, dim=None, data_vars='all', coords='different',

See also
--------
merge
auto_combine
"""
# TODO: add join and ignore_index arguments copied from pandas.concat
@@ -204,6 +206,7 @@ def _dataset_concat(datasets, dim, data_vars, coords, compat, positions):
# list; the gains would be minimal
datasets = [as_dataset(ds) for ds in datasets]
dim, coord = _calc_concat_dim_coord(dim)

concat_over = _calc_concat_over(datasets, dim, data_vars, coords)

def insert_result_variable(k, v):
@@ -217,7 +220,6 @@ def insert_result_variable(k, v):
result_coord_names = set(datasets[0].coords)
result_attrs = datasets[0].attrs

# Dataset({}, attrs=datasets[0].attrs)
for k, v in datasets[0].variables.items():
if k not in concat_over:
insert_result_variable(k, v)
@@ -374,5 +376,5 @@ def auto_combine(datasets, concat_dim=None):
grouped = itertoolz.groupby(lambda ds: tuple(sorted(ds.data_vars)),
datasets).values()
concatenated = [_auto_concat(ds, dim=concat_dim) for ds in grouped]
merged = reduce(lambda ds, other: ds.merge(other), concatenated)
merged = merge(concatenated)
return merged
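
The last hunk replaces a pairwise ``reduce`` of ``ds.merge`` calls with a single n-ary ``merge`` over the whole list. For well-behaved inputs the two give the same result; the n-ary form walks the list once instead of building an intermediate dataset per pair. A sketch with plain dicts (simplified — xarray's real merge also aligns indexes and checks for conflicts):

```python
from functools import reduce


def merge_two(a, b):
    """Pairwise merge, as the old reduce-based code did."""
    out = dict(a)
    out.update(b)
    return out


def merge_all(objects):
    """N-ary merge over a whole list in one pass, as the new code does."""
    out = {}
    for obj in objects:
        out.update(obj)
    return out


parts = [{'a': 1}, {'b': 2}, {'c': 3}]
assert reduce(merge_two, parts) == merge_all(parts)
```

Beyond avoiding intermediates, the n-ary form lets conflict checking see all inputs at once, so error messages can report exactly which objects disagree.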