diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index a9fb3d699ff..502120cd5dc 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -11,7 +11,7 @@ repos:
     hooks:
       - id: flake8
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v0.720  # Must match ci/requirements/*.yml
+    rev: v0.730  # Must match ci/requirements/*.yml
     hooks:
       - id: mypy
   # run these occasionally, ref discussion https://github.com/pydata/xarray/pull/3194
diff --git a/asv_bench/benchmarks/__init__.py b/asv_bench/benchmarks/__init__.py
index 1ffd3afa4ae..b0adb2feafd 100644
--- a/asv_bench/benchmarks/__init__.py
+++ b/asv_bench/benchmarks/__init__.py
@@ -18,7 +18,7 @@ def requires_dask():
     try:
         import dask  # noqa: F401
     except ImportError:
-        raise NotImplementedError
+        raise NotImplementedError()
 
 
 def randn(shape, frac_nan=None, chunks=None, seed=0):
diff --git a/asv_bench/benchmarks/dataset_io.py b/asv_bench/benchmarks/dataset_io.py
index c1567d0b513..d1ffbc34706 100644
--- a/asv_bench/benchmarks/dataset_io.py
+++ b/asv_bench/benchmarks/dataset_io.py
@@ -458,7 +458,7 @@ def setup(self):
         try:
             import distributed
         except ImportError:
-            raise NotImplementedError
+            raise NotImplementedError()
         self.client = distributed.Client()
         self.write = create_delayed_write()
 
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index d023aa317c7..c7f9de73cf4 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -8,8 +8,8 @@ jobs:
 - job: Linux
   strategy:
     matrix:
-      py35-bare-minimum:
-        conda_env: py35-bare-minimum
+      py36-bare-minimum:
+        conda_env: py36-bare-minimum
       py36-min-all-deps:
         conda_env: py36-min-all-deps
       py36-min-nep18:
@@ -82,13 +82,29 @@ jobs:
       mypy .
     displayName: mypy type checks
 
+- job: MinimumVersionsPolicy
+  pool:
+    vmImage: 'ubuntu-16.04'
+  steps:
+  - template: ci/azure/add-conda-to-path.yml
+  - bash: |
+      conda install -y pyyaml
+      python ci/min_deps_check.py ci/requirements/py36-bare-minimum.yml
+      python ci/min_deps_check.py ci/requirements/py36-min-all-deps.yml
+    displayName: minimum versions policy
+
 - job: Docs
   pool:
     vmImage: 'ubuntu-16.04'
   steps:
   - template: ci/azure/install.yml
     parameters:
-      env_file: doc/environment.yml
+      env_file: ci/requirements/doc.yml
+  - bash: |
+      source activate xarray-tests
+      # Replicate the exact environment created by the readthedocs CI
+      conda install --yes --quiet -c pkgs/main mock pillow sphinx sphinx_rtd_theme
+    displayName: Replicate readthedocs CI environment
   - bash: |
       source activate xarray-tests
       cd doc
diff --git a/ci/min_deps_check.py b/ci/min_deps_check.py
new file mode 100755
index 00000000000..3bdd48ca76d
--- /dev/null
+++ b/ci/min_deps_check.py
@@ -0,0 +1,187 @@
+"""Fetch from conda database all available versions of the xarray dependencies and their
+publication date. Compare them against the requirements file given on the command
+line (e.g. requirements/py36-min-all-deps.yml) to verify the policy on obsolete
+dependencies is being followed. Print a pretty report :)
+"""
+import subprocess
+import sys
+from concurrent.futures import ThreadPoolExecutor
+from datetime import datetime, timedelta
+from typing import Dict, Iterator, Tuple
+
+import yaml
+
+# Packages that parse_requirements() skips entirely, whether or not they are
+# pinned in the requirements file.
+IGNORE_DEPS = {
+    "black",
+    "coveralls",
+    "flake8",
+    "hypothesis",
+    "mypy",
+    "pip",
+    "pytest",
+    "pytest-cov",
+    "pytest-env",
+}
+
+# Per-package policy window in months, looked up by process_pkg(); packages that
+# are not listed here fall back to POLICY_MONTHS_DEFAULT.
+POLICY_MONTHS = {"python": 42, "numpy": 24, "pandas": 12, "scipy": 12}
+POLICY_MONTHS_DEFAULT = 6
+
+# Set to True by error(); checked by main() once the report has been printed.
+has_errors = False
+
+
+def error(msg: str) -> None:
+    """Print ``msg`` prefixed with ``ERROR:`` and flag the whole run as failed."""
+    global has_errors
+    print("ERROR:", msg)
+    has_errors = True
+
+
+def parse_requirements(fname) -> Iterator[Tuple[str, int, int]]:
+    """Load requirements/py36-min-all-deps.yml
+
+    Yield (package name, major version, minor version)
+    """
+    global has_errors
+
+    with open(fname) as fh:
+        contents = yaml.safe_load(fh)
+    for row in contents["dependencies"]:
+        if isinstance(row, dict) and list(row) == ["pip"]:
+            continue
+        pkg, eq, version = row.partition("=")
+        if pkg.rstrip("<>") in IGNORE_DEPS:
+            continue
+        if pkg.endswith("<") or pkg.endswith(">") or eq != "=":
+            error("package should be pinned with exact version: " + row)
+            continue
+        try:
+            major, minor = version.split(".")
+        except ValueError:
+            error("expected major.minor (without patch): " + row)
+            continue
+        try:
+            yield pkg, int(major), int(minor)
+        except ValueError:
+            error("failed to parse version: " + row)
+
+
+def query_conda(pkg: str) -> Dict[Tuple[int, int], datetime]:
+    """Query the conda repository for a specific package
+
+    Return map of {(major version, minor version): publication date}
+    """
+    stdout = subprocess.check_output(
+        ["conda", "search", pkg, "--info", "-c", "defaults", "-c", "conda-forge"]
+    )
+    out = {}  # type: Dict[Tuple[int, int], datetime]
+    major = None
+    minor = None
+
+    for row in stdout.decode("utf-8").splitlines():
+        label, _, value = row.partition(":")
+        label = label.strip()
+        if label == "file name":
+            value = value.strip()[len(pkg) :]
+            major, minor = value.split("-")[1].split(".")[:2]
+            major = int(major)
+            minor = int(minor)
+        if label == "timestamp":
+            assert major is not None
+            assert minor is not None
+            ts = datetime.strptime(value.split()[0].strip(), "%Y-%m-%d")
+
+            if (major, minor) in out:
+                out[major, minor] = min(out[major, minor], ts)
+            else:
+                out[major, minor] = ts
+
+    # Hardcoded fix to work around incorrect dates in conda
+    if pkg == "python":
+        out.update(
+            {
+                (2, 7): datetime(2010, 6, 3),
+                (3, 5): datetime(2015, 9, 13),
+                (3, 6): datetime(2016, 12, 23),
+                (3, 7): datetime(2018, 6, 27),
+                (3, 8): datetime(2019, 10, 14),
+            }
+        )
+
+    return out
+
+
+def process_pkg(
+    pkg: str, req_major: int, req_minor: int
+) -> Tuple[str, int, int, str, int, int, str, str]:
+    """Compare package version from requirements file to available versions in conda.
+    Return row to build pandas dataframe:
+
+    - package name
+    - major version in requirements file
+    - minor version in requirements file
+    - publication date of version in requirements file (YYYY-MM-DD)
+    - major version suggested by policy
+    - minor version suggested by policy
+    - publication date of version suggested by policy (YYYY-MM-DD)
+    - status ("<", "=", "> (!)")
+    """
+    print("Analyzing %s..." % pkg)
+    versions = query_conda(pkg)
+
+    try:
+        req_published = versions[req_major, req_minor]
+    except KeyError:
+        error("not found in conda: " + pkg)
+        return pkg, req_major, req_minor, "-", 0, 0, "-", "(!)"
+
+    policy_months = POLICY_MONTHS.get(pkg, POLICY_MONTHS_DEFAULT)
+    policy_published = datetime.now() - timedelta(days=policy_months * 30)
+
+    policy_major = req_major
+    policy_minor = req_minor
+    policy_published_actual = req_published
+    for (major, minor), published in reversed(sorted(versions.items())):
+        if published < policy_published:
+            break
+        policy_major = major
+        policy_minor = minor
+        policy_published_actual = published
+
+    if (req_major, req_minor) < (policy_major, policy_minor):
+        status = "<"
+    elif (req_major, req_minor) > (policy_major, policy_minor):
+        status = "> (!)"
+        error("Package is too new: " + pkg)
+    else:
+        status = "="
+
+    return (
+        pkg,
+        req_major,
+        req_minor,
+        req_published.strftime("%Y-%m-%d"),
+        policy_major,
+        policy_minor,
+        policy_published_actual.strftime("%Y-%m-%d"),
+        status,
+    )
+
+
+def main() -> None:
+    fname = sys.argv[1]
+    with ThreadPoolExecutor(8) as ex:
+        futures = [
+            ex.submit(process_pkg, pkg, major, minor)
+            for pkg, major, minor in parse_requirements(fname)
+        ]
+        rows = [f.result() for f in futures]
+
+    print("Package       Required          Policy            Status")
+    print("------------- ----------------- ----------------- ------")
+    fmt = "{:13} {:>1d}.{:<2d} ({:10}) {:>1d}.{:<2d} ({:10}) {}"
+    for row in rows:
+        print(fmt.format(*row))
+
+    assert not has_errors
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml
new file mode 100644
index 00000000000..e521ee4a4b8
--- /dev/null
+++ b/ci/requirements/doc.yml
@@ -0,0 +1,21 @@
+name: xarray-docs
+channels:
+  # Don't change to pkgs/main, as it causes random timeouts in readthedocs
+  - conda-forge
+dependencies:
+  - python=3.7
+  - bottleneck
+  - cartopy
+  - h5netcdf
+  - ipython
+  - iris
+  - netcdf4
+  - numpy
+  - numpydoc
+  - pandas<0.25  # Hack around https://github.com/pydata/xarray/issues/3369
+  - rasterio
+  - seaborn
+  - sphinx
+  - sphinx-gallery
+  - sphinx_rtd_theme
+  - zarr
diff --git a/ci/requirements/py35-bare-minimum.yml b/ci/requirements/py35-bare-minimum.yml
deleted file mode 100644
index 7651a1bdcf1..00000000000
--- a/ci/requirements/py35-bare-minimum.yml
+++ /dev/null
@@ -1,15 +0,0 @@
-name: xarray-tests
-channels:
-  - conda-forge
-dependencies:
-  - python=3.5.3
-  - pytest
-  - flake8
-  - mock
-  - pip
-  - numpy=1.12
-  - pandas=0.19.2
-  - pip:
-    - pytest-env
-    - pytest-cov
-    - coveralls
diff --git a/ci/requirements/py36-bare-minimum.yml b/ci/requirements/py36-bare-minimum.yml
new file mode 100644
index 00000000000..05186bc8748
--- /dev/null
+++ b/ci/requirements/py36-bare-minimum.yml
@@ -0,0 +1,11 @@
+name: xarray-tests
+channels:
+  - conda-forge
+dependencies:
+  - python=3.6
+  - coveralls
+  - pytest
+  - pytest-cov
+  - pytest-env
+  - numpy=1.14
+  - pandas=0.24
diff --git a/ci/requirements/py36-min-all-deps.yml b/ci/requirements/py36-min-all-deps.yml
index 1829f2a11e3..4e4f8550e16 100644
--- a/ci/requirements/py36-min-all-deps.yml
+++ b/ci/requirements/py36-min-all-deps.yml
@@ -2,42 +2,47 @@ name: xarray-tests
 channels:
   - conda-forge
 dependencies:
-  - python=3.6.7
+  # MINIMUM VERSIONS POLICY: see doc/installing.rst
+  # Run ci/min_deps_check.py to verify that this file respects the policy.
+  # When upgrading python, numpy, or pandas, must also change
+  # doc/installing.rst and setup.py.
+  - python=3.6
   - black
-  - boto3=1.9.235
-  - bottleneck=1.2.1
-  - cdms2=3.1.3
-  - cfgrib=0.9.7.2
-  - cftime=1.0.3.4
+  - boto3=1.9
+  - bottleneck=1.2
+  - cartopy=0.17
+  - cdms2=3.1
+  - cfgrib=0.9
+  - cftime=1.0
   - coveralls
-  - dask=2.4.0
-  - distributed=2.4.0
+  - dask=1.2
+  - distributed=1.27
   - flake8
-  - h5netcdf=0.7.4
-  - h5py=2.10.0
-  - hdf5=1.10.5
+  - h5netcdf=0.7
+  - h5py=2.9  # Policy allows for 2.10, but it's a conflict-fest
+  - hdf5=1.10
   - hypothesis
-  - iris=2.2.0
-  - lxml=4.4.1  # optional dep of pydap
-  - matplotlib=3.1.1
-  - mypy==0.720  # Must match .pre-commit-config.yaml
-  - nc-time-axis=1.2.0
-  - netcdf4=1.5.1.2
-  - numba=0.45.1
-  - numpy=1.17.2
-  - pandas=0.25.1
+  - iris=2.2
+  - lxml=4.4  # Optional dep of pydap
+  - matplotlib=3.1
+  - mypy=0.730  # Must match .pre-commit-config.yaml
+  - nc-time-axis=1.2
+  - netcdf4=1.4
+  - numba=0.44
+  - numpy=1.14
+  - pandas=0.24
   - pip
-  - pseudonetcdf=3.0.2
-  - pydap=3.2.2
-  - pynio=1.5.5
+  - pseudonetcdf=3.0
+  - pydap=3.2
+  - pynio=1.5
   - pytest
   - pytest-cov
   - pytest-env
-  - rasterio=1.0.28
-  - scipy=1.3.1
-  - seaborn=0.9.0
+  - rasterio=1.0
+  - scipy=1.0  # Policy allows for 1.2, but scipy>=1.1 breaks numpy=1.14
+  - seaborn=0.9
   # - sparse  # See py36-min-nep18.yml
-  - toolz=0.10.0
-  - zarr=2.3.2
+  - toolz=0.10
+  - zarr=2.3
   - pip:
     - numbagg==0.1
diff --git a/ci/requirements/py36-min-nep18.yml b/ci/requirements/py36-min-nep18.yml
index 8680e412a99..5b291cf554c 100644
--- a/ci/requirements/py36-min-nep18.yml
+++ b/ci/requirements/py36-min-nep18.yml
@@ -4,14 +4,14 @@ channels:
 dependencies:
   # Optional dependencies that require NEP18, such as sparse,
   # require drastically newer packages than everything else
-  - python=3.6.7
+  - python=3.6
   - coveralls
-  - dask=2.4.0
-  - distributed=2.4.0
+  - dask=2.4
+  - distributed=2.4
   - numpy=1.17
-  - pandas=0.25
+  - pandas=0.24
   - pytest
   - pytest-cov
   - pytest-env
-  - scipy=1.3
-  - sparse=0.8.0
+  - scipy=1.2
+  - sparse=0.8
diff --git a/ci/requirements/py36.yml b/ci/requirements/py36.yml
index 187a9c79fbf..cc91e8a12da 100644
--- a/ci/requirements/py36.yml
+++ b/ci/requirements/py36.yml
@@ -6,8 +6,9 @@ dependencies:
   - black
   - boto3
   - bottleneck
+  - cartopy
   - cdms2
-  - cfgrib>=0.9.2
+  - cfgrib
   - cftime
   - coveralls
   - dask
@@ -17,17 +18,17 @@ dependencies:
   - h5py
   - hdf5
   - hypothesis
-  - iris>=1.10
+  - iris
   - lxml    # optional dep of pydap
   - matplotlib
-  - mypy==0.720  # Must match .pre-commit-config.yaml
+  - mypy=0.730  # Must match .pre-commit-config.yaml
   - nc-time-axis
   - netcdf4
   - numba
-  - numpy>=1.12
-  - pandas>=0.19
+  - numpy
+  - pandas
   - pip
-  - pseudonetcdf>=3.0.1
+  - pseudonetcdf
   - pydap
   - pynio
   - pytest
diff --git a/ci/requirements/py37-windows.yml b/ci/requirements/py37-windows.yml
index 24297327393..bf485b59a49 100644
--- a/ci/requirements/py37-windows.yml
+++ b/ci/requirements/py37-windows.yml
@@ -6,6 +6,7 @@ dependencies:
   - black
   - boto3
   - bottleneck
+  - cartopy
   # - cdms2  # Not available on Windows
   # - cfgrib>=0.9.2  # Causes Python interpreter crash on Windows
   - cftime
@@ -17,17 +18,17 @@ dependencies:
   - h5py
   - hdf5
   - hypothesis
-  - iris>=1.10
-  - lxml    # optional dep of pydap
+  - iris
+  - lxml    # Optional dep of pydap
   - matplotlib
-  - mypy==0.720  # Must match .pre-commit-config.yaml
+  - mypy=0.730  # Must match .pre-commit-config.yaml
   - nc-time-axis
   - netcdf4
   - numba
-  - numpy>=1.12
-  - pandas>=0.19
+  - numpy
+  - pandas
   - pip
-  - pseudonetcdf>=3.0.1
+  - pseudonetcdf
   - pydap
   # - pynio  # Not available on Windows
   - pytest
diff --git a/ci/requirements/py37.yml b/ci/requirements/py37.yml
index 5a328c64cf9..5c9a1cec5b5 100644
--- a/ci/requirements/py37.yml
+++ b/ci/requirements/py37.yml
@@ -6,8 +6,9 @@ dependencies:
   - black
   - boto3
   - bottleneck
+  - cartopy
   - cdms2
-  - cfgrib>=0.9.2
+  - cfgrib
   - cftime
   - coveralls
   - dask
@@ -17,17 +18,17 @@ dependencies:
   - h5py
   - hdf5
   - hypothesis
-  - iris>=1.10
-  - lxml    # optional dep of pydap
+  - iris
+  - lxml    # Optional dep of pydap
   - matplotlib
-  - mypy==0.720  # Must match .pre-commit-config.yaml
+  - mypy=0.730  # Must match .pre-commit-config.yaml
   - nc-time-axis
   - netcdf4
   - numba
-  - numpy>=1.12
-  - pandas>=0.19
+  - numpy
+  - pandas
   - pip
-  - pseudonetcdf>=3.0.1
+  - pseudonetcdf
   - pydap
   - pynio
   - pytest
diff --git a/doc/computation.rst b/doc/computation.rst
index 3d10774bcac..ae5f4bc5c66 100644
--- a/doc/computation.rst
+++ b/doc/computation.rst
@@ -179,7 +179,9 @@ a value when aggregating:
     r = arr.rolling(y=3, center=True, min_periods=2)
     r.mean()
 
-Note that rolling window aggregations are faster when bottleneck_ is installed.
+.. tip::
+
+   Rolling window aggregations are faster and use less memory when bottleneck_
+   is installed. This only applies to numpy-backed xarray objects.
 
 .. _bottleneck: https://github.com/kwgoodman/bottleneck/
 
diff --git a/doc/conf.py b/doc/conf.py
index a80e470af26..7c1557a1e66 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -201,7 +201,7 @@
 
 # Sometimes the savefig directory doesn't exist and needs to be created
 # https://github.com/ipython/ipython/issues/8733
-# becomes obsolete when we can pin ipython>=5.2; see doc/environment.yml
+# becomes obsolete when we can pin ipython>=5.2; see ci/requirements/doc.yml
 ipython_savefig_dir = os.path.join(
     os.path.dirname(os.path.abspath(__file__)), "_build", "html", "_static"
 )
diff --git a/doc/contributing.rst b/doc/contributing.rst
index 53b8cb51f60..66e8377600e 100644
--- a/doc/contributing.rst
+++ b/doc/contributing.rst
@@ -696,7 +696,7 @@ environment by::
 
 or, to use a specific Python interpreter,::
 
-    asv run -e -E existing:python3.5
+    asv run -e -E existing:python3.6
 
 This will display stderr from the benchmarks, and use your local
 ``python`` that comes from your ``$PATH``.
diff --git a/doc/dask.rst b/doc/dask.rst
index 19cbc11292c..5bdbf779463 100644
--- a/doc/dask.rst
+++ b/doc/dask.rst
@@ -5,13 +5,14 @@ Parallel computing with Dask
 
 xarray integrates with `Dask <http://dask.pydata.org/>`__ to support parallel
 computations and streaming computation on datasets that don't fit into memory.
-
 Currently, Dask is an entirely optional feature for xarray. However, the
 benefits of using Dask are sufficiently strong that Dask may become a required
 dependency in a future version of xarray.
 
 For a full example of how to use xarray's Dask integration, read the
-`blog post introducing xarray and Dask`_.
+`blog post introducing xarray and Dask`_. More up-to-date examples
+may be found at the `Pangeo project's use-cases <http://pangeo.io/use_cases/index.html>`_
+and at the `Dask examples website <https://examples.dask.org/xarray.html>`_.
 
 .. _blog post introducing xarray and Dask: http://stephanhoyer.com/2015/06/11/xray-dask-out-of-core-labeled-arrays/
 
@@ -37,13 +38,14 @@ which allows Dask to take full advantage of multiple processors available on
 most modern computers.
 
 For more details on Dask, read `its documentation <http://dask.pydata.org/>`__.
+Note that xarray only makes use of ``dask.array`` and ``dask.delayed``.
 
 .. _dask.io:
 
 Reading and writing data
 ------------------------
 
-The usual way to create a dataset filled with Dask arrays is to load the
+The usual way to create a ``Dataset`` filled with Dask arrays is to load the
 data from a netCDF file or files. You can do this by supplying a ``chunks``
 argument to :py:func:`~xarray.open_dataset` or using the
 :py:func:`~xarray.open_mfdataset` function.
@@ -71,8 +73,8 @@ argument to :py:func:`~xarray.open_dataset` or using the
 
 In this example ``latitude`` and ``longitude`` do not appear in the ``chunks``
 dict, so only one chunk will be used along those dimensions.  It is also
-entirely equivalent to opening a dataset using ``open_dataset`` and then
-chunking the data using the ``chunk`` method, e.g.,
+entirely equivalent to opening a dataset using :py:func:`~xarray.open_dataset`
+and then chunking the data using the ``chunk`` method, e.g.,
 ``xr.open_dataset('example-data.nc').chunk({'time': 10})``.
 
 To open multiple files simultaneously in parallel using Dask delayed,
@@ -80,13 +82,14 @@ use :py:func:`~xarray.open_mfdataset`::
 
     xr.open_mfdataset('my/files/*.nc', parallel=True)
 
-This function will automatically concatenate and merge dataset into one in
+This function will automatically concatenate and merge datasets into one in
 the simple cases that it understands (see :py:func:`~xarray.auto_combine`
-for the full disclaimer). By default, :py:func:`~xarray.open_mfdataset` will chunk each
+for the full disclaimer). By default, :py:func:`~xarray.open_mfdataset` will chunk each
 netCDF file into a single Dask array; again, supply the ``chunks`` argument to
 control the size of the resulting Dask arrays. In more complex cases, you can
-open each file individually using ``open_dataset`` and merge the result, as
-described in :ref:`combining data`.
+open each file individually using :py:func:`~xarray.open_dataset` and merge the result, as
+described in :ref:`combining data`. Passing the keyword argument ``parallel=True`` to
+:py:func:`~xarray.open_mfdataset` will speed up the reading of large multi-file datasets by
+executing those read tasks in parallel using ``dask.delayed``.
 
 You'll notice that printing a dataset still shows a preview of array values,
 even if they are actually Dask arrays. We can do this quickly with Dask because
@@ -106,7 +109,7 @@ usual way.
     ds.to_netcdf('manipulated-example-data.nc')
 
 By setting the ``compute`` argument to ``False``, :py:meth:`~xarray.Dataset.to_netcdf`
-will return a Dask delayed object that can be computed later.
+will return a ``dask.delayed`` object that can be computed later.
 
 .. ipython:: python
 
@@ -153,8 +156,14 @@ explicit conversion step. One notable exception is indexing operations: to
 enable label based indexing, xarray will automatically load coordinate labels
 into memory.
 
+.. tip::
+
+   By default, dask uses its multi-threaded scheduler, which distributes work across
+   multiple cores and allows for processing some datasets that do not fit into memory.
+   For running across a cluster, `set up the distributed scheduler <https://docs.dask.org/en/latest/setup.html>`_.
+
 The easiest way to convert an xarray data structure from lazy Dask arrays into
-eager, in-memory NumPy arrays is to use the :py:meth:`~xarray.Dataset.load` method:
+*eager*, in-memory NumPy arrays is to use the :py:meth:`~xarray.Dataset.load` method:
 
 .. ipython:: python
 
@@ -191,11 +200,20 @@ Dask arrays using the :py:meth:`~xarray.Dataset.persist` method:
 
    ds = ds.persist()
 
-This is particularly useful when using a distributed cluster because the data
-will be loaded into distributed memory across your machines and be much faster
-to use than reading repeatedly from disk.  Warning that on a single machine
-this operation will try to load all of your data into memory.  You should make
-sure that your dataset is not larger than available memory.
+:py:meth:`~xarray.Dataset.persist` is particularly useful when using a
+distributed cluster because the data will be loaded into distributed memory
+across your machines and be much faster to use than reading repeatedly from
+disk.
+
+.. warning::
+
+   On a single machine :py:meth:`~xarray.Dataset.persist` will try to load all of
+   your data into memory. You should make sure that your dataset is not larger than
+   available memory.
+
+.. note::
+   For more on the differences between :py:meth:`~xarray.Dataset.persist` and
+   :py:meth:`~xarray.Dataset.compute` see this `Stack Overflow answer <https://stackoverflow.com/questions/41806850/dask-difference-between-client-persist-and-client-compute>`_ and the `Dask documentation <https://distributed.readthedocs.io/en/latest/manage-computation.html#dask-collections-to-futures>`_.
 
 For performance you may wish to consider chunk sizes.  The correct choice of
 chunk size depends both on your data and on the operations you want to perform.
@@ -381,6 +399,11 @@ one million elements (e.g., a 1000x1000 matrix). With large arrays (10+ GB), the
 cost of queueing up Dask operations can be noticeable, and you may need even
 larger chunksizes.
 
+.. tip::
+
+   Check out the dask documentation on `chunks <https://docs.dask.org/en/latest/array-chunks.html>`_.
+
+
 Optimization Tips
 -----------------
 
@@ -390,4 +413,12 @@ With analysis pipelines involving both spatial subsetting and temporal resamplin
 
 2. Save intermediate results to disk as a netCDF files (using ``to_netcdf()``) and then load them again with ``open_dataset()`` for further computations. For example, if subtracting temporal mean from a dataset, save the temporal mean to disk before subtracting. Again, in theory, Dask should be able to do the computation in a streaming fashion, but in practice this is a fail case for the Dask scheduler, because it tries to keep every chunk of an array that it computes in memory. (See `Dask issue #874 <https://github.com/dask/dask/issues/874>`_)
 
-3. Specify smaller chunks across space when using ``open_mfdataset()`` (e.g., ``chunks={'latitude': 10, 'longitude': 10}``). This makes spatial subsetting easier, because there's no risk you will load chunks of data referring to different chunks (probably not necessary if you follow suggestion 1).
+3. Specify smaller chunks across space when using :py:func:`~xarray.open_mfdataset` (e.g., ``chunks={'latitude': 10, 'longitude': 10}``). This makes spatial subsetting easier, because there's no risk you will load chunks of data referring to different chunks (probably not necessary if you follow suggestion 1).
+
+4. Using the h5netcdf package by passing ``engine='h5netcdf'`` to :py:func:`~xarray.open_mfdataset`
+   can be quicker than the default ``engine='netcdf4'`` that uses the netCDF4 package.
+
+5. Some dask-specific tips may be found `here <https://docs.dask.org/en/latest/array-best-practices.html>`_.
+
+6. The dask `diagnostics <https://docs.dask.org/en/latest/understanding-performance.html>`_ can be
+   useful in identifying performance bottlenecks.
diff --git a/doc/environment.yml b/doc/environment.yml
deleted file mode 100644
index e1b5c5475f7..00000000000
--- a/doc/environment.yml
+++ /dev/null
@@ -1,28 +0,0 @@
-name: xarray-docs
-channels:
-  - conda-forge
-dependencies:
-  - python=3.7
-  - numpy=1.16.0
-  - pandas=0.23.3
-  - scipy=1.2.0
-  - matplotlib=3.0.2
-  - seaborn=0.9.0
-  - dask=1.1.0
-  - ipython=7.2.0
-  - netCDF4=1.4.2
-  - h5netcdf=0.7.4
-  - cartopy=0.17.0
-  - rasterio=1.0.24
-  - zarr=2.2.0
-  - iris=2.2.0
-  - flake8=3.6.0
-  - cftime=1.0.3.4
-  - bottleneck=1.2.1
-  - sphinx=1.8.2
-  - numpydoc=0.8.0
-  - sphinx-gallery=0.2.0
-  - pillow=5.4.1
-  - sphinx_rtd_theme=0.4.2
-  - mock=2.0.0
-  - pip
diff --git a/doc/faq.rst b/doc/faq.rst
index 22a4f6cf095..28a1f7395c3 100644
--- a/doc/faq.rst
+++ b/doc/faq.rst
@@ -11,6 +11,38 @@ Frequently Asked Questions
     import xarray as xr
     np.random.seed(123456)
 
+
+Your documentation keeps mentioning pandas. What is pandas?
+-----------------------------------------------------------
+
+pandas_ is a very popular data analysis package in Python
+with wide usage in many fields. Our API is heavily inspired by pandas —
+this is why there are so many references to pandas.
+
+.. _pandas: https://pandas.pydata.org
+
+
+Do I need to know pandas to use xarray?
+---------------------------------------
+
+No! Our API is heavily inspired by pandas so while knowing pandas will let you
+become productive more quickly, knowledge of pandas is not necessary to use xarray.
+
+
+Should I use xarray instead of pandas?
+--------------------------------------
+
+It's not an either/or choice! xarray provides robust support for converting
+back and forth between the tabular data-structures of pandas and its own
+multi-dimensional data-structures.
+
+That said, you should only bother with xarray if some aspect of data is
+fundamentally multi-dimensional. If your data is unstructured or
+one-dimensional, pandas is usually the right choice: it has better performance
+for common operations such as ``groupby`` and you'll find far more usage
+examples online.
+
+
 Why is pandas not enough?
 -------------------------
 
@@ -56,20 +88,6 @@ of the "time" dimension. You never need to reshape arrays (e.g., with
 ``np.newaxis``) to align them for arithmetic operations in xarray.
 
 
-Should I use xarray instead of pandas?
---------------------------------------
-
-It's not an either/or choice! xarray provides robust support for converting
-back and forth between the tabular data-structures of pandas and its own
-multi-dimensional data-structures.
-
-That said, you should only bother with xarray if some aspect of data is
-fundamentally multi-dimensional. If your data is unstructured or
-one-dimensional, pandas is usually the right choice: it has better performance
-for common operations such as ``groupby`` and you'll find far more usage
-examples online.
-
-
 Why don't aggregations return Python scalars?
 ---------------------------------------------
 
diff --git a/doc/groupby.rst b/doc/groupby.rst
index 0f06cdabbf9..e1d88e289d2 100644
--- a/doc/groupby.rst
+++ b/doc/groupby.rst
@@ -77,7 +77,7 @@ a customized coordinate, but xarray facilitates this via the
     x_bins = [0,25,50]
     ds.groupby_bins('x', x_bins).groups
 
-The binning is implemented via `pandas.cut`__, whose documentation details how
+The binning is implemented via :func:`pandas.cut`, whose documentation details how
 the bins are assigned. As seen in the example above, by default, the bins are
 labeled with strings using set notation to precisely identify the bin limits. To
 override this behavior, you can specify the bin labels explicitly. Here we
@@ -88,8 +88,6 @@ choose `float` labels which identify the bin centers:
     x_bin_labels = [12.5,37.5]
     ds.groupby_bins('x', x_bins, labels=x_bin_labels).groups
 
-__ http://pandas.pydata.org/pandas-docs/version/0.17.1/generated/pandas.cut.html
-
 
 Apply
 ~~~~~
diff --git a/doc/index.rst b/doc/index.rst
index e5bd03801ff..972eb0a732e 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -11,7 +11,7 @@ intuitive, more concise, and less error-prone developer experience.
 The package includes a large and growing library of domain-agnostic functions
 for advanced analytics and visualization with these data structures.
 
-Xarray was inspired by and borrows heavily from pandas_, the popular data
+Xarray is inspired by and borrows heavily from pandas_, the popular data
 analysis package focused on labelled tabular data.
 It is particularly tailored to working with netCDF_ files, which were the
 source of xarray's data model, and integrates tightly with dask_ for parallel
diff --git a/doc/installing.rst b/doc/installing.rst
index a81f6c23328..b1bf072dbe1 100644
--- a/doc/installing.rst
+++ b/doc/installing.rst
@@ -6,9 +6,9 @@ Installation
 Required dependencies
 ---------------------
 
-- Python (3.5.3 or later)
-- `numpy <http://www.numpy.org/>`__ (1.12 or later)
-- `pandas <http://pandas.pydata.org/>`__ (0.19.2 or later)
+- Python (3.6 or later)
+- `numpy <http://www.numpy.org/>`__ (1.14 or later)
+- `pandas <http://pandas.pydata.org/>`__ (0.24 or later)
 
 Optional dependencies
 ---------------------
@@ -32,7 +32,7 @@ For netCDF and IO
   for accessing CAMx, GEOS-Chem (bpch), NOAA ARL files, ICARTT files
   (ffi1001) and many other.
 - `rasterio <https://github.com/mapbox/rasterio>`__: for reading GeoTiffs and
-  other gridded raster datasets. (version 1.0 or later)
+  other gridded raster datasets.
 - `iris <https://github.com/scitools/iris>`__: for conversion to and from iris'
   Cube objects
 - `cfgrib <https://github.com/ecmwf/cfgrib>`__: for reading GRIB files via the
@@ -41,30 +41,67 @@ For netCDF and IO
 For accelerating xarray
 ~~~~~~~~~~~~~~~~~~~~~~~
 
-- `scipy <http://scipy.org/>`__: necessary to enable the interpolation features for xarray objects
+- `scipy <http://scipy.org/>`__: necessary to enable the interpolation features for
+  xarray objects
 - `bottleneck <https://github.com/kwgoodman/bottleneck>`__: speeds up
   NaN-skipping and rolling window aggregations by a large factor
-  (1.1 or later)
 - `numbagg <https://github.com/shoyer/numbagg>`_: for exponential rolling
   window operations
 
 For parallel computing
 ~~~~~~~~~~~~~~~~~~~~~~
 
-- `dask.array <http://dask.pydata.org>`__ (0.16 or later): required for
-  :ref:`dask`.
+- `dask.array <http://dask.pydata.org>`__: required for :ref:`dask`.
 
 For plotting
 ~~~~~~~~~~~~
 
 - `matplotlib <http://matplotlib.org/>`__: required for :ref:`plotting`
-  (1.5 or later)
-- `cartopy <http://scitools.org.uk/cartopy/>`__: recommended for
-  :ref:`plot-maps`
+- `cartopy <http://scitools.org.uk/cartopy/>`__: recommended for :ref:`plot-maps`
 - `seaborn <https://stanford.edu/~mwaskom/software/seaborn/>`__: for better
   color palettes
 - `nc-time-axis <https://github.com/SciTools/nc-time-axis>`__: for plotting
-  cftime.datetime objects (1.2.0 or later)
+  cftime.datetime objects
+
+Alternative data containers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+- `sparse <https://sparse.pydata.org/>`_: for sparse arrays
+- Any numpy-like objects that support
+  `NEP-18 <https://numpy.org/neps/nep-0018-array-function-protocol.html>`_.
+  Note that while such libraries theoretically should work, they are untested.
+  Integration tests are in the process of being written for individual libraries.
+
+
+.. _mindeps_policy:
+
+Minimum dependency versions
+---------------------------
+xarray adopts a rolling policy regarding the minimum supported version of its
+dependencies:
+
+- **Python:** 42 months
+  (`NEP-29 <https://numpy.org/neps/nep-0029-deprecation_policy.html>`_)
+- **numpy:** 24 months
+  (`NEP-29 <https://numpy.org/neps/nep-0029-deprecation_policy.html>`_)
+- **pandas:** 12 months
+- **scipy:** 12 months
+- **sparse** and other libraries that rely on
+  `NEP-18 <https://numpy.org/neps/nep-0018-array-function-protocol.html>`_
+  for integration: very latest available versions only, until the technology has
+  matured. This extends to dask when used in conjunction with any of these libraries.
+  Requires numpy >=1.17.
+- **all other libraries:** 6 months
+
+The above should be interpreted as *the minor version (X.Y) initially published no more
+than N months ago*. Patch versions (x.y.Z) are not pinned, and only the latest available
+at the moment of publishing the xarray release is guaranteed to work.
+
+You can see the actual minimum tested versions:
+
+- `For NEP-18 libraries
+  <https://github.com/pydata/xarray/blob/master/ci/requirements/py36-min-nep18.yml>`_
+- `For everything else
+  <https://github.com/pydata/xarray/blob/master/ci/requirements/py36-min-all-deps.yml>`_
 
 
 Instructions
@@ -93,13 +130,9 @@ pandas) installed first. Then, install xarray with pip::
 Testing
 -------
 
-To run the test suite after installing xarray, first install (via pypi or conda)
-
-- `py.test <https://pytest.org>`__: Simple unit testing library
-- `mock <https://pypi.python.org/pypi/mock>`__: additional testing library required for python version 2
-
-and run
-``py.test --pyargs xarray``.
+To run the test suite after installing xarray, install (via pypi or conda) `py.test
+<https://pytest.org>`__ and run ``pytest`` in the root directory of the xarray
+repository.
 
 
 Performance Monitoring
@@ -110,7 +143,8 @@ A fixed-point performance monitoring of (a part of) our codes can be seen on
 
 To run these benchmark tests in a local machine, first install
 
-- `airspeed-velocity <https://asv.readthedocs.io/en/latest/>`__: a tool for benchmarking Python packages over their lifetime.
+- `airspeed-velocity <https://asv.readthedocs.io/en/latest/>`__: a tool for benchmarking
+  Python packages over their lifetime.
 
 and run
 ``asv run  # this will install some conda environments in ./.asv/envs``
diff --git a/doc/io.rst b/doc/io.rst
index 0943b598a7f..dab2a195e90 100644
--- a/doc/io.rst
+++ b/doc/io.rst
@@ -15,82 +15,6 @@ format (recommended).
     import xarray as xr
     np.random.seed(123456)
 
-.. _io.pickle:
-
-Pickle
-------
-
-The simplest way to serialize an xarray object is to use Python's built-in pickle
-module:
-
-.. ipython:: python
-
-    import pickle
-
-    ds = xr.Dataset({'foo': (('x', 'y'), np.random.rand(4, 5))},
-                    coords={'x': [10, 20, 30, 40],
-                            'y': pd.date_range('2000-01-01', periods=5),
-                            'z': ('x', list('abcd'))})
-
-    # use the highest protocol (-1) because it is way faster than the default
-    # text based pickle format
-    pkl = pickle.dumps(ds, protocol=-1)
-
-    pickle.loads(pkl)
-
-Pickling is important because it doesn't require any external libraries
-and lets you use xarray objects with Python modules like
-:py:mod:`multiprocessing` or :ref:`Dask <dask>`. However, pickling is
-**not recommended for long-term storage**.
-
-Restoring a pickle requires that the internal structure of the types for the
-pickled data remain unchanged. Because the internal design of xarray is still
-being refined, we make no guarantees (at this point) that objects pickled with
-this version of xarray will work in future versions.
-
-.. note::
-
-  When pickling an object opened from a NetCDF file, the pickle file will
-  contain a reference to the file on disk. If you want to store the actual
-  array values, load it into memory first with :py:meth:`~xarray.Dataset.load`
-  or :py:meth:`~xarray.Dataset.compute`.
-
-.. _dictionary io:
-
-Dictionary
-----------
-
-We can convert a ``Dataset`` (or a ``DataArray``) to a dict using
-:py:meth:`~xarray.Dataset.to_dict`:
-
-.. ipython:: python
-
-    d = ds.to_dict()
-    d
-
-We can create a new xarray object from a dict using
-:py:meth:`~xarray.Dataset.from_dict`:
-
-.. ipython:: python
-
-    ds_dict = xr.Dataset.from_dict(d)
-    ds_dict
-
-Dictionary support allows for flexible use of xarray objects. It doesn't
-require external libraries and dicts can easily be pickled, or converted to
-json, or geojson. All the values are converted to lists, so dicts might
-be quite large.
-
-To export just the dataset schema, without the data itself, use the
-``data=False`` option:
-
-.. ipython:: python
-
-    ds.to_dict(data=False)
-
-This can be useful for generating indices of dataset contents to expose to
-search indices or other automated data discovery tools.
-
 .. _io.netcdf:
 
 netCDF
@@ -127,12 +51,25 @@ We can save a Dataset to disk using the
 
 .. ipython:: python
 
+    ds = xr.Dataset({'foo': (('x', 'y'), np.random.rand(4, 5))},
+                    coords={'x': [10, 20, 30, 40],
+                            'y': pd.date_range('2000-01-01', periods=5),
+                            'z': ('x', list('abcd'))})
+
     ds.to_netcdf('saved_on_disk.nc')
 
 By default, the file is saved as netCDF4 (assuming netCDF4-Python is
 installed). You can control the format and engine used to write the file with
 the ``format`` and ``engine`` arguments.
 
+.. tip::
+
+   Using the `h5netcdf <https://github.com/shoyer/h5netcdf>`_  package
+   by passing ``engine='h5netcdf'`` to :py:meth:`~xarray.open_dataset` can
+   sometimes be quicker than the default ``engine='netcdf4'`` that uses the
+   `netCDF4 <https://github.com/Unidata/netcdf4-python>`_ package.
+
+
 We can load netCDF files to create a new Dataset using
 :py:func:`~xarray.open_dataset`:
 
@@ -149,7 +86,15 @@ convert the ``DataArray`` to a ``Dataset`` before saving, and then convert back
 when loading, ensuring that the ``DataArray`` that is loaded is always exactly
 the same as the one that was saved.
 
-Data is always loaded lazily from netCDF files. You can manipulate, slice and subset
+A dataset can also be loaded from or written to a specific group within a netCDF
+file. To load from a group, pass a ``group`` keyword argument to the
+``open_dataset`` function. The group can be specified as a path-like
+string, e.g., to access subgroup 'bar' within group 'foo' pass
+'/foo/bar' as the ``group`` argument. When writing multiple groups in one file,
+pass ``mode='a'`` to ``to_netcdf`` to ensure that each call does not delete the
+file.
+
+Data is *always* loaded lazily from netCDF files. You can manipulate, slice and subset
 Dataset and DataArray objects, and no array values are loaded into memory until
 you try to perform some sort of actual computation. For an example of how these
 lazy arrays work, see the OPeNDAP section below.
@@ -251,8 +196,6 @@ will remove encoding information.
     :suppress:
 
     ds_disk.close()
-    import os
-    os.remove('saved_on_disk.nc')
 
 
 .. _combining multiple files:
@@ -508,6 +451,7 @@ This feature is availabe through :py:func:`DataArray.to_netcdf` and
 and currently raises a warning unless ``invalid_netcdf=True`` is set:
 
 .. ipython:: python
+    :okwarning:
 
     # Writing complex valued data
     da = xr.DataArray([1.+1.j, 2.+2.j, 3.+3.j])
@@ -681,6 +625,83 @@ that require NASA's URS authentication::
 __ http://docs.python-requests.org
 __ http://pydap.readthedocs.io/en/latest/client.html#authentication
 
+.. _io.pickle:
+
+Pickle
+------
+
+The simplest way to serialize an xarray object is to use Python's built-in pickle
+module:
+
+.. ipython:: python
+
+    import pickle
+
+    # use the highest protocol (-1) because it is way faster than the default
+    # text based pickle format
+    pkl = pickle.dumps(ds, protocol=-1)
+
+    pickle.loads(pkl)
+
+Pickling is important because it doesn't require any external libraries
+and lets you use xarray objects with Python modules like
+:py:mod:`multiprocessing` or :ref:`Dask <dask>`. However, pickling is
+**not recommended for long-term storage**.
+
+Restoring a pickle requires that the internal structure of the types for the
+pickled data remain unchanged. Because the internal design of xarray is still
+being refined, we make no guarantees (at this point) that objects pickled with
+this version of xarray will work in future versions.
+
+.. note::
+
+  When pickling an object opened from a NetCDF file, the pickle file will
+  contain a reference to the file on disk. If you want to store the actual
+  array values, load it into memory first with :py:meth:`~xarray.Dataset.load`
+  or :py:meth:`~xarray.Dataset.compute`.
+
+.. _dictionary io:
+
+Dictionary
+----------
+
+We can convert a ``Dataset`` (or a ``DataArray``) to a dict using
+:py:meth:`~xarray.Dataset.to_dict`:
+
+.. ipython:: python
+
+    d = ds.to_dict()
+    d
+
+We can create a new xarray object from a dict using
+:py:meth:`~xarray.Dataset.from_dict`:
+
+.. ipython:: python
+
+    ds_dict = xr.Dataset.from_dict(d)
+    ds_dict
+
+Dictionary support allows for flexible use of xarray objects. It doesn't
+require external libraries and dicts can easily be pickled, or converted to
+json, or geojson. All the values are converted to lists, so dicts might
+be quite large.
+
+To export just the dataset schema, without the data itself, use the
+``data=False`` option:
+
+.. ipython:: python
+
+    ds.to_dict(data=False)
+
+This can be useful for generating indices of dataset contents to expose to
+search indices or other automated data discovery tools.
+
+.. ipython:: python
+    :suppress:
+
+    import os
+    os.remove('saved_on_disk.nc')
+
 .. _io.rasterio:
 
 Rasterio
diff --git a/doc/pandas.rst b/doc/pandas.rst
index 4fa73eec18c..4f3088b4c34 100644
--- a/doc/pandas.rst
+++ b/doc/pandas.rst
@@ -65,8 +65,7 @@ For datasets containing dask arrays where the data should be lazily loaded, see
 
 To create a ``Dataset`` from a ``DataFrame``, use the
 :py:meth:`~xarray.Dataset.from_dataframe` class method or the equivalent
-:py:meth:`pandas.DataFrame.to_xarray <DataFrame.to_xarray>` method (pandas
-v0.18 or later):
+:py:meth:`pandas.DataFrame.to_xarray <DataFrame.to_xarray>` method:
 
 .. ipython:: python
 
diff --git a/doc/plotting.rst b/doc/plotting.rst
index 3e61e85f78c..e9d30fb63c8 100644
--- a/doc/plotting.rst
+++ b/doc/plotting.rst
@@ -249,6 +249,7 @@ As an alternative, also a step plot similar to matplotlib's ``plt.step`` can be
 made using 1D data.
 
 .. ipython:: python
+    :okwarning:
 
     @savefig plotting_example_step.png width=4in
     air1d[:20].plot.step(where='mid')
diff --git a/doc/quick-overview.rst b/doc/quick-overview.rst
index 1224f59515b..7d84199323d 100644
--- a/doc/quick-overview.rst
+++ b/doc/quick-overview.rst
@@ -48,21 +48,21 @@ Here are the key properties for a ``DataArray``:
 Indexing
 --------
 
-xarray supports four kind of indexing. Since we have assigned coordinate labels to the x dimension we can use label-based indexing along that dimension just like pandas. The four examples below all yield the same result but at varying levels of convenience and intuitiveness.
+xarray supports four kinds of indexing. Since we have assigned coordinate labels to the x dimension we can use label-based indexing along that dimension just like pandas. The four examples below all yield the same result (the value at ``x=10``) but at varying levels of convenience and intuitiveness.
 
 .. ipython:: python
 
     # positional and by integer label, like numpy
-    data[[0, 1]]
+    data[0, :]
 
-    # positional and by coordinate label, like pandas
-    data.loc[10:20]
+    # loc or "location": positional and coordinate label, like pandas
+    data.loc[10]
 
-    # by dimension name and integer label
-    data.isel(x=slice(2))
+    # isel or "integer select":  by dimension name and integer label
+    data.isel(x=0)
 
-    # by dimension name and coordinate label
-    data.sel(x=[10, 20])
+    # sel or "select": by dimension name and coordinate label
+    data.sel(x=10)
 
 
 Unlike positional indexing, label-based indexing frees us from having to know how our array is organized. All we need to know are the dimension name and the label we wish to index i.e. ``data.sel(x=10)`` works regardless of whether ``x`` is the first or second dimension of the array and regardless of whether ``10`` is the first or second element of ``x``. We have already told xarray that x is the first dimension when we created ``data``: xarray keeps track of this so we don't have to. For more, see :ref:`indexing`.
diff --git a/doc/related-projects.rst b/doc/related-projects.rst
index 647db5fd8e4..fd77ce56a0a 100644
--- a/doc/related-projects.rst
+++ b/doc/related-projects.rst
@@ -12,6 +12,7 @@ Geosciences
 
 - `aospy <https://aospy.readthedocs.io>`_: Automated analysis and management of gridded climate data.
 - `climpred <https://climpred.readthedocs.io>`_: Analysis of ensemble forecast models for climate prediction.
+- `geocube <https://corteva.github.io/geocube>`_: Tool to convert geopandas vector data into rasterized xarray data.
 - `infinite-diff <https://github.com/spencerahill/infinite-diff>`_: xarray-based finite-differencing, focused on gridded climate/meterology data
 - `marc_analysis <https://github.com/darothen/marc_analysis>`_: Analysis package for CESM/MARC experiments and output.
 - `MetPy <https://unidata.github.io/MetPy/dev/index.html>`_: A collection of tools in Python for reading, visualizing, and performing calculations with weather data.
@@ -25,6 +26,7 @@ Geosciences
   accessing data stored in GAMS Data eXchange (GDX) files. Also uses a custom
   subclass.
 - `Regionmask <https://regionmask.readthedocs.io/>`_: plotting and creation of masks of spatial regions
+- `rioxarray <https://corteva.github.io/rioxarray>`_: geospatial xarray extension powered by rasterio
 - `salem <https://salem.readthedocs.io>`_: Adds geolocalised subsetting, masking, and plotting operations to xarray's data structures via accessors.
 - `SatPy <https://satpy.readthedocs.io/>`_ : Library for reading and manipulating meteorological remote sensing data and writing it to various image and data file formats.
 - `Spyfit <https://spyfit.readthedocs.io/en/master/>`_: FTIR spectroscopy of the atmosphere
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 24a6f65e4da..2852206f3be 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -13,11 +13,36 @@ What's New
     import xarray as xr
     np.random.seed(123456)
 
-.. _whats-new.0.13.1:
+.. _whats-new.0.14.0:
 
-v0.13.1 (unreleased)
+v0.14.0 (unreleased)
 --------------------
 
+Breaking changes
+~~~~~~~~~~~~~~~~
+- This release introduces a rolling policy for minimum dependency versions:
+  :ref:`mindeps_policy`.
+
+  Several minimum versions have been increased:
+
+  ============ ================== ====
+  Package      Old                New
+  ============ ================== ====
+  Python       3.5.3              3.6
+  numpy        1.12               1.14
+  pandas       0.19.2             0.24
+  dask         0.16 (tested: 2.4) 1.2
+  bottleneck   1.1 (tested: 1.2)  1.2
+  matplotlib   1.5 (tested: 3.1)  3.1
+  ============ ================== ====
+
+  Obsolete patch versions (x.y.Z) are not tested anymore.
+  The oldest supported versions of all optional dependencies are now covered by
+  automated tests (before, only the very latest versions were tested).
+
+  (:issue:`3222`, :issue:`3293`, :issue:`3340`, :issue:`3346`, :issue:`3358`).
+  By `Guido Imperiale <https://github.com/crusaderky>`_.
+
 New functions/methods
 ~~~~~~~~~~~~~~~~~~~~~
 
@@ -39,10 +64,10 @@ Bug fixes
 ~~~~~~~~~
 - Reintroduce support for :mod:`weakref` (broken in v0.13.0). Support has been
   reinstated for :class:`DataArray` and :class:`Dataset` objects only. Internal xarray
-  objects remain unaddressable by weakref in order to save memory.
-  (:issue:`3317`) by `Guido Imperiale <https://github.com/crusaderky>`_.
+  objects remain unaddressable by weakref in order to save memory
+  (:issue:`3317`). By `Guido Imperiale <https://github.com/crusaderky>`_.
 - Line plots with the ``x`` or ``y`` argument set to a 1D non-dimensional coord
-  now plot the correct data for 2D DataArrays.
+  now plot the correct data for 2D DataArrays
   (:issue:`3334`). By `Tom Nicholas <http://github.com/TomNicholas>`_.
 - The default behaviour of reducing across all dimensions for
   :py:class:`~xarray.core.groupby.DataArrayGroupBy` objects has now been properly removed
@@ -51,6 +76,11 @@ Bug fixes
   Also raise nicer error message when no groups are created (:issue:`1764`).
   By `Deepak Cherian <https://github.com/dcherian>`_.
 
+- Fix deprecation of default reduction dimension for :py:class:`~xarray.core.groupby.DataArrayGroupBy` objects.
+  (:issue:`3337`). Also raise nicer error message when no groups are created (:issue:`1764`). By `Deepak Cherian <https://github.com/dcherian>`_.
+- Fix error in concatenating unlabeled dimensions (:pull:`3362`).
+  By `Deepak Cherian <https://github.com/dcherian/>`_.
+  
 Documentation
 ~~~~~~~~~~~~~
 
@@ -68,6 +98,7 @@ Documentation
   (:pull:`3353`).
   By `Gregory Gundersen <https://github.com/gwgundersen/>`_.
 
+
 .. _whats-new.0.13.0:
 
 v0.13.0 (17 Sep 2019)
diff --git a/doc/why-xarray.rst b/doc/why-xarray.rst
index 25d558d99d5..be8284d88c2 100644
--- a/doc/why-xarray.rst
+++ b/doc/why-xarray.rst
@@ -1,6 +1,10 @@
 Overview: Why xarray?
 =====================
 
+Xarray introduces labels in the form of dimensions, coordinates and attributes on top of
+raw NumPy-like multidimensional arrays, which allows for a more intuitive, more concise,
+and less error-prone developer experience.
+
 What labels enable
 ------------------
 
@@ -18,13 +22,14 @@ Xarray doesn't just keep track of labels on arrays -- it uses them to provide a
 powerful and concise interface. For example:
 
 -  Apply operations over dimensions by name: ``x.sum('time')``.
--  Select values by label instead of integer location:
+-  Select values by label (or logical location) instead of integer location:
    ``x.loc['2014-01-01']`` or ``x.sel(time='2014-01-01')``.
 -  Mathematical operations (e.g., ``x - y``) vectorize across multiple
    dimensions (array broadcasting) based on dimension names, not shape.
--  Flexible split-apply-combine operations with groupby:
+-  Easily use the `split-apply-combine <https://vita.had.co.nz/papers/plyr.pdf>`_
+   paradigm with ``groupby``:
    ``x.groupby('time.dayofyear').mean()``.
--  Database like alignment based on coordinate labels that smoothly
+-  Database-like alignment based on coordinate labels that smoothly
    handles missing values: ``x, y = xr.align(x, y, join='outer')``.
 -  Keep track of arbitrary metadata in the form of a Python dictionary:
    ``x.attrs``.
@@ -33,8 +38,8 @@ The N-dimensional nature of xarray's data structures makes it suitable for deali
 with multi-dimensional scientific data, and its use of dimension names
 instead of axis labels (``dim='time'`` instead of ``axis=0``) makes such
 arrays much more manageable than the raw numpy ndarray: with xarray, you don't
-need to keep track of the order of arrays dimensions or insert dummy dimensions
-(e.g., ``np.newaxis``) to align arrays.
+need to keep track of the order of an array's dimensions or insert dummy dimensions of
+size 1 to align arrays (e.g., using ``np.newaxis``).
 
 The immediate payoff of using xarray is that you'll write less code. The
 long-term payoff is that you'll understand what you were thinking when you come
@@ -44,7 +49,7 @@ Core data structures
 --------------------
 
 xarray has two core data structures, which build upon and extend the core
-strengths of  NumPy_ and pandas_. Both are fundamentally N-dimensional:
+strengths of  NumPy_ and pandas_. Both data structures are fundamentally N-dimensional:
 
 - :py:class:`~xarray.DataArray` is our implementation of a labeled, N-dimensional
   array. It is an N-D generalization of a :py:class:`pandas.Series`. The name
diff --git a/properties/test_encode_decode.py b/properties/test_encode_decode.py
index c7839608981..011e7a922d1 100644
--- a/properties/test_encode_decode.py
+++ b/properties/test_encode_decode.py
@@ -4,7 +4,7 @@
 These ones pass, just as you'd hope!
 
 """
-import pytest
+import pytest  # isort:skip
 
 pytest.importorskip("hypothesis")
 
diff --git a/readthedocs.yml b/readthedocs.yml
index 8e9c09c9414..6429780e7d7 100644
--- a/readthedocs.yml
+++ b/readthedocs.yml
@@ -1,8 +1,8 @@
 build:
     image: latest
 conda:
-    file: doc/environment.yml
+    file: ci/requirements/doc.yml
 python:
-    version: 3.6
+    version: 3.7
     setup_py_install: true
 formats: []
diff --git a/setup.py b/setup.py
index 5cfa4d9f9df..08d4f54764f 100644
--- a/setup.py
+++ b/setup.py
@@ -16,14 +16,13 @@
     "Intended Audience :: Science/Research",
     "Programming Language :: Python",
     "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.5",
     "Programming Language :: Python :: 3.6",
     "Programming Language :: Python :: 3.7",
     "Topic :: Scientific/Engineering",
 ]
 
-PYTHON_REQUIRES = ">=3.5.3"
-INSTALL_REQUIRES = ["numpy >= 1.12", "pandas >= 0.19.2"]
+PYTHON_REQUIRES = ">=3.6"
+INSTALL_REQUIRES = ["numpy >= 1.14", "pandas >= 0.24"]
 needs_pytest = {"pytest", "test", "ptr"}.intersection(sys.argv)
 SETUP_REQUIRES = ["pytest-runner >= 4.2"] if needs_pytest else []
 TESTS_REQUIRE = ["pytest >= 2.7.1"]
diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index 458a2d0cc42..8f6881b804a 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -912,7 +912,7 @@ def open_mfdataset(
             # Remove this after deprecation cycle from #2616 is complete
             basic_msg = dedent(
                 """\
-            In xarray version 0.14 the default behaviour of `open_mfdataset`
+            In xarray version 0.15 the default behaviour of `open_mfdataset`
             will change. To retain the existing behavior, pass
             combine='nested'. To use future default behavior, pass
             combine='by_coords'. See
@@ -963,11 +963,11 @@ def open_mfdataset(
     return combined
 
 
-WRITEABLE_STORES = {
+WRITEABLE_STORES: Dict[str, Callable] = {
     "netcdf4": backends.NetCDF4DataStore.open,
     "scipy": backends.ScipyDataStore,
     "h5netcdf": backends.H5NetCDFStore,
-}  # type: Dict[str, Callable]
+}
 
 
 def to_netcdf(
diff --git a/xarray/backends/common.py b/xarray/backends/common.py
index 455b77907f9..72c7c5a517f 100644
--- a/xarray/backends/common.py
+++ b/xarray/backends/common.py
@@ -88,13 +88,13 @@ def __len__(self):
         return len(self.variables)
 
     def get_dimensions(self):  # pragma: no cover
-        raise NotImplementedError
+        raise NotImplementedError()
 
     def get_attrs(self):  # pragma: no cover
-        raise NotImplementedError
+        raise NotImplementedError()
 
     def get_variables(self):  # pragma: no cover
-        raise NotImplementedError
+        raise NotImplementedError()
 
     def get_encoding(self):
         return {}
@@ -247,13 +247,13 @@ def encode_attribute(self, a):
         return a
 
     def set_dimension(self, d, l):  # pragma: no cover
-        raise NotImplementedError
+        raise NotImplementedError()
 
     def set_attribute(self, k, v):  # pragma: no cover
-        raise NotImplementedError
+        raise NotImplementedError()
 
     def set_variable(self, k, v):  # pragma: no cover
-        raise NotImplementedError
+        raise NotImplementedError()
 
     def store_dataset(self, dataset):
         """
diff --git a/xarray/backends/file_manager.py b/xarray/backends/file_manager.py
index dfd38ff9f48..0ff574b5d81 100644
--- a/xarray/backends/file_manager.py
+++ b/xarray/backends/file_manager.py
@@ -13,7 +13,7 @@
 assert FILE_CACHE.maxsize, "file cache must be at least size one"
 
 
-REF_COUNTS = {}  # type: Dict[Any, int]
+REF_COUNTS: Dict[Any, int] = {}
 
 _DEFAULT_MODE = utils.ReprObject("<unused>")
 
@@ -28,7 +28,7 @@ class FileManager:
 
     def acquire(self, needs_lock=True):
         """Acquire the file object from this manager."""
-        raise NotImplementedError
+        raise NotImplementedError()
 
     def acquire_context(self, needs_lock=True):
         """Context manager for acquiring a file. Yields a file object.
@@ -37,11 +37,11 @@ def acquire_context(self, needs_lock=True):
         (i.e., removes it from any cache) if an exception is raised from the
         context. It *does not* automatically close the file.
         """
-        raise NotImplementedError
+        raise NotImplementedError()
 
     def close(self, needs_lock=True):
         """Close the file object associated with this manager, if needed."""
-        raise NotImplementedError
+        raise NotImplementedError()
 
 
 class CachingFileManager(FileManager):
diff --git a/xarray/backends/locks.py b/xarray/backends/locks.py
index 865ce1ddccd..d0bf790f074 100644
--- a/xarray/backends/locks.py
+++ b/xarray/backends/locks.py
@@ -21,7 +21,7 @@
 NETCDFC_LOCK = SerializableLock()
 
 
-_FILE_LOCKS = weakref.WeakValueDictionary()  # type: MutableMapping[Any, threading.Lock]
+_FILE_LOCKS: MutableMapping[Any, threading.Lock] = weakref.WeakValueDictionary()
 
 
 def _get_threaded_lock(key):
@@ -72,17 +72,11 @@ def _get_scheduler(get=None, collection=None):
     dask.base.get_scheduler
     """
     try:
-        # dask 0.18.1 and later
-        from dask.base import get_scheduler
-
-        actual_get = get_scheduler(get, collection)
+        import dask  # noqa: F401
     except ImportError:
-        try:
-            from dask.utils import effective_get
+        return None
 
-            actual_get = effective_get(get, collection)
-        except ImportError:
-            return None
+    actual_get = dask.base.get_scheduler(get, collection)
 
     try:
         from dask.distributed import Client
@@ -90,15 +84,12 @@ def _get_scheduler(get=None, collection=None):
         if isinstance(actual_get.__self__, Client):
             return "distributed"
     except (ImportError, AttributeError):
-        try:
-            import dask.multiprocessing
-
-            if actual_get == dask.multiprocessing.get:
-                return "multiprocessing"
-            else:
-                return "threaded"
-        except ImportError:
-            return "threaded"
+        pass
+
+    if actual_get is dask.multiprocessing.get:
+        return "multiprocessing"
+    else:
+        return "threaded"
 
 
 def get_write_lock(key):
diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py
index 813942c2f32..203a2157e70 100644
--- a/xarray/backends/netCDF4_.py
+++ b/xarray/backends/netCDF4_.py
@@ -1,9 +1,7 @@
 import functools
 import operator
-import warnings
 from collections import OrderedDict
 from contextlib import suppress
-from distutils.version import LooseVersion
 
 import numpy as np
 
@@ -354,16 +352,6 @@ def open(
     ):
         import netCDF4
 
-        if len(filename) == 88 and LooseVersion(netCDF4.__version__) < "1.3.1":
-            warnings.warn(
-                "A segmentation fault may occur when the "
-                "file path has exactly 88 characters as it does "
-                "in this case. The issue is known to occur with "
-                "version 1.2.4 of netCDF4 and can be addressed by "
-                "upgrading netCDF4 to at least version 1.3.1. "
-                "More details can be found here: "
-                "https://github.com/pydata/xarray/issues/1745"
-            )
         if format is None:
             format = "NETCDF4"
 
diff --git a/xarray/backends/rasterio_.py b/xarray/backends/rasterio_.py
index 316f13470b7..deff2eaed66 100644
--- a/xarray/backends/rasterio_.py
+++ b/xarray/backends/rasterio_.py
@@ -1,7 +1,6 @@
 import os
 import warnings
 from collections import OrderedDict
-from distutils.version import LooseVersion
 
 import numpy as np
 
@@ -253,18 +252,14 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None, loc
     coords["band"] = np.asarray(riods.indexes)
 
     # Get coordinates
-    if LooseVersion(rasterio.__version__) < "1.0":
-        transform = riods.affine
-    else:
-        transform = riods.transform
-    if transform.is_rectilinear:
+    if riods.transform.is_rectilinear:
         # 1d coordinates
         parse = True if parse_coordinates is None else parse_coordinates
         if parse:
             nx, ny = riods.width, riods.height
             # xarray coordinates are pixel centered
-            x, _ = (np.arange(nx) + 0.5, np.zeros(nx) + 0.5) * transform
-            _, y = (np.zeros(ny) + 0.5, np.arange(ny) + 0.5) * transform
+            x, _ = (np.arange(nx) + 0.5, np.zeros(nx) + 0.5) * riods.transform
+            _, y = (np.zeros(ny) + 0.5, np.arange(ny) + 0.5) * riods.transform
             coords["y"] = y
             coords["x"] = x
     else:
@@ -287,7 +282,7 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None, loc
     # For serialization store as tuple of 6 floats, the last row being
     # always (0, 0, 1) per definition (see
     # https://github.com/sgillies/affine)
-    attrs["transform"] = tuple(transform)[:6]
+    attrs["transform"] = tuple(riods.transform)[:6]
     if hasattr(riods, "crs") and riods.crs:
         # CRS is a dict-like object specific to rasterio
         # If CRS is not None, we convert it back to a PROJ4 string using
diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py
index c4f9666f0c1..7f93ca237b1 100644
--- a/xarray/backends/scipy_.py
+++ b/xarray/backends/scipy_.py
@@ -1,6 +1,4 @@
-import warnings
 from collections import OrderedDict
-from distutils.version import LooseVersion
 from io import BytesIO
 
 import numpy as np
@@ -113,18 +111,6 @@ class ScipyDataStore(WritableCFDataStore):
     def __init__(
         self, filename_or_obj, mode="r", format=None, group=None, mmap=None, lock=None
     ):
-        import scipy
-        import scipy.io
-
-        if mode != "r" and scipy.__version__ < LooseVersion("0.13"):  # pragma: no cover
-            warnings.warn(
-                "scipy %s detected; "
-                "the minimal recommended version is 0.13. "
-                "Older version of this library do not reliably "
-                "read and write files." % scipy.__version__,
-                ImportWarning,
-            )
-
         if group is not None:
             raise ValueError(
                 "cannot save to a group with the " "scipy.io.netcdf backend"
diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index 9a115de55ef..b550efe052e 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -1,6 +1,5 @@
 import warnings
 from collections import OrderedDict
-from distutils.version import LooseVersion
 
 import numpy as np
 
@@ -254,25 +253,6 @@ def open_group(
     ):
         import zarr
 
-        min_zarr = "2.2"
-
-        if LooseVersion(zarr.__version__) < min_zarr:  # pragma: no cover
-            raise NotImplementedError(
-                "Zarr version %s or greater is "
-                "required by xarray. See zarr "
-                "installation "
-                "http://zarr.readthedocs.io/en/stable/"
-                "#installation" % min_zarr
-            )
-
-        if consolidated or consolidate_on_close:
-            if LooseVersion(zarr.__version__) <= "2.2.1.dev2":  # pragma: no cover
-                raise NotImplementedError(
-                    "Zarr version 2.2.1.dev2 or greater "
-                    "is required by for consolidated "
-                    "metadata."
-                )
-
         open_kwargs = dict(mode=mode, synchronizer=synchronizer, path=group)
         if consolidated:
             # TODO: an option to pass the metadata_key keyword
diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py
index 223eff571ae..515d309d75b 100644
--- a/xarray/coding/cftime_offsets.py
+++ b/xarray/coding/cftime_offsets.py
@@ -43,10 +43,11 @@
 import re
 from datetime import timedelta
 from functools import partial
-from typing import ClassVar
+from typing import ClassVar, Optional
 
 import numpy as np
 
+from ..core.pdcompat import count_not_none
 from .cftimeindex import CFTimeIndex, _parse_iso8601_with_reso
 from .times import format_cftime_datetime
 
@@ -73,8 +74,8 @@ def get_date_type(calendar):
 
 
 class BaseCFTimeOffset:
-    _freq = None  # type: ClassVar[str]
-    _day_option = None  # type: ClassVar[str]
+    _freq: ClassVar[Optional[str]] = None
+    _day_option: ClassVar[Optional[str]] = None
 
     def __init__(self, n=1):
         if not isinstance(n, int):
@@ -181,7 +182,7 @@ def _get_day_of_month(other, day_option):
     elif day_option is None:
         # Note: unlike `_shift_month`, _get_day_of_month does not
         # allow day_option = None
-        raise NotImplementedError
+        raise NotImplementedError()
     else:
         raise ValueError(day_option)
 
@@ -350,8 +351,8 @@ class QuarterOffset(BaseCFTimeOffset):
     """Quarter representation copied off of pandas/tseries/offsets.py
     """
 
-    _freq = None  # type: ClassVar[str]
-    _default_month = None  # type: ClassVar[int]
+    _freq: ClassVar[str]
+    _default_month: ClassVar[int]
 
     def __init__(self, n=1, month=None):
         BaseCFTimeOffset.__init__(self, n)
@@ -447,9 +448,9 @@ def rollback(self, date):
 
 
 class YearOffset(BaseCFTimeOffset):
-    _freq = None  # type: ClassVar[str]
-    _day_option = None  # type: ClassVar[str]
-    _default_month = None  # type: ClassVar[int]
+    _freq: ClassVar[str]
+    _day_option: ClassVar[str]
+    _default_month: ClassVar[int]
 
     def __init__(self, n=1, month=None):
         BaseCFTimeOffset.__init__(self, n)
@@ -774,11 +775,6 @@ def _generate_range(start, end, periods, offset):
             current = next_date
 
 
-def _count_not_none(*args):
-    """Compute the number of non-None arguments."""
-    return sum([arg is not None for arg in args])
-
-
 def cftime_range(
     start=None,
     end=None,
@@ -957,7 +953,7 @@ def cftime_range(
     pandas.date_range
     """
     # Adapted from pandas.core.indexes.datetimes._generate_range.
-    if _count_not_none(start, end, periods, freq) != 3:
+    if count_not_none(start, end, periods, freq) != 3:
         raise ValueError(
             "Of the arguments 'start', 'end', 'periods', and 'freq', three "
             "must be specified at a time."
diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py
index 16ab258d32e..802dd94f06c 100644
--- a/xarray/coding/cftimeindex.py
+++ b/xarray/coding/cftimeindex.py
@@ -437,11 +437,6 @@ def __sub__(self, other):
     def __rsub__(self, other):
         return pd.TimedeltaIndex(other - np.array(self))
 
-    def _add_delta(self, deltas):
-        # To support TimedeltaIndex + CFTimeIndex with older versions of
-        # pandas.  No longer used as of pandas 0.23.
-        return self + deltas
-
     def to_datetimeindex(self, unsafe=False):
         """If possible, convert this index to a pandas.DatetimeIndex.
 
diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index 7b5a7c56a53..1508fb50b38 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -6,6 +6,7 @@
 
 import numpy as np
 import pandas as pd
+from pandas.errors import OutOfBoundsDatetime
 
 from ..core import indexing
 from ..core.common import contains_cftime_datetimes
@@ -21,12 +22,6 @@
     unpack_for_encoding,
 )
 
-try:
-    from pandas.errors import OutOfBoundsDatetime
-except ImportError:
-    # pandas < 0.20
-    from pandas.tslib import OutOfBoundsDatetime
-
 
 # standard calendars recognized by cftime
 _STANDARD_CALENDARS = {"standard", "gregorian", "proleptic_gregorian"}
diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py
index f54ae7867d8..f78502d81be 100644
--- a/xarray/coding/variables.py
+++ b/xarray/coding/variables.py
@@ -1,7 +1,7 @@
 """Coders for individual Variable objects."""
 import warnings
 from functools import partial
-from typing import Any
+from typing import Any, Hashable
 
 import numpy as np
 import pandas as pd
@@ -33,15 +33,19 @@ class VariableCoder:
     variables in the underlying store.
     """
 
-    def encode(self, variable, name=None):  # pragma: no cover
-        # type: (Variable, Any) -> Variable
-        """Convert an encoded variable to a decoded variable."""
-        raise NotImplementedError
-
-    def decode(self, variable, name=None):  # pragma: no cover
-        # type: (Variable, Any) -> Variable
-        """Convert an decoded variable to a encoded variable."""
-        raise NotImplementedError
+    def encode(
+        self, variable: Variable, name: Hashable = None
+    ) -> Variable:  # pragma: no cover
+        """Convert a decoded variable to an encoded variable
+        """
+        raise NotImplementedError()
+
+    def decode(
+        self, variable: Variable, name: Hashable = None
+    ) -> Variable:  # pragma: no cover
+        """Convert an encoded variable to a decoded variable
+        """
+        raise NotImplementedError()
 
 
 class _ElementwiseFunctionArray(indexing.ExplicitlyIndexedNDArrayMixin):
diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py
index 4529fa509d9..b4fee1773b8 100644
--- a/xarray/core/alignment.py
+++ b/xarray/core/alignment.py
@@ -268,7 +268,7 @@ def align(
                     all_indexes[dim].append(index)
 
     if join == "override":
-        objects = _override_indexes(list(objects), all_indexes, exclude)
+        objects = _override_indexes(objects, all_indexes, exclude)
 
     # We don't reindex over dimensions with all equal indexes for two reasons:
     # - It's faster for the usual case (already aligned objects).
@@ -365,26 +365,27 @@ def is_alignable(obj):
     targets = []
     no_key = object()
     not_replaced = object()
-    for n, variables in enumerate(objects):
+    for position, variables in enumerate(objects):
         if is_alignable(variables):
-            positions.append(n)
+            positions.append(position)
             keys.append(no_key)
             targets.append(variables)
             out.append(not_replaced)
         elif is_dict_like(variables):
+            current_out = OrderedDict()
             for k, v in variables.items():
-                if is_alignable(v) and k not in indexes:
-                    # Skip variables in indexes for alignment, because these
-                    # should to be overwritten instead:
-                    # https://github.com/pydata/xarray/issues/725
-                    positions.append(n)
+                if is_alignable(v):
+                    positions.append(position)
                     keys.append(k)
                     targets.append(v)
-            out.append(OrderedDict(variables))
+                    current_out[k] = not_replaced
+                else:
+                    current_out[k] = v
+            out.append(current_out)
         elif raise_on_invalid:
             raise ValueError(
                 "object to align is neither an xarray.Dataset, "
-                "an xarray.DataArray nor a dictionary: %r" % variables
+                "an xarray.DataArray nor a dictionary: {!r}".format(variables)
             )
         else:
             out.append(variables)
@@ -405,7 +406,10 @@ def is_alignable(obj):
             out[position][key] = aligned_obj
 
     # something went wrong: we should have replaced all sentinel values
-    assert all(arg is not not_replaced for arg in out)
+    for arg in out:
+        assert arg is not not_replaced
+        if is_dict_like(arg):
+            assert all(value is not not_replaced for value in arg.values())
 
     return out
 
@@ -545,7 +549,7 @@ def reindex_variables(
 
         if dim in variables:
             var = variables[dim]
-            args = (var.attrs, var.encoding)  # type: tuple
+            args: tuple = (var.attrs, var.encoding)
         else:
             args = ()
         reindexed[dim] = IndexVariable((dim,), target, *args)
diff --git a/xarray/core/combine.py b/xarray/core/combine.py
index 38befd5698f..8c3555941c4 100644
--- a/xarray/core/combine.py
+++ b/xarray/core/combine.py
@@ -789,7 +789,7 @@ def auto_combine(
     if not from_openmfds:
         basic_msg = dedent(
             """\
-        In xarray version 0.14 `auto_combine` will be deprecated. See
+        In xarray version 0.15 `auto_combine` will be deprecated. See
         http://xarray.pydata.org/en/stable/combining.html#combining-multi"""
         )
         warnings.warn(basic_msg, FutureWarning, stacklevel=2)
@@ -831,7 +831,7 @@ def auto_combine(
         message += dedent(
             """\
         The datasets supplied require both concatenation and merging. From
-        xarray version 0.14 this will operation will require either using the
+        xarray version 0.15 this will operation will require either using the
         new `combine_nested` function (or the `combine='nested'` option to
         open_mfdataset), with a nested list structure such that you can combine
         along the dimensions {}. Alternatively if your datasets have global
diff --git a/xarray/core/common.py b/xarray/core/common.py
index a8fac245c02..b1a513e05a0 100644
--- a/xarray/core/common.py
+++ b/xarray/core/common.py
@@ -193,10 +193,9 @@ def __init_subclass__(cls):
         """Verify that all subclasses explicitly define ``__slots__``. If they don't,
         raise error in the core xarray module and a FutureWarning in third-party
         extensions.
-        This check is only triggered in Python 3.6+.
         """
         if not hasattr(object.__new__(cls), "__dict__"):
-            cls.__setattr__ = cls._setattr_slots
+            pass
         elif cls.__module__.startswith("xarray."):
             raise AttributeError("%s must explicitly define __slots__" % cls.__name__)
         else:
@@ -230,12 +229,11 @@ def __getattr__(self, name: str) -> Any:
             "%r object has no attribute %r" % (type(self).__name__, name)
         )
 
-    # This complicated three-method design boosts overall performance of simple
-    # operations - particularly DataArray methods that perform a _to_temp_dataset()
-    # round-trip - by a whopping 8% compared to a single method that checks
-    # hasattr(self, "__dict__") at runtime before every single assignment (like
-    # _setattr_py35 does). All of this is just temporary until the FutureWarning can be
-    # changed into a hard crash.
+    # This complicated two-method design boosts overall performance of simple operations
+    # - particularly DataArray methods that perform a _to_temp_dataset() round-trip - by
+    # a whopping 8% compared to a single method that checks hasattr(self, "__dict__") at
+    # runtime before every single assignment. All of this is just temporary until the
+    # FutureWarning can be changed into a hard crash.
     def _setattr_dict(self, name: str, value: Any) -> None:
         """Deprecated third party subclass (see ``__init_subclass__`` above)
         """
@@ -251,7 +249,7 @@ def _setattr_dict(self, name: str, value: Any) -> None:
                 stacklevel=2,
             )
 
-    def _setattr_slots(self, name: str, value: Any) -> None:
+    def __setattr__(self, name: str, value: Any) -> None:
         """Objects with ``__slots__`` raise AttributeError if you try setting an
         undeclared attribute. This is desirable, but the error message could use some
         improvement.
@@ -269,14 +267,6 @@ def _setattr_slots(self, name: str, value: Any) -> None:
                 % (name, type(self).__name__)
             ) from e
 
-    def _setattr_py35(self, name: str, value: Any) -> None:
-        if hasattr(self, "__dict__"):
-            return self._setattr_dict(name, value)
-        return self._setattr_slots(name, value)
-
-    # Overridden in Python >=3.6 by __init_subclass__
-    __setattr__ = _setattr_py35
-
     def __dir__(self) -> List[str]:
         """Provide method name lookup and completion. Only provide 'public'
         methods.
@@ -392,7 +382,7 @@ def get_index(self, key: Hashable) -> pd.Index:
     def _calc_assign_results(
         self: C, kwargs: Mapping[Hashable, Union[T, Callable[[C], T]]]
     ) -> MutableMapping[Hashable, T]:
-        results = SortedKeysDict()  # type: SortedKeysDict[Hashable, T]
+        results: MutableMapping[Hashable, T] = SortedKeysDict()
         for k, v in kwargs.items():
             if callable(v):
                 results[k] = v(self)
@@ -1040,13 +1030,8 @@ def resample(
 
             grouper = CFTimeGrouper(freq, closed, label, base, loffset)
         else:
-            # TODO: to_offset() call required for pandas==0.19.2
             grouper = pd.Grouper(
-                freq=freq,
-                closed=closed,
-                label=label,
-                base=base,
-                loffset=pd.tseries.frequencies.to_offset(loffset),
+                freq=freq, closed=closed, label=label, base=base, loffset=loffset
             )
         group = DataArray(
             dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM
@@ -1216,7 +1201,7 @@ def __exit__(self, exc_type, exc_value, traceback) -> None:
 
     def __getitem__(self, value):
         # implementations of this class should implement this method
-        raise NotImplementedError
+        raise NotImplementedError()
 
 
 def full_like(other, fill_value, dtype: DTypeLike = None):
diff --git a/xarray/core/computation.py b/xarray/core/computation.py
index 0d08234c474..a55613dd4b4 100644
--- a/xarray/core/computation.py
+++ b/xarray/core/computation.py
@@ -5,12 +5,12 @@
 import itertools
 import operator
 from collections import Counter, OrderedDict
-from distutils.version import LooseVersion
 from typing import (
     TYPE_CHECKING,
     AbstractSet,
     Any,
     Callable,
+    Hashable,
     Iterable,
     List,
     Mapping,
@@ -24,15 +24,15 @@
 
 from . import duck_array_ops, utils
 from .alignment import deep_align
-from .merge import expand_and_merge_variables
+from .merge import merge_coordinates_without_align
 from .pycompat import dask_array_type
 from .utils import is_dict_like
 from .variable import Variable
 
 if TYPE_CHECKING:
+    from .coordinates import Coordinates  # noqa
     from .dataset import Dataset
 
-_DEFAULT_FROZEN_SET = frozenset()  # type: frozenset
 _NO_FILL_VALUE = utils.ReprObject("<no-fill-value>")
 _DEFAULT_NAME = utils.ReprObject("<default-name>")
 _JOINS_WITHOUT_FILL_VALUES = frozenset({"inner", "exact"})
@@ -152,17 +152,16 @@ def result_name(objects: list) -> Any:
     return name
 
 
-def _get_coord_variables(args):
-    input_coords = []
+def _get_coords_list(args) -> List["Coordinates"]:
+    coords_list = []
     for arg in args:
         try:
             coords = arg.coords
         except AttributeError:
             pass  # skip this argument
         else:
-            coord_vars = getattr(coords, "variables", coords)
-            input_coords.append(coord_vars)
-    return input_coords
+            coords_list.append(coords)
+    return coords_list
 
 
 def build_output_coords(
@@ -185,32 +184,29 @@ def build_output_coords(
     -------
     OrderedDict of Variable objects with merged coordinates.
     """
-    input_coords = _get_coord_variables(args)
+    coords_list = _get_coords_list(args)
 
-    if exclude_dims:
-        input_coords = [
-            OrderedDict(
-                (k, v) for k, v in coord_vars.items() if exclude_dims.isdisjoint(v.dims)
-            )
-            for coord_vars in input_coords
-        ]
-
-    if len(input_coords) == 1:
+    if len(coords_list) == 1 and not exclude_dims:
         # we can skip the expensive merge
-        unpacked_input_coords, = input_coords
-        merged = OrderedDict(unpacked_input_coords)
+        unpacked_coords, = coords_list
+        merged_vars = OrderedDict(unpacked_coords.variables)
     else:
-        merged = expand_and_merge_variables(input_coords)
+        # TODO: save these merged indexes, instead of re-computing them later
+        merged_vars, unused_indexes = merge_coordinates_without_align(
+            coords_list, exclude_dims=exclude_dims
+        )
 
     output_coords = []
     for output_dims in signature.output_core_dims:
         dropped_dims = signature.all_input_core_dims - set(output_dims)
         if dropped_dims:
             filtered = OrderedDict(
-                (k, v) for k, v in merged.items() if dropped_dims.isdisjoint(v.dims)
+                (k, v)
+                for k, v in merged_vars.items()
+                if dropped_dims.isdisjoint(v.dims)
             )
         else:
-            filtered = merged
+            filtered = merged_vars
         output_coords.append(filtered)
 
     return output_coords
@@ -495,8 +491,11 @@ def unified_dim_sizes(
 SLICE_NONE = slice(None)
 
 
-def broadcast_compat_data(variable, broadcast_dims, core_dims):
-    # type: (Variable, tuple, tuple) -> Any
+def broadcast_compat_data(
+    variable: Variable,
+    broadcast_dims: Tuple[Hashable, ...],
+    core_dims: Tuple[Hashable, ...],
+) -> Any:
     data = variable.data
 
     old_dims = variable.dims
@@ -657,7 +656,7 @@ def func(*arrays):
 def _apply_blockwise(
     func, args, input_dims, output_dims, signature, output_dtypes, output_sizes=None
 ):
-    from .dask_array_compat import blockwise
+    import dask.array
 
     if signature.num_outputs > 1:
         raise NotImplementedError(
@@ -720,7 +719,7 @@ def _apply_blockwise(
         trimmed_dims = dims[-ndim:] if ndim else ()
         blockwise_args.extend([arg, trimmed_dims])
 
-    return blockwise(
+    return dask.array.blockwise(
         func,
         out_ind,
         *blockwise_args,
@@ -998,13 +997,6 @@ def earth_mover_distance(first_samples,
 
     if vectorize:
         if signature.all_core_dims:
-            # we need the signature argument
-            if LooseVersion(np.__version__) < "1.12":  # pragma: no cover
-                raise NotImplementedError(
-                    "numpy 1.12 or newer required when using vectorize=True "
-                    "in xarray.apply_ufunc with non-scalar output core "
-                    "dimensions."
-                )
             func = np.vectorize(
                 func, otypes=output_dtypes, signature=signature.to_gufunc_string()
             )
@@ -1172,25 +1164,6 @@ def dot(*arrays, dims=None, **kwargs):
     ]
     output_core_dims = [tuple(d for d in all_dims if d not in dims + broadcast_dims)]
 
-    # older dask than 0.17.4, we use tensordot if possible.
-    if isinstance(arr.data, dask_array_type):
-        import dask
-
-        if LooseVersion(dask.__version__) < LooseVersion("0.17.4"):
-            if len(broadcast_dims) == 0 and len(arrays) == 2:
-                axes = [
-                    [arr.get_axis_num(d) for d in arr.dims if d in dims]
-                    for arr in arrays
-                ]
-                return apply_ufunc(
-                    duck_array_ops.tensordot,
-                    *arrays,
-                    dask="allowed",
-                    input_core_dims=input_core_dims,
-                    output_core_dims=output_core_dims,
-                    kwargs={"axes": axes}
-                )
-
     # construct einsum subscripts, such as '...abc,...ab->...c'
     # Note: input_core_dims are always moved to the last position
     subscripts_list = [
diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index e68c247d880..75c72c99a42 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -4,7 +4,7 @@
 
 from . import dtypes, utils
 from .alignment import align
-from .merge import unique_variable, _VALID_COMPAT
+from .merge import _VALID_COMPAT, unique_variable
 from .variable import IndexVariable, Variable, as_variable
 from .variable import concat as concat_vars
 
@@ -177,8 +177,6 @@ def _calc_concat_over(datasets, dim, dim_names, data_vars, coords, compat):
             if dim not in ds.dims:
                 if dim in ds:
                     ds = ds.set_coords(dim)
-                else:
-                    raise ValueError("%r is not present in all datasets" % dim)
         concat_over.update(k for k, v in ds.variables.items() if dim in v.dims)
         concat_dim_lengths.append(ds.dims.get(dim, 1))
 
@@ -362,12 +360,21 @@ def ensure_common_dims(vars):
     # n.b. this loop preserves variable order, needed for groupby.
     for k in datasets[0].variables:
         if k in concat_over:
-            vars = ensure_common_dims([ds.variables[k] for ds in datasets])
+            try:
+                vars = ensure_common_dims([ds.variables[k] for ds in datasets])
+            except KeyError:
+                raise ValueError("%r is not present in all datasets." % k)
             combined = concat_vars(vars, dim, positions)
             assert isinstance(combined, Variable)
             result_vars[k] = combined
 
     result = Dataset(result_vars, attrs=result_attrs)
+    absent_coord_names = coord_names - set(result.variables)
+    if absent_coord_names:
+        raise ValueError(
+            "Variables %r are coordinates in some datasets but not others."
+            % absent_coord_names
+        )
     result = result.set_coords(coord_names)
     result.encoding = result_encoding
 
diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py
index ddea5739fff..ce17973866e 100644
--- a/xarray/core/coordinates.py
+++ b/xarray/core/coordinates.py
@@ -17,11 +17,7 @@
 
 from . import formatting, indexing
 from .indexes import Indexes
-from .merge import (
-    expand_and_merge_variables,
-    merge_coords,
-    merge_coords_for_inplace_math,
-)
+from .merge import merge_coordinates_without_align, merge_coords
 from .utils import Frozen, ReprObject, either_dict_or_kwargs
 from .variable import Variable
 
@@ -34,7 +30,7 @@
 _THIS_ARRAY = ReprObject("<this-array>")
 
 
-class AbstractCoordinates(Mapping[Hashable, "DataArray"]):
+class Coordinates(Mapping[Hashable, "DataArray"]):
     __slots__ = ()
 
     def __getitem__(self, key: Hashable) -> "DataArray":
@@ -59,7 +55,7 @@ def indexes(self) -> Indexes:
     def variables(self):
         raise NotImplementedError()
 
-    def _update_coords(self, coords):
+    def _update_coords(self, coords, indexes):
         raise NotImplementedError()
 
     def __iter__(self) -> Iterator["Hashable"]:
@@ -116,19 +112,19 @@ def to_index(self, ordered_dims: Sequence[Hashable] = None) -> pd.Index:
 
     def update(self, other: Mapping[Hashable, Any]) -> None:
         other_vars = getattr(other, "variables", other)
-        coords = merge_coords(
+        coords, indexes = merge_coords(
             [self.variables, other_vars], priority_arg=1, indexes=self.indexes
         )
-        self._update_coords(coords)
+        self._update_coords(coords, indexes)
 
     def _merge_raw(self, other):
         """For use with binary arithmetic."""
         if other is None:
             variables = OrderedDict(self.variables)
+            indexes = OrderedDict(self.indexes)
         else:
-            # don't align because we already called xarray.align
-            variables = expand_and_merge_variables([self.variables, other.variables])
-        return variables
+            variables, indexes = merge_coordinates_without_align([self, other])
+        return variables, indexes
 
     @contextmanager
     def _merge_inplace(self, other):
@@ -136,18 +132,18 @@ def _merge_inplace(self, other):
         if other is None:
             yield
         else:
-            # don't include indexes in priority_vars, because we didn't align
-            # first
-            priority_vars = OrderedDict(
-                kv for kv in self.variables.items() if kv[0] not in self.dims
-            )
-            variables = merge_coords_for_inplace_math(
-                [self.variables, other.variables], priority_vars=priority_vars
+            # don't include indexes in prioritized, because we didn't align
+            # first and we want indexes to be checked
+            prioritized = {
+                k: (v, None) for k, v in self.variables.items() if k not in self.indexes
+            }
+            variables, indexes = merge_coordinates_without_align(
+                [self, other], prioritized
             )
             yield
-            self._update_coords(variables)
+            self._update_coords(variables, indexes)
 
-    def merge(self, other: "AbstractCoordinates") -> "Dataset":
+    def merge(self, other: "Coordinates") -> "Dataset":
         """Merge two sets of coordinates to create a new Dataset
 
         The method implements the logic used for joining coordinates in the
@@ -173,13 +169,19 @@ def merge(self, other: "AbstractCoordinates") -> "Dataset":
 
         if other is None:
             return self.to_dataset()
-        else:
-            other_vars = getattr(other, "variables", other)
-            coords = expand_and_merge_variables([self.variables, other_vars])
-            return Dataset._from_vars_and_coord_names(coords, set(coords))
+
+        if not isinstance(other, Coordinates):
+            other = Dataset(coords=other).coords
+
+        coords, indexes = merge_coordinates_without_align([self, other])
+        coord_names = set(coords)
+        merged = Dataset._construct_direct(
+            variables=coords, coord_names=coord_names, indexes=indexes
+        )
+        return merged
 
 
-class DatasetCoordinates(AbstractCoordinates):
+class DatasetCoordinates(Coordinates):
     """Dictionary like container for Dataset coordinates.
 
     Essentially an immutable OrderedDict with keys given by the array's
@@ -218,7 +220,11 @@ def to_dataset(self) -> "Dataset":
         """
         return self._data._copy_listed(self._names)
 
-    def _update_coords(self, coords: Mapping[Hashable, Any]) -> None:
+    def _update_coords(
+        self,
+        coords: "OrderedDict[Hashable, Variable]",
+        indexes: Mapping[Hashable, pd.Index],
+    ) -> None:
         from .dataset import calculate_dimensions
 
         variables = self._data._variables.copy()
@@ -234,7 +240,12 @@ def _update_coords(self, coords: Mapping[Hashable, Any]) -> None:
         self._data._variables = variables
         self._data._coord_names.update(new_coord_names)
         self._data._dims = dims
-        self._data._indexes = None
+
+        # TODO(shoyer): once ._indexes is always populated by a dict, modify
+        # it to update inplace instead.
+        original_indexes = OrderedDict(self._data.indexes)
+        original_indexes.update(indexes)
+        self._data._indexes = original_indexes
 
     def __delitem__(self, key: Hashable) -> None:
         if key in self:
@@ -251,7 +262,7 @@ def _ipython_key_completions_(self):
         ]
 
 
-class DataArrayCoordinates(AbstractCoordinates):
+class DataArrayCoordinates(Coordinates):
     """Dictionary like container for DataArray coordinates.
 
     Essentially an OrderedDict with keys given by the array's
@@ -274,7 +285,11 @@ def _names(self) -> Set[Hashable]:
     def __getitem__(self, key: Hashable) -> "DataArray":
         return self._data._getitem_coord(key)
 
-    def _update_coords(self, coords) -> None:
+    def _update_coords(
+        self,
+        coords: "OrderedDict[Hashable, Variable]",
+        indexes: Mapping[Hashable, pd.Index],
+    ) -> None:
         from .dataset import calculate_dimensions
 
         coords_plus_data = coords.copy()
@@ -285,7 +300,12 @@ def _update_coords(self, coords) -> None:
                 "cannot add coordinates with new dimensions to " "a DataArray"
             )
         self._data._coords = coords
-        self._data._indexes = None
+
+        # TODO(shoyer): once ._indexes is always populated by a dict, modify
+        # it to update inplace instead.
+        original_indexes = OrderedDict(self._data.indexes)
+        original_indexes.update(indexes)
+        self._data._indexes = original_indexes
 
     @property
     def variables(self):
diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py
deleted file mode 100644
index fe2cdc5c553..00000000000
--- a/xarray/core/dask_array_compat.py
+++ /dev/null
@@ -1,173 +0,0 @@
-from distutils.version import LooseVersion
-
-import dask.array as da
-import numpy as np
-from dask import __version__ as dask_version
-
-try:
-    blockwise = da.blockwise
-except AttributeError:
-    blockwise = da.atop
-
-
-try:
-    from dask.array import isin
-except ImportError:  # pragma: no cover
-    # Copied from dask v0.17.3.
-    # Used under the terms of Dask's license, see licenses/DASK_LICENSE.
-
-    def _isin_kernel(element, test_elements, assume_unique=False):
-        values = np.in1d(element.ravel(), test_elements, assume_unique=assume_unique)
-        return values.reshape(element.shape + (1,) * test_elements.ndim)
-
-    def isin(element, test_elements, assume_unique=False, invert=False):
-        element = da.asarray(element)
-        test_elements = da.asarray(test_elements)
-        element_axes = tuple(range(element.ndim))
-        test_axes = tuple(i + element.ndim for i in range(test_elements.ndim))
-        mapped = blockwise(
-            _isin_kernel,
-            element_axes + test_axes,
-            element,
-            element_axes,
-            test_elements,
-            test_axes,
-            adjust_chunks={axis: lambda _: 1 for axis in test_axes},
-            dtype=bool,
-            assume_unique=assume_unique,
-        )
-        result = mapped.any(axis=test_axes)
-        if invert:
-            result = ~result
-        return result
-
-
-if LooseVersion(dask_version) > LooseVersion("0.19.2"):
-    gradient = da.gradient
-
-else:  # pragma: no cover
-    # Copied from dask v0.19.2
-    # Used under the terms of Dask's license, see licenses/DASK_LICENSE.
-    import math
-    from numbers import Integral, Real
-
-    try:
-        AxisError = np.AxisError
-    except AttributeError:
-        try:
-            np.array([0]).sum(axis=5)
-        except Exception as e:
-            AxisError = type(e)
-
-    def validate_axis(axis, ndim):
-        """ Validate an input to axis= keywords """
-        if isinstance(axis, (tuple, list)):
-            return tuple(validate_axis(ax, ndim) for ax in axis)
-        if not isinstance(axis, Integral):
-            raise TypeError("Axis value must be an integer, got %s" % axis)
-        if axis < -ndim or axis >= ndim:
-            raise AxisError(
-                "Axis %d is out of bounds for array of dimension " "%d" % (axis, ndim)
-            )
-        if axis < 0:
-            axis += ndim
-        return axis
-
-    def _gradient_kernel(x, block_id, coord, axis, array_locs, grad_kwargs):
-        """
-        x: nd-array
-            array of one block
-        coord: 1d-array or scalar
-            coordinate along which the gradient is computed.
-        axis: int
-            axis along which the gradient is computed
-        array_locs:
-            actual location along axis. None if coordinate is scalar
-        grad_kwargs:
-            keyword to be passed to np.gradient
-        """
-        block_loc = block_id[axis]
-        if array_locs is not None:
-            coord = coord[array_locs[0][block_loc] : array_locs[1][block_loc]]
-        grad = np.gradient(x, coord, axis=axis, **grad_kwargs)
-        return grad
-
-    def gradient(f, *varargs, axis=None, **kwargs):
-        f = da.asarray(f)
-
-        kwargs["edge_order"] = math.ceil(kwargs.get("edge_order", 1))
-        if kwargs["edge_order"] > 2:
-            raise ValueError("edge_order must be less than or equal to 2.")
-
-        drop_result_list = False
-        if axis is None:
-            axis = tuple(range(f.ndim))
-        elif isinstance(axis, Integral):
-            drop_result_list = True
-            axis = (axis,)
-
-        axis = validate_axis(axis, f.ndim)
-
-        if len(axis) != len(set(axis)):
-            raise ValueError("duplicate axes not allowed")
-
-        axis = tuple(ax % f.ndim for ax in axis)
-
-        if varargs == ():
-            varargs = (1,)
-        if len(varargs) == 1:
-            varargs = len(axis) * varargs
-        if len(varargs) != len(axis):
-            raise TypeError(
-                "Spacing must either be a single scalar, or a scalar / "
-                "1d-array per axis"
-            )
-
-        if issubclass(f.dtype.type, (np.bool8, Integral)):
-            f = f.astype(float)
-        elif issubclass(f.dtype.type, Real) and f.dtype.itemsize < 4:
-            f = f.astype(float)
-
-        results = []
-        for i, ax in enumerate(axis):
-            for c in f.chunks[ax]:
-                if np.min(c) < kwargs["edge_order"] + 1:
-                    raise ValueError(
-                        "Chunk size must be larger than edge_order + 1. "
-                        "Minimum chunk for aixs {} is {}. Rechunk to "
-                        "proceed.".format(np.min(c), ax)
-                    )
-
-            if np.isscalar(varargs[i]):
-                array_locs = None
-            else:
-                if isinstance(varargs[i], da.Array):
-                    raise NotImplementedError(
-                        "dask array coordinated is not supported."
-                    )
-                # coordinate position for each block taking overlap into
-                # account
-                chunk = np.array(f.chunks[ax])
-                array_loc_stop = np.cumsum(chunk) + 1
-                array_loc_start = array_loc_stop - chunk - 2
-                array_loc_stop[-1] -= 1
-                array_loc_start[0] = 0
-                array_locs = (array_loc_start, array_loc_stop)
-
-            results.append(
-                f.map_overlap(
-                    _gradient_kernel,
-                    dtype=f.dtype,
-                    depth={j: 1 if j == ax else 0 for j in range(f.ndim)},
-                    boundary="none",
-                    coord=varargs[i],
-                    axis=ax,
-                    array_locs=array_locs,
-                    grad_kwargs=kwargs,
-                )
-            )
-
-        if drop_result_list:
-            results = results[0]
-
-        return results
diff --git a/xarray/core/dask_array_ops.py b/xarray/core/dask_array_ops.py
index 11fdb86e9b0..37f261cc3ad 100644
--- a/xarray/core/dask_array_ops.py
+++ b/xarray/core/dask_array_ops.py
@@ -1,26 +1,13 @@
-from distutils.version import LooseVersion
-
 import numpy as np
 
 from . import dtypes, nputils
 
-try:
-    import dask
-    import dask.array as da
-
-    # Note: dask has used `ghost` before 0.18.2
-    if LooseVersion(dask.__version__) <= LooseVersion("0.18.2"):
-        overlap = da.ghost.ghost
-        trim_internal = da.ghost.trim_internal
-    else:
-        overlap = da.overlap.overlap
-        trim_internal = da.overlap.trim_internal
-except ImportError:
-    pass
-
 
 def dask_rolling_wrapper(moving_func, a, window, min_count=None, axis=-1):
-    """wrapper to apply bottleneck moving window funcs on dask arrays"""
+    """Wrapper to apply bottleneck moving window funcs on dask arrays
+    """
+    import dask.array as da
+
     dtype, fill_value = dtypes.maybe_promote(a.dtype)
     a = a.astype(dtype)
     # inputs for overlap
@@ -30,18 +17,21 @@ def dask_rolling_wrapper(moving_func, a, window, min_count=None, axis=-1):
     depth[axis] = (window + 1) // 2
     boundary = {d: fill_value for d in range(a.ndim)}
     # Create overlap array.
-    ag = overlap(a, depth=depth, boundary=boundary)
+    ag = da.overlap.overlap(a, depth=depth, boundary=boundary)
     # apply rolling func
     out = ag.map_blocks(
         moving_func, window, min_count=min_count, axis=axis, dtype=a.dtype
     )
     # trim array
-    result = trim_internal(out, depth)
+    result = da.overlap.trim_internal(out, depth)
     return result
 
 
 def rolling_window(a, axis, window, center, fill_value):
-    """ Dask's equivalence to np.utils.rolling_window """
+    """Dask's equivalence to np.utils.rolling_window
+    """
+    import dask.array as da
+
     orig_shape = a.shape
     if axis < 0:
         axis = a.ndim + axis
@@ -59,7 +49,7 @@ def rolling_window(a, axis, window, center, fill_value):
             % (window, depth[axis], min(a.chunks[axis]))
         )
 
-    # Although dask.overlap pads values to boundaries of the array,
+    # Although da.overlap pads values to boundaries of the array,
     # the size of the generated array is smaller than what we want
     # if center == False.
     if center:
@@ -88,7 +78,7 @@ def rolling_window(a, axis, window, center, fill_value):
     boundary = {d: fill_value for d in range(a.ndim)}
 
     # create overlap arrays
-    ag = overlap(a, depth=depth, boundary=boundary)
+    ag = da.overlap.overlap(a, depth=depth, boundary=boundary)
 
     # apply rolling func
     def func(x, window, axis=-1):
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 68bfe301bfc..d536d0de2c5 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -1,5 +1,4 @@
 import functools
-import sys
 import warnings
 from collections import OrderedDict
 from numbers import Number
@@ -323,7 +322,7 @@ def __init__(
         if encoding is not None:
             warnings.warn(
                 "The `encoding` argument to `DataArray` is deprecated, and . "
-                "will be removed in 0.14. "
+                "will be removed in 0.15. "
                 "Instead, specify the encoding when writing to disk or "
                 "set the `encoding` attribute directly.",
                 FutureWarning,
@@ -419,7 +418,7 @@ def _overwrite_indexes(self, indexes: Mapping[Hashable, Any]) -> "DataArray":
         obj = self._replace(coords=coords)
 
         # switch from dimension to level names, if necessary
-        dim_names = {}  # type: Dict[Any, str]
+        dim_names: Dict[Any, str] = {}
         for dim, idx in indexes.items():
             if not isinstance(idx, pd.MultiIndex) and idx.name != dim:
                 dim_names[dim] = idx.name
@@ -1184,12 +1183,11 @@ def reindex_like(
             * None (default): don't fill gaps
             * pad / ffill: propagate last valid index value forward
             * backfill / bfill: propagate next valid index value backward
-            * nearest: use nearest valid index value (requires pandas>=0.16)
+            * nearest: use nearest valid index value
         tolerance : optional
             Maximum distance between original and new labels for inexact
             matches. The values of the index at the matching locations must
             satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
-            Requires pandas>=0.17.
         copy : bool, optional
             If ``copy=True``, data in the return value is always copied. If
             ``copy=False`` and reindexing is unnecessary, or can be performed
@@ -1250,7 +1248,7 @@ def reindex(
             * None (default): don't fill gaps
             * pad / ffill: propagate last valid index value forward
             * backfill / bfill: propagate next valid index value backward
-            * nearest: use nearest valid index value (requires pandas>=0.16)
+            * nearest: use nearest valid index value
         tolerance : optional
             Maximum distance between original and new labels for inexact
             matches. The values of the index at the matching locations must
@@ -1504,9 +1502,7 @@ def expand_dims(
             with length 1. If provided as a dict, then the keys are the new
             dimensions and the values are either integers (giving the length of
             the new dimensions) or sequence/ndarray (giving the coordinates of
-            the new dimensions). **WARNING** for python 3.5, if ``dim`` is
-            dict-like, then it must be an ``OrderedDict``. This is to ensure
-            that the order in which the dims are given is maintained.
+            the new dimensions).
         axis : integer, list (or tuple) of integers, or None
             Axis position(s) where new axis is to be inserted (position(s) on
             the result array). If a list (or tuple) of integers is passed,
@@ -1517,8 +1513,7 @@ def expand_dims(
             The keywords are arbitrary dimensions being inserted and the values
             are either the lengths of the new dims (if int is given), or their
             coordinates. Note, this is an alternative to passing a dict to the
-            dim kwarg and will only be used if dim is None. **WARNING** for
-            python 3.5 ``dim_kwargs`` is not available.
+            dim kwarg and will only be used if dim is None.
 
         Returns
         -------
@@ -1534,16 +1529,6 @@ def expand_dims(
         elif dim is not None and not isinstance(dim, Mapping):
             dim = OrderedDict(((cast(Hashable, dim), 1),))
 
-        # TODO: get rid of the below code block when python 3.5 is no longer
-        #   supported.
-        python36_plus = sys.version_info[0] == 3 and sys.version_info[1] > 5
-        not_ordereddict = dim is not None and not isinstance(dim, OrderedDict)
-        if not python36_plus and not_ordereddict:
-            raise TypeError("dim must be an OrderedDict for python <3.6")
-        elif not python36_plus and dim_kwargs:
-            raise ValueError("dim_kwargs isn't available for python <3.6")
-        dim_kwargs = OrderedDict(dim_kwargs)
-
         dim = either_dict_or_kwargs(dim, dim_kwargs, "expand_dims")
         ds = self._to_temp_dataset().expand_dims(dim, axis)
         return self._from_temp_dataset(ds)
@@ -2519,7 +2504,7 @@ def func(self, other):
                 if not reflexive
                 else f(other_variable, self.variable)
             )
-            coords = self.coords._merge_raw(other_coords)
+            coords, indexes = self.coords._merge_raw(other_coords)
             name = self._result_name(other)
 
             return self._replace(variable, coords, name)
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 9a1339cf528..1d9ef6f7a72 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -3,7 +3,6 @@
 import sys
 import warnings
 from collections import OrderedDict, defaultdict
-from distutils.version import LooseVersion
 from numbers import Number
 from pathlib import Path
 from typing import (
@@ -41,7 +40,6 @@
     formatting,
     groupby,
     ops,
-    pdcompat,
     resample,
     rolling,
     utils,
@@ -64,8 +62,8 @@
 from .merge import (
     dataset_merge_method,
     dataset_update_method,
+    merge_coordinates_without_align,
     merge_data_and_coords,
-    merge_variables,
 )
 from .options import OPTIONS, _get_keep_attrs
 from .pycompat import dask_array_type
@@ -76,16 +74,16 @@
     decode_numpy_dict_values,
     either_dict_or_kwargs,
     hashable,
-    maybe_wrap_array,
     is_dict_like,
     is_list_like,
+    maybe_wrap_array,
 )
 from .variable import IndexVariable, Variable, as_variable, broadcast_variables
 
 if TYPE_CHECKING:
     from ..backends import AbstractDataStore, ZarrStore
     from .dataarray import DataArray
-    from .merge import DatasetLike
+    from .merge import CoercibleMapping
 
     try:
         from dask.delayed import Delayed
@@ -132,8 +130,9 @@ def _get_virtual_variable(
         raise KeyError(key)
 
     split_key = key.split(".", 1)
+    var_name: Optional[str]
     if len(split_key) == 2:
-        ref_name, var_name = split_key  # type: str, Optional[str]
+        ref_name, var_name = split_key
     elif len(split_key) == 1:
         ref_name, var_name = key, None
     else:
@@ -165,7 +164,7 @@ def calculate_dimensions(variables: Mapping[Hashable, Variable]) -> "Dict[Any, i
     Returns dictionary mapping from dimension names to sizes. Raises ValueError
     if any of the dimension sizes conflict.
     """
-    dims = {}  # type: Dict[Any, int]
+    dims: Dict[Any, int] = {}
     last_used = {}
     scalar_vars = {k for k, v in variables.items() if not v.dims}
     for k, var in variables.items():
@@ -197,15 +196,17 @@ def merge_indexes(
     Not public API. Used in Dataset and DataArray set_index
     methods.
     """
-    vars_to_replace = {}  # Dict[Any, Variable]
-    vars_to_remove = []  # type: list
+    vars_to_replace: Dict[Hashable, Variable] = {}
+    vars_to_remove: List[Hashable] = []
     error_msg = "{} is not the name of an existing variable."
 
     for dim, var_names in indexes.items():
         if isinstance(var_names, str) or not isinstance(var_names, Sequence):
             var_names = [var_names]
 
-        names, codes, levels = [], [], []  # type: (list, list, list)
+        names: List[Hashable] = []
+        codes: List[List[int]] = []
+        levels: List[List[int]] = []
         current_index_variable = variables.get(dim)
 
         for n in var_names:
@@ -225,13 +226,8 @@ def merge_indexes(
         if current_index_variable is not None and append:
             current_index = current_index_variable.to_index()
             if isinstance(current_index, pd.MultiIndex):
-                try:
-                    current_codes = current_index.codes
-                except AttributeError:
-                    # fpr pandas<0.24
-                    current_codes = current_index.labels
                 names.extend(current_index.names)
-                codes.extend(current_codes)
+                codes.extend(current_index.codes)
                 levels.extend(current_index.levels)
             else:
                 names.append("%s_level_0" % dim)
@@ -490,7 +486,7 @@ def __init__(
         if compat is not None:
             warnings.warn(
                 "The `compat` argument to Dataset is deprecated and will be "
-                "removed in 0.14."
+                "removed in 0.15."
                 "Instead, use `merge` to control how variables are combined",
                 FutureWarning,
                 stacklevel=2,
@@ -508,10 +504,9 @@ def __init__(
             data_vars = {}
         if coords is None:
             coords = {}
-        self._set_init_vars_and_dims(data_vars, coords, compat)
 
         # TODO(shoyer): expose indexes as a public argument in __init__
-        self._indexes = None  # type: Optional[OrderedDict[Any, pd.Index]]
+        self._set_init_vars_and_dims(data_vars, coords, compat)
 
         if attrs is not None:
             self._attrs = OrderedDict(attrs)
@@ -531,13 +526,14 @@ def _set_init_vars_and_dims(self, data_vars, coords, compat):
         if isinstance(coords, Dataset):
             coords = coords.variables
 
-        variables, coord_names, dims = merge_data_and_coords(
+        variables, coord_names, dims, indexes = merge_data_and_coords(
             data_vars, coords, compat=compat
         )
 
         self._variables = variables
         self._coord_names = coord_names
         self._dims = dims
+        self._indexes = indexes
 
     @classmethod
     def load_store(cls, store, decoder=None) -> "Dataset":
@@ -614,8 +610,9 @@ def sizes(self) -> Mapping[Hashable, int]:
         return self.dims
 
     def load(self, **kwargs) -> "Dataset":
-        """Manually trigger loading of this dataset's data from disk or a
-        remote source into memory and return this dataset.
+        """Manually trigger loading and/or computation of this dataset's data
+        from disk or a remote source into memory and return this dataset.
+        Unlike compute, the original dataset is modified and returned.
 
         Normally, it should not be necessary to call this method in user code,
         because all xarray functions should either work on deferred data or
@@ -771,9 +768,9 @@ def _dask_postpersist(dsk, info, *args):
         return Dataset._construct_direct(variables, *args)
 
     def compute(self, **kwargs) -> "Dataset":
-        """Manually trigger loading of this dataset's data from disk or a
-        remote source into memory and return a new dataset. The original is
-        left unaltered.
+        """Manually trigger loading and/or computation of this dataset's data
+        from disk or a remote source into memory and return a new dataset.
+        Unlike load, the original dataset is left unaltered.
 
         Normally, it should not be necessary to call this method in user code,
         because all xarray functions should either work on deferred data or
@@ -816,10 +813,10 @@ def persist(self, **kwargs) -> "Dataset":
         """ Trigger computation, keeping data as dask arrays
 
         This operation can be used to trigger computation on underlying dask
-        arrays, similar to ``.compute()``.  However this operation keeps the
-        data as dask arrays.  This is particularly useful when using the
-        dask.distributed scheduler and you want to load a large amount of data
-        into distributed memory.
+        arrays, similar to ``.compute()`` or ``.load()``.  However this
+        operation keeps the data as dask arrays. This is particularly useful
+        when using the dask.distributed scheduler and you want to load a large
+        amount of data into distributed memory.
 
         Parameters
         ----------
@@ -838,7 +835,7 @@ def _construct_direct(
         cls,
         variables,
         coord_names,
-        dims,
+        dims=None,
         attrs=None,
         indexes=None,
         encoding=None,
@@ -847,6 +844,8 @@ def _construct_direct(
         """Shortcut around __init__ for internal use when we want to skip
         costly validation
         """
+        if dims is None:
+            dims = calculate_dimensions(variables)
         obj = object.__new__(cls)
         obj._variables = variables
         obj._coord_names = coord_names
@@ -862,8 +861,7 @@ def _construct_direct(
 
     @classmethod
     def _from_vars_and_coord_names(cls, variables, coord_names, attrs=None):
-        dims = calculate_dimensions(variables)
-        return cls._construct_direct(variables, coord_names, dims, attrs)
+        return cls._construct_direct(variables, coord_names, attrs=attrs)
 
     # TODO(shoyer): renable type checking on this signature when pytype has a
     # good way to handle defaulting arguments to a sentinel value:
@@ -963,7 +961,7 @@ def _overwrite_indexes(self, indexes: Mapping[Any, pd.Index]) -> "Dataset":
         obj = self._replace(variables, indexes=new_indexes)
 
         # switch from dimension to level names, if necessary
-        dim_names = {}  # type: Dict[Hashable, str]
+        dim_names: Dict[Hashable, str] = {}
         for dim, idx in indexes.items():
             if not isinstance(idx, pd.MultiIndex) and idx.name != dim:
                 dim_names[dim] = idx.name
@@ -1128,7 +1126,7 @@ def _copy_listed(self, names: Iterable[Hashable]) -> "Dataset":
                 if (var_name,) == var.dims:
                     indexes[var_name] = var.to_index()
 
-        needed_dims = set()  # type: set
+        needed_dims: Set[Hashable] = set()
         for v in variables.values():
             needed_dims.update(v.dims)
 
@@ -1268,6 +1266,8 @@ def __delitem__(self, key: Hashable) -> None:
         """
         del self._variables[key]
         self._coord_names.discard(key)
+        if key in self.indexes:
+            del self._indexes[key]
         self._dims = calculate_dimensions(self._variables)
 
     # mutable objects should not be hashable
@@ -1665,7 +1665,7 @@ def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]:
         """Block dimensions for this dataset's data or None if it's not a dask
         array.
         """
-        chunks = {}  # type: Dict[Hashable, Tuple[int, ...]]
+        chunks: Dict[Hashable, Tuple[int, ...]] = {}
         for v in self.variables.values():
             if v.chunks is not None:
                 for dim, c in zip(v.dims, v.chunks):
@@ -1710,13 +1710,7 @@ def chunk(
         -------
         chunked : xarray.Dataset
         """
-        try:
-            from dask.base import tokenize
-        except ImportError:
-            # raise the usual error if dask is entirely missing
-            import dask  # noqa: F401
-
-            raise ImportError("xarray requires dask version 0.9 or newer")
+        from dask.base import tokenize
 
         if isinstance(chunks, Number):
             chunks = dict.fromkeys(self.dims, chunks)
@@ -1766,7 +1760,7 @@ def _validate_indexers(
             raise ValueError("dimensions %r do not exist" % invalid)
 
         # all indexers should be int, slice, np.ndarrays, or Variable
-        indexers_list = []  # type: List[Tuple[Any, Union[slice, Variable]]]
+        indexers_list: List[Tuple[Any, Union[slice, Variable]]] = []
         for k, v in indexers.items():
             if isinstance(v, slice):
                 indexers_list.append((k, v))
@@ -1807,20 +1801,16 @@ def _validate_indexers(
         return indexers_list
 
     def _get_indexers_coords_and_indexes(self, indexers):
-        """  Extract coordinates from indexers.
-        Returns an OrderedDict mapping from coordinate name to the
-        coordinate variable.
+        """Extract coordinates and indexes from indexers.
 
         Only coordinate with a name different from any of self.variables will
         be attached.
         """
         from .dataarray import DataArray
 
-        coord_list = []
-        indexes = OrderedDict()
+        coords_list = []
         for k, v in indexers.items():
             if isinstance(v, DataArray):
-                v_coords = v.coords
                 if v.dtype.kind == "b":
                     if v.ndim != 1:  # we only support 1-d boolean array
                         raise ValueError(
@@ -1831,14 +1821,14 @@ def _get_indexers_coords_and_indexes(self, indexers):
                     # Make sure in case of boolean DataArray, its
                     # coordinate also should be indexed.
                     v_coords = v[v.values.nonzero()[0]].coords
-
-                coord_list.append({d: v_coords[d].variable for d in v.coords})
-                indexes.update(v.indexes)
+                else:
+                    v_coords = v.coords
+                coords_list.append(v_coords)
 
         # we don't need to call align() explicitly or check indexes for
         # alignment, because merge_variables already checks for exact alignment
         # between dimension coordinates
-        coords = merge_variables(coord_list)
+        coords, indexes = merge_coordinates_without_align(coords_list)
         assert_coordinate_consistent(self, coords)
 
         # silently drop the conflicted variables.
@@ -1964,7 +1954,7 @@ def sel(
             carried out. See :ref:`indexing` for the details.
             One of indexers or indexers_kwargs must be provided.
         method : {None, 'nearest', 'pad'/'ffill', 'backfill'/'bfill'}, optional
-            Method to use for inexact matches (requires pandas>=0.16):
+            Method to use for inexact matches:
 
             * None (default): only exact matches
             * pad / ffill: propagate last valid index value forward
@@ -1974,7 +1964,6 @@ def sel(
             Maximum distance between original and new labels for inexact
             matches. The values of the index at the matching locations must
             satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
-            Requires pandas>=0.17.
         drop : bool, optional
             If ``drop=True``, drop coordinates variables in `indexers` instead
             of making them scalar.
@@ -2204,12 +2193,11 @@ def reindex_like(
             * None (default): don't fill gaps
             * pad / ffill: propagate last valid index value forward
             * backfill / bfill: propagate next valid index value backward
-            * nearest: use nearest valid index value (requires pandas>=0.16)
+            * nearest: use nearest valid index value
         tolerance : optional
             Maximum distance between original and new labels for inexact
             matches. The values of the index at the matching locations must
             satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
-            Requires pandas>=0.17.
         copy : bool, optional
             If ``copy=True``, data in the return value is always copied. If
             ``copy=False`` and reindexing is unnecessary, or can be performed
@@ -2265,12 +2253,11 @@ def reindex(
             * None (default): don't fill gaps
             * pad / ffill: propagate last valid index value forward
             * backfill / bfill: propagate next valid index value backward
-            * nearest: use nearest valid index value (requires pandas>=0.16)
+            * nearest: use nearest valid index value
         tolerance : optional
             Maximum distance between original and new labels for inexact
             matches. The values of the index at the matching locations must
             satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
-            Requires pandas>=0.17.
         copy : bool, optional
             If ``copy=True``, data in the return value is always copied. If
             ``copy=False`` and reindexing is unnecessary, or can be performed
@@ -2644,12 +2631,14 @@ def _rename_vars(self, name_dict, dims_dict):
     def _rename_dims(self, name_dict):
         return {name_dict.get(k, k): v for k, v in self.dims.items()}
 
-    def _rename_indexes(self, name_dict):
+    def _rename_indexes(self, name_dict, dims_set):
         if self._indexes is None:
             return None
         indexes = OrderedDict()
         for k, v in self.indexes.items():
             new_name = name_dict.get(k, k)
+            if new_name not in dims_set:
+                continue
             if isinstance(v, pd.MultiIndex):
                 new_names = [name_dict.get(k, k) for k in v.names]
                 index = pd.MultiIndex(
@@ -2667,7 +2656,7 @@ def _rename_indexes(self, name_dict):
     def _rename_all(self, name_dict, dims_dict):
         variables, coord_names = self._rename_vars(name_dict, dims_dict)
         dims = self._rename_dims(dims_dict)
-        indexes = self._rename_indexes(name_dict)
+        indexes = self._rename_indexes(name_dict, dims.keys())
         return variables, coord_names, dims, indexes
 
     def rename(
@@ -2923,14 +2912,6 @@ def expand_dims(
         expanded : same type as caller
             This object, but with an additional dimension(s).
         """
-        # TODO: get rid of the below code block when python 3.5 is no longer
-        #   supported.
-        if sys.version < "3.6":
-            if isinstance(dim, Mapping) and not isinstance(dim, OrderedDict):
-                raise TypeError("dim must be an OrderedDict for python <3.6")
-            if dim_kwargs:
-                raise ValueError("dim_kwargs isn't available for python <3.6")
-
         if dim is None:
             pass
         elif isinstance(dim, Mapping):
@@ -3184,13 +3165,6 @@ def _stack_once(self, dims, new_dim):
 
         # consider dropping levels that are unused?
         levels = [self.get_index(dim) for dim in dims]
-        if LooseVersion(pd.__version__) < LooseVersion("0.19.0"):
-            # RangeIndex levels in a MultiIndex are broken for appending in
-            # pandas before v0.19.0
-            levels = [
-                pd.Int64Index(level) if isinstance(level, pd.RangeIndex) else level
-                for level in levels
-            ]
         idx = utils.multiindex_from_product_levels(levels, names=dims)
         variables[new_dim] = IndexVariable(new_dim, idx)
 
@@ -3358,12 +3332,7 @@ def ensure_stackable(val):
 
     def _unstack_once(self, dim: Hashable) -> "Dataset":
         index = self.get_index(dim)
-        # GH2619. For MultiIndex, we need to call remove_unused.
-        if LooseVersion(pd.__version__) >= "0.20":
-            index = index.remove_unused_levels()
-        else:  # for pandas 0.19
-            index = pdcompat.remove_unused_levels(index)
-
+        index = index.remove_unused_levels()
         full_idx = pd.MultiIndex.from_product(index.levels, names=index.names)
 
         # take a shortcut in case the MultiIndex was not modified.
@@ -3448,7 +3417,7 @@ def unstack(self, dim: Union[Hashable, Iterable[Hashable]] = None) -> "Dataset":
             result = result._unstack_once(dim)
         return result
 
-    def update(self, other: "DatasetLike", inplace: bool = None) -> "Dataset":
+    def update(self, other: "CoercibleMapping", inplace: bool = None) -> "Dataset":
         """Update this dataset's variables with those from another dataset.
 
         Parameters
@@ -3475,13 +3444,12 @@ def update(self, other: "DatasetLike", inplace: bool = None) -> "Dataset":
             dataset.
         """
         _check_inplace(inplace)
-        variables, coord_names, dims = dataset_update_method(self, other)
-
-        return self._replace_vars_and_dims(variables, coord_names, dims, inplace=True)
+        merge_result = dataset_update_method(self, other)
+        return self._replace(inplace=True, **merge_result._asdict())
 
     def merge(
         self,
-        other: "DatasetLike",
+        other: "CoercibleMapping",
         inplace: bool = None,
         overwrite_vars: Union[Hashable, Iterable[Hashable]] = frozenset(),
         compat: str = "no_conflicts",
@@ -3536,7 +3504,7 @@ def merge(
             If any variables conflict (see ``compat``).
         """
         _check_inplace(inplace)
-        variables, coord_names, dims = dataset_merge_method(
+        merge_result = dataset_merge_method(
             self,
             other,
             overwrite_vars=overwrite_vars,
@@ -3544,8 +3512,7 @@ def merge(
             join=join,
             fill_value=fill_value,
         )
-
-        return self._replace_vars_and_dims(variables, coord_names, dims)
+        return self._replace(**merge_result._asdict())
 
     def _assert_all_in_dataset(
         self, names: Iterable[Hashable], virtual_okay: bool = False
@@ -4987,13 +4954,6 @@ def sortby(self, variables, ascending=True):
         for data_array in aligned_other_vars:
             if data_array.ndim != 1:
                 raise ValueError("Input DataArray is not 1-D.")
-            if data_array.dtype == object and LooseVersion(
-                np.__version__
-            ) < LooseVersion("1.11.0"):
-                raise NotImplementedError(
-                    "sortby uses np.lexsort under the hood, which requires "
-                    "numpy 1.11.0 or later to support object data-type."
-                )
             (key,) = data_array.dims
             vars_by_dim[key].append(data_array)
 
diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py
index fcd0400566f..126168d418b 100644
--- a/xarray/core/duck_array_ops.py
+++ b/xarray/core/duck_array_ops.py
@@ -17,10 +17,8 @@
 
 try:
     import dask.array as dask_array
-    from . import dask_array_compat
 except ImportError:
     dask_array = None  # type: ignore
-    dask_array_compat = None  # type: ignore
 
 
 def _dask_or_eager_func(
@@ -120,9 +118,7 @@ def notnull(data):
 
 transpose = _dask_or_eager_func("transpose")
 _where = _dask_or_eager_func("where", array_args=slice(3))
-isin = _dask_or_eager_func(
-    "isin", eager_module=npcompat, dask_module=dask_array_compat, array_args=slice(2)
-)
+isin = _dask_or_eager_func("isin", array_args=slice(2))
 take = _dask_or_eager_func("take")
 broadcast_to = _dask_or_eager_func("broadcast_to")
 
@@ -133,15 +129,13 @@ def notnull(data):
 array_any = _dask_or_eager_func("any")
 
 tensordot = _dask_or_eager_func("tensordot", array_args=slice(2))
-einsum = _dask_or_eager_func(
-    "einsum", array_args=slice(1, None), requires_dask="0.17.3"
-)
+einsum = _dask_or_eager_func("einsum", array_args=slice(1, None))
 
 
 def gradient(x, coord, axis, edge_order):
     if isinstance(x, dask_array_type):
-        return dask_array_compat.gradient(x, coord, axis=axis, edge_order=edge_order)
-    return npcompat.gradient(x, coord, axis=axis, edge_order=edge_order)
+        return dask_array.gradient(x, coord, axis=axis, edge_order=edge_order)
+    return np.gradient(x, coord, axis=axis, edge_order=edge_order)
 
 
 def trapz(y, x, axis):
diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py
index c6b2537c958..0c7f073819d 100644
--- a/xarray/core/formatting.py
+++ b/xarray/core/formatting.py
@@ -7,17 +7,12 @@
 
 import numpy as np
 import pandas as pd
+from pandas.errors import OutOfBoundsDatetime
 
 from .duck_array_ops import array_equiv
 from .options import OPTIONS
 from .pycompat import dask_array_type, sparse_array_type
 
-try:
-    from pandas.errors import OutOfBoundsDatetime
-except ImportError:
-    # pandas < 0.20
-    from pandas.tslib import OutOfBoundsDatetime
-
 
 def pretty_print(x, numchars):
     """Given an object `x`, call `str(x)` and format the returned string so
diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py
index 6d42c254438..010c4818ca5 100644
--- a/xarray/core/indexing.py
+++ b/xarray/core/indexing.py
@@ -96,28 +96,12 @@ def _is_nested_tuple(possible_tuple):
     )
 
 
-def _index_method_kwargs(method, tolerance):
-    # backwards compatibility for pandas<0.16 (method) or pandas<0.17
-    # (tolerance)
-    kwargs = {}
-    if method is not None:
-        kwargs["method"] = method
-    if tolerance is not None:
-        kwargs["tolerance"] = tolerance
-    return kwargs
-
-
-def get_loc(index, label, method=None, tolerance=None):
-    kwargs = _index_method_kwargs(method, tolerance)
-    return index.get_loc(label, **kwargs)
-
-
 def get_indexer_nd(index, labels, method=None, tolerance=None):
-    """ Call pd.Index.get_indexer(labels). """
-    kwargs = _index_method_kwargs(method, tolerance)
-
+    """Wrapper around :meth:`pandas.Index.get_indexer` supporting n-dimensional
+    labels
+    """
     flat_labels = np.ravel(labels)
-    flat_indexer = index.get_indexer(flat_labels, **kwargs)
+    flat_indexer = index.get_indexer(flat_labels, method=method, tolerance=tolerance)
     indexer = flat_indexer.reshape(labels.shape)
     return indexer
 
@@ -193,7 +177,9 @@ def convert_label_indexer(index, label, index_name="", method=None, tolerance=No
             if isinstance(index, pd.MultiIndex):
                 indexer, new_index = index.get_loc_level(label.item(), level=0)
             else:
-                indexer = get_loc(index, label.item(), method, tolerance)
+                indexer = index.get_loc(
+                    label.item(), method=method, tolerance=tolerance
+                )
         elif label.dtype.kind == "b":
             indexer = label
         else:
@@ -1382,7 +1368,6 @@ def __array__(self, dtype: DTypeLike = None) -> np.ndarray:
 
     @property
     def shape(self) -> Tuple[int]:
-        # .shape is broken on pandas prior to v0.15.2
         return (len(self.array),)
 
     def __getitem__(
diff --git a/xarray/core/merge.py b/xarray/core/merge.py
index ceeb7db09f1..6eb0acd760e 100644
--- a/xarray/core/merge.py
+++ b/xarray/core/merge.py
@@ -1,13 +1,15 @@
 from collections import OrderedDict
 from typing import (
     TYPE_CHECKING,
+    AbstractSet,
     Any,
     Dict,
     Hashable,
     Iterable,
     List,
     Mapping,
-    MutableMapping,
+    NamedTuple,
+    Optional,
     Sequence,
     Set,
     Tuple,
@@ -18,21 +20,26 @@
 
 from . import dtypes, pdcompat
 from .alignment import deep_align
-from .utils import Frozen
+from .utils import Frozen, dict_equiv
 from .variable import Variable, as_variable, assert_unique_multiindex_level_names
 
 if TYPE_CHECKING:
+    from .coordinates import Coordinates
     from .dataarray import DataArray
     from .dataset import Dataset
 
-    DatasetLikeValue = Union[
-        DataArray, Variable, Tuple[Hashable, Any], Tuple[Sequence[Hashable], Any]
+    DimsLike = Union[Hashable, Sequence[Hashable]]
+    ArrayLike = Any
+    VariableLike = Union[
+        ArrayLike,
+        Tuple[DimsLike, ArrayLike],
+        Tuple[DimsLike, ArrayLike, Mapping],
+        Tuple[DimsLike, ArrayLike, Mapping, Mapping],
     ]
-    DatasetLike = Union[Dataset, Mapping[Hashable, DatasetLikeValue]]
-    """Any object type that can be used on the rhs of Dataset.update,
-    Dataset.merge, etc.
-    """
-    MutableDatasetLike = Union[Dataset, MutableMapping[Hashable, DatasetLikeValue]]
+    XarrayValue = Union[DataArray, Variable, VariableLike]
+    DatasetLike = Union[Dataset, Mapping[Hashable, XarrayValue]]
+    CoercibleValue = Union[XarrayValue, pd.Series, pd.DataFrame]
+    CoercibleMapping = Union[Dataset, Mapping[Hashable, CoercibleValue]]
 
 
 PANDAS_TYPES = (pd.Series, pd.DataFrame, pdcompat.Panel)
@@ -71,8 +78,12 @@ class MergeError(ValueError):
     # TODO: move this to an xarray.exceptions module?
 
 
-def unique_variable(name, variables, compat="broadcast_equals", equals=None):
-    # type: (Any, List[Variable], str, bool) -> Variable
+def unique_variable(
+    name: Hashable,
+    variables: List[Variable],
+    compat: str = "broadcast_equals",
+    equals: bool = None,
+) -> Variable:
     """Return the unique variable from a list of variables or raise MergeError.
 
     Parameters
@@ -121,8 +132,8 @@ def unique_variable(name, variables, compat="broadcast_equals", equals=None):
 
     if not equals:
         raise MergeError(
-            "conflicting values for variable %r on objects to be combined. You can skip this check by specifying compat='override'."
-            % (name)
+            "conflicting values for variable {!r} on objects to be combined. "
+            "You can skip this check by specifying compat='override'.".format(name)
         )
 
     if combine_method:
@@ -137,138 +148,188 @@ def _assert_compat_valid(compat):
         raise ValueError("compat=%r invalid: must be %s" % (compat, set(_VALID_COMPAT)))
 
 
-class OrderedDefaultDict(OrderedDict):
-    # minimal version of an ordered defaultdict
-    # beware: does not pickle or copy properly
-    def __init__(self, default_factory):
-        self.default_factory = default_factory
-        super().__init__()
-
-    def __missing__(self, key):
-        self[key] = default = self.default_factory()
-        return default
+MergeElement = Tuple[Variable, Optional[pd.Index]]
 
 
-def merge_variables(
-    list_of_variables_dicts: List[Mapping[Any, Variable]],
-    priority_vars: Mapping[Any, Variable] = None,
+def merge_collected(
+    grouped: "OrderedDict[Hashable, List[MergeElement]]",
+    prioritized: Mapping[Hashable, MergeElement] = None,
     compat: str = "minimal",
-) -> "OrderedDict[Any, Variable]":
+) -> Tuple["OrderedDict[Hashable, Variable]", "OrderedDict[Hashable, pd.Index]"]:
     """Merge dicts of variables, while resolving conflicts appropriately.
 
     Parameters
     ----------
-    lists_of_variables_dicts : list of mappings with Variable values
-        List of mappings for which each value is a xarray.Variable object.
-    priority_vars : mapping with Variable or None values, optional
-        If provided, variables are always taken from this dict in preference to
-        the input variable dictionaries, without checking for conflicts.
-    compat : {'identical', 'equals', 'broadcast_equals', 'minimal', 'no_conflicts', 'override'}, optional
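+    compat : {'identical', 'equals', 'broadcast_equals', 'minimal', 'no_conflicts', 'override'}, optional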
         Type of equality check to use when checking for conflicts.
 
     Returns
     -------
-    OrderedDict with keys taken by the union of keys on list_of_variable_dicts,
+    Tuple of OrderedDicts (variables, indexes) with keys taken from ``grouped``,
     and Variable values corresponding to those that should be found on the
     merged result.
     """
-    if priority_vars is None:
-        priority_vars = {}
+    if prioritized is None:
+        prioritized = {}
 
     _assert_compat_valid(compat)
-    dim_compat = min(compat, "equals", key=_VALID_COMPAT.get)
-
-    lookup = OrderedDefaultDict(list)
-    for variables in list_of_variables_dicts:
-        for name, var in variables.items():
-            lookup[name].append(var)
-
-    # n.b. it's important to fill up merged in the original order in which
-    # variables appear
-    merged = OrderedDict()  # type: OrderedDict[Any, Variable]
-
-    for name, var_list in lookup.items():
-        if name in priority_vars:
-            # one of these arguments (e.g., the first for in-place arithmetic
-            # or the second for Dataset.update) takes priority
-            merged[name] = priority_vars[name]
+
+    merged_vars = OrderedDict()  # type: OrderedDict[Any, Variable]
+    merged_indexes = OrderedDict()  # type: OrderedDict[Any, pd.Index]
+
+    for name, elements_list in grouped.items():
+        if name in prioritized:
+            variable, index = prioritized[name]
+            merged_vars[name] = variable
+            if index is not None:
+                merged_indexes[name] = index
         else:
-            dim_variables = [var for var in var_list if (name,) == var.dims]
-            if dim_variables:
-                # if there are dimension coordinates, these must be equal (or
-                # identical), and they take priority over non-dimension
-                # coordinates
-                merged[name] = unique_variable(name, dim_variables, dim_compat)
+            indexed_elements = [
+                (variable, index)
+                for variable, index in elements_list
+                if index is not None
+            ]
+
+            if indexed_elements:
+                # TODO(shoyer): consider adjusting this logic. Are we really
+                # OK throwing away variables without an index in favor of
+                # indexed variables, without even checking if values match?
+                variable, index = indexed_elements[0]
+                for _, other_index in indexed_elements[1:]:
+                    if not index.equals(other_index):
+                        raise MergeError(
+                            "conflicting values for index %r on objects to be "
+                            "combined:\nfirst value: %r\nsecond value: %r"
+                            % (name, index, other_index)
+                        )
+                if compat == "identical":
+                    for other_variable, _ in indexed_elements[1:]:
+                        if not dict_equiv(variable.attrs, other_variable.attrs):
+                            raise MergeError(
+                                "conflicting attribute values on combined "
+                                "variable %r:\nfirst value: %r\nsecond value: %r"
+                                % (name, variable.attrs, other_variable.attrs)
+                            )
+                merged_vars[name] = variable
+                merged_indexes[name] = index
             else:
+                variables = [variable for variable, _ in elements_list]
                 try:
-                    merged[name] = unique_variable(name, var_list, compat)
+                    merged_vars[name] = unique_variable(name, variables, compat)
                 except MergeError:
                     if compat != "minimal":
                         # we need more than "minimal" compatibility (for which
                         # we drop conflicting coordinates)
                         raise
 
-    return merged
+    return merged_vars, merged_indexes
 
 
-def expand_variable_dicts(
-    list_of_variable_dicts: "List[Union[Dataset, OrderedDict]]",
-) -> "List[Mapping[Any, Variable]]":
-    """Given a list of dicts with xarray object values, expand the values.
+def collect_variables_and_indexes(
+    list_of_mappings: "List[DatasetLike]",
+) -> "OrderedDict[Hashable, List[MergeElement]]":
+    """Collect variables and indexes from list of mappings of xarray objects.
 
-    Parameters
-    ----------
-    list_of_variable_dicts : list of dict or Dataset objects
-        Each value for the mappings must be of the following types:
-        - an xarray.Variable
-        - a tuple `(dims, data[, attrs[, encoding]])` that can be converted in
-          an xarray.Variable
-        - or an xarray.DataArray
-
-    Returns
-    -------
-    A list of ordered dictionaries corresponding to inputs, or coordinates from
-    an input's values. The values of each ordered dictionary are all
-    xarray.Variable objects.
+    Mappings must either be Dataset objects, or have values of one of the
+    following types:
+    - an xarray.Variable
+    - a tuple `(dims, data[, attrs[, encoding]])` that can be converted into
+      an xarray.Variable
+    - or an xarray.DataArray
     """
     from .dataarray import DataArray
     from .dataset import Dataset
 
-    var_dicts = []
+    grouped = (
+        OrderedDict()
+    )  # type: OrderedDict[Hashable, List[MergeElement]]
 
-    for variables in list_of_variable_dicts:
-        if isinstance(variables, Dataset):
-            var_dicts.append(variables.variables)
-            continue
+    def append(name, variable, index):
+        values = grouped.setdefault(name, [])
+        values.append((variable, index))
 
-        # append coords to var_dicts before appending sanitized_vars,
-        # because we want coords to appear first
-        sanitized_vars = OrderedDict()  # type: OrderedDict[Any, Variable]
+    def append_all(variables, indexes):
+        for name, variable in variables.items():
+            append(name, variable, indexes.get(name))
 
-        for name, var in variables.items():
-            if isinstance(var, DataArray):
-                # use private API for speed
-                coords = var._coords.copy()
+    for mapping in list_of_mappings:
+        if isinstance(mapping, Dataset):
+            append_all(mapping.variables, mapping.indexes)
+            continue
+
+        for name, variable in mapping.items():
+            if isinstance(variable, DataArray):
+                coords = variable._coords.copy()  # use private API for speed
+                indexes = OrderedDict(variable.indexes)
                 # explicitly overwritten variables should take precedence
                 coords.pop(name, None)
-                var_dicts.append(coords)
-
-            var = as_variable(var, name=name)
-            sanitized_vars[name] = var
+                indexes.pop(name, None)
+                append_all(coords, indexes)
 
-        var_dicts.append(sanitized_vars)
+            variable = as_variable(variable, name=name)
+            if variable.dims == (name,):
+                variable = variable.to_index_variable()
+                index = variable.to_index()
+            else:
+                index = None
+            append(name, variable, index)
+
+    return grouped
+
+
+def collect_from_coordinates(
+    list_of_coords: "List[Coordinates]"
+) -> "OrderedDict[Hashable, List[MergeElement]]":
+    """Collect variables and indexes to be merged from Coordinate objects."""
+    grouped = (
+        OrderedDict()
+    )  # type: OrderedDict[Hashable, List[MergeElement]]
+
+    for coords in list_of_coords:
+        variables = coords.variables
+        indexes = coords.indexes
+        for name, variable in variables.items():
+            value = grouped.setdefault(name, [])
+            value.append((variable, indexes.get(name)))
+    return grouped
+
+
+def merge_coordinates_without_align(
+    objects: "List[Coordinates]",
+    prioritized: Mapping[Hashable, MergeElement] = None,
+    exclude_dims: AbstractSet = frozenset(),
+) -> Tuple["OrderedDict[Hashable, Variable]", "OrderedDict[Hashable, pd.Index]"]:
+    """Merge variables/indexes from coordinates without automatic alignments.
+
+    This function is used for merging coordinates from pre-existing xarray
+    objects.
+    """
+    collected = collect_from_coordinates(objects)
+
+    if exclude_dims:
+        filtered = OrderedDict()  # type: OrderedDict[Hashable, List[MergeElement]]
+        for name, elements in collected.items():
+            new_elements = [
+                (variable, index)
+                for variable, index in elements
+                if exclude_dims.isdisjoint(variable.dims)
+            ]
+            if new_elements:
+                filtered[name] = new_elements
+    else:
+        filtered = collected
 
-    return var_dicts
+    return merge_collected(filtered, prioritized)
 
 
 def determine_coords(
-    list_of_variable_dicts: Iterable["DatasetLike"]
+    list_of_mappings: Iterable["DatasetLike"]
 ) -> Tuple[Set[Hashable], Set[Hashable]]:
     """Given a list of dicts with xarray object values, identify coordinates.
 
     Parameters
     ----------
-    list_of_variable_dicts : list of dict or Dataset objects
+    list_of_mappings : list of dict or Dataset objects
         Of the same form as the arguments to expand_variable_dicts.
 
     Returns
@@ -281,15 +342,15 @@ def determine_coords(
     from .dataarray import DataArray
     from .dataset import Dataset
 
-    coord_names = set()  # type: set
-    noncoord_names = set()  # type: set
+    coord_names: Set[Hashable] = set()
+    noncoord_names: Set[Hashable] = set()
 
-    for variables in list_of_variable_dicts:
-        if isinstance(variables, Dataset):
-            coord_names.update(variables.coords)
-            noncoord_names.update(variables.data_vars)
+    for mapping in list_of_mappings:
+        if isinstance(mapping, Dataset):
+            coord_names.update(mapping.coords)
+            noncoord_names.update(mapping.data_vars)
         else:
-            for name, var in variables.items():
+            for name, var in mapping.items():
                 if isinstance(var, DataArray):
                     coords = set(var._coords)  # use private API for speed
                     # explicitly overwritten variables should take precedence
@@ -299,7 +360,7 @@ def determine_coords(
     return coord_names, noncoord_names
 
 
-def coerce_pandas_values(objects: Iterable["DatasetLike"]) -> List["DatasetLike"]:
+def coerce_pandas_values(objects: Iterable["CoercibleMapping"]) -> List["DatasetLike"]:
     """Convert pandas values found in a list of labeled objects.
 
     Parameters
@@ -332,18 +393,9 @@ def coerce_pandas_values(objects: Iterable["DatasetLike"]) -> List["DatasetLike"
     return out
 
 
-def merge_coords_for_inplace_math(objs, priority_vars=None):
-    """Merge coordinate variables without worrying about alignment.
-
-    This function is used for merging variables in coordinates.py.
-    """
-    expanded = expand_variable_dicts(objs)
-    variables = merge_variables(expanded, priority_vars)
-    assert_unique_multiindex_level_names(variables)
-    return variables
-
-
-def _get_priority_vars(objects, priority_arg, compat="equals"):
+def _get_priority_vars_and_indexes(
+    objects: List["DatasetLike"], priority_arg: Optional[int], compat: str = "equals"
+) -> "OrderedDict[Hashable, MergeElement]":
     """Extract the priority variable from a list of mappings.
 
     We need this method because in some cases the priority argument itself
@@ -361,36 +413,27 @@ def _get_priority_vars(objects, priority_arg, compat="equals"):
 
     Returns
     -------
-    None, if priority_arg is None, or an OrderedDict with Variable objects as
-    values indicating priority variables.
+    An OrderedDict of variables and associated indexes (if any) to prioritize.
     """
     if priority_arg is None:
-        priority_vars = {}
-    else:
-        expanded = expand_variable_dicts([objects[priority_arg]])
-        priority_vars = merge_variables(expanded, compat=compat)
-    return priority_vars
-
+        return OrderedDict()
 
-def expand_and_merge_variables(objs, priority_arg=None):
-    """Merge coordinate variables without worrying about alignment.
-
-    This function is used for merging variables in computation.py.
-    """
-    expanded = expand_variable_dicts(objs)
-    priority_vars = _get_priority_vars(objs, priority_arg)
-    variables = merge_variables(expanded, priority_vars)
-    return variables
+    collected = collect_variables_and_indexes([objects[priority_arg]])
+    variables, indexes = merge_collected(collected, compat=compat)
+    grouped = OrderedDict()  # type: OrderedDict[Hashable, MergeElement]
+    for name, variable in variables.items():
+        grouped[name] = (variable, indexes.get(name))
+    return grouped
 
 
 def merge_coords(
-    objs,
-    compat="minimal",
-    join="outer",
-    priority_arg=None,
-    indexes=None,
-    fill_value=dtypes.NA,
-):
+    objects: Iterable["CoercibleMapping"],
+    compat: str = "minimal",
+    join: str = "outer",
+    priority_arg: Optional[int] = None,
+    indexes: Optional[Mapping[Hashable, pd.Index]] = None,
+    fill_value: object = dtypes.NA,
+) -> Tuple["OrderedDict[Hashable, Variable]", "OrderedDict[Hashable, pd.Index]"]:
     """Merge coordinate variables.
 
     See merge_core below for argument descriptions. This works similarly to
@@ -398,29 +441,28 @@ def merge_coords(
     coordinates or not.
     """
     _assert_compat_valid(compat)
-    coerced = coerce_pandas_values(objs)
+    coerced = coerce_pandas_values(objects)
     aligned = deep_align(
         coerced, join=join, copy=False, indexes=indexes, fill_value=fill_value
     )
-    expanded = expand_variable_dicts(aligned)
-    priority_vars = _get_priority_vars(aligned, priority_arg, compat=compat)
-    variables = merge_variables(expanded, priority_vars, compat=compat)
+    collected = collect_variables_and_indexes(aligned)
+    prioritized = _get_priority_vars_and_indexes(aligned, priority_arg, compat=compat)
+    variables, out_indexes = merge_collected(collected, prioritized, compat=compat)
     assert_unique_multiindex_level_names(variables)
-
-    return variables
+    return variables, out_indexes
 
 
 def merge_data_and_coords(data, coords, compat="broadcast_equals", join="outer"):
     """Used in Dataset.__init__."""
-    objs = [data, coords]
+    objects = [data, coords]
     explicit_coords = coords.keys()
-    indexes = dict(extract_indexes(coords))
+    indexes = dict(_extract_indexes_from_coords(coords))
     return merge_core(
-        objs, compat, join, explicit_coords=explicit_coords, indexes=indexes
+        objects, compat, join, explicit_coords=explicit_coords, indexes=indexes
     )
 
 
-def extract_indexes(coords):
+def _extract_indexes_from_coords(coords):
     """Yields the name & index of valid indexes from a mapping of coords"""
     for name, variable in coords.items():
         variable = as_variable(variable, name=name)
@@ -443,31 +485,42 @@ def assert_valid_explicit_coords(variables, dims, explicit_coords):
             )
 
 
+_MergeResult = NamedTuple(
+    "_MergeResult",
+    [
+        ("variables", "OrderedDict[Hashable, Variable]"),
+        ("coord_names", Set[Hashable]),
+        ("dims", Dict[Hashable, int]),
+        ("indexes", "OrderedDict[Hashable, pd.Index]"),
+    ],
+)
+
+
 def merge_core(
-    objs,
-    compat="broadcast_equals",
-    join="outer",
-    priority_arg=None,
-    explicit_coords=None,
-    indexes=None,
-    fill_value=dtypes.NA,
-) -> Tuple["OrderedDict[Hashable, Variable]", Set[Hashable], Dict[Hashable, int]]:
+    objects: Iterable["CoercibleMapping"],
+    compat: str = "broadcast_equals",
+    join: str = "outer",
+    priority_arg: Optional[int] = None,
+    explicit_coords: Optional[Sequence] = None,
+    indexes: Optional[Mapping[Hashable, pd.Index]] = None,
+    fill_value: object = dtypes.NA,
+) -> _MergeResult:
     """Core logic for merging labeled objects.
 
     This is not public API.
 
     Parameters
     ----------
-    objs : list of mappings
+    objects : list of mappings
         All values must be convertable to labeled arrays.
     compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts', 'override'}, optional
         Compatibility checks to use when merging variables.
     join : {'outer', 'inner', 'left', 'right'}, optional
         How to combine objects with different indexes.
     priority_arg : integer, optional
-        Optional argument in `objs` that takes precedence over the others.
+        Optional argument in `objects` that takes precedence over the others.
     explicit_coords : set, optional
-        An explicit list of variables from `objs` that are coordinates.
+        An explicit list of variables from `objects` that are coordinates.
     indexes : dict, optional
         Dictionary with values given by pandas.Index objects.
     fill_value : scalar, optional
@@ -490,28 +543,25 @@ def merge_core(
 
     _assert_compat_valid(compat)
 
-    coerced = coerce_pandas_values(objs)
+    coerced = coerce_pandas_values(objects)
     aligned = deep_align(
         coerced, join=join, copy=False, indexes=indexes, fill_value=fill_value
     )
-    expanded = expand_variable_dicts(aligned)
+    collected = collect_variables_and_indexes(aligned)
 
-    coord_names, noncoord_names = determine_coords(coerced)
-
-    priority_vars = _get_priority_vars(aligned, priority_arg, compat=compat)
-    variables = merge_variables(expanded, priority_vars, compat=compat)
+    prioritized = _get_priority_vars_and_indexes(aligned, priority_arg, compat=compat)
+    variables, out_indexes = merge_collected(collected, prioritized, compat=compat)
     assert_unique_multiindex_level_names(variables)
 
     dims = calculate_dimensions(variables)
 
+    coord_names, noncoord_names = determine_coords(coerced)
     if explicit_coords is not None:
         assert_valid_explicit_coords(variables, dims, explicit_coords)
         coord_names.update(explicit_coords)
-
     for dim, size in dims.items():
         if dim in variables:
             coord_names.add(dim)
-
     ambiguous_coords = coord_names.intersection(noncoord_names)
     if ambiguous_coords:
         raise MergeError(
@@ -519,10 +569,15 @@ def merge_core(
             "coordinates or not in the merged result: %s" % ambiguous_coords
         )
 
-    return variables, coord_names, dims
+    return _MergeResult(variables, coord_names, dims, out_indexes)
 
 
-def merge(objects, compat="no_conflicts", join="outer", fill_value=dtypes.NA):
+def merge(
+    objects: Iterable[Union["DataArray", "CoercibleMapping"]],
+    compat: str = "no_conflicts",
+    join: str = "outer",
+    fill_value: object = dtypes.NA,
+) -> "Dataset":
     """Merge any number of xarray objects into a single Dataset as variables.
 
     Parameters
@@ -724,7 +779,7 @@ def merge(objects, compat="no_conflicts", join="outer", fill_value=dtypes.NA):
 
     dict_like_objects = list()
     for obj in objects:
-        if not (isinstance(obj, (DataArray, Dataset, dict))):
+        if not isinstance(obj, (DataArray, Dataset, dict)):
             raise TypeError(
                 "objects must be an iterable containing only "
                 "Dataset(s), DataArray(s), and dictionaries."
@@ -733,26 +788,21 @@ def merge(objects, compat="no_conflicts", join="outer", fill_value=dtypes.NA):
         obj = obj.to_dataset() if isinstance(obj, DataArray) else obj
         dict_like_objects.append(obj)
 
-    variables, coord_names, dims = merge_core(
-        dict_like_objects, compat, join, fill_value=fill_value
-    )
-    # TODO: don't always recompute indexes
-    merged = Dataset._construct_direct(variables, coord_names, dims, indexes=None)
-
+    merge_result = merge_core(dict_like_objects, compat, join, fill_value=fill_value)
+    merged = Dataset._construct_direct(**merge_result._asdict())
     return merged
 
 
 def dataset_merge_method(
     dataset: "Dataset",
-    other: "DatasetLike",
+    other: "CoercibleMapping",
     overwrite_vars: Union[Hashable, Iterable[Hashable]],
     compat: str,
     join: str,
     fill_value: Any,
-) -> Tuple["OrderedDict[Hashable, Variable]", Set[Hashable], Dict[Hashable, int]]:
+) -> _MergeResult:
     """Guts of the Dataset.merge method.
     """
-
     # we are locked into supporting overwrite_vars for the Dataset.merge
     # method due for backwards compatibility
     # TODO: consider deprecating it?
@@ -769,8 +819,10 @@ def dataset_merge_method(
         objs = [dataset, other]
         priority_arg = 1
     else:
-        other_overwrite = OrderedDict()  # type: MutableDatasetLike
-        other_no_overwrite = OrderedDict()  # type: MutableDatasetLike
+        other_overwrite = OrderedDict()  # type: OrderedDict[Hashable, CoercibleValue]
+        other_no_overwrite = (
+            OrderedDict()
+        )  # type: OrderedDict[Hashable, CoercibleValue]
         for k, v in other.items():
             if k in overwrite_vars:
                 other_overwrite[k] = v
@@ -785,8 +837,8 @@ def dataset_merge_method(
 
 
 def dataset_update_method(
-    dataset: "Dataset", other: "DatasetLike"
-) -> Tuple["OrderedDict[Hashable, Variable]", Set[Hashable], Dict[Hashable, int]]:
+    dataset: "Dataset", other: "CoercibleMapping"
+) -> _MergeResult:
     """Guts of the Dataset.update method.
 
     This drops a duplicated coordinates from `other` if `other` is not an
diff --git a/xarray/core/missing.py b/xarray/core/missing.py
index fdabdb156b6..dfe209e3f7e 100644
--- a/xarray/core/missing.py
+++ b/xarray/core/missing.py
@@ -17,10 +17,10 @@ class BaseInterpolator:
     """Generic interpolator class for normalizing interpolation methods
     """
 
-    cons_kwargs = None  # type: Dict[str, Any]
-    call_kwargs = None  # type: Dict[str, Any]
-    f = None  # type: Callable
-    method = None  # type: str
+    cons_kwargs: Dict[str, Any]
+    call_kwargs: Dict[str, Any]
+    f: Callable
+    method: str
 
     def __call__(self, x):
         return self.f(x, **self.call_kwargs)
diff --git a/xarray/core/npcompat.py b/xarray/core/npcompat.py
index 22c14d9ff40..1018332df29 100644
--- a/xarray/core/npcompat.py
+++ b/xarray/core/npcompat.py
@@ -30,294 +30,10 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 import builtins
 import operator
-from distutils.version import LooseVersion
 from typing import Union
 
 import numpy as np
 
-try:
-    from numpy import isin
-except ImportError:
-
-    def isin(element, test_elements, assume_unique=False, invert=False):
-        """
-        Calculates `element in test_elements`, broadcasting over `element`
-        only. Returns a boolean array of the same shape as `element` that is
-        True where an element of `element` is in `test_elements` and False
-        otherwise.
-
-        Parameters
-        ----------
-        element : array_like
-            Input array.
-        test_elements : array_like
-            The values against which to test each value of `element`.
-            This argument is flattened if it is an array or array_like.
-            See notes for behavior with non-array-like parameters.
-        assume_unique : bool, optional
-            If True, the input arrays are both assumed to be unique, which
-            can speed up the calculation.  Default is False.
-        invert : bool, optional
-            If True, the values in the returned array are inverted, as if
-            calculating `element not in test_elements`. Default is False.
-            ``np.isin(a, b, invert=True)`` is equivalent to (but faster
-            than) ``np.invert(np.isin(a, b))``.
-
-        Returns
-        -------
-        isin : ndarray, bool
-            Has the same shape as `element`. The values `element[isin]`
-            are in `test_elements`.
-
-        See Also
-        --------
-        in1d                  : Flattened version of this function.
-        numpy.lib.arraysetops : Module with a number of other functions for
-                                performing set operations on arrays.
-
-        Notes
-        -----
-
-        `isin` is an element-wise function version of the python keyword `in`.
-        ``isin(a, b)`` is roughly equivalent to
-        ``np.array([item in b for item in a])`` if `a` and `b` are 1-D
-        sequences.
-
-        `element` and `test_elements` are converted to arrays if they are not
-        already. If `test_elements` is a set (or other non-sequence collection)
-        it will be converted to an object array with one element, rather than
-        an array of the values contained in `test_elements`. This is a
-        consequence of the `array` constructor's way of handling non-sequence
-        collections. Converting the set to a list usually gives the desired
-        behavior.
-
-        .. versionadded:: 1.13.0
-
-        Examples
-        --------
-        >>> element = 2*np.arange(4).reshape((2, 2))
-        >>> element
-        array([[0, 2],
-               [4, 6]])
-        >>> test_elements = [1, 2, 4, 8]
-        >>> mask = np.isin(element, test_elements)
-        >>> mask
-        array([[ False,  True],
-               [ True,  False]])
-        >>> element[mask]
-        array([2, 4])
-        >>> mask = np.isin(element, test_elements, invert=True)
-        >>> mask
-        array([[ True, False],
-               [ False, True]])
-        >>> element[mask]
-        array([0, 6])
-
-        Because of how `array` handles sets, the following does not
-        work as expected:
-
-        >>> test_set = {1, 2, 4, 8}
-        >>> np.isin(element, test_set)
-        array([[ False, False],
-               [ False, False]])
-
-        Casting the set to a list gives the expected result:
-
-        >>> np.isin(element, list(test_set))
-        array([[ False,  True],
-               [ True,  False]])
-        """
-        element = np.asarray(element)
-        return np.in1d(
-            element, test_elements, assume_unique=assume_unique, invert=invert
-        ).reshape(element.shape)
-
-
-if LooseVersion(np.__version__) >= LooseVersion("1.13"):
-    gradient = np.gradient
-else:
-
-    def normalize_axis_tuple(axes, N):
-        if isinstance(axes, int):
-            axes = (axes,)
-        return tuple([N + a if a < 0 else a for a in axes])
-
-    def gradient(f, *varargs, axis=None, edge_order=1):
-        f = np.asanyarray(f)
-        N = f.ndim  # number of dimensions
-
-        axes = axis
-        del axis
-
-        if axes is None:
-            axes = tuple(range(N))
-        else:
-            axes = normalize_axis_tuple(axes, N)
-
-        len_axes = len(axes)
-        n = len(varargs)
-        if n == 0:
-            # no spacing argument - use 1 in all axes
-            dx = [1.0] * len_axes
-        elif n == 1 and np.ndim(varargs[0]) == 0:
-            # single scalar for all axes
-            dx = varargs * len_axes
-        elif n == len_axes:
-            # scalar or 1d array for each axis
-            dx = list(varargs)
-            for i, distances in enumerate(dx):
-                if np.ndim(distances) == 0:
-                    continue
-                elif np.ndim(distances) != 1:
-                    raise ValueError("distances must be either scalars or 1d")
-                if len(distances) != f.shape[axes[i]]:
-                    raise ValueError(
-                        "when 1d, distances must match the "
-                        "length of the corresponding dimension"
-                    )
-                diffx = np.diff(distances)
-                # if distances are constant reduce to the scalar case
-                # since it brings a consistent speedup
-                if (diffx == diffx[0]).all():
-                    diffx = diffx[0]
-                dx[i] = diffx
-        else:
-            raise TypeError("invalid number of arguments")
-
-        if edge_order > 2:
-            raise ValueError("'edge_order' greater than 2 not supported")
-
-        # use central differences on interior and one-sided differences on the
-        # endpoints. This preserves second order-accuracy over the full domain.
-
-        outvals = []
-
-        # create slice objects --- initially all are [:, :, ..., :]
-        slice1 = [slice(None)] * N
-        slice2 = [slice(None)] * N
-        slice3 = [slice(None)] * N
-        slice4 = [slice(None)] * N
-
-        otype = f.dtype.char
-        if otype not in ["f", "d", "F", "D", "m", "M"]:
-            otype = "d"
-
-        # Difference of datetime64 elements results in timedelta64
-        if otype == "M":
-            # Need to use the full dtype name because it contains unit
-            # information
-            otype = f.dtype.name.replace("datetime", "timedelta")
-        elif otype == "m":
-            # Needs to keep the specific units, can't be a general unit
-            otype = f.dtype
-
-        # Convert datetime64 data into ints. Make dummy variable `y`
-        # that is a view of ints if the data is datetime64, otherwise
-        # just set y equal to the array `f`.
-        if f.dtype.char in ["M", "m"]:
-            y = f.view("int64")
-        else:
-            y = f
-
-        for i, axis in enumerate(axes):
-            if y.shape[axis] < edge_order + 1:
-                raise ValueError(
-                    "Shape of array too small to calculate a numerical "
-                    "gradient, at least (edge_order + 1) elements are "
-                    "required."
-                )
-            # result allocation
-            out = np.empty_like(y, dtype=otype)
-
-            uniform_spacing = np.ndim(dx[i]) == 0
-
-            # Numerical differentiation: 2nd order interior
-            slice1[axis] = slice(1, -1)
-            slice2[axis] = slice(None, -2)
-            slice3[axis] = slice(1, -1)
-            slice4[axis] = slice(2, None)
-
-            if uniform_spacing:
-                out[slice1] = (f[slice4] - f[slice2]) / (2.0 * dx[i])
-            else:
-                dx1 = dx[i][0:-1]
-                dx2 = dx[i][1:]
-                a = -(dx2) / (dx1 * (dx1 + dx2))
-                b = (dx2 - dx1) / (dx1 * dx2)
-                c = dx1 / (dx2 * (dx1 + dx2))
-                # fix the shape for broadcasting
-                shape = np.ones(N, dtype=int)
-                shape[axis] = -1
-                a.shape = b.shape = c.shape = shape
-                # 1D equivalent --
-                # out[1:-1] = a * f[:-2] + b * f[1:-1] + c * f[2:]
-                out[slice1] = a * f[slice2] + b * f[slice3] + c * f[slice4]
-
-            # Numerical differentiation: 1st order edges
-            if edge_order == 1:
-                slice1[axis] = 0
-                slice2[axis] = 1
-                slice3[axis] = 0
-                dx_0 = dx[i] if uniform_spacing else dx[i][0]
-                # 1D equivalent -- out[0] = (y[1] - y[0]) / (x[1] - x[0])
-                out[slice1] = (y[slice2] - y[slice3]) / dx_0
-
-                slice1[axis] = -1
-                slice2[axis] = -1
-                slice3[axis] = -2
-                dx_n = dx[i] if uniform_spacing else dx[i][-1]
-                # 1D equivalent -- out[-1] = (y[-1] - y[-2]) / (x[-1] - x[-2])
-                out[slice1] = (y[slice2] - y[slice3]) / dx_n
-
-            # Numerical differentiation: 2nd order edges
-            else:
-                slice1[axis] = 0
-                slice2[axis] = 0
-                slice3[axis] = 1
-                slice4[axis] = 2
-                if uniform_spacing:
-                    a = -1.5 / dx[i]
-                    b = 2.0 / dx[i]
-                    c = -0.5 / dx[i]
-                else:
-                    dx1 = dx[i][0]
-                    dx2 = dx[i][1]
-                    a = -(2.0 * dx1 + dx2) / (dx1 * (dx1 + dx2))
-                    b = (dx1 + dx2) / (dx1 * dx2)
-                    c = -dx1 / (dx2 * (dx1 + dx2))
-                # 1D equivalent -- out[0] = a * y[0] + b * y[1] + c * y[2]
-                out[slice1] = a * y[slice2] + b * y[slice3] + c * y[slice4]
-
-                slice1[axis] = -1
-                slice2[axis] = -3
-                slice3[axis] = -2
-                slice4[axis] = -1
-                if uniform_spacing:
-                    a = 0.5 / dx[i]
-                    b = -2.0 / dx[i]
-                    c = 1.5 / dx[i]
-                else:
-                    dx1 = dx[i][-2]
-                    dx2 = dx[i][-1]
-                    a = (dx2) / (dx1 * (dx1 + dx2))
-                    b = -(dx2 + dx1) / (dx1 * dx2)
-                    c = (2.0 * dx2 + dx1) / (dx2 * (dx1 + dx2))
-                # 1D equivalent -- out[-1] = a * f[-3] + b * f[-2] + c * f[-1]
-                out[slice1] = a * y[slice2] + b * y[slice3] + c * y[slice4]
-
-            outvals.append(out)
-
-            # reset the slice object in this dimension to ":"
-            slice1[axis] = slice(None)
-            slice2[axis] = slice(None)
-            slice3[axis] = slice(None)
-            slice4[axis] = slice(None)
-
-        if len_axes == 1:
-            return outvals[0]
-        else:
-            return outvals
-
 
 # Vendored from NumPy 1.12; we need a version that support duck typing, even
 # on dask arrays with __array_function__ enabled.
diff --git a/xarray/core/pdcompat.py b/xarray/core/pdcompat.py
index 91998482e3e..7591fff3abe 100644
--- a/xarray/core/pdcompat.py
+++ b/xarray/core/pdcompat.py
@@ -39,9 +39,9 @@
 
 from distutils.version import LooseVersion
 
-import numpy as np
 import pandas as pd
 
+
 # allow ourselves to type checks for Panel even after it's removed
 if LooseVersion(pd.__version__) < "0.25.0":
     Panel = pd.Panel
@@ -51,78 +51,9 @@ class Panel:  # type: ignore
         pass
 
 
-# for pandas 0.19
-def remove_unused_levels(self):
-    """
-    create a new MultiIndex from the current that removing
-    unused levels, meaning that they are not expressed in the labels
-    The resulting MultiIndex will have the same outward
-    appearance, meaning the same .values and ordering. It will also
-    be .equals() to the original.
-    .. versionadded:: 0.20.0
-    Returns
-    -------
-    MultiIndex
-    Examples
-    --------
-    >>> i = pd.MultiIndex.from_product([range(2), list('ab')])
-    MultiIndex(levels=[[0, 1], ['a', 'b']],
-               codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
-    >>> i[2:]
-    MultiIndex(levels=[[0, 1], ['a', 'b']],
-               codes=[[1, 1], [0, 1]])
-    The 0 from the first level is not represented
-    and can be removed
-    >>> i[2:].remove_unused_levels()
-    MultiIndex(levels=[[1], ['a', 'b']],
-               codes=[[0, 0], [0, 1]])
-    """
-    import pandas.core.algorithms as algos
-
-    new_levels = []
-    new_labels = []
-
-    changed = False
-    for lev, lab in zip(self.levels, self.labels):
-
-        # Since few levels are typically unused, bincount() is more
-        # efficient than unique() - however it only accepts positive values
-        # (and drops order):
-        uniques = np.where(np.bincount(lab + 1) > 0)[0] - 1
-        has_na = int(len(uniques) and (uniques[0] == -1))
-
-        if len(uniques) != len(lev) + has_na:
-            # We have unused levels
-            changed = True
-
-            # Recalculate uniques, now preserving order.
-            # Can easily be cythonized by exploiting the already existing
-            # "uniques" and stop parsing "lab" when all items are found:
-            uniques = algos.unique(lab)
-            if has_na:
-                na_idx = np.where(uniques == -1)[0]
-                # Just ensure that -1 is in first position:
-                uniques[[0, na_idx[0]]] = uniques[[na_idx[0], 0]]
+def count_not_none(*args) -> int:
+    """Return how many of the given arguments are not None.
+
-            # labels get mapped from uniques to 0:len(uniques)
-            # -1 (if present) is mapped to last position
-            label_mapping = np.zeros(len(lev) + has_na)
-            # ... and reassigned value -1:
-            label_mapping[uniques] = np.arange(len(uniques)) - has_na
-
-            lab = label_mapping[lab]
-
-            # new levels are simple
-            lev = lev.take(uniques[has_na:])
-
-        new_levels.append(lev)
-        new_labels.append(lab)
-
-    result = self._shallow_copy()
-
-    if changed:
-        result._reset_identity()
-        result._set_levels(new_levels, validate=False)
-        result._set_labels(new_labels, validate=False)
-
-    return result
+    Copied from pandas.core.common.count_not_none (not part of the public API)
+    """
+    return sum(arg is not None for arg in args)
diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py
index a812e7472ca..3e86ebbfd73 100644
--- a/xarray/core/rolling.py
+++ b/xarray/core/rolling.py
@@ -1,7 +1,5 @@
 import functools
-import warnings
 from collections import OrderedDict
-from distutils.version import LooseVersion
 
 import numpy as np
 
@@ -71,17 +69,6 @@ def __init__(self, obj, windows, min_periods=None, center=False):
         -------
         rolling : type of input argument
         """
-
-        if bottleneck is not None and (
-            LooseVersion(bottleneck.__version__) < LooseVersion("1.0")
-        ):
-            warnings.warn(
-                "xarray requires bottleneck version of 1.0 or "
-                "greater for rolling operations. Rolling "
-                "aggregation methods will use numpy instead"
-                "of bottleneck."
-            )
-
         if len(windows) != 1:
             raise ValueError("exactly one dim/window should be provided")
 
@@ -332,14 +319,6 @@ def _bottleneck_reduce(self, func, **kwargs):
 
         padded = self.obj.variable
         if self.center:
-            if (
-                LooseVersion(np.__version__) < LooseVersion("1.13")
-                and self.obj.dtype.kind == "b"
-            ):
-                # with numpy < 1.13 bottleneck cannot handle np.nan-Boolean
-                # mixed array correctly. We cast boolean array to float.
-                padded = padded.astype(float)
-
             if isinstance(padded.data, dask_array_type):
                 # Workaround to make the padded chunk size is larger than
                 # self.window-1
diff --git a/xarray/core/rolling_exp.py b/xarray/core/rolling_exp.py
index 2139d246f46..ac6768e8a9c 100644
--- a/xarray/core/rolling_exp.py
+++ b/xarray/core/rolling_exp.py
@@ -1,5 +1,6 @@
 import numpy as np
 
+from .pdcompat import count_not_none
 from .pycompat import dask_array_type
 
 
@@ -24,13 +25,11 @@ def move_exp_nanmean(array, *, axis, alpha):
 
 def _get_center_of_mass(comass, span, halflife, alpha):
     """
-    Vendored from pandas.core.window._get_center_of_mass
+    Vendored from pandas.core.window.common._get_center_of_mass
 
     See licenses/PANDAS_LICENSE for the function's license
     """
-    from pandas.core import common as com
-
-    valid_count = com.count_not_none(comass, span, halflife, alpha)
+    valid_count = count_not_none(comass, span, halflife, alpha)
     if valid_count > 1:
         raise ValueError("comass, span, halflife, and alpha " "are mutually exclusive")
 
diff --git a/xarray/core/utils.py b/xarray/core/utils.py
index 0d730edeaeb..12024ff8245 100644
--- a/xarray/core/utils.py
+++ b/xarray/core/utils.py
@@ -444,7 +444,7 @@ class OrderedSet(MutableSet[T]):
     __slots__ = ("_ordered_dict",)
 
     def __init__(self, values: AbstractSet[T] = None):
-        self._ordered_dict = OrderedDict()  # type: MutableMapping[T, None]
+        self._ordered_dict: MutableMapping[T, None] = OrderedDict()
         if values is not None:
             # Disable type checking - both mypy and PyCharm believes that
             # we're altering the type of self in place (see signature of
diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py
index f69a8af7a2f..e070ea16855 100644
--- a/xarray/plot/utils.py
+++ b/xarray/plot/utils.py
@@ -2,7 +2,6 @@
 import textwrap
 import warnings
 from datetime import datetime
-from distutils.version import LooseVersion
 from inspect import getfullargspec
 from typing import Any, Iterable, Mapping, Tuple, Union
 
@@ -13,12 +12,9 @@
 from ..core.utils import is_scalar
 
 try:
-    import nc_time_axis
+    import nc_time_axis  # noqa: F401
 
-    if LooseVersion(nc_time_axis.__version__) < LooseVersion("1.2.0"):
-        nc_time_axis_available = False
-    else:
-        nc_time_axis_available = True
+    nc_time_axis_available = True
 except ImportError:
     nc_time_axis_available = False
 
@@ -52,15 +48,7 @@ def register_pandas_datetime_converter_if_needed():
     # based on https://github.com/pandas-dev/pandas/pull/17710
     global _registered
     if not _registered:
-        try:
-            from pandas.plotting import register_matplotlib_converters
-
-            register_matplotlib_converters()
-        except ImportError:
-            # register_matplotlib_converters new in pandas 0.22
-            from pandas.tseries import converter
-
-            converter.register()
+        pd.plotting.register_matplotlib_converters()
         _registered = True
 
 
diff --git a/xarray/testing.py b/xarray/testing.py
index 787ec1aadb0..f01cbe896b9 100644
--- a/xarray/testing.py
+++ b/xarray/testing.py
@@ -1,6 +1,6 @@
 """Testing functions exposed to the user API"""
 from collections import OrderedDict
-from typing import Hashable, Union
+from typing import Hashable, Set, Union
 
 import numpy as np
 import pandas as pd
@@ -162,7 +162,7 @@ def _assert_indexes_invariants_checks(indexes, possible_coord_variables, dims):
 
 def _assert_variable_invariants(var: Variable, name: Hashable = None):
     if name is None:
-        name_or_empty = ()  # type: tuple
+        name_or_empty: tuple = ()
     else:
         name_or_empty = (name,)
     assert isinstance(var._dims, tuple), name_or_empty + (var._dims,)
@@ -212,7 +212,7 @@ def _assert_dataset_invariants(ds: Dataset):
 
     assert type(ds._dims) is dict, ds._dims
     assert all(isinstance(v, int) for v in ds._dims.values()), ds._dims
-    var_dims = set()  # type: set
+    var_dims: Set[Hashable] = set()
     for v in ds._variables.values():
         var_dims.update(v.dims)
     assert ds._dims.keys() == var_dims, (set(ds._dims), var_dims)
diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py
index 4f5a3e37888..8b4d3073e1c 100644
--- a/xarray/tests/__init__.py
+++ b/xarray/tests/__init__.py
@@ -17,11 +17,7 @@
 from xarray.core.options import set_options
 from xarray.plot.utils import import_seaborn
 
-try:
-    from pandas.testing import assert_frame_equal
-except ImportError:
-    # old location, for pandas < 0.20
-    from pandas.util.testing import assert_frame_equal  # noqa: F401
+from pandas.testing import assert_frame_equal  # noqa: F401
 
 # import mpl and change the backend before other mpl imports
 try:
@@ -61,7 +57,6 @@ def LooseVersion(vstring):
 
 
 has_matplotlib, requires_matplotlib = _importorskip("matplotlib")
-has_matplotlib2, requires_matplotlib2 = _importorskip("matplotlib", minversion="2")
 has_scipy, requires_scipy = _importorskip("scipy")
 has_pydap, requires_pydap = _importorskip("pydap.client")
 has_netCDF4, requires_netCDF4 = _importorskip("netCDF4")
@@ -69,30 +64,17 @@ def LooseVersion(vstring):
 has_pynio, requires_pynio = _importorskip("Nio")
 has_pseudonetcdf, requires_pseudonetcdf = _importorskip("PseudoNetCDF")
 has_cftime, requires_cftime = _importorskip("cftime")
-has_nc_time_axis, requires_nc_time_axis = _importorskip(
-    "nc_time_axis", minversion="1.2.0"
-)
-has_cftime_1_0_2_1, requires_cftime_1_0_2_1 = _importorskip(
-    "cftime", minversion="1.0.2.1"
-)
 has_dask, requires_dask = _importorskip("dask")
 has_bottleneck, requires_bottleneck = _importorskip("bottleneck")
+has_nc_time_axis, requires_nc_time_axis = _importorskip("nc_time_axis")
 has_rasterio, requires_rasterio = _importorskip("rasterio")
-has_pathlib, requires_pathlib = _importorskip("pathlib")
-has_zarr, requires_zarr = _importorskip("zarr", minversion="2.2")
-has_np113, requires_np113 = _importorskip("numpy", minversion="1.13.0")
+has_zarr, requires_zarr = _importorskip("zarr")
 has_iris, requires_iris = _importorskip("iris")
 has_cfgrib, requires_cfgrib = _importorskip("cfgrib")
 has_numbagg, requires_numbagg = _importorskip("numbagg")
 has_sparse, requires_sparse = _importorskip("sparse")
 
 # some special cases
-has_h5netcdf07, requires_h5netcdf07 = _importorskip("h5netcdf", minversion="0.7")
-has_h5py29, requires_h5py29 = _importorskip("h5py", minversion="2.9.0")
-has_h5fileobj = has_h5netcdf07 and has_h5py29
-requires_h5fileobj = pytest.mark.skipif(
-    not has_h5fileobj, reason="requires h5py>2.9.0 & h5netcdf>0.7"
-)
 has_scipy_or_netCDF4 = has_scipy or has_netCDF4
 requires_scipy_or_netCDF4 = pytest.mark.skipif(
     not has_scipy_or_netCDF4, reason="requires scipy or netCDF4"
@@ -101,8 +83,6 @@ def LooseVersion(vstring):
 requires_cftime_or_netCDF4 = pytest.mark.skipif(
     not has_cftime_or_netCDF4, reason="requires cftime or netCDF4"
 )
-if not has_pathlib:
-    has_pathlib, requires_pathlib = _importorskip("pathlib2")
 try:
     import_seaborn()
     has_seaborn = True
@@ -116,10 +96,7 @@ def LooseVersion(vstring):
 if has_dask:
     import dask
 
-    if LooseVersion(dask.__version__) < "0.18":
-        dask.set_options(get=dask.get)
-    else:
-        dask.config.set(scheduler="single-threaded")
+    dask.config.set(scheduler="single-threaded")
 
 flaky = pytest.mark.flaky
 network = pytest.mark.network
diff --git a/xarray/tests/test_accessor_str.py b/xarray/tests/test_accessor_str.py
index 56bf6dbb3a2..5cd815eebf0 100644
--- a/xarray/tests/test_accessor_str.py
+++ b/xarray/tests/test_accessor_str.py
@@ -56,7 +56,7 @@ def dtype(request):
 def test_dask():
     import dask.array as da
 
-    arr = da.from_array(["a", "b", "c"])
+    arr = da.from_array(["a", "b", "c"], chunks=-1)
     xarr = xr.DataArray(arr)
 
     result = xarr.str.len().compute()
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 87958824c7b..0120e2ca0fe 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -14,6 +14,7 @@
 
 import numpy as np
 import pandas as pd
+from pandas.errors import OutOfBoundsDatetime
 import pytest
 
 import xarray as xr
@@ -51,10 +52,8 @@
     requires_cfgrib,
     requires_cftime,
     requires_dask,
-    requires_h5fileobj,
     requires_h5netcdf,
     requires_netCDF4,
-    requires_pathlib,
     requires_pseudonetcdf,
     requires_pydap,
     requires_pynio,
@@ -80,13 +79,6 @@
 except ImportError:
     pass
 
-try:
-    from pandas.errors import OutOfBoundsDatetime
-except ImportError:
-    # pandas < 0.20
-    from pandas.tslib import OutOfBoundsDatetime
-
-
 ON_WINDOWS = sys.platform == "win32"
 
 
@@ -233,11 +225,11 @@ class NetCDF3Only:
 
 
 class DatasetIOBase:
-    engine = None  # type: Optional[str]
-    file_format = None  # type: Optional[str]
+    engine: Optional[str] = None
+    file_format: Optional[str] = None
 
     def create_store(self):
-        raise NotImplementedError
+        raise NotImplementedError()
 
     @contextlib.contextmanager
     def roundtrip(
@@ -1355,19 +1347,6 @@ def test_unsorted_index_raises(self):
             except IndexError as err:
                 assert "first by calling .load" in str(err)
 
-    def test_88_character_filename_segmentation_fault(self):
-        # should be fixed in netcdf4 v1.3.1
-        with mock.patch("netCDF4.__version__", "1.2.4"):
-            with warnings.catch_warnings():
-                message = (
-                    "A segmentation fault may occur when the "
-                    "file path has exactly 88 characters"
-                )
-                warnings.filterwarnings("error", message)
-                with pytest.raises(Warning):
-                    # Need to construct 88 character filepath
-                    xr.Dataset().to_netcdf("a" * (88 - len(os.getcwd()) - 1))
-
     def test_setncattr_string(self):
         list_of_strings = ["list", "of", "strings"]
         one_element_list_of_strings = ["one element"]
@@ -2334,7 +2313,7 @@ def test_dump_encodings_h5py(self):
             assert actual.x.encoding["compression_opts"] is None
 
 
-@requires_h5fileobj
+@requires_h5netcdf
 class TestH5NetCDFFileObject(TestH5NetCDFData):
     engine = "h5netcdf"
 
@@ -2754,7 +2733,6 @@ def test_open_mfdataset_2d(self):
                                 (2, 2, 2, 2),
                             )
 
-    @requires_pathlib
     def test_open_mfdataset_pathlib(self):
         original = Dataset({"foo": ("x", np.random.randn(10))})
         with create_tmp_file() as tmp1:
@@ -2768,7 +2746,6 @@ def test_open_mfdataset_pathlib(self):
                 ) as actual:
                     assert_identical(original, actual)
 
-    @requires_pathlib
     def test_open_mfdataset_2d_pathlib(self):
         original = Dataset({"foo": (["x", "y"], np.random.randn(10, 8))})
         with create_tmp_file() as tmp1:
@@ -2903,7 +2880,6 @@ def test_save_mfdataset_invalid_dataarray(self):
         with raises_regex(TypeError, "supports writing Dataset"):
             save_mfdataset([da], ["dataarray"])
 
-    @requires_pathlib
     def test_save_mfdataset_pathlib_roundtrip(self):
         original = Dataset({"foo": ("x", np.random.randn(10))})
         datasets = [original.isel(x=slice(5)), original.isel(x=slice(5, 10))]
@@ -4231,7 +4207,6 @@ def test_dataarray_to_netcdf_return_bytes(self):
         output = data.to_netcdf()
         assert isinstance(output, bytes)
 
-    @requires_pathlib
     def test_dataarray_to_netcdf_no_name_pathlib(self):
         original_da = DataArray(np.arange(12).reshape((3, 4)))
 
diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py
index fcc9acf75bb..e49dc72abdd 100644
--- a/xarray/tests/test_cftimeindex.py
+++ b/xarray/tests/test_cftimeindex.py
@@ -15,13 +15,7 @@
 )
 from xarray.tests import assert_array_equal, assert_identical
 
-from . import (
-    has_cftime,
-    has_cftime_1_0_2_1,
-    has_cftime_or_netCDF4,
-    raises_regex,
-    requires_cftime,
-)
+from . import has_cftime, has_cftime_or_netCDF4, raises_regex, requires_cftime
 from .test_coding_times import (
     _ALL_CALENDARS,
     _NON_STANDARD_CALENDARS,
@@ -175,14 +169,14 @@ def index_with_name(date_type):
     return CFTimeIndex(dates, name="foo")
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 @pytest.mark.parametrize(("name", "expected_name"), [("bar", "bar"), (None, "foo")])
 def test_constructor_with_name(index_with_name, name, expected_name):
     result = CFTimeIndex(index_with_name, name=name).name
     assert result == expected_name
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 def test_assert_all_valid_date_type(date_type, index):
     import cftime
 
@@ -203,7 +197,7 @@ def test_assert_all_valid_date_type(date_type, index):
     assert_all_valid_date_type(np.array([date_type(1, 1, 1), date_type(1, 2, 1)]))
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 @pytest.mark.parametrize(
     ("field", "expected"),
     [
@@ -221,21 +215,21 @@ def test_cftimeindex_field_accessors(index, field, expected):
     assert_array_equal(result, expected)
 
 
-@pytest.mark.skipif(not has_cftime_1_0_2_1, reason="cftime not installed")
+@requires_cftime
 def test_cftimeindex_dayofyear_accessor(index):
     result = index.dayofyear
     expected = [date.dayofyr for date in index]
     assert_array_equal(result, expected)
 
 
-@pytest.mark.skipif(not has_cftime_1_0_2_1, reason="cftime not installed")
+@requires_cftime
 def test_cftimeindex_dayofweek_accessor(index):
     result = index.dayofweek
     expected = [date.dayofwk for date in index]
     assert_array_equal(result, expected)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 @pytest.mark.parametrize(
     ("string", "date_args", "reso"),
     [
@@ -255,7 +249,7 @@ def test_parse_iso8601_with_reso(date_type, string, date_args, reso):
     assert result_reso == expected_reso
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 def test_parse_string_to_bounds_year(date_type, dec_days):
     parsed = date_type(2, 2, 10, 6, 2, 8, 1)
     expected_start = date_type(2, 1, 1)
@@ -265,7 +259,7 @@ def test_parse_string_to_bounds_year(date_type, dec_days):
     assert result_end == expected_end
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 def test_parse_string_to_bounds_month_feb(date_type, feb_days):
     parsed = date_type(2, 2, 10, 6, 2, 8, 1)
     expected_start = date_type(2, 2, 1)
@@ -275,7 +269,7 @@ def test_parse_string_to_bounds_month_feb(date_type, feb_days):
     assert result_end == expected_end
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 def test_parse_string_to_bounds_month_dec(date_type, dec_days):
     parsed = date_type(2, 12, 1)
     expected_start = date_type(2, 12, 1)
@@ -285,7 +279,7 @@ def test_parse_string_to_bounds_month_dec(date_type, dec_days):
     assert result_end == expected_end
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 @pytest.mark.parametrize(
     ("reso", "ex_start_args", "ex_end_args"),
     [
@@ -307,13 +301,13 @@ def test_parsed_string_to_bounds_sub_monthly(
     assert result_end == expected_end
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 def test_parsed_string_to_bounds_raises(date_type):
     with pytest.raises(KeyError):
         _parsed_string_to_bounds(date_type, "a", date_type(1, 1, 1))
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 def test_get_loc(date_type, index):
     result = index.get_loc("0001")
     assert result == slice(0, 2)
@@ -328,7 +322,7 @@ def test_get_loc(date_type, index):
         index.get_loc("1234")
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 @pytest.mark.parametrize("kind", ["loc", "getitem"])
 def test_get_slice_bound(date_type, index, kind):
     result = index.get_slice_bound("0001", "left", kind)
@@ -348,7 +342,7 @@ def test_get_slice_bound(date_type, index, kind):
     assert result == expected
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 @pytest.mark.parametrize("kind", ["loc", "getitem"])
 def test_get_slice_bound_decreasing_index(date_type, monotonic_decreasing_index, kind):
     result = monotonic_decreasing_index.get_slice_bound("0001", "left", kind)
@@ -372,7 +366,7 @@ def test_get_slice_bound_decreasing_index(date_type, monotonic_decreasing_index,
     assert result == expected
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 @pytest.mark.parametrize("kind", ["loc", "getitem"])
 def test_get_slice_bound_length_one_index(date_type, length_one_index, kind):
     result = length_one_index.get_slice_bound("0001", "left", kind)
@@ -392,19 +386,19 @@ def test_get_slice_bound_length_one_index(date_type, length_one_index, kind):
     assert result == expected
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 def test_string_slice_length_one_index(length_one_index):
     da = xr.DataArray([1], coords=[length_one_index], dims=["time"])
     result = da.sel(time=slice("0001", "0001"))
     assert_identical(result, da)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 def test_date_type_property(date_type, index):
     assert index.date_type is date_type
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 def test_contains(date_type, index):
     assert "0001-01-01" in index
     assert "0001" in index
@@ -413,7 +407,7 @@ def test_contains(date_type, index):
     assert date_type(3, 1, 1) not in index
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 def test_groupby(da):
     result = da.groupby("time.month").sum("time")
     expected = xr.DataArray([4, 6], coords=[[1, 2]], dims=["month"])
@@ -427,7 +421,7 @@ def test_groupby(da):
 }
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 @pytest.mark.parametrize(
     "sel_arg",
     list(SEL_STRING_OR_LIST_TESTS.values()),
@@ -439,7 +433,7 @@ def test_sel_string_or_list(da, index, sel_arg):
     assert_identical(result, expected)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 def test_sel_date_slice_or_list(da, index, date_type):
     expected = xr.DataArray([1, 2], coords=[index[:2]], dims=["time"])
     result = da.sel(time=slice(date_type(1, 1, 1), date_type(1, 12, 30)))
@@ -449,14 +443,14 @@ def test_sel_date_slice_or_list(da, index, date_type):
     assert_identical(result, expected)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 def test_sel_date_scalar(da, date_type, index):
     expected = xr.DataArray(1).assign_coords(time=index[0])
     result = da.sel(time=date_type(1, 1, 1))
     assert_identical(result, expected)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 @pytest.mark.parametrize(
     "sel_kwargs",
     [{"method": "nearest"}, {"method": "nearest", "tolerance": timedelta(days=70)}],
@@ -471,7 +465,7 @@ def test_sel_date_scalar_nearest(da, date_type, index, sel_kwargs):
     assert_identical(result, expected)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 @pytest.mark.parametrize(
     "sel_kwargs",
     [{"method": "pad"}, {"method": "pad", "tolerance": timedelta(days=365)}],
@@ -486,7 +480,7 @@ def test_sel_date_scalar_pad(da, date_type, index, sel_kwargs):
     assert_identical(result, expected)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 @pytest.mark.parametrize(
     "sel_kwargs",
     [{"method": "backfill"}, {"method": "backfill", "tolerance": timedelta(days=365)}],
@@ -501,7 +495,7 @@ def test_sel_date_scalar_backfill(da, date_type, index, sel_kwargs):
     assert_identical(result, expected)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 @pytest.mark.parametrize(
     "sel_kwargs",
     [
@@ -515,7 +509,7 @@ def test_sel_date_scalar_tolerance_raises(da, date_type, sel_kwargs):
         da.sel(time=date_type(1, 5, 1), **sel_kwargs)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 @pytest.mark.parametrize(
     "sel_kwargs",
     [{"method": "nearest"}, {"method": "nearest", "tolerance": timedelta(days=70)}],
@@ -534,7 +528,7 @@ def test_sel_date_list_nearest(da, date_type, index, sel_kwargs):
     assert_identical(result, expected)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 @pytest.mark.parametrize(
     "sel_kwargs",
     [{"method": "pad"}, {"method": "pad", "tolerance": timedelta(days=365)}],
@@ -545,7 +539,7 @@ def test_sel_date_list_pad(da, date_type, index, sel_kwargs):
     assert_identical(result, expected)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 @pytest.mark.parametrize(
     "sel_kwargs",
     [{"method": "backfill"}, {"method": "backfill", "tolerance": timedelta(days=365)}],
@@ -556,7 +550,7 @@ def test_sel_date_list_backfill(da, date_type, index, sel_kwargs):
     assert_identical(result, expected)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 @pytest.mark.parametrize(
     "sel_kwargs",
     [
@@ -570,7 +564,7 @@ def test_sel_date_list_tolerance_raises(da, date_type, sel_kwargs):
         da.sel(time=[date_type(1, 2, 1), date_type(1, 5, 1)], **sel_kwargs)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 def test_isel(da, index):
     expected = xr.DataArray(1).assign_coords(time=index[0])
     result = da.isel(time=0)
@@ -597,7 +591,7 @@ def range_args(date_type):
     ]
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 def test_indexing_in_series_getitem(series, index, scalar_args, range_args):
     for arg in scalar_args:
         assert series[arg] == 1
@@ -607,7 +601,7 @@ def test_indexing_in_series_getitem(series, index, scalar_args, range_args):
         assert series[arg].equals(expected)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 def test_indexing_in_series_loc(series, index, scalar_args, range_args):
     for arg in scalar_args:
         assert series.loc[arg] == 1
@@ -617,7 +611,7 @@ def test_indexing_in_series_loc(series, index, scalar_args, range_args):
         assert series.loc[arg].equals(expected)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 def test_indexing_in_series_iloc(series, index):
     expected = 1
     assert series.iloc[0] == expected
@@ -626,7 +620,7 @@ def test_indexing_in_series_iloc(series, index):
     assert series.iloc[:2].equals(expected)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 def test_series_dropna(index):
     series = pd.Series([0.0, 1.0, np.nan, np.nan], index=index)
     expected = series.iloc[:2]
@@ -634,7 +628,7 @@ def test_series_dropna(index):
     assert result.equals(expected)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 def test_indexing_in_dataframe_loc(df, index, scalar_args, range_args):
     expected = pd.Series([1], name=index[0])
     for arg in scalar_args:
@@ -647,7 +641,7 @@ def test_indexing_in_dataframe_loc(df, index, scalar_args, range_args):
         assert result.equals(expected)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 def test_indexing_in_dataframe_iloc(df, index):
     expected = pd.Series([1], name=index[0])
     result = df.iloc[0]
@@ -676,13 +670,13 @@ def test_concat_cftimeindex(date_type):
         assert not isinstance(da.indexes["time"], CFTimeIndex)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 def test_empty_cftimeindex():
     index = CFTimeIndex([])
     assert index.date_type is None
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 def test_cftimeindex_add(index):
     date_type = index.date_type
     expected_dates = [
@@ -697,7 +691,7 @@ def test_cftimeindex_add(index):
     assert isinstance(result, CFTimeIndex)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS)
 def test_cftimeindex_add_timedeltaindex(calendar):
     a = xr.cftime_range("2000", periods=5, calendar=calendar)
@@ -708,7 +702,7 @@ def test_cftimeindex_add_timedeltaindex(calendar):
     assert isinstance(result, CFTimeIndex)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 def test_cftimeindex_radd(index):
     date_type = index.date_type
     expected_dates = [
@@ -723,7 +717,7 @@ def test_cftimeindex_radd(index):
     assert isinstance(result, CFTimeIndex)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS)
 def test_timedeltaindex_add_cftimeindex(calendar):
     a = xr.cftime_range("2000", periods=5, calendar=calendar)
@@ -734,7 +728,7 @@ def test_timedeltaindex_add_cftimeindex(calendar):
     assert isinstance(result, CFTimeIndex)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 def test_cftimeindex_sub(index):
     date_type = index.date_type
     expected_dates = [
@@ -750,7 +744,7 @@ def test_cftimeindex_sub(index):
     assert isinstance(result, CFTimeIndex)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS)
 def test_cftimeindex_sub_cftimeindex(calendar):
     a = xr.cftime_range("2000", periods=5, calendar=calendar)
@@ -761,7 +755,7 @@ def test_cftimeindex_sub_cftimeindex(calendar):
     assert isinstance(result, pd.TimedeltaIndex)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS)
 def test_cftimeindex_sub_cftime_datetime(calendar):
     a = xr.cftime_range("2000", periods=5, calendar=calendar)
@@ -771,7 +765,7 @@ def test_cftimeindex_sub_cftime_datetime(calendar):
     assert isinstance(result, pd.TimedeltaIndex)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS)
 def test_cftime_datetime_sub_cftimeindex(calendar):
     a = xr.cftime_range("2000", periods=5, calendar=calendar)
@@ -781,7 +775,7 @@ def test_cftime_datetime_sub_cftimeindex(calendar):
     assert isinstance(result, pd.TimedeltaIndex)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS)
 def test_cftimeindex_sub_timedeltaindex(calendar):
     a = xr.cftime_range("2000", periods=5, calendar=calendar)
@@ -792,13 +786,13 @@ def test_cftimeindex_sub_timedeltaindex(calendar):
     assert isinstance(result, CFTimeIndex)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 def test_cftimeindex_rsub(index):
     with pytest.raises(TypeError):
         timedelta(days=1) - index
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 @pytest.mark.parametrize("freq", ["D", timedelta(days=1)])
 def test_cftimeindex_shift(index, freq):
     date_type = index.date_type
@@ -814,14 +808,14 @@ def test_cftimeindex_shift(index, freq):
     assert isinstance(result, CFTimeIndex)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 def test_cftimeindex_shift_invalid_n():
     index = xr.cftime_range("2000", periods=3)
     with pytest.raises(TypeError):
         index.shift("a", "D")
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 def test_cftimeindex_shift_invalid_freq():
     index = xr.cftime_range("2000", periods=3)
     with pytest.raises(TypeError):
@@ -850,18 +844,18 @@ def test_parse_array_of_cftime_strings():
     np.testing.assert_array_equal(result, expected)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 @pytest.mark.parametrize("calendar", _ALL_CALENDARS)
 def test_strftime_of_cftime_array(calendar):
     date_format = "%Y%m%d%H%M"
     cf_values = xr.cftime_range("2000", periods=5, calendar=calendar)
     dt_values = pd.date_range("2000", periods=5)
-    expected = dt_values.strftime(date_format)
+    expected = pd.Index(dt_values.strftime(date_format))
     result = cf_values.strftime(date_format)
     assert result.equals(expected)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 @pytest.mark.parametrize("calendar", _ALL_CALENDARS)
 @pytest.mark.parametrize("unsafe", [False, True])
 def test_to_datetimeindex(calendar, unsafe):
@@ -879,7 +873,7 @@ def test_to_datetimeindex(calendar, unsafe):
     assert isinstance(result, pd.DatetimeIndex)
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 @pytest.mark.parametrize("calendar", _ALL_CALENDARS)
 def test_to_datetimeindex_out_of_range(calendar):
     index = xr.cftime_range("0001", periods=5, calendar=calendar)
@@ -887,7 +881,7 @@ def test_to_datetimeindex_out_of_range(calendar):
         index.to_datetimeindex()
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 @pytest.mark.parametrize("calendar", ["all_leap", "360_day"])
 def test_to_datetimeindex_feb_29(calendar):
     index = xr.cftime_range("2001-02-28", periods=2, calendar=calendar)
@@ -895,7 +889,7 @@ def test_to_datetimeindex_feb_29(calendar):
         index.to_datetimeindex()
 
 
-@pytest.mark.skipif(not has_cftime, reason="cftime not installed")
+@requires_cftime
 @pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/24263")
 def test_multiindex():
     index = xr.cftime_range("2001-01-01", periods=100, calendar="360_day")
diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py
index bbc8dd82c95..c4f32795b59 100644
--- a/xarray/tests/test_cftimeindex_resample.py
+++ b/xarray/tests/test_cftimeindex_resample.py
@@ -8,7 +8,6 @@
 from xarray.core.resample_cftime import CFTimeGrouper
 
 pytest.importorskip("cftime")
-pytest.importorskip("pandas", minversion="0.24")
 
 
 # Create a list of pairs of similar-length initial and resample frequencies
diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index a778ff8147f..406b9c1ba69 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -4,6 +4,8 @@
 import numpy as np
 import pandas as pd
 import pytest
+from pandas.errors import OutOfBoundsDatetime
+
 
 from xarray import DataArray, Dataset, Variable, coding, decode_cf
 from xarray.coding.times import (
@@ -28,11 +30,6 @@
     requires_cftime_or_netCDF4,
 )
 
-try:
-    from pandas.errors import OutOfBoundsDatetime
-except ImportError:
-    # pandas < 0.20
-    from pandas.tslib import OutOfBoundsDatetime
 
 _NON_STANDARD_CALENDARS_SET = {
     "noleap",
@@ -119,7 +116,9 @@ def test_cf_datetime(num_dates, units, calendar):
         warnings.filterwarnings("ignore", "Unable to decode time axis")
         actual = coding.times.decode_cf_datetime(num_dates, units, calendar)
 
-    abs_diff = np.atleast_1d(abs(actual - expected)).astype(np.timedelta64)
+    abs_diff = np.asarray(abs(actual - expected)).ravel()
+    abs_diff = pd.to_timedelta(abs_diff.tolist()).to_numpy()
+
     # once we no longer support versions of netCDF4 older than 1.1.5,
     # we could do this check with near microsecond accuracy:
     # https://github.com/Unidata/netcdf4-python/issues/355
@@ -829,8 +828,7 @@ def test_encode_cf_datetime_overflow(shape):
 
 
 def test_encode_cf_datetime_pandas_min():
-    # Test that encode_cf_datetime does not fail for versions
-    # of pandas < 0.21.1 (GH 2623).
+    # GH 2623
     dates = pd.date_range("2000", periods=3)
     num, units, calendar = encode_cf_datetime(dates)
     expected_num = np.array([0.0, 1.0, 2.0])
diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py
index 6037669ac07..0d1e5951b32 100644
--- a/xarray/tests/test_combine.py
+++ b/xarray/tests/test_combine.py
@@ -714,7 +714,7 @@ def test_check_for_impossible_ordering(self):
 
 
 @pytest.mark.filterwarnings(
-    "ignore:In xarray version 0.14 `auto_combine` " "will be deprecated"
+    "ignore:In xarray version 0.15 `auto_combine` " "will be deprecated"
 )
 @pytest.mark.filterwarnings("ignore:Also `open_mfdataset` will no longer")
 @pytest.mark.filterwarnings("ignore:The datasets supplied")
diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py
index 784a988b7cc..3df84c0460b 100644
--- a/xarray/tests/test_computation.py
+++ b/xarray/tests/test_computation.py
@@ -2,7 +2,6 @@
 import operator
 import pickle
 from collections import OrderedDict
-from distutils.version import LooseVersion
 
 import numpy as np
 import pandas as pd
@@ -942,12 +941,6 @@ def test_dot(use_dask):
     assert (actual.data == np.einsum("ij,ijk->k", a, b)).all()
     assert isinstance(actual.variable.data, type(da_a.variable.data))
 
-    if use_dask:
-        import dask
-
-        if LooseVersion(dask.__version__) < LooseVersion("0.17.3"):
-            pytest.skip("needs dask.array.einsum")
-
     # for only a single array is passed without dims argument, just return
     # as is
     actual = xr.dot(da_a)
@@ -1008,7 +1001,7 @@ def test_dot(use_dask):
     assert (actual.data == np.zeros(actual.shape)).all()
 
     # Invalid cases
-    if not use_dask or LooseVersion(dask.__version__) > LooseVersion("0.17.4"):
+    if not use_dask:
         with pytest.raises(TypeError):
             xr.dot(da_a, dims="a", invalid=None)
     with pytest.raises(TypeError):
diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py
index 00428f70966..1114027387e 100644
--- a/xarray/tests/test_concat.py
+++ b/xarray/tests/test_concat.py
@@ -6,6 +6,7 @@
 
 from xarray import DataArray, Dataset, Variable, concat
 from xarray.core import dtypes, merge
+
 from . import (
     InaccessibleArray,
     assert_array_equal,
@@ -41,8 +42,10 @@ def test_concat_compat():
     for var in ["has_x", "no_x_y"]:
         assert "y" not in result[var]
 
+    with raises_regex(ValueError, "coordinates in some datasets but not others"):
+        concat([ds1, ds2], dim="q")
     with raises_regex(ValueError, "'q' is not present in all datasets"):
-        concat([ds1, ds2], dim="q", data_vars="all", compat="broadcast_equals")
+        concat([ds2, ds1], dim="q")
 
 
 class TestConcatDataset:
@@ -89,7 +92,11 @@ def test_concat_coords_kwarg(self, data, dim, coords):
             assert_equal(data["extra"], actual["extra"])
 
     def test_concat(self, data):
-        split_data = [data.isel(dim1=slice(3)), data.isel(dim1=slice(3, None))]
+        split_data = [
+            data.isel(dim1=slice(3)),
+            data.isel(dim1=3),
+            data.isel(dim1=slice(4, None)),
+        ]
         assert_identical(data, concat(split_data, "dim1"))
 
     def test_concat_dim_precedence(self, data):
diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py
index 76b3ed1a8d6..c142ca7643b 100644
--- a/xarray/tests/test_dask.py
+++ b/xarray/tests/test_dask.py
@@ -46,16 +46,9 @@ def __call__(self, dsk, keys, **kwargs):
         return dask.get(dsk, keys, **kwargs)
 
 
-def _set_dask_scheduler(scheduler=dask.get):
-    """ Backwards compatible way of setting scheduler. """
-    if LooseVersion(dask.__version__) >= LooseVersion("0.18.0"):
-        return dask.config.set(scheduler=scheduler)
-    return dask.set_options(get=scheduler)
-
-
 def raise_if_dask_computes(max_computes=0):
     scheduler = CountingScheduler(max_computes)
-    return _set_dask_scheduler(scheduler)
+    return dask.config.set(scheduler=scheduler)
 
 
 def test_raise_if_dask_computes():
@@ -67,9 +60,7 @@ def test_raise_if_dask_computes():
 
 class DaskTestCase:
     def assertLazyAnd(self, expected, actual, test):
-        with _set_dask_scheduler(dask.get):
-            # dask.get is the syncronous scheduler, which get's set also by
-            # dask.config.set(scheduler="syncronous") in current versions.
+        with dask.config.set(scheduler="synchronous"):
             test(actual, expected)
 
         if isinstance(actual, Dataset):
@@ -512,10 +503,7 @@ def counting_get(*args, **kwargs):
             count[0] += 1
             return dask.get(*args, **kwargs)
 
-        if dask.__version__ < "0.19.4":
-            ds.load(get=counting_get)
-        else:
-            ds.load(scheduler=counting_get)
+        ds.load(scheduler=counting_get)
 
         assert count[0] == 1
 
@@ -543,7 +531,7 @@ def test_dataarray_repr_legacy(self):
             <xarray.DataArray 'data' (x: 1)>
             {!r}
             Coordinates:
-                y        (x) int64 dask.array<chunksize=(1,)>
+                y        (x) int64 dask.array<chunksize=(1,), meta=np.ndarray>
             Dimensions without coordinates: x""".format(
                 data
             )
@@ -838,8 +826,6 @@ def build_dask_array(name):
     )
 
 
-# test both the perist method and the dask.persist function
-# the dask.persist function requires a new version of dask
 @pytest.mark.parametrize(
     "persist", [lambda x: x.persist(), lambda x: dask.persist(x)[0]]
 )
@@ -892,21 +878,12 @@ def test_dataarray_with_dask_coords():
 def test_basic_compute():
     ds = Dataset({"foo": ("x", range(5)), "bar": ("x", range(5))}).chunk({"x": 2})
     for get in [dask.threaded.get, dask.multiprocessing.get, dask.local.get_sync, None]:
-        with (
-            dask.config.set(scheduler=get)
-            if LooseVersion(dask.__version__) >= LooseVersion("0.19.4")
-            else dask.config.set(scheduler=get)
-            if LooseVersion(dask.__version__) >= LooseVersion("0.18.0")
-            else dask.set_options(get=get)
-        ):
+        with dask.config.set(scheduler=get):
             ds.compute()
             ds.foo.compute()
             ds.foo.variable.compute()
 
 
-@pytest.mark.skipif(
-    LooseVersion(dask.__version__) < LooseVersion("0.20.0"), reason="needs newer dask"
-)
 def test_dask_layers_and_dependencies():
     ds = Dataset({"foo": ("x", range(5)), "bar": ("x", range(5))}).chunk()
 
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index ea13d7c86fe..520174d31ec 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -26,7 +26,6 @@
     requires_bottleneck,
     requires_dask,
     requires_iris,
-    requires_np113,
     requires_numbagg,
     requires_scipy,
     requires_sparse,
@@ -159,9 +158,7 @@ def test_struct_array_dims(self):
         when dimension is a structured array.
         """
         # GH837, GH861
-        # checking array subraction when dims are the same
-        # note: names need to be in sorted order to align consistently with
-        # pandas < 0.24 and >= 0.24.
+        # checking array subtraction when dims are the same
         p_data = np.array(
             [("Abe", 180), ("Stacy", 150), ("Dick", 200)],
             dtype=[("name", "|S256"), ("height", object)],
@@ -3381,7 +3378,7 @@ def test_to_pandas(self):
 
         # roundtrips
         for shape in [(3,), (3, 4), (3, 4, 5)]:
-            if len(shape) > 2 and not LooseVersion(pd.__version__) < "0.25.0":
+            if len(shape) > 2 and LooseVersion(pd.__version__) >= "0.25.0":
                 continue
             dims = list("abc")[: len(shape)]
             da = DataArray(np.random.randn(*shape), dims=dims)
@@ -4195,12 +4192,12 @@ def test_rolling_wrapped_bottleneck(da, name, center, min_periods):
     assert_equal(actual, da["time"])
 
 
+@requires_dask
 @pytest.mark.parametrize("name", ("mean", "count"))
 @pytest.mark.parametrize("center", (True, False, None))
 @pytest.mark.parametrize("min_periods", (1, None))
 @pytest.mark.parametrize("window", (7, 8))
 def test_rolling_wrapped_dask(da_dask, name, center, min_periods, window):
-    pytest.importorskip("dask.array")
     # dask version
     rolling_obj = da_dask.rolling(time=window, min_periods=min_periods, center=center)
     actual = getattr(rolling_obj, name)().load()
@@ -4306,7 +4303,6 @@ def test_rolling_reduce(da, center, min_periods, window, name):
     assert actual.dims == expected.dims
 
 
-@requires_np113
 @pytest.mark.parametrize("center", (True, False))
 @pytest.mark.parametrize("min_periods", (None, 1, 2, 3))
 @pytest.mark.parametrize("window", (1, 2, 3, 4))
@@ -4667,7 +4663,6 @@ def test_no_dict():
         d.__dict__
 
 
-@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher")
 def test_subclass_slots():
     """Test that DataArray subclasses must explicitly define ``__slots__``.
 
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index 5d856c9f323..fdd5a419383 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -25,7 +25,7 @@
     open_dataset,
     set_options,
 )
-from xarray.core import dtypes, indexing, npcompat, utils
+from xarray.core import dtypes, indexing, utils
 from xarray.core.common import duck_array_ops, full_like
 from xarray.core.npcompat import IS_NEP18_ACTIVE
 from xarray.core.pycompat import integer_types
@@ -2142,9 +2142,7 @@ def test_drop_index_labels(self):
         expected = data.isel(x=slice(0, 0))
         assert_identical(expected, actual)
 
-        # This exception raised by pandas changed from ValueError -> KeyError
-        # in pandas 0.23.
-        with pytest.raises((ValueError, KeyError)):
+        with pytest.raises(KeyError):
             # not contained in axis
             data.drop(["c"], dim="x")
 
@@ -2492,13 +2490,8 @@ def test_expand_dims_error(self):
         )
         with raises_regex(TypeError, "value of new dimension"):
             original.expand_dims(OrderedDict((("d", 3.2),)))
-
-        # TODO: only the code under the if-statement is needed when python 3.5
-        #   is no longer supported.
-        python36_plus = sys.version_info[0] == 3 and sys.version_info[1] > 5
-        if python36_plus:
-            with raises_regex(ValueError, "both keyword and positional"):
-                original.expand_dims(OrderedDict((("d", 4),)), e=4)
+        with raises_regex(ValueError, "both keyword and positional"):
+            original.expand_dims(OrderedDict((("d", 4),)), e=4)
 
     def test_expand_dims_int(self):
         original = Dataset(
@@ -2605,21 +2598,6 @@ def test_expand_dims_mixed_int_and_coords(self):
         )
         assert_identical(actual, expected)
 
-    @pytest.mark.skipif(
-        sys.version_info[:2] > (3, 5),
-        reason="we only raise these errors for Python 3.5",
-    )
-    def test_expand_dims_kwargs_python35(self):
-        original = Dataset({"x": ("a", np.random.randn(3))})
-        with raises_regex(ValueError, "dim_kwargs isn't"):
-            original.expand_dims(e=["l", "m", "n"])
-        with raises_regex(TypeError, "must be an OrderedDict"):
-            original.expand_dims({"e": ["l", "m", "n"]})
-
-    @pytest.mark.skipif(
-        sys.version_info[:2] < (3, 6),
-        reason="keyword arguments are only ordered on Python 3.6+",
-    )
     def test_expand_dims_kwargs_python36plus(self):
         original = Dataset(
             {"x": ("a", np.random.randn(3)), "y": (["b", "a"], np.random.randn(4, 3))},
@@ -5554,7 +5532,7 @@ def test_differentiate(dask, edge_order):
     # along x
     actual = da.differentiate("x", edge_order)
     expected_x = xr.DataArray(
-        npcompat.gradient(da, da["x"], axis=0, edge_order=edge_order),
+        np.gradient(da, da["x"], axis=0, edge_order=edge_order),
         dims=da.dims,
         coords=da.coords,
     )
@@ -5569,7 +5547,7 @@ def test_differentiate(dask, edge_order):
     # along y
     actual = da.differentiate("y", edge_order)
     expected_y = xr.DataArray(
-        npcompat.gradient(da, da["y"], axis=1, edge_order=edge_order),
+        np.gradient(da, da["y"], axis=1, edge_order=edge_order),
         dims=da.dims,
         coords=da.coords,
     )
@@ -5612,7 +5590,7 @@ def test_differentiate_datetime(dask):
     # along x
     actual = da.differentiate("x", edge_order=1, datetime_unit="D")
     expected_x = xr.DataArray(
-        npcompat.gradient(
+        np.gradient(
             da, da["x"].variable._to_numeric(datetime_unit="D"), axis=0, edge_order=1
         ),
         dims=da.dims,
@@ -5649,7 +5627,7 @@ def test_differentiate_cftime(dask):
         da = da.chunk({"time": 4})
 
     actual = da.differentiate("time", edge_order=1, datetime_unit="D")
-    expected_data = npcompat.gradient(
+    expected_data = np.gradient(
         da, da["time"].variable._to_numeric(datetime_unit="D"), axis=0, edge_order=1
     )
     expected = xr.DataArray(expected_data, coords=da.coords, dims=da.dims)
@@ -5772,7 +5750,6 @@ def test_no_dict():
         d.__dict__
 
 
-@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher")
 def test_subclass_slots():
     """Test that Dataset subclasses must explicitly define ``__slots__``.
 
diff --git a/xarray/tests/test_distributed.py b/xarray/tests/test_distributed.py
index a3bea6db85f..b3c0ce37a54 100644
--- a/xarray/tests/test_distributed.py
+++ b/xarray/tests/test_distributed.py
@@ -3,8 +3,8 @@
 
 import pytest
 
-dask = pytest.importorskip("dask", minversion="0.18")  # isort:skip
-distributed = pytest.importorskip("distributed", minversion="1.21")  # isort:skip
+dask = pytest.importorskip("dask")  # isort:skip
+distributed = pytest.importorskip("distributed")  # isort:skip
 
 from dask.distributed import Client, Lock
 from distributed.utils_test import cluster, gen_cluster
diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py
index 766a391b57f..62ea19be97b 100644
--- a/xarray/tests/test_duck_array_ops.py
+++ b/xarray/tests/test_duck_array_ops.py
@@ -1,5 +1,4 @@
 import warnings
-from distutils.version import LooseVersion
 from textwrap import dedent
 
 import numpy as np
@@ -28,7 +27,6 @@
     arm_xfail,
     assert_array_equal,
     has_dask,
-    has_np113,
     raises_regex,
     requires_cftime,
     requires_dask,
@@ -353,7 +351,7 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim):
         warnings.filterwarnings("ignore", "All-NaN slice")
         warnings.filterwarnings("ignore", "invalid value encountered in")
 
-        if has_np113 and da.dtype.kind == "O" and skipna:
+        if da.dtype.kind == "O" and skipna:
             # Numpy < 1.13 does not handle object-type array.
             try:
                 if skipna:
@@ -531,12 +529,8 @@ def test_min_count(dim_num, dtype, dask, func, aggdim):
     min_count = 3
 
     actual = getattr(da, func)(dim=aggdim, skipna=True, min_count=min_count)
-
-    if LooseVersion(pd.__version__) >= LooseVersion("0.22.0"):
-        # min_count is only implenented in pandas > 0.22
-        expected = series_reduce(da, func, skipna=True, dim=aggdim, min_count=min_count)
-        assert_allclose(actual, expected)
-
+    expected = series_reduce(da, func, skipna=True, dim=aggdim, min_count=min_count)
+    assert_allclose(actual, expected)
     assert_dask_array(actual, dask)
 
 
diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py
index ba108b2dbaf..ae405015659 100644
--- a/xarray/tests/test_indexing.py
+++ b/xarray/tests/test_indexing.py
@@ -83,8 +83,7 @@ def test_convert_label_indexer(self):
             indexing.convert_label_indexer(mindex, 0)
         with pytest.raises(ValueError):
             indexing.convert_label_indexer(index, {"three": 0})
-        with pytest.raises((KeyError, IndexError)):
-            # pandas 0.21 changed this from KeyError to IndexError
+        with pytest.raises(IndexError):
             indexing.convert_label_indexer(mindex, (slice(None), 1, "no_level"))
 
     def test_convert_unsorted_datetime_index_raises(self):
diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py
index 99a72d68ad8..e3b29b86e4d 100644
--- a/xarray/tests/test_plot.py
+++ b/xarray/tests/test_plot.py
@@ -25,7 +25,6 @@
     raises_regex,
     requires_cftime,
     requires_matplotlib,
-    requires_matplotlib2,
     requires_nc_time_axis,
     requires_seaborn,
 )
@@ -360,7 +359,6 @@ def test_convenient_facetgrid(self):
             d[0].plot(x="x", y="y", col="z", ax=plt.gca())
 
     @pytest.mark.slow
-    @requires_matplotlib2
     def test_subplot_kws(self):
         a = easy_array((10, 15, 4))
         d = DataArray(a, dims=["y", "x", "z"])
@@ -1962,10 +1960,11 @@ def test_datetime_hue(self, hue_style):
         ds2.plot.scatter(x="A", y="B", hue="hue", hue_style=hue_style)
 
     def test_facetgrid_hue_style(self):
-        # Can't move this to pytest.mark.parametrize because py35-bare-minimum
-        # doesn't have mpl.
-        for hue_style, map_type in zip(
-            ["discrete", "continuous"], [list, mpl.collections.PathCollection]
+        # Can't move this to pytest.mark.parametrize because py36-bare-minimum
+        # doesn't have matplotlib.
+        for hue_style, map_type in (
+            ("discrete", list),
+            ("continuous", mpl.collections.PathCollection),
         ):
             g = self.ds.plot.scatter(
                 x="A", y="B", row="row", col="col", hue="hue", hue_style=hue_style
diff --git a/xarray/tests/test_ufuncs.py b/xarray/tests/test_ufuncs.py
index 1095cc360dd..26241152dfa 100644
--- a/xarray/tests/test_ufuncs.py
+++ b/xarray/tests/test_ufuncs.py
@@ -8,7 +8,7 @@
 
 from . import assert_array_equal
 from . import assert_identical as assert_identical_
-from . import mock, raises_regex, requires_np113
+from . import mock, raises_regex
 
 
 def assert_identical(a, b):
@@ -19,7 +19,6 @@ def assert_identical(a, b):
         assert_array_equal(a, b)
 
 
-@requires_np113
 def test_unary():
     args = [
         0,
@@ -32,7 +31,6 @@ def test_unary():
         assert_identical(a + 1, np.cos(a))
 
 
-@requires_np113
 def test_binary():
     args = [
         0,
@@ -49,7 +47,6 @@ def test_binary():
             assert_identical(t2 + 1, np.maximum(t2 + 1, t1))
 
 
-@requires_np113
 def test_binary_out():
     args = [
         1,
@@ -64,7 +61,6 @@ def test_binary_out():
         assert_identical(actual_exponent, arg)
 
 
-@requires_np113
 def test_groupby():
     ds = xr.Dataset({"a": ("x", [0, 0, 0])}, {"c": ("x", [0, 0, 1])})
     ds_grouped = ds.groupby("c")
@@ -87,7 +83,6 @@ def test_groupby():
         np.maximum(ds.a.variable, ds_grouped)
 
 
-@requires_np113
 def test_alignment():
     ds1 = xr.Dataset({"a": ("x", [1, 2])}, {"x": [0, 1]})
     ds2 = xr.Dataset({"a": ("x", [2, 3]), "b": 4}, {"x": [1, 2]})
@@ -104,14 +99,12 @@ def test_alignment():
         assert_identical_(actual, expected)
 
 
-@requires_np113
 def test_kwargs():
     x = xr.DataArray(0)
     result = np.add(x, 1, dtype=np.float64)
     assert result.dtype == np.float64
 
 
-@requires_np113
 def test_xarray_defers_to_unrecognized_type():
     class Other:
         def __array_ufunc__(self, *args, **kwargs):
@@ -123,7 +116,6 @@ def __array_ufunc__(self, *args, **kwargs):
     assert np.sin(xarray_obj, out=other) == "other"
 
 
-@requires_np113
 def test_xarray_handles_dask():
     da = pytest.importorskip("dask.array")
     x = xr.DataArray(np.ones((2, 2)), dims=["x", "y"])
@@ -133,7 +125,6 @@ def test_xarray_handles_dask():
     assert isinstance(result, xr.DataArray)
 
 
-@requires_np113
 def test_dask_defers_to_xarray():
     da = pytest.importorskip("dask.array")
     x = xr.DataArray(np.ones((2, 2)), dims=["x", "y"])
@@ -143,14 +134,12 @@ def test_dask_defers_to_xarray():
     assert isinstance(result, xr.DataArray)
 
 
-@requires_np113
 def test_gufunc_methods():
     xarray_obj = xr.DataArray([1, 2, 3])
     with raises_regex(NotImplementedError, "reduce method"):
         np.add.reduce(xarray_obj, 1)
 
 
-@requires_np113
 def test_out():
     xarray_obj = xr.DataArray([1, 2, 3])
 
@@ -164,7 +153,6 @@ def test_out():
     assert_identical(other, np.array([1, 2, 3]))
 
 
-@requires_np113
 def test_gufuncs():
     xarray_obj = xr.DataArray([1, 2, 3])
     fake_gufunc = mock.Mock(signature="(n)->()", autospec=np.sin)
@@ -182,7 +170,6 @@ def test_xarray_ufuncs_deprecation():
     assert len(record) == 0
 
 
-@requires_np113
 @pytest.mark.filterwarnings("ignore::RuntimeWarning")
 @pytest.mark.parametrize(
     "name",
diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py
index 254983364f9..859306b88cb 100644
--- a/xarray/tests/test_utils.py
+++ b/xarray/tests/test_utils.py
@@ -73,9 +73,7 @@ def test_multiindex_from_product_levels():
         [pd.Index(["b", "a"]), pd.Index([1, 3, 2])]
     )
     np.testing.assert_array_equal(
-        # compat for pandas < 0.24
-        result.codes if hasattr(result, "codes") else result.labels,
-        [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]],
+        result.codes, [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
     )
     np.testing.assert_array_equal(result.levels[0], ["b", "a"])
     np.testing.assert_array_equal(result.levels[1], [1, 3, 2])
@@ -89,9 +87,7 @@ def test_multiindex_from_product_levels_non_unique():
         [pd.Index(["b", "a"]), pd.Index([1, 1, 2])]
     )
     np.testing.assert_array_equal(
-        # compat for pandas < 0.24
-        result.codes if hasattr(result, "codes") else result.labels,
-        [[0, 0, 0, 1, 1, 1], [0, 0, 1, 0, 0, 1]],
+        result.codes, [[0, 0, 0, 1, 1, 1], [0, 0, 1, 0, 0, 1]]
     )
     np.testing.assert_array_equal(result.levels[0], ["b", "a"])
     np.testing.assert_array_equal(result.levels[1], [1, 2])
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index 7f9538c9ea9..172a23d9a76 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -2,7 +2,6 @@
 from collections import OrderedDict
 from copy import copy, deepcopy
 from datetime import datetime, timedelta
-from distutils.version import LooseVersion
 from textwrap import dedent
 
 import numpy as np
@@ -1837,13 +1836,6 @@ def test_getitem_fancy(self):
     def test_getitem_1d_fancy(self):
         super().test_getitem_1d_fancy()
 
-    def test_equals_all_dtypes(self):
-        import dask
-
-        if "0.18.2" <= LooseVersion(dask.__version__) < "0.19.1":
-            pytest.xfail("https://github.com/pydata/xarray/issues/2318")
-        super().test_equals_all_dtypes()
-
     def test_getitem_with_mask_nd_indexer(self):
         import dask.array as da