diff --git a/.binder/environment.yml b/.binder/environment.yml
new file mode 100644
index 00000000000..13b6b99e6fc
--- /dev/null
+++ b/.binder/environment.yml
@@ -0,0 +1,39 @@
+name: xarray-examples
+channels:
+  - conda-forge
+dependencies:
+  - python=3.7
+  - boto3
+  - bottleneck
+  - cartopy
+  - cdms2
+  - cfgrib
+  - cftime
+  - coveralls
+  - dask
+  - distributed
+  - dask_labextension
+  - h5netcdf
+  - h5py
+  - hdf5
+  - iris
+  - lxml    # Optional dep of pydap
+  - matplotlib
+  - nc-time-axis
+  - netcdf4
+  - numba
+  - numpy
+  - pandas
+  - pint
+  - pip
+  - pydap
+  - pynio
+  - rasterio
+  - scipy
+  - seaborn
+  - sparse
+  - toolz
+  - xarray
+  - zarr
+  - pip:
+    - numbagg
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 90de0705a27..d6ee76c7d3f 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -110,5 +110,5 @@ jobs:
   - bash: |
       source activate xarray-tests
       cd doc
-      sphinx-build -n -j auto -b html -d _build/doctrees . _build/html
+      sphinx-build -W --keep-going -j auto -b html -d _build/doctrees . _build/html
     displayName: Build HTML docs
diff --git a/ci/azure/install.yml b/ci/azure/install.yml
index baa69bcc8d5..e4f3a0b9e16 100644
--- a/ci/azure/install.yml
+++ b/ci/azure/install.yml
@@ -25,7 +25,9 @@ steps:
         git+https://github.com/dask/dask \
         git+https://github.com/dask/distributed \
         git+https://github.com/zarr-developers/zarr \
-        git+https://github.com/Unidata/cftime
+        git+https://github.com/Unidata/cftime \
+        git+https://github.com/mapbox/rasterio \
+        git+https://github.com/pydata/bottleneck
   condition: eq(variables['UPSTREAM_DEV'], 'true')
   displayName: Install upstream dev dependencies
 
diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml
index f2c09ed6fef..a0c27a30b01 100644
--- a/ci/requirements/doc.yml
+++ b/ci/requirements/doc.yml
@@ -6,7 +6,7 @@ dependencies:
   - python=3.7
   - bottleneck
   - cartopy
-  - eccodes
+  - cfgrib
   - h5netcdf
   - ipykernel
   - ipython
@@ -22,7 +22,3 @@ dependencies:
   - sphinx
   - sphinx_rtd_theme
   - zarr
-  - pip
-  - pip:
-    - cfgrib
-
diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst
index 027c732697f..c117b0f4fc7 100644
--- a/doc/api-hidden.rst
+++ b/doc/api-hidden.rst
@@ -27,6 +27,38 @@
    Dataset.std
    Dataset.var
 
+   core.coordinates.DatasetCoordinates.get
+   core.coordinates.DatasetCoordinates.items
+   core.coordinates.DatasetCoordinates.keys
+   core.coordinates.DatasetCoordinates.merge
+   core.coordinates.DatasetCoordinates.to_dataset
+   core.coordinates.DatasetCoordinates.to_index
+   core.coordinates.DatasetCoordinates.update
+   core.coordinates.DatasetCoordinates.values
+   core.coordinates.DatasetCoordinates.dims
+   core.coordinates.DatasetCoordinates.indexes
+   core.coordinates.DatasetCoordinates.variables
+
+   core.rolling.DatasetCoarsen.all
+   core.rolling.DatasetCoarsen.any
+   core.rolling.DatasetCoarsen.argmax
+   core.rolling.DatasetCoarsen.argmin
+   core.rolling.DatasetCoarsen.count
+   core.rolling.DatasetCoarsen.max
+   core.rolling.DatasetCoarsen.mean
+   core.rolling.DatasetCoarsen.median
+   core.rolling.DatasetCoarsen.min
+   core.rolling.DatasetCoarsen.prod
+   core.rolling.DatasetCoarsen.std
+   core.rolling.DatasetCoarsen.sum
+   core.rolling.DatasetCoarsen.var
+   core.rolling.DatasetCoarsen.boundary
+   core.rolling.DatasetCoarsen.coord_func
+   core.rolling.DatasetCoarsen.obj
+   core.rolling.DatasetCoarsen.side
+   core.rolling.DatasetCoarsen.trim_excess
+   core.rolling.DatasetCoarsen.windows
+
    core.groupby.DatasetGroupBy.assign
    core.groupby.DatasetGroupBy.assign_coords
    core.groupby.DatasetGroupBy.first
@@ -34,6 +66,69 @@
    core.groupby.DatasetGroupBy.fillna
    core.groupby.DatasetGroupBy.quantile
    core.groupby.DatasetGroupBy.where
+   core.groupby.DatasetGroupBy.all
+   core.groupby.DatasetGroupBy.any
+   core.groupby.DatasetGroupBy.argmax
+   core.groupby.DatasetGroupBy.argmin
+   core.groupby.DatasetGroupBy.count
+   core.groupby.DatasetGroupBy.max
+   core.groupby.DatasetGroupBy.mean
+   core.groupby.DatasetGroupBy.median
+   core.groupby.DatasetGroupBy.min
+   core.groupby.DatasetGroupBy.prod
+   core.groupby.DatasetGroupBy.std
+   core.groupby.DatasetGroupBy.sum
+   core.groupby.DatasetGroupBy.var
+   core.groupby.DatasetGroupBy.dims
+   core.groupby.DatasetGroupBy.groups
+
+   core.resample.DatasetResample.all
+   core.resample.DatasetResample.any
+   core.resample.DatasetResample.apply
+   core.resample.DatasetResample.argmax
+   core.resample.DatasetResample.argmin
+   core.resample.DatasetResample.assign
+   core.resample.DatasetResample.assign_coords
+   core.resample.DatasetResample.bfill
+   core.resample.DatasetResample.count
+   core.resample.DatasetResample.ffill
+   core.resample.DatasetResample.fillna
+   core.resample.DatasetResample.first
+   core.resample.DatasetResample.last
+   core.resample.DatasetResample.map
+   core.resample.DatasetResample.max
+   core.resample.DatasetResample.mean
+   core.resample.DatasetResample.median
+   core.resample.DatasetResample.min
+   core.resample.DatasetResample.prod
+   core.resample.DatasetResample.quantile
+   core.resample.DatasetResample.reduce
+   core.resample.DatasetResample.std
+   core.resample.DatasetResample.sum
+   core.resample.DatasetResample.var
+   core.resample.DatasetResample.where
+   core.resample.DatasetResample.dims
+   core.resample.DatasetResample.groups
+
+   core.rolling.DatasetRolling.argmax
+   core.rolling.DatasetRolling.argmin
+   core.rolling.DatasetRolling.count
+   core.rolling.DatasetRolling.max
+   core.rolling.DatasetRolling.mean
+   core.rolling.DatasetRolling.median
+   core.rolling.DatasetRolling.min
+   core.rolling.DatasetRolling.prod
+   core.rolling.DatasetRolling.std
+   core.rolling.DatasetRolling.sum
+   core.rolling.DatasetRolling.var
+   core.rolling.DatasetRolling.center
+   core.rolling.DatasetRolling.dim
+   core.rolling.DatasetRolling.min_periods
+   core.rolling.DatasetRolling.obj
+   core.rolling.DatasetRolling.rollings
+   core.rolling.DatasetRolling.window
+
+   core.rolling_exp.RollingExp.mean
 
    Dataset.argsort
    Dataset.astype
@@ -47,6 +142,9 @@
    Dataset.cumprod
    Dataset.rank
 
+   Dataset.load_store
+   Dataset.dump_to_store
+
    DataArray.ndim
    DataArray.nbytes
    DataArray.shape
@@ -71,12 +169,104 @@
    DataArray.std
    DataArray.var
 
+   core.coordinates.DataArrayCoordinates.get
+   core.coordinates.DataArrayCoordinates.items
+   core.coordinates.DataArrayCoordinates.keys
+   core.coordinates.DataArrayCoordinates.merge
+   core.coordinates.DataArrayCoordinates.to_dataset
+   core.coordinates.DataArrayCoordinates.to_index
+   core.coordinates.DataArrayCoordinates.update
+   core.coordinates.DataArrayCoordinates.values
+   core.coordinates.DataArrayCoordinates.dims
+   core.coordinates.DataArrayCoordinates.indexes
+   core.coordinates.DataArrayCoordinates.variables
+
+   core.rolling.DataArrayCoarsen.all
+   core.rolling.DataArrayCoarsen.any
+   core.rolling.DataArrayCoarsen.argmax
+   core.rolling.DataArrayCoarsen.argmin
+   core.rolling.DataArrayCoarsen.count
+   core.rolling.DataArrayCoarsen.max
+   core.rolling.DataArrayCoarsen.mean
+   core.rolling.DataArrayCoarsen.median
+   core.rolling.DataArrayCoarsen.min
+   core.rolling.DataArrayCoarsen.prod
+   core.rolling.DataArrayCoarsen.std
+   core.rolling.DataArrayCoarsen.sum
+   core.rolling.DataArrayCoarsen.var
+   core.rolling.DataArrayCoarsen.boundary
+   core.rolling.DataArrayCoarsen.coord_func
+   core.rolling.DataArrayCoarsen.obj
+   core.rolling.DataArrayCoarsen.side
+   core.rolling.DataArrayCoarsen.trim_excess
+   core.rolling.DataArrayCoarsen.windows
+
    core.groupby.DataArrayGroupBy.assign_coords
    core.groupby.DataArrayGroupBy.first
    core.groupby.DataArrayGroupBy.last
    core.groupby.DataArrayGroupBy.fillna
    core.groupby.DataArrayGroupBy.quantile
    core.groupby.DataArrayGroupBy.where
+   core.groupby.DataArrayGroupBy.all
+   core.groupby.DataArrayGroupBy.any
+   core.groupby.DataArrayGroupBy.argmax
+   core.groupby.DataArrayGroupBy.argmin
+   core.groupby.DataArrayGroupBy.count
+   core.groupby.DataArrayGroupBy.max
+   core.groupby.DataArrayGroupBy.mean
+   core.groupby.DataArrayGroupBy.median
+   core.groupby.DataArrayGroupBy.min
+   core.groupby.DataArrayGroupBy.prod
+   core.groupby.DataArrayGroupBy.std
+   core.groupby.DataArrayGroupBy.sum
+   core.groupby.DataArrayGroupBy.var
+   core.groupby.DataArrayGroupBy.dims
+   core.groupby.DataArrayGroupBy.groups
+
+   core.resample.DataArrayResample.all
+   core.resample.DataArrayResample.any
+   core.resample.DataArrayResample.apply
+   core.resample.DataArrayResample.argmax
+   core.resample.DataArrayResample.argmin
+   core.resample.DataArrayResample.assign_coords
+   core.resample.DataArrayResample.bfill
+   core.resample.DataArrayResample.count
+   core.resample.DataArrayResample.ffill
+   core.resample.DataArrayResample.fillna
+   core.resample.DataArrayResample.first
+   core.resample.DataArrayResample.last
+   core.resample.DataArrayResample.map
+   core.resample.DataArrayResample.max
+   core.resample.DataArrayResample.mean
+   core.resample.DataArrayResample.median
+   core.resample.DataArrayResample.min
+   core.resample.DataArrayResample.prod
+   core.resample.DataArrayResample.quantile
+   core.resample.DataArrayResample.reduce
+   core.resample.DataArrayResample.std
+   core.resample.DataArrayResample.sum
+   core.resample.DataArrayResample.var
+   core.resample.DataArrayResample.where
+   core.resample.DataArrayResample.dims
+   core.resample.DataArrayResample.groups
+
+   core.rolling.DataArrayRolling.argmax
+   core.rolling.DataArrayRolling.argmin
+   core.rolling.DataArrayRolling.count
+   core.rolling.DataArrayRolling.max
+   core.rolling.DataArrayRolling.mean
+   core.rolling.DataArrayRolling.median
+   core.rolling.DataArrayRolling.min
+   core.rolling.DataArrayRolling.prod
+   core.rolling.DataArrayRolling.std
+   core.rolling.DataArrayRolling.sum
+   core.rolling.DataArrayRolling.var
+   core.rolling.DataArrayRolling.center
+   core.rolling.DataArrayRolling.dim
+   core.rolling.DataArrayRolling.min_periods
+   core.rolling.DataArrayRolling.obj
+   core.rolling.DataArrayRolling.window
+   core.rolling.DataArrayRolling.window_labels
 
    DataArray.argsort
    DataArray.clip
@@ -91,6 +281,221 @@
    DataArray.cumprod
    DataArray.rank
 
+   core.accessor_dt.DatetimeAccessor.ceil
+   core.accessor_dt.DatetimeAccessor.floor
+   core.accessor_dt.DatetimeAccessor.round
+   core.accessor_dt.DatetimeAccessor.strftime
+   core.accessor_dt.DatetimeAccessor.day
+   core.accessor_dt.DatetimeAccessor.dayofweek
+   core.accessor_dt.DatetimeAccessor.dayofyear
+   core.accessor_dt.DatetimeAccessor.days_in_month
+   core.accessor_dt.DatetimeAccessor.daysinmonth
+   core.accessor_dt.DatetimeAccessor.hour
+   core.accessor_dt.DatetimeAccessor.microsecond
+   core.accessor_dt.DatetimeAccessor.minute
+   core.accessor_dt.DatetimeAccessor.month
+   core.accessor_dt.DatetimeAccessor.nanosecond
+   core.accessor_dt.DatetimeAccessor.quarter
+   core.accessor_dt.DatetimeAccessor.season
+   core.accessor_dt.DatetimeAccessor.second
+   core.accessor_dt.DatetimeAccessor.time
+   core.accessor_dt.DatetimeAccessor.week
+   core.accessor_dt.DatetimeAccessor.weekday
+   core.accessor_dt.DatetimeAccessor.weekday_name
+   core.accessor_dt.DatetimeAccessor.weekofyear
+   core.accessor_dt.DatetimeAccessor.year
+
+   core.accessor_str.StringAccessor.capitalize
+   core.accessor_str.StringAccessor.center
+   core.accessor_str.StringAccessor.contains
+   core.accessor_str.StringAccessor.count
+   core.accessor_str.StringAccessor.decode
+   core.accessor_str.StringAccessor.encode
+   core.accessor_str.StringAccessor.endswith
+   core.accessor_str.StringAccessor.find
+   core.accessor_str.StringAccessor.get
+   core.accessor_str.StringAccessor.index
+   core.accessor_str.StringAccessor.isalnum
+   core.accessor_str.StringAccessor.isalpha
+   core.accessor_str.StringAccessor.isdecimal
+   core.accessor_str.StringAccessor.isdigit
+   core.accessor_str.StringAccessor.islower
+   core.accessor_str.StringAccessor.isnumeric
+   core.accessor_str.StringAccessor.isspace
+   core.accessor_str.StringAccessor.istitle
+   core.accessor_str.StringAccessor.isupper
+   core.accessor_str.StringAccessor.len
+   core.accessor_str.StringAccessor.ljust
+   core.accessor_str.StringAccessor.lower
+   core.accessor_str.StringAccessor.lstrip
+   core.accessor_str.StringAccessor.match
+   core.accessor_str.StringAccessor.pad
+   core.accessor_str.StringAccessor.repeat
+   core.accessor_str.StringAccessor.replace
+   core.accessor_str.StringAccessor.rfind
+   core.accessor_str.StringAccessor.rindex
+   core.accessor_str.StringAccessor.rjust
+   core.accessor_str.StringAccessor.rstrip
+   core.accessor_str.StringAccessor.slice
+   core.accessor_str.StringAccessor.slice_replace
+   core.accessor_str.StringAccessor.startswith
+   core.accessor_str.StringAccessor.strip
+   core.accessor_str.StringAccessor.swapcase
+   core.accessor_str.StringAccessor.title
+   core.accessor_str.StringAccessor.translate
+   core.accessor_str.StringAccessor.upper
+   core.accessor_str.StringAccessor.wrap
+   core.accessor_str.StringAccessor.zfill
+
+   Variable.all
+   Variable.any
+   Variable.argmax
+   Variable.argmin
+   Variable.argsort
+   Variable.astype
+   Variable.broadcast_equals
+   Variable.chunk
+   Variable.clip
+   Variable.coarsen
+   Variable.compute
+   Variable.concat
+   Variable.conj
+   Variable.conjugate
+   Variable.copy
+   Variable.count
+   Variable.cumprod
+   Variable.cumsum
+   Variable.equals
+   Variable.fillna
+   Variable.get_axis_num
+   Variable.identical
+   Variable.isel
+   Variable.isnull
+   Variable.item
+   Variable.load
+   Variable.max
+   Variable.mean
+   Variable.median
+   Variable.min
+   Variable.no_conflicts
+   Variable.notnull
+   Variable.pad_with_fill_value
+   Variable.prod
+   Variable.quantile
+   Variable.rank
+   Variable.reduce
+   Variable.roll
+   Variable.rolling_window
+   Variable.round
+   Variable.searchsorted
+   Variable.set_dims
+   Variable.shift
+   Variable.squeeze
+   Variable.stack
+   Variable.std
+   Variable.sum
+   Variable.to_base_variable
+   Variable.to_coord
+   Variable.to_dict
+   Variable.to_index
+   Variable.to_index_variable
+   Variable.to_variable
+   Variable.transpose
+   Variable.unstack
+   Variable.var
+   Variable.where
+   Variable.T
+   Variable.attrs
+   Variable.chunks
+   Variable.data
+   Variable.dims
+   Variable.dtype
+   Variable.encoding
+   Variable.imag
+   Variable.nbytes
+   Variable.ndim
+   Variable.real
+   Variable.shape
+   Variable.size
+   Variable.sizes
+   Variable.values
+
+   IndexVariable.all
+   IndexVariable.any
+   IndexVariable.argmax
+   IndexVariable.argmin
+   IndexVariable.argsort
+   IndexVariable.astype
+   IndexVariable.broadcast_equals
+   IndexVariable.chunk
+   IndexVariable.clip
+   IndexVariable.coarsen
+   IndexVariable.compute
+   IndexVariable.concat
+   IndexVariable.conj
+   IndexVariable.conjugate
+   IndexVariable.copy
+   IndexVariable.count
+   IndexVariable.cumprod
+   IndexVariable.cumsum
+   IndexVariable.equals
+   IndexVariable.fillna
+   IndexVariable.get_axis_num
+   IndexVariable.get_level_variable
+   IndexVariable.identical
+   IndexVariable.isel
+   IndexVariable.isnull
+   IndexVariable.item
+   IndexVariable.load
+   IndexVariable.max
+   IndexVariable.mean
+   IndexVariable.median
+   IndexVariable.min
+   IndexVariable.no_conflicts
+   IndexVariable.notnull
+   IndexVariable.pad_with_fill_value
+   IndexVariable.prod
+   IndexVariable.quantile
+   IndexVariable.rank
+   IndexVariable.reduce
+   IndexVariable.roll
+   IndexVariable.rolling_window
+   IndexVariable.round
+   IndexVariable.searchsorted
+   IndexVariable.set_dims
+   IndexVariable.shift
+   IndexVariable.squeeze
+   IndexVariable.stack
+   IndexVariable.std
+   IndexVariable.sum
+   IndexVariable.to_base_variable
+   IndexVariable.to_coord
+   IndexVariable.to_dict
+   IndexVariable.to_index
+   IndexVariable.to_index_variable
+   IndexVariable.to_variable
+   IndexVariable.transpose
+   IndexVariable.unstack
+   IndexVariable.var
+   IndexVariable.where
+   IndexVariable.T
+   IndexVariable.attrs
+   IndexVariable.chunks
+   IndexVariable.data
+   IndexVariable.dims
+   IndexVariable.dtype
+   IndexVariable.encoding
+   IndexVariable.imag
+   IndexVariable.level_names
+   IndexVariable.name
+   IndexVariable.nbytes
+   IndexVariable.ndim
+   IndexVariable.real
+   IndexVariable.shape
+   IndexVariable.size
+   IndexVariable.sizes
+   IndexVariable.values
+
    ufuncs.angle
    ufuncs.arccos
    ufuncs.arccosh
@@ -156,6 +561,252 @@
    plot.FacetGrid.set_ticks
    plot.FacetGrid.map
 
+   CFTimeIndex.all
+   CFTimeIndex.any
+   CFTimeIndex.append
+   CFTimeIndex.argmax
+   CFTimeIndex.argmin
+   CFTimeIndex.argsort
+   CFTimeIndex.asof
+   CFTimeIndex.asof_locs
+   CFTimeIndex.astype
+   CFTimeIndex.contains
+   CFTimeIndex.copy
+   CFTimeIndex.delete
+   CFTimeIndex.difference
+   CFTimeIndex.drop
+   CFTimeIndex.drop_duplicates
+   CFTimeIndex.droplevel
+   CFTimeIndex.dropna
+   CFTimeIndex.duplicated
+   CFTimeIndex.equals
+   CFTimeIndex.factorize
+   CFTimeIndex.fillna
+   CFTimeIndex.format
+   CFTimeIndex.get_duplicates
+   CFTimeIndex.get_indexer
+   CFTimeIndex.get_indexer_for
+   CFTimeIndex.get_indexer_non_unique
+   CFTimeIndex.get_level_values
+   CFTimeIndex.get_loc
+   CFTimeIndex.get_slice_bound
+   CFTimeIndex.get_value
+   CFTimeIndex.get_values
+   CFTimeIndex.groupby
+   CFTimeIndex.holds_integer
+   CFTimeIndex.identical
+   CFTimeIndex.insert
+   CFTimeIndex.intersection
+   CFTimeIndex.is_
+   CFTimeIndex.is_boolean
+   CFTimeIndex.is_categorical
+   CFTimeIndex.is_floating
+   CFTimeIndex.is_integer
+   CFTimeIndex.is_interval
+   CFTimeIndex.is_lexsorted_for_tuple
+   CFTimeIndex.is_mixed
+   CFTimeIndex.is_numeric
+   CFTimeIndex.is_object
+   CFTimeIndex.is_type_compatible
+   CFTimeIndex.isin
+   CFTimeIndex.isna
+   CFTimeIndex.isnull
+   CFTimeIndex.item
+   CFTimeIndex.join
+   CFTimeIndex.map
+   CFTimeIndex.max
+   CFTimeIndex.memory_usage
+   CFTimeIndex.min
+   CFTimeIndex.notna
+   CFTimeIndex.notnull
+   CFTimeIndex.nunique
+   CFTimeIndex.putmask
+   CFTimeIndex.ravel
+   CFTimeIndex.reindex
+   CFTimeIndex.rename
+   CFTimeIndex.repeat
+   CFTimeIndex.searchsorted
+   CFTimeIndex.set_names
+   CFTimeIndex.set_value
    CFTimeIndex.shift
-   CFTimeIndex.to_datetimeindex
+   CFTimeIndex.slice_indexer
+   CFTimeIndex.slice_locs
+   CFTimeIndex.sort
+   CFTimeIndex.sort_values
+   CFTimeIndex.sortlevel
    CFTimeIndex.strftime
+   CFTimeIndex.summary
+   CFTimeIndex.symmetric_difference
+   CFTimeIndex.take
+   CFTimeIndex.to_datetimeindex
+   CFTimeIndex.to_flat_index
+   CFTimeIndex.to_frame
+   CFTimeIndex.to_list
+   CFTimeIndex.to_native_types
+   CFTimeIndex.to_numpy
+   CFTimeIndex.to_series
+   CFTimeIndex.tolist
+   CFTimeIndex.transpose
+   CFTimeIndex.union
+   CFTimeIndex.unique
+   CFTimeIndex.value_counts
+   CFTimeIndex.view
+   CFTimeIndex.where
+
+   CFTimeIndex.T
+   CFTimeIndex.array
+   CFTimeIndex.asi8
+   CFTimeIndex.base
+   CFTimeIndex.data
+   CFTimeIndex.date_type
+   CFTimeIndex.day
+   CFTimeIndex.dayofweek
+   CFTimeIndex.dayofyear
+   CFTimeIndex.dtype
+   CFTimeIndex.dtype_str
+   CFTimeIndex.empty
+   CFTimeIndex.flags
+   CFTimeIndex.has_duplicates
+   CFTimeIndex.hasnans
+   CFTimeIndex.hour
+   CFTimeIndex.inferred_type
+   CFTimeIndex.is_all_dates
+   CFTimeIndex.is_monotonic
+   CFTimeIndex.is_monotonic_increasing
+   CFTimeIndex.is_monotonic_decreasing
+   CFTimeIndex.is_unique
+   CFTimeIndex.itemsize
+   CFTimeIndex.microsecond
+   CFTimeIndex.minute
+   CFTimeIndex.month
+   CFTimeIndex.name
+   CFTimeIndex.names
+   CFTimeIndex.nbytes
+   CFTimeIndex.ndim
+   CFTimeIndex.nlevels
+   CFTimeIndex.second
+   CFTimeIndex.shape
+   CFTimeIndex.size
+   CFTimeIndex.strides
+   CFTimeIndex.values
+   CFTimeIndex.year
+
+   backends.NetCDF4DataStore.close
+   backends.NetCDF4DataStore.encode
+   backends.NetCDF4DataStore.encode_attribute
+   backends.NetCDF4DataStore.encode_variable
+   backends.NetCDF4DataStore.get
+   backends.NetCDF4DataStore.get_attrs
+   backends.NetCDF4DataStore.get_dimensions
+   backends.NetCDF4DataStore.get_encoding
+   backends.NetCDF4DataStore.get_variables
+   backends.NetCDF4DataStore.items
+   backends.NetCDF4DataStore.keys
+   backends.NetCDF4DataStore.load
+   backends.NetCDF4DataStore.open
+   backends.NetCDF4DataStore.open_store_variable
+   backends.NetCDF4DataStore.prepare_variable
+   backends.NetCDF4DataStore.set_attribute
+   backends.NetCDF4DataStore.set_attributes
+   backends.NetCDF4DataStore.set_dimension
+   backends.NetCDF4DataStore.set_dimensions
+   backends.NetCDF4DataStore.set_variable
+   backends.NetCDF4DataStore.set_variables
+   backends.NetCDF4DataStore.store
+   backends.NetCDF4DataStore.store_dataset
+   backends.NetCDF4DataStore.sync
+   backends.NetCDF4DataStore.values
+   backends.NetCDF4DataStore.attrs
+   backends.NetCDF4DataStore.autoclose
+   backends.NetCDF4DataStore.dimensions
+   backends.NetCDF4DataStore.ds
+   backends.NetCDF4DataStore.format
+   backends.NetCDF4DataStore.is_remote
+   backends.NetCDF4DataStore.lock
+   backends.NetCDF4DataStore.variables
+
+   backends.H5NetCDFStore.close
+   backends.H5NetCDFStore.encode
+   backends.H5NetCDFStore.encode_attribute
+   backends.H5NetCDFStore.encode_variable
+   backends.H5NetCDFStore.get
+   backends.H5NetCDFStore.get_attrs
+   backends.H5NetCDFStore.get_dimensions
+   backends.H5NetCDFStore.get_encoding
+   backends.H5NetCDFStore.get_variables
+   backends.H5NetCDFStore.items
+   backends.H5NetCDFStore.keys
+   backends.H5NetCDFStore.load
+   backends.H5NetCDFStore.open_store_variable
+   backends.H5NetCDFStore.prepare_variable
+   backends.H5NetCDFStore.set_attribute
+   backends.H5NetCDFStore.set_attributes
+   backends.H5NetCDFStore.set_dimension
+   backends.H5NetCDFStore.set_dimensions
+   backends.H5NetCDFStore.set_variable
+   backends.H5NetCDFStore.set_variables
+   backends.H5NetCDFStore.store
+   backends.H5NetCDFStore.store_dataset
+   backends.H5NetCDFStore.sync
+   backends.H5NetCDFStore.values
+   backends.H5NetCDFStore.attrs
+   backends.H5NetCDFStore.dimensions
+   backends.H5NetCDFStore.ds
+   backends.H5NetCDFStore.variables
+
+   backends.PydapDataStore.close
+   backends.PydapDataStore.get
+   backends.PydapDataStore.get_attrs
+   backends.PydapDataStore.get_dimensions
+   backends.PydapDataStore.get_encoding
+   backends.PydapDataStore.get_variables
+   backends.PydapDataStore.items
+   backends.PydapDataStore.keys
+   backends.PydapDataStore.load
+   backends.PydapDataStore.open
+   backends.PydapDataStore.open_store_variable
+   backends.PydapDataStore.values
+   backends.PydapDataStore.attrs
+   backends.PydapDataStore.dimensions
+   backends.PydapDataStore.variables
+
+   backends.ScipyDataStore.close
+   backends.ScipyDataStore.encode
+   backends.ScipyDataStore.encode_attribute
+   backends.ScipyDataStore.encode_variable
+   backends.ScipyDataStore.get
+   backends.ScipyDataStore.get_attrs
+   backends.ScipyDataStore.get_dimensions
+   backends.ScipyDataStore.get_encoding
+   backends.ScipyDataStore.get_variables
+   backends.ScipyDataStore.items
+   backends.ScipyDataStore.keys
+   backends.ScipyDataStore.load
+   backends.ScipyDataStore.open_store_variable
+   backends.ScipyDataStore.prepare_variable
+   backends.ScipyDataStore.set_attribute
+   backends.ScipyDataStore.set_attributes
+   backends.ScipyDataStore.set_dimension
+   backends.ScipyDataStore.set_dimensions
+   backends.ScipyDataStore.set_variable
+   backends.ScipyDataStore.set_variables
+   backends.ScipyDataStore.store
+   backends.ScipyDataStore.store_dataset
+   backends.ScipyDataStore.sync
+   backends.ScipyDataStore.values
+   backends.ScipyDataStore.attrs
+   backends.ScipyDataStore.dimensions
+   backends.ScipyDataStore.ds
+   backends.ScipyDataStore.variables
+
+   backends.FileManager.acquire
+   backends.FileManager.acquire_context
+   backends.FileManager.close
+
+   backends.CachingFileManager.acquire
+   backends.CachingFileManager.acquire_context
+   backends.CachingFileManager.close
+
+   backends.DummyFileManager.acquire
+   backends.DummyFileManager.acquire_context
+   backends.DummyFileManager.close
diff --git a/doc/api.rst b/doc/api.rst
index a1fae3deb03..9735eb0c708 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -31,6 +31,8 @@ Top-level functions
    ones_like
    dot
    map_blocks
+   show_versions
+   set_options
 
 Dataset
 =======
@@ -74,7 +76,9 @@ and values given by ``DataArray`` objects.
    Dataset.__setitem__
    Dataset.__delitem__
    Dataset.update
+   Dataset.get
    Dataset.items
+   Dataset.keys
    Dataset.values
 
 Dataset contents
@@ -537,6 +541,15 @@ DataArray methods
    DataArray.unify_chunks
    DataArray.map_blocks
 
+Coordinates objects
+===================
+
+.. autosummary::
+   :toctree: generated/
+
+   core.coordinates.DataArrayCoordinates
+   core.coordinates.DatasetCoordinates
+
 GroupBy objects
 ===============
 
@@ -564,6 +577,16 @@ Rolling objects
    core.rolling.DatasetRolling.reduce
    core.rolling_exp.RollingExp
 
+Coarsen objects
+===============
+
+.. autosummary::
+   :toctree: generated/
+
+   core.rolling.DataArrayCoarsen
+   core.rolling.DatasetCoarsen
+
+
 Resample objects
 ================
 
@@ -593,6 +616,7 @@ Accessors
    :toctree: generated/
 
    core.accessor_dt.DatetimeAccessor
+   core.accessor_dt.TimedeltaAccessor
    core.accessor_str.StringAccessor
 
 Custom Indexes
@@ -627,6 +651,33 @@ Plotting
    plot.pcolormesh
    plot.FacetGrid
 
+Faceting
+--------
+.. autosummary::
+   :toctree: generated/
+
+   plot.FacetGrid
+   plot.FacetGrid.add_colorbar
+   plot.FacetGrid.add_legend
+   plot.FacetGrid.map
+   plot.FacetGrid.map_dataarray
+   plot.FacetGrid.map_dataarray_line
+   plot.FacetGrid.map_dataset
+   plot.FacetGrid.set_axis_labels
+   plot.FacetGrid.set_ticks
+   plot.FacetGrid.set_titles
+   plot.FacetGrid.set_xlabels
+   plot.FacetGrid.set_ylabels
+
+Tutorial
+========
+
+.. autosummary::
+   :toctree: generated/
+
+   tutorial.open_dataset
+   tutorial.load_dataset
+
 Testing
 =======
 
@@ -663,7 +714,7 @@ Advanced API
 
 These backends provide a low-level interface for lazily loading data from
 external file-formats or protocols, and can be manually invoked to create
-arguments for the ``from_store`` and ``dump_to_store`` Dataset methods:
+arguments for the ``load_store`` and ``dump_to_store`` Dataset methods:
 
 .. autosummary::
    :toctree: generated/
@@ -679,6 +730,9 @@ arguments for the ``from_store`` and ``dump_to_store`` Dataset methods:
 Deprecated / Pending Deprecation
 ================================
 
+.. autosummary::
+   :toctree: generated/
+
    Dataset.drop
    DataArray.drop
    Dataset.apply
diff --git a/doc/conf.py b/doc/conf.py
index b6edc07f612..11abda6bb63 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -15,10 +15,16 @@
 
 import datetime
 import os
+import pathlib
 import subprocess
 import sys
 from contextlib import suppress
 
+# make sure the source version is preferred (#3567)
+root = pathlib.Path(__file__).absolute().parent.parent
+os.environ["PYTHONPATH"] = str(root)
+sys.path.insert(0, str(root))
+
 import xarray
 
 allowed_failures = set()
@@ -351,4 +357,5 @@
     "numba": ("https://numba.pydata.org/numba-doc/latest", None),
     "matplotlib": ("https://matplotlib.org", None),
     "dask": ("https://docs.dask.org/en/latest", None),
+    "cftime": ("https://unidata.github.io/cftime", None),
 }
diff --git a/doc/groupby.rst b/doc/groupby.rst
index f5943703765..927e192eb6c 100644
--- a/doc/groupby.rst
+++ b/doc/groupby.rst
@@ -94,7 +94,7 @@ Apply
 ~~~~~
 
 To apply a function to each group, you can use the flexible
-:py:meth:`~xarray.DatasetGroupBy.map` method. The resulting objects are automatically
+:py:meth:`~xarray.core.groupby.DatasetGroupBy.map` method. The resulting objects are automatically
 concatenated back together along the group axis:
 
 .. ipython:: python
@@ -104,8 +104,8 @@ concatenated back together along the group axis:
 
     arr.groupby('letters').map(standardize)
 
-GroupBy objects also have a :py:meth:`~xarray.DatasetGroupBy.reduce` method and
-methods like :py:meth:`~xarray.DatasetGroupBy.mean` as shortcuts for applying an
+GroupBy objects also have a :py:meth:`~xarray.core.groupby.DatasetGroupBy.reduce` method and
+methods like :py:meth:`~xarray.core.groupby.DatasetGroupBy.mean` as shortcuts for applying an
 aggregation function:
 
 .. ipython:: python
diff --git a/doc/howdoi.rst b/doc/howdoi.rst
index 91644ba2718..80266bd3b84 100644
--- a/doc/howdoi.rst
+++ b/doc/howdoi.rst
@@ -22,7 +22,7 @@ How do I ...
    * - change the order of dimensions
      - :py:meth:`DataArray.transpose`, :py:meth:`Dataset.transpose`
    * - remove a variable from my object
-     - :py:meth:`Dataset.drop`, :py:meth:`DataArray.drop`
+     - :py:meth:`Dataset.drop_vars`, :py:meth:`DataArray.drop_vars`
    * - remove dimensions of length 1 or 0
      - :py:meth:`DataArray.squeeze`, :py:meth:`Dataset.squeeze`
    * - remove all variables with a particular dimension
@@ -48,7 +48,7 @@ How do I ...
    * - write xarray objects with complex values to a netCDF file
      - :py:func:`Dataset.to_netcdf`, :py:func:`DataArray.to_netcdf` specifying ``engine="h5netcdf", invalid_netcdf=True``
    * - make xarray objects look like other xarray objects
-     - :py:func:`~xarray.ones_like`, :py:func:`~xarray.zeros_like`, :py:func:`~xarray.full_like`, :py:meth:`Dataset.reindex_like`, :py:meth:`Dataset.interpolate_like`, :py:meth:`Dataset.broadcast_like`, :py:meth:`DataArray.reindex_like`, :py:meth:`DataArray.interpolate_like`, :py:meth:`DataArray.broadcast_like`
+     - :py:func:`~xarray.ones_like`, :py:func:`~xarray.zeros_like`, :py:func:`~xarray.full_like`, :py:meth:`Dataset.reindex_like`, :py:meth:`Dataset.interp_like`, :py:meth:`Dataset.broadcast_like`, :py:meth:`DataArray.reindex_like`, :py:meth:`DataArray.interp_like`, :py:meth:`DataArray.broadcast_like`
    * - replace NaNs with other values
      - :py:meth:`Dataset.fillna`, :py:meth:`Dataset.ffill`, :py:meth:`Dataset.bfill`, :py:meth:`Dataset.interpolate_na`, :py:meth:`DataArray.fillna`, :py:meth:`DataArray.ffill`, :py:meth:`DataArray.bfill`, :py:meth:`DataArray.interpolate_na`
    * - extract the year, month, day or similar from a DataArray of time values
diff --git a/doc/indexing.rst b/doc/indexing.rst
index e8482ac66b3..cfbb84a8343 100644
--- a/doc/indexing.rst
+++ b/doc/indexing.rst
@@ -132,7 +132,7 @@ use them explicitly to slice data. There are two ways to do this:
 
 The arguments to these methods can be any objects that could index the array
 along the dimension given by the keyword, e.g., labels for an individual value,
-Python :py:func:`slice` objects or 1-dimensional arrays.
+Python :py:class:`slice` objects or 1-dimensional arrays.
 
 .. note::
 
diff --git a/doc/interpolation.rst b/doc/interpolation.rst
index 7c750506cf3..63e9a7cd35e 100644
--- a/doc/interpolation.rst
+++ b/doc/interpolation.rst
@@ -48,7 +48,7 @@ array-like, which gives the interpolated result as an array.
     # interpolation
     da.interp(time=[2.5, 3.5])
 
-To interpolate data with a :py:func:`numpy.datetime64` coordinate you can pass a string.
+To interpolate data with a :py:doc:`numpy.datetime64 <reference/arrays.datetime>` coordinate you can pass a string.
 
 .. ipython:: python
 
@@ -128,7 +128,7 @@ It is now possible to safely compute the difference ``other - interpolated``.
 Interpolation methods
 ---------------------
 
-We use :py:func:`scipy.interpolate.interp1d` for 1-dimensional interpolation and
+We use :py:class:`scipy.interpolate.interp1d` for 1-dimensional interpolation and
 :py:func:`scipy.interpolate.interpn` for multi-dimensional interpolation.
 
 The interpolation method can be specified by the optional ``method`` argument.
diff --git a/doc/io.rst b/doc/io.rst
index 8f8a776f73a..e910943236f 100644
--- a/doc/io.rst
+++ b/doc/io.rst
@@ -1,3 +1,4 @@
+.. currentmodule:: xarray
 .. _io:
 
 Reading and writing files
@@ -23,8 +24,8 @@ netCDF
 The recommended way to store xarray data structures is `netCDF`__, which
 is a binary file format for self-described datasets that originated
 in the geosciences. xarray is based on the netCDF data model, so netCDF files
-on disk directly correspond to :py:class:`~xarray.Dataset` objects (more accurately,
-a group in a netCDF file directly corresponds to a to :py:class:`~xarray.Dataset` object.
+on disk directly correspond to :py:class:`Dataset` objects (more accurately,
+a group in a netCDF file directly corresponds to a :py:class:`Dataset` object.
 See :ref:`io.netcdf_groups` for more.)
 
 NetCDF is supported on almost all platforms, and parsers exist
@@ -47,7 +48,7 @@ read/write netCDF V4 files and use the compression options described below).
 __ https://github.com/Unidata/netcdf4-python
 
 We can save a Dataset to disk using the
-:py:meth:`~Dataset.to_netcdf` method:
+:py:meth:`Dataset.to_netcdf` method:
 
 .. ipython:: python
 
@@ -65,13 +66,13 @@ the ``format`` and ``engine`` arguments.
 .. tip::
 
    Using the `h5netcdf <https://github.com/shoyer/h5netcdf>`_  package
-   by passing ``engine='h5netcdf'`` to :py:meth:`~xarray.open_dataset` can
+   by passing ``engine='h5netcdf'`` to :py:func:`open_dataset` can
    sometimes be quicker than the default ``engine='netcdf4'`` that uses the
    `netCDF4 <https://github.com/Unidata/netcdf4-python>`_ package.
 
 
 We can load netCDF files to create a new Dataset using
-:py:func:`~xarray.open_dataset`:
+:py:func:`open_dataset`:
 
 .. ipython:: python
 
@@ -79,9 +80,9 @@ We can load netCDF files to create a new Dataset using
     ds_disk
 
 Similarly, a DataArray can be saved to disk using the
-:py:attr:`DataArray.to_netcdf <xarray.DataArray.to_netcdf>` method, and loaded
-from disk using the :py:func:`~xarray.open_dataarray` function. As netCDF files
-correspond to :py:class:`~xarray.Dataset` objects, these functions internally
+:py:meth:`DataArray.to_netcdf` method, and loaded
+from disk using the :py:func:`open_dataarray` function. As netCDF files
+correspond to :py:class:`Dataset` objects, these functions internally
 convert the ``DataArray`` to a ``Dataset`` before saving, and then convert back
 when loading, ensuring that the ``DataArray`` that is loaded is always exactly
 the same as the one that was saved.
@@ -108,9 +109,9 @@ is modified: the original file on disk is never touched.
     xarray's lazy loading of remote or on-disk datasets is often but not always
     desirable. Before performing computationally intense operations, it is
     often a good idea to load a Dataset (or DataArray) entirely into memory by
-    invoking the :py:meth:`~xarray.Dataset.load` method.
+    invoking the :py:meth:`Dataset.load` method.
 
-Datasets have a :py:meth:`~xarray.Dataset.close` method to close the associated
+Datasets have a :py:meth:`Dataset.close` method to close the associated
 netCDF file. However, it's often cleaner to use a ``with`` statement:
 
 .. ipython:: python
@@ -135,17 +136,17 @@ to the original netCDF file, regardless if they exist in the original dataset.
 Groups
 ~~~~~~
 
-NetCDF groups are not supported as part of the :py:class:`~xarray.Dataset` data model.
+NetCDF groups are not supported as part of the :py:class:`Dataset` data model.
 Instead, groups can be loaded individually as Dataset objects.
 To do so, pass a ``group`` keyword argument to the
-:py:func:`~xarray.open_dataset` function. The group can be specified as a path-like
+:py:func:`open_dataset` function. The group can be specified as a path-like
 string, e.g., to access subgroup ``'bar'`` within group ``'foo'`` pass
 ``'/foo/bar'`` as the ``group`` argument.
 In a similar way, the ``group`` keyword argument can be given to the
-:py:meth:`~xarray.Dataset.to_netcdf` method to write to a group
+:py:meth:`Dataset.to_netcdf` method to write to a group
 in a netCDF file.
 When writing multiple groups in one file, pass ``mode='a'`` to
-:py:meth:`~xarray.Dataset.to_netcdf` to ensure that each call does not delete the file.
+:py:meth:`Dataset.to_netcdf` to ensure that each call does not delete the file.
 
 .. _io.encoding:
 
@@ -155,7 +156,7 @@ Reading encoded data
 NetCDF files follow some conventions for encoding datetime arrays (as numbers
 with a "units" attribute) and for packing and unpacking data (as
 described by the "scale_factor" and "add_offset" attributes). If the argument
-``decode_cf=True`` (default) is given to :py:func:`~xarray.open_dataset`, xarray will attempt
+``decode_cf=True`` (default) is given to :py:func:`open_dataset`, xarray will attempt
 to automatically decode the values in the netCDF objects according to
 `CF conventions`_. Sometimes this will fail, for example, if a variable
 has an invalid "units" or "calendar" attribute. For these cases, you can
@@ -164,8 +165,8 @@ turn this decoding off manually.
 .. _CF conventions: http://cfconventions.org/
 
 You can view this encoding information (among others) in the
-:py:attr:`DataArray.encoding <xarray.DataArray.encoding>` and
-:py:attr:`DataArray.encoding <xarray.DataArray.encoding>` attributes:
+:py:attr:`DataArray.encoding` and
+:py:attr:`Dataset.encoding` attributes:
 
 .. ipython::
     :verbatim:
@@ -206,13 +207,13 @@ Reading multi-file datasets
 NetCDF files are often encountered in collections, e.g., with different files
 corresponding to different model runs or one file per timestamp.
 xarray can straightforwardly combine such files into a single Dataset by making use of
-:py:func:`~xarray.concat`, :py:func:`~xarray.merge`, :py:func:`~xarray.combine_nested` and
-:py:func:`~xarray.combine_by_coords`. For details on the difference between these
+:py:func:`concat`, :py:func:`merge`, :py:func:`combine_nested` and
+:py:func:`combine_by_coords`. For details on the difference between these
 functions see :ref:`combining data`.
 
 Xarray includes support for manipulating datasets that don't fit into memory
 with dask_. If you have dask installed, you can open multiple files
-simultaneously in parallel using :py:func:`~xarray.open_mfdataset`::
+simultaneously in parallel using :py:func:`open_mfdataset`::
 
     xr.open_mfdataset('my/files/*.nc', parallel=True)
 
@@ -221,7 +222,7 @@ single xarray dataset.
 It is the recommended way to open multiple files with xarray.
 For more details on parallel reading, see :ref:`combining.multi`, :ref:`dask.io` and a
 `blog post`_ by Stephan Hoyer.
-:py:func:`~xarray.open_mfdataset` takes many kwargs that allow you to
+:py:func:`open_mfdataset` takes many kwargs that allow you to
 control its behaviour (for e.g. ``parallel``, ``combine``, ``compat``, ``join``, ``concat_dim``).
 See its docstring for more details.
 
@@ -246,14 +247,14 @@ See its docstring for more details.
 .. _dask: http://dask.pydata.org
 .. _blog post: http://stephanhoyer.com/2015/06/11/xray-dask-out-of-core-labeled-arrays/
 
-Sometimes multi-file datasets are not conveniently organized for easy use of :py:func:`~xarray.open_mfdataset`.
+Sometimes multi-file datasets are not conveniently organized for easy use of :py:func:`open_mfdataset`.
 One can use the ``preprocess`` argument to provide a function that takes a dataset
 and returns a modified Dataset.
-:py:func:`~xarray.open_mfdataset` will call ``preprocess`` on every dataset
+:py:func:`open_mfdataset` will call ``preprocess`` on every dataset
 (corresponding to each file) prior to combining them.
 
 
-If :py:func:`~xarray.open_mfdataset` does not meet your needs, other approaches are possible.
+If :py:func:`open_mfdataset` does not meet your needs, other approaches are possible.
 The general pattern for parallel reading of multiple files
 using dask, modifying those datasets and then combining into a single ``Dataset`` is::
 
@@ -437,17 +438,31 @@ like ``'days'`` for ``timedelta64`` data. ``calendar`` should be one of the cale
 supported by netCDF4-python: 'standard', 'gregorian', 'proleptic_gregorian' 'noleap',
 '365_day', '360_day', 'julian', 'all_leap', '366_day'.
 
-By default, xarray uses the 'proleptic_gregorian' calendar and units of the smallest time
+By default, xarray uses the ``'proleptic_gregorian'`` calendar and units of the smallest time
 difference between values, with a reference time of the first time value.
 
+
+.. _io.coordinates:
+
+Coordinates
+...........
+
+You can control the ``coordinates`` attribute written to disk by specifying ``DataArray.encoding["coordinates"]``.
+If not specified, xarray automatically sets ``DataArray.encoding["coordinates"]`` to a space-delimited list
+of names of coordinate variables that share dimensions with the ``DataArray`` being written.
+This allows perfect roundtripping of xarray datasets but may not be desirable.
+When an xarray ``Dataset`` contains non-dimensional coordinates that do not share dimensions with any of
+the variables, these coordinate variable names are saved under a "global" ``"coordinates"`` attribute.
+This is not CF-compliant but again facilitates roundtripping of xarray datasets.
+
 Invalid netCDF files
 ~~~~~~~~~~~~~~~~~~~~
 
 The library ``h5netcdf`` allows writing some dtypes (booleans, complex, ...) that aren't 
 allowed in netCDF4 (see
-`h5netcdf documentation <https://github.com/shoyer/h5netcdf#invalid-netcdf-files)>`_.
-This feature is availabe through :py:func:`DataArray.to_netcdf` and
-:py:func:`Dataset.to_netcdf` when used with ``engine="h5netcdf"``
+`h5netcdf documentation <https://github.com/shoyer/h5netcdf#invalid-netcdf-files>`_).
+This feature is available through :py:meth:`DataArray.to_netcdf` and
+:py:meth:`Dataset.to_netcdf` when used with ``engine="h5netcdf"``
 and currently raises a warning unless ``invalid_netcdf=True`` is set:
 
 .. ipython:: python
@@ -480,7 +495,7 @@ The Iris_ tool allows easy reading of common meteorological and climate model fo
 (including GRIB and UK MetOffice PP files) into ``Cube`` objects which are in many ways very
 similar to ``DataArray`` objects, while enforcing a CF-compliant data model. If iris is
 installed xarray can convert a ``DataArray`` into a ``Cube`` using
-:py:meth:`~xarray.DataArray.to_iris`:
+:py:meth:`DataArray.to_iris`:
 
 .. ipython:: python
 
@@ -492,7 +507,7 @@ installed xarray can convert a ``DataArray`` into a ``Cube`` using
     cube
 
 Conversely, we can create a new ``DataArray`` object from a ``Cube`` using
-:py:meth:`~xarray.DataArray.from_iris`:
+:py:meth:`DataArray.from_iris`:
 
 .. ipython:: python
 
@@ -594,7 +609,7 @@ over the network until we look at particular values:
 .. image:: _static/opendap-prism-tmax.png
 
 Some servers require authentication before we can access the data. For this
-purpose we can explicitly create a :py:class:`~xarray.backends.PydapDataStore`
+purpose we can explicitly create a :py:class:`backends.PydapDataStore`
 and pass in a `Requests`__ session object. For example for
 HTTP Basic authentication::
 
@@ -657,8 +672,8 @@ this version of xarray will work in future versions.
 
   When pickling an object opened from a NetCDF file, the pickle file will
   contain a reference to the file on disk. If you want to store the actual
-  array values, load it into memory first with :py:meth:`~xarray.Dataset.load`
-  or :py:meth:`~xarray.Dataset.compute`.
+  array values, load it into memory first with :py:meth:`Dataset.load`
+  or :py:meth:`Dataset.compute`.
 
 .. _dictionary io:
 
@@ -666,7 +681,7 @@ Dictionary
 ----------
 
 We can convert a ``Dataset`` (or a ``DataArray``) to a dict using
-:py:meth:`~xarray.Dataset.to_dict`:
+:py:meth:`Dataset.to_dict`:
 
 .. ipython:: python
 
@@ -674,7 +689,7 @@ We can convert a ``Dataset`` (or a ``DataArray``) to a dict using
     d
 
 We can create a new xarray object from a dict using
-:py:meth:`~xarray.Dataset.from_dict`:
+:py:meth:`Dataset.from_dict`:
 
 .. ipython:: python
 
@@ -709,7 +724,7 @@ Rasterio
 
 GeoTIFFs and other gridded raster datasets can be opened using `rasterio`_, if
 rasterio is installed. Here is an example of how to use
-:py:func:`~xarray.open_rasterio` to read one of rasterio's `test files`_:
+:py:func:`open_rasterio` to read one of rasterio's `test files`_:
 
 .. ipython::
     :verbatim:
@@ -768,8 +783,7 @@ Xarray's Zarr backend allows xarray to leverage these capabilities.
 Xarray can't open just any zarr dataset, because xarray requires special
 metadata (attributes) describing the dataset dimensions and coordinates.
 At this time, xarray can only open zarr datasets that have been written by
-xarray. To write a dataset with zarr, we use the
-:py:attr:`Dataset.to_zarr <xarray.Dataset.to_zarr>` method.
+xarray. To write a dataset with zarr, we use the :py:meth:`Dataset.to_zarr` method.
 To write to a local directory, we pass a path to a directory
 
 .. ipython:: python
@@ -816,7 +830,7 @@ can be omitted as it will internally be set to ``'a'``.
 To store variable length strings use ``dtype=object``.
 
 To read back a zarr dataset that has been created this way, we use the
-:py:func:`~xarray.open_zarr` method:
+:py:func:`open_zarr` function:
 
 .. ipython:: python
 
@@ -885,12 +899,12 @@ opening the store. (For more information on this feature, consult the
 If you have zarr version 2.3 or greater, xarray can write and read stores
 with consolidated metadata. To write consolidated metadata, pass the
 ``consolidated=True`` option to the
-:py:attr:`Dataset.to_zarr <xarray.Dataset.to_zarr>` method::
+:py:meth:`Dataset.to_zarr` method::
 
     ds.to_zarr('foo.zarr', consolidated=True)
 
 To read a consolidated store, pass the ``consolidated=True`` option to
-:py:func:`~xarray.open_zarr`::
+:py:func:`open_zarr`::
 
     ds = xr.open_zarr('foo.zarr', consolidated=True)
 
@@ -912,7 +926,7 @@ GRIB format via cfgrib
 
 xarray supports reading GRIB files via ECMWF cfgrib_ python driver and ecCodes_
 C-library, if they are installed. To open a GRIB file supply ``engine='cfgrib'``
-to :py:func:`~xarray.open_dataset`:
+to :py:func:`open_dataset`:
 
 .. ipython::
     :verbatim:
@@ -934,7 +948,7 @@ Formats supported by PyNIO
 
 xarray can also read GRIB, HDF4 and other file formats supported by PyNIO_,
 if PyNIO is installed. To use PyNIO to read such files, supply
-``engine='pynio'`` to :py:func:`~xarray.open_dataset`.
+``engine='pynio'`` to :py:func:`open_dataset`.
 
 We recommend installing PyNIO via conda::
 
@@ -956,7 +970,7 @@ identify readers heuristically, or format can be specified via a key in
 `backend_kwargs`.
 
 To use PseudoNetCDF to read such files, supply
-``engine='pseudonetcdf'`` to :py:func:`~xarray.open_dataset`.
+``engine='pseudonetcdf'`` to :py:func:`open_dataset`.
 
 Add ``backend_kwargs={'format': '<format name>'}`` where `<format name>`
 options are listed on the PseudoNetCDF page.
diff --git a/doc/pandas.rst b/doc/pandas.rst
index 72abf6609f6..a84c89ab938 100644
--- a/doc/pandas.rst
+++ b/doc/pandas.rst
@@ -1,3 +1,4 @@
+.. currentmodule:: xarray
 .. _pandas:
 
 ===================
@@ -32,9 +33,9 @@ Tabular data is easiest to work with when it meets the criteria for
 
 __ http://www.jstatsoft.org/v59/i10/
 
-In this "tidy data" format, we can represent any :py:class:`~xarray.Dataset` and
-:py:class:`~xarray.DataArray` in terms of :py:class:`pandas.DataFrame` and
-:py:class:`pandas.Series`, respectively (and vice-versa). The representation
+In this "tidy data" format, we can represent any :py:class:`Dataset` and
+:py:class:`DataArray` in terms of :py:class:`~pandas.DataFrame` and
+:py:class:`~pandas.Series`, respectively (and vice-versa). The representation
 works by flattening non-coordinates to 1D, and turning the tensor product of
 coordinate indexes into a :py:class:`pandas.MultiIndex`.
 
@@ -42,7 +43,7 @@ Dataset and DataFrame
 ---------------------
 
 To convert any dataset to a ``DataFrame`` in tidy form, use the
-:py:meth:`Dataset.to_dataframe() <xarray.Dataset.to_dataframe>` method:
+:py:meth:`Dataset.to_dataframe()` method:
 
 .. ipython:: python
 
@@ -61,11 +62,11 @@ use ``DataFrame`` methods like :py:meth:`~pandas.DataFrame.reset_index`,
 :py:meth:`~pandas.DataFrame.stack` and :py:meth:`~pandas.DataFrame.unstack`.
 
 For datasets containing dask arrays where the data should be lazily loaded, see the
-:py:meth:`Dataset.to_dask_dataframe() <xarray.Dataset.to_dask_dataframe>` method.
+:py:meth:`Dataset.to_dask_dataframe()` method.
 
 To create a ``Dataset`` from a ``DataFrame``, use the
-:py:meth:`~xarray.Dataset.from_dataframe` class method or the equivalent
-:py:meth:`pandas.DataFrame.to_xarray <DataFrame.to_xarray>` method:
+:py:meth:`Dataset.from_dataframe` class method or the equivalent
+:py:meth:`pandas.DataFrame.to_xarray` method:
 
 .. ipython:: python
 
@@ -83,7 +84,7 @@ DataArray and Series
 --------------------
 
 ``DataArray`` objects have a complementary representation in terms of a
-:py:class:`pandas.Series`. Using a Series preserves the ``Dataset`` to
+:py:class:`~pandas.Series`. Using a Series preserves the ``Dataset`` to
 ``DataArray`` relationship, because ``DataFrames`` are dict-like containers
 of ``Series``. The methods are very similar to those for working with
 DataFrames:
@@ -109,7 +110,7 @@ Multi-dimensional data
 Tidy data is great, but it sometimes you want to preserve dimensions instead of
 automatically stacking them into a ``MultiIndex``.
 
-:py:meth:`DataArray.to_pandas() <xarray.DataArray.to_pandas>` is a shortcut that
+:py:meth:`DataArray.to_pandas()` is a shortcut that
 lets you convert a DataArray directly into a pandas object with the same
 dimensionality (i.e., a 1D array is converted to a :py:class:`~pandas.Series`,
 2D to :py:class:`~pandas.DataFrame` and 3D to ``pandas.Panel``):
@@ -122,7 +123,7 @@ dimensionality (i.e., a 1D array is converted to a :py:class:`~pandas.Series`,
     df
 
 To perform the inverse operation of converting any pandas objects into a data
-array with the same shape, simply use the :py:class:`~xarray.DataArray`
+array with the same shape, simply use the :py:class:`DataArray`
 constructor:
 
 .. ipython:: python
@@ -143,7 +144,7 @@ preserve all use of multi-indexes:
 
 However, you will need to set dimension names explicitly, either with the
 ``dims`` argument on in the ``DataArray`` constructor or by calling
-:py:class:`~xarray.Dataset.rename` on the new object.
+:py:meth:`~Dataset.rename` on the new object.
 
 .. _panel transition:
 
diff --git a/doc/plotting.rst b/doc/plotting.rst
index 270988b99de..ea9816780a7 100644
--- a/doc/plotting.rst
+++ b/doc/plotting.rst
@@ -1,3 +1,4 @@
+.. currentmodule:: xarray
 .. _plotting:
 
 Plotting
@@ -10,8 +11,8 @@ Labeled data enables expressive computations. These same
 labels can also be used to easily create informative plots.
 
 xarray's plotting capabilities are centered around
-:py:class:`xarray.DataArray` objects.
-To plot :py:class:`xarray.Dataset` objects
+:py:class:`DataArray` objects.
+To plot :py:class:`Dataset` objects
 simply access the relevant DataArrays, ie ``dset['var1']``.
 Dataset specific plotting routines are also available (see :ref:`plot-dataset`).
 Here we focus mostly on arrays 2d or larger. If your data fits
@@ -94,7 +95,7 @@ One Dimension
  Simple Example
 ================
 
-The simplest way to make a plot is to call the :py:func:`xarray.DataArray.plot()` method.
+The simplest way to make a plot is to call the :py:meth:`DataArray.plot()` method.
 
 .. ipython:: python
 
@@ -227,7 +228,7 @@ It is required to explicitly specify either
 
 Thus, we could have made the previous plot by specifying ``hue='lat'`` instead of ``x='time'``.
 If required, the automatic legend can be turned off using ``add_legend=False``. Alternatively,
-``hue`` can be passed directly to :py:func:`xarray.plot` as `air.isel(lon=10, lat=[19,21,22]).plot(hue='lat')`.
+``hue`` can be passed directly to :py:func:`xarray.plot.line` as ``air.isel(lon=10, lat=[19,21,22]).plot.line(hue='lat')``.
 
 
 ========================
@@ -256,7 +257,7 @@ made using 1D data.
 
 The argument ``where`` defines where the steps should be placed, options are
 ``'pre'`` (default), ``'post'``, and ``'mid'``. This is particularly handy
-when plotting data grouped with :py:func:`xarray.Dataset.groupby_bins`.
+when plotting data grouped with :py:meth:`Dataset.groupby_bins`.
 
 .. ipython:: python
 
@@ -295,7 +296,7 @@ Two Dimensions
  Simple Example
 ================
 
-The default method :py:meth:`xarray.DataArray.plot` calls :py:func:`xarray.plot.pcolormesh` by default when the data is two-dimensional.
+The default method :py:meth:`DataArray.plot` calls :py:func:`xarray.plot.pcolormesh` by default when the data is two-dimensional.
 
 .. ipython:: python
 
@@ -487,6 +488,7 @@ Faceting here refers to splitting an array along one or two dimensions and
 plotting each group.
 xarray's basic plotting is useful for plotting two dimensional arrays. What
 about three or four dimensional arrays? That's where facets become helpful.
+The general approach to plotting here is called “small multiples”, where the same kind of plot is repeated multiple times, and the specific use of small multiples to display the same relationship conditioned on one or more other variables is often called a “trellis plot”.
 
 Consider the temperature data set. There are 4 observations per day for two
 years which makes for 2920 values along the time dimension.
@@ -572,8 +574,9 @@ Faceted plotting supports other arguments common to xarray 2d plots.
  FacetGrid Objects
 ===================
 
-:py:class:`xarray.plot.FacetGrid` is used to control the behavior of the
-multiple plots.
+The object returned, ``g`` in the above examples, is a :py:class:`~xarray.plot.FacetGrid` object
+that links a :py:class:`DataArray` to a matplotlib figure with a particular structure.
+This object can be used to control the behavior of the multiple plots.
 It borrows an API and code from `Seaborn's FacetGrid
 <http://seaborn.pydata.org/tutorial/axis_grids.html>`_.
 The structure is contained within the ``axes`` and ``name_dicts``
@@ -609,6 +612,13 @@ they have been plotted.
     @savefig plot_facet_iterator.png
     plt.draw()
 
+
+:py:class:`~xarray.plot.FacetGrid` objects have methods that let you customize the automatically generated
+axis labels, axis ticks and plot titles. See :py:meth:`~xarray.plot.FacetGrid.set_titles`,
+:py:meth:`~xarray.plot.FacetGrid.set_xlabels`, :py:meth:`~xarray.plot.FacetGrid.set_ylabels` and
+:py:meth:`~xarray.plot.FacetGrid.set_ticks` for more information.
+Plotting functions can be applied to each subset of the data by calling :py:meth:`~xarray.plot.FacetGrid.map_dataarray` or to each subplot by calling :py:meth:`~xarray.plot.FacetGrid.map`.
+
 TODO: add an example of using the ``map`` method to plot dataset variables
 (e.g., with ``plt.quiver``).
 
diff --git a/doc/related-projects.rst b/doc/related-projects.rst
index fd77ce56a0a..3188751366f 100644
--- a/doc/related-projects.rst
+++ b/doc/related-projects.rst
@@ -25,6 +25,8 @@ Geosciences
 - `PyGDX <https://pygdx.readthedocs.io/en/latest/>`_: Python 3 package for
   accessing data stored in GAMS Data eXchange (GDX) files. Also uses a custom
   subclass.
+- `pyinterp <https://pangeo-pyinterp.readthedocs.io/en/latest/>`_: Python 3 package for interpolating geo-referenced data used in the field of geosciences.
+- `pyXpcm <https://pyxpcm.readthedocs.io>`_: xarray-based Profile Classification Modelling (PCM), mostly for ocean data.
 - `Regionmask <https://regionmask.readthedocs.io/>`_: plotting and creation of masks of spatial regions
 - `rioxarray <https://corteva.github.io/rioxarray>`_: geospatial xarray extension powered by rasterio
 - `salem <https://salem.readthedocs.io>`_: Adds geolocalised subsetting, masking, and plotting operations to xarray's data structures via accessors.
diff --git a/doc/terminology.rst b/doc/terminology.rst
index d1265e4da9d..ab6d856920a 100644
--- a/doc/terminology.rst
+++ b/doc/terminology.rst
@@ -1,3 +1,4 @@
+.. currentmodule:: xarray
 .. _terminology:
 
 Terminology
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 1d239e18fcd..00d1c50780e 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -25,32 +25,69 @@ Breaking changes
 
 New Features
 ~~~~~~~~~~~~
-
+- Implement :py:func:`median` and :py:func:`nanmedian` for dask arrays. This works by rechunking
+  to a single chunk along all reduction axes. (:issue:`2999`).
+  By `Deepak Cherian <https://github.com/dcherian>`_.
+- :py:func:`xarray.concat` now preserves attributes from the first Variable.
+  (:issue:`2575`, :issue:`2060`, :issue:`1614`)
+  By `Deepak Cherian <https://github.com/dcherian>`_.
+- :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile` and ``GroupBy.quantile``
+  now work with dask Variables.
+  By `Deepak Cherian <https://github.com/dcherian>`_.
+- Added the ``count`` reduction method to both :py:class:`~core.rolling.DatasetCoarsen`
+  and :py:class:`~core.rolling.DataArrayCoarsen` objects. (:pull:`3500`)
+  By `Deepak Cherian <https://github.com/dcherian>`_
+- Extend :py:class:`core.accessor_dt.DatetimeAccessor` properties 
+  and support `.dt` accessor for timedelta 
+  via :py:class:`core.accessor_dt.TimedeltaAccessor` (:pull:`3612`)
+  By `Anderson Banihirwe <https://github.com/andersy005>`_.
 
 Bug fixes
 ~~~~~~~~~
-
+- Fix :py:meth:`xarray.combine_by_coords` to allow for combining incomplete
+  hypercubes of Datasets (:issue:`3648`).  By `Ian Bolliger
+  <https://github.com/bolliger32>`_.
+- Fix :py:meth:`xarray.combine_by_coords` when combining cftime coordinates
+  which span long time intervals (:issue:`3535`).  By `Spencer Clark
+  <https://github.com/spencerkclark>`_.
+- Fix plotting with transposed 2D non-dimensional coordinates. (:issue:`3138`, :pull:`3441`)
+  By `Deepak Cherian <https://github.com/dcherian>`_.
+- :py:meth:`~xarray.plot.FacetGrid.set_titles` can now replace existing row titles of a
+  :py:class:`~xarray.plot.FacetGrid` plot. In addition :py:class:`~xarray.plot.FacetGrid` gained
+  two new attributes: :py:attr:`~xarray.plot.FacetGrid.col_labels` and
+  :py:attr:`~xarray.plot.FacetGrid.row_labels` contain matplotlib Text handles for both column and
+  row labels. These can be used to manually change the labels.
+  By `Deepak Cherian <https://github.com/dcherian>`_.
+- Fix issue with Dask-backed datasets raising a ``KeyError`` on some computations involving ``map_blocks`` (:pull:`3598`)
+  By `Tom Augspurger <https://github.com/TomAugspurger>`_.
+- Ensure :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile` issue the correct error
+  when ``q`` is out of bounds (:issue:`3634`) by `Mathias Hauser <https://github.com/mathause>`_.
 
 Documentation
 ~~~~~~~~~~~~~
 - Switch doc examples to use nbsphinx and replace sphinx_gallery with
   notebook.
   (:pull:`3105`, :pull:`3106`, :pull:`3121`)
-  By `Ryan Abernathey <https://github.com/rabernat>`
+  By `Ryan Abernathey <https://github.com/rabernat>`_
 - Added example notebook demonstrating use of xarray with Regional Ocean
   Modeling System (ROMS) ocean hydrodynamic model output.
   (:pull:`3116`).
-  By `Robert Hetland <https://github.com/hetland>`
+  By `Robert Hetland <https://github.com/hetland>`_
 - Added example notebook demonstrating the visualization of ERA5 GRIB
   data. (:pull:`3199`)
-  By `Zach Bruick <https://github.com/zbruick>` and
-  `Stephan Siemen <https://github.com/StephanSiemen>`
+  By `Zach Bruick <https://github.com/zbruick>`_ and
+  `Stephan Siemen <https://github.com/StephanSiemen>`_
+- Added examples for :py:meth:`DataArray.quantile`, :py:meth:`Dataset.quantile` and
+  ``GroupBy.quantile``. (:pull:`3576`)
+  By `Justus Magin <https://github.com/keewis>`_.
 
 Internal Changes
 ~~~~~~~~~~~~~~~~
-
-
-- Removed internal method ``Dataset._from_vars_and_coord_names``, 
+- 2x to 5x speed boost (on small arrays) for :py:meth:`Dataset.isel`,
+  :py:meth:`DataArray.isel`, and :py:meth:`DataArray.__getitem__` when indexing by int,
+  slice, list of int, scalar ndarray, or 1-dimensional ndarray.
+  (:pull:`3533`) by `Guido Imperiale <https://github.com/crusaderky>`_.
+- Removed internal method ``Dataset._from_vars_and_coord_names``,
   which was dominated by ``Dataset._construct_direct``. (:pull:`3565`)
   By `Maximilian Roos <https://github.com/max-sixty>`_
 
@@ -77,8 +114,8 @@ Breaking changes
 
 New Features
 ~~~~~~~~~~~~
-- Added the ``sparse`` option to :py:meth:`~xarray.DataArray.unstack`, 
-  :py:meth:`~xarray.Dataset.unstack`, :py:meth:`~xarray.DataArray.reindex`, 
+- Added the ``sparse`` option to :py:meth:`~xarray.DataArray.unstack`,
+  :py:meth:`~xarray.Dataset.unstack`, :py:meth:`~xarray.DataArray.reindex`,
   :py:meth:`~xarray.Dataset.reindex` (:issue:`3518`).
   By `Keisuke Fujii <https://github.com/fujiisoup>`_.
 - Added the ``fill_value`` option to :py:meth:`DataArray.unstack` and
@@ -88,13 +125,13 @@ New Features
   :py:meth:`~xarray.Dataset.interpolate_na`. This controls the maximum size of the data
   gap that will be filled by interpolation. By `Deepak Cherian <https://github.com/dcherian>`_.
 - Added :py:meth:`Dataset.drop_sel` & :py:meth:`DataArray.drop_sel` for dropping labels.
-  :py:meth:`Dataset.drop_vars` & :py:meth:`DataArray.drop_vars` have been added for 
+  :py:meth:`Dataset.drop_vars` & :py:meth:`DataArray.drop_vars` have been added for
   dropping variables (including coordinates). The existing :py:meth:`Dataset.drop` &
   :py:meth:`DataArray.drop` methods remain as a backward compatible
   option for dropping either labels or variables, but using the more specific methods is encouraged.
   (:pull:`3475`)
   By `Maximilian Roos <https://github.com/max-sixty>`_
-- Added :py:meth:`Dataset.map` & :py:meth:`GroupBy.map` & :py:meth:`Resample.map` for 
+- Added :py:meth:`Dataset.map` & ``GroupBy.map`` & ``Resample.map`` for
   mapping / applying a function over each item in the collection, reflecting the widely used
   and least surprising name for this operation.
   The existing ``apply`` methods remain for backward compatibility, though using the ``map``
@@ -113,7 +150,7 @@ New Features
 - :py:func:`xarray.dot`, and :py:meth:`DataArray.dot` now support the
   ``dims=...`` option to sum over the union of dimensions of all input arrays
   (:issue:`3423`) by `Mathias Hauser <https://github.com/mathause>`_.
-- Added new :py:meth:`Dataset._repr_html_` and :py:meth:`DataArray._repr_html_` to improve
+- Added new ``Dataset._repr_html_`` and ``DataArray._repr_html_`` to improve
   representation of objects in Jupyter. By default this feature is turned off
   for now. Enable it with ``xarray.set_options(display_style="html")``.
   (:pull:`3425`) by `Benoit Bovy <https://github.com/benbovy>`_ and
@@ -122,22 +159,26 @@ New Features
   <https://docs.dask.org/en/latest/custom-collections.html#deterministic-hashing>`_
   for xarray objects. Note that xarray objects with a dask.array backend already used
   deterministic hashing in previous releases; this change implements it when whole
-  xarray objects are embedded in a dask graph, e.g. when :py:meth:`DataArray.map` is
+  xarray objects are embedded in a dask graph, e.g. when :py:meth:`DataArray.map_blocks` is
   invoked. (:issue:`3378`, :pull:`3446`, :pull:`3515`)
   By `Deepak Cherian <https://github.com/dcherian>`_ and
   `Guido Imperiale <https://github.com/crusaderky>`_.
-- Add the documented-but-missing :py:meth:`DatasetGroupBy.quantile`.
+- Add the documented-but-missing :py:meth:`~core.groupby.DatasetGroupBy.quantile`.
+- xarray now respects the ``DataArray.encoding["coordinates"]`` attribute when writing to disk.
+  See :ref:`io.coordinates` for more. (:issue:`3351`, :pull:`3487`)
+  By `Deepak Cherian <https://github.com/dcherian>`_.
+- Add the documented-but-missing :py:meth:`~core.groupby.DatasetGroupBy.quantile`.
   (:issue:`3525`, :pull:`3527`). By `Justus Magin <https://github.com/keewis>`_.
 
 Bug fixes
 ~~~~~~~~~
-- Ensure an index of type ``CFTimeIndex`` is not converted to a ``DatetimeIndex`` when 
+- Ensure an index of type ``CFTimeIndex`` is not converted to a ``DatetimeIndex`` when
   calling :py:meth:`Dataset.rename`, :py:meth:`Dataset.rename_dims` and :py:meth:`Dataset.rename_vars`.
   By `Mathias Hauser <https://github.com/mathause>`_. (:issue:`3522`).
 - Fix a bug in :py:meth:`DataArray.set_index` in case that an existing dimension becomes a level
   variable of MultiIndex. (:pull:`3520`). By `Keisuke Fujii <https://github.com/fujiisoup>`_.
 - Harmonize ``_FillValue``, ``missing_value`` during encoding and decoding steps. (:pull:`3502`)
-  By `Anderson Banihirwe <https://github.com/andersy005>`_. 
+  By `Anderson Banihirwe <https://github.com/andersy005>`_.
 - Fix regression introduced in v0.14.0 that would cause a crash if dask is installed
   but cloudpickle isn't (:issue:`3401`) by `Rhys Doyle <https://github.com/rdoyle45>`_
 - Fix grouping over variables with NaNs. (:issue:`2383`, :pull:`3406`).
@@ -152,7 +193,7 @@ Bug fixes
 - Rolling reduction operations no longer compute dask arrays by default. (:issue:`3161`).
   In addition, the ``allow_lazy`` kwarg to ``reduce`` is deprecated.
   By `Deepak Cherian <https://github.com/dcherian>`_.
-- Fix :py:meth:`GroupBy.reduce` when reducing over multiple dimensions.
+- Fix ``GroupBy.reduce`` when reducing over multiple dimensions.
   (:issue:`3402`). By `Deepak Cherian <https://github.com/dcherian>`_
 - Allow appending datetime and bool data variables to zarr stores.
   (:issue:`3480`). By `Akihiro Matsukawa <https://github.com/amatsukawa>`_.
@@ -180,6 +221,7 @@ Documentation
 
 Internal Changes
 ~~~~~~~~~~~~~~~~
+
 - Added integration tests against `pint <https://pint.readthedocs.io/>`_.
   (:pull:`3238`, :pull:`3447`, :pull:`3493`, :pull:`3508`)
   by `Justus Magin <https://github.com/keewis>`_.
@@ -201,7 +243,7 @@ Internal Changes
 - Enable type checking on default sentinel values (:pull:`3472`)
   By `Maximilian Roos <https://github.com/max-sixty>`_
 
-- Add :py:meth:`Variable._replace` for simpler replacing of a subset of attributes (:pull:`3472`)
+- Add ``Variable._replace`` for simpler replacing of a subset of attributes (:pull:`3472`)
   By `Maximilian Roos <https://github.com/max-sixty>`_
 
 .. _whats-new.0.14.0:
@@ -257,7 +299,7 @@ New functions/methods
 Enhancements
 ~~~~~~~~~~~~
 
-- :py:class:`~xarray.core.GroupBy` enhancements. By `Deepak Cherian <https://github.com/dcherian>`_.
+- ``core.groupby.GroupBy`` enhancements. By `Deepak Cherian <https://github.com/dcherian>`_.
 
   - Added a repr (:pull:`3344`). Example::
 
@@ -292,7 +334,7 @@ Bug fixes
 - Fix error in concatenating unlabeled dimensions (:pull:`3362`).
   By `Deepak Cherian <https://github.com/dcherian>`_.
 - Warn if the ``dim`` kwarg is passed to rolling operations. This is redundant since a dimension is
-  specified when the :py:class:`DatasetRolling` or :py:class:`DataArrayRolling` object is created.
+  specified when the :py:class:`~core.rolling.DatasetRolling` or :py:class:`~core.rolling.DataArrayRolling` object is created.
   (:pull:`3362`). By `Deepak Cherian <https://github.com/dcherian>`_.
 
 Documentation
@@ -365,7 +407,7 @@ Breaking changes
 - Reindexing with variables of a different dimension now raise an error (previously deprecated)
 - ``xarray.broadcast_array`` is removed (previously deprecated in favor of
   :py:func:`~xarray.broadcast`)
-- :py:meth:`Variable.expand_dims` is removed (previously deprecated in favor of
+- ``Variable.expand_dims`` is removed (previously deprecated in favor of
   :py:meth:`Variable.set_dims`)
 
 New functions/methods
@@ -450,8 +492,7 @@ Enhancements
 
 - ``xarray.Dataset.drop`` now supports keyword arguments; dropping index
   labels by using both ``dim`` and ``labels`` or using a
-  :py:class:`~xarray.core.coordinates.DataArrayCoordinates` object are
-  deprecated (:issue:`2910`).
+  :py:class:`~core.coordinates.DataArrayCoordinates` object are deprecated (:issue:`2910`).
   By `Gregory Gundersen <https://github.com/gwgundersen>`_.
 
 - Added examples of :py:meth:`Dataset.set_index` and
@@ -599,7 +640,7 @@ New functions/methods
   By `Alan Brammer <https://github.com/abrammer>`_ and
   `Ryan May <https://github.com/dopplershift>`_.
 
-- :py:meth:`~xarray.core.GroupBy.quantile` is now a method of ``GroupBy``
+- ``GroupBy.quantile`` is now a method of ``GroupBy``
   objects  (:issue:`3018`).
   By `David Huard <https://github.com/huard>`_.
 
@@ -1141,7 +1182,7 @@ Announcements of note:
   for more details.
 - We have a new :doc:`roadmap` that outlines our future development plans.
 
-- `Dataset.apply` now properly documents the way `func` is called.
+- ``Dataset.apply`` now properly documents the way ``func`` is called.
   By `Matti Eskelinen <https://github.com/maaleske>`_.
 
 Enhancements
@@ -1573,7 +1614,7 @@ Backwards incompatible changes
 Enhancements
 ~~~~~~~~~~~~
 
-- Added :py:func:`~xarray.dot`, equivalent to :py:func:`np.einsum`.
+- Added :py:func:`~xarray.dot`, equivalent to :py:func:`numpy.einsum`.
   Also, :py:func:`~xarray.DataArray.dot` now supports ``dims`` option,
   which specifies the dimensions to sum over.
   (:issue:`1951`)
@@ -1758,7 +1799,7 @@ Bug fixes
   coordinates of target, destination and keys. If there are any conflict among
   these coordinates, ``IndexError`` will be raised.
   By `Keisuke Fujii <https://github.com/fujiisoup>`_.
-- Properly point :py:meth:`DataArray.__dask_scheduler__` to
+- Properly point ``DataArray.__dask_scheduler__`` to
   ``dask.threaded.get``.  By `Matthew Rocklin <https://github.com/mrocklin>`_.
 - Bug fixes in :py:meth:`DataArray.plot.imshow`: all-NaN arrays and arrays
   with size one in some dimension can now be plotted, which is good for
@@ -1970,7 +2011,7 @@ Enhancements
 
 - Support for :py:class:`pathlib.Path` objects added to
   :py:func:`~xarray.open_dataset`, :py:func:`~xarray.open_mfdataset`,
-  :py:func:`~xarray.to_netcdf`, and :py:func:`~xarray.save_mfdataset`
+  ``xarray.to_netcdf``, and :py:func:`~xarray.save_mfdataset`
   (:issue:`799`):
 
   .. ipython::
@@ -2378,7 +2419,7 @@ Enhancements
   By `Stephan Hoyer <https://github.com/shoyer>`_ and
   `Phillip J. Wolfram <https://github.com/pwolfram>`_.
 
-- New aggregation on rolling objects :py:meth:`DataArray.rolling(...).count()`
+- New aggregation on rolling objects :py:meth:`~core.rolling.DataArrayRolling.count`
   which providing a rolling count of valid values (:issue:`1138`).
 
 Bug fixes
diff --git a/readthedocs.yml b/readthedocs.yml
index 6429780e7d7..9ed8d28eaf2 100644
--- a/readthedocs.yml
+++ b/readthedocs.yml
@@ -1,8 +1,13 @@
+version: 2
+
 build:
     image: latest
+
 conda:
-    file: ci/requirements/doc.yml
+    environment: ci/requirements/doc.yml
+
 python:
     version: 3.7
-    setup_py_install: true
+    install: []
+
 formats: []
diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py
index 8471ed1a558..eeb68508527 100644
--- a/xarray/coding/cftime_offsets.py
+++ b/xarray/coding/cftime_offsets.py
@@ -42,6 +42,7 @@
 
 import re
 from datetime import timedelta
+from distutils.version import LooseVersion
 from functools import partial
 from typing import ClassVar, Optional
 
@@ -50,7 +51,6 @@
 from ..core.pdcompat import count_not_none
 from .cftimeindex import CFTimeIndex, _parse_iso8601_with_reso
 from .times import format_cftime_datetime
-from distutils.version import LooseVersion
 
 
 def get_date_type(calendar):
diff --git a/xarray/conventions.py b/xarray/conventions.py
index a83b4b31c17..a8b9906c153 100644
--- a/xarray/conventions.py
+++ b/xarray/conventions.py
@@ -5,7 +5,7 @@
 import pandas as pd
 
 from .coding import strings, times, variables
-from .coding.variables import SerializationWarning
+from .coding.variables import SerializationWarning, pop_to
 from .core import duck_array_ops, indexing
 from .core.common import contains_cftime_datetimes
 from .core.pycompat import dask_array_type
@@ -660,34 +660,46 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names):
                 and set(target_dims) <= set(v.dims)
             ):
                 variable_coordinates[k].add(coord_name)
-                global_coordinates.discard(coord_name)
 
     variables = {k: v.copy(deep=False) for k, v in variables.items()}
 
-    # These coordinates are saved according to CF conventions
-    for var_name, coord_names in variable_coordinates.items():
-        attrs = variables[var_name].attrs
-        if "coordinates" in attrs:
+    # keep track of variable names written to file under the "coordinates" attributes
+    written_coords = set()
+    for name, var in variables.items():
+        encoding = var.encoding
+        attrs = var.attrs
+        if "coordinates" in attrs and "coordinates" in encoding:
             raise ValueError(
-                "cannot serialize coordinates because variable "
-                "%s already has an attribute 'coordinates'" % var_name
+                f"'coordinates' found in both attrs and encoding for variable {name!r}."
             )
-        attrs["coordinates"] = " ".join(map(str, coord_names))
+
+        # this will copy coordinates from encoding to attrs if "coordinates" in encoding
+        # after the next line, "coordinates" is never in encoding
+        # we get support for attrs["coordinates"] for free.
+        coords_str = pop_to(encoding, attrs, "coordinates")
+        if not coords_str and variable_coordinates[name]:
+            attrs["coordinates"] = " ".join(map(str, variable_coordinates[name]))
+        if "coordinates" in attrs:
+            written_coords.update(attrs["coordinates"].split())
 
     # These coordinates are not associated with any particular variables, so we
     # save them under a global 'coordinates' attribute so xarray can roundtrip
     # the dataset faithfully. Because this serialization goes beyond CF
     # conventions, only do it if necessary.
     # Reference discussion:
-    # http://mailman.cgd.ucar.edu/pipermail/cf-metadata/2014/057771.html
+    # http://mailman.cgd.ucar.edu/pipermail/cf-metadata/2014/007571.html
+    global_coordinates.difference_update(written_coords)
     if global_coordinates:
         attributes = dict(attributes)
         if "coordinates" in attributes:
-            raise ValueError(
-                "cannot serialize coordinates because the global "
-                "attribute 'coordinates' already exists"
+            warnings.warn(
+                f"cannot serialize global coordinates {global_coordinates!r} because the global "
+                f"attribute 'coordinates' already exists. This may prevent faithful roundtripping "
+                f"of xarray datasets",
+                SerializationWarning,
             )
-        attributes["coordinates"] = " ".join(map(str, global_coordinates))
+        else:
+            attributes["coordinates"] = " ".join(map(str, global_coordinates))
 
     return variables, attributes
 
diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py
index aff6fbc6691..c407371f9f0 100644
--- a/xarray/core/accessor_dt.py
+++ b/xarray/core/accessor_dt.py
@@ -1,7 +1,11 @@
 import numpy as np
 import pandas as pd
 
-from .common import _contains_datetime_like_objects, is_np_datetime_like
+from .common import (
+    _contains_datetime_like_objects,
+    is_np_datetime_like,
+    is_np_timedelta_like,
+)
 from .pycompat import dask_array_type
 
 
@@ -145,37 +149,8 @@ def _strftime(values, date_format):
         return access_method(values, date_format)
 
 
-class DatetimeAccessor:
-    """Access datetime fields for DataArrays with datetime-like dtypes.
-
-     Similar to pandas, fields can be accessed through the `.dt` attribute
-     for applicable DataArrays:
-
-        >>> ds = xarray.Dataset({'time': pd.date_range(start='2000/01/01',
-        ...                                            freq='D', periods=100)})
-        >>> ds.time.dt
-        <xarray.core.accessors.DatetimeAccessor at 0x10c369f60>
-        >>> ds.time.dt.dayofyear[:5]
-        <xarray.DataArray 'dayofyear' (time: 5)>
-        array([1, 2, 3, 4, 5], dtype=int32)
-        Coordinates:
-          * time     (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 ...
-
-     All of the pandas fields are accessible here. Note that these fields are
-     not calendar-aware; if your datetimes are encoded with a non-Gregorian
-     calendar (e.g. a 360-day calendar) using cftime, then some fields like
-     `dayofyear` may not be accurate.
-
-     """
-
+class Properties:
     def __init__(self, obj):
-        if not _contains_datetime_like_objects(obj):
-            raise TypeError(
-                "'dt' accessor only available for "
-                "DataArray with datetime64 timedelta64 dtype or "
-                "for arrays containing cftime datetime "
-                "objects."
-            )
         self._obj = obj
 
     def _tslib_field_accessor(  # type: ignore
@@ -194,48 +169,6 @@ def f(self, dtype=dtype):
         f.__doc__ = docstring
         return property(f)
 
-    year = _tslib_field_accessor("year", "The year of the datetime", np.int64)
-    month = _tslib_field_accessor(
-        "month", "The month as January=1, December=12", np.int64
-    )
-    day = _tslib_field_accessor("day", "The days of the datetime", np.int64)
-    hour = _tslib_field_accessor("hour", "The hours of the datetime", np.int64)
-    minute = _tslib_field_accessor("minute", "The minutes of the datetime", np.int64)
-    second = _tslib_field_accessor("second", "The seconds of the datetime", np.int64)
-    microsecond = _tslib_field_accessor(
-        "microsecond", "The microseconds of the datetime", np.int64
-    )
-    nanosecond = _tslib_field_accessor(
-        "nanosecond", "The nanoseconds of the datetime", np.int64
-    )
-    weekofyear = _tslib_field_accessor(
-        "weekofyear", "The week ordinal of the year", np.int64
-    )
-    week = weekofyear
-    dayofweek = _tslib_field_accessor(
-        "dayofweek", "The day of the week with Monday=0, Sunday=6", np.int64
-    )
-    weekday = dayofweek
-
-    weekday_name = _tslib_field_accessor(
-        "weekday_name", "The name of day in a week (ex: Friday)", object
-    )
-
-    dayofyear = _tslib_field_accessor(
-        "dayofyear", "The ordinal day of the year", np.int64
-    )
-    quarter = _tslib_field_accessor("quarter", "The quarter of the date")
-    days_in_month = _tslib_field_accessor(
-        "days_in_month", "The number of days in the month", np.int64
-    )
-    daysinmonth = days_in_month
-
-    season = _tslib_field_accessor("season", "Season of the year (ex: DJF)", object)
-
-    time = _tslib_field_accessor(
-        "time", "Timestamps corresponding to datetimes", object
-    )
-
     def _tslib_round_accessor(self, name, freq):
         obj_type = type(self._obj)
         result = _round_field(self._obj.data, name, freq)
@@ -290,6 +223,50 @@ def round(self, freq):
         """
         return self._tslib_round_accessor("round", freq)
 
+
+class DatetimeAccessor(Properties):
+    """Access datetime fields for DataArrays with datetime-like dtypes.
+
+    Fields can be accessed through the `.dt` attribute
+    for applicable DataArrays.
+
+    Notes
+    ------
+    Note that these fields are not calendar-aware; if your datetimes are encoded
+    with a non-Gregorian calendar (e.g. a 360-day calendar) using cftime,
+    then some fields like `dayofyear` may not be accurate.
+
+    Examples
+    ---------
+    >>> import xarray as xr
+    >>> import pandas as pd
+    >>> dates = pd.date_range(start='2000/01/01', freq='D', periods=10)
+    >>> ts = xr.DataArray(dates, dims=('time'))
+    >>> ts
+    <xarray.DataArray (time: 10)>
+    array(['2000-01-01T00:00:00.000000000', '2000-01-02T00:00:00.000000000',
+        '2000-01-03T00:00:00.000000000', '2000-01-04T00:00:00.000000000',
+        '2000-01-05T00:00:00.000000000', '2000-01-06T00:00:00.000000000',
+        '2000-01-07T00:00:00.000000000', '2000-01-08T00:00:00.000000000',
+        '2000-01-09T00:00:00.000000000', '2000-01-10T00:00:00.000000000'],
+        dtype='datetime64[ns]')
+    Coordinates:
+    * time     (time) datetime64[ns] 2000-01-01 2000-01-02 ... 2000-01-10
+    >>> ts.dt
+    <xarray.core.accessor_dt.DatetimeAccessor object at 0x118b54d68>
+    >>> ts.dt.dayofyear
+    <xarray.DataArray 'dayofyear' (time: 10)>
+    array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])
+    Coordinates:
+    * time     (time) datetime64[ns] 2000-01-01 2000-01-02 ... 2000-01-10
+    >>> ts.dt.quarter
+    <xarray.DataArray 'quarter' (time: 10)>
+    array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
+    Coordinates:
+    * time     (time) datetime64[ns] 2000-01-01 2000-01-02 ... 2000-01-10
+
+    """
+
     def strftime(self, date_format):
         '''
         Return an array of formatted strings specified by date_format, which
@@ -323,3 +300,163 @@ def strftime(self, date_format):
         return obj_type(
             result, name="strftime", coords=self._obj.coords, dims=self._obj.dims
         )
+
+    year = Properties._tslib_field_accessor(
+        "year", "The year of the datetime", np.int64
+    )
+    month = Properties._tslib_field_accessor(
+        "month", "The month as January=1, December=12", np.int64
+    )
+    day = Properties._tslib_field_accessor("day", "The days of the datetime", np.int64)
+    hour = Properties._tslib_field_accessor(
+        "hour", "The hours of the datetime", np.int64
+    )
+    minute = Properties._tslib_field_accessor(
+        "minute", "The minutes of the datetime", np.int64
+    )
+    second = Properties._tslib_field_accessor(
+        "second", "The seconds of the datetime", np.int64
+    )
+    microsecond = Properties._tslib_field_accessor(
+        "microsecond", "The microseconds of the datetime", np.int64
+    )
+    nanosecond = Properties._tslib_field_accessor(
+        "nanosecond", "The nanoseconds of the datetime", np.int64
+    )
+    weekofyear = Properties._tslib_field_accessor(
+        "weekofyear", "The week ordinal of the year", np.int64
+    )
+    week = weekofyear
+    dayofweek = Properties._tslib_field_accessor(
+        "dayofweek", "The day of the week with Monday=0, Sunday=6", np.int64
+    )
+    weekday = dayofweek
+
+    weekday_name = Properties._tslib_field_accessor(
+        "weekday_name", "The name of day in a week", object
+    )
+
+    dayofyear = Properties._tslib_field_accessor(
+        "dayofyear", "The ordinal day of the year", np.int64
+    )
+    quarter = Properties._tslib_field_accessor("quarter", "The quarter of the date")
+    days_in_month = Properties._tslib_field_accessor(
+        "days_in_month", "The number of days in the month", np.int64
+    )
+    daysinmonth = days_in_month
+
+    season = Properties._tslib_field_accessor("season", "Season of the year", object)
+
+    time = Properties._tslib_field_accessor(
+        "time", "Timestamps corresponding to datetimes", object
+    )
+
+    is_month_start = Properties._tslib_field_accessor(
+        "is_month_start",
+        "Indicates whether the date is the first day of the month.",
+        bool,
+    )
+    is_month_end = Properties._tslib_field_accessor(
+        "is_month_end", "Indicates whether the date is the last day of the month.", bool
+    )
+    is_quarter_start = Properties._tslib_field_accessor(
+        "is_quarter_start",
+        "Indicator for whether the date is the first day of a quarter.",
+        bool,
+    )
+    is_quarter_end = Properties._tslib_field_accessor(
+        "is_quarter_end",
+        "Indicator for whether the date is the last day of a quarter.",
+        bool,
+    )
+    is_year_start = Properties._tslib_field_accessor(
+        "is_year_start", "Indicate whether the date is the first day of a year.", bool
+    )
+    is_year_end = Properties._tslib_field_accessor(
+        "is_year_end", "Indicate whether the date is the last day of the year.", bool
+    )
+    is_leap_year = Properties._tslib_field_accessor(
+        "is_leap_year", "Boolean indicator if the date belongs to a leap year.", bool
+    )
+
+
+class TimedeltaAccessor(Properties):
+    """Access Timedelta fields for DataArrays with Timedelta-like dtypes.
+
+    Fields can be accessed through the `.dt` attribute for applicable DataArrays.
+
+    Examples
+    --------
+    >>> import pandas as pd
+    >>> import xarray as xr
+    >>> dates = pd.timedelta_range(start="1 day", freq="6H", periods=20)
+    >>> ts = xr.DataArray(dates, dims=('time'))
+    >>> ts
+    <xarray.DataArray (time: 20)>
+    array([ 86400000000000, 108000000000000, 129600000000000, 151200000000000,
+        172800000000000, 194400000000000, 216000000000000, 237600000000000,
+        259200000000000, 280800000000000, 302400000000000, 324000000000000,
+        345600000000000, 367200000000000, 388800000000000, 410400000000000,
+        432000000000000, 453600000000000, 475200000000000, 496800000000000],
+        dtype='timedelta64[ns]')
+    Coordinates:
+    * time     (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
+    >>> ts.dt
+    <xarray.core.accessor_dt.TimedeltaAccessor object at 0x109a27d68>
+    >>> ts.dt.days
+    <xarray.DataArray 'days' (time: 20)>
+    array([1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5])
+    Coordinates:
+    * time     (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
+    >>> ts.dt.microseconds
+    <xarray.DataArray 'microseconds' (time: 20)>
+    array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
+    Coordinates:
+    * time     (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
+    >>> ts.dt.seconds
+    <xarray.DataArray 'seconds' (time: 20)>
+    array([    0, 21600, 43200, 64800,     0, 21600, 43200, 64800,     0,
+        21600, 43200, 64800,     0, 21600, 43200, 64800,     0, 21600,
+        43200, 64800])
+    Coordinates:
+    * time     (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
+    """
+
+    days = Properties._tslib_field_accessor(
+        "days", "Number of days for each element.", np.int64
+    )
+    seconds = Properties._tslib_field_accessor(
+        "seconds",
+        "Number of seconds (>= 0 and less than 1 day) for each element.",
+        np.int64,
+    )
+    microseconds = Properties._tslib_field_accessor(
+        "microseconds",
+        "Number of microseconds (>= 0 and less than 1 second) for each element.",
+        np.int64,
+    )
+    nanoseconds = Properties._tslib_field_accessor(
+        "nanoseconds",
+        "Number of nanoseconds (>= 0 and less than 1 microsecond) for each element.",
+        np.int64,
+    )
+
+
+class CombinedDatetimelikeAccessor(DatetimeAccessor, TimedeltaAccessor):
+    def __new__(cls, obj):
+        # CombinedDatetimelikeAccessor isn't really instantiated. Instead
+        # we need to choose which parent (datetime or timedelta) is
+        # appropriate. Since we're checking the dtypes anyway, we'll just
+        # do all the validation here.
+        if not _contains_datetime_like_objects(obj):
+            raise TypeError(
+                "'.dt' accessor only available for "
+                "DataArray with datetime64 timedelta64 dtype or "
+                "for arrays containing cftime datetime "
+                "objects."
+            )
+
+        if is_np_timedelta_like(obj.dtype):
+            return TimedeltaAccessor(obj)
+        else:
+            return DatetimeAccessor(obj)
diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py
index 8838e71e6ca..6a975b948eb 100644
--- a/xarray/core/accessor_str.py
+++ b/xarray/core/accessor_str.py
@@ -854,12 +854,10 @@ def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True):
         ----------
         pat : string or compiled regex
             String can be a character sequence or regular expression.
-
         repl : string or callable
             Replacement string or a callable. The callable is passed the regex
             match object and must return a replacement string to be used.
             See :func:`re.sub`.
-
         n : int, default -1 (all)
             Number of replacements to make from start
         case : boolean, default None
@@ -873,7 +871,7 @@ def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True):
             - If True, assumes the passed-in pattern is a regular expression.
             - If False, treats the pattern as a literal string
             - Cannot be set to False if `pat` is a compiled regex or `repl` is
-            a callable.
+              a callable.
 
         Returns
         -------
diff --git a/xarray/core/combine.py b/xarray/core/combine.py
index b9db30a9f92..3f6e0e79351 100644
--- a/xarray/core/combine.py
+++ b/xarray/core/combine.py
@@ -88,7 +88,7 @@ def _infer_concat_order_from_coords(datasets):
                 # with the same value have the same coord values throughout.
                 if any(index.size == 0 for index in indexes):
                     raise ValueError("Cannot handle size zero dimensions")
-                first_items = pd.Index([index.take([0]) for index in indexes])
+                first_items = pd.Index([index[0] for index in indexes])
 
                 # Sort datasets along dim
                 # We want rank but with identical elements given identical
@@ -115,11 +115,12 @@ def _infer_concat_order_from_coords(datasets):
     return combined_ids, concat_dims
 
 
-def _check_shape_tile_ids(combined_tile_ids):
+def _check_dimension_depth_tile_ids(combined_tile_ids):
+    """
+    Check all tuples are the same length, i.e. check that all lists are
+    nested to the same depth.
+    """
     tile_ids = combined_tile_ids.keys()
-
-    # Check all tuples are the same length
-    # i.e. check that all lists are nested to the same depth
     nesting_depths = [len(tile_id) for tile_id in tile_ids]
     if not nesting_depths:
         nesting_depths = [0]
@@ -128,8 +129,13 @@ def _check_shape_tile_ids(combined_tile_ids):
             "The supplied objects do not form a hypercube because"
             " sub-lists do not have consistent depths"
         )
+    # return these just to be reused in _check_shape_tile_ids
+    return tile_ids, nesting_depths
 
-    # Check all lists along one dimension are same length
+
+def _check_shape_tile_ids(combined_tile_ids):
+    """Check all lists along one dimension are same length."""
+    tile_ids, nesting_depths = _check_dimension_depth_tile_ids(combined_tile_ids)
     for dim in range(nesting_depths[0]):
         indices_along_dim = [tile_id[dim] for tile_id in tile_ids]
         occurrences = Counter(indices_along_dim)
@@ -536,7 +542,8 @@ def combine_by_coords(
     coords : {'minimal', 'different', 'all' or list of str}, optional
         As per the 'data_vars' kwarg, but for coordinate variables.
     fill_value : scalar, optional
-        Value to use for newly missing values
+        Value to use for newly missing values. If None, raises a ValueError if
+        the passed Datasets do not create a complete hypercube.
     join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
         String indicating how to combine differing indexes
         (excluding concat_dim) in objects
@@ -653,6 +660,15 @@ def combine_by_coords(
     temperature    (y, x) float64 1.654 10.63 7.015 2.543 ... 12.46 2.22 15.96
     precipitation  (y, x) float64 0.2136 0.9974 0.7603 ... 0.6125 0.4654 0.5953
 
+    >>> xr.combine_by_coords([x1, x2, x3])
+    <xarray.Dataset>
+    Dimensions:        (x: 6, y: 4)
+    Coordinates:
+    * x              (x) int64 10 20 30 40 50 60
+    * y              (y) int64 0 1 2 3
+    Data variables:
+    temperature    (y, x) float64 1.654 10.63 7.015 nan ... 12.46 2.22 15.96
+    precipitation  (y, x) float64 0.2136 0.9974 0.7603 ... 0.6125 0.4654 0.5953
     """
 
     # Group by data vars
@@ -667,7 +683,13 @@ def combine_by_coords(
             list(datasets_with_same_vars)
         )
 
-        _check_shape_tile_ids(combined_ids)
+        if fill_value is None:
+            # check that datasets form complete hypercube
+            _check_shape_tile_ids(combined_ids)
+        else:
+            # check only that all datasets have same dimension depth for these
+            # vars
+            _check_dimension_depth_tile_ids(combined_ids)
 
         # Concatenate along all of concat_dims one by one to create single ds
         concatenated = _combine_nd(
diff --git a/xarray/core/common.py b/xarray/core/common.py
index a74318b2f90..e908c69dd14 100644
--- a/xarray/core/common.py
+++ b/xarray/core/common.py
@@ -1447,6 +1447,12 @@ def is_np_datetime_like(dtype: DTypeLike) -> bool:
     return np.issubdtype(dtype, np.datetime64) or np.issubdtype(dtype, np.timedelta64)
 
 
+def is_np_timedelta_like(dtype: DTypeLike) -> bool:
+    """Check whether dtype is of the timedelta64 dtype.
+    """
+    return np.issubdtype(dtype, np.timedelta64)
+
+
 def _contains_cftime_datetimes(array) -> bool:
     """Check if an array contains cftime.datetime objects
     """
diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index 5ccbfa3f2b4..302f7afcec6 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -93,12 +93,14 @@ def concat(
           those of the first object with that dimension. Indexes for the same
           dimension must have the same size in all objects.
 
-    indexers, mode, concat_over : deprecated
-
     Returns
     -------
     concatenated : type of objs
 
+    Notes
+    -----
+    Each concatenated Variable preserves corresponding ``attrs`` from the first element of ``objs``.
+
     See also
     --------
     merge
diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py
index c3dbdd27098..de55de89f0c 100644
--- a/xarray/core/dask_array_compat.py
+++ b/xarray/core/dask_array_compat.py
@@ -1,8 +1,14 @@
 from distutils.version import LooseVersion
+from typing import Iterable
 
-import dask.array as da
 import numpy as np
-from dask import __version__ as dask_version
+
+try:
+    import dask.array as da
+    from dask import __version__ as dask_version
+except ImportError:
+    dask_version = "0.0.0"
+    da = None
 
 if LooseVersion(dask_version) >= LooseVersion("2.0.0"):
     meta_from_array = da.utils.meta_from_array
@@ -89,3 +95,76 @@ def meta_from_array(x, ndim=None, dtype=None):
             meta = meta.astype(dtype)
 
         return meta
+
+
+if LooseVersion(dask_version) >= LooseVersion("2.8.1"):
+    median = da.median
+else:
+    # Copied from dask v2.8.1
+    # Used under the terms of Dask's license, see licenses/DASK_LICENSE.
+    def median(a, axis=None, keepdims=False):
+        """
+        This works by automatically chunking the reduced axes to a single chunk
+        and then calling ``numpy.median`` function across the remaining dimensions
+        """
+
+        if axis is None:
+            raise NotImplementedError(
+                "The da.median function only works along an axis.  "
+                "The full algorithm is difficult to do in parallel"
+            )
+
+        if not isinstance(axis, Iterable):
+            axis = (axis,)
+
+        axis = [ax + a.ndim if ax < 0 else ax for ax in axis]
+
+        a = a.rechunk({ax: -1 if ax in axis else "auto" for ax in range(a.ndim)})
+
+        result = a.map_blocks(
+            np.median,
+            axis=axis,
+            keepdims=keepdims,
+            drop_axis=axis if not keepdims else None,
+            chunks=[1 if ax in axis else c for ax, c in enumerate(a.chunks)]
+            if keepdims
+            else None,
+        )
+
+        return result
+
+
+if LooseVersion(dask_version) > LooseVersion("2.9.0"):
+    nanmedian = da.nanmedian
+else:
+
+    def nanmedian(a, axis=None, keepdims=False):
+        """
+        This works by automatically chunking the reduced axes to a single chunk
+        and then calling ``numpy.nanmedian`` function across the remaining dimensions
+        """
+
+        if axis is None:
+            raise NotImplementedError(
+                "The da.nanmedian function only works along an axis.  "
+                "The full algorithm is difficult to do in parallel"
+            )
+
+        if not isinstance(axis, Iterable):
+            axis = (axis,)
+
+        axis = [ax + a.ndim if ax < 0 else ax for ax in axis]
+
+        a = a.rechunk({ax: -1 if ax in axis else "auto" for ax in range(a.ndim)})
+
+        result = a.map_blocks(
+            np.nanmedian,
+            axis=axis,
+            keepdims=keepdims,
+            drop_axis=axis if not keepdims else None,
+            chunks=[1 if ax in axis else c for ax, c in enumerate(a.chunks)]
+            if keepdims
+            else None,
+        )
+
+        return result
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 1b135a350d1..31aa4da57b2 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -33,7 +33,7 @@
     rolling,
     utils,
 )
-from .accessor_dt import DatetimeAccessor
+from .accessor_dt import CombinedDatetimelikeAccessor
 from .accessor_str import StringAccessor
 from .alignment import (
     _broadcast_helper,
@@ -50,7 +50,8 @@
 )
 from .dataset import Dataset, split_indexes
 from .formatting import format_item
-from .indexes import Indexes, propagate_indexes, default_indexes
+from .indexes import Indexes, default_indexes, propagate_indexes
+from .indexing import is_fancy_indexer
 from .merge import PANDAS_TYPES, _extract_indexes_from_coords
 from .options import OPTIONS
 from .utils import Default, ReprObject, _check_inplace, _default, either_dict_or_kwargs
@@ -234,19 +235,6 @@ class DataArray(AbstractArray, DataWithCoords):
 
     Getting items from or doing mathematical operations with a DataArray
     always returns another DataArray.
-
-    Attributes
-    ----------
-    dims : tuple
-        Dimension names associated with this array.
-    values : numpy.ndarray
-        Access or modify DataArray values as a numpy array.
-    coords : dict-like
-        Dictionary of DataArray objects that label values along each dimension.
-    name : str or None
-        Name of this array.
-    attrs : dict
-        Dictionary for holding arbitrary metadata.
     """
 
     _cache: Dict[str, Any]
@@ -270,7 +258,7 @@ class DataArray(AbstractArray, DataWithCoords):
     _coarsen_cls = rolling.DataArrayCoarsen
     _resample_cls = resample.DataArrayResample
 
-    dt = property(DatetimeAccessor)
+    dt = property(CombinedDatetimelikeAccessor)
 
     def __init__(
         self,
@@ -1027,8 +1015,27 @@ def isel(
         DataArray.sel
         """
         indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel")
-        ds = self._to_temp_dataset().isel(drop=drop, indexers=indexers)
-        return self._from_temp_dataset(ds)
+        if any(is_fancy_indexer(idx) for idx in indexers.values()):
+            ds = self._to_temp_dataset()._isel_fancy(indexers, drop=drop)
+            return self._from_temp_dataset(ds)
+
+        # Much faster algorithm for when all indexers are ints, slices, one-dimensional
+        # lists, or zero- or one-dimensional np.ndarrays
+
+        variable = self._variable.isel(indexers)
+
+        coords = {}
+        for coord_name, coord_value in self._coords.items():
+            coord_indexers = {
+                k: v for k, v in indexers.items() if k in coord_value.dims
+            }
+            if coord_indexers:
+                coord_value = coord_value.isel(coord_indexers)
+                if drop and coord_value.ndim == 0:
+                    continue
+            coords[coord_name] = coord_value
+
+        return self._replace(variable=variable, coords=coords)
 
     def sel(
         self,
@@ -1108,7 +1115,7 @@ def thin(
         **indexers_kwargs: Any,
     ) -> "DataArray":
         """Return a new DataArray whose data is given by each `n` value
-        along the specified dimension(s). Default `n` = 5
+        along the specified dimension(s).
 
         See Also
         --------
@@ -1282,7 +1289,7 @@ def reindex(
             satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
         fill_value : scalar, optional
             Value to use for newly missing values
-        **indexers_kwarg : {dim: indexer, ...}, optional
+        **indexers_kwargs : {dim: indexer, ...}, optional
             The keyword arguments form of ``indexers``.
             One of indexers or indexers_kwargs must be provided.
 
@@ -1331,7 +1338,7 @@ def interp(
             values.
         kwargs: dictionary
             Additional keyword passed to scipy's interpolator.
-        ``**coords_kwarg`` : {dim: coordinate, ...}, optional
+        ``**coords_kwargs`` : {dim: coordinate, ...}, optional
             The keyword arguments form of ``coords``.
             One of coords or coords_kwargs must be provided.
 
@@ -2733,7 +2740,7 @@ def shift(
             Value to use for newly missing values
         **shifts_kwargs:
             The keyword arguments form of ``shifts``.
-            One of shifts or shifts_kwarg must be provided.
+            One of shifts or shifts_kwargs must be provided.
 
         Returns
         -------
@@ -2784,7 +2791,7 @@ def roll(
             deprecated and will change to False in a future version.
             Explicitly pass roll_coords to silence the warning.
         **shifts_kwargs : The keyword arguments form of ``shifts``.
-            One of shifts or shifts_kwarg must be provided.
+            One of shifts or shifts_kwargs must be provided.
 
         Returns
         -------
@@ -2971,6 +2978,39 @@ def quantile(
         See Also
         --------
         numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile
+
+        Examples
+        --------
+
+        >>> da = xr.DataArray(
+        ...     data=[[0.7, 4.2, 9.4, 1.5], [6.5, 7.3, 2.6, 1.9]],
+        ...     coords={"x": [7, 9], "y": [1, 1.5, 2, 2.5]},
+        ...     dims=("x", "y"),
+        ... )
+        >>> da.quantile(0)  # or da.quantile(0, dim=...)
+        <xarray.DataArray ()>
+        array(0.7)
+        Coordinates:
+            quantile  float64 0.0
+        >>> da.quantile(0, dim="x")
+        <xarray.DataArray (y: 4)>
+        array([0.7, 4.2, 2.6, 1.5])
+        Coordinates:
+          * y         (y) float64 1.0 1.5 2.0 2.5
+            quantile  float64 0.0
+        >>> da.quantile([0, 0.5, 1])
+        <xarray.DataArray (quantile: 3)>
+        array([0.7, 3.4, 9.4])
+        Coordinates:
+          * quantile  (quantile) float64 0.0 0.5 1.0
+        >>> da.quantile([0, 0.5, 1], dim="x")
+        <xarray.DataArray (quantile: 3, y: 4)>
+        array([[0.7 , 4.2 , 2.6 , 1.5 ],
+               [3.6 , 5.75, 6.  , 1.7 ],
+               [6.5 , 7.3 , 9.4 , 1.9 ]])
+        Coordinates:
+          * y         (y) float64 1.0 1.5 2.0 2.5
+          * quantile  (quantile) float64 0.0 0.5 1.0
         """
 
         ds = self._to_temp_dataset().quantile(
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index fdddde773c1..6be06fed117 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -66,6 +66,7 @@
     propagate_indexes,
     roll_index,
 )
+from .indexing import is_fancy_indexer
 from .merge import (
     dataset_merge_method,
     dataset_update_method,
@@ -78,8 +79,8 @@
     Default,
     Frozen,
     SortedKeysDict,
-    _default,
     _check_inplace,
+    _default,
     decode_numpy_dict_values,
     either_dict_or_kwargs,
     hashable,
@@ -1886,7 +1887,7 @@ def isel(
         drop : bool, optional
             If ``drop=True``, drop coordinates variables indexed by integers
             instead of making them scalar.
-        **indexers_kwarg : {dim: indexer, ...}, optional
+        **indexers_kwargs : {dim: indexer, ...}, optional
             The keyword arguments form of ``indexers``.
             One of indexers or indexers_kwargs must be provided.
 
@@ -1907,6 +1908,48 @@ def isel(
         DataArray.isel
         """
         indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel")
+        if any(is_fancy_indexer(idx) for idx in indexers.values()):
+            return self._isel_fancy(indexers, drop=drop)
+
+        # Much faster algorithm for when all indexers are ints, slices, one-dimensional
+        # lists, or zero- or one-dimensional np.ndarrays
+        invalid = indexers.keys() - self.dims.keys()
+        if invalid:
+            raise ValueError("dimensions %r do not exist" % invalid)
+
+        variables = {}
+        dims: Dict[Hashable, Tuple[int, ...]] = {}
+        coord_names = self._coord_names.copy()
+        indexes = self._indexes.copy() if self._indexes is not None else None
+
+        for var_name, var_value in self._variables.items():
+            var_indexers = {k: v for k, v in indexers.items() if k in var_value.dims}
+            if var_indexers:
+                var_value = var_value.isel(var_indexers)
+                if drop and var_value.ndim == 0 and var_name in coord_names:
+                    coord_names.remove(var_name)
+                    if indexes:
+                        indexes.pop(var_name, None)
+                    continue
+                if indexes and var_name in indexes:
+                    if var_value.ndim == 1:
+                        indexes[var_name] = var_value.to_index()
+                    else:
+                        del indexes[var_name]
+            variables[var_name] = var_value
+            dims.update(zip(var_value.dims, var_value.shape))
+
+        return self._construct_direct(
+            variables=variables,
+            coord_names=coord_names,
+            dims=dims,
+            attrs=self._attrs,
+            indexes=indexes,
+            encoding=self._encoding,
+            file_obj=self._file_obj,
+        )
+
+    def _isel_fancy(self, indexers: Mapping[Hashable, Any], *, drop: bool) -> "Dataset":
         # Note: we need to preserve the original indexers variable in order to merge the
         # coords below
         indexers_list = list(self._validate_indexers(indexers))
@@ -1990,7 +2033,7 @@ def sel(
         drop : bool, optional
             If ``drop=True``, drop coordinates variables in `indexers` instead
             of making them scalar.
-        **indexers_kwarg : {dim: indexer, ...}, optional
+        **indexers_kwargs : {dim: indexer, ...}, optional
             The keyword arguments form of ``indexers``.
             One of indexers or indexers_kwargs must be provided.
 
@@ -2125,7 +2168,7 @@ def thin(
 
         Parameters
         ----------
-        indexers : dict or int, default: 5
+        indexers : dict or int
             A dict with keys matching dimensions and integer values `n`
             or a single integer `n` applied over all dimensions.
             One of indexers or indexers_kwargs must be provided.
@@ -2289,7 +2332,7 @@ def reindex(
         fill_value : scalar, optional
             Value to use for newly missing values
         sparse: use sparse-array. By default, False
-        **indexers_kwarg : {dim: indexer, ...}, optional
+        **indexers_kwargs : {dim: indexer, ...}, optional
             Keyword arguments in the same form as ``indexers``.
             One of indexers or indexers_kwargs must be provided.
 
@@ -2504,7 +2547,7 @@ def interp(
             values.
         kwargs: dictionary, optional
             Additional keyword passed to scipy's interpolator.
-        **coords_kwarg : {dim: coordinate, ...}, optional
+        **coords_kwargs : {dim: coordinate, ...}, optional
             The keyword arguments form of ``coords``.
             One of coords or coords_kwargs must be provided.
 
@@ -4895,7 +4938,7 @@ def shift(self, shifts=None, fill_value=dtypes.NA, **shifts_kwargs):
             Value to use for newly missing values
         **shifts_kwargs:
             The keyword arguments form of ``shifts``.
-            One of shifts or shifts_kwarg must be provided.
+            One of shifts or shifts_kwargs must be provided.
 
         Returns
         -------
@@ -5116,6 +5159,44 @@ def quantile(
         See Also
         --------
         numpy.nanpercentile, pandas.Series.quantile, DataArray.quantile
+
+        Examples
+        --------
+
+        >>> ds = xr.Dataset(
+        ...     {"a": (("x", "y"), [[0.7, 4.2, 9.4, 1.5], [6.5, 7.3, 2.6, 1.9]])},
+        ...     coords={"x": [7, 9], "y": [1, 1.5, 2, 2.5]},
+        ... )
+        >>> ds.quantile(0)  # or ds.quantile(0, dim=...)
+        <xarray.Dataset>
+        Dimensions:   ()
+        Coordinates:
+            quantile  float64 0.0
+        Data variables:
+            a         float64 0.7
+        >>> ds.quantile(0, dim="x")
+        <xarray.Dataset>
+        Dimensions:   (y: 4)
+        Coordinates:
+          * y         (y) float64 1.0 1.5 2.0 2.5
+            quantile  float64 0.0
+        Data variables:
+            a         (y) float64 0.7 4.2 2.6 1.5
+        >>> ds.quantile([0, 0.5, 1])
+        <xarray.Dataset>
+        Dimensions:   (quantile: 3)
+        Coordinates:
+          * quantile  (quantile) float64 0.0 0.5 1.0
+        Data variables:
+            a         (quantile) float64 0.7 3.4 9.4
+        >>> ds.quantile([0, 0.5, 1], dim="x")
+        <xarray.Dataset>
+        Dimensions:   (quantile: 3, y: 4)
+        Coordinates:
+          * y         (y) float64 1.0 1.5 2.0 2.5
+          * quantile  (quantile) float64 0.0 0.5 1.0
+        Data variables:
+            a         (quantile, y) float64 0.7 4.2 2.6 1.5 3.6 ... 1.7 6.5 7.3 9.4 1.9
         """
 
         if isinstance(dim, str):
@@ -5166,11 +5247,7 @@ def quantile(
         new = self._replace_with_new_dims(
             variables, coord_names=coord_names, attrs=attrs, indexes=indexes
         )
-        if "quantile" in new.dims:
-            new.coords["quantile"] = Variable("quantile", q)
-        else:
-            new.coords["quantile"] = q
-        return new
+        return new.assign_coords(quantile=q)
 
     def rank(self, dim, pct=False, keep_attrs=None):
         """Ranks the data.
diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py
index cf616acb485..98b371ab7c3 100644
--- a/xarray/core/duck_array_ops.py
+++ b/xarray/core/duck_array_ops.py
@@ -11,7 +11,7 @@
 import numpy as np
 import pandas as pd
 
-from . import dask_array_ops, dtypes, npcompat, nputils
+from . import dask_array_ops, dask_array_compat, dtypes, npcompat, nputils
 from .nputils import nanfirst, nanlast
 from .pycompat import dask_array_type
 
@@ -284,7 +284,7 @@ def _ignore_warnings_if(condition):
         yield
 
 
-def _create_nan_agg_method(name, coerce_strings=False):
+def _create_nan_agg_method(name, dask_module=dask_array, coerce_strings=False):
     from . import nanops
 
     def f(values, axis=None, skipna=None, **kwargs):
@@ -301,7 +301,7 @@ def f(values, axis=None, skipna=None, **kwargs):
             nanname = "nan" + name
             func = getattr(nanops, nanname)
         else:
-            func = _dask_or_eager_func(name)
+            func = _dask_or_eager_func(name, dask_module=dask_module)
 
         try:
             return func(values, axis=axis, **kwargs)
@@ -337,7 +337,7 @@ def f(values, axis=None, skipna=None, **kwargs):
 std.numeric_only = True
 var = _create_nan_agg_method("var")
 var.numeric_only = True
-median = _create_nan_agg_method("median")
+median = _create_nan_agg_method("median", dask_module=dask_array_compat)
 median.numeric_only = True
 prod = _create_nan_agg_method("prod")
 prod.numeric_only = True
diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py
index dbebbcf4fbe..8ceda8bfbfa 100644
--- a/xarray/core/formatting_html.py
+++ b/xarray/core/formatting_html.py
@@ -1,11 +1,11 @@
 import uuid
-import pkg_resources
 from collections import OrderedDict
 from functools import partial
 from html import escape
 
-from .formatting import inline_variable_array_repr, short_data_repr
+import pkg_resources
 
+from .formatting import inline_variable_array_repr, short_data_repr
 
 CSS_FILE_PATH = "/".join(("static", "css", "style.css"))
 CSS_STYLE = pkg_resources.resource_string("xarray", CSS_FILE_PATH).decode("utf8")
diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py
index 7e872c74d72..5b52f48413d 100644
--- a/xarray/core/groupby.py
+++ b/xarray/core/groupby.py
@@ -597,6 +597,54 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None):
         --------
         numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile,
         DataArray.quantile
+
+        Examples
+        --------
+
+        >>> da = xr.DataArray(
+        ...     [[1.3, 8.4, 0.7, 6.9], [0.7, 4.2, 9.4, 1.5], [6.5, 7.3, 2.6, 1.9]],
+        ...     coords={"x": [0, 0, 1], "y": [1, 1, 2, 2]},
+        ...     dims=("x", "y"),
+        ... )
+        >>> ds = xr.Dataset({"a": da})
+        >>> da.groupby("x").quantile(0)
+        <xarray.DataArray (x: 2, y: 4)>
+        array([[0.7, 4.2, 0.7, 1.5],
+               [6.5, 7.3, 2.6, 1.9]])
+        Coordinates:
+            quantile  float64 0.0
+          * y         (y) int64 1 1 2 2
+          * x         (x) int64 0 1
+        >>> ds.groupby("y").quantile(0, dim=...)
+        <xarray.Dataset>
+        Dimensions:   (y: 2)
+        Coordinates:
+            quantile  float64 0.0
+          * y         (y) int64 1 2
+        Data variables:
+            a         (y) float64 0.7 0.7
+        >>> da.groupby("x").quantile([0, 0.5, 1])
+        <xarray.DataArray (x: 2, y: 4, quantile: 3)>
+        array([[[0.7 , 1.  , 1.3 ],
+                [4.2 , 6.3 , 8.4 ],
+                [0.7 , 5.05, 9.4 ],
+                [1.5 , 4.2 , 6.9 ]],
+               [[6.5 , 6.5 , 6.5 ],
+                [7.3 , 7.3 , 7.3 ],
+                [2.6 , 2.6 , 2.6 ],
+                [1.9 , 1.9 , 1.9 ]]])
+        Coordinates:
+          * y         (y) int64 1 1 2 2
+          * quantile  (quantile) float64 0.0 0.5 1.0
+          * x         (x) int64 0 1
+        >>> ds.groupby("y").quantile([0, 0.5, 1], dim=...)
+        <xarray.Dataset>
+        Dimensions:   (quantile: 3, y: 2)
+        Coordinates:
+          * quantile  (quantile) float64 0.0 0.5 1.0
+          * y         (y) int64 1 2
+        Data variables:
+            a         (y, quantile) float64 0.7 5.35 8.4 0.7 2.25 9.4
         """
         if dim is None:
             dim = self._group_dim
diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py
index d587ee3a02b..0e546dc2f68 100644
--- a/xarray/core/indexing.py
+++ b/xarray/core/indexing.py
@@ -1213,6 +1213,19 @@ def posify_mask_indexer(indexer):
     return type(indexer)(key)
 
 
+def is_fancy_indexer(indexer: Any) -> bool:
+    """Return False if indexer is an int, a slice, a 1-dimensional list, or a 0- or
+    1-dimensional ndarray; in all other cases return True.
+    """
+    if isinstance(indexer, (int, slice)):
+        return False
+    if isinstance(indexer, np.ndarray):
+        return indexer.ndim > 1
+    if isinstance(indexer, list):
+        return bool(indexer) and not isinstance(indexer[0], int)
+    return True
+
+
 class NumpyIndexingAdapter(ExplicitlyIndexedNDArrayMixin):
     """Wrap a NumPy array to use explicit indexing."""
 
diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py
index 17240faf007..f9989c2c8c9 100644
--- a/xarray/core/nanops.py
+++ b/xarray/core/nanops.py
@@ -6,8 +6,10 @@
 
 try:
     import dask.array as dask_array
+    from . import dask_array_compat
 except ImportError:
     dask_array = None
+    dask_array_compat = None  # type: ignore
 
 
 def _replace_nan(a, val):
@@ -25,7 +27,7 @@ def _maybe_null_out(result, axis, mask, min_count=1):
     """
     if hasattr(axis, "__len__"):  # if tuple or list
         raise ValueError(
-            "min_count is not available for reduction " "with more than one dimensions."
+            "min_count is not available for reduction with more than one dimensions."
         )
 
     if axis is not None and getattr(result, "ndim", False):
@@ -141,7 +143,15 @@ def nanmean(a, axis=None, dtype=None, out=None):
 
 
 def nanmedian(a, axis=None, out=None):
-    return _dask_or_eager_func("nanmedian", eager_module=nputils)(a, axis=axis)
+    # The dask algorithm works by rechunking to one chunk along axis
+    # Make sure we trigger the dask error when passing all dimensions
+    # so that we don't rechunk the entire array to one chunk and
+    # possibly blow memory
+    if axis is not None and len(np.atleast_1d(axis)) == a.ndim:
+        axis = None
+    return _dask_or_eager_func(
+        "nanmedian", dask_module=dask_array_compat, eager_module=nputils
+    )(a, axis=axis)
 
 
 def _nanvar_object(value, axis=None, ddof=0, keepdims=False, **kwargs):
diff --git a/xarray/core/ops.py b/xarray/core/ops.py
index 78c4466faed..b789f93b4f1 100644
--- a/xarray/core/ops.py
+++ b/xarray/core/ops.py
@@ -347,13 +347,3 @@ def inject_all_ops_and_reduce_methods(cls, priority=50, array_only=True):
 
     inject_reduce_methods(cls)
     inject_cum_methods(cls)
-
-
-def inject_coarsen_methods(cls):
-    # standard numpy reduce methods
-    methods = [(name, getattr(duck_array_ops, name)) for name in NAN_REDUCE_METHODS]
-    for name, f in methods:
-        func = cls._reduce_method(f)
-        func.__name__ = name
-        func.__doc__ = _COARSEN_REDUCE_DOCSTRING_TEMPLATE.format(name=func.__name__)
-        setattr(cls, name, func)
diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py
index fbb5ef94ca2..dd6c67338d8 100644
--- a/xarray/core/parallel.py
+++ b/xarray/core/parallel.py
@@ -7,12 +7,14 @@
 except ImportError:
     pass
 
+import collections
 import itertools
 import operator
 from typing import (
     Any,
     Callable,
     Dict,
+    DefaultDict,
     Hashable,
     Mapping,
     Sequence,
@@ -221,7 +223,12 @@ def _wrapper(func, obj, to_array, args, kwargs):
     indexes = {dim: dataset.indexes[dim] for dim in preserved_indexes}
     indexes.update({k: template.indexes[k] for k in new_indexes})
 
+    # We're building a new HighLevelGraph hlg. We'll have one new layer
+    # for each variable in the dataset, which is the result of the
+    # func applied to the values.
+
     graph: Dict[Any, Any] = {}
+    new_layers: DefaultDict[str, Dict[Any, Any]] = collections.defaultdict(dict)
     gname = "{}-{}".format(
         dask.utils.funcname(func), dask.base.tokenize(dataset, args, kwargs)
     )
@@ -310,9 +317,20 @@ def _wrapper(func, obj, to_array, args, kwargs):
                     # unchunked dimensions in the input have one chunk in the result
                     key += (0,)
 
-            graph[key] = (operator.getitem, from_wrapper, name)
+            # We're adding multiple new layers to the graph:
+            # The first new layer is the result of the computation on
+            # the array.
+            # Then we add one layer per variable, which extracts the
+            # result for that variable, and depends on just the first new
+            # layer.
+            new_layers[gname_l][key] = (operator.getitem, from_wrapper, name)
+
+    hlg = HighLevelGraph.from_collections(gname, graph, dependencies=[dataset])
 
-    graph = HighLevelGraph.from_collections(gname, graph, dependencies=[dataset])
+    for gname_l, layer in new_layers.items():
+        # This adds in the getitems for each variable in the dataset.
+        hlg.dependencies[gname_l] = {gname}
+        hlg.layers[gname_l] = layer
 
     result = Dataset(coords=indexes, attrs=template.attrs)
     for name, gname_l in var_key_map.items():
@@ -325,7 +343,7 @@ def _wrapper(func, obj, to_array, args, kwargs):
                 var_chunks.append((len(indexes[dim]),))
 
         data = dask.array.Array(
-            graph, name=gname_l, chunks=var_chunks, dtype=template[name].dtype
+            hlg, name=gname_l, chunks=var_chunks, dtype=template[name].dtype
         )
         result[name] = (dims, data, template[name].attrs)
 
diff --git a/xarray/core/resample.py b/xarray/core/resample.py
index fb388490d06..2b3b7da6217 100644
--- a/xarray/core/resample.py
+++ b/xarray/core/resample.py
@@ -184,6 +184,7 @@ def map(self, func, shortcut=False, args=(), **kwargs):
 
         Apply uses heuristics (like `pandas.GroupBy.apply`) to figure out how
         to stack together the array. The rule is:
+
         1. If the dimension along which the group coordinate is defined is
            still in the first grouped array after applying `func`, then stack
            over this dimension.
@@ -196,11 +197,13 @@ def map(self, func, shortcut=False, args=(), **kwargs):
             Callable to apply to each array.
         shortcut : bool, optional
             Whether or not to shortcut evaluation under the assumptions that:
+
             (1) The action of `func` does not depend on any of the array
                 metadata (attributes or coordinates) but only on the data and
                 dimensions.
             (2) The action of `func` creates arrays with homogeneous metadata,
                 that is, with the same dimensions and attributes.
+
             If these conditions are satisfied `shortcut` provides significant
             speedup. This should be the case for many common groupby operations
             (e.g., applying numpy ufuncs).
@@ -275,6 +278,7 @@ def map(self, func, args=(), shortcut=None, **kwargs):
 
         Apply uses heuristics (like `pandas.GroupBy.apply`) to figure out how
         to stack together the datasets. The rule is:
+
         1. If the dimension along which the group coordinate is defined is
            still in the first grouped item after applying `func`, then stack
            over this dimension.
diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py
index a1864332f4d..ea6d72b2e03 100644
--- a/xarray/core/rolling.py
+++ b/xarray/core/rolling.py
@@ -1,12 +1,12 @@
 import functools
 import warnings
-from typing import Callable
+from typing import Any, Callable, Dict
 
 import numpy as np
 
 from . import dtypes, duck_array_ops, utils
 from .dask_array_ops import dask_rolling_wrapper
-from .ops import inject_coarsen_methods
+from .ops import inject_reduce_methods
 from .pycompat import dask_array_type
 
 try:
@@ -542,6 +542,11 @@ def __init__(self, obj, windows, boundary, side, coord_func):
         self.side = side
         self.boundary = boundary
 
+        absent_dims = [dim for dim in windows.keys() if dim not in self.obj.dims]
+        if absent_dims:
+            raise ValueError(
+                f"Dimensions {absent_dims!r} not found in {self.obj.__class__.__name__}."
+            )
         if not utils.is_dict_like(coord_func):
             coord_func = {d: coord_func for d in self.obj.dims}
         for c in self.obj.coords:
@@ -565,18 +570,23 @@ def __repr__(self):
 class DataArrayCoarsen(Coarsen):
     __slots__ = ()
 
+    _reduce_extra_args_docstring = """"""
+
     @classmethod
-    def _reduce_method(cls, func):
+    def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool):
         """
-        Return a wrapped function for injecting numpy methods.
-        see ops.inject_coarsen_methods
+        Return a wrapped function for injecting reduction methods.
+        see ops.inject_reduce_methods
         """
+        kwargs: Dict[str, Any] = {}
+        if include_skipna:
+            kwargs["skipna"] = None
 
         def wrapped_func(self, **kwargs):
             from .dataarray import DataArray
 
             reduced = self.obj.variable.coarsen(
-                self.windows, func, self.boundary, self.side
+                self.windows, func, self.boundary, self.side, **kwargs
             )
             coords = {}
             for c, v in self.obj.coords.items():
@@ -585,7 +595,11 @@ def wrapped_func(self, **kwargs):
                 else:
                     if any(d in self.windows for d in v.dims):
                         coords[c] = v.variable.coarsen(
-                            self.windows, self.coord_func[c], self.boundary, self.side
+                            self.windows,
+                            self.coord_func[c],
+                            self.boundary,
+                            self.side,
+                            **kwargs,
                         )
                     else:
                         coords[c] = v
@@ -597,12 +611,17 @@ def wrapped_func(self, **kwargs):
 class DatasetCoarsen(Coarsen):
     __slots__ = ()
 
+    _reduce_extra_args_docstring = """"""
+
     @classmethod
-    def _reduce_method(cls, func):
+    def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool):
         """
-        Return a wrapped function for injecting numpy methods.
-        see ops.inject_coarsen_methods
+        Return a wrapped function for injecting reduction methods.
+        see ops.inject_reduce_methods
         """
+        kwargs: Dict[str, Any] = {}
+        if include_skipna:
+            kwargs["skipna"] = None
 
         def wrapped_func(self, **kwargs):
             from .dataset import Dataset
@@ -610,14 +629,18 @@ def wrapped_func(self, **kwargs):
             reduced = {}
             for key, da in self.obj.data_vars.items():
                 reduced[key] = da.variable.coarsen(
-                    self.windows, func, self.boundary, self.side
+                    self.windows, func, self.boundary, self.side, **kwargs
                 )
 
             coords = {}
             for c, v in self.obj.coords.items():
                 if any(d in self.windows for d in v.dims):
                     coords[c] = v.variable.coarsen(
-                        self.windows, self.coord_func[c], self.boundary, self.side
+                        self.windows,
+                        self.coord_func[c],
+                        self.boundary,
+                        self.side,
+                        **kwargs,
                     )
                 else:
                     coords[c] = v.variable
@@ -626,5 +649,5 @@ def wrapped_func(self, **kwargs):
         return wrapped_func
 
 
-inject_coarsen_methods(DataArrayCoarsen)
-inject_coarsen_methods(DatasetCoarsen)
+inject_reduce_methods(DataArrayCoarsen)
+inject_reduce_methods(DatasetCoarsen)
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index 8e62341d5ee..ec8829c3dfb 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -346,7 +346,10 @@ def data(self):
     def data(self, data):
         data = as_compatible_data(data)
         if data.shape != self.shape:
-            raise ValueError("replacement data must match the Variable's shape")
+            raise ValueError(
+                f"replacement data must match the Variable's shape. "
+                f"replacement data has shape {data.shape}; Variable has shape {self.shape}"
+            )
         self._data = data
 
     def load(self, **kwargs):
@@ -617,7 +620,10 @@ def _broadcast_indexes_outer(self, key):
                 k = k.data
             if not isinstance(k, BASIC_INDEXING_TYPES):
                 k = np.asarray(k)
-                if k.dtype.kind == "b":
+                if k.size == 0:
+                    # Slice by empty list; numpy could not infer the dtype
+                    k = k.astype(int)
+                elif k.dtype.kind == "b":
                     (k,) = np.nonzero(k)
             new_key.append(k)
 
@@ -1136,7 +1142,7 @@ def shift(self, shifts=None, fill_value=dtypes.NA, **shifts_kwargs):
             Value to use for newly missing values
         **shifts_kwargs:
             The keyword arguments form of ``shifts``.
-            One of shifts or shifts_kwarg must be provided.
+            One of shifts or shifts_kwargs must be provided.
 
         Returns
         -------
@@ -1244,7 +1250,7 @@ def roll(self, shifts=None, **shifts_kwargs):
             left.
         **shifts_kwargs:
             The keyword arguments form of ``shifts``.
-            One of shifts or shifts_kwarg must be provided.
+            One of shifts or shifts_kwargs must be provided.
 
         Returns
         -------
@@ -1621,8 +1627,9 @@ def concat(cls, variables, dim="concat_dim", positions=None, shortcut=False):
         if not shortcut:
             for var in variables:
                 if var.dims != first_var.dims:
-                    raise ValueError("inconsistent dimensions")
-                utils.remove_incompatible_items(attrs, var.attrs)
+                    raise ValueError(
+                        f"Variable has dimensions {list(var.dims)} but first Variable has dimensions {list(first_var.dims)}"
+                    )
 
         return cls(dims, data, attrs, encoding)
 
@@ -1692,6 +1699,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None):
             This optional parameter specifies the interpolation method to
             use when the desired quantile lies between two data points
             ``i < j``:
+
                 * linear: ``i + (j - i) * fraction``, where ``fraction`` is
                   the fractional part of the index surrounded by ``i`` and
                   ``j``.
@@ -1699,6 +1707,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None):
                 * higher: ``j``.
                 * nearest: ``i`` or ``j``, whichever is nearest.
                 * midpoint: ``(i + j) / 2``.
+
         keep_attrs : bool, optional
             If True, the variable's attributes (`attrs`) will be copied from
             the original object to the new one.  If False (default), the new
@@ -1718,40 +1727,51 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None):
         numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile,
         DataArray.quantile
         """
-        if isinstance(self.data, dask_array_type):
-            raise TypeError(
-                "quantile does not work for arrays stored as dask "
-                "arrays. Load the data via .compute() or .load() "
-                "prior to calling this method."
-            )
 
-        q = np.asarray(q, dtype=np.float64)
+        from .computation import apply_ufunc
 
-        new_dims = list(self.dims)
-        if dim is not None:
-            axis = self.get_axis_num(dim)
-            if utils.is_scalar(dim):
-                new_dims.remove(dim)
-            else:
-                for d in dim:
-                    new_dims.remove(d)
-        else:
-            axis = None
-            new_dims = []
+        if keep_attrs is None:
+            keep_attrs = _get_keep_attrs(default=False)
 
-        # Only add the quantile dimension if q is array-like
-        if q.ndim != 0:
-            new_dims = ["quantile"] + new_dims
+        scalar = utils.is_scalar(q)
+        q = np.atleast_1d(np.asarray(q, dtype=np.float64))
 
-        qs = np.nanpercentile(
-            self.data, q * 100.0, axis=axis, interpolation=interpolation
-        )
+        # TODO: remove this range check once numpy >= 1.15.0 is the minimum requirement
+        if np.count_nonzero(q < 0.0) or np.count_nonzero(q > 1.0):
+            raise ValueError("Quantiles must be in the range [0, 1]")
 
-        if keep_attrs is None:
-            keep_attrs = _get_keep_attrs(default=False)
-        attrs = self._attrs if keep_attrs else None
+        if dim is None:
+            dim = self.dims
+
+        if utils.is_scalar(dim):
+            dim = [dim]
+
+        def _wrapper(npa, **kwargs):
+            # np.nanpercentile returns the quantile axis first; move it to the
+            # end, where apply_ufunc expects the "quantile" output core dim.
+            # TODO: use np.nanquantile once numpy >= 1.15.0 is the minimum requirement
+            return np.moveaxis(np.nanpercentile(npa, **kwargs), 0, -1)
 
-        return Variable(new_dims, qs, attrs)
+        axis = np.arange(-1, -1 * len(dim) - 1, -1)
+        result = apply_ufunc(
+            _wrapper,
+            self,
+            input_core_dims=[dim],
+            exclude_dims=set(dim),
+            output_core_dims=[["quantile"]],
+            output_dtypes=[np.float64],
+            output_sizes={"quantile": len(q)},
+            dask="parallelized",
+            kwargs={"q": q * 100, "axis": axis, "interpolation": interpolation},
+        )
+
+        # for backward compatibility: keep "quantile" as the leading dimension
+        result = result.transpose("quantile", ...)
+        if scalar:
+            result = result.squeeze("quantile")
+        if keep_attrs:
+            result.attrs = self._attrs
+        return result
 
     def rank(self, dim, pct=False):
         """Ranks the data.
@@ -1861,9 +1881,9 @@ def rolling_window(
             ),
         )
 
-    def coarsen(self, windows, func, boundary="exact", side="left"):
+    def coarsen(self, windows, func, boundary="exact", side="left", **kwargs):
         """
-        Apply
+        Apply reduction function.
         """
         windows = {k: v for k, v in windows.items() if k in self.dims}
         if not windows:
@@ -1875,11 +1895,11 @@ def coarsen(self, windows, func, boundary="exact", side="left"):
             func = getattr(duck_array_ops, name, None)
             if func is None:
                 raise NameError(f"{name} is not a valid method.")
-        return type(self)(self.dims, func(reshaped, axis=axes), self._attrs)
+        return self._replace(data=func(reshaped, axis=axes, **kwargs))
 
     def _coarsen_reshape(self, windows, boundary, side):
         """
-        Construct a reshaped-array for corsen
+        Construct a reshaped-array for coarsen
         """
         if not utils.is_dict_like(boundary):
             boundary = {d: boundary for d in windows.keys()}
diff --git a/xarray/plot/facetgrid.py b/xarray/plot/facetgrid.py
index 7f13ba601fe..4f3268c1203 100644
--- a/xarray/plot/facetgrid.py
+++ b/xarray/plot/facetgrid.py
@@ -61,6 +61,10 @@ class FacetGrid:
     axes : numpy object array
         Contains axes in corresponding position, as returned from
         plt.subplots
+    col_labels : list
+        list of :class:`matplotlib.text.Text` instances corresponding to column titles.
+    row_labels : list
+        list of :class:`matplotlib.text.Text` instances corresponding to row titles.
     fig : matplotlib.Figure
         The figure containing all the axes
     name_dicts : numpy object array
@@ -200,6 +204,8 @@ def __init__(
         self._ncol = ncol
         self._col_var = col
         self._col_wrap = col_wrap
+        self.row_labels = [None] * nrow
+        self.col_labels = [None] * ncol
         self._x_var = None
         self._y_var = None
         self._cmap_extend = None
@@ -482,22 +488,32 @@ def set_titles(self, template="{coord} = {value}", maxchar=30, size=None, **kwar
                     ax.set_title(title, size=size, **kwargs)
         else:
             # The row titles on the right edge of the grid
-            for ax, row_name in zip(self.axes[:, -1], self.row_names):
+            for index, (ax, row_name, handle) in enumerate(
+                zip(self.axes[:, -1], self.row_names, self.row_labels)
+            ):
                 title = nicetitle(coord=self._row_var, value=row_name, maxchar=maxchar)
-                ax.annotate(
-                    title,
-                    xy=(1.02, 0.5),
-                    xycoords="axes fraction",
-                    rotation=270,
-                    ha="left",
-                    va="center",
-                    **kwargs,
-                )
+                if not handle:
+                    self.row_labels[index] = ax.annotate(
+                        title,
+                        xy=(1.02, 0.5),
+                        xycoords="axes fraction",
+                        rotation=270,
+                        ha="left",
+                        va="center",
+                        **kwargs,
+                    )
+                else:
+                    handle.set_text(title)
 
             # The column titles on the top row
-            for ax, col_name in zip(self.axes[0, :], self.col_names):
+            for index, (ax, col_name, handle) in enumerate(
+                zip(self.axes[0, :], self.col_names, self.col_labels)
+            ):
                 title = nicetitle(coord=self._col_var, value=col_name, maxchar=maxchar)
-                ax.set_title(title, size=size, **kwargs)
+                if not handle:
+                    self.col_labels[index] = ax.set_title(title, size=size, **kwargs)
+                else:
+                    handle.set_text(title)
 
         return self
 
diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py
index 16a4943627e..d38c9765352 100644
--- a/xarray/plot/plot.py
+++ b/xarray/plot/plot.py
@@ -672,10 +672,22 @@ def newplotfunc(
 
         # check if we need to broadcast one dimension
         if xval.ndim < yval.ndim:
-            xval = np.broadcast_to(xval, yval.shape)
+            dims = darray[ylab].dims
+            if xval.shape[0] == yval.shape[0]:
+                xval = np.broadcast_to(xval[:, np.newaxis], yval.shape)
+            else:
+                xval = np.broadcast_to(xval[np.newaxis, :], yval.shape)
 
-        if yval.ndim < xval.ndim:
-            yval = np.broadcast_to(yval, xval.shape)
+        elif yval.ndim < xval.ndim:
+            dims = darray[xlab].dims
+            if yval.shape[0] == xval.shape[0]:
+                yval = np.broadcast_to(yval[:, np.newaxis], xval.shape)
+            else:
+                yval = np.broadcast_to(yval[np.newaxis, :], xval.shape)
+        elif xval.ndim == 2:
+            dims = darray[xlab].dims
+        else:
+            dims = (darray[ylab].dims[0], darray[xlab].dims[0])
 
         # May need to transpose for correct x, y labels
         # xlab may be the name of a coord, we have to check for dim names
@@ -685,10 +697,9 @@ def newplotfunc(
             # we transpose to (y, x, color) to make this work.
             yx_dims = (ylab, xlab)
             dims = yx_dims + tuple(d for d in darray.dims if d not in yx_dims)
-            if dims != darray.dims:
-                darray = darray.transpose(*dims, transpose_coords=True)
-        elif darray[xlab].dims[-1] == darray.dims[0]:
-            darray = darray.transpose(transpose_coords=True)
+
+        if dims != darray.dims:
+            darray = darray.transpose(*dims, transpose_coords=True)
 
         # Pass the data as a masked ndarray too
         zval = darray.to_masked_array(copy=False)
diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py
index 5fe5b8c3f59..67ca12532c7 100644
--- a/xarray/tests/test_accessor_dt.py
+++ b/xarray/tests/test_accessor_dt.py
@@ -12,6 +12,8 @@
     requires_dask,
 )
 
+from .test_dask import raise_if_dask_computes, assert_chunks_equal
+
 
 class TestDatetimeAccessor:
     @pytest.fixture(autouse=True)
@@ -37,24 +39,38 @@ def setup(self):
             name="data",
         )
 
-    def test_field_access(self):
-        years = xr.DataArray(
-            self.times.year, name="year", coords=[self.times], dims=["time"]
-        )
-        months = xr.DataArray(
-            self.times.month, name="month", coords=[self.times], dims=["time"]
-        )
-        days = xr.DataArray(
-            self.times.day, name="day", coords=[self.times], dims=["time"]
-        )
-        hours = xr.DataArray(
-            self.times.hour, name="hour", coords=[self.times], dims=["time"]
+    @pytest.mark.parametrize(
+        "field",
+        [
+            "year",
+            "month",
+            "day",
+            "hour",
+            "minute",
+            "second",
+            "microsecond",
+            "nanosecond",
+            "week",
+            "weekofyear",
+            "dayofweek",
+            "weekday",
+            "dayofyear",
+            "quarter",
+            "is_month_start",
+            "is_month_end",
+            "is_quarter_start",
+            "is_quarter_end",
+            "is_year_start",
+            "is_year_end",
+            "is_leap_year",
+        ],
+    )
+    def test_field_access(self, field):
+        expected = xr.DataArray(
+            getattr(self.times, field), name=field, coords=[self.times], dims=["time"]
         )
-
-        assert_equal(years, self.data.time.dt.year)
-        assert_equal(months, self.data.time.dt.month)
-        assert_equal(days, self.data.time.dt.day)
-        assert_equal(hours, self.data.time.dt.hour)
+        actual = getattr(self.data.time.dt, field)
+        assert_equal(expected, actual)
 
     def test_strftime(self):
         assert (
@@ -69,55 +85,74 @@ def test_not_datetime_type(self):
             nontime_data.time.dt
 
     @requires_dask
-    def test_dask_field_access(self):
+    @pytest.mark.parametrize(
+        "field",
+        [
+            "year",
+            "month",
+            "day",
+            "hour",
+            "minute",
+            "second",
+            "microsecond",
+            "nanosecond",
+            "week",
+            "weekofyear",
+            "dayofweek",
+            "weekday",
+            "dayofyear",
+            "quarter",
+            "is_month_start",
+            "is_month_end",
+            "is_quarter_start",
+            "is_quarter_end",
+            "is_year_start",
+            "is_year_end",
+            "is_leap_year",
+        ],
+    )
+    def test_dask_field_access(self, field):
         import dask.array as da
 
-        years = self.times_data.dt.year
-        months = self.times_data.dt.month
-        hours = self.times_data.dt.hour
-        days = self.times_data.dt.day
-        floor = self.times_data.dt.floor("D")
-        ceil = self.times_data.dt.ceil("D")
-        round = self.times_data.dt.round("D")
-        strftime = self.times_data.dt.strftime("%Y-%m-%d %H:%M:%S")
+        expected = getattr(self.times_data.dt, field)
+
+        dask_times_arr = da.from_array(self.times_arr, chunks=(5, 5, 50))
+        dask_times_2d = xr.DataArray(
+            dask_times_arr, coords=self.data.coords, dims=self.data.dims, name="data"
+        )
+
+        with raise_if_dask_computes():
+            actual = getattr(dask_times_2d.dt, field)
+
+        assert isinstance(actual.data, da.Array)
+        assert_chunks_equal(actual, dask_times_2d)
+        assert_equal(actual.compute(), expected.compute())
+
+    @requires_dask
+    @pytest.mark.parametrize(
+        "method, parameters",
+        [
+            ("floor", "D"),
+            ("ceil", "D"),
+            ("round", "D"),
+            ("strftime", "%Y-%m-%d %H:%M:%S"),
+        ],
+    )
+    def test_dask_accessor_method(self, method, parameters):
+        import dask.array as da
 
+        expected = getattr(self.times_data.dt, method)(parameters)
         dask_times_arr = da.from_array(self.times_arr, chunks=(5, 5, 50))
         dask_times_2d = xr.DataArray(
             dask_times_arr, coords=self.data.coords, dims=self.data.dims, name="data"
         )
-        dask_year = dask_times_2d.dt.year
-        dask_month = dask_times_2d.dt.month
-        dask_day = dask_times_2d.dt.day
-        dask_hour = dask_times_2d.dt.hour
-        dask_floor = dask_times_2d.dt.floor("D")
-        dask_ceil = dask_times_2d.dt.ceil("D")
-        dask_round = dask_times_2d.dt.round("D")
-        dask_strftime = dask_times_2d.dt.strftime("%Y-%m-%d %H:%M:%S")
-
-        # Test that the data isn't eagerly evaluated
-        assert isinstance(dask_year.data, da.Array)
-        assert isinstance(dask_month.data, da.Array)
-        assert isinstance(dask_day.data, da.Array)
-        assert isinstance(dask_hour.data, da.Array)
-        assert isinstance(dask_strftime.data, da.Array)
-
-        # Double check that outcome chunksize is unchanged
-        dask_chunks = dask_times_2d.chunks
-        assert dask_year.data.chunks == dask_chunks
-        assert dask_month.data.chunks == dask_chunks
-        assert dask_day.data.chunks == dask_chunks
-        assert dask_hour.data.chunks == dask_chunks
-        assert dask_strftime.data.chunks == dask_chunks
-
-        # Check the actual output from the accessors
-        assert_equal(years, dask_year.compute())
-        assert_equal(months, dask_month.compute())
-        assert_equal(days, dask_day.compute())
-        assert_equal(hours, dask_hour.compute())
-        assert_equal(floor, dask_floor.compute())
-        assert_equal(ceil, dask_ceil.compute())
-        assert_equal(round, dask_round.compute())
-        assert_equal(strftime, dask_strftime.compute())
+
+        with raise_if_dask_computes():
+            actual = getattr(dask_times_2d.dt, method)(parameters)
+
+        assert isinstance(actual.data, da.Array)
+        assert_chunks_equal(actual, dask_times_2d)
+        assert_equal(actual.compute(), expected.compute())
 
     def test_seasons(self):
         dates = pd.date_range(start="2000/01/01", freq="M", periods=12)
@@ -140,12 +175,108 @@ def test_seasons(self):
 
         assert_array_equal(seasons.values, dates.dt.season.values)
 
-    def test_rounders(self):
+    @pytest.mark.parametrize(
+        "method, parameters", [("floor", "D"), ("ceil", "D"), ("round", "D")]
+    )
+    def test_accessor_method(self, method, parameters):
         dates = pd.date_range("2014-01-01", "2014-05-01", freq="H")
-        xdates = xr.DataArray(np.arange(len(dates)), dims=["time"], coords=[dates])
-        assert_array_equal(dates.floor("D").values, xdates.time.dt.floor("D").values)
-        assert_array_equal(dates.ceil("D").values, xdates.time.dt.ceil("D").values)
-        assert_array_equal(dates.round("D").values, xdates.time.dt.round("D").values)
+        xdates = xr.DataArray(dates, dims=["time"])
+        expected = getattr(dates, method)(parameters)
+        actual = getattr(xdates.dt, method)(parameters)
+        assert_array_equal(expected, actual)
+
+
+class TestTimedeltaAccessor:  # exercises the .dt accessor on timedelta data
+    @pytest.fixture(autouse=True)
+    def setup(self):
+        nt = 100  # length of the time axis
+        data = np.random.rand(10, 10, nt)
+        lons = np.linspace(0, 11, 10)
+        lats = np.linspace(0, 20, 10)
+        self.times = pd.timedelta_range(start="1 day", freq="6H", periods=nt)
+
+        self.data = xr.DataArray(
+            data,
+            coords=[lons, lats, self.times],
+            dims=["lon", "lat", "time"],
+            name="data",
+        )
+
+        self.times_arr = np.random.choice(self.times, size=(10, 10, nt))
+        self.times_data = xr.DataArray(
+            self.times_arr,
+            coords=[lons, lats, self.times],
+            dims=["lon", "lat", "time"],
+            name="data",
+        )
+
+    def test_not_datetime_type(self):
+        nontime_data = self.data.copy()
+        int_data = np.arange(len(self.data.time)).astype("int8")
+        nontime_data["time"].values = int_data  # swap in an int8 coordinate
+        with raises_regex(TypeError, "dt"):
+            nontime_data.time.dt  # accessing .dt must raise
+
+    @pytest.mark.parametrize(
+        "field", ["days", "seconds", "microseconds", "nanoseconds"]
+    )
+    def test_field_access(self, field):
+        expected = xr.DataArray(
+            getattr(self.times, field), name=field, coords=[self.times], dims=["time"]
+        )
+        actual = getattr(self.data.time.dt, field)
+        assert_equal(expected, actual)
+
+    @pytest.mark.parametrize(
+        "method, parameters", [("floor", "D"), ("ceil", "D"), ("round", "D")]
+    )
+    def test_accessor_methods(self, method, parameters):
+        dates = pd.timedelta_range(start="1 day", end="30 days", freq="6H")
+        xdates = xr.DataArray(dates, dims=["time"])
+        expected = getattr(dates, method)(parameters)
+        actual = getattr(xdates.dt, method)(parameters)
+        assert_array_equal(expected, actual)
+
+    @requires_dask
+    @pytest.mark.parametrize(
+        "field", ["days", "seconds", "microseconds", "nanoseconds"]
+    )
+    def test_dask_field_access(self, field):
+        import dask.array as da
+
+        expected = getattr(self.times_data.dt, field)  # eager reference
+
+        dask_times_arr = da.from_array(self.times_arr, chunks=(5, 5, 50))
+        dask_times_2d = xr.DataArray(
+            dask_times_arr, coords=self.data.coords, dims=self.data.dims, name="data"
+        )
+
+        with raise_if_dask_computes():
+            actual = getattr(dask_times_2d.dt, field)
+
+        assert isinstance(actual.data, da.Array)  # result stays dask-backed
+        assert_chunks_equal(actual, dask_times_2d)
+        assert_equal(actual, expected)
+
+    @requires_dask
+    @pytest.mark.parametrize(
+        "method, parameters", [("floor", "D"), ("ceil", "D"), ("round", "D")]
+    )
+    def test_dask_accessor_method(self, method, parameters):
+        import dask.array as da
+
+        expected = getattr(self.times_data.dt, method)(parameters)  # eager reference
+        dask_times_arr = da.from_array(self.times_arr, chunks=(5, 5, 50))
+        dask_times_2d = xr.DataArray(
+            dask_times_arr, coords=self.data.coords, dims=self.data.dims, name="data"
+        )
+
+        with raise_if_dask_computes():
+            actual = getattr(dask_times_2d.dt, method)(parameters)
+
+        assert isinstance(actual.data, da.Array)  # result stays dask-backed
+        assert_chunks_equal(actual, dask_times_2d)
+        assert_equal(actual.compute(), expected.compute())
 
 
 _CFTIME_CALENDARS = [
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index de3a7eadab0..a23527bd49a 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -33,6 +33,7 @@
 from xarray.backends.netCDF4_ import _extract_nc4_variable_encoding
 from xarray.backends.pydap_ import PydapDataStore
 from xarray.coding.variables import SerializationWarning
+from xarray.conventions import encode_dataset_coordinates
 from xarray.core import indexing
 from xarray.core.options import set_options
 from xarray.core.pycompat import dask_array_type
@@ -522,15 +523,35 @@ def test_roundtrip_coordinates(self):
         with self.roundtrip(original) as actual:
             assert_identical(original, actual)
 
+        original["foo"].encoding["coordinates"] = "y"
+        with self.roundtrip(original, open_kwargs={"decode_coords": False}) as expected:
+            # check roundtripping when decode_coords=False
+            with self.roundtrip(
+                expected, open_kwargs={"decode_coords": False}
+            ) as actual:
+                assert_identical(expected, actual)
+
     def test_roundtrip_global_coordinates(self):
-        original = Dataset({"x": [2, 3], "y": ("a", [42]), "z": ("x", [4, 5])})
+        original = Dataset(
+            {"foo": ("x", [0, 1])}, {"x": [2, 3], "y": ("a", [42]), "z": ("x", [4, 5])}
+        )
         with self.roundtrip(original) as actual:
             assert_identical(original, actual)
 
+        # test that global "coordinates" is as expected
+        _, attrs = encode_dataset_coordinates(original)
+        assert attrs["coordinates"] == "y"
+
+        # test warning when global "coordinates" is already set
+        original.attrs["coordinates"] = "foo"
+        with pytest.warns(SerializationWarning):
+            _, attrs = encode_dataset_coordinates(original)
+            assert attrs["coordinates"] == "foo"
+
     def test_roundtrip_coordinates_with_space(self):
         original = Dataset(coords={"x": 0, "y z": 1})
         expected = Dataset({"y z": 1}, {"x": 0})
-        with pytest.warns(xr.SerializationWarning):
+        with pytest.warns(SerializationWarning):
             with self.roundtrip(original) as actual:
                 assert_identical(expected, actual)
 
@@ -810,6 +831,18 @@ def equals_latlon(obj):
                 assert "coordinates" not in ds["lat"].attrs
                 assert "coordinates" not in ds["lon"].attrs
 
+        original["temp"].encoding["coordinates"] = "lat"
+        with self.roundtrip(original) as actual:
+            assert_identical(actual, original)
+        original["precip"].encoding["coordinates"] = "lat"
+        with create_tmp_file() as tmp_file:
+            original.to_netcdf(tmp_file)
+            with open_dataset(tmp_file, decode_coords=True) as ds:
+                assert "lon" not in ds["temp"].encoding["coordinates"]
+                assert "lon" not in ds["precip"].encoding["coordinates"]
+                assert "coordinates" not in ds["lat"].encoding
+                assert "coordinates" not in ds["lon"].encoding
+
     def test_roundtrip_endian(self):
         ds = Dataset(
             {
@@ -3951,6 +3984,7 @@ def test_rasterio_environment(self):
                     with xr.open_rasterio(tmp_file) as actual:
                         assert_allclose(actual, expected)
 
+    @pytest.mark.xfail(reason="rasterio 1.1.1 is broken. GH3573")
     def test_rasterio_vrt(self):
         import rasterio
 
diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py
index cd26e7fb60b..d907e1c5e46 100644
--- a/xarray/tests/test_combine.py
+++ b/xarray/tests/test_combine.py
@@ -22,7 +22,7 @@
     _new_tile_id,
 )
 
-from . import assert_equal, assert_identical, raises_regex
+from . import assert_equal, assert_identical, raises_regex, requires_cftime
 from .test_dataset import create_test_data
 
 
@@ -711,6 +711,22 @@ def test_check_for_impossible_ordering(self):
         ):
             combine_by_coords([ds1, ds0])
 
+    def test_combine_by_coords_incomplete_hypercube(self):
+        # succeeds with the default fill_value: the missing (y=1, x=1) cell becomes NaN
+        x1 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [0]})
+        x2 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [1], "x": [0]})
+        x3 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [1]})
+        actual = combine_by_coords([x1, x2, x3])
+        expected = Dataset(
+            {"a": (("y", "x"), [[1, 1], [1, np.nan]])},
+            coords={"y": [0, 1], "x": [0, 1]},
+        )
+        assert_identical(expected, actual)
+
+        # test that this fails if fill_value is None
+        with pytest.raises(ValueError):
+            combine_by_coords([x1, x2, x3], fill_value=None)
+
 
 @pytest.mark.filterwarnings(
     "ignore:In xarray version 0.15 `auto_combine` " "will be deprecated"
@@ -877,3 +893,25 @@ def test_auto_combine_without_coords(self):
         objs = [Dataset({"foo": ("x", [0])}), Dataset({"foo": ("x", [1])})]
         with pytest.warns(FutureWarning, match="supplied do not have global"):
             auto_combine(objs)
+
+
+@requires_cftime
+def test_combine_by_coords_distant_cftime_dates():
+    # Regression test for https://github.com/pydata/xarray/issues/3535
+    import cftime
+
+    time_1 = [cftime.DatetimeGregorian(4500, 12, 31)]
+    time_2 = [cftime.DatetimeGregorian(4600, 12, 31)]
+    time_3 = [cftime.DatetimeGregorian(5100, 12, 31)]
+
+    da_1 = DataArray([0], dims=["time"], coords=[time_1], name="a").to_dataset()
+    da_2 = DataArray([1], dims=["time"], coords=[time_2], name="a").to_dataset()
+    da_3 = DataArray([2], dims=["time"], coords=[time_3], name="a").to_dataset()
+
+    result = combine_by_coords([da_1, da_2, da_3])
+
+    expected_time = np.concatenate([time_1, time_2, time_3])
+    expected = DataArray(
+        [0, 1, 2], dims=["time"], coords=[expected_time], name="a"
+    ).to_dataset()
+    assert_identical(result, expected)
diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py
index 0661ebb7a38..def5abc942f 100644
--- a/xarray/tests/test_concat.py
+++ b/xarray/tests/test_concat.py
@@ -462,3 +462,16 @@ def test_concat_join_kwarg(self):
         for join in expected:
             actual = concat([ds1, ds2], join=join, dim="x")
             assert_equal(actual, expected[join].to_array())
+
+
+@pytest.mark.parametrize("attr1", ({"a": {"meta": [10, 20, 30]}}, {"a": [1, 2, 3]}, {}))
+@pytest.mark.parametrize("attr2", ({"a": [1, 2, 3]}, {}))
+def test_concat_attrs_first_variable(attr1, attr2):
+    # The concatenated result must carry exactly the attrs of the first array.
+    arrs = [
+        DataArray([[1], [2]], dims=["x", "y"], attrs=attr1),
+        DataArray([[3], [4]], dims=["x", "y"], attrs=attr2),
+    ]
+
+    concat_attrs = concat(arrs, "y").attrs
+    assert concat_attrs == attr1
diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py
index 09002e252b4..acb2400ea04 100644
--- a/xarray/tests/test_conventions.py
+++ b/xarray/tests/test_conventions.py
@@ -136,6 +136,20 @@ def test_multidimensional_coordinates(self):
         # Should not have any global coordinates.
         assert "coordinates" not in attrs
 
+    def test_do_not_overwrite_user_coordinates(self):
+        orig = Dataset(
+            coords={"x": [0, 1, 2], "y": ("x", [5, 6, 7]), "z": ("x", [8, 9, 10])},
+            data_vars={"a": ("x", [1, 2, 3]), "b": ("x", [3, 5, 6])},
+        )
+        orig["a"].encoding["coordinates"] = "y"  # user-supplied value
+        orig["b"].encoding["coordinates"] = "z"  # user-supplied value
+        enc, _ = conventions.encode_dataset_coordinates(orig)
+        assert enc["a"].attrs["coordinates"] == "y"  # passed through as given
+        assert enc["b"].attrs["coordinates"] == "z"  # passed through as given
+        orig["a"].attrs["coordinates"] = "foo"  # now set in both attrs and encoding
+        with raises_regex(ValueError, "'coordinates' found in both attrs"):
+            conventions.encode_dataset_coordinates(orig)
+
     @requires_dask
     def test_string_object_warning(self):
         original = Variable(("x",), np.array(["foo", "bar"], dtype=object)).chunk()
diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py
index 43a602eb9b0..4daa786068f 100644
--- a/xarray/tests/test_dask.py
+++ b/xarray/tests/test_dask.py
@@ -16,6 +16,7 @@
 from xarray.testing import assert_chunks_equal
 from xarray.tests import mock
 
+from ..core.duck_array_ops import lazy_array_equiv
 from . import (
     assert_allclose,
     assert_array_equal,
@@ -25,7 +26,6 @@
     raises_regex,
     requires_scipy_or_netCDF4,
 )
-from ..core.duck_array_ops import lazy_array_equiv
 from .test_backends import create_tmp_file
 
 dask = pytest.importorskip("dask")
@@ -216,8 +216,10 @@ def test_reduce(self):
         self.assertLazyAndAllClose(u.argmin(dim="x"), actual)
         self.assertLazyAndAllClose((u > 1).any(), (v > 1).any())
         self.assertLazyAndAllClose((u < 1).all("x"), (v < 1).all("x"))
-        with raises_regex(NotImplementedError, "dask"):
+        with raises_regex(NotImplementedError, "only works along an axis"):
             v.median()
+        with raises_regex(NotImplementedError, "only works along an axis"):
+            v.median(v.dims)
         with raise_if_dask_computes():
             v.reduce(duck_array_ops.mean)
 
@@ -1189,6 +1191,19 @@ def func(obj):
     assert_identical(expected.compute(), actual.compute())
 
 
+def test_map_blocks_hlg_layers():
+    """Regression test for #3599 (identity ``map_blocks`` on dask data)."""
+    # each variable lives on its own dimension and is split into two chunks
+    variables = {
+        name: ((dim,), dask.array.ones(10, chunks=(5,)))
+        for name, dim in [("x", "a"), ("z", "b")]
+    }
+    ds = xr.Dataset(variables)
+
+    mapped = ds.map_blocks(lambda x: x)
+    xr.testing.assert_equal(mapped, ds)
+
+
 def test_make_meta(map_ds):
     from ..core.parallel import make_meta
 
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index ad98792372e..f957316d8ac 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -15,6 +15,7 @@
 from xarray.core import dtypes
 from xarray.core.common import full_like
 from xarray.core.indexes import propagate_indexes
+from xarray.core.utils import is_scalar
 from xarray.tests import (
     LooseVersion,
     ReturnItem,
@@ -2330,17 +2331,20 @@ def test_reduce_out(self):
         with pytest.raises(TypeError):
             orig.mean(out=np.ones(orig.shape))
 
-    def test_quantile(self):
-        for q in [0.25, [0.50], [0.25, 0.75]]:
-            for axis, dim in zip(
-                [None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]]
-            ):
-                actual = DataArray(self.va).quantile(q, dim=dim, keep_attrs=True)
-                expected = np.nanpercentile(
-                    self.dv.values, np.array(q) * 100, axis=axis
-                )
-                np.testing.assert_allclose(actual.values, expected)
-                assert actual.attrs == self.attrs
+    @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]])
+    @pytest.mark.parametrize(
+        "axis, dim", [(None, None), (0, "x"), ([0], ["x"]), ([0, 1], ["x", "y"])]
+    )
+    def test_quantile(self, q, axis, dim):
+        """Compare ``quantile`` with ``np.nanpercentile`` for matching axis/dim."""
+        expected = np.nanpercentile(self.dv.values, np.array(q) * 100, axis=axis)
+        actual = DataArray(self.va).quantile(q, dim=dim, keep_attrs=True)
+
+        np.testing.assert_allclose(actual.values, expected)
+        # a "quantile" dimension is expected exactly when q is not a scalar
+        assert ("quantile" in actual.dims) == (not is_scalar(q))
+
+        assert actual.attrs == self.attrs
 
     def test_reduce_keep_attrs(self):
         # Test dropped attrs
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index e8fe768b783..7db1911621b 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -28,6 +28,7 @@
 from xarray.core.common import duck_array_ops, full_like
 from xarray.core.npcompat import IS_NEP18_ACTIVE
 from xarray.core.pycompat import integer_types
+from xarray.core.utils import is_scalar
 
 from . import (
     InaccessibleArray,
@@ -4575,21 +4576,24 @@ def test_reduce_keepdims(self):
         )
         assert_identical(expected, actual)
 
-    def test_quantile(self):
-
+    @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]])
+    def test_quantile(self, q):
         ds = create_test_data(seed=123)
 
-        for q in [0.25, [0.50], [0.25, 0.75]]:
-            for dim in [None, "dim1", ["dim1"]]:
-                ds_quantile = ds.quantile(q, dim=dim)
-                assert "quantile" in ds_quantile
-                for var, dar in ds.data_vars.items():
-                    assert var in ds_quantile
-                    assert_identical(ds_quantile[var], dar.quantile(q, dim=dim))
-            dim = ["dim1", "dim2"]
+        for dim in [None, "dim1", ["dim1"]]:
             ds_quantile = ds.quantile(q, dim=dim)
-            assert "dim3" in ds_quantile.dims
-            assert all(d not in ds_quantile.dims for d in dim)
+            if is_scalar(q):  # scalar q: no "quantile" dimension expected
+                assert "quantile" not in ds_quantile.dims
+            else:
+                assert "quantile" in ds_quantile.dims
+
+            for var, dar in ds.data_vars.items():  # must match per-variable quantile
+                assert var in ds_quantile
+                assert_identical(ds_quantile[var], dar.quantile(q, dim=dim))
+        dim = ["dim1", "dim2"]  # reducing over both must leave "dim3" in place
+        ds_quantile = ds.quantile(q, dim=dim)
+        assert "dim3" in ds_quantile.dims
+        assert all(d not in ds_quantile.dims for d in dim)
 
     @requires_bottleneck
     def test_rank(self):
@@ -5493,6 +5497,11 @@ def ds(request):
         )
 
 
+def test_coarsen_absent_dims_error(ds):
+    with raises_regex(ValueError, "not found in Dataset."):
+        ds.coarsen(foo=2)  # "foo" is presumably not a dimension of the ds fixture
+
+
 @pytest.mark.parametrize("dask", [True, False])
 @pytest.mark.parametrize(("boundary", "side"), [("trim", "left"), ("pad", "right")])
 def test_coarsen(ds, dask, boundary, side):
@@ -5501,12 +5510,11 @@ def test_coarsen(ds, dask, boundary, side):
 
     actual = ds.coarsen(time=2, x=3, boundary=boundary, side=side).max()
     assert_equal(
-        actual["z1"], ds["z1"].coarsen(time=2, x=3, boundary=boundary, side=side).max()
+        actual["z1"], ds["z1"].coarsen(x=3, boundary=boundary, side=side).max()
     )
     # coordinate should be mean by default
     assert_equal(
-        actual["time"],
-        ds["time"].coarsen(time=2, x=3, boundary=boundary, side=side).mean(),
+        actual["time"], ds["time"].coarsen(time=2, boundary=boundary, side=side).mean()
     )
 
 
@@ -5517,8 +5525,8 @@ def test_coarsen_coords(ds, dask):
 
     # check if coord_func works
     actual = ds.coarsen(time=2, x=3, boundary="trim", coord_func={"time": "max"}).max()
-    assert_equal(actual["z1"], ds["z1"].coarsen(time=2, x=3, boundary="trim").max())
-    assert_equal(actual["time"], ds["time"].coarsen(time=2, x=3, boundary="trim").max())
+    assert_equal(actual["z1"], ds["z1"].coarsen(x=3, boundary="trim").max())
+    assert_equal(actual["time"], ds["time"].coarsen(time=2, boundary="trim").max())
 
     # raise if exact
     with pytest.raises(ValueError):
diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py
index 0b410383a34..1cd0319a9a5 100644
--- a/xarray/tests/test_missing.py
+++ b/xarray/tests/test_missing.py
@@ -9,8 +9,8 @@
     NumpyInterpolator,
     ScipyInterpolator,
     SplineInterpolator,
-    get_clean_interp_index,
     _get_nan_block_lengths,
+    get_clean_interp_index,
 )
 from xarray.core.pycompat import dask_array_type
 from xarray.tests import (
diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py
index 6e283ea01da..a5402d88f3e 100644
--- a/xarray/tests/test_plot.py
+++ b/xarray/tests/test_plot.py
@@ -62,6 +62,15 @@ def substring_in_axes(substring, ax):
     return False
 
 
+def substring_not_in_axes(substring, ax):
+    """
+    Return True when no text object found in the axes contains ``substring``
+    """
+    texts = {artist.get_text() for artist in ax.findobj(mpl.text.Text)}
+    # build the full list first (no short-circuit), then reduce it
+    return all([substring not in text for text in texts])
+
+
 def easy_array(shape, start=0, stop=1):
     """
     Make an array with desired shape using np.linspace
@@ -265,6 +274,7 @@ def test2d_1d_2d_coordinates_contourf(self):
         )
 
         a.plot.contourf(x="time", y="depth")
+        a.plot.contourf(x="depth", y="time")
 
     def test3d(self):
         self.darray.plot()
@@ -1775,6 +1785,18 @@ def test_default_labels(self):
         for label, ax in zip(self.darray.coords["col"].values, g.axes[0, :]):
             assert substring_in_axes(label, ax)
 
+        # ensure that row & col labels can be changed
+        g.set_titles("abc={value}")
+        for label, ax in zip(self.darray.coords["row"].values, g.axes[:, -1]):
+            assert substring_in_axes(f"abc={label}", ax)
+            # previous labels were "row=row0" etc.
+            assert substring_not_in_axes("row=", ax)
+
+        for label, ax in zip(self.darray.coords["col"].values, g.axes[0, :]):
+            assert substring_in_axes(f"abc={label}", ax)
+            # previous labels were "col=col0" etc.
+            assert substring_not_in_axes("col=", ax)
+
 
 @pytest.mark.filterwarnings("ignore:tight_layout cannot")
 class TestFacetedLinePlotsLegend(PlotTestCase):
@@ -2149,3 +2171,31 @@ def test_yticks_kwarg(self, da):
         da.plot(yticks=np.arange(5))
         expected = np.arange(5)
         assert np.all(plt.gca().get_yticks() == expected)
+
+
+@requires_matplotlib
+@pytest.mark.parametrize("plotfunc", ["pcolormesh", "contourf", "contour"])
+def test_plot_transposed_nondim_coord(plotfunc):
+    """Plot against a 2D coordinate whose dims are reversed w.r.t. the data."""
+    x = np.linspace(0, 10, 101)
+    h = np.linspace(3, 7, 101)
+    s = np.linspace(0, 1, 51)
+    z = s[:, np.newaxis] * h[np.newaxis, :]  # laid out as ("s", "x")
+    coords = {"x": x, "s": s, "z": (("s", "x"), z), "zt": (("x", "s"), z.T)}
+    da = xr.DataArray(np.sin(x) * np.cos(z), dims=["s", "x"], coords=coords)
+
+    # exercise the transposed coordinate "zt" on either plot axis
+    for axes in ({"x": "x", "y": "zt"}, {"x": "zt", "y": "x"}):
+        getattr(da.plot, plotfunc)(**axes)
+
+
+@requires_matplotlib
+@pytest.mark.parametrize("plotfunc", ["pcolormesh", "imshow"])
+def test_plot_transposes_properly(plotfunc):
+    """Data with equally sized dims must not be transposed by mistake."""
+    row = np.sin(2 * np.pi / 10 * np.arange(10))
+    da = xr.DataArray([row] * 10, dims=("y", "x"))
+    plotted = getattr(da.plot, plotfunc)(x="x", y="y").get_array()
+    # contour/contourf are excluded: their get_array() gives the colormap intervals.
+    # pcolormesh yields a 1D array and imshow a 2D one, hence ravel() on both sides.
+    assert np.all(plotted.ravel() == da.to_masked_array().ravel())
diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py
index 0be6f8af464..f8a8a259c1f 100644
--- a/xarray/tests/test_units.py
+++ b/xarray/tests/test_units.py
@@ -29,9 +29,11 @@
 
 
 def array_extract_units(obj):
-    raw = obj.data if hasattr(obj, "data") else obj
+    if isinstance(obj, (xr.Variable, xr.DataArray)):  # Dataset has no ``.data``
+        obj = obj.data  # unwrap to the underlying (possibly unit-aware) array
+
     try:
-        return raw.units
+        return obj.units
     except AttributeError:
         return None
 
@@ -112,7 +114,7 @@ def extract_units(obj):
 
         units = {**vars_units, **coords_units}
     elif isinstance(obj, Quantity):
-        vars_units = {"<array>": array_extract_units(obj)}
+        vars_units = {None: array_extract_units(obj)}
 
         units = {**vars_units}
     else:
@@ -203,21 +205,25 @@ def attach_units(obj, units):
 def convert_units(obj, to):
     if isinstance(obj, xr.Dataset):
         data_vars = {
-            name: convert_units(array, to) for name, array in obj.data_vars.items()
+            name: convert_units(array.variable, {None: to.get(name)})
+            for name, array in obj.data_vars.items()
+        }
+        coords = {
+            name: convert_units(array.variable, {None: to.get(name)})
+            for name, array in obj.coords.items()
         }
-        coords = {name: convert_units(array, to) for name, array in obj.coords.items()}
 
         new_obj = xr.Dataset(data_vars=data_vars, coords=coords, attrs=obj.attrs)
     elif isinstance(obj, xr.DataArray):
         name = obj.name
 
         new_units = (
-            to.get(name, None) or to.get("data", None) or to.get(None, None) or 1
+            to.get(name, None) or to.get("data", None) or to.get(None, None) or None
         )
-        data = convert_units(obj.data, {None: new_units})
+        data = convert_units(obj.variable, {None: new_units})
 
         coords = {
-            name: (array.dims, convert_units(array.data, to))
+            name: (array.dims, convert_units(array.variable, {None: to.get(name)}))
             for name, array in obj.coords.items()
             if name != obj.name
         }
@@ -225,6 +231,9 @@ def convert_units(obj, to):
         new_obj = xr.DataArray(
             name=name, data=data, coords=coords, attrs=obj.attrs, dims=obj.dims
         )
+    elif isinstance(obj, xr.Variable):
+        new_data = convert_units(obj.data, to)
+        new_obj = obj.copy(data=new_data)
     elif isinstance(obj, unit_registry.Quantity):
         units = to.get(None)
         new_obj = obj.to(units) if units is not None else obj
@@ -344,14 +353,34 @@ def test_apply_ufunc_dataarray(dtype):
     data_array = xr.DataArray(data=array, dims="x", coords={"x": x})
 
     expected = attach_units(func(strip_units(data_array)), extract_units(data_array))
-    result = func(data_array)
+    actual = func(data_array)
 
-    assert_equal_with_units(expected, result)
+    assert_equal_with_units(expected, actual)
 
 
-@pytest.mark.xfail(
-    reason="pint does not implement `np.result_type` and align strips units"
-)
+def test_apply_ufunc_dataset(dtype):
+    """``apply_ufunc`` on a quantity-backed Dataset vs. the unit-stripped result."""
+    func = function(
+        xr.apply_ufunc, np.mean, input_core_dims=[["x"]], kwargs={"axis": -1}
+    )
+
+    meter, second = unit_registry.m, unit_registry.s
+    data_vars = {
+        "a": (("x", "y"), np.linspace(0, 10, 50).reshape(5, 10).astype(dtype) * meter),
+        "b": ("x", np.linspace(0, 10, 5).astype(dtype) * meter),
+    }
+    coords = {"x": np.arange(5) * second, "y": np.arange(10) * meter}
+    ds = xr.Dataset(data_vars=data_vars, coords=coords)
+
+    # reference result: apply to the stripped dataset, then re-attach the units
+    stripped_result = func(strip_units(ds))
+    expected = attach_units(stripped_result, extract_units(ds))
+    actual = func(ds)
+
+    assert_equal_with_units(expected, actual)
+
+
+@pytest.mark.xfail(reason="blocked by `reindex` / `where`")
 @pytest.mark.parametrize(
     "unit,error",
     (
@@ -378,9 +407,9 @@ def test_align_dataarray(fill_value, variant, unit, error, dtype):
     original_unit = unit_registry.m
 
     variants = {
-        "data": (unit, original_unit, original_unit),
-        "dims": (original_unit, unit, original_unit),
-        "coords": (original_unit, original_unit, unit),
+        "data": (unit, 1, 1),
+        "dims": (original_unit, unit, 1),
+        "coords": (original_unit, 1, unit),
     }
     data_unit, dim_unit, coord_unit = variants.get(variant)
 
@@ -410,32 +439,27 @@ def test_align_dataarray(fill_value, variant, unit, error, dtype):
 
     stripped_kwargs = {
         key: strip_units(
-            convert_units(value, {None: original_unit})
-            if isinstance(value, unit_registry.Quantity)
-            else value
+            convert_units(value, {None: original_unit if data_unit != 1 else None})
         )
         for key, value in func.kwargs.items()
     }
-    units = extract_units(data_array1)
-    # FIXME: should the expected_b have the same units as data_array1
-    # or data_array2?
-    expected_a, expected_b = tuple(
-        attach_units(elem, units)
-        for elem in func(
-            strip_units(data_array1),
-            strip_units(convert_units(data_array2, units)),
-            **stripped_kwargs,
-        )
+    units_a = extract_units(data_array1)
+    units_b = extract_units(data_array2)
+    expected_a, expected_b = func(
+        strip_units(data_array1),
+        strip_units(convert_units(data_array2, units_a)),
+        **stripped_kwargs,
     )
-    result_a, result_b = func(data_array1, data_array2)
+    expected_a = attach_units(expected_a, units_a)
+    expected_b = convert_units(attach_units(expected_b, units_a), units_b)
 
-    assert_equal_with_units(expected_a, result_a)
-    assert_equal_with_units(expected_b, result_b)
+    actual_a, actual_b = func(data_array1, data_array2)
 
+    assert_equal_with_units(expected_a, actual_a)
+    assert_equal_with_units(expected_b, actual_b)
 
-@pytest.mark.xfail(
-    reason="pint does not implement `np.result_type` and align strips units"
-)
+
+@pytest.mark.xfail(reason="blocked by `reindex` / `where`")
 @pytest.mark.parametrize(
     "unit,error",
     (
@@ -461,11 +485,7 @@ def test_align_dataarray(fill_value, variant, unit, error, dtype):
 def test_align_dataset(fill_value, unit, variant, error, dtype):
     original_unit = unit_registry.m
 
-    variants = {
-        "data": (unit, original_unit, original_unit),
-        "dims": (original_unit, unit, original_unit),
-        "coords": (original_unit, original_unit, unit),
-    }
+    variants = {"data": (unit, 1, 1), "dims": (1, unit, 1), "coords": (1, 1, unit)}
     data_unit, dim_unit, coord_unit = variants.get(variant)
 
     array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * original_unit
@@ -497,24 +517,22 @@ def test_align_dataset(fill_value, unit, variant, error, dtype):
 
     stripped_kwargs = {
         key: strip_units(
-            convert_units(value, {None: original_unit})
-            if isinstance(value, unit_registry.Quantity)
-            else value
+            convert_units(value, {None: original_unit if data_unit != 1 else None})
         )
         for key, value in func.kwargs.items()
     }
-    units = extract_units(ds1)
-    # FIXME: should the expected_b have the same units as ds1 or ds2?
-    expected_a, expected_b = tuple(
-        attach_units(elem, units)
-        for elem in func(
-            strip_units(ds1), strip_units(convert_units(ds2, units)), **stripped_kwargs
-        )
+    units_a = extract_units(ds1)
+    units_b = extract_units(ds2)
+    expected_a, expected_b = func(
+        strip_units(ds1), strip_units(convert_units(ds2, units_a)), **stripped_kwargs
     )
-    result_a, result_b = func(ds1, ds2)
+    expected_a = attach_units(expected_a, units_a)
+    expected_b = convert_units(attach_units(expected_b, units_a), units_b)
 
-    assert_equal_with_units(expected_a, result_a)
-    assert_equal_with_units(expected_b, result_b)
+    actual_a, actual_b = func(ds1, ds2)
+
+    assert_equal_with_units(expected_a, actual_a)
+    assert_equal_with_units(expected_b, actual_b)
 
 
 def test_broadcast_dataarray(dtype):
@@ -528,10 +546,10 @@ def test_broadcast_dataarray(dtype):
         attach_units(elem, extract_units(a))
         for elem in xr.broadcast(strip_units(a), strip_units(b))
     )
-    result_a, result_b = xr.broadcast(a, b)
+    actual_a, actual_b = xr.broadcast(a, b)
 
-    assert_equal_with_units(expected_a, result_a)
-    assert_equal_with_units(expected_b, result_b)
+    assert_equal_with_units(expected_a, actual_a)
+    assert_equal_with_units(expected_b, actual_b)
 
 
 def test_broadcast_dataset(dtype):
@@ -543,12 +561,11 @@ def test_broadcast_dataset(dtype):
     (expected,) = tuple(
         attach_units(elem, extract_units(ds)) for elem in xr.broadcast(strip_units(ds))
     )
-    (result,) = xr.broadcast(ds)
+    (actual,) = xr.broadcast(ds)
 
-    assert_equal_with_units(expected, result)
+    assert_equal_with_units(expected, actual)
 
 
-@pytest.mark.xfail(reason="`combine_by_coords` strips units")
 @pytest.mark.parametrize(
     "unit,error",
     (
@@ -614,12 +631,11 @@ def test_combine_by_coords(variant, unit, error, dtype):
         ),
         units,
     )
-    result = xr.combine_by_coords([ds, other])
+    actual = xr.combine_by_coords([ds, other])
 
-    assert_equal_with_units(expected, result)
+    assert_equal_with_units(expected, actual)
 
 
-@pytest.mark.xfail(reason="blocked by `where`")
 @pytest.mark.parametrize(
     "unit,error",
     (
@@ -628,7 +644,12 @@ def test_combine_by_coords(variant, unit, error, dtype):
             unit_registry.dimensionless, DimensionalityError, id="dimensionless"
         ),
         pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
-        pytest.param(unit_registry.mm, None, id="compatible_unit"),
+        pytest.param(
+            unit_registry.mm,
+            None,
+            id="compatible_unit",
+            marks=pytest.mark.xfail(reason="wrong order of arguments to `where`"),
+        ),
         pytest.param(unit_registry.m, None, id="identical_unit"),
     ),
     ids=repr,
@@ -714,12 +735,11 @@ def test_combine_nested(variant, unit, error, dtype):
         ),
         units,
     )
-    result = func([[ds1, ds2], [ds3, ds4]])
+    actual = func([[ds1, ds2], [ds3, ds4]])
 
-    assert_equal_with_units(expected, result)
+    assert_equal_with_units(expected, actual)
 
 
-@pytest.mark.xfail(reason="`concat` strips units")
 @pytest.mark.parametrize(
     "unit,error",
     (
@@ -760,15 +780,18 @@ def test_concat_dataarray(variant, unit, error, dtype):
 
         return
 
+    units = extract_units(arr1)
     expected = attach_units(
-        xr.concat([strip_units(arr1), strip_units(arr2)], dim="x"), extract_units(arr1)
+        xr.concat(
+            [strip_units(arr1), strip_units(convert_units(arr2, units))], dim="x"
+        ),
+        units,
     )
-    result = xr.concat([arr1, arr2], dim="x")
+    actual = xr.concat([arr1, arr2], dim="x")
 
-    assert_equal_with_units(expected, result)
+    assert_equal_with_units(expected, actual)
 
 
-@pytest.mark.xfail(reason="`concat` strips units")
 @pytest.mark.parametrize(
     "unit,error",
     (
@@ -809,15 +832,17 @@ def test_concat_dataset(variant, unit, error, dtype):
 
         return
 
+    units = extract_units(ds1)
     expected = attach_units(
-        xr.concat([strip_units(ds1), strip_units(ds2)], dim="x"), extract_units(ds1)
+        xr.concat([strip_units(ds1), strip_units(convert_units(ds2, units))], dim="x"),
+        units,
     )
-    result = xr.concat([ds1, ds2], dim="x")
+    actual = xr.concat([ds1, ds2], dim="x")
 
-    assert_equal_with_units(expected, result)
+    assert_equal_with_units(expected, actual)
 
 
-@pytest.mark.xfail(reason="blocked by `where`")
+@pytest.mark.xfail(reason="blocked by `reindex` / `where`")
 @pytest.mark.parametrize(
     "unit,error",
     (
@@ -902,12 +927,12 @@ def test_merge_dataarray(variant, unit, error, dtype):
         func([strip_units(arr1), convert_and_strip(arr2), convert_and_strip(arr3)]),
         units,
     )
-    result = func([arr1, arr2, arr3])
+    actual = func([arr1, arr2, arr3])
 
-    assert_equal_with_units(expected, result)
+    assert_equal_with_units(expected, actual)
 
 
-@pytest.mark.xfail(reason="blocked by `where`")
+@pytest.mark.xfail(reason="blocked by `reindex` / `where`")
 @pytest.mark.parametrize(
     "unit,error",
     (
@@ -985,9 +1010,9 @@ def test_merge_dataset(variant, unit, error, dtype):
     expected = attach_units(
         func([strip_units(ds1), convert_and_strip(ds2), convert_and_strip(ds3)]), units
     )
-    result = func([ds1, ds2, ds3])
+    actual = func([ds1, ds2, ds3])
 
-    assert_equal_with_units(expected, result)
+    assert_equal_with_units(expected, actual)
 
 
 @pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like))
@@ -997,9 +1022,9 @@ def test_replication_dataarray(func, dtype):
 
     numpy_func = getattr(np, func.__name__)
     expected = xr.DataArray(data=numpy_func(array), dims="x")
-    result = func(data_array)
+    actual = func(data_array)
 
-    assert_equal_with_units(expected, result)
+    assert_equal_with_units(expected, actual)
 
 
 @pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like))
@@ -1019,9 +1044,9 @@ def test_replication_dataset(func, dtype):
     expected = ds.copy(
         data={name: numpy_func(array.data) for name, array in ds.data_vars.items()}
     )
-    result = func(ds)
+    actual = func(ds)
 
-    assert_equal_with_units(expected, result)
+    assert_equal_with_units(expected, actual)
 
 
 @pytest.mark.xfail(
@@ -1051,11 +1076,16 @@ def test_replication_full_like_dataarray(unit, error, dtype):
     if error is not None:
         with pytest.raises(error):
             xr.full_like(data_array, fill_value=fill_value)
-    else:
-        result = xr.full_like(data_array, fill_value=fill_value)
-        expected = np.full_like(array, fill_value=fill_value)
 
-        assert_equal_with_units(expected, result)
+        return
+
+    units = {**extract_units(data_array), **{None: unit if unit != 1 else None}}
+    expected = attach_units(
+        xr.full_like(strip_units(data_array), fill_value=strip_units(fill_value)), units
+    )
+    actual = xr.full_like(data_array, fill_value=fill_value)
+
+    assert_equal_with_units(expected, actual)
 
 
 @pytest.mark.xfail(
@@ -1096,18 +1126,18 @@ def test_replication_full_like_dataset(unit, error, dtype):
 
         return
 
-    expected = ds.copy(
-        data={
-            name: np.full_like(array, fill_value=fill_value)
-            for name, array in ds.data_vars.items()
-        }
+    units = {
+        **extract_units(ds),
+        **{name: unit if unit != 1 else None for name in ds.data_vars},
+    }
+    expected = attach_units(
+        xr.full_like(strip_units(ds), fill_value=strip_units(fill_value)), units
     )
-    result = xr.full_like(ds, fill_value=fill_value)
+    actual = xr.full_like(ds, fill_value=fill_value)
 
-    assert_equal_with_units(expected, result)
+    assert_equal_with_units(expected, actual)
 
 
-@pytest.mark.xfail(reason="`where` strips units")
 @pytest.mark.parametrize(
     "unit,error",
     (
@@ -1127,30 +1157,29 @@ def test_where_dataarray(fill_value, unit, error, dtype):
 
     x = xr.DataArray(data=array, dims="x")
     cond = x < 5 * unit_registry.m
-    # FIXME: this should work without wrapping in array()
-    fill_value = np.array(fill_value) * unit
+    fill_value = fill_value * unit
 
-    if error is not None:
+    if error is not None and not (
+        np.isnan(fill_value) and not isinstance(fill_value, Quantity)
+    ):
         with pytest.raises(error):
             xr.where(cond, x, fill_value)
 
         return
 
-    fill_value_ = (
-        fill_value.to(unit_registry.m)
-        if isinstance(fill_value, unit_registry.Quantity)
-        and fill_value.check(unit_registry.m)
-        else fill_value
-    )
     expected = attach_units(
-        xr.where(cond, strip_units(x), strip_units(fill_value_)), extract_units(x)
+        xr.where(
+            cond,
+            strip_units(x),
+            strip_units(convert_units(fill_value, {None: unit_registry.m})),
+        ),
+        extract_units(x),
     )
-    result = xr.where(cond, x, fill_value)
+    actual = xr.where(cond, x, fill_value)
 
-    assert_equal_with_units(expected, result)
+    assert_equal_with_units(expected, actual)
 
 
-@pytest.mark.xfail(reason="`where` strips units")
 @pytest.mark.parametrize(
     "unit,error",
     (
@@ -1171,31 +1200,30 @@ def test_where_dataset(fill_value, unit, error, dtype):
     x = np.arange(10) * unit_registry.s
 
     ds = xr.Dataset(data_vars={"a": ("x", array1), "b": ("x", array2)}, coords={"x": x})
-    cond = ds.x < 5 * unit_registry.s
-    # FIXME: this should work without wrapping in array()
-    fill_value = np.array(fill_value) * unit
+    cond = x < 5 * unit_registry.s
+    fill_value = fill_value * unit
 
-    if error is not None:
+    if error is not None and not (
+        np.isnan(fill_value) and not isinstance(fill_value, Quantity)
+    ):
         with pytest.raises(error):
             xr.where(cond, ds, fill_value)
 
         return
 
-    fill_value_ = (
-        fill_value.to(unit_registry.m)
-        if isinstance(fill_value, unit_registry.Quantity)
-        and fill_value.check(unit_registry.m)
-        else fill_value
-    )
     expected = attach_units(
-        xr.where(cond, strip_units(ds), strip_units(fill_value_)), extract_units(ds)
+        xr.where(
+            cond,
+            strip_units(ds),
+            strip_units(convert_units(fill_value, {None: unit_registry.m})),
+        ),
+        extract_units(ds),
     )
-    result = xr.where(cond, ds, fill_value)
+    actual = xr.where(cond, ds, fill_value)
 
-    assert_equal_with_units(expected, result)
+    assert_equal_with_units(expected, actual)
 
 
-@pytest.mark.xfail(reason="pint does not implement `np.einsum`")
 def test_dot_dataarray(dtype):
     array1 = (
         np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype)
@@ -1206,13 +1234,15 @@ def test_dot_dataarray(dtype):
         np.linspace(10, 20, 10 * 20).reshape(10, 20).astype(dtype) * unit_registry.s
     )
 
-    arr1 = xr.DataArray(data=array1, dims=("x", "y"))
-    arr2 = xr.DataArray(data=array2, dims=("y", "z"))
+    data_array = xr.DataArray(data=array1, dims=("x", "y"))
+    other = xr.DataArray(data=array2, dims=("y", "z"))
 
-    expected = array1.dot(array2)
-    result = xr.dot(arr1, arr2)
+    expected = attach_units(
+        xr.dot(strip_units(data_array), strip_units(other)), {None: unit_registry.m}
+    )
+    actual = xr.dot(data_array, other)
 
-    assert_equal_with_units(expected, result)
+    assert_equal_with_units(expected, actual)
 
 
 class TestDataArray:
@@ -1295,37 +1325,20 @@ def test_repr(self, func, variant, dtype):
                 function("any"),
                 marks=pytest.mark.xfail(reason="not implemented by pint yet"),
             ),
-            pytest.param(
-                function("argmax"),
-                marks=pytest.mark.xfail(
-                    reason="comparison of quantity with ndarrays in nanops not implemented"
-                ),
-            ),
-            pytest.param(
-                function("argmin"),
-                marks=pytest.mark.xfail(
-                    reason="comparison of quantity with ndarrays in nanops not implemented"
-                ),
-            ),
+            function("argmax"),
+            function("argmin"),
             function("max"),
             function("mean"),
             pytest.param(
                 function("median"),
-                marks=pytest.mark.xfail(
-                    reason="np.median on DataArray strips the units"
-                ),
+                marks=pytest.mark.xfail(reason="not implemented by xarray"),
             ),
             function("min"),
             pytest.param(
                 function("prod"),
                 marks=pytest.mark.xfail(reason="not implemented by pint yet"),
             ),
-            pytest.param(
-                function("sum"),
-                marks=pytest.mark.xfail(
-                    reason="comparison of quantity with ndarrays in nanops not implemented"
-                ),
-            ),
+            function("sum"),
             function("std"),
             function("var"),
             function("cumsum"),
@@ -1341,18 +1354,8 @@ def test_repr(self, func, variant, dtype):
                 method("any"),
                 marks=pytest.mark.xfail(reason="not implemented by pint yet"),
             ),
-            pytest.param(
-                method("argmax"),
-                marks=pytest.mark.xfail(
-                    reason="comparison of quantities with ndarrays in nanops not implemented"
-                ),
-            ),
-            pytest.param(
-                method("argmin"),
-                marks=pytest.mark.xfail(
-                    reason="comparison of quantities with ndarrays in nanops not implemented"
-                ),
-            ),
+            method("argmax"),
+            method("argmin"),
             method("max"),
             method("mean"),
             method("median"),
@@ -1363,12 +1366,7 @@ def test_repr(self, func, variant, dtype):
                     reason="comparison of quantity with ndarrays in nanops not implemented"
                 ),
             ),
-            pytest.param(
-                method("sum"),
-                marks=pytest.mark.xfail(
-                    reason="comparison of quantity with ndarrays in nanops not implemented"
-                ),
-            ),
+            method("sum"),
             method("std"),
             method("var"),
             method("cumsum"),
@@ -1380,34 +1378,36 @@ def test_repr(self, func, variant, dtype):
         ids=repr,
     )
     def test_aggregation(self, func, dtype):
-        array = np.arange(10).astype(dtype) * unit_registry.m
-        data_array = xr.DataArray(data=array)
+        array = np.arange(10).astype(dtype) * (
+            unit_registry.m if func.name != "cumprod" else unit_registry.dimensionless
+        )  # "cumprod" is exercised on dimensionless data instead of metres
+        data_array = xr.DataArray(data=array, dims="x")
 
-        expected = xr.DataArray(data=func(array))
-        result = func(data_array)
+        # units differ based on the applied function, so we need to
+        # first compute the units
+        units = extract_units(func(array))
+        expected = attach_units(func(strip_units(data_array)), units)
+        actual = func(data_array)
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.parametrize(
         "func",
         (
             pytest.param(operator.neg, id="negate"),
             pytest.param(abs, id="absolute"),
-            pytest.param(
-                np.round,
-                id="round",
-                marks=pytest.mark.xfail(reason="pint does not implement round"),
-            ),
+            pytest.param(np.round, id="round"),
         ),
     )
     def test_unary_operations(self, func, dtype):
         array = np.arange(10).astype(dtype) * unit_registry.m
         data_array = xr.DataArray(data=array)
 
-        expected = xr.DataArray(data=func(array))
-        result = func(data_array)
+        units = extract_units(func(array))  # units func yields on the raw quantity
+        expected = attach_units(func(strip_units(data_array)), units)
+        actual = func(data_array)  # same operation on the unit-aware DataArray
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.parametrize(
         "func",
@@ -1415,23 +1415,18 @@ def test_unary_operations(self, func, dtype):
             pytest.param(lambda x: 2 * x, id="multiply"),
             pytest.param(lambda x: x + x, id="add"),
             pytest.param(lambda x: x[0] + x, id="add scalar"),
-            pytest.param(
-                lambda x: x.T @ x,
-                id="matrix multiply",
-                marks=pytest.mark.xfail(
-                    reason="pint does not support matrix multiplication yet"
-                ),
-            ),
+            pytest.param(lambda x: x.T @ x, id="matrix multiply"),
         ),
     )
     def test_binary_operations(self, func, dtype):
         array = np.arange(10).astype(dtype) * unit_registry.m
         data_array = xr.DataArray(data=array)
 
-        expected = xr.DataArray(data=func(array))
-        result = func(data_array)
+        units = extract_units(func(array))  # units func yields on the raw quantity
+        expected = attach_units(func(strip_units(data_array)), units)
+        actual = func(data_array)  # same operation on the unit-aware DataArray
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.parametrize(
         "comparison",
@@ -1448,8 +1443,9 @@ def test_binary_operations(self, func, dtype):
             pytest.param(
                 unit_registry.dimensionless, DimensionalityError, id="dimensionless"
             ),
-            pytest.param(unit_registry.s, DimensionalityError, id="incorrect_unit"),
-            pytest.param(unit_registry.m, None, id="correct_unit"),
+            pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
+            pytest.param(unit_registry.mm, None, id="compatible_unit"),
+            pytest.param(unit_registry.m, None, id="identical_unit"),
         ),
     )
     def test_comparison_operations(self, comparison, unit, error, dtype):
@@ -1469,48 +1465,85 @@ def test_comparison_operations(self, comparison, unit, error, dtype):
 
             with pytest.raises(error):
                 comparison(data_array, to_compare_with)
-        else:
-            result = comparison(data_array, to_compare_with)
-            # pint compares incompatible arrays to False, so we need to extend
-            # the multiplication works for both scalar and array results
-            expected = xr.DataArray(
-                data=comparison(array, to_compare_with)
-                * np.ones_like(array, dtype=bool)
-            )
 
-            assert_equal_with_units(expected, result)
+            return
+
+        actual = comparison(data_array, to_compare_with)
+
+        expected_units = {None: unit_registry.m if array.check(unit) else None}
+        expected = array.check(unit) & comparison(
+            strip_units(data_array),
+            strip_units(convert_units(to_compare_with, expected_units)),
+        )
+
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.parametrize(
         "units,error",
         (
             pytest.param(unit_registry.dimensionless, None, id="dimensionless"),
-            pytest.param(unit_registry.m, DimensionalityError, id="incorrect unit"),
-            pytest.param(unit_registry.degree, None, id="correct unit"),
+            pytest.param(unit_registry.m, DimensionalityError, id="incompatible_unit"),
+            pytest.param(unit_registry.degree, None, id="compatible_unit"),
         ),
     )
     def test_univariate_ufunc(self, units, error, dtype):
         array = np.arange(10).astype(dtype) * units
         data_array = xr.DataArray(data=array)
 
+        func = function("sin")  # used for both the expected and the actual value
+
         if error is not None:
             with pytest.raises(error):
                 np.sin(data_array)
-        else:
-            expected = xr.DataArray(data=np.sin(array))
-            result = np.sin(data_array)
 
-            assert_equal_with_units(expected, result)
+            return  # error case is fully checked above
+
+        expected = attach_units(
+            func(strip_units(convert_units(data_array, {None: unit_registry.radians}))),
+            {None: unit_registry.dimensionless},
+        )  # sin of the data converted to radians, tagged as dimensionless
+        actual = func(data_array)
+
+        assert_equal_with_units(expected, actual)
 
-    @pytest.mark.xfail(reason="pint's implementation of `np.maximum` strips units")
-    def test_bivariate_ufunc(self, dtype):
-        unit = unit_registry.m
-        array = np.arange(10).astype(dtype) * unit
+    @pytest.mark.xfail(reason="xarray's `np.maximum` strips units")
+    @pytest.mark.parametrize(
+        "unit,error",
+        (
+            pytest.param(1, DimensionalityError, id="without_unit"),
+            pytest.param(
+                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
+            ),
+            pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
+            pytest.param(unit_registry.mm, None, id="compatible_unit"),
+            pytest.param(unit_registry.m, None, id="identical_unit"),
+        ),
+    )
+    def test_bivariate_ufunc(self, unit, error, dtype):
+        original_unit = unit_registry.m
+        array = np.arange(10).astype(dtype) * original_unit
         data_array = xr.DataArray(data=array)
 
-        expected = xr.DataArray(np.maximum(array, 0 * unit))
+        if error is not None:
+            with pytest.raises(error):
+                np.maximum(data_array, 0 * unit)
+
+            return  # error case is fully checked above
+
+        expected_units = {None: original_unit}  # result is expected in metres
+        expected = attach_units(
+            np.maximum(
+                strip_units(data_array),
+                strip_units(convert_units(0 * unit, expected_units)),
+            ),
+            expected_units,
+        )
+
+        actual = np.maximum(data_array, 0 * unit)
+        assert_equal_with_units(expected, actual)
 
-        assert_equal_with_units(expected, np.maximum(data_array, 0 * unit))
-        assert_equal_with_units(expected, np.maximum(0 * unit, data_array))
+        actual = np.maximum(0 * unit, data_array)  # swapped argument order
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.parametrize("property", ("T", "imag", "real"))
     def test_numpy_properties(self, property, dtype):
@@ -1518,41 +1551,43 @@ def test_numpy_properties(self, property, dtype):
             np.arange(5 * 10).astype(dtype)
             + 1j * np.linspace(-1, 0, 5 * 10).astype(dtype)
         ).reshape(5, 10) * unit_registry.s
+
         data_array = xr.DataArray(data=array, dims=("x", "y"))
 
-        expected = xr.DataArray(
-            data=getattr(array, property),
-            dims=("x", "y")[:: 1 if property != "T" else -1],
+        expected = attach_units(
+            getattr(strip_units(data_array), property), extract_units(data_array)
         )
-        result = getattr(data_array, property)
+        actual = getattr(data_array, property)
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.parametrize(
         "func",
-        (
-            method("conj"),
-            method("argsort"),
-            method("conjugate"),
-            method("round"),
-            pytest.param(
-                method("rank", dim="x"),
-                marks=pytest.mark.xfail(reason="pint does not implement rank yet"),
-            ),
-        ),
+        (method("conj"), method("argsort"), method("conjugate"), method("round")),
         ids=repr,
     )
     def test_numpy_methods(self, func, dtype):
         array = np.arange(10).astype(dtype) * unit_registry.m
         data_array = xr.DataArray(data=array, dims="x")
 
-        expected = xr.DataArray(func(array), dims="x")
-        result = func(data_array)
+        units = extract_units(func(array))  # units func yields on the raw quantity
+        expected = attach_units(func(strip_units(data_array)), units)
+        actual = func(data_array)  # same method on the unit-aware DataArray
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.parametrize(
-        "func", (method("clip", min=3, max=8), method("searchsorted", v=5)), ids=repr
+        "func",
+        (
+            method("clip", min=3, max=8),
+            pytest.param(
+                method("searchsorted", v=5),
+                marks=pytest.mark.xfail(
+                    reason="searchsorted somehow requires a undocumented `keys` argument"
+                ),
+            ),
+        ),
+        ids=repr,
     )
     @pytest.mark.parametrize(
         "unit,error",
@@ -1575,20 +1610,24 @@ def test_numpy_methods_with_args(self, func, unit, error, dtype):
             key: (value * unit if isinstance(value, scalar_types) else value)
             for key, value in func.kwargs.items()
         }
-
         if error is not None:
             with pytest.raises(error):
                 func(data_array, **kwargs)
-        else:
-            expected = func(array, **kwargs)
-            if func.name not in ["searchsorted"]:
-                expected = xr.DataArray(data=expected)
-            result = func(data_array, **kwargs)
 
-            if func.name in ["searchsorted"]:
-                assert np.allclose(expected, result)
-            else:
-                assert_equal_with_units(expected, result)
+            return
+
+        units = extract_units(data_array)
+        expected_units = extract_units(func(array, **kwargs))
+        stripped_kwargs = {
+            key: strip_units(convert_units(value, units))
+            for key, value in kwargs.items()
+        }
+        expected = attach_units(
+            func(strip_units(data_array), **stripped_kwargs), expected_units
+        )
+        actual = func(data_array, **kwargs)
+
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.parametrize(
         "func", (method("isnull"), method("notnull"), method("count")), ids=repr
@@ -1611,9 +1650,9 @@ def test_missing_value_detection(self, func, dtype):
         data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y"))
 
         expected = func(strip_units(data_array))
-        result = func(data_array)
+        actual = func(data_array)
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.xfail(reason="ffill and bfill lose units in data")
     @pytest.mark.parametrize("func", (method("ffill"), method("bfill")), ids=repr)
@@ -1623,48 +1662,67 @@ def test_missing_value_filling(self, func, dtype):
             * unit_registry.degK
         )
         x = np.arange(len(array))
-        data_array = xr.DataArray(data=array, coords={"x": x}, dims=["x"])
-
-        result_without_units = func(strip_units(data_array), dim="x")
-        result = xr.DataArray(
-            data=result_without_units.data * unit_registry.degK,
-            coords={"x": x},
-            dims=["x"],
-        )
+        data_array = xr.DataArray(data=array, coords={"x": x}, dims="x")
 
         expected = attach_units(
-            func(strip_units(data_array), dim="x"), {"data": unit_registry.degK}
+            func(strip_units(data_array), dim="x"), extract_units(data_array)
         )
-        result = func(data_array, dim="x")
+        actual = func(data_array, dim="x")
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
-    @pytest.mark.xfail(reason="fillna drops the unit")
     @pytest.mark.parametrize(
-        "fill_value",
+        "unit,error",
         (
+            pytest.param(1, DimensionalityError, id="no_unit"),
             pytest.param(
-                -1,
-                id="python scalar",
-                marks=pytest.mark.xfail(
-                    reason="python scalar cannot be converted using astype()"
-                ),
+                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
+            ),
+            pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
+            pytest.param(
+                unit_registry.cm,
+                None,
+                id="compatible_unit",
+                marks=pytest.mark.xfail(reason="fillna converts to value's unit"),
             ),
-            pytest.param(np.array(-1), id="numpy scalar"),
-            pytest.param(np.array([-1]), id="numpy array"),
+            pytest.param(unit_registry.m, None, id="identical_unit"),
+        ),
+    )
+    @pytest.mark.parametrize(
+        "fill_value",
+        (
+            pytest.param(-1, id="python_scalar"),
+            pytest.param(np.array(-1), id="numpy_scalar"),
+            pytest.param(np.array([-1]), id="numpy_array"),
         ),
     )
-    def test_fillna(self, fill_value, dtype):
-        unit = unit_registry.m
-        array = np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype) * unit
+    def test_fillna(self, fill_value, unit, error, dtype):
+        original_unit = unit_registry.m
+        array = (
+            np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype)
+            * original_unit
+        )
         data_array = xr.DataArray(data=array)
 
+        func = method("fillna")
+
+        value = fill_value * unit  # fill value carrying the parametrized unit
+        if error is not None:
+            with pytest.raises(error):
+                func(data_array, value=value)
+
+            return  # error case is fully checked above
+
+        units = extract_units(data_array)  # the fill value is converted to these
         expected = attach_units(
-            strip_units(data_array).fillna(value=fill_value), {"data": unit}
+            func(
+                strip_units(data_array), value=strip_units(convert_units(value, units))
+            ),
+            units,
         )
-        result = data_array.fillna(value=fill_value * unit)
+        actual = func(data_array, value=value)
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
     def test_dropna(self, dtype):
         array = (
@@ -1674,22 +1732,26 @@ def test_dropna(self, dtype):
         x = np.arange(len(array))
         data_array = xr.DataArray(data=array, coords={"x": x}, dims=["x"])
 
-        expected = attach_units(
-            strip_units(data_array).dropna(dim="x"), {"data": unit_registry.m}
-        )
-        result = data_array.dropna(dim="x")
+        units = extract_units(data_array)
+        expected = attach_units(strip_units(data_array).dropna(dim="x"), units)
+        actual = data_array.dropna(dim="x")
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
-    @pytest.mark.xfail(reason="pint does not implement `numpy.isin`")
     @pytest.mark.parametrize(
         "unit",
         (
-            pytest.param(1, id="no_unit"),
+            pytest.param(
+                1,
+                id="no_unit",
+                marks=pytest.mark.xfail(
+                    reason="pint's isin implementation does not work well with mixed args"
+                ),
+            ),
             pytest.param(unit_registry.dimensionless, id="dimensionless"),
             pytest.param(unit_registry.s, id="incompatible_unit"),
             pytest.param(unit_registry.cm, id="compatible_unit"),
-            pytest.param(unit_registry.m, id="same_unit"),
+            pytest.param(unit_registry.m, id="identical_unit"),
         ),
     )
     def test_isin(self, unit, dtype):
@@ -1702,33 +1764,26 @@ def test_isin(self, unit, dtype):
         raw_values = np.array([1.4, np.nan, 2.3]).astype(dtype)
         values = raw_values * unit
 
-        result_without_units = strip_units(data_array).isin(raw_values)
-        if unit != unit_registry.m:
-            result_without_units[:] = False
-        result_with_units = data_array.isin(values)
+        units = {None: unit_registry.m if array.check(unit) else None}
+        expected = strip_units(data_array).isin(
+            strip_units(convert_units(values, units))
+        ) & array.check(unit)
+        actual = data_array.isin(values)
 
-        assert_equal_with_units(result_without_units, result_with_units)
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.parametrize(
         "variant",
         (
             pytest.param(
                 "masking",
-                marks=pytest.mark.xfail(reason="nan not compatible with quantity"),
-            ),
-            pytest.param(
-                "replacing_scalar",
-                marks=pytest.mark.xfail(reason="scalar not convertible using astype"),
-            ),
-            pytest.param(
-                "replacing_array",
-                marks=pytest.mark.xfail(
-                    reason="replacing using an array drops the units"
-                ),
+                marks=pytest.mark.xfail(reason="array(nan) is not a quantity"),
             ),
+            "replacing_scalar",
+            "replacing_array",
             pytest.param(
                 "dropping",
-                marks=pytest.mark.xfail(reason="nan not compatible with quantity"),
+                marks=pytest.mark.xfail(reason="array(nan) is not a quantity"),
             ),
         ),
     )
@@ -1741,13 +1796,10 @@ def test_isin(self, unit, dtype):
             ),
             pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
             pytest.param(unit_registry.cm, None, id="compatible_unit"),
-            pytest.param(unit_registry.m, None, id="same_unit"),
+            pytest.param(unit_registry.m, None, id="identical_unit"),
         ),
     )
     def test_where(self, variant, unit, error, dtype):
-        def _strip_units(mapping):
-            return {key: array_strip_units(value) for key, value in mapping.items()}
-
         original_unit = unit_registry.m
         array = np.linspace(0, 1, 10).astype(dtype) * original_unit
 
@@ -1762,19 +1814,28 @@ def _strip_units(mapping):
             "dropping": {"cond": condition, "drop": True},
         }
         kwargs = variant_kwargs.get(variant)
-        kwargs_without_units = _strip_units(kwargs)
+        kwargs_without_units = {
+            key: strip_units(
+                convert_units(
+                    value, {None: original_unit if array.check(unit) else None}
+                )
+            )
+            for key, value in kwargs.items()
+        }
 
         if variant not in ("masking", "dropping") and error is not None:
             with pytest.raises(error):
                 data_array.where(**kwargs)
-        else:
-            expected = attach_units(
-                strip_units(array).where(**kwargs_without_units),
-                {"data": original_unit},
-            )
-            result = data_array.where(**kwargs)
 
-            assert_equal_with_units(expected, result)
+            return
+
+        expected = attach_units(
+            strip_units(data_array).where(**kwargs_without_units),
+            extract_units(data_array),
+        )
+        actual = data_array.where(**kwargs)
+
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.xfail(reason="interpolate strips units")
     def test_interpolate_na(self, dtype):
@@ -1785,14 +1846,12 @@ def test_interpolate_na(self, dtype):
         x = np.arange(len(array))
         data_array = xr.DataArray(data=array, coords={"x": x}, dims="x").astype(dtype)
 
-        expected = attach_units(
-            strip_units(data_array).interpolate_na(dim="x"), {"data": unit_registry.m}
-        )
-        result = data_array.interpolate_na(dim="x")
+        units = extract_units(data_array)
+        expected = attach_units(strip_units(data_array).interpolate_na(dim="x"), units)
+        actual = data_array.interpolate_na(dim="x")
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
-    @pytest.mark.xfail(reason="uses DataArray.where, which currently fails")
     @pytest.mark.parametrize(
         "unit,error",
         (
@@ -1801,8 +1860,18 @@ def test_interpolate_na(self, dtype):
                 unit_registry.dimensionless, DimensionalityError, id="dimensionless"
             ),
             pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
-            pytest.param(unit_registry.cm, None, id="compatible_unit"),
-            pytest.param(unit_registry.m, None, id="identical_unit"),
+            pytest.param(
+                unit_registry.cm,
+                None,
+                id="compatible_unit",
+                marks=pytest.mark.xfail(reason="depends on reindex"),
+            ),
+            pytest.param(
+                unit_registry.m,
+                None,
+                id="identical_unit",
+                marks=pytest.mark.xfail(reason="depends on reindex"),
+            ),
         ),
     )
     def test_combine_first(self, unit, error, dtype):
@@ -1819,14 +1888,19 @@ def test_combine_first(self, unit, error, dtype):
         if error is not None:
             with pytest.raises(error):
                 data_array.combine_first(other)
-        else:
-            expected = attach_units(
-                strip_units(data_array).combine_first(strip_units(other)),
-                {"data": unit_registry.m},
-            )
-            result = data_array.combine_first(other)
 
-            assert_equal_with_units(expected, result)
+            return
+
+        units = extract_units(data_array)
+        expected = attach_units(
+            strip_units(data_array).combine_first(
+                strip_units(convert_units(other, units))
+            ),
+            units,
+        )
+        actual = data_array.combine_first(other)
+
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.parametrize(
         "unit",
@@ -1834,11 +1908,7 @@ def test_combine_first(self, unit, error, dtype):
             pytest.param(1, id="no_unit"),
             pytest.param(unit_registry.dimensionless, id="dimensionless"),
             pytest.param(unit_registry.s, id="incompatible_unit"),
-            pytest.param(
-                unit_registry.cm,
-                id="compatible_unit",
-                marks=pytest.mark.xfail(reason="identical does not check units yet"),
-            ),
+            pytest.param(unit_registry.cm, id="compatible_unit"),
             pytest.param(unit_registry.m, id="identical_unit"),
         ),
     )
@@ -1854,53 +1924,51 @@ def test_combine_first(self, unit, error, dtype):
     )
     @pytest.mark.parametrize("func", (method("equals"), method("identical")), ids=repr)
     def test_comparisons(self, func, variation, unit, dtype):
+        def is_compatible(a, b):
+            a = a if a is not None else 1  # None is replaced by the plain number 1
+            b = b if b is not None else 1
+            quantity = np.arange(5) * a
+
+            return a == b or quantity.check(b)
+
         data = np.linspace(0, 5, 10).astype(dtype)
         coord = np.arange(len(data)).astype(dtype)
 
         base_unit = unit_registry.m
-        quantity = data * base_unit
-        x = coord * base_unit
-        y = coord * base_unit
-
-        units = {
-            "data": (unit, base_unit, base_unit),
-            "dims": (base_unit, unit, base_unit),
-            "coords": (base_unit, base_unit, unit),
+        array = data * (base_unit if variation == "data" else 1)  # only the varied
+        x = coord * (base_unit if variation == "dims" else 1)  # component gets
+        y = coord * (base_unit if variation == "coords" else 1)  # base_unit
+
+        variations = {
+            "data": (unit, 1, 1),
+            "dims": (1, unit, 1),
+            "coords": (1, 1, unit),
         }
-        data_unit, dim_unit, coord_unit = units.get(variation)
+        data_unit, dim_unit, coord_unit = variations.get(variation)
 
-        data_array = xr.DataArray(
-            data=quantity, coords={"x": x, "y": ("x", y)}, dims="x"
-        )
+        data_array = xr.DataArray(data=array, coords={"x": x, "y": ("x", y)}, dims="x")
 
         other = attach_units(
-            strip_units(data_array),
-            {
-                None: (data_unit, base_unit if quantity.check(data_unit) else None),
-                "x": (dim_unit, base_unit if x.check(dim_unit) else None),
-                "y": (coord_unit, base_unit if y.check(coord_unit) else None),
-            },
+            strip_units(data_array), {None: data_unit, "x": dim_unit, "y": coord_unit}
         )
 
-        # TODO: test dim coord once indexes leave units intact
-        # also, express this in terms of calls on the raw data array
-        # and then check the units
-        equal_arrays = (
-            np.all(quantity == other.data)
-            and (np.all(x == other.x.data) or True)  # dims can't be checked yet
-            and np.all(y == other.y.data)
-        )
-        equal_units = (
-            data_unit == unit_registry.m
-            and coord_unit == unit_registry.m
-            and dim_unit == unit_registry.m
+        units = extract_units(data_array)
+        other_units = extract_units(other)
+
+        equal_arrays = all(
+            is_compatible(units[name], other_units[name]) for name in units.keys()
+        ) and (
+            strip_units(data_array).equals(
+                strip_units(convert_units(other, extract_units(data_array)))
+            )
         )
+        equal_units = units == other_units  # only required for "identical"
         expected = equal_arrays and (func.name != "identical" or equal_units)
-        result = func(data_array, other)
 
-        assert expected == result
+        actual = func(data_array, other)
+
+        assert expected == actual
 
-    @pytest.mark.xfail(reason="blocked by `where`")
     @pytest.mark.parametrize(
         "unit",
         (
@@ -1926,9 +1994,9 @@ def test_broadcast_like(self, unit, dtype):
         expected = attach_units(
             strip_units(arr1).broadcast_like(strip_units(arr2)), extract_units(arr1)
         )
-        result = arr1.broadcast_like(arr2)
+        actual = arr1.broadcast_like(arr2)
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.parametrize(
         "unit",
@@ -1942,19 +2010,21 @@ def test_broadcast_like(self, unit, dtype):
     )
     def test_broadcast_equals(self, unit, dtype):
         left_array = np.ones(shape=(2, 2), dtype=dtype) * unit_registry.m
-        right_array = array_attach_units(
-            np.ones(shape=(2,), dtype=dtype),
-            unit,
-            convert_from=unit_registry.m if left_array.check(unit) else None,
-        )
+        right_array = np.ones(shape=(2,), dtype=dtype) * unit
 
         left = xr.DataArray(data=left_array, dims=("x", "y"))
         right = xr.DataArray(data=right_array, dims="x")
 
-        expected = np.all(left_array == right_array[:, None])
-        result = left.broadcast_equals(right)
+        units = {
+            **extract_units(left),
+            **({} if left_array.check(unit) else {None: None}),
+        }  # the data entry becomes None when `unit` fails the check against left
+        expected = strip_units(left).broadcast_equals(
+            strip_units(convert_units(right, units))
+        ) & left_array.check(unit)  # a failed unit check forces expected to False
+        actual = left.broadcast_equals(right)
 
-        assert expected == result
+        assert expected == actual
 
     @pytest.mark.parametrize(
         "func",
@@ -1969,16 +2039,11 @@ def test_broadcast_equals(self, unit, dtype):
                 dim={"z": np.linspace(10, 20, 12) * unit_registry.s},
                 axis=1,
             ),
-            method("drop_sel", labels="x"),
+            method("drop_vars", "x"),
             method("reset_coords", names="x2"),
             method("copy"),
-            pytest.param(
-                method("astype", np.float32),
-                marks=pytest.mark.xfail(reason="units get stripped"),
-            ),
-            pytest.param(
-                method("item", 1), marks=pytest.mark.xfail(reason="units get stripped")
-            ),
+            method("astype", np.float32),
+            method("item", 1),
         ),
         ids=repr,
     )
@@ -2001,67 +2066,38 @@ def test_content_manipulation(self, func, dtype):
         stripped_kwargs = {
             key: array_strip_units(value) for key, value in func.kwargs.items()
         }
-        expected = attach_units(
-            func(strip_units(data_array), **stripped_kwargs),
-            {
-                "data": quantity.units,
-                "x": x.units,
-                "x_mm": x2.units,
-                "x2": x2.units,
-                "y": y.units,
-            },
-        )
-        result = func(data_array)
+        units = {**{"x_mm": x2.units, "x2": x2.units}, **extract_units(data_array)}
 
-        assert_equal_with_units(expected, result)
+        expected = attach_units(func(strip_units(data_array), **stripped_kwargs), units)
+        actual = func(data_array)
+
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.parametrize(
-        "func",
-        (
-            pytest.param(
-                method("drop_sel", labels=dict(x=np.array([1, 5]))),
-                marks=pytest.mark.xfail(
-                    reason="selecting using incompatible units does not raise"
-                ),
-            ),
-            pytest.param(method("copy", data=np.arange(20))),
-        ),
-        ids=repr,
+        "func", (pytest.param(method("copy", data=np.arange(20))),), ids=repr
     )
     @pytest.mark.parametrize(
-        "unit,error",
+        "unit",
         (
-            pytest.param(1, DimensionalityError, id="no_unit"),
-            pytest.param(
-                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
-            ),
-            pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
-            pytest.param(unit_registry.cm, KeyError, id="compatible_unit"),
-            pytest.param(unit_registry.m, None, id="identical_unit"),
+            pytest.param(1, id="no_unit"),
+            pytest.param(unit_registry.dimensionless, id="dimensionless"),
+            pytest.param(unit_registry.degK, id="with_unit"),
         ),
     )
-    def test_content_manipulation_with_units(self, func, unit, error, dtype):
+    def test_content_manipulation_with_units(self, func, unit, dtype):
         quantity = np.linspace(0, 10, 20, dtype=dtype) * unit_registry.pascal
         x = np.arange(len(quantity)) * unit_registry.m
 
-        data_array = xr.DataArray(name="data", data=quantity, coords={"x": x}, dims="x")
+        data_array = xr.DataArray(data=quantity, coords={"x": x}, dims="x")
 
-        kwargs = {
-            key: (value * unit if isinstance(value, np.ndarray) else value)
-            for key, value in func.kwargs.items()
-        }
-        stripped_kwargs = func.kwargs
+        kwargs = {key: value * unit for key, value in func.kwargs.items()}
 
         expected = attach_units(
-            func(strip_units(data_array), **stripped_kwargs),
-            {"data": quantity.units if func.name == "drop_sel" else unit, "x": x.units},
+            func(strip_units(data_array)), {None: unit, "x": x.units}
         )
-        if error is not None and func.name == "drop_sel":
-            with pytest.raises(error):
-                func(data_array, **kwargs)
-        else:
-            result = func(data_array, **kwargs)
-            assert_equal_with_units(expected, result)
+
+        actual = func(data_array, **kwargs)  # kwargs carry the parametrized unit
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.parametrize(
         "indices",
@@ -2074,95 +2110,152 @@ def test_isel(self, indices, dtype):
         array = np.arange(10).astype(dtype) * unit_registry.s
         x = np.arange(len(array)) * unit_registry.m
 
-        data_array = xr.DataArray(data=array, coords={"x": x}, dims=["x"])
+        data_array = xr.DataArray(data=array, coords={"x": x}, dims="x")
 
         expected = attach_units(
-            strip_units(data_array).isel(x=indices),
-            {"data": unit_registry.s, "x": unit_registry.m},
+            strip_units(data_array).isel(x=indices), extract_units(data_array)
         )
-        result = data_array.isel(x=indices)
+        actual = data_array.isel(x=indices)
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
-    @pytest.mark.xfail(
-        reason="xarray does not support duck arrays in dimension coordinates"
-    )
+    @pytest.mark.xfail(reason="indexes don't support units")
     @pytest.mark.parametrize(
-        "values",
+        "raw_values",
         (
-            pytest.param(12, id="single value"),
-            pytest.param([10, 5, 13], id="list of multiple values"),
-            pytest.param(np.array([9, 3, 7, 12]), id="array of multiple values"),
+            pytest.param(10, id="single_value"),
+            pytest.param([10, 5, 13], id="list_of_values"),
+            pytest.param(np.array([9, 3, 7, 12]), id="array_of_values"),
         ),
     )
     @pytest.mark.parametrize(
-        "units,error",
+        "unit,error",
         (
-            pytest.param(1, KeyError, id="no units"),
+            pytest.param(1, KeyError, id="no_units"),
             pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"),
-            pytest.param(unit_registry.degree, KeyError, id="incorrect unit"),
-            pytest.param(unit_registry.s, None, id="correct unit"),
+            pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"),
+            pytest.param(unit_registry.dm, KeyError, id="compatible_unit"),
+            pytest.param(unit_registry.m, None, id="identical_unit"),
         ),
     )
-    def test_sel(self, values, units, error, dtype):
+    def test_sel(self, raw_values, unit, error, dtype):
         array = np.linspace(5, 10, 20).astype(dtype) * unit_registry.m
-        x = np.arange(len(array)) * unit_registry.s
-        data_array = xr.DataArray(data=array, coords={"x": x}, dims=["x"])
+        x = np.arange(len(array)) * unit_registry.m
+        data_array = xr.DataArray(data=array, coords={"x": x}, dims="x")
 
-        values_with_units = values * units
+        values = raw_values * unit
 
-        if error is not None:
+        if error is not None and not (
+            isinstance(raw_values, (int, float)) and x.check(unit)
+        ):
             with pytest.raises(error):
-                data_array.sel(x=values_with_units)
-        else:
-            result_array = array[values]
-            result_data_array = data_array.sel(x=values_with_units)
-            assert_equal_with_units(result_array, result_data_array)
+                data_array.sel(x=values)
+
+            return
 
-    @pytest.mark.xfail(
-        reason="xarray does not support duck arrays in dimension coordinates"
+        expected = attach_units(
+            strip_units(data_array).sel(
+                x=strip_units(convert_units(values, {None: x.units}))
+            ),
+            extract_units(data_array),
+        )
+        actual = data_array.sel(x=values)
+        assert_equal_with_units(expected, actual)
+
+    @pytest.mark.xfail(reason="indexes don't support units")
+    @pytest.mark.parametrize(
+        "raw_values",
+        (
+            pytest.param(10, id="single_value"),
+            pytest.param([10, 5, 13], id="list_of_values"),
+            pytest.param(np.array([9, 3, 7, 12]), id="array_of_values"),
+        ),
+    )
+    @pytest.mark.parametrize(
+        "unit,error",
+        (
+            pytest.param(1, KeyError, id="no_units"),
+            pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"),
+            pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"),
+            pytest.param(unit_registry.dm, KeyError, id="compatible_unit"),
+            pytest.param(unit_registry.m, None, id="identical_unit"),
+        ),
     )
+    def test_loc(self, raw_values, unit, error, dtype):
+        array = np.linspace(5, 10, 20).astype(dtype) * unit_registry.m
+        x = np.arange(len(array)) * unit_registry.m
+        data_array = xr.DataArray(data=array, coords={"x": x}, dims="x")
+
+        values = raw_values * unit
+
+        if error is not None and not (
+            isinstance(raw_values, (int, float)) and x.check(unit)
+        ):
+            with pytest.raises(error):
+                data_array.loc[{"x": values}]
+
+            return
+
+        expected = attach_units(
+            strip_units(data_array).loc[
+                {"x": strip_units(convert_units(values, {None: x.units}))}
+            ],
+            extract_units(data_array),
+        )
+        actual = data_array.loc[{"x": values}]
+        assert_equal_with_units(expected, actual)
+
+    @pytest.mark.xfail(reason="indexes don't support units")
     @pytest.mark.parametrize(
-        "values",
+        "raw_values",
         (
-            pytest.param(12, id="single value"),
-            pytest.param([10, 5, 13], id="list of multiple values"),
-            pytest.param(np.array([9, 3, 7, 12]), id="array of multiple values"),
+            pytest.param(10, id="single_value"),
+            pytest.param([10, 5, 13], id="list_of_values"),
+            pytest.param(np.array([9, 3, 7, 12]), id="array_of_values"),
         ),
     )
     @pytest.mark.parametrize(
-        "units,error",
+        "unit,error",
         (
-            pytest.param(1, KeyError, id="no units"),
+            pytest.param(1, KeyError, id="no_units"),
             pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"),
-            pytest.param(unit_registry.degree, KeyError, id="incorrect unit"),
-            pytest.param(unit_registry.s, None, id="correct unit"),
+            pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"),
+            pytest.param(unit_registry.dm, KeyError, id="compatible_unit"),
+            pytest.param(unit_registry.m, None, id="identical_unit"),
         ),
     )
-    def test_loc(self, values, units, error, dtype):
+    def test_drop_sel(self, raw_values, unit, error, dtype):
         array = np.linspace(5, 10, 20).astype(dtype) * unit_registry.m
-        x = np.arange(len(array)) * unit_registry.s
-        data_array = xr.DataArray(data=array, coords={"x": x}, dims=["x"])
+        x = np.arange(len(array)) * unit_registry.m
+        data_array = xr.DataArray(data=array, coords={"x": x}, dims="x")
 
-        values_with_units = values * units
+        values = raw_values * unit
 
-        if error is not None:
+        if error is not None and not (
+            isinstance(raw_values, (int, float)) and x.check(unit)
+        ):
             with pytest.raises(error):
-                data_array.loc[values_with_units]
-        else:
-            result_array = array[values]
-            result_data_array = data_array.loc[values_with_units]
-            assert_equal_with_units(result_array, result_data_array)
+                data_array.drop_sel(x=values)
+
+            return
+
+        expected = attach_units(
+            strip_units(data_array).drop_sel(
+                x=strip_units(convert_units(values, {None: x.units}))
+            ),
+            extract_units(data_array),
+        )
+        actual = data_array.drop_sel(x=values)
+        assert_equal_with_units(expected, actual)
 
-    @pytest.mark.xfail(reason="tries to coerce using asarray")
     @pytest.mark.parametrize(
         "shape",
         (
-            pytest.param((10, 20), id="nothing squeezable"),
-            pytest.param((10, 20, 1), id="last dimension squeezable"),
-            pytest.param((10, 1, 20), id="middle dimension squeezable"),
-            pytest.param((1, 10, 20), id="first dimension squeezable"),
-            pytest.param((1, 10, 1, 20), id="first and last dimension squeezable"),
+            pytest.param((10, 20), id="nothing_squeezable"),
+            pytest.param((10, 20, 1), id="last_dimension_squeezable"),
+            pytest.param((10, 1, 20), id="middle_dimension_squeezable"),
+            pytest.param((1, 10, 20), id="first_dimension_squeezable"),
+            pytest.param((1, 10, 1, 20), id="first_and_last_dimension_squeezable"),
         ),
     )
     def test_squeeze(self, shape, dtype):
@@ -2177,38 +2270,27 @@ def test_squeeze(self, shape, dtype):
             data=array, coords=coords, dims=tuple(names[: len(shape)])
         )
 
-        result_array = array.squeeze()
-        result_data_array = data_array.squeeze()
-        assert_equal_with_units(result_array, result_data_array)
+        expected = attach_units(
+            strip_units(data_array).squeeze(), extract_units(data_array)
+        )
+        actual = data_array.squeeze()
+        assert_equal_with_units(expected, actual)
 
         # try squeezing the dimensions separately
         names = tuple(dim for dim, coord in coords.items() if len(coord) == 1)
         for index, name in enumerate(names):
-            assert_equal_with_units(
-                np.squeeze(array, axis=index), data_array.squeeze(dim=name)
+            expected = attach_units(
+                strip_units(data_array).squeeze(dim=name), extract_units(data_array)
             )
+            actual = data_array.squeeze(dim=name)
+            assert_equal_with_units(expected, actual)
 
-    @pytest.mark.xfail(
-        reason="indexes strip units and head / tail / thin only support integers"
-    )
-    @pytest.mark.parametrize(
-        "unit,error",
-        (
-            pytest.param(1, DimensionalityError, id="no_unit"),
-            pytest.param(
-                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
-            ),
-            pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
-            pytest.param(unit_registry.cm, None, id="compatible_unit"),
-            pytest.param(unit_registry.m, None, id="identical_unit"),
-        ),
-    )
     @pytest.mark.parametrize(
         "func",
         (method("head", x=7, y=3), method("tail", x=7, y=3), method("thin", x=7, y=3)),
         ids=repr,
     )
-    def test_head_tail_thin(self, func, unit, error, dtype):
+    def test_head_tail_thin(self, func, dtype):
         array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK
 
         coords = {
@@ -2216,27 +2298,24 @@ def test_head_tail_thin(self, func, unit, error, dtype):
             "y": np.arange(5) * unit_registry.m,
         }
 
-        arr = xr.DataArray(data=array, coords=coords, dims=("x", "y"))
+        data_array = xr.DataArray(data=array, coords=coords, dims=("x", "y"))
 
-        kwargs = {name: value * unit for name, value in func.kwargs.items()}
-
-        if error is not None:
-            with pytest.raises(error):
-                func(arr, **kwargs)
-
-            return
-
-        expected = attach_units(func(strip_units(arr)), extract_units(arr))
-        result = func(arr, **kwargs)
+        expected = attach_units(
+            func(strip_units(data_array)), extract_units(data_array)
+        )
+        actual = func(data_array)
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
+    @pytest.mark.xfail(reason="indexes don't support units")
     @pytest.mark.parametrize(
         "unit,error",
         (
-            pytest.param(1, None, id="no_unit"),
-            pytest.param(unit_registry.dimensionless, None, id="dimensionless"),
-            pytest.param(unit_registry.s, None, id="incompatible_unit"),
+            pytest.param(1, DimensionalityError, id="no_unit"),
+            pytest.param(
+                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
+            ),
+            pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
             pytest.param(unit_registry.cm, None, id="compatible_unit"),
             pytest.param(unit_registry.m, None, id="identical_unit"),
         ),
@@ -2254,24 +2333,29 @@ def test_interp(self, unit, error):
         if error is not None:
             with pytest.raises(error):
                 data_array.interp(x=new_coords)
-        else:
-            new_coords_ = (
-                new_coords.magnitude if hasattr(new_coords, "magnitude") else new_coords
-            )
-            result_array = strip_units(data_array).interp(
-                x=new_coords_ * unit_registry.degK
-            )
-            result_data_array = data_array.interp(x=new_coords)
 
-            assert_equal_with_units(result_array, result_data_array)
+            return
+
+        units = extract_units(data_array)
+        expected = attach_units(
+            strip_units(data_array).interp(
+                x=strip_units(convert_units(new_coords, {None: unit_registry.m}))
+            ),
+            units,
+        )
+        actual = data_array.interp(x=new_coords)
+
+        assert_equal_with_units(expected, actual)
 
-    @pytest.mark.xfail(reason="tries to coerce using asarray")
+    @pytest.mark.xfail(reason="indexes strip units")
     @pytest.mark.parametrize(
         "unit,error",
         (
-            pytest.param(1, None, id="no_unit"),
-            pytest.param(unit_registry.dimensionless, None, id="dimensionless"),
-            pytest.param(unit_registry.s, None, id="incompatible_unit"),
+            pytest.param(1, DimensionalityError, id="no_unit"),
+            pytest.param(
+                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
+            ),
+            pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
             pytest.param(unit_registry.cm, None, id="compatible_unit"),
             pytest.param(unit_registry.m, None, id="identical_unit"),
         ),
@@ -2284,43 +2368,46 @@ def test_interp_like(self, unit, error):
         }
 
         data_array = xr.DataArray(array, coords=coords, dims=("x", "y"))
-        new_data_array = xr.DataArray(
-            data=np.empty((20, 10)),
+        other = xr.DataArray(
+            data=np.empty((20, 10)) * unit_registry.degK,
             coords={"x": np.arange(20) * unit, "y": np.arange(10) * unit},
             dims=("x", "y"),
         )
 
         if error is not None:
             with pytest.raises(error):
-                data_array.interp_like(new_data_array)
-        else:
-            result_array = (
-                xr.DataArray(
-                    data=array.magnitude,
-                    coords={name: value.magnitude for name, value in coords.items()},
-                    dims=("x", "y"),
-                ).interp_like(strip_units(new_data_array))
-                * unit_registry.degK
-            )
-            result_data_array = data_array.interp_like(new_data_array)
+                data_array.interp_like(other)
 
-            assert_equal_with_units(result_array, result_data_array)
+            return
 
-    @pytest.mark.xfail(
-        reason="pint does not implement np.result_type in __array_function__ yet"
-    )
+        units = extract_units(data_array)
+        expected = attach_units(
+            strip_units(data_array).interp_like(
+                strip_units(convert_units(other, units))
+            ),
+            units,
+        )
+        actual = data_array.interp_like(other)
+
+        assert_equal_with_units(expected, actual)
+
+    @pytest.mark.xfail(reason="indexes don't support units")
     @pytest.mark.parametrize(
         "unit,error",
         (
-            pytest.param(1, None, id="no_unit"),
-            pytest.param(unit_registry.dimensionless, None, id="dimensionless"),
-            pytest.param(unit_registry.s, None, id="incompatible_unit"),
+            pytest.param(1, DimensionalityError, id="no_unit"),
+            pytest.param(
+                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
+            ),
+            pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
             pytest.param(unit_registry.cm, None, id="compatible_unit"),
             pytest.param(unit_registry.m, None, id="identical_unit"),
         ),
     )
-    def test_reindex(self, unit, error):
-        array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK
+    def test_reindex(self, unit, error, dtype):
+        array = (
+            np.linspace(1, 2, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK
+        )
         new_coords = (np.arange(10) + 0.5) * unit
         coords = {
             "x": np.arange(10) * unit_registry.m,
@@ -2328,65 +2415,70 @@ def test_reindex(self, unit, error):
         }
 
         data_array = xr.DataArray(array, coords=coords, dims=("x", "y"))
+        func = method("reindex")
 
         if error is not None:
             with pytest.raises(error):
-                data_array.interp(x=new_coords)
-        else:
-            result_array = strip_units(data_array).reindex(
-                x=(
-                    new_coords.magnitude
-                    if hasattr(new_coords, "magnitude")
-                    else new_coords
-                )
-                * unit_registry.degK
-            )
-            result_data_array = data_array.reindex(x=new_coords)
+                func(data_array, x=new_coords)
 
-            assert_equal_with_units(result_array, result_data_array)
+            return
 
-    @pytest.mark.xfail(
-        reason="pint does not implement np.result_type in __array_function__ yet"
-    )
+        expected = attach_units(
+            func(
+                strip_units(data_array),
+                x=strip_units(convert_units(new_coords, {None: unit_registry.m})),
+            ),
+            {None: unit_registry.degK},
+        )
+        actual = func(data_array, x=new_coords)
+
+        assert_equal_with_units(expected, actual)
+
+    @pytest.mark.xfail(reason="indexes don't support units")
     @pytest.mark.parametrize(
         "unit,error",
         (
-            pytest.param(1, None, id="no_unit"),
-            pytest.param(unit_registry.dimensionless, None, id="dimensionless"),
-            pytest.param(unit_registry.s, None, id="incompatible_unit"),
+            pytest.param(1, DimensionalityError, id="no_unit"),
+            pytest.param(
+                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
+            ),
+            pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
             pytest.param(unit_registry.cm, None, id="compatible_unit"),
             pytest.param(unit_registry.m, None, id="identical_unit"),
         ),
     )
-    def test_reindex_like(self, unit, error):
-        array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK
+    def test_reindex_like(self, unit, error, dtype):
+        array = (
+            np.linspace(1, 2, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK
+        )
         coords = {
             "x": (np.arange(10) + 0.3) * unit_registry.m,
             "y": (np.arange(5) + 0.3) * unit_registry.m,
         }
 
         data_array = xr.DataArray(array, coords=coords, dims=("x", "y"))
-        new_data_array = xr.DataArray(
-            data=np.empty((20, 10)),
+        other = xr.DataArray(
+            data=np.empty((20, 10)) * unit_registry.degK,
             coords={"x": np.arange(20) * unit, "y": np.arange(10) * unit},
             dims=("x", "y"),
         )
 
         if error is not None:
             with pytest.raises(error):
-                data_array.reindex_like(new_data_array)
-        else:
-            expected = attach_units(
-                strip_units(data_array).reindex_like(strip_units(new_data_array)),
-                {
-                    "data": unit_registry.degK,
-                    "x": unit_registry.m,
-                    "y": unit_registry.m,
-                },
-            )
-            result = data_array.reindex_like(new_data_array)
+                data_array.reindex_like(other)
 
-            assert_equal_with_units(expected, result)
+            return
+
+        units = extract_units(data_array)
+        expected = attach_units(
+            strip_units(data_array).reindex_like(
+                strip_units(convert_units(other, units))
+            ),
+            units,
+        )
+        actual = data_array.reindex_like(other)
+
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.parametrize(
         "func",
@@ -2406,11 +2498,11 @@ def test_stacking_stacked(self, func, dtype):
         stacked = data_array.stack(z=("x", "y"))
 
         expected = attach_units(func(strip_units(stacked)), {"data": unit_registry.m})
-        result = func(stacked)
+        actual = func(stacked)
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
-    @pytest.mark.xfail(reason="indexes strip the label units")
+    @pytest.mark.xfail(reason="indexes don't support units")
     def test_to_unstacked_dataset(self, dtype):
         array = (
             np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype)
@@ -2429,13 +2521,9 @@ def test_to_unstacked_dataset(self, dtype):
             func(strip_units(data_array)),
             {"y": y.units, **dict(zip(x.magnitude, [array.units] * len(y)))},
         ).rename({elem.magnitude: elem for elem in x})
-        result = func(data_array)
-
-        print(data_array, expected, result, sep="\n")
+        actual = func(data_array)
 
-        assert_equal_with_units(expected, result)
-
-        assert False
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.parametrize(
         "func",
@@ -2446,10 +2534,7 @@ def test_to_unstacked_dataset(self, dtype):
             pytest.param(
                 method("shift", x=2), marks=pytest.mark.xfail(reason="strips units")
             ),
-            pytest.param(
-                method("roll", x=2, roll_coords=False),
-                marks=pytest.mark.xfail(reason="strips units"),
-            ),
+            method("roll", x=2, roll_coords=False),
             method("sortby", "x2"),
         ),
         ids=repr,
@@ -2471,12 +2556,10 @@ def test_stacking_reordering(self, func, dtype):
             dims=("x", "y", "z"),
         )
 
-        expected = attach_units(
-            func(strip_units(data_array)), {"data": unit_registry.m}
-        )
-        result = func(data_array)
+        expected = attach_units(func(strip_units(data_array)), {None: unit_registry.m})
+        actual = func(data_array)
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.parametrize(
         "func",
@@ -2484,20 +2567,14 @@ def test_stacking_reordering(self, func, dtype):
             method("diff", dim="x"),
             method("differentiate", coord="x"),
             method("integrate", dim="x"),
-            pytest.param(
-                method("quantile", q=[0.25, 0.75]),
-                marks=pytest.mark.xfail(
-                    reason="pint does not implement nanpercentile yet"
-                ),
-            ),
-            pytest.param(
-                method("reduce", func=np.sum, dim="x"),
-                marks=pytest.mark.xfail(reason="strips units"),
-            ),
+            method("quantile", q=[0.25, 0.75]),
+            method("reduce", func=np.sum, dim="x"),
             pytest.param(
                 lambda x: x.dot(x),
                 id="method_dot",
-                marks=pytest.mark.xfail(reason="pint does not implement einsum"),
+                marks=pytest.mark.xfail(
+                    reason="pint does not implement the dot method"
+                ),
             ),
         ),
         ids=repr,
@@ -2511,30 +2588,35 @@ def test_computation(self, func, dtype):
         y = np.arange(array.shape[1]) * unit_registry.s
 
         data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y"))
-        units = extract_units(data_array)
+
+        # we want to make sure the output unit is correct
+        units = {
+            **extract_units(data_array),
+            **(
+                {}
+                if isinstance(func, (function, method))
+                else extract_units(func(array.reshape(-1)))
+            ),
+        }
 
         expected = attach_units(func(strip_units(data_array)), units)
-        result = func(data_array)
+        actual = func(data_array)
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.parametrize(
         "func",
         (
-            pytest.param(
-                method("groupby", "y"), marks=pytest.mark.xfail(reason="strips units")
-            ),
-            pytest.param(
-                method("groupby_bins", "y", bins=4),
-                marks=pytest.mark.xfail(reason="strips units"),
-            ),
+            method("groupby", "x"),
+            method("groupby_bins", "y", bins=4),
             method("coarsen", y=2),
             pytest.param(
-                method("rolling", y=3), marks=pytest.mark.xfail(reason="strips units")
+                method("rolling", y=3),
+                marks=pytest.mark.xfail(reason="rolling strips units"),
             ),
             pytest.param(
                 method("rolling_exp", y=3),
-                marks=pytest.mark.xfail(reason="strips units"),
+                marks=pytest.mark.xfail(reason="units not supported by numbagg"),
             ),
         ),
         ids=repr,
@@ -2544,18 +2626,17 @@ def test_computation_objects(self, func, dtype):
             np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m
         )
 
-        x = np.arange(array.shape[0]) * unit_registry.m
+        x = np.array([0, 0, 1, 2, 2]) * unit_registry.m
         y = np.arange(array.shape[1]) * 3 * unit_registry.s
 
         data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y"))
         units = extract_units(data_array)
 
         expected = attach_units(func(strip_units(data_array)).mean(), units)
-        result = func(data_array).mean()
+        actual = func(data_array).mean()
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
-    @pytest.mark.xfail(reason="strips units")
     def test_resample(self, dtype):
         array = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m
 
@@ -2566,23 +2647,17 @@ def test_resample(self, dtype):
         func = method("resample", time="6m")
 
         expected = attach_units(func(strip_units(data_array)).mean(), units)
-        result = func(data_array).mean()
+        actual = func(data_array).mean()
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.parametrize(
         "func",
         (
-            pytest.param(
-                method("assign_coords", {"z": (["x"], np.arange(5) * unit_registry.s)}),
-                marks=pytest.mark.xfail(reason="strips units"),
-            ),
-            pytest.param(method("first")),
-            pytest.param(method("last")),
-            pytest.param(
-                method("quantile", q=[0.25, 0.5, 0.75], dim="x"),
-                marks=pytest.mark.xfail(reason="strips units"),
-            ),
+            method("assign_coords", z=(["x"], np.arange(5) * unit_registry.s)),
+            method("first"),
+            method("last"),
+            method("quantile", q=np.array([0.25, 0.5, 0.75]), dim="x"),
         ),
         ids=repr,
     )
@@ -2595,12 +2670,22 @@ def test_grouped_operations(self, func, dtype):
         y = np.arange(array.shape[1]) * 3 * unit_registry.s
 
         data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y"))
-        units = extract_units(data_array)
+        units = {**extract_units(data_array), **{"z": unit_registry.s, "q": None}}
 
-        expected = attach_units(func(strip_units(data_array).groupby("y")), units)
-        result = func(data_array.groupby("y"))
+        stripped_kwargs = {
+            key: (
+                strip_units(value)
+                if not isinstance(value, tuple)
+                else tuple(strip_units(elem) for elem in value)
+            )
+            for key, value in func.kwargs.items()
+        }
+        expected = attach_units(
+            func(strip_units(data_array).groupby("y"), **stripped_kwargs), units
+        )
+        actual = func(data_array.groupby("y"))
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
 
 class TestDataset:
@@ -2620,10 +2705,7 @@ class TestDataset:
         "shared",
         (
             "nothing",
-            pytest.param(
-                "dims",
-                marks=pytest.mark.xfail(reason="reindex does not work with pint yet"),
-            ),
+            pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")),
             pytest.param(
                 "coords",
                 marks=pytest.mark.xfail(reason="reindex does not work with pint yet"),
@@ -2674,7 +2756,7 @@ def test_init(self, shared, unit, error, dtype):
 
             return
 
-        result = xr.Dataset(data_vars={"a": arr1, "b": arr2})
+        actual = xr.Dataset(data_vars={"a": arr1, "b": arr2})
 
         expected_units = {
             "a": a.units,
@@ -2688,7 +2770,7 @@ def test_init(self, shared, unit, error, dtype):
             xr.Dataset(data_vars={"a": strip_units(arr1), "b": strip_units(arr2)}),
             expected_units,
         )
-        assert_equal_with_units(result, expected)
+        assert_equal_with_units(actual, expected)
 
     @pytest.mark.parametrize(
         "func", (pytest.param(str, id="str"), pytest.param(repr, id="repr"))
@@ -2749,12 +2831,7 @@ def test_repr(self, func, variant, dtype):
                     reason="np.median does not work with dataset yet"
                 ),
             ),
-            pytest.param(
-                function("sum"),
-                marks=pytest.mark.xfail(
-                    reason="np.result_type not implemented by pint"
-                ),
-            ),
+            function("sum"),
             pytest.param(
                 function("prod"),
                 marks=pytest.mark.xfail(reason="not implemented by pint"),
@@ -2764,9 +2841,7 @@ def test_repr(self, func, variant, dtype):
             function("cumsum"),
             pytest.param(
                 function("cumprod"),
-                marks=pytest.mark.xfail(
-                    reason="pint does not support cumprod on non-dimensionless yet"
-                ),
+                marks=pytest.mark.xfail(reason="fails within xarray"),
             ),
             pytest.param(
                 method("all"), marks=pytest.mark.xfail(reason="not implemented by pint")
@@ -2780,12 +2855,7 @@ def test_repr(self, func, variant, dtype):
             method("min"),
             method("mean"),
             method("median"),
-            pytest.param(
-                method("sum"),
-                marks=pytest.mark.xfail(
-                    reason="np.result_type not implemented by pint"
-                ),
-            ),
+            method("sum"),
             pytest.param(
                 method("prod"),
                 marks=pytest.mark.xfail(reason="not implemented by pint"),
@@ -2794,17 +2864,20 @@ def test_repr(self, func, variant, dtype):
             method("var"),
             method("cumsum"),
             pytest.param(
-                method("cumprod"),
-                marks=pytest.mark.xfail(
-                    reason="pint does not support cumprod on non-dimensionless yet"
-                ),
+                method("cumprod"), marks=pytest.mark.xfail(reason="fails within xarray")
             ),
         ),
         ids=repr,
     )
     def test_aggregation(self, func, dtype):
-        unit_a = unit_registry.Pa
-        unit_b = unit_registry.kg / unit_registry.m ** 3
+        unit_a = (
+            unit_registry.Pa if func.name != "cumprod" else unit_registry.dimensionless
+        )
+        unit_b = (
+            unit_registry.kg / unit_registry.m ** 3
+            if func.name != "cumprod"
+            else unit_registry.dimensionless
+        )
         a = xr.DataArray(data=np.linspace(0, 1, 10).astype(dtype) * unit_a, dims="x")
         b = xr.DataArray(data=np.linspace(-1, 0, 10).astype(dtype) * unit_b, dims="x")
         x = xr.DataArray(data=np.arange(10).astype(dtype) * unit_registry.m, dims="x")
@@ -2814,13 +2887,16 @@ def test_aggregation(self, func, dtype):
 
         ds = xr.Dataset(data_vars={"a": a, "b": b}, coords={"x": x, "y": y})
 
-        result = func(ds)
+        actual = func(ds)
         expected = attach_units(
             func(strip_units(ds)),
-            {"a": array_extract_units(func(a)), "b": array_extract_units(func(b))},
+            {
+                "a": extract_units(func(a)).get(None),
+                "b": extract_units(func(b)).get(None),
+            },
         )
 
-        assert_equal_with_units(result, expected)
+        assert_equal_with_units(actual, expected)
 
     @pytest.mark.parametrize("property", ("imag", "real"))
     def test_numpy_properties(self, property, dtype):
@@ -2840,10 +2916,10 @@ def test_numpy_properties(self, property, dtype):
         )
         units = extract_units(ds)
 
-        result = getattr(ds, property)
+        actual = getattr(ds, property)
         expected = attach_units(getattr(strip_units(ds), property), units)
 
-        assert_equal_with_units(result, expected)
+        assert_equal_with_units(actual, expected)
 
     @pytest.mark.parametrize(
         "func",
@@ -2853,10 +2929,6 @@ def test_numpy_properties(self, property, dtype):
             method("argsort"),
             method("conjugate"),
             method("round"),
-            pytest.param(
-                method("rank", dim="x"),
-                marks=pytest.mark.xfail(reason="pint does not implement rank yet"),
-            ),
         ),
         ids=repr,
     )
@@ -2882,10 +2954,10 @@ def test_numpy_methods(self, func, dtype):
             "y": unit_registry.s,
         }
 
-        result = func(ds)
+        actual = func(ds)
         expected = attach_units(func(strip_units(ds)), units)
 
-        assert_equal_with_units(result, expected)
+        assert_equal_with_units(actual, expected)
 
     @pytest.mark.parametrize("func", (method("clip", min=3, max=8),), ids=repr)
     @pytest.mark.parametrize(
@@ -2914,37 +2986,26 @@ def test_numpy_methods_with_args(self, func, unit, error, dtype):
         )
         units = extract_units(ds)
 
-        def strip(value):
-            return (
-                value.magnitude if isinstance(value, unit_registry.Quantity) else value
-            )
-
-        def convert(value, to):
-            if isinstance(value, unit_registry.Quantity) and value.check(to):
-                return value.to(to)
-
-            return value
-
-        scalar_types = (int, float)
         kwargs = {
-            key: (value * unit if isinstance(value, scalar_types) else value)
+            key: (value * unit if isinstance(value, (int, float)) else value)
             for key, value in func.kwargs.items()
         }
 
-        stripped_kwargs = {
-            key: strip(convert(value, data_unit)) for key, value in kwargs.items()
-        }
-
         if error is not None:
             with pytest.raises(error):
                 func(ds, **kwargs)
 
             return
 
-        result = func(ds, **kwargs)
+        stripped_kwargs = {
+            key: strip_units(convert_units(value, {None: data_unit}))
+            for key, value in kwargs.items()
+        }
+
+        actual = func(ds, **kwargs)
         expected = attach_units(func(strip_units(ds), **stripped_kwargs), units)
 
-        assert_equal_with_units(result, expected)
+        assert_equal_with_units(actual, expected)
 
     @pytest.mark.parametrize(
         "func", (method("isnull"), method("notnull"), method("count")), ids=repr
@@ -2987,9 +3048,9 @@ def test_missing_value_detection(self, func, dtype):
         )
 
         expected = func(strip_units(ds))
-        result = func(ds)
+        actual = func(ds)
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.xfail(reason="ffill and bfill lose the unit")
     @pytest.mark.parametrize("func", (method("ffill"), method("bfill")), ids=repr)
@@ -3017,40 +3078,35 @@ def test_missing_value_filling(self, func, dtype):
             func(strip_units(ds), dim="x"),
             {"a": unit_registry.degK, "b": unit_registry.Pa},
         )
-        result = func(ds, dim="x")
+        actual = func(ds, dim="x")
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
-    @pytest.mark.xfail(reason="fillna drops the unit")
     @pytest.mark.parametrize(
         "unit,error",
         (
-            pytest.param(
-                1,
-                DimensionalityError,
-                id="no_unit",
-                marks=pytest.mark.xfail(reason="blocked by the failing `where`"),
-            ),
+            pytest.param(1, DimensionalityError, id="no_unit"),
             pytest.param(
                 unit_registry.dimensionless, DimensionalityError, id="dimensionless"
             ),
             pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
-            pytest.param(unit_registry.cm, None, id="compatible_unit"),
+            pytest.param(
+                unit_registry.cm,
+                None,
+                id="compatible_unit",
+                marks=pytest.mark.xfail(
+                    reason="where converts the array, not the fill value"
+                ),
+            ),
             pytest.param(unit_registry.m, None, id="identical_unit"),
         ),
     )
     @pytest.mark.parametrize(
         "fill_value",
         (
-            pytest.param(
-                -1,
-                id="python scalar",
-                marks=pytest.mark.xfail(
-                    reason="python scalar cannot be converted using astype()"
-                ),
-            ),
-            pytest.param(np.array(-1), id="numpy scalar"),
-            pytest.param(np.array([-1]), id="numpy array"),
+            pytest.param(-1, id="python_scalar"),
+            pytest.param(np.array(-1), id="numpy_scalar"),
+            pytest.param(np.array([-1]), id="numpy_array"),
         ),
     )
     def test_fillna(self, fill_value, unit, error, dtype):
@@ -3075,13 +3131,17 @@ def test_fillna(self, fill_value, unit, error, dtype):
 
             return
 
-        result = ds.fillna(value=fill_value * unit)
+        actual = ds.fillna(value=fill_value * unit)
         expected = attach_units(
-            strip_units(ds).fillna(value=fill_value),
+            strip_units(ds).fillna(
+                value=strip_units(
+                    convert_units(fill_value * unit, {None: unit_registry.m})
+                )
+            ),
             {"a": unit_registry.m, "b": unit_registry.m},
         )
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
     def test_dropna(self, dtype):
         array1 = (
@@ -3105,11 +3165,10 @@ def test_dropna(self, dtype):
             strip_units(ds).dropna(dim="x"),
             {"a": unit_registry.degK, "b": unit_registry.Pa},
         )
-        result = ds.dropna(dim="x")
+        actual = ds.dropna(dim="x")
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
-    @pytest.mark.xfail(reason="pint does not implement `numpy.isin`")
     @pytest.mark.parametrize(
         "unit",
         (
@@ -3154,36 +3213,12 @@ def test_isin(self, unit, dtype):
         ):
             expected.a[:] = False
             expected.b[:] = False
-        result = ds.isin(values)
+        actual = ds.isin(values)
 
-        assert_equal_with_units(result, expected)
+        assert_equal_with_units(actual, expected)
 
     @pytest.mark.parametrize(
-        "variant",
-        (
-            pytest.param(
-                "masking",
-                marks=pytest.mark.xfail(
-                    reason="np.result_type not implemented by quantity"
-                ),
-            ),
-            pytest.param(
-                "replacing_scalar",
-                marks=pytest.mark.xfail(
-                    reason="python scalar not convertible using astype"
-                ),
-            ),
-            pytest.param(
-                "replacing_array",
-                marks=pytest.mark.xfail(
-                    reason="replacing using an array drops the units"
-                ),
-            ),
-            pytest.param(
-                "dropping",
-                marks=pytest.mark.xfail(reason="nan not compatible with quantity"),
-            ),
-        ),
+        "variant", ("masking", "replacing_scalar", "replacing_array", "dropping")
     )
     @pytest.mark.parametrize(
         "unit,error",
@@ -3198,9 +3233,6 @@ def test_isin(self, unit, dtype):
         ),
     )
     def test_where(self, variant, unit, error, dtype):
-        def _strip_units(mapping):
-            return {key: array_strip_units(value) for key, value in mapping.items()}
-
         original_unit = unit_registry.m
         array1 = np.linspace(0, 1, 10).astype(dtype) * original_unit
         array2 = np.linspace(-1, 0, 10).astype(dtype) * original_unit
@@ -3222,21 +3254,24 @@ def _strip_units(mapping):
             "dropping": {"cond": condition, "drop": True},
         }
         kwargs = variant_kwargs.get(variant)
-        kwargs_without_units = _strip_units(kwargs)
-
         if variant not in ("masking", "dropping") and error is not None:
             with pytest.raises(error):
                 ds.where(**kwargs)
 
             return
 
+        kwargs_without_units = {
+            key: strip_units(convert_units(value, {None: original_unit}))
+            for key, value in kwargs.items()
+        }
+
         expected = attach_units(
             strip_units(ds).where(**kwargs_without_units),
             {"a": original_unit, "b": original_unit},
         )
-        result = ds.where(**kwargs)
+        actual = ds.where(**kwargs)
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.xfail(reason="interpolate strips units")
     def test_interpolate_na(self, dtype):
@@ -3261,11 +3296,11 @@ def test_interpolate_na(self, dtype):
             strip_units(ds).interpolate_na(dim="x"),
             {"a": unit_registry.degK, "b": unit_registry.Pa},
         )
-        result = ds.interpolate_na(dim="x")
+        actual = ds.interpolate_na(dim="x")
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
-    @pytest.mark.xfail(reason="uses Dataset.where, which currently fails")
+    @pytest.mark.xfail(reason="wrong argument order for `where`")
     @pytest.mark.parametrize(
         "unit,error",
         (
@@ -3281,11 +3316,11 @@ def test_interpolate_na(self, dtype):
     def test_combine_first(self, unit, error, dtype):
         array1 = (
             np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype)
-            * unit_registry.degK
+            * unit_registry.m
         )
         array2 = (
             np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype)
-            * unit_registry.Pa
+            * unit_registry.m
         )
         x = np.arange(len(array1))
         ds = xr.Dataset(
@@ -3312,12 +3347,16 @@ def test_combine_first(self, unit, error, dtype):
             return
 
         expected = attach_units(
-            strip_units(ds).combine_first(strip_units(other)),
+            strip_units(ds).combine_first(
+                strip_units(
+                    convert_units(other, {"a": unit_registry.m, "b": unit_registry.m})
+                )
+            ),
             {"a": unit_registry.m, "b": unit_registry.m},
         )
-        result = ds.combine_first(other)
+        actual = ds.combine_first(other)
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.parametrize(
         "unit",
@@ -3325,11 +3364,7 @@ def test_combine_first(self, unit, error, dtype):
             pytest.param(1, id="no_unit"),
             pytest.param(unit_registry.dimensionless, id="dimensionless"),
             pytest.param(unit_registry.s, id="incompatible_unit"),
-            pytest.param(
-                unit_registry.cm,
-                id="compatible_unit",
-                marks=pytest.mark.xfail(reason="identical does not check units yet"),
-            ),
+            pytest.param(unit_registry.cm, id="compatible_unit"),
             pytest.param(unit_registry.m, id="identical_unit"),
         ),
     )
@@ -3345,6 +3380,13 @@ def test_combine_first(self, unit, error, dtype):
     )
     @pytest.mark.parametrize("func", (method("equals"), method("identical")), ids=repr)
     def test_comparisons(self, func, variation, unit, dtype):
+        def is_compatible(a, b):  # unit-compatibility helper for this test
+            a = a if a is not None else 1  # None is replaced by the factor 1
+            b = b if b is not None else 1
+            quantity = np.arange(5) * a  # NOTE(review): plain ndarray when a is 1
+            # Equal units short-circuit; otherwise defer to quantity.check(b).
+            return a == b or quantity.check(b)
+
         array1 = np.linspace(0, 5, 10).astype(dtype)
         array2 = np.linspace(-5, 0, 10).astype(dtype)
 
@@ -3356,11 +3398,7 @@ def test_comparisons(self, func, variation, unit, dtype):
         x = coord * original_unit
         y = coord * original_unit
 
-        units = {
-            "data": (unit, original_unit, original_unit),
-            "dims": (original_unit, unit, original_unit),
-            "coords": (original_unit, original_unit, unit),
-        }
+        units = {"data": (unit, 1, 1), "dims": (1, unit, 1), "coords": (1, 1, unit)}
         data_unit, dim_unit, coord_unit = units.get(variation)
 
         ds = xr.Dataset(
@@ -3371,36 +3409,27 @@ def test_comparisons(self, func, variation, unit, dtype):
             coords={"x": x, "y": ("x", y)},
         )
 
-        other = attach_units(
-            strip_units(ds),
-            {
-                "a": (data_unit, original_unit if quantity1.check(data_unit) else None),
-                "b": (data_unit, original_unit if quantity2.check(data_unit) else None),
-                "x": (dim_unit, original_unit if x.check(dim_unit) else None),
-                "y": (coord_unit, original_unit if y.check(coord_unit) else None),
-            },
-        )
+        other_units = {
+            "a": data_unit if quantity1.check(data_unit) else None,
+            "b": data_unit if quantity2.check(data_unit) else None,
+            "x": dim_unit if x.check(dim_unit) else None,
+            "y": coord_unit if y.check(coord_unit) else None,
+        }
+        other = attach_units(strip_units(convert_units(ds, other_units)), other_units)
 
-        # TODO: test dim coord once indexes leave units intact
-        # also, express this in terms of calls on the raw data array
-        # and then check the units
-        equal_arrays = (
-            np.all(ds.a.data == other.a.data)
-            and np.all(ds.b.data == other.b.data)
-            and (np.all(x == other.x.data) or True)  # dims can't be checked yet
-            and np.all(y == other.y.data)
-        )
-        equal_units = (
-            data_unit == original_unit
-            and coord_unit == original_unit
-            and dim_unit == original_unit
-        )
-        expected = equal_arrays and (func.name != "identical" or equal_units)
-        result = func(ds, other)
+        units = extract_units(ds)
+        other_units = extract_units(other)
+
+        equal_ds = all(
+            is_compatible(units[name], other_units[name]) for name in units.keys()
+        ) and (strip_units(ds).equals(strip_units(convert_units(other, units))))
+        equal_units = units == other_units
+        expected = equal_ds and (func.name != "identical" or equal_units)
+
+        actual = func(ds, other)
 
-        assert expected == result
+        assert expected == actual
 
-    @pytest.mark.xfail(reason="blocked by `where`")
     @pytest.mark.parametrize(
         "unit",
         (
@@ -3430,9 +3459,9 @@ def test_broadcast_like(self, unit, dtype):
         expected = attach_units(
             strip_units(ds1).broadcast_like(strip_units(ds2)), extract_units(ds1)
         )
-        result = ds1.broadcast_like(ds2)
+        actual = ds1.broadcast_like(ds2)
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.parametrize(
         "unit",
@@ -3446,38 +3475,34 @@ def test_broadcast_like(self, unit, dtype):
     )
     def test_broadcast_equals(self, unit, dtype):
         left_array1 = np.ones(shape=(2, 3), dtype=dtype) * unit_registry.m
-        left_array2 = np.zeros(shape=(2, 6), dtype=dtype) * unit_registry.m
+        left_array2 = np.zeros(shape=(3, 6), dtype=dtype) * unit_registry.m
 
-        right_array1 = array_attach_units(
-            np.ones(shape=(2,), dtype=dtype),
-            unit,
-            convert_from=unit_registry.m if left_array1.check(unit) else None,
-        )
-        right_array2 = array_attach_units(
-            np.ones(shape=(2,), dtype=dtype),
-            unit,
-            convert_from=unit_registry.m if left_array2.check(unit) else None,
-        )
+        right_array1 = np.ones(shape=(2,)) * unit
+        right_array2 = np.ones(shape=(3,)) * unit
 
         left = xr.Dataset(
             data_vars={
                 "a": xr.DataArray(data=left_array1, dims=("x", "y")),
-                "b": xr.DataArray(data=left_array2, dims=("x", "z")),
+                "b": xr.DataArray(data=left_array2, dims=("y", "z")),
             }
         )
         right = xr.Dataset(
             data_vars={
                 "a": xr.DataArray(data=right_array1, dims="x"),
-                "b": xr.DataArray(data=right_array2, dims="x"),
+                "b": xr.DataArray(data=right_array2, dims="y"),
             }
         )
 
-        expected = np.all(left_array1 == right_array1[:, None]) and np.all(
-            left_array2 == right_array2[:, None]
-        )
-        result = left.broadcast_equals(right)
+        units = {
+            **extract_units(left),
+            **({} if left_array1.check(unit) else {"a": None, "b": None}),
+        }
+        expected = strip_units(left).broadcast_equals(
+            strip_units(convert_units(right, units))
+        ) & left_array1.check(unit)
+        actual = left.broadcast_equals(right)
 
-        assert expected == result
+        assert expected == actual
 
     @pytest.mark.parametrize(
         "func",
@@ -3510,11 +3535,10 @@ def test_stacking_stacked(self, func, dtype):
         expected = attach_units(
             func(strip_units(stacked)), {"a": unit_registry.m, "b": unit_registry.m}
         )
-        result = func(stacked)
+        actual = func(stacked)
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
-    @pytest.mark.xfail(reason="tries to subscript scalar quantities")
     def test_to_stacked_array(self, dtype):
         labels = np.arange(5).astype(dtype) * unit_registry.s
         arrays = {name: np.linspace(0, 1, 10) * unit_registry.m for name in labels}
@@ -3528,13 +3552,13 @@ def test_to_stacked_array(self, dtype):
 
         func = method("to_stacked_array", "z", variable_dim="y", sample_dims=["x"])
 
-        result = func(ds).rename(None)
+        actual = func(ds).rename(None)
         expected = attach_units(
             func(strip_units(ds)).rename(None),
             {None: unit_registry.m, "y": unit_registry.s},
         )
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.parametrize(
         "func",
@@ -3543,12 +3567,10 @@ def test_to_stacked_array(self, dtype):
             method("stack", a=("x", "y")),
             method("set_index", x="x2"),
             pytest.param(
-                method("shift", x=2), marks=pytest.mark.xfail(reason="sets all to nan")
-            ),
-            pytest.param(
-                method("roll", x=2, roll_coords=False),
-                marks=pytest.mark.xfail(reason="strips units"),
+                method("shift", x=2),
+                marks=pytest.mark.xfail(reason="tries to concatenate nan arrays"),
             ),
+            method("roll", x=2, roll_coords=False),
             method("sortby", "x2"),
         ),
         ids=repr,
@@ -3581,9 +3603,9 @@ def test_stacking_reordering(self, func, dtype):
         expected = attach_units(
             func(strip_units(ds)), {"a": unit_registry.Pa, "b": unit_registry.degK}
         )
-        result = func(ds)
+        actual = func(ds)
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.xfail(reason="indexes strip units")
     @pytest.mark.parametrize(
@@ -3610,35 +3632,33 @@ def test_isel(self, indices, dtype):
             strip_units(ds).isel(x=indices),
             {"a": unit_registry.s, "b": unit_registry.Pa, "x": unit_registry.m},
         )
-        result = ds.isel(x=indices)
+        actual = ds.isel(x=indices)
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
-    @pytest.mark.xfail(
-        reason="xarray does not support duck arrays in dimension coordinates"
-    )
+    @pytest.mark.xfail(reason="indexes don't support units")
     @pytest.mark.parametrize(
-        "values",
+        "raw_values",
         (
-            pytest.param(12, id="single_value"),
+            pytest.param(10, id="single_value"),
             pytest.param([10, 5, 13], id="list_of_values"),
             pytest.param(np.array([9, 3, 7, 12]), id="array_of_values"),
         ),
     )
     @pytest.mark.parametrize(
-        "units,error",
+        "unit,error",
         (
             pytest.param(1, KeyError, id="no_units"),
             pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"),
             pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"),
-            pytest.param(unit_registry.ms, KeyError, id="compatible_unit"),
-            pytest.param(unit_registry.s, None, id="same_unit"),
+            pytest.param(unit_registry.dm, KeyError, id="compatible_unit"),
+            pytest.param(unit_registry.m, None, id="identical_unit"),
         ),
     )
-    def test_sel(self, values, units, error, dtype):
+    def test_sel(self, raw_values, unit, error, dtype):
         array1 = np.linspace(5, 10, 20).astype(dtype) * unit_registry.degK
         array2 = np.linspace(0, 5, 20).astype(dtype) * unit_registry.Pa
-        x = np.arange(len(array1)) * unit_registry.s
+        x = np.arange(len(array1)) * unit_registry.m
 
         ds = xr.Dataset(
             data_vars={
@@ -3648,46 +3668,46 @@ def test_sel(self, values, units, error, dtype):
             coords={"x": x},
         )
 
-        values_with_units = values * units
+        values = raw_values * unit
 
-        if error is not None:
+        if error is not None and not (
+            isinstance(raw_values, (int, float)) and x.check(unit)
+        ):
             with pytest.raises(error):
-                ds.sel(x=values_with_units)
+                ds.sel(x=values)
 
             return
 
         expected = attach_units(
-            strip_units(ds).sel(x=values),
-            {"a": unit_registry.degK, "b": unit_registry.Pa, "x": unit_registry.s},
+            strip_units(ds).sel(x=strip_units(convert_units(values, {None: x.units}))),
+            {"a": array1.units, "b": array2.units, "x": x.units},
         )
-        result = ds.sel(x=values_with_units)
-        assert_equal_with_units(expected, result)
+        actual = ds.sel(x=values)
+        assert_equal_with_units(expected, actual)
 
-    @pytest.mark.xfail(
-        reason="xarray does not support duck arrays in dimension coordinates"
-    )
+    @pytest.mark.xfail(reason="indexes don't support units")
     @pytest.mark.parametrize(
-        "values",
+        "raw_values",
         (
-            pytest.param(12, id="single value"),
-            pytest.param([10, 5, 13], id="list of multiple values"),
-            pytest.param(np.array([9, 3, 7, 12]), id="array of multiple values"),
+            pytest.param(10, id="single_value"),
+            pytest.param([10, 5, 13], id="list_of_values"),
+            pytest.param(np.array([9, 3, 7, 12]), id="array_of_values"),
         ),
     )
     @pytest.mark.parametrize(
-        "units,error",
+        "unit,error",
         (
             pytest.param(1, KeyError, id="no_units"),
             pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"),
             pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"),
-            pytest.param(unit_registry.ms, KeyError, id="compatible_unit"),
-            pytest.param(unit_registry.s, None, id="same_unit"),
+            pytest.param(unit_registry.dm, KeyError, id="compatible_unit"),
+            pytest.param(unit_registry.m, None, id="identical_unit"),
         ),
     )
-    def test_loc(self, values, units, error, dtype):
+    def test_drop_sel(self, raw_values, unit, error, dtype):
         array1 = np.linspace(5, 10, 20).astype(dtype) * unit_registry.degK
         array2 = np.linspace(0, 5, 20).astype(dtype) * unit_registry.Pa
-        x = np.arange(len(array1)) * unit_registry.s
+        x = np.arange(len(array1)) * unit_registry.m
 
         ds = xr.Dataset(
             data_vars={
@@ -3697,36 +3717,76 @@ def test_loc(self, values, units, error, dtype):
             coords={"x": x},
         )
 
-        values_with_units = values * units
+        values = raw_values * unit
 
-        if error is not None:
+        if error is not None and not (
+            isinstance(raw_values, (int, float)) and x.check(unit)
+        ):
             with pytest.raises(error):
-                ds.loc[{"x": values_with_units}]
+                ds.drop_sel(x=values)
 
             return
 
         expected = attach_units(
-            strip_units(ds).loc[{"x": values}],
-            {"a": unit_registry.degK, "b": unit_registry.Pa, "x": unit_registry.s},
+            strip_units(ds).drop_sel(
+                x=strip_units(convert_units(values, {None: x.units}))
+            ),
+            extract_units(ds),
         )
-        result = ds.loc[{"x": values_with_units}]
-        assert_equal_with_units(expected, result)
+        actual = ds.drop_sel(x=values)
+        assert_equal_with_units(expected, actual)
 
-    @pytest.mark.xfail(
-        reason="indexes strip units and head / tail / thin only support integers"
+    @pytest.mark.xfail(reason="indexes don't support units")
+    @pytest.mark.parametrize(
+        "raw_values",
+        (
+            pytest.param(10, id="single_value"),
+            pytest.param([10, 5, 13], id="list_of_values"),
+            pytest.param(np.array([9, 3, 7, 12]), id="array_of_values"),
+        ),
     )
     @pytest.mark.parametrize(
         "unit,error",
         (
-            pytest.param(1, DimensionalityError, id="no_unit"),
-            pytest.param(
-                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
-            ),
-            pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
-            pytest.param(unit_registry.cm, None, id="compatible_unit"),
+            pytest.param(1, KeyError, id="no_units"),
+            pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"),
+            pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"),
+            pytest.param(unit_registry.dm, KeyError, id="compatible_unit"),
             pytest.param(unit_registry.m, None, id="identical_unit"),
         ),
     )
+    def test_loc(self, raw_values, unit, error, dtype):  # .loc with unit labels
+        array1 = np.linspace(5, 10, 20).astype(dtype) * unit_registry.degK
+        array2 = np.linspace(0, 5, 20).astype(dtype) * unit_registry.Pa
+        x = np.arange(len(array1)) * unit_registry.m
+        # Both variables share dim "x"; its coordinate is in unit_registry.m.
+        ds = xr.Dataset(
+            data_vars={
+                "a": xr.DataArray(data=array1, dims="x"),
+                "b": xr.DataArray(data=array2, dims="x"),
+            },
+            coords={"x": x},
+        )
+        # Scale the raw label(s) by the parametrized unit.
+        values = raw_values * unit
+        # Expect `error` unless the label is a plain scalar and x.check(unit) holds.
+        if error is not None and not (
+            isinstance(raw_values, (int, float)) and x.check(unit)
+        ):
+            with pytest.raises(error):
+                ds.loc[{"x": values}]
+
+            return
+        # Reference: .loc on strip_units(ds), then attach_units with the original units.
+        expected = attach_units(
+            strip_units(ds).loc[
+                {"x": strip_units(convert_units(values, {None: x.units}))}
+            ],
+            {"a": array1.units, "b": array2.units, "x": x.units},
+        )
+        actual = ds.loc[{"x": values}]
+        assert_equal_with_units(expected, actual)
+
     @pytest.mark.parametrize(
         "func",
         (
@@ -3736,7 +3796,7 @@ def test_loc(self, values, units, error, dtype):
         ),
         ids=repr,
     )
-    def test_head_tail_thin(self, func, unit, error, dtype):
+    def test_head_tail_thin(self, func, dtype):
         array1 = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK
         array2 = np.linspace(1, 2, 10 * 8).reshape(10, 8) * unit_registry.Pa
 
@@ -3754,18 +3814,10 @@ def test_head_tail_thin(self, func, unit, error, dtype):
             coords=coords,
         )
 
-        kwargs = {name: value * unit for name, value in func.kwargs.items()}
-
-        if error is not None:
-            with pytest.raises(error):
-                func(ds, **kwargs)
-
-            return
-
         expected = attach_units(func(strip_units(ds)), extract_units(ds))
-        result = func(ds, **kwargs)
+        actual = func(ds)
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.parametrize(
         "shape",
@@ -3802,15 +3854,15 @@ def test_squeeze(self, shape, dtype):
 
         expected = attach_units(strip_units(ds).squeeze(), units)
 
-        result = ds.squeeze()
-        assert_equal_with_units(result, expected)
+        actual = ds.squeeze()
+        assert_equal_with_units(actual, expected)
 
         # try squeezing the dimensions separately
         names = tuple(dim for dim, coord in coords.items() if len(coord) == 1)
         for name in names:
             expected = attach_units(strip_units(ds).squeeze(dim=name), units)
-            result = ds.squeeze(dim=name)
-            assert_equal_with_units(result, expected)
+            actual = ds.squeeze(dim=name)
+            assert_equal_with_units(actual, expected)
 
     @pytest.mark.xfail(reason="ignores units")
     @pytest.mark.parametrize(
@@ -3851,12 +3903,14 @@ def test_interp(self, unit, error):
 
             return
 
+        units = extract_units(ds)
         expected = attach_units(
-            strip_units(ds).interp(x=strip_units(new_coords)), extract_units(ds)
+            strip_units(ds).interp(x=strip_units(convert_units(new_coords, units))),
+            units,
         )
-        result = ds.interp(x=new_coords)
+        actual = ds.interp(x=new_coords)
 
-        assert_equal_with_units(result, expected)
+        assert_equal_with_units(actual, expected)
 
     @pytest.mark.xfail(reason="ignores units")
     @pytest.mark.parametrize(
@@ -3911,16 +3965,15 @@ def test_interp_like(self, unit, error, dtype):
 
             return
 
+        units = extract_units(ds)
         expected = attach_units(
-            strip_units(ds).interp_like(strip_units(other)), extract_units(ds)
+            strip_units(ds).interp_like(strip_units(convert_units(other, units))), units
         )
-        result = ds.interp_like(other)
+        actual = ds.interp_like(other)
 
-        assert_equal_with_units(result, expected)
+        assert_equal_with_units(actual, expected)
 
-    @pytest.mark.xfail(
-        reason="pint does not implement np.result_type in __array_function__ yet"
-    )
+    @pytest.mark.xfail(reason="indexes don't support units")
     @pytest.mark.parametrize(
         "unit,error",
         (
@@ -3933,9 +3986,13 @@ def test_interp_like(self, unit, error, dtype):
             pytest.param(unit_registry.m, None, id="identical_unit"),
         ),
     )
-    def test_reindex(self, unit, error):
-        array1 = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK
-        array2 = np.linspace(1, 2, 10 * 8).reshape(10, 8) * unit_registry.Pa
+    def test_reindex(self, unit, error, dtype):
+        array1 = (
+            np.linspace(1, 2, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK
+        )
+        array2 = (
+            np.linspace(1, 2, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa
+        )
 
         coords = {
             "x": np.arange(10) * unit_registry.m,
@@ -3955,20 +4012,21 @@ def test_reindex(self, unit, error):
 
         if error is not None:
             with pytest.raises(error):
-                ds.interp(x=new_coords)
+                ds.reindex(x=new_coords)
 
             return
 
         expected = attach_units(
-            strip_units(ds).reindex(x=strip_units(new_coords)), extract_units(ds)
+            strip_units(ds).reindex(
+                x=strip_units(convert_units(new_coords, {None: coords["x"].units}))
+            ),
+            extract_units(ds),
         )
-        result = ds.reindex(x=new_coords)
+        actual = ds.reindex(x=new_coords)
 
-        assert_equal_with_units(result, expected)
+        assert_equal_with_units(actual, expected)
 
-    @pytest.mark.xfail(
-        reason="pint does not implement np.result_type in __array_function__ yet"
-    )
+    @pytest.mark.xfail(reason="indexes don't support units")
     @pytest.mark.parametrize(
         "unit,error",
         (
@@ -4021,12 +4079,14 @@ def test_reindex_like(self, unit, error, dtype):
 
             return
 
+        units = extract_units(ds)
         expected = attach_units(
-            strip_units(ds).reindex_like(strip_units(other)), extract_units(ds)
+            strip_units(ds).reindex_like(strip_units(convert_units(other, units))),
+            units,
         )
-        result = ds.reindex_like(other)
+        actual = ds.reindex_like(other)
 
-        assert_equal_with_units(result, expected)
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.parametrize(
         "func",
@@ -4034,20 +4094,9 @@ def test_reindex_like(self, unit, error, dtype):
             method("diff", dim="x"),
             method("differentiate", coord="x"),
             method("integrate", coord="x"),
-            pytest.param(
-                method("quantile", q=[0.25, 0.75]),
-                marks=pytest.mark.xfail(
-                    reason="pint does not implement nanpercentile yet"
-                ),
-            ),
-            pytest.param(
-                method("reduce", func=np.sum, dim="x"),
-                marks=pytest.mark.xfail(reason="strips units"),
-            ),
-            pytest.param(
-                method("map", np.fabs),
-                marks=pytest.mark.xfail(reason="fabs strips units"),
-            ),
+            method("quantile", q=[0.25, 0.75]),
+            method("reduce", func=np.sum, dim="x"),
+            method("map", np.fabs),
         ),
         ids=repr,
     )
@@ -4073,27 +4122,22 @@ def test_computation(self, func, dtype):
         units = extract_units(ds)
 
         expected = attach_units(func(strip_units(ds)), units)
-        result = func(ds)
+        actual = func(ds)
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.parametrize(
         "func",
         (
-            pytest.param(
-                method("groupby", "x"), marks=pytest.mark.xfail(reason="strips units")
-            ),
-            pytest.param(
-                method("groupby_bins", "x", bins=4),
-                marks=pytest.mark.xfail(reason="strips units"),
-            ),
+            method("groupby", "x"),
+            method("groupby_bins", "x", bins=4),
             method("coarsen", x=2),
             pytest.param(
                 method("rolling", x=3), marks=pytest.mark.xfail(reason="strips units")
             ),
             pytest.param(
                 method("rolling_exp", x=3),
-                marks=pytest.mark.xfail(reason="strips units"),
+                marks=pytest.mark.xfail(reason="uses numbagg which strips units"),
             ),
         ),
         ids=repr,
@@ -4122,11 +4166,10 @@ def test_computation_objects(self, func, dtype):
         args = [] if func.name != "groupby" else ["y"]
         reduce_func = method("mean", *args)
         expected = attach_units(reduce_func(func(strip_units(ds))), units)
-        result = reduce_func(func(ds))
+        actual = reduce_func(func(ds))
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
-    @pytest.mark.xfail(reason="strips units")
     def test_resample(self, dtype):
         array1 = (
             np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK
@@ -4150,29 +4193,18 @@ def test_resample(self, dtype):
         func = method("resample", time="6m")
 
         expected = attach_units(func(strip_units(ds)).mean(), units)
-        result = func(ds).mean()
+        actual = func(ds).mean()
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.parametrize(
         "func",
         (
-            pytest.param(
-                method("assign", c=lambda ds: 10 * ds.b),
-                marks=pytest.mark.xfail(reason="strips units"),
-            ),
-            pytest.param(
-                method("assign_coords", v=("x", np.arange(10) * unit_registry.s)),
-                marks=pytest.mark.xfail(reason="strips units"),
-            ),
-            pytest.param(method("first")),
-            pytest.param(method("last")),
-            pytest.param(
-                method("quantile", q=[0.25, 0.5, 0.75], dim="x"),
-                marks=pytest.mark.xfail(
-                    reason="dataset groupby does not implement quantile"
-                ),
-            ),
+            method("assign", c=lambda ds: 10 * ds.b),
+            method("assign_coords", v=("x", np.arange(10) * unit_registry.s)),
+            method("first"),
+            method("last"),
+            method("quantile", q=[0.25, 0.5, 0.75], dim="x"),
         ),
         ids=repr,
     )
@@ -4204,9 +4236,9 @@ def test_grouped_operations(self, func, dtype):
         expected = attach_units(
             func(strip_units(ds).groupby("y"), **stripped_kwargs), units
         )
-        result = func(ds.groupby("y"))
+        actual = func(ds.groupby("y"))
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
     @pytest.mark.parametrize(
         "func",
@@ -4220,7 +4252,7 @@ def test_grouped_operations(self, func, dtype):
             method("rename_dims", x="offset_x"),
             method("swap_dims", {"x": "x2"}),
             method("expand_dims", v=np.linspace(10, 20, 12) * unit_registry.s, axis=1),
-            method("drop_sel", labels="x"),
+            method("drop_vars", "x"),
             method("drop_dims", "z"),
             method("set_coords", names="c"),
             method("reset_coords", names="x2"),
@@ -4252,26 +4284,25 @@ def test_content_manipulation(self, func, dtype):
             },
             coords={"x": x, "y": y, "z": z, "x2": ("x", x2)},
         )
-        units = extract_units(ds)
-        units.update(
-            {
+        units = {
+            **extract_units(ds),
+            **{
                 "y2": unit_registry.mm,
                 "x_mm": unit_registry.mm,
                 "offset_x": unit_registry.m,
                 "d": unit_registry.Pa,
                 "temperature": unit_registry.degK,
-            }
-        )
+            },
+        }
 
         stripped_kwargs = {
             key: strip_units(value) for key, value in func.kwargs.items()
         }
         expected = attach_units(func(strip_units(ds), **stripped_kwargs), units)
-        result = func(ds)
+        actual = func(ds)
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
 
-    @pytest.mark.xfail(reason="blocked by reindex")
     @pytest.mark.parametrize(
         "unit,error",
         (
@@ -4284,7 +4315,16 @@ def test_content_manipulation(self, func, dtype):
             pytest.param(unit_registry.m, None, id="identical_unit"),
         ),
     )
-    @pytest.mark.parametrize("variant", ("data", "dims", "coords"))
+    @pytest.mark.parametrize(
+        "variant",
+        (
+            "data",
+            pytest.param(
+                "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
+            ),
+            "coords",
+        ),
+    )
     def test_merge(self, variant, unit, error, dtype):
         original_data_unit = unit_registry.m
         original_dim_unit = unit_registry.m
@@ -4325,6 +4365,6 @@ def test_merge(self, variant, unit, error, dtype):
 
         converted = convert_units(right, units)
         expected = attach_units(strip_units(left).merge(strip_units(converted)), units)
-        result = left.merge(right)
+        actual = left.merge(right)
 
-        assert_equal_with_units(expected, result)
+        assert_equal_with_units(expected, actual)
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index ee8d54e567e..62fde920b1e 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -22,6 +22,7 @@
     PandasIndexAdapter,
     VectorizedIndexer,
 )
+from xarray.core.pycompat import dask_array_type
 from xarray.core.utils import NDArrayMixin
 from xarray.core.variable import as_compatible_data, as_variable
 from xarray.tests import requires_bottleneck
@@ -431,7 +432,7 @@ def test_concat(self):
         assert_identical(
             Variable(["b", "a"], np.array([x, y])), Variable.concat((v, w), "b")
         )
-        with raises_regex(ValueError, "inconsistent dimensions"):
+        with raises_regex(ValueError, "Variable has dimensions"):
             Variable.concat([v, Variable(["c"], y)], "b")
         # test indexers
         actual = Variable.concat(
@@ -450,16 +451,12 @@ def test_concat(self):
             Variable.concat([v[:, 0], v[:, 1:]], "x")
 
     def test_concat_attrs(self):
-        # different or conflicting attributes should be removed
+        # always keep attrs from first variable
         v = self.cls("a", np.arange(5), {"foo": "bar"})
         w = self.cls("a", np.ones(5))
         expected = self.cls(
             "a", np.concatenate([np.arange(5), np.ones(5)])
         ).to_base_variable()
-        assert_identical(expected, Variable.concat([v, w], "a"))
-        w.attrs["foo"] = 2
-        assert_identical(expected, Variable.concat([v, w], "a"))
-        w.attrs["foo"] = "bar"
         expected.attrs["foo"] = "bar"
         assert_identical(expected, Variable.concat([v, w], "a"))
 
@@ -1155,6 +1152,26 @@ def test_items(self):
     def test_getitem_basic(self):
         v = self.cls(["x", "y"], [[0, 1, 2], [3, 4, 5]])
 
+        # int argument
+        v_new = v[0]
+        assert v_new.dims == ("y",)
+        assert_array_equal(v_new, v._data[0])
+
+        # slice argument
+        v_new = v[:2]
+        assert v_new.dims == ("x", "y")
+        assert_array_equal(v_new, v._data[:2])
+
+        # list arguments
+        v_new = v[[0]]
+        assert v_new.dims == ("x", "y")
+        assert_array_equal(v_new, v._data[[0]])
+
+        v_new = v[[]]
+        assert v_new.dims == ("x", "y")
+        assert_array_equal(v_new, v._data[[]])
+
+        # dict arguments
         v_new = v[dict(x=0)]
         assert v_new.dims == ("y",)
         assert_array_equal(v_new, v._data[0])
@@ -1195,6 +1212,8 @@ def test_isel(self):
         assert_identical(v.isel(time=0), v[0])
         assert_identical(v.isel(time=slice(0, 3)), v[:3])
         assert_identical(v.isel(x=0), v[:, 0])
+        assert_identical(v.isel(x=[0, 2]), v[:, [0, 2]])
+        assert_identical(v.isel(time=[]), v[[]])
         with raises_regex(ValueError, "do not exist"):
             v.isel(not_a_dim=0)
 
@@ -1492,25 +1511,41 @@ def test_reduce(self):
         with pytest.warns(DeprecationWarning, match="allow_lazy is deprecated"):
             v.mean(dim="x", allow_lazy=False)
 
-    def test_quantile(self):
+    @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]])
+    @pytest.mark.parametrize(
+        "axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]])
+    )
+    def test_quantile(self, q, axis, dim):
         v = Variable(["x", "y"], self.d)
-        for q in [0.25, [0.50], [0.25, 0.75]]:
-            for axis, dim in zip(
-                [None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]]
-            ):
-                actual = v.quantile(q, dim=dim)
+        actual = v.quantile(q, dim=dim)
+        expected = np.nanpercentile(self.d, np.array(q) * 100, axis=axis)
+        np.testing.assert_allclose(actual.values, expected)
 
-                expected = np.nanpercentile(self.d, np.array(q) * 100, axis=axis)
-                np.testing.assert_allclose(actual.values, expected)
+    @requires_dask
+    @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]])
+    @pytest.mark.parametrize("axis, dim", [[1, "y"], [[1], ["y"]]])
+    def test_quantile_dask(self, q, axis, dim):
+        v = Variable(["x", "y"], self.d).chunk({"x": 2})
+        actual = v.quantile(q, dim=dim)
+        assert isinstance(actual.data, dask_array_type)
+        expected = np.nanpercentile(self.d, np.array(q) * 100, axis=axis)
+        np.testing.assert_allclose(actual.values, expected)
 
     @requires_dask
-    def test_quantile_dask_raises(self):
-        # regression for GH1524
-        v = Variable(["x", "y"], self.d).chunk(2)
+    def test_quantile_chunked_dim_error(self):
+        v = Variable(["x", "y"], self.d).chunk({"x": 2})
 
-        with raises_regex(TypeError, "arrays stored as dask"):
+        with raises_regex(ValueError, "dimension 'x'"):
             v.quantile(0.5, dim="x")
 
+    @pytest.mark.parametrize("q", [-0.1, 1.1, [2], [0.25, 2]])
+    def test_quantile_out_of_bounds(self, q):
+        v = Variable(["x", "y"], self.d)
+
+        # the brackets are regex metacharacters, so escape them in the pattern
+        with raises_regex(ValueError, r"Quantiles must be in the range \[0, 1\]"):
+            v.quantile(q, dim="x")
+
     @requires_dask
     @requires_bottleneck
     def test_rank_dask_raises(self):
@@ -1824,6 +1859,26 @@ def test_coarsen_2d(self):
         expected[1, 1] *= 12 / 11
         assert_allclose(actual, expected)
 
+        v = self.cls(("x", "y"), np.arange(4 * 4, dtype=np.float32).reshape(4, 4))
+        actual = v.coarsen(dict(x=2, y=2), func="count", boundary="exact")
+        expected = self.cls(("x", "y"), 4 * np.ones((2, 2)))
+        assert_equal(actual, expected)
+
+        v[0, 0] = np.nan
+        v[-1, -1] = np.nan
+        expected[0, 0] = 3
+        expected[-1, -1] = 3
+        actual = v.coarsen(dict(x=2, y=2), func="count", boundary="exact")
+        assert_equal(actual, expected)
+
+        actual = v.coarsen(dict(x=2, y=2), func="sum", boundary="exact", skipna=False)
+        expected = self.cls(("x", "y"), [[np.nan, 18], [42, np.nan]])
+        assert_equal(actual, expected)
+
+        actual = v.coarsen(dict(x=2, y=2), func="sum", boundary="exact", skipna=True)
+        expected = self.cls(("x", "y"), [[10, 18], [42, 35]])
+        assert_equal(actual, expected)
+
 
 @requires_dask
 class TestVariableWithDask(VariableSubclassobjects):
diff --git a/xarray/util/print_versions.py b/xarray/util/print_versions.py
index 0d6d147f0bb..6a0e62cc9dc 100755
--- a/xarray/util/print_versions.py
+++ b/xarray/util/print_versions.py
@@ -78,6 +78,13 @@ def netcdf_and_hdf5_versions():
 
 
 def show_versions(file=sys.stdout):
+    """Print the versions of xarray and its dependencies.
+
+    Parameters
+    ----------
+    file : file-like, optional
+        Print to the given file-like object. Defaults to sys.stdout.
+    """
     sys_info = get_sys_info()
 
     try: