quantile: rename interpolation arg to method (#6108)

* quantile: rename interpolation arg to method * add whats new entry * Apply suggestions from code review * fix ArrayLike * type dim * cleanup * update docstrings * indentation and quotation marks * use Literal * update whats new * remove newline
pydata · Feb 7, 2022 · d47cf0c · d47cf0c
1 parent 52a051a
commit d47cf0c
Show file tree

Hide file tree

Showing 10 changed files with 386 additions and 75 deletions.
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -26,6 +26,9 @@ New Features
 Breaking changes
 ~~~~~~~~~~~~~~~~
 
+- Renamed the ``interpolation`` keyword of all ``quantile`` methods (e.g. :py:meth:`DataArray.quantile`)
+  to ``method`` for consistency with numpy v1.22.0 (:pull:`6108`).
+  By `Mathias Hauser <https://github.com/mathause>`_.
 
 Deprecations
 ~~~~~~~~~~~~

diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
@@ -53,6 +53,7 @@
 from .indexes import Index, Indexes, default_indexes, propagate_indexes
 from .indexing import is_fancy_indexer
 from .merge import PANDAS_TYPES, MergeError, _extract_indexes_from_coords
+from .npcompat import QUANTILE_METHODS, ArrayLike
 from .options import OPTIONS, _get_keep_attrs
 from .utils import (
     Default,
@@ -3426,11 +3427,12 @@ def sortby(
 
     def quantile(
         self,
-        q: Any,
-        dim: Hashable | Sequence[Hashable] | None = None,
-        interpolation: str = "linear",
+        q: ArrayLike,
+        dim: str | Sequence[Hashable] | None = None,
+        method: QUANTILE_METHODS = "linear",
         keep_attrs: bool = None,
         skipna: bool = True,
+        interpolation: QUANTILE_METHODS = None,
     ) -> DataArray:
         """Compute the qth quantile of the data along the specified dimension.
 
@@ -3442,18 +3444,34 @@ def quantile(
             Quantile to compute, which must be between 0 and 1 inclusive.
         dim : hashable or sequence of hashable, optional
             Dimension(s) over which to apply quantile.
-        interpolation : {"linear", "lower", "higher", "midpoint", "nearest"}, default: "linear"
-            This optional parameter specifies the interpolation method to
-            use when the desired quantile lies between two data points
-            ``i < j``:
-
-                - linear: ``i + (j - i) * fraction``, where ``fraction`` is
-                  the fractional part of the index surrounded by ``i`` and
-                  ``j``.
-                - lower: ``i``.
-                - higher: ``j``.
-                - nearest: ``i`` or ``j``, whichever is nearest.
-                - midpoint: ``(i + j) / 2``.
+        method : str, default: "linear"
+            This optional parameter specifies the interpolation method to use when the
+            desired quantile lies between two data points. The options sorted by their R
+            type as summarized in the H&F paper [1]_ are:
+
+                1. "inverted_cdf" (*)
+                2. "averaged_inverted_cdf" (*)
+                3. "closest_observation" (*)
+                4. "interpolated_inverted_cdf" (*)
+                5. "hazen" (*)
+                6. "weibull" (*)
+                7. "linear"  (default)
+                8. "median_unbiased" (*)
+                9. "normal_unbiased" (*)
+
+            The first three methods are discontiuous.  The following discontinuous
+            variations of the default "linear" (7.) option are also available:
+
+                * "lower"
+                * "higher"
+                * "midpoint"
+                * "nearest"
+
+            See :py:func:`numpy.quantile` or [1]_ for details. Methods marked with
+            an asterix require numpy version 1.22 or newer. The "method" argument was
+            previously called "interpolation", renamed in accordance with numpy
+            version 1.22.0.
+
         keep_attrs : bool, optional
             If True, the dataset's attributes (`attrs`) will be copied from
             the original object to the new one.  If False (default), the new
@@ -3505,14 +3523,21 @@ def quantile(
         Coordinates:
           * y         (y) float64 1.0 1.5 2.0 2.5
           * quantile  (quantile) float64 0.0 0.5 1.0
+
+        References
+        ----------
+        .. [1] R. J. Hyndman and Y. Fan,
+           "Sample quantiles in statistical packages,"
+           The American Statistician, 50(4), pp. 361-365, 1996
         """
 
         ds = self._to_temp_dataset().quantile(
             q,
             dim=dim,
             keep_attrs=keep_attrs,
-            interpolation=interpolation,
+            method=method,
             skipna=skipna,
+            interpolation=interpolation,
         )
         return self._from_temp_dataset(ds)
 

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
@@ -77,6 +77,7 @@
     merge_data_and_coords,
 )
 from .missing import get_clean_interp_index
+from .npcompat import QUANTILE_METHODS, ArrayLike
 from .options import OPTIONS, _get_keep_attrs
 from .pycompat import is_duck_dask_array, sparse_array_type
 from .utils import (
@@ -6137,12 +6138,13 @@ def sortby(self, variables, ascending=True):
 
     def quantile(
         self,
-        q,
-        dim=None,
-        interpolation="linear",
-        numeric_only=False,
-        keep_attrs=None,
-        skipna=True,
+        q: ArrayLike,
+        dim: str | Iterable[Hashable] | None = None,
+        method: QUANTILE_METHODS = "linear",
+        numeric_only: bool = False,
+        keep_attrs: bool = None,
+        skipna: bool = True,
+        interpolation: QUANTILE_METHODS = None,
     ):
         """Compute the qth quantile of the data along the specified dimension.
 
@@ -6155,18 +6157,34 @@ def quantile(
             Quantile to compute, which must be between 0 and 1 inclusive.
         dim : str or sequence of str, optional
             Dimension(s) over which to apply quantile.
-        interpolation : {"linear", "lower", "higher", "midpoint", "nearest"}, default: "linear"
-            This optional parameter specifies the interpolation method to
-            use when the desired quantile lies between two data points
-            ``i < j``:
-
-                * linear: ``i + (j - i) * fraction``, where ``fraction`` is
-                  the fractional part of the index surrounded by ``i`` and
-                  ``j``.
-                * lower: ``i``.
-                * higher: ``j``.
-                * nearest: ``i`` or ``j``, whichever is nearest.
-                * midpoint: ``(i + j) / 2``.
+        method : str, default: "linear"
+            This optional parameter specifies the interpolation method to use when the
+            desired quantile lies between two data points. The options sorted by their R
+            type as summarized in the H&F paper [1]_ are:
+
+                1. "inverted_cdf" (*)
+                2. "averaged_inverted_cdf" (*)
+                3. "closest_observation" (*)
+                4. "interpolated_inverted_cdf" (*)
+                5. "hazen" (*)
+                6. "weibull" (*)
+                7. "linear"  (default)
+                8. "median_unbiased" (*)
+                9. "normal_unbiased" (*)
+
+            The first three methods are discontiuous.  The following discontinuous
+            variations of the default "linear" (7.) option are also available:
+
+                * "lower"
+                * "higher"
+                * "midpoint"
+                * "nearest"
+
+            See :py:func:`numpy.quantile` or [1]_ for a description. Methods marked with
+            an asterix require numpy version 1.22 or newer. The "method" argument was
+            previously called "interpolation", renamed in accordance with numpy
+            version 1.22.0.
+
         keep_attrs : bool, optional
             If True, the dataset's attributes (`attrs`) will be copied from
             the original object to the new one.  If False (default), the new
@@ -6225,17 +6243,37 @@ def quantile(
           * quantile  (quantile) float64 0.0 0.5 1.0
         Data variables:
             a         (quantile, y) float64 0.7 4.2 2.6 1.5 3.6 ... 1.7 6.5 7.3 9.4 1.9
+
+        References
+        ----------
+        .. [1] R. J. Hyndman and Y. Fan,
+           "Sample quantiles in statistical packages,"
+           The American Statistician, 50(4), pp. 361-365, 1996
         """
 
+        # interpolation renamed to method in version 0.21.0
+        # check here and in variable to avoid repeated warnings
+        if interpolation is not None:
+            warnings.warn(
+                "The `interpolation` argument to quantile was renamed to `method`.",
+                FutureWarning,
+            )
+
+            if method != "linear":
+                raise TypeError("Cannot pass interpolation and method keywords!")
+
+            method = interpolation
+
+        dims: set[Hashable]
         if isinstance(dim, str):
             dims = {dim}
-        elif dim in [None, ...]:
+        elif dim is None or dim is ...:
             dims = set(self.dims)
         else:
             dims = set(dim)
 
         _assert_empty(
-            [d for d in dims if d not in self.dims],
+            tuple(d for d in dims if d not in self.dims),
             "Dataset does not contain the dimensions: %s",
         )
 
@@ -6251,15 +6289,10 @@ def quantile(
                         or np.issubdtype(var.dtype, np.number)
                         or var.dtype == np.bool_
                     ):
-                        if len(reduce_dims) == var.ndim:
-                            # prefer to aggregate over axis=None rather than
-                            # axis=(0, 1) if they will be equivalent, because
-                            # the former is often more efficient
-                            reduce_dims = None
                         variables[name] = var.quantile(
                             q,
                             dim=reduce_dims,
-                            interpolation=interpolation,
+                            method=method,
                             keep_attrs=keep_attrs,
                             skipna=skipna,
                         )

diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py
@@ -549,7 +549,13 @@ def fillna(self, value):
         return ops.fillna(self, value)
 
     def quantile(
-        self, q, dim=None, interpolation="linear", keep_attrs=None, skipna=True
+        self,
+        q,
+        dim=None,
+        method="linear",
+        keep_attrs=None,
+        skipna=True,
+        interpolation=None,
     ):
         """Compute the qth quantile over each array in the groups and
         concatenate them together into a new array.
@@ -562,18 +568,34 @@ def quantile(
         dim : ..., str or sequence of str, optional
             Dimension(s) over which to apply quantile.
             Defaults to the grouped dimension.
-        interpolation : {"linear", "lower", "higher", "midpoint", "nearest"}, default: "linear"
-            This optional parameter specifies the interpolation method to
-            use when the desired quantile lies between two data points
-            ``i < j``:
-
-                * linear: ``i + (j - i) * fraction``, where ``fraction`` is
-                  the fractional part of the index surrounded by ``i`` and
-                  ``j``.
-                * lower: ``i``.
-                * higher: ``j``.
-                * nearest: ``i`` or ``j``, whichever is nearest.
-                * midpoint: ``(i + j) / 2``.
+        method : str, default: "linear"
+            This optional parameter specifies the interpolation method to use when the
+            desired quantile lies between two data points. The options sorted by their R
+            type as summarized in the H&F paper [1]_ are:
+
+                1. "inverted_cdf" (*)
+                2. "averaged_inverted_cdf" (*)
+                3. "closest_observation" (*)
+                4. "interpolated_inverted_cdf" (*)
+                5. "hazen" (*)
+                6. "weibull" (*)
+                7. "linear"  (default)
+                8. "median_unbiased" (*)
+                9. "normal_unbiased" (*)
+
+            The first three methods are discontiuous.  The following discontinuous
+            variations of the default "linear" (7.) option are also available:
+
+                * "lower"
+                * "higher"
+                * "midpoint"
+                * "nearest"
+
+            See :py:func:`numpy.quantile` or [1]_ for details. Methods marked with
+            an asterix require numpy version 1.22 or newer. The "method" argument was
+            previously called "interpolation", renamed in accordance with numpy
+            version 1.22.0.
+
         skipna : bool, optional
             Whether to skip missing values when aggregating.
 
@@ -639,6 +661,12 @@ def quantile(
           * y         (y) int64 1 2
         Data variables:
             a         (y, quantile) float64 0.7 5.35 8.4 0.7 2.25 9.4
+
+        References
+        ----------
+        .. [1] R. J. Hyndman and Y. Fan,
+           "Sample quantiles in statistical packages,"
+           The American Statistician, 50(4), pp. 361-365, 1996
         """
         if dim is None:
             dim = self._group_dim
@@ -648,9 +676,10 @@ def quantile(
             shortcut=False,
             q=q,
             dim=dim,
-            interpolation=interpolation,
+            method=method,
             keep_attrs=keep_attrs,
             skipna=skipna,
+            interpolation=interpolation,
         )
         return out
 

diff --git a/xarray/core/npcompat.py b/xarray/core/npcompat.py
@@ -28,7 +28,7 @@
 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-from typing import TYPE_CHECKING, Any, Sequence, TypeVar, Union
+from typing import TYPE_CHECKING, Any, Literal, Sequence, TypeVar, Union
 
 import numpy as np
 from packaging.version import Version
@@ -169,3 +169,29 @@ def sliding_window_view(
         return as_strided(
             x, strides=out_strides, shape=out_shape, subok=subok, writeable=writeable
         )
+
+
+if Version(np.__version__) >= Version("1.22.0"):
+    QUANTILE_METHODS = Literal[
+        "inverted_cdf",
+        "averaged_inverted_cdf",
+        "closest_observation",
+        "interpolated_inverted_cdf",
+        "hazen",
+        "weibull",
+        "linear",
+        "median_unbiased",
+        "normal_unbiased",
+        "lower",
+        "higher",
+        "midpoint",
+        "nearest",
+    ]
+else:
+    QUANTILE_METHODS = Literal[  # type: ignore[misc]
+        "linear",
+        "lower",
+        "higher",
+        "midpoint",
+        "nearest",
+    ]