Skip to content
forked from pydata/xarray

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into fix/plot-broadcast
Browse files Browse the repository at this point in the history
* upstream/master:
  Resolve the version issues on RTD (pydata#3589)
  Add bottleneck & rasterio git tip to upstream-dev CI (pydata#3585)
  update whats-new.rst (pydata#3581)
  Examples for quantile (pydata#3576)
  add cftime intersphinx entries (pydata#3577)
  Add pyXpcm to Related Projects doc page (pydata#3578)
  Reimplement quantile with apply_ufunc (pydata#3559)
  add environment file for binderized examples (pydata#3568)
  • Loading branch information
dcherian committed Dec 4, 2019
2 parents c69b21a + ed05f98 commit e8ea801
Show file tree
Hide file tree
Showing 15 changed files with 280 additions and 76 deletions.
39 changes: 39 additions & 0 deletions .binder/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
name: xarray-examples
channels:
- conda-forge
dependencies:
- python=3.7
- boto3
- bottleneck
- cartopy
- cdms2
- cfgrib
- cftime
- coveralls
- dask
- distributed
- dask_labextension
- h5netcdf
- h5py
- hdf5
- iris
- lxml # Optional dep of pydap
- matplotlib
- nc-time-axis
- netcdf4
- numba
- numpy
- pandas
- pint
- pip
- pydap
- pynio
- rasterio
- scipy
- seaborn
- sparse
- toolz
- xarray
- zarr
- pip:
- numbagg
4 changes: 3 additions & 1 deletion ci/azure/install.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ steps:
git+https://github.com/dask/dask \
git+https://github.com/dask/distributed \
git+https://github.com/zarr-developers/zarr \
git+https://github.com/Unidata/cftime
git+https://github.com/Unidata/cftime \
git+https://github.com/mapbox/rasterio \
git+https://github.com/pydata/bottleneck
condition: eq(variables['UPSTREAM_DEV'], 'true')
displayName: Install upstream dev dependencies

Expand Down
7 changes: 2 additions & 5 deletions ci/requirements/doc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ dependencies:
- python=3.7
- bottleneck
- cartopy
- eccodes
- cfgrib
- h5netcdf
- ipykernel
- ipython
Expand All @@ -21,8 +21,5 @@ dependencies:
- seaborn
- sphinx
- sphinx_rtd_theme
- xarray
- zarr
- pip
- pip:
- cfgrib

7 changes: 7 additions & 0 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,16 @@

import datetime
import os
import pathlib
import subprocess
import sys
from contextlib import suppress

# make sure the source version is preferred (#3567)
root = pathlib.Path(__file__).absolute().parent.parent
os.environ["PYTHONPATH"] = str(root)
sys.path.insert(0, str(root))

import xarray

allowed_failures = set()
Expand Down Expand Up @@ -351,4 +357,5 @@
"numba": ("https://numba.pydata.org/numba-doc/latest", None),
"matplotlib": ("https://matplotlib.org", None),
"dask": ("https://docs.dask.org/en/latest", None),
"cftime": ("https://unidata.github.io/cftime", None),
}
1 change: 1 addition & 0 deletions doc/related-projects.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Geosciences
- `PyGDX <https://pygdx.readthedocs.io/en/latest/>`_: Python 3 package for
accessing data stored in GAMS Data eXchange (GDX) files. Also uses a custom
subclass.
- `pyXpcm <https://pyxpcm.readthedocs.io>`_: xarray-based Profile Classification Modelling (PCM), mostly for ocean data.
- `Regionmask <https://regionmask.readthedocs.io/>`_: plotting and creation of masks of spatial regions
- `rioxarray <https://corteva.github.io/rioxarray>`_: geospatial xarray extension powered by rasterio
- `salem <https://salem.readthedocs.io>`_: Adds geolocalised subsetting, masking, and plotting operations to xarray's data structures via accessors.
Expand Down
6 changes: 6 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ Breaking changes

New Features
~~~~~~~~~~~~
- :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile` and ``GroupBy.quantile``
now work with dask Variables.
By `Deepak Cherian <https://github.com/dcherian>`_.


Bug fixes
Expand All @@ -47,6 +50,9 @@ Documentation
data. (:pull:`3199`)
By `Zach Bruick <https://github.com/zbruick>`_ and
`Stephan Siemen <https://github.com/StephanSiemen>`_
- Added examples for :py:meth:`DataArray.quantile`, :py:meth:`Dataset.quantile` and
``GroupBy.quantile``. (:pull:`3576`)
By `Justus Magin <https://github.com/keewis>`_.

Internal Changes
~~~~~~~~~~~~~~~~
Expand Down
2 changes: 1 addition & 1 deletion readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ conda:
file: ci/requirements/doc.yml
python:
version: 3.7
setup_py_install: true
setup_py_install: false
formats: []
37 changes: 37 additions & 0 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2971,6 +2971,43 @@ def quantile(
See Also
--------
numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile
Examples
--------
>>> da = xr.DataArray(
... data=[[0.7, 4.2, 9.4, 1.5], [6.5, 7.3, 2.6, 1.9]],
... coords={"x": [7, 9], "y": [1, 1.5, 2, 2.5]},
... dims=("x", "y"),
... )
Single quantile
>>> da.quantile(0) # or da.quantile(0, dim=...)
<xarray.DataArray ()>
array(0.7)
Coordinates:
quantile float64 0.0
>>> da.quantile(0, dim="x")
<xarray.DataArray (y: 4)>
array([0.7, 4.2, 2.6, 1.5])
Coordinates:
* y (y) float64 1.0 1.5 2.0 2.5
quantile float64 0.0
Multiple quantiles
>>> da.quantile([0, 0.5, 1])
<xarray.DataArray (quantile: 3)>
array([0.7, 3.4, 9.4])
Coordinates:
* quantile (quantile) float64 0.0 0.5 1.0
>>> da.quantile([0, 0.5, 1], dim="x")
<xarray.DataArray (quantile: 3, y: 4)>
array([[0.7 , 4.2 , 2.6 , 1.5 ],
[3.6 , 5.75, 6. , 1.7 ],
[6.5 , 7.3 , 9.4 , 1.9 ]])
Coordinates:
* y (y) float64 1.0 1.5 2.0 2.5
* quantile (quantile) float64 0.0 0.5 1.0
"""

ds = self._to_temp_dataset().quantile(
Expand Down
48 changes: 43 additions & 5 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5116,6 +5116,48 @@ def quantile(
See Also
--------
numpy.nanpercentile, pandas.Series.quantile, DataArray.quantile
Examples
--------
>>> ds = xr.Dataset(
... {"a": (("x", "y"), [[0.7, 4.2, 9.4, 1.5], [6.5, 7.3, 2.6, 1.9]])},
... coords={"x": [7, 9], "y": [1, 1.5, 2, 2.5]},
... )
Single quantile
>>> ds.quantile(0) # or ds.quantile(0, dim=...)
<xarray.Dataset>
Dimensions: ()
Coordinates:
quantile float64 0.0
Data variables:
a float64 0.7
>>> ds.quantile(0, dim="x")
<xarray.Dataset>
Dimensions: (y: 4)
Coordinates:
* y (y) float64 1.0 1.5 2.0 2.5
quantile float64 0.0
Data variables:
a (y) float64 0.7 4.2 2.6 1.5
Multiple quantiles
>>> ds.quantile([0, 0.5, 1])
<xarray.Dataset>
Dimensions: (quantile: 3)
Coordinates:
* quantile (quantile) float64 0.0 0.5 1.0
Data variables:
a (quantile) float64 0.7 3.4 9.4
>>> ds.quantile([0, 0.5, 1], dim="x")
<xarray.Dataset>
Dimensions: (quantile: 3, y: 4)
Coordinates:
* y (y) float64 1.0 1.5 2.0 2.5
* quantile (quantile) float64 0.0 0.5 1.0
Data variables:
a (quantile, y) float64 0.7 4.2 2.6 1.5 3.6 ... 1.7 6.5 7.3 9.4 1.9
"""

if isinstance(dim, str):
Expand Down Expand Up @@ -5166,11 +5208,7 @@ def quantile(
new = self._replace_with_new_dims(
variables, coord_names=coord_names, attrs=attrs, indexes=indexes
)
if "quantile" in new.dims:
new.coords["quantile"] = Variable("quantile", q)
else:
new.coords["quantile"] = q
return new
return new.assign_coords(quantile=q)

def rank(self, dim, pct=False, keep_attrs=None):
"""Ranks the data.
Expand Down
53 changes: 53 additions & 0 deletions xarray/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,59 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None):
--------
numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile,
DataArray.quantile
Examples
--------
>>> da = xr.DataArray(
... [[1.3, 8.4, 0.7, 6.9], [0.7, 4.2, 9.4, 1.5], [6.5, 7.3, 2.6, 1.9]],
... coords={"x": [0, 0, 1], "y": [1, 1, 2, 2]},
... dims=("x", "y"),
... )
>>> ds = xr.Dataset({"a": da})
Single quantile
>>> da.groupby("x").quantile(0)
<xarray.DataArray (x: 2, y: 4)>
array([[0.7, 4.2, 0.7, 1.5],
[6.5, 7.3, 2.6, 1.9]])
Coordinates:
quantile float64 0.0
* y (y) int64 1 1 2 2
* x (x) int64 0 1
>>> ds.groupby("y").quantile(0, dim=...)
<xarray.Dataset>
Dimensions: (y: 2)
Coordinates:
quantile float64 0.0
* y (y) int64 1 2
Data variables:
a (y) float64 0.7 0.7
Multiple quantiles
>>> da.groupby("x").quantile([0, 0.5, 1])
<xarray.DataArray (x: 2, y: 4, quantile: 3)>
array([[[0.7 , 1. , 1.3 ],
[4.2 , 6.3 , 8.4 ],
[0.7 , 5.05, 9.4 ],
[1.5 , 4.2 , 6.9 ]],
[[6.5 , 6.5 , 6.5 ],
[7.3 , 7.3 , 7.3 ],
[2.6 , 2.6 , 2.6 ],
[1.9 , 1.9 , 1.9 ]]])
Coordinates:
* y (y) int64 1 1 2 2
* quantile (quantile) float64 0.0 0.5 1.0
* x (x) int64 0 1
>>> ds.groupby("y").quantile([0, 0.5, 1], dim=...)
<xarray.Dataset>
Dimensions: (quantile: 3, y: 2)
Coordinates:
* quantile (quantile) float64 0.0 0.5 1.0
* y (y) int64 1 2
Data variables:
a (y, quantile) float64 0.7 5.35 8.4 0.7 2.25 9.4
"""
if dim is None:
dim = self._group_dim
Expand Down
63 changes: 34 additions & 29 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -1716,40 +1716,45 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None):
numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile,
DataArray.quantile
"""
if isinstance(self.data, dask_array_type):
raise TypeError(
"quantile does not work for arrays stored as dask "
"arrays. Load the data via .compute() or .load() "
"prior to calling this method."
)

q = np.asarray(q, dtype=np.float64)

new_dims = list(self.dims)
if dim is not None:
axis = self.get_axis_num(dim)
if utils.is_scalar(dim):
new_dims.remove(dim)
else:
for d in dim:
new_dims.remove(d)
else:
axis = None
new_dims = []

# Only add the quantile dimension if q is array-like
if q.ndim != 0:
new_dims = ["quantile"] + new_dims

qs = np.nanpercentile(
self.data, q * 100.0, axis=axis, interpolation=interpolation
)
from .computation import apply_ufunc

if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)
attrs = self._attrs if keep_attrs else None

return Variable(new_dims, qs, attrs)
scalar = utils.is_scalar(q)
q = np.atleast_1d(np.asarray(q, dtype=np.float64))

if dim is None:
dim = self.dims

if utils.is_scalar(dim):
dim = [dim]

def _wrapper(npa, **kwargs):
# move quantile axis to end. required for apply_ufunc
return np.moveaxis(np.nanpercentile(npa, **kwargs), 0, -1)

axis = np.arange(-1, -1 * len(dim) - 1, -1)
result = apply_ufunc(
_wrapper,
self,
input_core_dims=[dim],
exclude_dims=set(dim),
output_core_dims=[["quantile"]],
output_dtypes=[np.float64],
output_sizes={"quantile": len(q)},
dask="parallelized",
kwargs={"q": q * 100, "axis": axis, "interpolation": interpolation},
)

# for backward compatibility
result = result.transpose("quantile", ...)
if scalar:
result = result.squeeze("quantile")
if keep_attrs:
result.attrs = self._attrs
return result

def rank(self, dim, pct=False):
"""Ranks the data.
Expand Down
1 change: 1 addition & 0 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -3951,6 +3951,7 @@ def test_rasterio_environment(self):
with xr.open_rasterio(tmp_file) as actual:
assert_allclose(actual, expected)

@pytest.mark.xfail(reason="rasterio 1.1.1 is broken. GH3573")
def test_rasterio_vrt(self):
import rasterio

Expand Down
Loading

0 comments on commit e8ea801

Please sign in to comment.