Skip to content

Commit 4923039

Browse files
fujiisoupshoyer
authored andcommitted
Implement integrate (#2653)
* added integrate. * Docs * Update via comment * Update via comments * integrate can accept multiple dimensions. * using set instead of list * dim -> coord
1 parent 3cc0d22 commit 4923039

File tree

7 files changed

+232
-2
lines changed

7 files changed

+232
-2
lines changed

doc/api.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ Computation
152152
Dataset.diff
153153
Dataset.quantile
154154
Dataset.differentiate
155+
Dataset.integrate
155156

156157
**Aggregation**:
157158
:py:attr:`~Dataset.all`
@@ -321,6 +322,7 @@ Computation
321322
DataArray.dot
322323
DataArray.quantile
323324
DataArray.differentiate
325+
DataArray.integrate
324326

325327
**Aggregation**:
326328
:py:attr:`~DataArray.all`

doc/computation.rst

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,8 @@ function or method name to ``coord_func`` option,
240240
da.coarsen(time=7, x=2, coord_func={'time': 'min'}).mean()
241241
242242
243+
.. _compute.using_coordinates:
244+
243245
Computation using Coordinates
244246
=============================
245247

@@ -261,9 +263,17 @@ This method can be used also for multidimensional arrays,
261263
coords={'x': [0.1, 0.11, 0.2, 0.3]})
262264
a.differentiate('x')
263265
266+
:py:meth:`~xarray.DataArray.integrate` computes the integral of an array
267+
with the trapezoidal rule, using its coordinates,
268+
269+
.. ipython:: python
270+
271+
a.integrate('x')
272+
264273
.. note::
265-
This method is limited to simple cartesian geometry. Differentiation along
266-
multidimensional coordinate is not supported.
274+
These methods are limited to simple cartesian geometry. Differentiation
275+
and integration along multidimensional coordinates are not supported.
276+
267277

268278
.. _compute.broadcasting:
269279

doc/whats-new.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,13 @@ Enhancements
4747
``nearest``. (:issue:`2695`)
4848
By `Hauke Schulz <https://github.com/observingClouds>`_.
4949

50+
- :py:meth:`~xarray.DataArray.integrate` and
51+
:py:meth:`~xarray.Dataset.integrate` are newly added.
52+
See :ref:`compute.using_coordinates` for details.
53+
(:issue:`1332`)
54+
By `Keisuke Fujii <https://github.com/fujiisoup>`_.
55+
56+
5057
Bug fixes
5158
~~~~~~~~~
5259

@@ -115,6 +122,7 @@ Breaking changes
115122
(:issue:`2565`). The previous behavior was to decode them only if they
116123
had specific time attributes, now these attributes are copied
117124
automatically from the corresponding time coordinate. This might
125+
break downstream code that was relying on these variables to be
118126
brake downstream code that was relying on these variables to be
119127
not decoded.
120128
By `Fabien Maussion <https://github.com/fmaussion>`_.

xarray/core/dataarray.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2425,6 +2425,53 @@ def differentiate(self, coord, edge_order=1, datetime_unit=None):
24252425
coord, edge_order, datetime_unit)
24262426
return self._from_temp_dataset(ds)
24272427

2428+
def integrate(self, dim, datetime_unit=None):
    """Integrate the array with the trapezoidal rule.

    .. note::
        This feature is limited to simple cartesian geometry, i.e. each
        coordinate used for the integration must be one dimensional.

    Parameters
    ----------
    dim : str, or a sequence of str
        Coordinate(s) used for the integration.
    datetime_unit : str, optional
        Can be used to specify the unit if a datetime coordinate is used.
        One of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', 'ps',
        'fs', 'as'}

    Returns
    -------
    integrated : DataArray

    See also
    --------
    numpy.trapz : corresponding numpy function

    Examples
    --------

    >>> da = xr.DataArray(np.arange(12).reshape(4, 3), dims=['x', 'y'],
    ...                   coords={'x': [0, 0.1, 1.1, 1.2]})
    >>> da
    <xarray.DataArray (x: 4, y: 3)>
    array([[ 0,  1,  2],
           [ 3,  4,  5],
           [ 6,  7,  8],
           [ 9, 10, 11]])
    Coordinates:
      * x        (x) float64 0.0 0.1 1.1 1.2
    Dimensions without coordinates: y
    >>>
    >>> da.integrate('x')
    <xarray.DataArray (y: 3)>
    array([5.4, 6.6, 7.8])
    Dimensions without coordinates: y
    """
    # The work is delegated to the Dataset implementation: convert to a
    # temporary dataset, integrate it, then convert the result back.
    temp_ds = self._to_temp_dataset()
    integrated = temp_ds.integrate(dim, datetime_unit)
    return self._from_temp_dataset(integrated)
2474+
24282475

24292476
# priority must be higher than Variable to properly work with binary ufuncs
24302477
ops.inject_all_ops_and_reduce_methods(DataArray, priority=60)

xarray/core/dataset.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3869,6 +3869,78 @@ def differentiate(self, coord, edge_order=1, datetime_unit=None):
38693869
variables[k] = v
38703870
return self._replace_vars_and_dims(variables)
38713871

3872+
def integrate(self, coord, datetime_unit=None):
    """Integrate the dataset with the trapezoidal rule.

    .. note::
        This feature is limited to simple cartesian geometry, i.e. coord
        must be one dimensional.

    Parameters
    ----------
    coord : str, or a sequence of str
        Coordinate(s) used for the integration. A list or tuple is
        integrated over one coordinate after another, in the given order;
        any other value is treated as a single coordinate name.
    datetime_unit : str, optional
        Can be used to specify the unit if a datetime coordinate is used.
        One of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', 'ps',
        'fs', 'as'}

    Returns
    -------
    integrated : Dataset

    See also
    --------
    DataArray.integrate
    numpy.trapz : corresponding numpy function
    """
    coords = coord if isinstance(coord, (list, tuple)) else (coord, )
    integrated = self
    for name in coords:
        # Each step integrates the result of the previous one.
        integrated = integrated._integrate_one(
            name, datetime_unit=datetime_unit)
    return integrated
3903+
3904+
def _integrate_one(self, coord, datetime_unit=None):
3905+
from .variable import Variable
3906+
3907+
if coord not in self.variables and coord not in self.dims:
3908+
raise ValueError('Coordinate {} does not exist.'.format(dim))
3909+
3910+
coord_var = self[coord].variable
3911+
if coord_var.ndim != 1:
3912+
raise ValueError('Coordinate {} must be 1 dimensional but is {}'
3913+
' dimensional'.format(coord, coord_var.ndim))
3914+
3915+
dim = coord_var.dims[0]
3916+
if _contains_datetime_like_objects(coord_var):
3917+
if coord_var.dtype.kind in 'mM' and datetime_unit is None:
3918+
datetime_unit, _ = np.datetime_data(coord_var.dtype)
3919+
elif datetime_unit is None:
3920+
datetime_unit = 's' # Default to seconds for cftime objects
3921+
coord_var = datetime_to_numeric(
3922+
coord_var, datetime_unit=datetime_unit)
3923+
3924+
variables = OrderedDict()
3925+
coord_names = set()
3926+
for k, v in self.variables.items():
3927+
if k in self.coords:
3928+
if dim not in v.dims:
3929+
variables[k] = v
3930+
coord_names.add(k)
3931+
else:
3932+
if k in self.data_vars and dim in v.dims:
3933+
if _contains_datetime_like_objects(v):
3934+
v = datetime_to_numeric(v, datetime_unit=datetime_unit)
3935+
integ = duck_array_ops.trapz(
3936+
v.data, coord_var.data, axis=v.get_axis_num(dim))
3937+
v_dims = list(v.dims)
3938+
v_dims.remove(dim)
3939+
variables[k] = Variable(v_dims, integ)
3940+
else:
3941+
variables[k] = v
3942+
return self._replace_vars_and_dims(variables, coord_names=coord_names)
3943+
38723944
@property
38733945
def real(self):
38743946
return self._unary_op(lambda x: x.real,

xarray/core/duck_array_ops.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,18 @@ def gradient(x, coord, axis, edge_order):
9999
return npcompat.gradient(x, coord, axis=axis, edge_order=edge_order)
100100

101101

102+
def trapz(y, x, axis):
    """Integrate ``y`` along ``axis`` with the trapezoidal rule.

    ``x`` holds the sample positions; it is sliced along its first axis and
    padded with new trailing axes so that the spacing lines up with ``axis``
    of ``y`` (assumes ``x`` is one dimensional -- TODO confirm with callers).
    The reduction uses this module's ``sum`` with ``skipna=False``.
    """
    if axis < 0:
        axis = y.ndim + axis

    upper = slice(1, None)   # samples 1 .. n-1
    lower = slice(None, -1)  # samples 0 .. n-2
    trailing = (None, ) * (y.ndim - axis - 1)
    leading = (slice(None), ) * axis

    dx = x[(upper, ) + trailing] - x[(lower, ) + trailing]
    neighbour_sum = y[leading + (upper, )] + y[leading + (lower, )]
    integrand = dx * 0.5 * neighbour_sum
    return sum(integrand, axis=axis, skipna=False)
112+
113+
102114
masked_invalid = _dask_or_eager_func(
103115
'masked_invalid', eager_module=np.ma,
104116
dask_module=getattr(dask_array, 'ma', None))

xarray/tests/test_dataset.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4716,3 +4716,82 @@ def test_differentiate_cftime(dask):
47164716
# Test the differentiation of datetimes themselves
47174717
actual = da['time'].differentiate('time', edge_order=1, datetime_unit='D')
47184718
assert_allclose(actual, xr.ones_like(da['time']).astype(float))
4719+
4720+
4721+
@pytest.mark.parametrize('dask', [True, False])
def test_integrate(dask):
    """Compare ``integrate`` against ``np.trapz`` along each dimension."""
    rng = np.random.RandomState(42)
    x_values = [0.2, 0.35, 0.4, 0.6, 0.7, 0.75, 0.76, 0.8]

    # The order of the random draws is kept: data, then 'x2', then 'x2d'.
    data = rng.randn(8, 6)
    x2_values = rng.randn(8)
    x2d_values = rng.randn(8, 6)
    da = xr.DataArray(
        data, dims=['x', 'y'],
        coords={'x': x_values,
                'x2': (('x', ), x2_values),
                'z': 3,
                'x2d': (('x', 'y'), x2d_values)})
    if dask and has_dask:
        da = da.chunk({'x': 4})

    ds = xr.Dataset({'var': da})

    def coords_without(dim):
        # coordinates that contain the integrated dimension should be dropped
        return {k: v for k, v in da.coords.items() if dim not in v.dims}

    # along x
    actual = da.integrate('x')
    expected_x = xr.DataArray(
        np.trapz(da, da['x'], axis=0), dims=['y'],
        coords=coords_without('x'))
    assert_allclose(expected_x, actual.compute())
    assert_equal(ds['var'].integrate('x'), ds.integrate('x')['var'])

    # make sure result is also a dask array (if the source is dask array)
    assert isinstance(actual.data, type(da.data))

    # along y
    actual = da.integrate('y')
    expected_y = xr.DataArray(
        np.trapz(da, da['y'], axis=1), dims=['x'],
        coords=coords_without('y'))
    assert_allclose(expected_y, actual.compute())
    assert_equal(actual, ds.integrate('y')['var'])
    assert_equal(ds['var'].integrate('y'), ds.integrate('y')['var'])

    # along x and y
    actual = da.integrate(('y', 'x'))
    assert actual.ndim == 0

    # a two dimensional coordinate cannot be integrated over
    with pytest.raises(ValueError):
        da.integrate('x2d')
4761+
4762+
4763+
@pytest.mark.parametrize('dask', [True, False])
@pytest.mark.parametrize('which_datetime', ['np', 'cftime'])
def test_trapz_datetime(dask, which_datetime):
    """Check integration along numpy-datetime and cftime coordinates."""
    rng = np.random.RandomState(42)
    if which_datetime == 'np':
        time_values = np.array(
            ['2004-07-13', '2006-01-13', '2010-08-13', '2010-09-13',
             '2010-10-11', '2010-12-13', '2011-02-13', '2012-08-13'],
            dtype='datetime64')
    elif not has_cftime:
        pytest.skip('Test requires cftime.')
    else:
        time_values = xr.cftime_range('2000', periods=8, freq='2D')

    # The order of the random draws is kept: data first, then 't2d'.
    data = rng.randn(8, 6)
    t2d_values = rng.randn(8, 6)
    da = xr.DataArray(
        data,
        coords={'time': time_values,
                'z': 3,
                't2d': (('time', 'y'), t2d_values)},
        dims=['time', 'y'])

    if dask and has_dask:
        da = da.chunk({'time': 4})

    actual = da.integrate('time', datetime_unit='D')
    numeric_time = utils.datetime_to_numeric(da['time'], datetime_unit='D')
    expected_data = np.trapz(da, numeric_time, axis=0)
    kept_coords = {k: v for k, v in da.coords.items()
                   if 'time' not in v.dims}
    expected = xr.DataArray(expected_data, dims=['y'], coords=kept_coords)
    assert_allclose(expected, actual.compute())

    # make sure result is also a dask array (if the source is dask array)
    assert isinstance(actual.data, type(da.data))

    # integrating in hours gives 24 times the result in days
    actual2 = da.integrate('time', datetime_unit='h')
    assert_allclose(actual, actual2 / 24.0)

0 commit comments

Comments
 (0)