Skip to content
forked from pydata/xarray

Commit

Permalink
Fix binning by unsorted array
Browse files Browse the repository at this point in the history
  • Loading branch information
dcherian committed Apr 17, 2023
1 parent 68bae08 commit f22e214
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 26 deletions.
24 changes: 4 additions & 20 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,33 +16,17 @@ What's New
.. _whats-new.2023.05.0:
.. _whats-new.2023.04.1:

v2023.05.0 (unreleased)
v2023.04.1 (unreleased)
-----------------------

New Features
~~~~~~~~~~~~


Breaking changes
~~~~~~~~~~~~~~~~


Deprecations
~~~~~~~~~~~~

This is a patch release to fix a bug with binning (:issue:`7759`).

Bug fixes
~~~~~~~~~


Documentation
~~~~~~~~~~~~~


Internal Changes
~~~~~~~~~~~~~~~~
- Fix binning by unsorted arrays. (:issue:`7759`)


.. _whats-new.2023.04.0:
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ def _factorize_bins(
if (codes == -1).all():
raise ValueError(f"None of the data falls within bins with edges {bins!r}")
full_index = binned.categories
unique_values = binned.unique().dropna()
unique_values = np.sort(binned.unique().dropna())
group_indices = [g for g in _codes_to_groups(codes, len(full_index)) if g]

if len(group_indices) == 0:
Expand Down
23 changes: 18 additions & 5 deletions xarray/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1371,17 +1371,30 @@ def test_groupby_multidim_map(self):
)
assert_identical(expected, actual)

def test_groupby_bins(self):
array = DataArray(np.arange(4), dims="dim_0")
@pytest.mark.parametrize("coords", [np.arange(4), np.arange(4)[::-1], [2, 0, 3, 1]])
def test_groupby_bins(self, coords):
array = DataArray(
np.arange(4), dims="dim_0", coords={"dim_0": coords}, name="a"
)
# the first value should not be part of any group ("right" binning)
array[0] = 99
# bins follow conventions for pandas.cut
# http://pandas.pydata.org/pandas-docs/stable/generated/pandas.cut.html
bins = [0, 1.5, 5]
bin_coords = pd.cut(array["dim_0"], bins).categories
expected = DataArray(
[1, 5], dims="dim_0_bins", coords={"dim_0_bins": bin_coords}

df = array.to_dataframe()
df["dim_0_bins"] = pd.cut(array["dim_0"], bins)

expected_df = df.groupby("dim_0_bins").sum()
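# TODO: a DataFrame with an IntervalIndex can't be converted to xarray directly,
# so drop the index, convert, then re-attach the intervals as a coordinate.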

expected = (
expected_df.reset_index(drop=True)
.to_xarray()
.assign_coords(index=np.array(expected_df.index))
.rename({"index": "dim_0_bins"})["a"]
)

actual = array.groupby_bins("dim_0", bins=bins).sum()
assert_identical(expected, actual)

Expand Down

0 comments on commit f22e214

Please sign in to comment.