Skip to content
8 changes: 6 additions & 2 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1634,7 +1634,9 @@ def func(df):
return df._constructor_sliced(result, index=res.index)

func.__name__ = "idxmax"
result = self._python_apply_general(func, self._obj_with_exclusions)
result = self._python_apply_general(
func, self._obj_with_exclusions, not_indexed_same=True
)
self._maybe_warn_numeric_only_depr("idxmax", result, numeric_only)
return result

Expand Down Expand Up @@ -1673,7 +1675,9 @@ def func(df):
return df._constructor_sliced(result, index=res.index)

func.__name__ = "idxmin"
result = self._python_apply_general(func, self._obj_with_exclusions)
result = self._python_apply_general(
func, self._obj_with_exclusions, not_indexed_same=True
)
self._maybe_warn_numeric_only_depr("idxmin", result, numeric_only)
return result

Expand Down
5 changes: 4 additions & 1 deletion pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1040,7 +1040,10 @@ def curried(x):
return self._obj_with_exclusions

result = self._python_apply_general(
curried, self._obj_with_exclusions, is_transform=is_transform
curried,
self._obj_with_exclusions,
is_transform=is_transform,
not_indexed_same=not is_transform,
)

if self._selected_obj.ndim != 1 and self.axis != 1 and result.ndim != 1:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/interchange/from_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,7 +497,7 @@ def set_nulls(
null_pos = None

if null_kind == ColumnNullType.USE_SENTINEL:
null_pos = data == sentinel_val
null_pos = pd.Series(data) == sentinel_val
elif null_kind in (ColumnNullType.USE_BITMASK, ColumnNullType.USE_BYTEMASK):
assert validity, "Expected to have a validity buffer for the mask"
valid_buff, valid_dtype = validity
Expand Down
3 changes: 2 additions & 1 deletion pandas/plotting/_matplotlib/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
from pandas.io.formats.printing import pprint_thing
from pandas.plotting._matplotlib.converter import register_pandas_matplotlib_converters
from pandas.plotting._matplotlib.groupby import reconstruct_data_with_by
from pandas.plotting._matplotlib.misc import unpack_single_str_list
from pandas.plotting._matplotlib.style import get_standard_colors
from pandas.plotting._matplotlib.timeseries import (
decorate_axes,
Expand Down Expand Up @@ -177,7 +178,7 @@ def __init__(
# For `hist` plot, need to get grouped original data before `self.data` is
# updated later
if self.by is not None and self._kind == "hist":
self._grouped = data.groupby(self.by)
self._grouped = data.groupby(unpack_single_str_list(self.by))

self.kind = kind

Expand Down
1 change: 0 additions & 1 deletion pandas/plotting/_matplotlib/hist.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ def __init__(
MPLPlot.__init__(self, data, **kwargs)

def _args_adjust(self):

# calculate bin number separately in different subplots
# where subplots are created based on by argument
if is_integer(self.bins):
Expand Down
5 changes: 2 additions & 3 deletions pandas/plotting/_matplotlib/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,6 @@ def r(h):

def unpack_single_str_list(keys):
    """
    Unwrap a one-element list into its sole element.

    Parameters
    ----------
    keys : object
        Grouping key(s). Only a ``list`` of length one is altered; any
        other value (longer or empty lists, tuples, scalars) passes through.

    Returns
    -------
    object
        ``keys[0]`` when ``keys`` is a single-element list, otherwise
        ``keys`` unchanged.
    """
    # GH 42795
    # The unwrapping applies to any single element, not only to strings.
    if isinstance(keys, list) and len(keys) == 1:
        keys = keys[0]
    return keys
25 changes: 12 additions & 13 deletions pandas/tests/groupby/test_counting.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,21 +188,20 @@ def test_ngroup_cumcount_pair(self):
tm.assert_series_equal(g.ngroup(), Series(ngroupd))
tm.assert_series_equal(g.cumcount(), Series(cumcounted))

def test_ngroup_respects_groupby_order(self):
def test_ngroup_respects_groupby_order(self, sort):
np.random.seed(0)
df = DataFrame({"a": np.random.choice(list("abcdef"), 100)})
for sort_flag in (False, True):
g = df.groupby(["a"], sort=sort_flag)
df["group_id"] = -1
df["group_index"] = -1

for i, (_, group) in enumerate(g):
df.loc[group.index, "group_id"] = i
for j, ind in enumerate(group.index):
df.loc[ind, "group_index"] = j

tm.assert_series_equal(Series(df["group_id"].values), g.ngroup())
tm.assert_series_equal(Series(df["group_index"].values), g.cumcount())
g = df.groupby("a", sort=sort)
df["group_id"] = -1
df["group_index"] = -1

for i, (_, group) in enumerate(g):
df.loc[group.index, "group_id"] = i
for j, ind in enumerate(group.index):
df.loc[ind, "group_index"] = j

tm.assert_series_equal(Series(df["group_id"].values), g.ngroup())
tm.assert_series_equal(Series(df["group_index"].values), g.cumcount())

@pytest.mark.parametrize(
"datetimelike",
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/groupby/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -1590,11 +1590,11 @@ def test_corrwith_with_1_axis():
tm.assert_series_equal(result, expected)


@pytest.mark.filterwarnings("ignore:The 'mad' method.*:FutureWarning")
@pytest.mark.filterwarnings("ignore:.* is deprecated:FutureWarning")
def test_multiindex_group_all_columns_when_empty(groupby_func):
# GH 32464
df = DataFrame({"a": [], "b": [], "c": []}).set_index(["a", "b", "c"])
gb = df.groupby(["a", "b", "c"])
gb = df.groupby(["a", "b", "c"], group_keys=False)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These warnings should only be generated when using groupby(...).apply; I think we should instead suppress internally. In

result = self._python_apply_general(
curried, self._obj_with_exclusions, is_transform=is_transform
)

we could pass not_indexed_same=not is_transform and that would suppress all cases called from here. I believe it should also be correct, as something is indexed the same precisely when it's a transform.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 16ee154156..8d442c6ae3 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1040,7 +1040,7 @@ class GroupBy(BaseGroupBy[NDFrameT]):
                 return self._obj_with_exclusions

             result = self._python_apply_general(
-                curried, self._obj_with_exclusions, is_transform=is_transform
+                curried, self._obj_with_exclusions, is_transform=is_transform, not_indexed_same=not is_transform
             )

             if self._selected_obj.ndim != 1 and self.axis != 1 and result.ndim != 1:

and still get

pandas/tests/groupby/test_function.py::test_multiindex_group_all_columns_when_empty[idxmax]
pandas/tests/groupby/test_function.py::test_multiindex_group_all_columns_when_empty[idxmin]
  .../pandas/tests/groupby/test_function.py:1601: FutureWarning: Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
  To preserve the previous behavior, use

  	>>> .groupby(..., group_keys=False)

  To adopt the future behavior and silence this warning, use

  	>>> .groupby(..., group_keys=True)
    result = method(*args).index

Ignoring the warnings at this line still raises these warnings for idxmin/idxmax, so those must take a different code path?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ahh, that's right; they are listed in common_apply_allowlist but then also defined on DataFrameGroupBy (I have a PR for #48028 that does away with common_apply_allowlist completely that I'll put up once 1.5 is released).

The same can be done with idxmin/max on DataFrameGroupBy.

method = getattr(gb, groupby_func)
args = get_groupby_method_args(groupby_func, df)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/sas/test_sas7bdat.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def data_test_ix(request, dirpath):
for k in range(df.shape[1]):
col = df.iloc[:, k]
if col.dtype == np.int64:
df.iloc[:, k] = df.iloc[:, k].astype(np.float64)
df.isetitem(k, df.iloc[:, k].astype(np.float64))
return df, test_ix


Expand Down
22 changes: 15 additions & 7 deletions pandas/tests/plotting/frame/test_hist_box_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,9 @@ class TestHistWithBy(TestPlotBase):
)
def test_hist_plot_by_argument(self, by, column, titles, legends, hist_df):
# GH 15079
axes = _check_plot_works(hist_df.plot.hist, column=column, by=by)
axes = _check_plot_works(
hist_df.plot.hist, column=column, by=by, default_axes=True
)
result_titles = [ax.get_title() for ax in axes]
result_legends = [
[legend.get_text() for legend in ax.get_legend().texts] for ax in axes
Expand Down Expand Up @@ -120,7 +122,7 @@ def test_hist_plot_by_0(self, by, column, titles, legends, hist_df):
df = hist_df.copy()
df = df.rename(columns={"C": 0})

axes = _check_plot_works(df.plot.hist, column=column, by=by)
axes = _check_plot_works(df.plot.hist, default_axes=True, column=column, by=by)
result_titles = [ax.get_title() for ax in axes]
result_legends = [
[legend.get_text() for legend in ax.get_legend().texts] for ax in axes
Expand All @@ -142,7 +144,9 @@ def test_hist_plot_empty_list_string_tuple_by(self, by, column, hist_df):
# GH 15079
msg = "No group keys passed"
with pytest.raises(ValueError, match=msg):
_check_plot_works(hist_df.plot.hist, column=column, by=by)
_check_plot_works(
hist_df.plot.hist, default_axes=True, column=column, by=by
)

@pytest.mark.slow
@pytest.mark.parametrize(
Expand Down Expand Up @@ -274,7 +278,9 @@ class TestBoxWithBy(TestPlotBase):
)
def test_box_plot_by_argument(self, by, column, titles, xticklabels, hist_df):
# GH 15079
axes = _check_plot_works(hist_df.plot.box, column=column, by=by)
axes = _check_plot_works(
hist_df.plot.box, default_axes=True, column=column, by=by
)
result_titles = [ax.get_title() for ax in axes]
result_xticklabels = [
[label.get_text() for label in ax.get_xticklabels()] for ax in axes
Expand Down Expand Up @@ -313,7 +319,7 @@ def test_box_plot_by_0(self, by, column, titles, xticklabels, hist_df):
df = hist_df.copy()
df = df.rename(columns={"C": 0})

axes = _check_plot_works(df.plot.box, column=column, by=by)
axes = _check_plot_works(df.plot.box, default_axes=True, column=column, by=by)
result_titles = [ax.get_title() for ax in axes]
result_xticklabels = [
[label.get_text() for label in ax.get_xticklabels()] for ax in axes
Expand All @@ -335,7 +341,7 @@ def test_box_plot_with_none_empty_list_by(self, by, column, hist_df):
# GH 15079
msg = "No group keys passed"
with pytest.raises(ValueError, match=msg):
_check_plot_works(hist_df.plot.box, column=column, by=by)
_check_plot_works(hist_df.plot.box, default_axes=True, column=column, by=by)

@pytest.mark.slow
@pytest.mark.parametrize(
Expand All @@ -351,7 +357,9 @@ def test_box_plot_with_none_empty_list_by(self, by, column, hist_df):
)
def test_box_plot_layout_with_by(self, by, column, layout, axes_num, hist_df):
# GH 15079
axes = _check_plot_works(hist_df.plot.box, column=column, by=by, layout=layout)
axes = _check_plot_works(
hist_df.plot.box, default_axes=True, column=column, by=by, layout=layout
)
self._check_axes_shape(axes, axes_num=axes_num, layout=layout)

@pytest.mark.parametrize(
Expand Down