pandas-dev · mroeschke · Mar 17, 2023 · Feb 27, 2023 · Mar 6, 2023 · Mar 6, 2023
diff --git a/doc/source/user_guide/10min.rst b/doc/source/user_guide/10min.rst
@@ -702,11 +702,11 @@ Sorting is per order in the categories, not lexical order:
 
     df.sort_values(by="grade")
 
-Grouping by a categorical column also shows empty categories:
+Grouping by a categorical column with ``observed=False`` also shows empty categories:
 
 .. ipython:: python
 
-    df.groupby("grade").size()
+    df.groupby("grade", observed=False).size()
 
 
 Plotting

diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst
@@ -800,8 +800,8 @@ Groupby operations on the index will preserve the index nature as well.
 
 .. ipython:: python
 
-   df2.groupby(level=0).sum()
-   df2.groupby(level=0).sum().index
+   df2.groupby(level=0, observed=True).sum()
+   df2.groupby(level=0, observed=True).sum().index
 
 Reindexing operations will return a resulting index based on the type of the passed
 indexer. Passing a list will return a plain-old ``Index``; indexing with

diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst
@@ -607,7 +607,7 @@ even if some categories are not present in the data:
     s = pd.Series(pd.Categorical(["a", "b", "c", "c"], categories=["c", "a", "b", "d"]))
     s.value_counts()
 
-``DataFrame`` methods like :meth:`DataFrame.sum` also show "unused" categories.
+``DataFrame`` methods like :meth:`DataFrame.sum` also show "unused" categories when ``observed=False``.
 
 .. ipython:: python
 
@@ -618,17 +618,17 @@ even if some categories are not present in the data:
         data=[[1, 2, 3], [4, 5, 6]],
         columns=pd.MultiIndex.from_arrays([["A", "B", "B"], columns]),
     ).T
-    df.groupby(level=1).sum()
+    df.groupby(level=1, observed=False).sum()
 
-Groupby will also show "unused" categories:
+Groupby will also show "unused" categories when ``observed=False``:
 
 .. ipython:: python
 
     cats = pd.Categorical(
         ["a", "b", "b", "b", "c", "c", "c"], categories=["a", "b", "c", "d"]
     )
     df = pd.DataFrame({"cats": cats, "values": [1, 2, 2, 2, 3, 4, 5]})
-    df.groupby("cats").mean()
+    df.groupby("cats", observed=False).mean()
 
     cats2 = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b", "c"])
     df2 = pd.DataFrame(
@@ -638,7 +638,7 @@ Groupby will also show "unused" categories:
             "values": [1, 2, 3, 4],
         }
     )
-    df2.groupby(["cats", "B"]).mean()
+    df2.groupby(["cats", "B"], observed=False).mean()
 
 
 Pivot tables:

diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst
@@ -1401,7 +1401,7 @@ can be used as group keys. If so, the order of the levels will be preserved:
 
    factor = pd.qcut(data, [0, 0.25, 0.5, 0.75, 1.0])
 
-   data.groupby(factor).mean()
+   data.groupby(factor, observed=False).mean()
 
 .. _groupby.specify:
 

diff --git a/doc/source/whatsnew/v0.15.0.rst b/doc/source/whatsnew/v0.15.0.rst
@@ -85,7 +85,7 @@ For full docs, see the :ref:`categorical introduction <categorical>` and the
                                                   "medium", "good", "very good"])
     df["grade"]
     df.sort_values("grade")
-    df.groupby("grade").size()
+    df.groupby("grade", observed=False).size()
 
 - ``pandas.core.group_agg`` and ``pandas.core.factor_agg`` were removed. As an alternative, construct
   a dataframe and use ``df.groupby(<group>).agg(<func>)``.

diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst
@@ -1134,7 +1134,7 @@ As a consequence, ``groupby`` and ``set_index`` also preserve categorical dtypes
 .. ipython:: python
 
    df = pd.DataFrame({"A": [0, 1], "B": [10, 11], "C": cat})
-   df_grouped = df.groupby(by=["A", "C"]).first()
+   df_grouped = df.groupby(by=["A", "C"], observed=False).first()
    df_set_idx = df.set_index(["A", "C"])
 
 **Previous behavior**:

diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst
@@ -289,15 +289,15 @@ In previous versions, ``.groupby(..., sort=False)`` would fail with a ``ValueErr
 
 .. code-block:: ipython
 
-   In [3]: df[df.chromosomes != '1'].groupby('chromosomes', sort=False).sum()
+   In [3]: df[df.chromosomes != '1'].groupby('chromosomes', observed=False, sort=False).sum()
    ---------------------------------------------------------------------------
    ValueError: items in new_categories are not the same as in old categories
 
 **New behavior**:
 
 .. ipython:: python
 
-   df[df.chromosomes != '1'].groupby('chromosomes', sort=False).sum()
+   df[df.chromosomes != '1'].groupby('chromosomes', observed=False, sort=False).sum()
 
 .. _whatsnew_0200.enhancements.table_schema:
 

diff --git a/doc/source/whatsnew/v0.22.0.rst b/doc/source/whatsnew/v0.22.0.rst
@@ -109,7 +109,7 @@ instead of ``NaN``.
 
    In [8]: grouper = pd.Categorical(['a', 'a'], categories=['a', 'b'])
 
-   In [9]: pd.Series([1, 2]).groupby(grouper).sum()
+   In [9]: pd.Series([1, 2]).groupby(grouper, observed=False).sum()
    Out[9]:
    a    3.0
    b    NaN
@@ -120,14 +120,14 @@ instead of ``NaN``.
 .. ipython:: python
 
    grouper = pd.Categorical(["a", "a"], categories=["a", "b"])
-   pd.Series([1, 2]).groupby(grouper).sum()
+   pd.Series([1, 2]).groupby(grouper, observed=False).sum()
 
 To restore the 0.21 behavior of returning ``NaN`` for unobserved groups,
 use ``min_count>=1``.
 
 .. ipython:: python
 
-   pd.Series([1, 2]).groupby(grouper).sum(min_count=1)
+   pd.Series([1, 2]).groupby(grouper, observed=False).sum(min_count=1)
 
 Resample
 ^^^^^^^^

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -99,6 +99,7 @@ Deprecations
 - Deprecated silently dropping unrecognized timezones when parsing strings to datetimes (:issue:`18702`)
 - Deprecated :meth:`DataFrame._data` and :meth:`Series._data`, use public APIs instead (:issue:`33333`)
 - Deprecating pinning ``group.name`` to each group in :meth:`SeriesGroupBy.aggregate` aggregations; if your operation requires utilizing the groupby keys, iterate over the groupby object instead (:issue:`41090`)
+- Deprecated the default of ``observed=False`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby`; this will default to ``True`` in a future version (:issue:`43999`)
 - Deprecated ``axis=1`` in :meth:`DataFrame.groupby` and in :class:`Grouper` constructor, do ``frame.T.groupby(...)`` instead (:issue:`51203`)
 - Deprecated passing a :class:`DataFrame` to :meth:`DataFrame.from_records`, use :meth:`DataFrame.set_index` or :meth:`DataFrame.drop` instead (:issue:`51353`)
 - Deprecated accepting slices in :meth:`DataFrame.take`, call ``obj[slicer]`` or pass a sequence of integers instead (:issue:`51539`)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -8677,7 +8677,7 @@ def groupby(
         as_index: bool = True,
         sort: bool = True,
         group_keys: bool = True,
-        observed: bool = False,
+        observed: bool | lib.NoDefault = lib.no_default,
         dropna: bool = True,
     ) -> DataFrameGroupBy:
         if axis is not lib.no_default:

@@ -68,6 +68,7 @@ class providing the base-class of operations.
     cache_readonly,
     doc,
 )
+from pandas.util._exceptions import find_stack_level
 
 from pandas.core.dtypes.cast import ensure_dtype_can_hold_na
 from pandas.core.dtypes.common import (
@@ -905,7 +906,7 @@ def __init__(
         as_index: bool = True,
         sort: bool = True,
         group_keys: bool | lib.NoDefault = True,
-        observed: bool = False,
+        observed: bool | lib.NoDefault = lib.no_default,
         dropna: bool = True,
     ) -> None:
         self._selection = selection
@@ -922,7 +923,6 @@ def __init__(
         self.keys = keys
         self.sort = sort
         self.group_keys = group_keys
-        self.observed = observed
         self.dropna = dropna
 
         if grouper is None:
@@ -932,10 +932,23 @@ def __init__(
                 axis=axis,
                 level=level,
                 sort=sort,
-                observed=observed,
+                observed=False if observed is lib.no_default else observed,
                 dropna=self.dropna,
             )
 
+        if observed is lib.no_default:
+            if any(ping._passed_categorical for ping in grouper.groupings):
+                warnings.warn(
+                    "The default of observed=False is deprecated and will be changed "
+                    "to True in a future version of pandas. Pass observed=False to "
+                    "retain current behavior or observed=True to adopt the future "
+                    "default and silence this warning.",
+                    FutureWarning,
+                    stacklevel=find_stack_level(),
+                )
+            observed = False
+        self.observed = observed
+
         self.obj = obj
         self.axis = obj._get_axis_number(axis)
         self.grouper = grouper
@@ -2125,6 +2138,8 @@ def _value_counts(
                 result_series.index.droplevel(levels),
                 sort=self.sort,
                 dropna=self.dropna,
+                # GH#43999 - deprecation of observed=False
+                observed=False,
             ).transform("sum")
             result_series /= indexed_group_size
 

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -723,7 +723,11 @@ def _format_duplicate_message(self) -> DataFrame:
         duplicates = self[self.duplicated(keep="first")].unique()
         assert len(duplicates)
 
-        out = Series(np.arange(len(self))).groupby(self).agg(list)[duplicates]
+        out = (
+            Series(np.arange(len(self)))
+            .groupby(self, observed=False)
+            .agg(list)[duplicates]
+        )
         if self._is_multi:
             # test_format_duplicate_labels_message_multi
             # error: "Type[Index]" has no attribute "from_tuples"  [attr-defined]

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -1999,7 +1999,7 @@ def groupby(
         as_index: bool = True,
         sort: bool = True,
         group_keys: bool = True,
-        observed: bool = False,
+        observed: bool | lib.NoDefault = lib.no_default,
         dropna: bool = True,
     ) -> SeriesGroupBy:
         from pandas.core.groupby.generic import SeriesGroupBy

diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py
@@ -154,6 +154,11 @@
     This only applies if any of the groupers are Categoricals.
     If True: only show observed values for categorical groupers.
     If False: show all values for categorical groupers.
+
+    .. deprecated:: 2.1.0
+
+        The default value will change to True in a future version of pandas.
+
 dropna : bool, default True
     If True, and if group keys contain NA values, NA values together
     with row/column will be dropped.

diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py
@@ -254,7 +254,7 @@ def _grouped_plot_by_column(
     return_type=None,
     **kwargs,
 ):
-    grouped = data.groupby(by)
+    grouped = data.groupby(by, observed=False)
     if columns is None:
         if not isinstance(by, (list, tuple)):
             by = [by]

diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -1250,7 +1250,7 @@ def test_groupby_single_agg_cat_cols(grp_col_dict, exp_data):
 
     input_df = input_df.astype({"cat": "category", "cat_ord": "category"})
     input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered()
-    result_df = input_df.groupby("cat").agg(grp_col_dict)
+    result_df = input_df.groupby("cat", observed=False).agg(grp_col_dict)
 
     # create expected dataframe
     cat_index = pd.CategoricalIndex(
@@ -1289,7 +1289,7 @@ def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data):
 
     input_df = input_df.astype({"cat": "category", "cat_ord": "category"})
     input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered()
-    result_df = input_df.groupby("cat").agg(grp_col_dict)
+    result_df = input_df.groupby("cat", observed=False).agg(grp_col_dict)
 
     # create expected dataframe
     cat_index = pd.CategoricalIndex(

diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
@@ -883,7 +883,7 @@ def test_apply_multi_level_name(category):
     df = DataFrame(
         {"A": np.arange(10), "B": b, "C": list(range(10)), "D": list(range(10))}
     ).set_index(["A", "B"])
-    result = df.groupby("B").apply(lambda x: x.sum())
+    result = df.groupby("B", observed=False).apply(lambda x: x.sum())
     tm.assert_frame_equal(result, expected)
     assert df.index.names == ["A", "B"]
 

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
@@ -739,7 +739,7 @@ def test_categorical_series(series, data):
     # Group the given series by a series with categorical data type such that group A
     # takes indices 0 and 3 and group B indices 1 and 2, obtaining the values mapped in
     # the given data.
-    groupby = series.groupby(Series(list("ABBA"), dtype="category"))
+    groupby = series.groupby(Series(list("ABBA"), dtype="category"), observed=False)
     result = groupby.aggregate(list)
     expected = Series(data, index=CategoricalIndex(data.keys()))
     tm.assert_series_equal(result, expected)
@@ -1115,7 +1115,7 @@ def test_groupby_multiindex_categorical_datetime():
             "values": np.arange(9),
         }
     )
-    result = df.groupby(["key1", "key2"]).mean()
+    result = df.groupby(["key1", "key2"], observed=False).mean()
 
     idx = MultiIndex.from_product(
         [
@@ -1291,8 +1291,8 @@ def test_seriesgroupby_observed_apply_dict(df_cat, observed, index, data):
 
 def test_groupby_categorical_series_dataframe_consistent(df_cat):
     # GH 20416
-    expected = df_cat.groupby(["A", "B"])["C"].mean()
-    result = df_cat.groupby(["A", "B"]).mean()["C"]
+    expected = df_cat.groupby(["A", "B"], observed=False)["C"].mean()
+    result = df_cat.groupby(["A", "B"], observed=False).mean()["C"]
     tm.assert_series_equal(result, expected)
 
 
@@ -1303,11 +1303,11 @@ def test_groupby_categorical_axis_1(code):
     cat = Categorical.from_codes(code, categories=list("abc"))
     msg = "DataFrame.groupby with axis=1 is deprecated"
     with tm.assert_produces_warning(FutureWarning, match=msg):
-        gb = df.groupby(cat, axis=1)
+        gb = df.groupby(cat, axis=1, observed=False)
     result = gb.mean()
     msg = "The 'axis' keyword in DataFrame.groupby is deprecated"
     with tm.assert_produces_warning(FutureWarning, match=msg):
-        gb2 = df.T.groupby(cat, axis=0)
+        gb2 = df.T.groupby(cat, axis=0, observed=False)
     expected = gb2.mean().T
     tm.assert_frame_equal(result, expected)
 
@@ -1478,7 +1478,7 @@ def test_series_groupby_categorical_aggregation_getitem():
     df = DataFrame(d)
     cat = pd.cut(df["foo"], np.linspace(0, 20, 5))
     df["range"] = cat
-    groups = df.groupby(["range", "baz"], as_index=True, sort=True)
+    groups = df.groupby(["range", "baz"], as_index=True, sort=True, observed=False)
     result = groups["foo"].agg("mean")
     expected = groups.agg("mean")["foo"]
     tm.assert_series_equal(result, expected)
@@ -1539,7 +1539,7 @@ def test_read_only_category_no_sort():
         {"a": [1, 3, 5, 7], "b": Categorical([1, 1, 2, 2], categories=Index(cats))}
     )
     expected = DataFrame(data={"a": [2.0, 6.0]}, index=CategoricalIndex(cats, name="b"))
-    result = df.groupby("b", sort=False).mean()
+    result = df.groupby("b", sort=False, observed=False).mean()
     tm.assert_frame_equal(result, expected)
 
 
@@ -1583,7 +1583,7 @@ def test_sorted_missing_category_values():
         dtype="category",
     )
 
-    result = df.groupby(["bar", "foo"]).size().unstack()
+    result = df.groupby(["bar", "foo"], observed=False).size().unstack()
 
     tm.assert_frame_equal(result, expected)
 
@@ -1748,7 +1748,7 @@ def test_groupby_categorical_indices_unused_categories():
             "col": range(3),
         }
     )
-    grouped = df.groupby("key", sort=False)
+    grouped = df.groupby("key", sort=False, observed=False)
     result = grouped.indices
     expected = {
         "b": np.array([0, 1], dtype="intp"),
@@ -2013,3 +2013,15 @@ def test_many_categories(as_index, sort, index_kind, ordered):
         expected = DataFrame({"a": Series(index), "b": data})
 
     tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("cat_columns", ["a", "b", ["a", "b"]])
+@pytest.mark.parametrize("keys", ["a", "b", ["a", "b"]])
+def test_groupby_default_depr(cat_columns, keys):
+    # GH#43999
+    df = DataFrame({"a": [1, 1, 2, 3], "b": [4, 5, 6, 7]})
+    df[cat_columns] = df[cat_columns].astype("category")
+    msg = "The default of observed=False is deprecated"
+    klass = FutureWarning if set(cat_columns) & set(keys) else None
+    with tm.assert_produces_warning(klass, match=msg):
+        df.groupby(keys)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -1926,7 +1926,7 @@ def test_empty_groupby(
 
     df = df.iloc[:0]
 
-    gb = df.groupby(keys, group_keys=False, dropna=dropna)[columns]
+    gb = df.groupby(keys, group_keys=False, dropna=dropna, observed=False)[columns]
 
     def get_result(**kwargs):
         if method == "attr":
@@ -2638,7 +2638,7 @@ def test_datetime_categorical_multikey_groupby_indices():
             "c": Categorical.from_codes([-1, 0, 1], categories=[0, 1]),
         }
     )
-    result = df.groupby(["a", "b"]).indices
+    result = df.groupby(["a", "b"], observed=False).indices
     expected = {
         ("a", Timestamp("2018-01-01 00:00:00")): np.array([0]),
         ("b", Timestamp("2018-02-01 00:00:00")): np.array([1]),