CLN: Enforce change in default value of observed (#57330)

rhshadrach · web-flow · commit 833f0ac9d92d · 2024-02-10T12:39:04.000+01:00
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -104,6 +104,7 @@ Deprecations
 Removal of prior version deprecations/changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 - All arguments except the first ``path``-like argument in IO writers are now keyword only (:issue:`54229`)
+- Changed the default value of ``observed`` in :meth:`DataFrame.groupby`, :meth:`Series.groupby`, and :meth:`DataFrame.pivot_table` to ``True`` (:issue:`51811`)
 - Removed ``DataFrame.bool`` and ``Series.bool`` (:issue:`51756`)
 - Removed ``DataFrame.first`` and ``DataFrame.last`` (:issue:`53710`)
 - Removed ``DataFrameGroupBy.grouper`` and ``SeriesGroupBy.grouper`` (:issue:`56521`)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -8856,7 +8856,7 @@ def groupby(
         as_index: bool = True,
         sort: bool = True,
         group_keys: bool = True,
-        observed: bool | lib.NoDefault = lib.no_default,
+        observed: bool = True,
         dropna: bool = True,
     ) -> DataFrameGroupBy:
         from pandas.core.groupby.generic import DataFrameGroupBy
@@ -9065,10 +9065,9 @@ def pivot(
             If True: only show observed values for categorical groupers.
             If False: show all values for categorical groupers.
 
-            .. deprecated:: 2.2.0
+            .. versionchanged:: 3.0.0
 
-                The default value of ``False`` is deprecated and will change to
-                ``True`` in a future version of pandas.
+                The default value is now ``True``.
 
         sort : bool, default True
             Specifies if the result should be sorted.
@@ -9180,7 +9179,7 @@ def pivot_table(
         margins: bool = False,
         dropna: bool = True,
         margins_name: Level = "All",
-        observed: bool | lib.NoDefault = lib.no_default,
+        observed: bool = True,
         sort: bool = True,
     ) -> DataFrame:
         from pandas.core.reshape.pivot import pivot_table
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -1117,7 +1117,7 @@ def __init__(
         as_index: bool = True,
         sort: bool = True,
         group_keys: bool = True,
-        observed: bool | lib.NoDefault = lib.no_default,
+        observed: bool = False,
         dropna: bool = True,
     ) -> None:
         self._selection = selection
@@ -1137,23 +1137,11 @@ def __init__(
                 keys,
                 level=level,
                 sort=sort,
-                observed=False if observed is lib.no_default else observed,
+                observed=observed,
                 dropna=self.dropna,
             )
 
-        if observed is lib.no_default:
-            if any(ping._passed_categorical for ping in grouper.groupings):
-                warnings.warn(
-                    "The default of observed=False is deprecated and will be changed "
-                    "to True in a future version of pandas. Pass observed=False to "
-                    "retain current behavior or observed=True to adopt the future "
-                    "default and silence this warning.",
-                    FutureWarning,
-                    stacklevel=find_stack_level(),
-                )
-            observed = False
         self.observed = observed
-
         self.obj = obj
         self._grouper = grouper
         self.exclusions = frozenset(exclusions) if exclusions else frozenset()
diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
@@ -10,7 +10,6 @@
     Literal,
     cast,
 )
-import warnings
 
 import numpy as np
 
@@ -19,7 +18,6 @@
     Appender,
     Substitution,
 )
-from pandas.util._exceptions import find_stack_level
 
 from pandas.core.dtypes.cast import maybe_downcast_to_dtype
 from pandas.core.dtypes.common import (
@@ -70,7 +68,7 @@ def pivot_table(
     margins: bool = False,
     dropna: bool = True,
     margins_name: Hashable = "All",
-    observed: bool | lib.NoDefault = lib.no_default,
+    observed: bool = True,
     sort: bool = True,
 ) -> DataFrame:
     index = _convert_by(index)
@@ -125,7 +123,7 @@ def __internal_pivot_table(
     margins: bool,
     dropna: bool,
     margins_name: Hashable,
-    observed: bool | lib.NoDefault,
+    observed: bool,
     sort: bool,
 ) -> DataFrame:
     """
@@ -168,18 +166,7 @@ def __internal_pivot_table(
                 pass
         values = list(values)
 
-    observed_bool = False if observed is lib.no_default else observed
-    grouped = data.groupby(keys, observed=observed_bool, sort=sort, dropna=dropna)
-    if observed is lib.no_default and any(
-        ping._passed_categorical for ping in grouped._grouper.groupings
-    ):
-        warnings.warn(
-            "The default value of observed=False is deprecated and will change "
-            "to observed=True in a future version of pandas. Specify "
-            "observed=False to silence this warning and retain the current behavior",
-            category=FutureWarning,
-            stacklevel=find_stack_level(),
-        )
+    grouped = data.groupby(keys, observed=observed, sort=sort, dropna=dropna)
     agged = grouped.agg(aggfunc)
 
     if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -1930,7 +1930,7 @@ def groupby(
         as_index: bool = True,
         sort: bool = True,
         group_keys: bool = True,
-        observed: bool | lib.NoDefault = lib.no_default,
+        observed: bool = False,
         dropna: bool = True,
     ) -> SeriesGroupBy:
         from pandas.core.groupby.generic import SeriesGroupBy
diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py
@@ -148,14 +148,14 @@
 
        ``group_keys`` now defaults to ``True``.
 
-observed : bool, default False
+observed : bool, default True
     This only applies if any of the groupers are Categoricals.
     If True: only show observed values for categorical groupers.
     If False: show all values for categorical groupers.
 
-    .. deprecated:: 2.1.0
+    .. versionchanged:: 3.0.0
 
-        The default value will change to True in a future version of pandas.
+        The default value is now ``True``.
 
 dropna : bool, default True
     If True, and if group keys contain NA values, NA values together
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
@@ -2088,18 +2088,6 @@ def test_many_categories(as_index, sort, index_kind, ordered):
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.parametrize("cat_columns", ["a", "b", ["a", "b"]])
-@pytest.mark.parametrize("keys", ["a", "b", ["a", "b"]])
-def test_groupby_default_depr(cat_columns, keys):
-    # GH#43999
-    df = DataFrame({"a": [1, 1, 2, 3], "b": [4, 5, 6, 7]})
-    df[cat_columns] = df[cat_columns].astype("category")
-    msg = "The default of observed=False is deprecated"
-    klass = FutureWarning if set(cat_columns) & set(keys) else None
-    with tm.assert_produces_warning(klass, match=msg):
-        df.groupby(keys)
-
-
 @pytest.mark.parametrize("test_series", [True, False])
 @pytest.mark.parametrize("keys", [["a1"], ["a1", "a2"]])
 def test_agg_list(request, as_index, observed, reduction_func, test_series, keys):
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
@@ -192,9 +192,9 @@ def test_pivot_table_categorical(self):
             ["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True
         )
         df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]})
-        msg = "The default value of observed=False is deprecated"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            result = pivot_table(df, values="values", index=["A", "B"], dropna=True)
+        result = pivot_table(
+            df, values="values", index=["A", "B"], dropna=True, observed=False
+        )
 
         exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"])
         expected = DataFrame({"values": [1.0, 2.0, 3.0, 4.0]}, index=exp_index)
@@ -213,9 +213,9 @@ def test_pivot_table_dropna_categoricals(self, dropna):
         )
 
         df["A"] = df["A"].astype(CategoricalDtype(categories, ordered=False))
-        msg = "The default value of observed=False is deprecated"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            result = df.pivot_table(index="B", columns="A", values="C", dropna=dropna)
+        result = df.pivot_table(
+            index="B", columns="A", values="C", dropna=dropna, observed=False
+        )
         expected_columns = Series(["a", "b", "c"], name="A")
         expected_columns = expected_columns.astype(
             CategoricalDtype(categories, ordered=False)
@@ -245,9 +245,7 @@ def test_pivot_with_non_observable_dropna(self, dropna):
             }
         )
 
-        msg = "The default value of observed=False is deprecated"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            result = df.pivot_table(index="A", values="B", dropna=dropna)
+        result = df.pivot_table(index="A", values="B", dropna=dropna, observed=False)
         if dropna:
             values = [2.0, 3.0]
             codes = [0, 1]
@@ -278,9 +276,7 @@ def test_pivot_with_non_observable_dropna_multi_cat(self, dropna):
             }
         )
 
-        msg = "The default value of observed=False is deprecated"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            result = df.pivot_table(index="A", values="B", dropna=dropna)
+        result = df.pivot_table(index="A", values="B", dropna=dropna, observed=False)
         expected = DataFrame(
             {"B": [2.0, 3.0, 0.0]},
             index=Index(
@@ -304,9 +300,7 @@ def test_pivot_with_interval_index(self, left_right, dropna, closed):
         interval_values = Categorical(pd.IntervalIndex.from_arrays(left, right, closed))
         df = DataFrame({"A": interval_values, "B": 1})
 
-        msg = "The default value of observed=False is deprecated"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            result = df.pivot_table(index="A", values="B", dropna=dropna)
+        result = df.pivot_table(index="A", values="B", dropna=dropna, observed=False)
         expected = DataFrame(
             {"B": 1.0}, index=Index(interval_values.unique(), name="A")
         )
@@ -327,11 +321,15 @@ def test_pivot_with_interval_index_margins(self):
             }
         )
 
-        msg = "The default value of observed=False is deprecated"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            pivot_tab = pivot_table(
-                df, index="C", columns="B", values="A", aggfunc="sum", margins=True
-            )
+        pivot_tab = pivot_table(
+            df,
+            index="C",
+            columns="B",
+            values="A",
+            aggfunc="sum",
+            margins=True,
+            observed=False,
+        )
 
         result = pivot_tab["All"]
         expected = Series(
@@ -1830,9 +1828,9 @@ def test_categorical_margins_category(self, observed):
 
         df.y = df.y.astype("category")
         df.z = df.z.astype("category")
-        msg = "The default value of observed=False is deprecated"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            table = df.pivot_table("x", "y", "z", dropna=observed, margins=True)
+        table = df.pivot_table(
+            "x", "y", "z", dropna=observed, margins=True, observed=False
+        )
         tm.assert_frame_equal(table, expected)
 
     def test_margins_casted_to_float(self):
@@ -1894,11 +1892,14 @@ def test_categorical_aggfunc(self, observed):
             {"C1": ["A", "B", "C", "C"], "C2": ["a", "a", "b", "b"], "V": [1, 2, 3, 4]}
         )
         df["C1"] = df["C1"].astype("category")
-        msg = "The default value of observed=False is deprecated"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            result = df.pivot_table(
-                "V", index="C1", columns="C2", dropna=observed, aggfunc="count"
-            )
+        result = df.pivot_table(
+            "V",
+            index="C1",
+            columns="C2",
+            dropna=observed,
+            aggfunc="count",
+            observed=False,
+        )
 
         expected_index = pd.CategoricalIndex(
             ["A", "B", "C"], categories=["A", "B", "C"], ordered=False, name="C1"