Skip to content

Commit 833f0ac

Browse files
authored
CLN: Enforce change in default value of observed (#57330)
1 parent a238618 commit 833f0ac

File tree

8 files changed: 43 additions and 79 deletions.

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ Deprecations
104104
Removal of prior version deprecations/changes
105105
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
106106
- All arguments except the first ``path``-like argument in IO writers are now keyword only (:issue:`54229`)
107+
- Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`)
107108
- Removed ``DataFrame.bool`` and ``Series.bool`` (:issue:`51756`)
108109
- Removed ``DataFrame.first`` and ``DataFrame.last`` (:issue:`53710`)
109110
- Removed ``DataFrameGroupBy.grouper`` and ``SeriesGroupBy.grouper`` (:issue:`56521`)

pandas/core/frame.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8856,7 +8856,7 @@ def groupby(
88568856
as_index: bool = True,
88578857
sort: bool = True,
88588858
group_keys: bool = True,
8859-
observed: bool | lib.NoDefault = lib.no_default,
8859+
observed: bool = True,
88608860
dropna: bool = True,
88618861
) -> DataFrameGroupBy:
88628862
from pandas.core.groupby.generic import DataFrameGroupBy
@@ -9065,10 +9065,9 @@ def pivot(
90659065
If True: only show observed values for categorical groupers.
90669066
If False: show all values for categorical groupers.
90679067
9068-
.. deprecated:: 2.2.0
9068+
.. versionchanged:: 3.0.0
90699069
9070-
The default value of ``False`` is deprecated and will change to
9071-
``True`` in a future version of pandas.
9070+
The default value is now ``True``.
90729071
90739072
sort : bool, default True
90749073
Specifies if the result should be sorted.
@@ -9180,7 +9179,7 @@ def pivot_table(
91809179
margins: bool = False,
91819180
dropna: bool = True,
91829181
margins_name: Level = "All",
9183-
observed: bool | lib.NoDefault = lib.no_default,
9182+
observed: bool = True,
91849183
sort: bool = True,
91859184
) -> DataFrame:
91869185
from pandas.core.reshape.pivot import pivot_table

pandas/core/groupby/groupby.py

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1117,7 +1117,7 @@ def __init__(
11171117
as_index: bool = True,
11181118
sort: bool = True,
11191119
group_keys: bool = True,
1120-
observed: bool | lib.NoDefault = lib.no_default,
1120+
observed: bool = False,
11211121
dropna: bool = True,
11221122
) -> None:
11231123
self._selection = selection
@@ -1137,23 +1137,11 @@ def __init__(
11371137
keys,
11381138
level=level,
11391139
sort=sort,
1140-
observed=False if observed is lib.no_default else observed,
1140+
observed=observed,
11411141
dropna=self.dropna,
11421142
)
11431143

1144-
if observed is lib.no_default:
1145-
if any(ping._passed_categorical for ping in grouper.groupings):
1146-
warnings.warn(
1147-
"The default of observed=False is deprecated and will be changed "
1148-
"to True in a future version of pandas. Pass observed=False to "
1149-
"retain current behavior or observed=True to adopt the future "
1150-
"default and silence this warning.",
1151-
FutureWarning,
1152-
stacklevel=find_stack_level(),
1153-
)
1154-
observed = False
11551144
self.observed = observed
1156-
11571145
self.obj = obj
11581146
self._grouper = grouper
11591147
self.exclusions = frozenset(exclusions) if exclusions else frozenset()

pandas/core/reshape/pivot.py

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
Literal,
1111
cast,
1212
)
13-
import warnings
1413

1514
import numpy as np
1615

@@ -19,7 +18,6 @@
1918
Appender,
2019
Substitution,
2120
)
22-
from pandas.util._exceptions import find_stack_level
2321

2422
from pandas.core.dtypes.cast import maybe_downcast_to_dtype
2523
from pandas.core.dtypes.common import (
@@ -70,7 +68,7 @@ def pivot_table(
7068
margins: bool = False,
7169
dropna: bool = True,
7270
margins_name: Hashable = "All",
73-
observed: bool | lib.NoDefault = lib.no_default,
71+
observed: bool = True,
7472
sort: bool = True,
7573
) -> DataFrame:
7674
index = _convert_by(index)
@@ -125,7 +123,7 @@ def __internal_pivot_table(
125123
margins: bool,
126124
dropna: bool,
127125
margins_name: Hashable,
128-
observed: bool | lib.NoDefault,
126+
observed: bool,
129127
sort: bool,
130128
) -> DataFrame:
131129
"""
@@ -168,18 +166,7 @@ def __internal_pivot_table(
168166
pass
169167
values = list(values)
170168

171-
observed_bool = False if observed is lib.no_default else observed
172-
grouped = data.groupby(keys, observed=observed_bool, sort=sort, dropna=dropna)
173-
if observed is lib.no_default and any(
174-
ping._passed_categorical for ping in grouped._grouper.groupings
175-
):
176-
warnings.warn(
177-
"The default value of observed=False is deprecated and will change "
178-
"to observed=True in a future version of pandas. Specify "
179-
"observed=False to silence this warning and retain the current behavior",
180-
category=FutureWarning,
181-
stacklevel=find_stack_level(),
182-
)
169+
grouped = data.groupby(keys, observed=observed, sort=sort, dropna=dropna)
183170
agged = grouped.agg(aggfunc)
184171

185172
if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):

pandas/core/series.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1930,7 +1930,7 @@ def groupby(
19301930
as_index: bool = True,
19311931
sort: bool = True,
19321932
group_keys: bool = True,
1933-
observed: bool | lib.NoDefault = lib.no_default,
1933+
observed: bool = True,
19341934
dropna: bool = True,
19351935
) -> SeriesGroupBy:
19361936
from pandas.core.groupby.generic import SeriesGroupBy

pandas/core/shared_docs.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -148,14 +148,14 @@
148148
149149
``group_keys`` now defaults to ``True``.
150150
151-
observed : bool, default False
151+
observed : bool, default True
152152
This only applies if any of the groupers are Categoricals.
153153
If True: only show observed values for categorical groupers.
154154
If False: show all values for categorical groupers.
155155
156-
.. deprecated:: 2.1.0
156+
.. versionchanged:: 3.0.0
157157
158-
The default value will change to True in a future version of pandas.
158+
The default value is now ``True``.
159159
160160
dropna : bool, default True
161161
If True, and if group keys contain NA values, NA values together

pandas/tests/groupby/test_categorical.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2088,18 +2088,6 @@ def test_many_categories(as_index, sort, index_kind, ordered):
20882088
tm.assert_frame_equal(result, expected)
20892089

20902090

2091-
@pytest.mark.parametrize("cat_columns", ["a", "b", ["a", "b"]])
2092-
@pytest.mark.parametrize("keys", ["a", "b", ["a", "b"]])
2093-
def test_groupby_default_depr(cat_columns, keys):
2094-
# GH#43999
2095-
df = DataFrame({"a": [1, 1, 2, 3], "b": [4, 5, 6, 7]})
2096-
df[cat_columns] = df[cat_columns].astype("category")
2097-
msg = "The default of observed=False is deprecated"
2098-
klass = FutureWarning if set(cat_columns) & set(keys) else None
2099-
with tm.assert_produces_warning(klass, match=msg):
2100-
df.groupby(keys)
2101-
2102-
21032091
@pytest.mark.parametrize("test_series", [True, False])
21042092
@pytest.mark.parametrize("keys", [["a1"], ["a1", "a2"]])
21052093
def test_agg_list(request, as_index, observed, reduction_func, test_series, keys):

pandas/tests/reshape/test_pivot.py

Lines changed: 29 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -192,9 +192,9 @@ def test_pivot_table_categorical(self):
192192
["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True
193193
)
194194
df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]})
195-
msg = "The default value of observed=False is deprecated"
196-
with tm.assert_produces_warning(FutureWarning, match=msg):
197-
result = pivot_table(df, values="values", index=["A", "B"], dropna=True)
195+
result = pivot_table(
196+
df, values="values", index=["A", "B"], dropna=True, observed=False
197+
)
198198

199199
exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"])
200200
expected = DataFrame({"values": [1.0, 2.0, 3.0, 4.0]}, index=exp_index)
@@ -213,9 +213,9 @@ def test_pivot_table_dropna_categoricals(self, dropna):
213213
)
214214

215215
df["A"] = df["A"].astype(CategoricalDtype(categories, ordered=False))
216-
msg = "The default value of observed=False is deprecated"
217-
with tm.assert_produces_warning(FutureWarning, match=msg):
218-
result = df.pivot_table(index="B", columns="A", values="C", dropna=dropna)
216+
result = df.pivot_table(
217+
index="B", columns="A", values="C", dropna=dropna, observed=False
218+
)
219219
expected_columns = Series(["a", "b", "c"], name="A")
220220
expected_columns = expected_columns.astype(
221221
CategoricalDtype(categories, ordered=False)
@@ -245,9 +245,7 @@ def test_pivot_with_non_observable_dropna(self, dropna):
245245
}
246246
)
247247

248-
msg = "The default value of observed=False is deprecated"
249-
with tm.assert_produces_warning(FutureWarning, match=msg):
250-
result = df.pivot_table(index="A", values="B", dropna=dropna)
248+
result = df.pivot_table(index="A", values="B", dropna=dropna, observed=False)
251249
if dropna:
252250
values = [2.0, 3.0]
253251
codes = [0, 1]
@@ -278,9 +276,7 @@ def test_pivot_with_non_observable_dropna_multi_cat(self, dropna):
278276
}
279277
)
280278

281-
msg = "The default value of observed=False is deprecated"
282-
with tm.assert_produces_warning(FutureWarning, match=msg):
283-
result = df.pivot_table(index="A", values="B", dropna=dropna)
279+
result = df.pivot_table(index="A", values="B", dropna=dropna, observed=False)
284280
expected = DataFrame(
285281
{"B": [2.0, 3.0, 0.0]},
286282
index=Index(
@@ -304,9 +300,7 @@ def test_pivot_with_interval_index(self, left_right, dropna, closed):
304300
interval_values = Categorical(pd.IntervalIndex.from_arrays(left, right, closed))
305301
df = DataFrame({"A": interval_values, "B": 1})
306302

307-
msg = "The default value of observed=False is deprecated"
308-
with tm.assert_produces_warning(FutureWarning, match=msg):
309-
result = df.pivot_table(index="A", values="B", dropna=dropna)
303+
result = df.pivot_table(index="A", values="B", dropna=dropna, observed=False)
310304
expected = DataFrame(
311305
{"B": 1.0}, index=Index(interval_values.unique(), name="A")
312306
)
@@ -327,11 +321,15 @@ def test_pivot_with_interval_index_margins(self):
327321
}
328322
)
329323

330-
msg = "The default value of observed=False is deprecated"
331-
with tm.assert_produces_warning(FutureWarning, match=msg):
332-
pivot_tab = pivot_table(
333-
df, index="C", columns="B", values="A", aggfunc="sum", margins=True
334-
)
324+
pivot_tab = pivot_table(
325+
df,
326+
index="C",
327+
columns="B",
328+
values="A",
329+
aggfunc="sum",
330+
margins=True,
331+
observed=False,
332+
)
335333

336334
result = pivot_tab["All"]
337335
expected = Series(
@@ -1830,9 +1828,9 @@ def test_categorical_margins_category(self, observed):
18301828

18311829
df.y = df.y.astype("category")
18321830
df.z = df.z.astype("category")
1833-
msg = "The default value of observed=False is deprecated"
1834-
with tm.assert_produces_warning(FutureWarning, match=msg):
1835-
table = df.pivot_table("x", "y", "z", dropna=observed, margins=True)
1831+
table = df.pivot_table(
1832+
"x", "y", "z", dropna=observed, margins=True, observed=False
1833+
)
18361834
tm.assert_frame_equal(table, expected)
18371835

18381836
def test_margins_casted_to_float(self):
@@ -1894,11 +1892,14 @@ def test_categorical_aggfunc(self, observed):
18941892
{"C1": ["A", "B", "C", "C"], "C2": ["a", "a", "b", "b"], "V": [1, 2, 3, 4]}
18951893
)
18961894
df["C1"] = df["C1"].astype("category")
1897-
msg = "The default value of observed=False is deprecated"
1898-
with tm.assert_produces_warning(FutureWarning, match=msg):
1899-
result = df.pivot_table(
1900-
"V", index="C1", columns="C2", dropna=observed, aggfunc="count"
1901-
)
1895+
result = df.pivot_table(
1896+
"V",
1897+
index="C1",
1898+
columns="C2",
1899+
dropna=observed,
1900+
aggfunc="count",
1901+
observed=False,
1902+
)
19021903

19031904
expected_index = pd.CategoricalIndex(
19041905
["A", "B", "C"], categories=["A", "B", "C"], ordered=False, name="C1"

0 commit comments

Comments
 (0)