Skip to content

Commit 0f17970

Browse files
authored
BUG: Groupby with as_index=False raises error when type is Category (#34767)
1 parent 94a9f7d commit 0f17970

File tree

5 files changed

+53
-9
lines changed

5 files changed

+53
-9
lines changed

doc/source/whatsnew/v1.1.0.rst

+20
Original file line numberDiff line numberDiff line change
@@ -676,6 +676,25 @@ Using :meth:`DataFrame.groupby` with ``as_index=False`` and the function ``idxma
676676
677677
df.groupby("a", as_index=False).nunique()
678678
679+
The method :meth:`~core.groupby.DataFrameGroupBy.size` would previously ignore ``as_index=False``. Now the grouping columns are returned as columns, making the result a :class:`DataFrame` instead of a :class:`Series`. (:issue:`32599`)
680+
681+
*Previous behavior*:
682+
683+
.. code-block:: ipython
684+
685+
In [3]: df.groupby("a", as_index=False).size()
686+
Out[3]:
687+
a
688+
x 2
689+
y 2
690+
dtype: int64
691+
692+
*New behavior*:
693+
694+
.. ipython:: python
695+
696+
df.groupby("a", as_index=False).size()
697+
679698
.. _whatsnew_110.api_breaking.apply_applymap_first_once:
680699

681700
apply and applymap on ``DataFrame`` evaluates first row/column only once
@@ -995,6 +1014,7 @@ Groupby/resample/rolling
9951014
- Bug in :meth:`SeriesGroupBy.agg` where any column name was previously accepted in the named aggregation of ``SeriesGroupBy``. Now only ``str`` and callables are allowed; anything else raises ``TypeError``. (:issue:`34422`)
9961015
- Bug in :meth:`DataFrame.groupby` where the index was lost when one of the ``agg`` keys referenced an empty list (:issue:`32580`)
9971016

1017+
9981018
Reshaping
9991019
^^^^^^^^^
10001020

pandas/core/frame.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -5450,7 +5450,7 @@ def value_counts(
54505450
if subset is None:
54515451
subset = self.columns.tolist()
54525452

5453-
counts = self.groupby(subset).size()
5453+
counts = self.groupby(subset).grouper.size()
54545454

54555455
if sort:
54565456
counts = counts.sort_values(ascending=ascending)

pandas/core/groupby/groupby.py

+11-6
Original file line numberDiff line numberDiff line change
@@ -942,9 +942,9 @@ def _transform_should_cast(self, func_nm: str) -> bool:
942942
bool
943943
Whether transform should attempt to cast the result of aggregation
944944
"""
945-
return (self.size().fillna(0) > 0).any() and (
946-
func_nm not in base.cython_cast_blacklist
947-
)
945+
filled_series = self.grouper.size().fillna(0)
946+
assert filled_series is not None
947+
return filled_series.gt(0).any() and func_nm not in base.cython_cast_blacklist
948948

949949
def _cython_transform(self, how: str, numeric_only: bool = True, **kwargs):
950950
output: Dict[base.OutputKey, np.ndarray] = {}
@@ -1507,14 +1507,15 @@ def sem(self, ddof: int = 1):
15071507

15081508
@Substitution(name="groupby")
15091509
@Appender(_common_see_also)
1510-
def size(self):
1510+
def size(self) -> FrameOrSeriesUnion:
15111511
"""
15121512
Compute group sizes.
15131513
15141514
Returns
15151515
-------
1516-
Series
1517-
Number of rows in each group.
1516+
DataFrame or Series
1517+
Number of rows in each group as a Series if as_index is True
1518+
or a DataFrame if as_index is False.
15181519
"""
15191520
result = self.grouper.size()
15201521

@@ -1523,6 +1524,10 @@ def size(self):
15231524
result = self._obj_1d_constructor(result, name=self.obj.name)
15241525
else:
15251526
result = self._obj_1d_constructor(result)
1527+
1528+
if not self.as_index:
1529+
result = result.rename("size").reset_index()
1530+
15261531
return self._reindex_output(result, fill_value=0)
15271532

15281533
@doc(_groupby_agg_method_template, fname="sum", no=True, mc=0)

pandas/tests/groupby/test_groupby.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -668,11 +668,14 @@ def test_ops_not_as_index(reduction_func):
668668
if reduction_func in ("corrwith",):
669669
pytest.skip("Test not applicable")
670670

671-
if reduction_func in ("nth", "ngroup", "size",):
671+
if reduction_func in ("nth", "ngroup",):
672672
pytest.skip("Skip until behavior is determined (GH #5755)")
673673

674674
df = DataFrame(np.random.randint(0, 5, size=(100, 2)), columns=["a", "b"])
675-
expected = getattr(df.groupby("a"), reduction_func)().reset_index()
675+
expected = getattr(df.groupby("a"), reduction_func)()
676+
if reduction_func == "size":
677+
expected = expected.rename("size")
678+
expected = expected.reset_index()
676679

677680
g = df.groupby("a", as_index=False)
678681

pandas/tests/groupby/test_size.py

+16
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,19 @@ def test_size_period_index():
4444
grp = ser.groupby(level="A")
4545
result = grp.size()
4646
tm.assert_series_equal(result, ser)
47+
48+
49+
@pytest.mark.parametrize("as_index", [True, False])
50+
def test_size_on_categorical(as_index):
51+
df = DataFrame([[1, 1], [2, 2]], columns=["A", "B"])
52+
df["A"] = df["A"].astype("category")
53+
result = df.groupby(["A", "B"], as_index=as_index).size()
54+
55+
expected = DataFrame(
56+
[[1, 1, 1], [1, 2, 0], [2, 1, 0], [2, 2, 1]], columns=["A", "B", "size"],
57+
)
58+
expected["A"] = expected["A"].astype("category")
59+
if as_index:
60+
expected = expected.set_index(["A", "B"])["size"].rename(None)
61+
62+
tm.assert_equal(result, expected)

0 commit comments

Comments
 (0)