From 91a7ebb0b7dc3aa3c077c64ebc30d07a87c64cb8 Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Mon, 28 Jun 2021 10:27:50 -0400 Subject: [PATCH] Backport PR #42254: REGR: groupby with as_index=False on an empty frame --- pandas/core/groupby/generic.py | 4 +++- pandas/tests/groupby/conftest.py | 5 +++++ pandas/tests/groupby/test_groupby.py | 23 +++++++++++++---------- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 5cb0eac5d9074..18c84d9aa88bf 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1676,7 +1676,9 @@ def _wrap_transformed_output( def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame: if not self.as_index: - index = Index(range(mgr.shape[1])) + # GH 41998 - empty mgr always gets index of length 0 + rows = mgr.shape[1] if mgr.shape[0] > 0 else 0 + index = Index(range(rows)) mgr.set_axis(1, index) result = self.obj._constructor(mgr) diff --git a/pandas/tests/groupby/conftest.py b/pandas/tests/groupby/conftest.py index 9e7d6d2a7c5bd..d699d05963b46 100644 --- a/pandas/tests/groupby/conftest.py +++ b/pandas/tests/groupby/conftest.py @@ -12,6 +12,11 @@ ) +@pytest.fixture(params=[True, False]) +def as_index(request): + return request.param + + @pytest.fixture def mframe(): index = MultiIndex( diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 89944e2a745e4..0181481b29c44 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -15,6 +15,7 @@ Grouper, Index, MultiIndex, + RangeIndex, Series, Timestamp, date_range, @@ -2360,19 +2361,21 @@ def test_groupby_all_nan_groups_drop(): tm.assert_series_equal(result, expected) -def test_groupby_empty_multi_column(): - # GH 15106 +@pytest.mark.parametrize("numeric_only", [True, False]) +def test_groupby_empty_multi_column(as_index, numeric_only): + # GH 15106 & GH 41998 df = DataFrame(data=[], columns=["A", "B", "C"]) - gb = df.groupby(["A", "B"]) - result = gb.sum(numeric_only=False) - expected = DataFrame( - [], columns=["C"], index=MultiIndex([[], []], [[], []], names=["A", "B"]) - ) + gb = df.groupby(["A", "B"], as_index=as_index) + result = gb.sum(numeric_only=numeric_only) + if as_index: + index = MultiIndex([[], []], [[], []], names=["A", "B"]) + columns = ["C"] if not numeric_only else [] + else: + index = RangeIndex(0) + columns = ["A", "B", "C"] if not numeric_only else ["A", "B"] + expected = DataFrame([], columns=columns, index=index) tm.assert_frame_equal(result, expected) - result = gb.sum(numeric_only=True) - tm.assert_frame_equal(result, expected[[]]) - def test_groupby_filtered_df_std(): # GH 16174