From 6a01c732845009724e76bf036471c9c6b0efa00d Mon Sep 17 00:00:00 2001 From: HH Date: Sat, 14 Sep 2019 18:16:56 +0900 Subject: [PATCH 1/2] ENH: Groupby on multiindex with missing data in group keys raises IndexError (#20519) * if all the values in a level of a MultiIndex were missing, fill with numpy nan --- doc/source/whatsnew/v1.0.0.rst | 4 +++- pandas/core/indexes/multi.py | 5 ++++- pandas/tests/groupby/test_grouping.py | 18 ++++++++++++++++++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 4decc99087a9e..1e2213d67001c 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -174,8 +174,10 @@ Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ - -- +- Bug in :meth:`DataFrame.groupby` with multiple groups where an ``IndexError`` would be raised if any group contained all NA values (:issue:`20519`) +- Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue: `28192`) - Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`) +- Bug in :meth:`DataFrameGroupby.agg` not able to use lambda function with named aggregation (:issue:`27519`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index b614952ba1e04..37c91bd9f2b6f 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1279,7 +1279,10 @@ def _get_grouper_for_level(self, mapper, level): # Remove unobserved levels from level_index level_index = level_index.take(uniques) - grouper = level_index.take(codes) + if len(level_index): + grouper = level_index.take(codes) + else: + grouper = level_index.take(codes, fill_value=True) return grouper, codes, level_index diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 403f5f11ee768..ab25d183ae3ff 100644 --- a/pandas/tests/groupby/test_grouping.py +++ 
b/pandas/tests/groupby/test_grouping.py @@ -628,6 +628,24 @@ def test_groupby_empty(self): # check name assert s.groupby(s).grouper.names == ["name"] + def test_groupby_level_index_value_all_na(self): + # issue 20519 + df = DataFrame( + [["x", np.nan, 10], [None, np.nan, 20]], columns=["A", "B", "C"] + ).set_index(["A", "B"]) + result = df.groupby(level=["A", "B"]).sum() + expected = DataFrame( + data=[], + index=MultiIndex( + levels=[Index(["x"], dtype="object"), Index([], dtype="float64")], + codes=[[], []], + names=["A", "B"], + ), + columns=["C"], + dtype="int64", + ) + tm.assert_frame_equal(result, expected) + # get_group # -------------------------------- From ef08517703cdb61ba9fb6420a0f9797f9744f85d Mon Sep 17 00:00:00 2001 From: proost Date: Fri, 25 Oct 2019 13:59:42 +0900 Subject: [PATCH 2/2] Update v1.0.0.rst --- doc/source/whatsnew/v1.0.0.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 4ba1d3b621d4f..1d33a1c1d5e58 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -377,9 +377,6 @@ Groupby/resample/rolling - - Bug in :meth:`DataFrame.groupby` with multiple groups where an ``IndexError`` would be raised if any group contained all NA values (:issue:`20519`) - Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue: `28192`) -- Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`) -- Bug in :meth:`DataFrameGroupby.agg` not able to use lambda function with named aggregation (:issue:`27519`) -- Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue: `28192`) - Bug in :meth:`DataFrame.rolling` not allowing rolling over multi-index levels (:issue: `15584`). - Bug in :meth:`DataFrame.rolling` not allowing rolling on monotonic decreasing time indexes (:issue: `19248`). 
- Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`)