Skip to content

Commit ab639ca

Browse files
committed
ENH: Groupby on multiindex with missing data in group keys raises IndexError (#20519)
* If all index values in some level are NA, fill with NaN
1 parent def01cf commit ab639ca

File tree

3 files changed

+48
-2
lines changed

3 files changed

+48
-2
lines changed

doc/source/whatsnew/v1.0.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ Groupby/resample/rolling
174174
^^^^^^^^^^^^^^^^^^^^^^^^
175175

176176
-
177-
-
177+
- Bug in :meth:`DataFrame.groupby` raising ``IndexError`` when all index values in some index level are missing data (:issue:`20519`)
178178
- Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`)
179179

180180
Reshaping

pandas/core/indexes/multi.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1279,7 +1279,10 @@ def _get_grouper_for_level(self, mapper, level):
12791279
# Remove unobserved levels from level_index
12801280
level_index = level_index.take(uniques)
12811281

1282-
grouper = level_index.take(codes)
1282+
if len(level_index):
1283+
grouper = level_index.take(codes)
1284+
else:
1285+
grouper = level_index.take(codes, fill_value=True)
12831286

12841287
return grouper, codes, level_index
12851288

pandas/tests/groupby/test_grouping.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -628,6 +628,49 @@ def test_groupby_empty(self):
628628
# check name
629629
assert s.groupby(s).grouper.names == ["name"]
630630

631+
def test_groupby_level_index_value_all_na(self):
632+
# issue 20519
633+
df = pd.DataFrame([["x", np.nan, 1]], columns=["A", "B", "C"]).set_index(
634+
["A", "B"]
635+
)
636+
result = df.groupby(level=["A", "B"]).sum()
637+
expected = DataFrame(
638+
data=[],
639+
index=MultiIndex(
640+
levels=[["x"], []],
641+
codes=[[], []],
642+
names=["A", "B"]
643+
),
644+
columns=["C"]
645+
)
646+
tm.assert_frame_equal(
647+
result,
648+
expected,
649+
check_index_type=False,
650+
check_dtype=False
651+
)
652+
653+
df = pd.DataFrame(
654+
[[None, None, "x", 2], [np.nan, "y", np.nan, 4]],
655+
columns=["A", "B", "C", "D"]
656+
).set_index(["A", "B", "C"])
657+
result = df.groupby(level=["A", "B", "C"]).sum()
658+
expected = DataFrame(
659+
data=[],
660+
index=MultiIndex(
661+
levels=[[], ["y"], ["x"]],
662+
codes=[[], [], []],
663+
names=["A", "B", "C"]
664+
),
665+
columns=["D"]
666+
)
667+
tm.assert_frame_equal(
668+
result,
669+
expected,
670+
check_index_type=False,
671+
check_dtype=False
672+
)
673+
631674

632675
# get_group
633676
# --------------------------------

0 commit comments

Comments
 (0)