diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 5197fd2b23dab..ba3af0a6accb3 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -284,7 +284,7 @@ Plotting
 
 Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
--
+- Bug in :meth:`SeriesGroupBy.value_counts` where unobserved categories in a grouped categorical series were not tallied (:issue:`38672`)
 -
 
 Reshaping
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 16b00735cf694..f2899a7ca704b 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -42,6 +42,7 @@
     ensure_int64,
     ensure_platform_int,
     is_bool,
+    is_categorical_dtype,
     is_integer_dtype,
     is_interval_dtype,
     is_numeric_dtype,
@@ -681,9 +682,10 @@ def value_counts(
         from pandas.core.reshape.merge import get_join_indexers
         from pandas.core.reshape.tile import cut
 
-        if bins is not None and not np.iterable(bins):
-            # scalar bins cannot be done at top level
-            # in a backward compatible way
+        ids, _, _ = self.grouper.group_info
+        val = self.obj._values
+
+        def apply_series_value_counts():
             return self.apply(
                 Series.value_counts,
                 normalize=normalize,
@@ -692,8 +694,14 @@ def value_counts(
                 bins=bins,
             )
 
-        ids, _, _ = self.grouper.group_info
-        val = self.obj._values
+        if bins is not None:
+            if not np.iterable(bins):
+                # scalar bins cannot be done at top level
+                # in a backward compatible way
+                return apply_series_value_counts()
+        elif is_categorical_dtype(val):
+            # GH38672
+            return apply_series_value_counts()
 
         # groupby removes null keys from groupings
         mask = ids != -1
diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py
index c5d454baa7e7b..afb648d8527ca 100644
--- a/pandas/tests/groupby/test_value_counts.py
+++ b/pandas/tests/groupby/test_value_counts.py
@@ -9,7 +9,16 @@
 import numpy as np
 import pytest
 
-from pandas import DataFrame, Grouper, MultiIndex, Series, date_range, to_datetime
+from pandas import (
+    Categorical,
+    CategoricalIndex,
+    DataFrame,
+    Grouper,
+    MultiIndex,
+    Series,
+    date_range,
+    to_datetime,
+)
 import pandas._testing as tm
 
 
@@ -111,3 +120,30 @@ def test_series_groupby_value_counts_with_grouper():
     expected.index.names = result.index.names
 
     tm.assert_series_equal(result, expected)
+
+
+def test_series_groupby_value_counts_on_categorical():
+    # GH38672
+
+    s = Series(Categorical(["a"], categories=["a", "b"]))
+    result = s.groupby([0]).value_counts()
+
+    expected = Series(
+        data=[1, 0],
+        index=MultiIndex.from_arrays(
+            [
+                [0, 0],
+                CategoricalIndex(
+                    ["a", "b"], categories=["a", "b"], ordered=False, dtype="category"
+                ),
+            ]
+        ),
+        name=0,
+    )
+
+    # Expected:
+    # 0  a    1
+    #    b    0
+    # Name: 0, dtype: int64
+
+    tm.assert_series_equal(result, expected)