From 20ed975b1eb5affe3295517926428ae31b949af1 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Mon, 18 Oct 2021 13:47:29 -0400 Subject: [PATCH] BUG: crosstab fails with lists/tuples --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/core/reshape/pivot.py | 7 +++++-- pandas/tests/reshape/test_crosstab.py | 18 +++++++++++------- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index b7efec8fd2e89..0f2102b70653a 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -545,6 +545,7 @@ Reshaping - Bug in :func:`crosstab` when inputs are are categorical Series, there are categories that are not present in one or both of the Series, and ``margins=True``. Previously the margin value for missing categories was ``NaN``. It is now correctly reported as 0 (:issue:`43505`) - Bug in :func:`concat` would fail when the ``objs`` argument all had the same index and the ``keys`` argument contained duplicates (:issue:`43595`) - Bug in :func:`concat` which ignored the ``sort`` parameter (:issue:`43375`) +- Bug in :func:`crosstab` would fail when inputs are lists or tuples (:issue:`44076`) Sparse ^^^^^^ diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index fcf00276aa8af..edd3599aabe35 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -25,6 +25,7 @@ from pandas.core.dtypes.common import ( is_integer_dtype, is_list_like, + is_nested_list_like, is_scalar, ) from pandas.core.dtypes.generic import ( @@ -625,8 +626,10 @@ def crosstab( if values is not None and aggfunc is None: raise ValueError("values cannot be used without an aggfunc.") - index = com.maybe_make_list(index) - columns = com.maybe_make_list(columns) + if not is_nested_list_like(index): + index = [index] + if not is_nested_list_like(columns): + columns = [columns] common_idx = None pass_objs = [x for x in index + columns if isinstance(x, (ABCSeries, ABCDataFrame))] diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py index f252b5e1ceedf..74beda01e4b8a 100644 --- a/pandas/tests/reshape/test_crosstab.py +++ b/pandas/tests/reshape/test_crosstab.py @@ -84,10 +84,12 @@ def test_crosstab_multiple(self): expected = expected.unstack("A").fillna(0).astype(np.int64) tm.assert_frame_equal(result, expected) - def test_crosstab_ndarray(self): - a = np.random.randint(0, 5, size=100) - b = np.random.randint(0, 3, size=100) - c = np.random.randint(0, 10, size=100) + @pytest.mark.parametrize("box", [np.array, list, tuple]) + def test_crosstab_ndarray(self, box): + # GH 44076 + a = box(np.random.randint(0, 5, size=100)) + b = box(np.random.randint(0, 3, size=100)) + c = box(np.random.randint(0, 10, size=100)) df = DataFrame({"a": a, "b": b, "c": c}) @@ -100,9 +102,11 @@ def test_crosstab_ndarray(self): tm.assert_frame_equal(result, expected) # assign arbitrary names - result = crosstab(self.df["A"].values, self.df["C"].values) - assert result.index.name == "row_0" - assert result.columns.name == "col_0" + result = crosstab(a, c) + expected = crosstab(df["a"], df["c"]) + expected.index.names = ["row_0"] + expected.columns.names = ["col_0"] + tm.assert_frame_equal(result, expected) def test_crosstab_non_aligned(self): # GH 17005