diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 5b4761c3bc6c5..917ffba3ec0a7 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -999,7 +999,7 @@ Reshaping - Bug in :func:`melt` where supplying mixed strings and numeric values for ``id_vars`` or ``value_vars`` would incorrectly raise a ``ValueError`` (:issue:`29718`) - Dtypes are now preserved when transposing a ``DataFrame`` where each column is the same extension dtype (:issue:`30091`) - Bug in :func:`merge_asof` merging on a tz-aware ``left_index`` and ``right_on`` a tz-aware column (:issue:`29864`) -- +- Improved error message and docstring in :func:`cut` and :func:`qcut` when `labels=True` (:issue:`13318`) Sparse ^^^^^^ diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 8cf51ae09fbcb..2e3eb9170b15c 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -15,6 +15,7 @@ is_datetime64tz_dtype, is_datetime_or_timedelta_dtype, is_integer, + is_list_like, is_scalar, is_timedelta64_dtype, ) @@ -65,11 +66,12 @@ def cut( ``right == True`` (the default), then the `bins` ``[1, 2, 3, 4]`` indicate (1,2], (2,3], (3,4]. This argument is ignored when `bins` is an IntervalIndex. - labels : array or bool, optional + labels : array or False, default None Specifies the labels for the returned bins. Must be the same length as the resulting bins. If False, returns only integer indicators of the bins. This affects the type of the output container (see below). - This argument is ignored when `bins` is an IntervalIndex. + This argument is ignored when `bins` is an IntervalIndex. If True, + raises an error. retbins : bool, default False Whether to return the bins or not. Useful when bins is provided as a scalar. @@ -286,10 +288,10 @@ def qcut( q : int or list-like of int Number of quantiles. 10 for deciles, 4 for quartiles, etc. Alternately array of quantiles, e.g. [0, .25, .5, .75, 1.] for quartiles. - labels : array or bool, default None + labels : array or False, default None Used as labels for the resulting bins. Must be of the same length as the resulting bins. If False, return only integer indicators of the - bins. + bins. If True, raises an error. retbins : bool, optional Whether to return the (bins, labels) or not. Can be useful if bins is given as a scalar. @@ -391,15 +393,23 @@ def _bins_to_cuts( has_nas = na_mask.any() if labels is not False: - if labels is None: + if not (labels is None or is_list_like(labels)): + raise ValueError( + "Bin labels must either be False, None or passed in as a " + "list-like argument" + ) + + elif labels is None: labels = _format_labels( bins, precision, right=right, include_lowest=include_lowest, dtype=dtype ) + else: if len(labels) != len(bins) - 1: raise ValueError( "Bin labels must be one fewer than the number of bin edges" ) + if not is_categorical_dtype(labels): labels = Categorical(labels, categories=labels, ordered=True) diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index e52636d54ebe8..13b6f05ed304a 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -603,3 +603,12 @@ def test_cut_bool_coercion_to_int(bins, box, compare): expected = cut(data_expected, bins, duplicates="drop") result = cut(data_result, bins, duplicates="drop") compare(result, expected) + + +@pytest.mark.parametrize("labels", ["foo", 1, True]) +def test_cut_incorrect_labels(labels): + # GH 13318 + values = range(5) + msg = "Bin labels must either be False, None or passed in as a list-like argument" + with pytest.raises(ValueError, match=msg): + cut(values, 4, labels=labels) diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py index c5ca05056a306..95406a5ebf4f7 100644 --- a/pandas/tests/reshape/test_qcut.py +++ b/pandas/tests/reshape/test_qcut.py @@ -130,6 +130,38 @@ def test_qcut_return_intervals(): tm.assert_series_equal(res, exp) +@pytest.mark.parametrize("labels", ["foo", 1, True]) +def test_qcut_incorrect_labels(labels): + # GH 13318 + values = range(5) + msg = "Bin labels must either be False, None or passed in as a list-like argument" + with pytest.raises(ValueError, match=msg): + qcut(values, 4, labels=labels) + + +@pytest.mark.parametrize("labels", [["a", "b", "c"], list(range(3))]) +def test_qcut_wrong_length_labels(labels): + # GH 13318 + values = range(10) + msg = "Bin labels must be one fewer than the number of bin edges" + with pytest.raises(ValueError, match=msg): + qcut(values, 4, labels=labels) + + +@pytest.mark.parametrize( + "labels, expected", + [ + (["a", "b", "c"], Categorical(["a", "b", "c"], ordered=True)), + (list(range(3)), Categorical([0, 1, 2], ordered=True)), + ], +) +def test_qcut_list_like_labels(labels, expected): + # GH 13318 + values = range(3) + result = qcut(values, 3, labels=labels) + tm.assert_categorical_equal(result, expected) + + @pytest.mark.parametrize( "kwargs,msg", [