diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 3e72072eae303..afecc7fa4e41b 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -742,6 +742,7 @@ Indexing - Fix assignment of column via `.loc` with numpy non-ns datetime type (:issue:`27395`) - Bug in :meth:`Float64Index.astype` where ``np.inf`` was not handled properly when casting to an integer dtype (:issue:`28475`) - :meth:`Index.union` could fail when the left contained duplicates (:issue:`28257`) +- Bug when indexing with ``.loc`` where a ``ValueError`` was raised if the index was a :class:`CategoricalIndex` with integer or float categories (:issue:`17569`) - :meth:`Index.get_indexer_non_unique` could fail with `TypeError` in some cases, such as when searching for ints in a string index (:issue:`28257`) - Bug in :meth:`Float64Index.get_loc` incorrectly raising ``TypeError`` instead of ``KeyError`` (:issue:`29189`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 1587d97ffb52c..0f97bb327f525 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2953,11 +2953,11 @@ def _convert_scalar_indexer(self, key, kind=None): "unicode", "mixed", ]: - return self._invalid_indexer("label", key) + self._invalid_indexer("label", key) elif kind in ["loc"] and is_integer(key): if not self.holds_integer(): - return self._invalid_indexer("label", key) + self._invalid_indexer("label", key) return key diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index d061f61effff3..2cc853ecf568b 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -696,9 +696,11 @@ def get_indexer_non_unique(self, target): @Appender(_index_shared_docs["_convert_scalar_indexer"]) def _convert_scalar_indexer(self, key, kind=None): - if self.categories._defer_to_indexing: - return self.categories._convert_scalar_indexer(key, kind=kind) - + if kind == "loc": + try: + return 
self.categories._convert_scalar_indexer(key, kind=kind) + except TypeError: + self._invalid_indexer("label", key) return super()._convert_scalar_indexer(key, kind=kind) @Appender(_index_shared_docs["_convert_list_indexer"]) diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index ab3b0ed13b5c0..bc3ee1c59f76c 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -12,7 +12,9 @@ Index, Interval, Series, + Timedelta, Timestamp, + conftest, ) from pandas.api.types import CategoricalDtype as CDT import pandas.util.testing as tm @@ -80,6 +82,13 @@ def test_loc_scalar(self): with pytest.raises(TypeError, match=msg): df.loc["d", "C"] = 10 + msg = ( + r"cannot do label indexing on with these indexers \[1\] of " + ) + with pytest.raises(TypeError, match=msg): + df.loc[1] + def test_getitem_scalar(self): cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")]) @@ -754,3 +763,56 @@ def test_map_with_dict_or_series(self): output = cur_index.map(mapper) # Order of categories in output can be different tm.assert_index_equal(expected, output) + + @pytest.mark.parametrize( + "idx_values", + [ + # python types + [1, 2, 3], + [-1, -2, -3], + [1.5, 2.5, 3.5], + [-1.5, -2.5, -3.5], + # numpy int/uint + *[np.array([1, 2, 3], dtype=dtype) for dtype in conftest.ALL_INT_DTYPES], + # numpy floats + *[np.array([1.5, 2.5, 3.5], dtype=dtyp) for dtyp in conftest.FLOAT_DTYPES], + # numpy object + np.array([1, "b", 3.5], dtype=object), + # pandas scalars + [Interval(1, 4), Interval(4, 6), Interval(6, 9)], + [Timestamp(2019, 1, 1), Timestamp(2019, 2, 1), Timestamp(2019, 3, 1)], + [Timedelta(1, "d"), Timedelta(2, "d"), Timedelta(3, "D")], + # pandas Integer arrays + *[pd.array([1, 2, 3], dtype=dtype) for dtype in conftest.ALL_EA_INT_DTYPES], + # other pandas arrays + pd.IntervalIndex.from_breaks([1, 4, 6, 9]).array, + pd.date_range("2019-01-01", periods=3).array, + 
pd.timedelta_range(start="1d", periods=3).array, + ], + ) + def test_loc_with_non_string_categories(self, idx_values, ordered_fixture): + # GH-17569 + cat_idx = CategoricalIndex(idx_values, ordered=ordered_fixture) + df = DataFrame({"A": ["foo", "bar", "baz"]}, index=cat_idx) + + # scalar selection + result = df.loc[idx_values[0]] + expected = Series(["foo"], index=["A"], name=idx_values[0]) + tm.assert_series_equal(result, expected) + + # list selection + result = df.loc[idx_values[:2]] + expected = DataFrame(["foo", "bar"], index=cat_idx[:2], columns=["A"]) + tm.assert_frame_equal(result, expected) + + # scalar assignment + result = df.copy() + result.loc[idx_values[0]] = "qux" + expected = DataFrame({"A": ["qux", "bar", "baz"]}, index=cat_idx) + tm.assert_frame_equal(result, expected) + + # list assignment + result = df.copy() + result.loc[idx_values[:2], "A"] = ["qux", "qux2"] + expected = DataFrame({"A": ["qux", "qux2", "baz"]}, index=cat_idx) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 0a3b513ff0167..cdacecc6c79d3 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -100,7 +100,12 @@ def test_scalar_non_numeric(self): idxr(s)[3.0] # label based can be a TypeError or KeyError - if s.index.inferred_type in ["string", "unicode", "mixed"]: + if s.index.inferred_type in { + "categorical", + "string", + "unicode", + "mixed", + }: error = KeyError msg = r"^3$" else: