From d974f28da7fac5c8515b6b28ef196e3d43795c81 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 10 Feb 2020 17:10:29 -0800 Subject: [PATCH 1/2] API: always raise KeyError on __getitem__ and loc.__getitem__, never TypeError --- pandas/core/indexes/base.py | 6 +-- pandas/core/indexes/category.py | 2 +- pandas/core/indexes/datetimelike.py | 4 +- pandas/core/indexing.py | 2 +- pandas/tests/frame/test_constructors.py | 6 +-- pandas/tests/indexing/test_categorical.py | 6 +-- pandas/tests/indexing/test_floats.py | 55 +++++++++-------------- pandas/tests/indexing/test_loc.py | 6 +-- pandas/tests/indexing/test_scalar.py | 16 ++----- 9 files changed, 38 insertions(+), 65 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 719bf13cbd313..9c439fdb3b218 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3121,7 +3121,7 @@ def _convert_scalar_indexer(self, key, kind: str_t): if kind == "getitem" and is_float(key): if not self.is_floating(): - self._invalid_indexer("label", key) + raise KeyError(key) elif kind == "loc" and is_float(key): @@ -3135,11 +3135,11 @@ def _convert_scalar_indexer(self, key, kind: str_t): "string", "mixed", ]: - self._invalid_indexer("label", key) + raise KeyError(key) elif kind == "loc" and is_integer(key): if not self.holds_integer(): - self._invalid_indexer("label", key) + raise KeyError(key) return key diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 7373f41daefa4..54cd153a414e4 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -598,7 +598,7 @@ def _convert_scalar_indexer(self, key, kind: str): try: return self.categories._convert_scalar_indexer(key, kind="loc") except TypeError: - self._invalid_indexer("label", key) + raise KeyError(key) return super()._convert_scalar_indexer(key, kind=kind) @Appender(Index._convert_list_indexer.__doc__) diff --git a/pandas/core/indexes/datetimelike.py 
b/pandas/core/indexes/datetimelike.py index 941b6c876bb36..64b44f4d27e31 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -397,9 +397,9 @@ def _convert_scalar_indexer(self, key, kind: str): is_int = is_integer(key) is_flt = is_float(key) if kind == "loc" and (is_int or is_flt): - self._invalid_indexer("label", key) + raise KeyError(key) elif kind == "getitem" and is_flt: - self._invalid_indexer("label", key) + raise KeyError(key) return super()._convert_scalar_indexer(key, kind=kind) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index c7dcccab00d95..03ba5ab1fbcda 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1176,7 +1176,7 @@ def _convert_to_indexer(self, key, axis: int, is_setter: bool = False): # try to find out correct indexer, if not type correct raise try: key = labels._convert_scalar_indexer(key, kind="loc") - except TypeError: + except KeyError: # but we will allow setting if not is_setter: raise diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 5f4c78449f71d..d13beb5f04a1c 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1859,11 +1859,7 @@ def check(df): # No NaN found -> error if len(indexer) == 0: - msg = ( - "cannot do label indexing on RangeIndex " - r"with these indexers \[nan\] of type float" - ) - with pytest.raises(TypeError, match=msg): + with pytest.raises(KeyError, match="^nan$"): df.loc[:, np.nan] # single nan should result in Series elif len(indexer) == 1: diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index da935b1c911d0..8a8ac584c16c2 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -82,11 +82,7 @@ def test_loc_scalar(self): with pytest.raises(TypeError, match=msg): df.loc["d", "C"] = 10 - msg = ( - "cannot do label indexing on CategoricalIndex 
with these " - r"indexers \[1\] of type int" - ) - with pytest.raises(TypeError, match=msg): + with pytest.raises(KeyError, match="^1$"): df.loc[1] def test_getitem_scalar(self): diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 6cc18a3989266..1b44dbb65e992 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -86,11 +86,9 @@ def test_scalar_non_numeric(self, index_func): # getting for idxr, getitem in [(lambda x: x.iloc, False), (lambda x: x, True)]: - # gettitem on a DataFrame is a KeyError as it is indexing - # via labels on the columns - if getitem and isinstance(s, DataFrame): + if getitem: error = KeyError - msg = r"^3(\.0)?$" + msg = r"^3\.0$" else: error = TypeError msg = ( @@ -109,6 +107,9 @@ def test_scalar_non_numeric(self, index_func): "string", "unicode", "mixed", + "period", + "timedelta64", + "datetime64", }: error = KeyError msg = r"^3\.0$" @@ -163,12 +164,7 @@ def test_scalar_non_numeric(self, index_func): # fallsback to position selection, series only s = Series(np.arange(len(i)), index=i) s[3] - msg = ( - r"cannot do (label|positional) indexing " - r"on {klass} with these indexers \[3\.0\] of " - r"type float".format(klass=type(i).__name__) - ) - with pytest.raises(TypeError, match=msg): + with pytest.raises(KeyError, match="^3.0$"): s[3.0] def test_scalar_with_mixed(self): @@ -178,18 +174,18 @@ def test_scalar_with_mixed(self): # lookup in a pure stringstr # with an invalid indexer - for idxr in [lambda x: x, lambda x: x.iloc]: - - msg = ( - r"cannot do label indexing " - r"on {klass} with these indexers \[1\.0\] of " - r"type float|" - "Cannot index by location index with a non-integer key".format( - klass=Index.__name__ - ) + msg = ( + r"cannot do label indexing " + r"on {klass} with these indexers \[1\.0\] of " + r"type float|" + "Cannot index by location index with a non-integer key".format( + klass=Index.__name__ ) - with pytest.raises(TypeError, 
match=msg): - idxr(s2)[1.0] + ) + with pytest.raises(KeyError, match="^1.0$"): + s2[1.0] + with pytest.raises(TypeError, match=msg): + s2.iloc[1.0] with pytest.raises(KeyError, match=r"^1\.0$"): s2.loc[1.0] @@ -200,19 +196,12 @@ def test_scalar_with_mixed(self): # mixed index so we have label # indexing - for idxr in [lambda x: x]: + with pytest.raises(KeyError, match="^1.0$"): + s3[1.0] - msg = ( - r"cannot do label indexing " - r"on {klass} with these indexers \[1\.0\] of " - r"type float".format(klass=Index.__name__) - ) - with pytest.raises(TypeError, match=msg): - idxr(s3)[1.0] - - result = idxr(s3)[1] - expected = 2 - assert result == expected + result = s3[1] + expected = 2 + assert result == expected msg = "Cannot index by location index with a non-integer key" with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 02652d993e0f3..f4c6900dfcf74 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -16,7 +16,7 @@ class TestLoc(Base): def test_loc_getitem_int(self): # int label - self.check_result("loc", 2, typs=["labels"], fails=TypeError) + self.check_result("loc", 2, typs=["labels"], fails=KeyError) def test_loc_getitem_label(self): @@ -34,8 +34,8 @@ def test_loc_getitem_label_out_of_range(self): self.check_result( "loc", 20, typs=["ints", "uints", "mixed"], fails=KeyError, ) - self.check_result("loc", 20, typs=["labels"], fails=TypeError) - self.check_result("loc", 20, typs=["ts"], axes=0, fails=TypeError) + self.check_result("loc", 20, typs=["labels"], fails=KeyError) + self.check_result("loc", 20, typs=["ts"], axes=0, fails=KeyError) self.check_result("loc", 20, typs=["floats"], axes=0, fails=KeyError) def test_loc_getitem_label_list(self): diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index 899c58eb5edea..aff1d5e6e7f5e 100644 --- a/pandas/tests/indexing/test_scalar.py +++ 
b/pandas/tests/indexing/test_scalar.py @@ -137,13 +137,9 @@ def test_series_at_raises_type_error(self): result = ser.loc["a"] assert result == 1 - msg = ( - "cannot do label indexing on Index " - r"with these indexers \[0\] of type int" - ) - with pytest.raises(TypeError, match=msg): + with pytest.raises(KeyError, match="^0$"): ser.at[0] - with pytest.raises(TypeError, match=msg): + with pytest.raises(KeyError, match="^0$"): ser.loc[0] def test_frame_raises_type_error(self): @@ -154,13 +150,9 @@ def test_frame_raises_type_error(self): result = df.loc["a", "A"] assert result == 1 - msg = ( - "cannot do label indexing on Index " - r"with these indexers \[0\] of type int" - ) - with pytest.raises(TypeError, match=msg): + with pytest.raises(KeyError, match="^0$"): df.at["a", 0] - with pytest.raises(TypeError, match=msg): + with pytest.raises(KeyError, match="^0$"): df.loc["a", 0] def test_series_at_raises_key_error(self): From 434be304b95a5e9fd12cd41ed45684fbbce22218 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 22 Feb 2020 10:27:52 -0800 Subject: [PATCH 2/2] whatsnew --- doc/source/whatsnew/v1.1.0.rst | 71 +++++++++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 7449c62a5ad31..3d478df4c0ea8 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -67,7 +67,76 @@ Backwards incompatible API changes now raise a ``TypeError`` if a not-accepted keyword argument is passed into it. Previously a ``UnsupportedFunctionCall`` was raised (``AssertionError`` if ``min_count`` passed into :meth:`~DataFrameGroupby.median``) (:issue:`31485`) - :meth:`DataFrame.at` and :meth:`Series.at` will raise a ``TypeError`` instead of a ``ValueError`` if an incompatible key is passed, and ``KeyError`` if a missing key is passed, matching the behavior of ``.loc[]`` (:issue:`31722`) -- + +.. 
_whatsnew_110.api_breaking.indexing_raises_key_errors: + +Failed Label-Based Lookups Always Raise KeyError +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Label lookups ``series[key]``, ``series.loc[key]`` and ``frame.loc[key]`` +used to raise either ``KeyError`` or ``TypeError`` depending on the type of +key and type of :class:`Index`. These now consistently raise ``KeyError`` (:issue:`31867`) + +.. ipython:: python + + ser1 = pd.Series(range(3), index=[0, 1, 2]) + ser2 = pd.Series(range(3), index=pd.date_range("2020-02-01", periods=3)) + +*Previous behavior*: + +.. code-block:: ipython + + In [3]: ser1[1.5] + ... + TypeError: cannot do label indexing on Int64Index with these indexers [1.5] of type float + + In [4]: ser1["foo"] + ... + KeyError: 'foo' + + In [5]: ser1.loc[1.5] + ... + TypeError: cannot do label indexing on Int64Index with these indexers [1.5] of type float + + In [6]: ser1.loc["foo"] + ... + KeyError: 'foo' + + In [7]: ser2.loc[1] + ... + TypeError: cannot do label indexing on DatetimeIndex with these indexers [1] of type int + + In [8]: ser2.loc[pd.Timestamp(0)] + ... + KeyError: Timestamp('1970-01-01 00:00:00') + +*New behavior*: + +.. code-block:: ipython + + In [3]: ser1[1.5] + ... + KeyError: 1.5 + + In [4]: ser1["foo"] + ... + KeyError: 'foo' + + In [5]: ser1.loc[1.5] + ... + KeyError: 1.5 + + In [6]: ser1.loc["foo"] + ... + KeyError: 'foo' + + In [7]: ser2.loc[1] + ... + KeyError: 1 + + In [8]: ser2.loc[pd.Timestamp(0)] + ... + KeyError: Timestamp('1970-01-01 00:00:00') .. ---------------------------------------------------------------------------