pandas-dev · mroeschke · Jul 22, 2024 · Jul 20, 2024 · Jul 20, 2024 · Jul 20, 2024
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -33,6 +33,7 @@ Other enhancements
 - :func:`DataFrame.to_excel` now raises an ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`)
 - :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`)
 - :meth:`DataFrame.agg` called with ``axis=1`` and a ``func`` which relabels the result index now raises a ``NotImplementedError`` (:issue:`58807`).
+- :meth:`Index.get_loc` now also accepts subclasses of ``tuple`` as keys (:issue:`57922`)
 - :meth:`Styler.set_tooltips` provides alternative method to storing tooltips by using title attribute of td elements. (:issue:`56981`)
 - Allow dictionaries to be passed to :meth:`pandas.Series.str.replace` via ``pat`` parameter (:issue:`51748`)
 - Support passing a :class:`Series` input to :func:`json_normalize` that retains the :class:`Series` :class:`Index` (:issue:`51452`)
@@ -231,6 +232,7 @@ Other API changes
 ^^^^^^^^^^^^^^^^^
 - 3rd party ``py.path`` objects are no longer explicitly supported in IO methods. Use :py:class:`pathlib.Path` objects instead (:issue:`57091`)
 - :func:`read_table`'s ``parse_dates`` argument defaults to ``None`` to improve consistency with :func:`read_csv` (:issue:`57476`)
+- All classes inheriting from builtin ``tuple`` (including types created with :func:`collections.namedtuple`) are now hashed and compared as builtin ``tuple`` objects during indexing operations (:issue:`57922`)
 - Made ``dtype`` a required argument in :meth:`ExtensionArray._from_sequence_of_strings` (:issue:`56519`)
 - Passing a :class:`Series` input to :func:`json_normalize` will now retain the :class:`Series` :class:`Index`, previously output had a new :class:`RangeIndex` (:issue:`51452`)
 - Updated :meth:`DataFrame.to_excel` so that the output spreadsheet has no styling. Custom styling can still be done using :meth:`Styler.to_excel` (:issue:`54154`)

diff --git a/pandas/_libs/include/pandas/vendored/klib/khash_python.h b/pandas/_libs/include/pandas/vendored/klib/khash_python.h
@@ -207,7 +207,8 @@ static inline int pyobject_cmp(PyObject *a, PyObject *b) {
     if (PyComplex_CheckExact(a)) {
       return complexobject_cmp((PyComplexObject *)a, (PyComplexObject *)b);
     }
-    if (PyTuple_CheckExact(a)) {
+    if (PyTuple_Check(a)) {
+      // compare tuple subclasses as builtin tuples
       return tupleobject_cmp((PyTupleObject *)a, (PyTupleObject *)b);
     }
     // frozenset isn't yet supported
@@ -311,7 +312,8 @@ static inline khuint32_t kh_python_hash_func(PyObject *key) {
     // because complex(k,0) == k holds for any int-object k
     // and kh_complex128_hash_func doesn't respect it
     hash = complexobject_hash((PyComplexObject *)key);
-  } else if (PyTuple_CheckExact(key)) {
+  } else if (PyTuple_Check(key)) {
+    // hash tuple subclasses as builtin tuples
     hash = tupleobject_hash((PyTupleObject *)key);
   } else {
     hash = PyObject_Hash(key);

diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py
@@ -1,3 +1,4 @@
+from collections import namedtuple
 from datetime import timedelta
 import re
 
@@ -1006,3 +1007,26 @@ def test_get_indexer_for_multiindex_with_nans(nulls_fixture):
     result = idx1.get_indexer(idx2)
     expected = np.array([-1, 1], dtype=np.intp)
     tm.assert_numpy_array_equal(result, expected)
+
+
+def test_get_loc_namedtuple_behaves_like_tuple():
+    # GH57922
+    NamedIndex = namedtuple("NamedIndex", ("a", "b"))
+    multi_idx = MultiIndex.from_tuples(
+        [NamedIndex("i1", "i2"), NamedIndex("i3", "i4"), NamedIndex("i5", "i6")]
+    )
+    for idx in (multi_idx, multi_idx.to_flat_index()):
+        assert idx.get_loc(NamedIndex("i1", "i2")) == 0
+        assert idx.get_loc(NamedIndex("i3", "i4")) == 1
+        assert idx.get_loc(NamedIndex("i5", "i6")) == 2
+        assert idx.get_loc(("i1", "i2")) == 0
+        assert idx.get_loc(("i3", "i4")) == 1
+        assert idx.get_loc(("i5", "i6")) == 2
+    multi_idx = MultiIndex.from_tuples([("i1", "i2"), ("i3", "i4"), ("i5", "i6")])
+    for idx in (multi_idx, multi_idx.to_flat_index()):
+        assert idx.get_loc(NamedIndex("i1", "i2")) == 0
+        assert idx.get_loc(NamedIndex("i3", "i4")) == 1
+        assert idx.get_loc(NamedIndex("i5", "i6")) == 2
+        assert idx.get_loc(("i1", "i2")) == 0
+        assert idx.get_loc(("i3", "i4")) == 1
+        assert idx.get_loc(("i5", "i6")) == 2
diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py
@@ -1,3 +1,4 @@
+from collections import namedtuple
 from collections.abc import Generator
 from contextlib import contextmanager
 import re
@@ -405,9 +406,8 @@ def test_nan_complex_real(self):
         table = ht.PyObjectHashTable()
         table.set_item(nan1, 42)
         assert table.get_item(nan2) == 42
-        with pytest.raises(KeyError, match=None) as error:
+        with pytest.raises(KeyError, match=re.escape(repr(other))):
             table.get_item(other)
-        assert str(error.value) == str(other)
 
     def test_nan_complex_imag(self):
         nan1 = complex(1, float("nan"))
@@ -417,9 +417,8 @@ def test_nan_complex_imag(self):
         table = ht.PyObjectHashTable()
         table.set_item(nan1, 42)
         assert table.get_item(nan2) == 42
-        with pytest.raises(KeyError, match=None) as error:
+        with pytest.raises(KeyError, match=re.escape(repr(other))):
             table.get_item(other)
-        assert str(error.value) == str(other)
 
     def test_nan_in_tuple(self):
         nan1 = (float("nan"),)
@@ -436,9 +435,28 @@ def test_nan_in_nested_tuple(self):
         table = ht.PyObjectHashTable()
         table.set_item(nan1, 42)
         assert table.get_item(nan2) == 42
-        with pytest.raises(KeyError, match=None) as error:
+        with pytest.raises(KeyError, match=re.escape(repr(other))):
+            table.get_item(other)
+
+    def test_nan_in_namedtuple(self):
+        T = namedtuple("T", ["x"])
+        nan1 = T(float("nan"))
+        nan2 = T(float("nan"))
+        assert nan1.x is not nan2.x
+        table = ht.PyObjectHashTable()
+        table.set_item(nan1, 42)
+        assert table.get_item(nan2) == 42
+
+    def test_nan_in_nested_namedtuple(self):
+        T = namedtuple("T", ["x", "y"])
+        nan1 = T(1, (2, (float("nan"),)))
+        nan2 = T(1, (2, (float("nan"),)))
+        other = T(1, 2)
+        table = ht.PyObjectHashTable()
+        table.set_item(nan1, 42)
+        assert table.get_item(nan2) == 42
+        with pytest.raises(KeyError, match=re.escape(repr(other))):
             table.get_item(other)
-        assert str(error.value) == str(other)
 
 
 def test_hash_equal_tuple_with_nans():
@@ -448,6 +466,22 @@ def test_hash_equal_tuple_with_nans():
     assert ht.objects_are_equal(a, b)
 
 
+def test_hash_equal_namedtuple_with_nans():
+    T = namedtuple("T", ["x", "y"])
+    a = T(float("nan"), (float("nan"), float("nan")))
+    b = T(float("nan"), (float("nan"), float("nan")))
+    assert ht.object_hash(a) == ht.object_hash(b)
+    assert ht.objects_are_equal(a, b)
+
+
+def test_hash_equal_namedtuple_and_tuple():
+    T = namedtuple("T", ["x", "y"])
+    a = T(1, (2, 3))
+    b = (1, (2, 3))
+    assert ht.object_hash(a) == ht.object_hash(b)
+    assert ht.objects_are_equal(a, b)
+
+
 def test_get_labels_groupby_for_Int64(writable):
     table = ht.Int64HashTable()
     vals = np.array([1, 2, -1, 2, 1, -1], dtype=np.int64)