From 35e1c23fb0811ab2ed394d18f5eef06e5cd83435 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Tue, 27 Sep 2022 01:00:43 +0200 Subject: [PATCH 1/2] Backport PR #48608: REGR: assert_index_equal raising with non matching pd.NA --- doc/source/whatsnew/v1.5.1.rst | 1 + pandas/_libs/testing.pyx | 4 ++++ pandas/_testing/asserters.py | 3 +++ pandas/tests/util/test_assert_index_equal.py | 12 ++++++++++++ 4 files changed, 20 insertions(+) diff --git a/doc/source/whatsnew/v1.5.1.rst b/doc/source/whatsnew/v1.5.1.rst index 6798c7074228c..509e30be96144 100644 --- a/doc/source/whatsnew/v1.5.1.rst +++ b/doc/source/whatsnew/v1.5.1.rst @@ -84,6 +84,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ - Bug in :meth:`DataFrame.to_hdf` raising ``AssertionError`` with boolean index (:issue:`48667`) +- Bug in :func:`assert_index_equal` for extension arrays with non matching ``NA`` raising ``ValueError`` (:issue:`48608`) - Bug in :meth:`DataFrame.pivot_table` raising unexpected ``FutureWarning`` when setting datetime column as index (:issue:`48683`) - diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index cfe9f40f12452..11d8fe6ebd29a 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -179,6 +179,10 @@ cpdef assert_almost_equal(a, b, # nan / None comparison return True + if isna(a) and not isna(b) or not isna(a) and isna(b): + # boolean value of pd.NA is ambiguous + raise AssertionError(f"{a} != {b}") + if a == b: # object comparison return True diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 945639ef4b00a..f6af1c72dfb2f 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -398,6 +398,9 @@ def _get_ilevel_values(index, level): if not left.equals(right): mismatch = left._values != right._values + if is_extension_array_dtype(mismatch): + mismatch = cast("ExtensionArray", mismatch).fillna(True) + diff = np.sum(mismatch.astype(int)) * 100.0 / len(left) msg = 
f"{obj} values are different ({np.round(diff, 5)} %)" raise_assert_detail(obj, msg, left, right) diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index 1fa7b979070a7..177b88b2c93dd 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -264,3 +264,15 @@ def test_assert_index_equal_object_ints_order_false(): idx1 = Index([1, 3], dtype="object") idx2 = Index([3, 1], dtype="object") tm.assert_index_equal(idx1, idx2, check_order=False) + + +@pytest.mark.parametrize("check_categorical", [True, False]) +@pytest.mark.parametrize("check_names", [True, False]) +def test_assert_ea_index_equal_non_matching_na(check_names, check_categorical): + # GH#48608 + idx1 = Index([1, 2], dtype="Int64") + idx2 = Index([1, NA], dtype="Int64") + with pytest.raises(AssertionError, match="50.0 %"): + tm.assert_index_equal( + idx1, idx2, check_names=check_names, check_categorical=check_categorical + ) From 2252f3e6ccb442374936aff4b0160f16ddab0458 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Tue, 27 Sep 2022 12:24:22 +0200 Subject: [PATCH 2/2] Update test_assert_index_equal.py --- pandas/tests/util/test_assert_index_equal.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index 177b88b2c93dd..8c7f364b03e53 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -2,6 +2,7 @@ import pytest from pandas import ( + NA, Categorical, CategoricalIndex, Index,