From 9bd42bf99a0bfdefbf7b2f64f62bcb3e87de3789 Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Sun, 6 Sep 2020 14:29:40 +0200 Subject: [PATCH 01/10] BUG: Enable Series.equals to compare numpy arrays to scalars --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/_libs/lib.pyx | 7 +++++++ pandas/tests/series/methods/test_equals.py | 5 ++--- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index bce6a735b7b07..7b736b768456d 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -247,6 +247,7 @@ Numeric ^^^^^^^ - Bug in :func:`to_numeric` where float precision was incorrect (:issue:`31364`) - Bug in :meth:`DataFrame.any` with ``axis=1`` and ``bool_only=True`` ignoring the ``bool_only`` keyword (:issue:`32432`) +- Bug in :meth: `Series.equals` where a ``ValueError`` was raised when iterables were compared to non-iterables (:issue:`35267`) - Conversion diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 7464fafee2b94..bda45557ba351 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -581,6 +581,13 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bool: return False elif (x is C_NA) ^ (y is C_NA): return False + # Only compare scalars to scalars and arrays to arrays + elif cnp.PyArray_IsAnyScalar(x) != cnp.PyArray_IsAnyScalar(y): + return False + # Check if arrays have the same type + elif not (cnp.PyArray_IsPythonScalar(x) or cnp.PyArray_IsPythonScalar(y)) \ + and not (isinstance(x, type(y)) or isinstance(y, type(x))): + return False elif not (PyObject_RichCompareBool(x, y, Py_EQ) or (x is None or is_nan(x)) and (y is None or is_nan(y))): return False diff --git a/pandas/tests/series/methods/test_equals.py b/pandas/tests/series/methods/test_equals.py index 600154adfcda3..5c9f5698632f3 100644 --- a/pandas/tests/series/methods/test_equals.py +++ b/pandas/tests/series/methods/test_equals.py @@ -31,9 +31,8 @@ def test_equals_list_array(): s2 = s1.copy() assert s1.equals(s2) - # TODO: Series equals should also work between single value and list - # s1[1] = 9 - # assert not s1.equals(s2) + s1[1] = 9 + assert not s1.equals(s2) def test_equals_false_negative(): From c84a39d03ec9561f28b7731ad44fe488d9347b6d Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Wed, 9 Sep 2020 18:52:32 +0200 Subject: [PATCH 02/10] Refactor condition with parentheses instead of slash --- pandas/_libs/lib.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index bda45557ba351..5db19c4950de7 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -585,8 +585,8 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bool: elif cnp.PyArray_IsAnyScalar(x) != cnp.PyArray_IsAnyScalar(y): return False # Check if arrays have the same type - elif not (cnp.PyArray_IsPythonScalar(x) or cnp.PyArray_IsPythonScalar(y)) \ - and not (isinstance(x, type(y)) or isinstance(y, type(x))): + elif (not (cnp.PyArray_IsPythonScalar(x) or cnp.PyArray_IsPythonScalar(y)) + and not (isinstance(x, type(y)) or isinstance(y, type(x)))): return False elif not (PyObject_RichCompareBool(x, y, Py_EQ) or (x is None or is_nan(x)) and (y is None or is_nan(y))): From 8b26aec577755d3a41a53514918bcae1b3543fcb Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Thu, 10 Sep 2020 20:55:59 +0200 Subject: [PATCH 03/10] Update whatsnew message --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 7b736b768456d..81577d1c0205d 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -247,7 +247,7 @@ Numeric ^^^^^^^ - Bug in :func:`to_numeric` where float precision was incorrect (:issue:`31364`) - Bug in :meth:`DataFrame.any` with ``axis=1`` and ``bool_only=True`` ignoring the ``bool_only`` keyword (:issue:`32432`) -- Bug in :meth: `Series.equals` where a ``ValueError`` was raised when iterables were compared to non-iterables (:issue:`35267`) +- Bug in :meth:`Series.equals` where a ``ValueError`` was raised when numpy arrays were compared to scalars (:issue:`35267`) - Conversion From 6367f9a35f418b9445bbbcf75b36235f856deaf5 Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Thu, 10 Sep 2020 23:26:10 +0200 Subject: [PATCH 04/10] Extend tests in test_equals.py with different types --- pandas/tests/series/methods/test_equals.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/tests/series/methods/test_equals.py b/pandas/tests/series/methods/test_equals.py index 5c9f5698632f3..a229c7a9da63e 100644 --- a/pandas/tests/series/methods/test_equals.py +++ b/pandas/tests/series/methods/test_equals.py @@ -24,14 +24,17 @@ def test_equals(arr, idx): assert not s1.equals(s2) -def test_equals_list_array(): +@pytest.mark.parametrize( + "val", [1, 1.1, 1 + 1j, True, "abc", [1, 2], (1, 2), {1, 2}, {"a": 1}, None] +) +def test_equals_list_array(val): # GH20676 Verify equals operator for list of Numpy arrays arr = np.array([1, 2]) s1 = Series([arr, arr]) s2 = s1.copy() assert s1.equals(s2) - s1[1] = 9 + s1[1] = val assert not s1.equals(s2) From e94db6dbf03710494622223d179fcb517f337bfb Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Fri, 11 Sep 2020 00:14:50 +0200 Subject: [PATCH 05/10] Add tests for array_equivalent with Series --- pandas/tests/dtypes/test_missing.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index a642b23379c6f..5468ea9535fc6 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -383,6 +383,14 @@ def test_array_equivalent(dtype_equal): assert not array_equivalent(DatetimeIndex([0, np.nan]), TimedeltaIndex([0, np.nan])) +@pytest.mark.parametrize( + "val", [1, 1.1, 1 + 1j, True, "abc", [1, 2], (1, 2), {1, 2}, {"a": 1}, None] +) +def test_array_equivalent_series(val): + arr = np.array([1, 2]) + assert not array_equivalent(Series([arr, arr]), Series([arr, val])) + + def test_array_equivalent_different_dtype_but_equal(): # Unclear if this is exposed anywhere in the public-facing API assert array_equivalent(np.array([1, 2]), np.array([1.0, 2.0])) From dc671380a8f0a1f903df6cc0cb8091e20b934ed5 Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Sun, 13 Sep 2020 19:53:16 +0200 Subject: [PATCH 06/10] Reword comments and put comments indented inside conditions --- pandas/_libs/lib.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 5db19c4950de7..a9fb6ba9cbd3c 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -581,12 +581,12 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bool: return False elif (x is C_NA) ^ (y is C_NA): return False - # Only compare scalars to scalars and arrays to arrays elif cnp.PyArray_IsAnyScalar(x) != cnp.PyArray_IsAnyScalar(y): + # Only compare scalars to scalars and non-scalars to non-scalars return False - # Check if arrays have the same type elif (not (cnp.PyArray_IsPythonScalar(x) or cnp.PyArray_IsPythonScalar(y)) and not (isinstance(x, type(y)) or isinstance(y, type(x)))): + # Check if non-scalars have the same type return False elif not (PyObject_RichCompareBool(x, y, Py_EQ) or (x is None or is_nan(x)) and (y is None or is_nan(y))): From 61a58a9d2684529a5dd8c51045a301f274cb2d06 Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Sat, 19 Sep 2020 14:14:48 +0200 Subject: [PATCH 07/10] Put checks to prevent ValueError in try/except clause --- pandas/_libs/lib.pyx | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index a9fb6ba9cbd3c..09eafd27e6050 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -581,13 +581,6 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bool: return False elif (x is C_NA) ^ (y is C_NA): return False - elif cnp.PyArray_IsAnyScalar(x) != cnp.PyArray_IsAnyScalar(y): - # Only compare scalars to scalars and non-scalars to non-scalars - return False - elif (not (cnp.PyArray_IsPythonScalar(x) or cnp.PyArray_IsPythonScalar(y)) - and not (isinstance(x, type(y)) or isinstance(y, type(x)))): - # Check if non-scalars have the same type - return False elif not (PyObject_RichCompareBool(x, y, Py_EQ) or (x is None or is_nan(x)) and (y is None or is_nan(y))): return False @@ -598,7 +591,15 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bool: if "tz-naive and tz-aware" in str(err): return False raise - + except ValueError: + if cnp.PyArray_IsAnyScalar(x) != cnp.PyArray_IsAnyScalar(y): + # Only compare scalars to scalars and non-scalars to non-scalars + return False + elif (not (cnp.PyArray_IsPythonScalar(x) or cnp.PyArray_IsPythonScalar(y)) + and not (isinstance(x, type(y)) or isinstance(y, type(x)))): + # Check if non-scalars have the same type + return False + raise return True From 993cd932187d51cddc75b8242989a6e76134c823 Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Sat, 19 Sep 2020 15:54:42 +0200 Subject: [PATCH 08/10] Assert FutureWarning that is thrown when comparing strings --- pandas/tests/dtypes/test_missing.py | 14 +++++++++++++- pandas/tests/series/methods/test_equals.py | 15 ++++++++++++++- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index 5468ea9535fc6..f3a33755589ba 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -24,6 +24,12 @@ from pandas import DatetimeIndex, Float64Index, NaT, Series, TimedeltaIndex, date_range import pandas._testing as tm +try: + from contextlib import nullcontext +except ImportError: + from contextlib import ExitStack as nullcontext # Py3.6. + + now = pd.Timestamp.now() utcnow = pd.Timestamp.now("UTC") @@ -388,7 +394,13 @@ def test_array_equivalent(dtype_equal): ) def test_array_equivalent_series(val): arr = np.array([1, 2]) - assert not array_equivalent(Series([arr, arr]), Series([arr, val])) + cm = ( + tm.assert_produces_warning(FutureWarning, check_stacklevel=False) + if isinstance(val, str) + else nullcontext() + ) + with cm: + assert not array_equivalent(Series([arr, arr]), Series([arr, val])) def test_array_equivalent_different_dtype_but_equal(): diff --git a/pandas/tests/series/methods/test_equals.py b/pandas/tests/series/methods/test_equals.py index a229c7a9da63e..9b5a0d3ed104e 100644 --- a/pandas/tests/series/methods/test_equals.py +++ b/pandas/tests/series/methods/test_equals.py @@ -1,7 +1,13 @@ +try: + from contextlib import nullcontext +except ImportError: + from contextlib import ExitStack as nullcontext # Py3.6. + import numpy as np import pytest from pandas import MultiIndex, Series +import pandas._testing as tm @pytest.mark.parametrize( @@ -35,7 +41,14 @@ def test_equals_list_array(val): assert s1.equals(s2) s1[1] = val - assert not s1.equals(s2) + + cm = ( + tm.assert_produces_warning(FutureWarning, check_stacklevel=False) + if isinstance(val, str) + else nullcontext() + ) + with cm: + assert not s1.equals(s2) def test_equals_false_negative(): From 98c580f88a4fc095cd6e84520f50b653466dc4ba Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Sat, 19 Sep 2020 15:58:22 +0200 Subject: [PATCH 09/10] Add comment for try/except clause --- pandas/_libs/lib.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 09eafd27e6050..c24ac39becd8c 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -592,6 +592,7 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bool: return False raise except ValueError: + # Avoid raising ValueError when comparing Numpy arrays to other types if cnp.PyArray_IsAnyScalar(x) != cnp.PyArray_IsAnyScalar(y): # Only compare scalars to scalars and non-scalars to non-scalars return False From 3b1d7883f7990e635cb26ecf98be0fae61cc8cd7 Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Sat, 19 Sep 2020 16:38:18 +0200 Subject: [PATCH 10/10] Move import statement to right place and dont support import python3.6 and lower --- pandas/tests/dtypes/test_missing.py | 7 +------ pandas/tests/series/methods/test_equals.py | 5 +---- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index f3a33755589ba..046b82ef3131a 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -1,3 +1,4 @@ +from contextlib import nullcontext from datetime import datetime from decimal import Decimal @@ -24,12 +25,6 @@ from pandas import DatetimeIndex, Float64Index, NaT, Series, TimedeltaIndex, date_range import pandas._testing as tm -try: - from contextlib import nullcontext -except ImportError: - from contextlib import ExitStack as nullcontext # Py3.6. - - now = pd.Timestamp.now() utcnow = pd.Timestamp.now("UTC") diff --git a/pandas/tests/series/methods/test_equals.py b/pandas/tests/series/methods/test_equals.py index 9b5a0d3ed104e..cf55482fefe22 100644 --- a/pandas/tests/series/methods/test_equals.py +++ b/pandas/tests/series/methods/test_equals.py @@ -1,7 +1,4 @@ -try: - from contextlib import nullcontext -except ImportError: - from contextlib import ExitStack as nullcontext # Py3.6. +from contextlib import nullcontext import numpy as np import pytest