Skip to content

BUG: Enable Series.equals to compare numpy arrays to scalars #36161

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Sep 19, 2020
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ Numeric
^^^^^^^
- Bug in :func:`to_numeric` where float precision was incorrect (:issue:`31364`)
- Bug in :meth:`DataFrame.any` with ``axis=1`` and ``bool_only=True`` ignoring the ``bool_only`` keyword (:issue:`32432`)
- Bug in :meth:`Series.equals` where a ``ValueError`` was raised when numpy arrays were compared to scalars (:issue:`35267`)
-

Conversion
Expand Down
11 changes: 10 additions & 1 deletion pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -591,7 +591,16 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bool:
if "tz-naive and tz-aware" in str(err):
return False
raise

except ValueError:
# Avoid raising ValueError when comparing Numpy arrays to other types
if cnp.PyArray_IsAnyScalar(x) != cnp.PyArray_IsAnyScalar(y):
# Only compare scalars to scalars and non-scalars to non-scalars
return False
elif (not (cnp.PyArray_IsPythonScalar(x) or cnp.PyArray_IsPythonScalar(y))
and not (isinstance(x, type(y)) or isinstance(y, type(x)))):
# Check if non-scalars have the same type
return False
raise
return True


Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/dtypes/test_missing.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from contextlib import nullcontext
from datetime import datetime
from decimal import Decimal

Expand Down Expand Up @@ -383,6 +384,20 @@ def test_array_equivalent(dtype_equal):
assert not array_equivalent(DatetimeIndex([0, np.nan]), TimedeltaIndex([0, np.nan]))


@pytest.mark.parametrize(
    "val", [1, 1.1, 1 + 1j, True, "abc", [1, 2], (1, 2), {1, 2}, {"a": 1}, None]
)
def test_array_equivalent_series(val):
    """Check that ``array_equivalent`` reports two object Series as different.

    One Series holds two identical ndarrays; the other has its second
    element replaced by ``val`` (a scalar, a container, or ``None``).
    """
    base = np.array([1, 2])

    # Only the ``str`` parameter is expected to emit a FutureWarning; every
    # other parameter runs under a no-op context manager.
    # NOTE(review): the warning presumably comes from NumPy's elementwise
    # comparison against a string -- confirm.
    if isinstance(val, str):
        warning_ctx = tm.assert_produces_warning(
            FutureWarning, check_stacklevel=False
        )
    else:
        warning_ctx = nullcontext()

    with warning_ctx:
        all_arrays = Series([base, base])
        array_and_val = Series([base, val])
        assert not array_equivalent(all_arrays, array_and_val)


def test_array_equivalent_different_dtype_but_equal():
# Unclear if this is exposed anywhere in the public-facing API
assert array_equivalent(np.array([1, 2]), np.array([1.0, 2.0]))
Expand Down
20 changes: 16 additions & 4 deletions pandas/tests/series/methods/test_equals.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from contextlib import nullcontext

import numpy as np
import pytest

from pandas import MultiIndex, Series
import pandas._testing as tm


@pytest.mark.parametrize(
Expand All @@ -24,16 +27,25 @@ def test_equals(arr, idx):
assert not s1.equals(s2)


@pytest.mark.parametrize(
    "val", [1, 1.1, 1 + 1j, True, "abc", [1, 2], (1, 2), {1, 2}, {"a": 1}, None]
)
def test_equals_list_array(val):
    """Check ``Series.equals`` on object Series whose elements are ndarrays.

    First verifies that a Series of arrays equals its own copy, then replaces
    one element with ``val`` and verifies that the two Series are no longer
    reported as equal.
    """
    # GH20676 Verify equals operator for list of Numpy arrays
    arr = np.array([1, 2])
    s1 = Series([arr, arr])
    s2 = s1.copy()
    assert s1.equals(s2)

    # GH35267 Comparing an ndarray element against a scalar (or another kind
    # of container) must yield "not equal" instead of raising ValueError.
    s1[1] = val

    # Only the ``str`` parameter is expected to emit a FutureWarning; every
    # other parameter runs under a no-op context manager.
    cm = (
        tm.assert_produces_warning(FutureWarning, check_stacklevel=False)
        if isinstance(val, str)
        else nullcontext()
    )
    with cm:
        assert not s1.equals(s2)


def test_equals_false_negative():
Expand Down