Skip to content

BUG/API: prohibit dtype-changing IntervalArray.__setitem__ #32782

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Apr 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ Strings

Interval
^^^^^^^^
-
- Bug in :class:`IntervalArray` incorrectly allowing the underlying data to be changed when setting values (:issue:`32782`)
-

Indexing
Expand Down
12 changes: 6 additions & 6 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -542,19 +542,19 @@ def __setitem__(self, key, value):
msg = f"'value' should be an interval type, got {type(value)} instead."
raise TypeError(msg) from err

if needs_float_conversion:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we're raising here we can remove the future references that are no longer being used:

if needs_float_conversion:
left = left.astype("float")

if needs_float_conversion:
right = right.astype("float")

raise ValueError("Cannot set float NaN to integer-backed IntervalArray")

key = check_array_indexer(self, key)

# Need to ensure that left and right are updated atomically, so we're
# forced to copy, update the copy, and swap in the new values.
left = self.left.copy(deep=True)
if needs_float_conversion:
left = left.astype("float")
left.values[key] = value_left
left._values[key] = value_left
self._left = left

right = self.right.copy(deep=True)
if needs_float_conversion:
right = right.astype("float")
right.values[key] = value_right
right._values[key] = value_right
self._right = right

def __eq__(self, other):
Expand Down
13 changes: 10 additions & 3 deletions pandas/tests/arrays/interval/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,13 @@ class TestSetitem:
def test_set_na(self, left_right_dtypes):
left, right = left_right_dtypes
result = IntervalArray.from_arrays(left, right)

if result.dtype.subtype.kind in ["i", "u"]:
msg = "Cannot set float NaN to integer-backed IntervalArray"
with pytest.raises(ValueError, match=msg):
result[0] = np.NaN
return

result[0] = np.nan

expected_left = Index([left._na_value] + list(left[1:]))
Expand Down Expand Up @@ -182,7 +189,7 @@ def test_arrow_array_missing():
import pyarrow as pa
from pandas.core.arrays._arrow_utils import ArrowIntervalType

arr = IntervalArray.from_breaks([0, 1, 2, 3])
arr = IntervalArray.from_breaks([0.0, 1.0, 2.0, 3.0])
arr[1] = None

result = pa.array(arr)
Expand All @@ -209,8 +216,8 @@ def test_arrow_array_missing():
@pyarrow_skip
@pytest.mark.parametrize(
"breaks",
[[0, 1, 2, 3], pd.date_range("2017", periods=4, freq="D")],
ids=["int", "datetime64[ns]"],
[[0.0, 1.0, 2.0, 3.0], pd.date_range("2017", periods=4, freq="D")],
ids=["float", "datetime64[ns]"],
)
def test_arrow_table_roundtrip(breaks):
import pyarrow as pa
Expand Down
9 changes: 8 additions & 1 deletion pandas/tests/series/methods/test_convert_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numpy as np
import pytest

from pandas.core.dtypes.common import is_interval_dtype

import pandas as pd
import pandas._testing as tm

Expand Down Expand Up @@ -266,7 +268,12 @@ def test_convert_dtypes(self, data, maindtype, params, answerdict):

# Test that it is a copy
copy = series.copy(deep=True)
ns[ns.notna()] = np.nan
if is_interval_dtype(ns.dtype) and ns.dtype.subtype.kind in ["i", "u"]:
msg = "Cannot set float NaN to integer-backed IntervalArray"
with pytest.raises(ValueError, match=msg):
ns[ns.notna()] = np.nan
else:
ns[ns.notna()] = np.nan

# Make sure original not changed
tm.assert_series_equal(series, copy)
Expand Down