From 47202dd881397b66663ebf115c85d6221e698c79 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 26 Feb 2020 17:53:55 -0800 Subject: [PATCH 1/3] BUG: disallow changing IntervalArray backing data --- pandas/core/arrays/interval.py | 4 ++++ pandas/tests/arrays/interval/test_interval.py | 7 +++++++ pandas/tests/series/methods/test_convert_dtypes.py | 9 ++++++++- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 398ed75c060ca..71065e1c5d0a7 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -541,7 +541,11 @@ def __setitem__(self, key, value): msg = f"'value' should be an interval type, got {type(value)} instead." raise TypeError(msg) + if needs_float_conversion: + raise ValueError("Cannot set float values for integer-backed IntervalArray") + key = check_array_indexer(self, key) + # Need to ensure that left and right are updated atomically, so we're # forced to copy, update the copy, and swap in the new values. left = self.left.copy(deep=True) diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py index 35eda4a0ec5bc..c18179b1d39e2 100644 --- a/pandas/tests/arrays/interval/test_interval.py +++ b/pandas/tests/arrays/interval/test_interval.py @@ -86,6 +86,13 @@ class TestSetitem: def test_set_na(self, left_right_dtypes): left, right = left_right_dtypes result = IntervalArray.from_arrays(left, right) + + if result.dtype.subtype.kind in ["i", "u"]: + msg = "Cannot set float values for integer-backed IntervalArray" + with pytest.raises(ValueError, match=msg): + result[0] = np.NaN + return + result[0] = np.nan expected_left = Index([left._na_value] + list(left[1:])) diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index 923b5a94c5f41..4cc4096c46e74 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas.core.dtypes.common import is_interval_dtype + import pandas as pd import pandas._testing as tm @@ -242,7 +244,12 @@ def test_convert_dtypes(self, data, maindtype, params, answerdict): # Test that it is a copy copy = series.copy(deep=True) - ns[ns.notna()] = np.nan + if is_interval_dtype(ns.dtype) and ns.dtype.subtype.kind in ["i", "u"]: + msg = "Cannot set float values for integer-backed IntervalArray" + with pytest.raises(ValueError, match=msg): + ns[ns.notna()] = np.nan + else: + ns[ns.notna()] = np.nan # Make sure original not changed tm.assert_series_equal(series, copy) From 23b6b935c6ad7f2f24adeb8379d8524c1404dc1f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 17 Mar 2020 18:34:50 -0700 Subject: [PATCH 2/3] update tests --- pandas/tests/arrays/interval/test_interval.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py index 72f1e3b952dd1..d939e2439ba9c 100644 --- a/pandas/tests/arrays/interval/test_interval.py +++ b/pandas/tests/arrays/interval/test_interval.py @@ -189,7 +189,7 @@ def test_arrow_array_missing(): import pyarrow as pa from pandas.core.arrays._arrow_utils import ArrowIntervalType - arr = IntervalArray.from_breaks([0, 1, 2, 3]) + arr = IntervalArray.from_breaks([0.0, 1.0, 2.0, 3.0]) arr[1] = None result = pa.array(arr) @@ -216,8 +216,8 @@ def test_arrow_array_missing(): @pyarrow_skip @pytest.mark.parametrize( "breaks", - [[0, 1, 2, 3], pd.date_range("2017", periods=4, freq="D")], - ids=["int", "datetime64[ns]"], + [[0.0, 1.0, 2.0, 3.0], pd.date_range("2017", periods=4, freq="D")], + ids=["float", "datetime64[ns]"], ) def test_arrow_table_roundtrip(breaks): import pyarrow as pa From 934dabed5fadca144b840bd6237256778f15aab1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 5 Apr 2020 19:03:43 -0700 Subject: [PATCH 3/3] whatsnew, exception message --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/core/arrays/interval.py | 10 +++------- pandas/tests/arrays/interval/test_interval.py | 2 +- pandas/tests/series/methods/test_convert_dtypes.py | 2 +- 4 files changed, 6 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index d283d4450e6bf..bcfa71b2c837d 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -344,7 +344,7 @@ Strings Interval ^^^^^^^^ -- +- Bug in :class:`IntervalArray` incorrectly allowing the underlying data to be changed when setting values (:issue:`32782`) - Indexing diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 12c75eae8019f..220b70ff71b28 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -543,22 +543,18 @@ def __setitem__(self, key, value): raise TypeError(msg) from err if needs_float_conversion: - raise ValueError("Cannot set float values for integer-backed IntervalArray") + raise ValueError("Cannot set float NaN to integer-backed IntervalArray") key = check_array_indexer(self, key) # Need to ensure that left and right are updated atomically, so we're # forced to copy, update the copy, and swap in the new values. left = self.left.copy(deep=True) - if needs_float_conversion: - left = left.astype("float") - left.values[key] = value_left + left._values[key] = value_left self._left = left right = self.right.copy(deep=True) - if needs_float_conversion: - right = right.astype("float") - right.values[key] = value_right + right._values[key] = value_right self._right = right def __eq__(self, other): diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py index d939e2439ba9c..fef11f0ff3bb2 100644 --- a/pandas/tests/arrays/interval/test_interval.py +++ b/pandas/tests/arrays/interval/test_interval.py @@ -106,7 +106,7 @@ def test_set_na(self, left_right_dtypes): result = IntervalArray.from_arrays(left, right) if result.dtype.subtype.kind in ["i", "u"]: - msg = "Cannot set float values for integer-backed IntervalArray" + msg = "Cannot set float NaN to integer-backed IntervalArray" with pytest.raises(ValueError, match=msg): result[0] = np.NaN return diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index 1376570b7cfa3..dd4bf642e68e8 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -269,7 +269,7 @@ def test_convert_dtypes(self, data, maindtype, params, answerdict): # Test that it is a copy copy = series.copy(deep=True) if is_interval_dtype(ns.dtype) and ns.dtype.subtype.kind in ["i", "u"]: - msg = "Cannot set float values for integer-backed IntervalArray" + msg = "Cannot set float NaN to integer-backed IntervalArray" with pytest.raises(ValueError, match=msg): ns[ns.notna()] = np.nan else: