Skip to content

Commit 6069132

Browse files
authored
TST: implement xfailed test for #18415 (#45290)
1 parent 76747f9 commit 6069132

File tree

2 files changed

+183
-138
lines changed

2 files changed

+183
-138
lines changed
Lines changed: 183 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,183 @@
1+
"""
2+
Tests for values coercion in setitem-like operations on DataFrame.
3+
4+
For the most part, these should be multi-column DataFrames, otherwise
5+
we would share the tests with Series.
6+
"""
7+
import numpy as np
8+
import pytest
9+
10+
import pandas as pd
11+
from pandas import (
12+
DataFrame,
13+
MultiIndex,
14+
NaT,
15+
Series,
16+
Timestamp,
17+
date_range,
18+
)
19+
import pandas._testing as tm
20+
21+
22+
class TestDataFrameSetitemCoercion:
23+
@pytest.mark.xfail(reason="Unnecessary cast.")
24+
@pytest.mark.parametrize("consolidate", [True, False])
25+
def test_loc_setitem_multiindex_columns(self, consolidate):
26+
# GH#18415 Setting values in a single column preserves dtype,
27+
# while setting them in multiple columns did unwanted cast.
28+
29+
# Note that A here has 2 blocks, below we do the same thing
30+
# with a consolidated frame.
31+
A = DataFrame(np.zeros((6, 5), dtype=np.float32))
32+
A = pd.concat([A, A], axis=1, keys=[1, 2])
33+
if consolidate:
34+
A = A._consolidate()
35+
36+
A.loc[2:3, (1, slice(2, 3))] = np.ones((2, 2), dtype=np.float32)
37+
assert (A.dtypes == np.float32).all()
38+
39+
A.loc[0:5, (1, slice(2, 3))] = np.ones((6, 2), dtype=np.float32)
40+
assert (A.dtypes == np.float32).all()
41+
42+
A.loc[:, (1, slice(2, 3))] = np.ones((6, 2), dtype=np.float32)
43+
assert (A.dtypes == np.float32).all()
44+
45+
# TODO: I think this isn't about MultiIndex and could be done with iloc?
46+
47+
48+
def test_37477():
49+
# fixed by GH#45121
50+
orig = DataFrame({"A": [1, 2, 3], "B": [3, 4, 5]})
51+
expected = DataFrame({"A": [1, 2, 3], "B": [3, 1.2, 5]})
52+
53+
df = orig.copy()
54+
df.at[1, "B"] = 1.2
55+
tm.assert_frame_equal(df, expected)
56+
57+
df = orig.copy()
58+
df.loc[1, "B"] = 1.2
59+
tm.assert_frame_equal(df, expected)
60+
61+
df = orig.copy()
62+
df.iat[1, 1] = 1.2
63+
tm.assert_frame_equal(df, expected)
64+
65+
df = orig.copy()
66+
df.iloc[1, 1] = 1.2
67+
tm.assert_frame_equal(df, expected)
68+
69+
70+
def test_6942(indexer_al):
71+
# check that the .at __setitem__ after setting "Live" actually sets the data
72+
start = Timestamp("2014-04-01")
73+
t1 = Timestamp("2014-04-23 12:42:38.883082")
74+
t2 = Timestamp("2014-04-24 01:33:30.040039")
75+
76+
dti = date_range(start, periods=1)
77+
orig = DataFrame(index=dti, columns=["timenow", "Live"])
78+
79+
df = orig.copy()
80+
indexer_al(df)[start, "timenow"] = t1
81+
82+
df["Live"] = True
83+
84+
df.at[start, "timenow"] = t2
85+
assert df.iloc[0, 0] == t2
86+
87+
88+
def test_26395(indexer_al):
89+
# .at case fixed by GH#45121 (best guess)
90+
df = DataFrame(index=["A", "B", "C"])
91+
df["D"] = 0
92+
93+
indexer_al(df)["C", "D"] = 2
94+
expected = DataFrame({"D": [0, 0, 2]}, index=["A", "B", "C"], dtype=np.int64)
95+
tm.assert_frame_equal(df, expected)
96+
97+
indexer_al(df)["C", "D"] = 44.5
98+
expected = DataFrame({"D": [0, 0, 44.5]}, index=["A", "B", "C"], dtype=np.float64)
99+
tm.assert_frame_equal(df, expected)
100+
101+
indexer_al(df)["C", "D"] = "hello"
102+
expected = DataFrame({"D": [0, 0, "hello"]}, index=["A", "B", "C"], dtype=object)
103+
tm.assert_frame_equal(df, expected)
104+
105+
106+
@pytest.mark.xfail(reason="unwanted upcast")
107+
def test_15231():
108+
df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
109+
df.loc[2] = Series({"a": 5, "b": 6})
110+
assert (df.dtypes == np.int64).all()
111+
112+
df.loc[3] = Series({"a": 7})
113+
114+
# df["a"] doesn't have any NaNs, should not have been cast
115+
exp_dtypes = Series([np.int64, np.float64], dtype=object, index=["a", "b"])
116+
tm.assert_series_equal(df.dtypes, exp_dtypes)
117+
118+
119+
@pytest.mark.xfail(reason="Unnecessarily upcasts to float64")
120+
def test_iloc_setitem_unnecesssary_float_upcasting():
121+
# GH#12255
122+
df = DataFrame(
123+
{
124+
0: np.array([1, 3], dtype=np.float32),
125+
1: np.array([2, 4], dtype=np.float32),
126+
2: ["a", "b"],
127+
}
128+
)
129+
orig = df.copy()
130+
131+
values = df[0].values.reshape(2, 1)
132+
df.iloc[:, 0:1] = values
133+
134+
tm.assert_frame_equal(df, orig)
135+
136+
137+
@pytest.mark.xfail(reason="unwanted casting to dt64")
138+
def test_12499():
139+
# TODO: OP in GH#12499 used np.datetime64("NaT") instead of pd.NaT,
140+
# which has consequences for the expected df["two"] (though I think at
141+
# the time it might not have because of a separate bug). See if it makes
142+
# a difference which one we use here.
143+
ts = Timestamp("2016-03-01 03:13:22.98986", tz="UTC")
144+
145+
data = [{"one": 0, "two": ts}]
146+
orig = DataFrame(data)
147+
df = orig.copy()
148+
df.loc[1] = [np.nan, NaT]
149+
150+
expected = DataFrame(
151+
{"one": [0, np.nan], "two": Series([ts, NaT], dtype="datetime64[ns, UTC]")}
152+
)
153+
tm.assert_frame_equal(df, expected)
154+
155+
data = [{"one": 0, "two": ts}]
156+
df = orig.copy()
157+
df.loc[1, :] = [np.nan, NaT]
158+
tm.assert_frame_equal(df, expected)
159+
160+
161+
@pytest.mark.xfail(reason="Too many columns cast to float64")
162+
def test_20476():
163+
mi = MultiIndex.from_product([["A", "B"], ["a", "b", "c"]])
164+
df = DataFrame(-1, index=range(3), columns=mi)
165+
filler = DataFrame([[1, 2, 3.0]] * 3, index=range(3), columns=["a", "b", "c"])
166+
df["A"] = filler
167+
168+
expected = DataFrame(
169+
{
170+
0: [1, 1, 1],
171+
1: [2, 2, 2],
172+
2: [3.0, 3.0, 3.0],
173+
3: [-1, -1, -1],
174+
4: [-1, -1, -1],
175+
5: [-1, -1, -1],
176+
}
177+
)
178+
expected.columns = mi
179+
exp_dtypes = Series(
180+
[np.dtype(np.int64)] * 2 + [np.dtype(np.float64)] + [np.dtype(np.int64)] * 3,
181+
index=mi,
182+
)
183+
tm.assert_series_equal(df.dtypes, exp_dtypes)

pandas/tests/series/indexing/test_setitem.py

Lines changed: 0 additions & 138 deletions
Original file line number | Diff line number | Diff line change
@@ -1360,28 +1360,6 @@ def test_15413():
13601360
tm.assert_series_equal(ser, expected)
13611361

13621362

1363-
def test_37477():
1364-
# fixed by GH#45121
1365-
orig = DataFrame({"A": [1, 2, 3], "B": [3, 4, 5]})
1366-
expected = DataFrame({"A": [1, 2, 3], "B": [3, 1.2, 5]})
1367-
1368-
df = orig.copy()
1369-
df.at[1, "B"] = 1.2
1370-
tm.assert_frame_equal(df, expected)
1371-
1372-
df = orig.copy()
1373-
df.loc[1, "B"] = 1.2
1374-
tm.assert_frame_equal(df, expected)
1375-
1376-
df = orig.copy()
1377-
df.iat[1, 1] = 1.2
1378-
tm.assert_frame_equal(df, expected)
1379-
1380-
df = orig.copy()
1381-
df.iloc[1, 1] = 1.2
1382-
tm.assert_frame_equal(df, expected)
1383-
1384-
13851363
def test_32878_int_itemsize():
13861364
# Fixed by GH#45121
13871365
arr = np.arange(5).astype("i4")
@@ -1392,24 +1370,6 @@ def test_32878_int_itemsize():
13921370
tm.assert_series_equal(ser, expected)
13931371

13941372

1395-
def test_26395(indexer_al):
1396-
# .at case fixed by GH#45121 (best guess)
1397-
df = DataFrame(index=["A", "B", "C"])
1398-
df["D"] = 0
1399-
1400-
indexer_al(df)["C", "D"] = 2
1401-
expected = DataFrame({"D": [0, 0, 2]}, index=["A", "B", "C"], dtype=np.int64)
1402-
tm.assert_frame_equal(df, expected)
1403-
1404-
indexer_al(df)["C", "D"] = 44.5
1405-
expected = DataFrame({"D": [0, 0, 44.5]}, index=["A", "B", "C"], dtype=np.float64)
1406-
tm.assert_frame_equal(df, expected)
1407-
1408-
indexer_al(df)["C", "D"] = "hello"
1409-
expected = DataFrame({"D": [0, 0, "hello"]}, index=["A", "B", "C"], dtype=object)
1410-
tm.assert_frame_equal(df, expected)
1411-
1412-
14131373
def test_37692(indexer_al):
14141374
# GH#37692
14151375
ser = Series([1, 2, 3], index=["a", "b", "c"])
@@ -1442,24 +1402,6 @@ def test_setitem_bool_int_float_consistency(indexer_sli):
14421402
indexer_sli(ser)[0] = np.int64(1)
14431403

14441404

1445-
def test_6942(indexer_al):
1446-
# check that the .at __setitem__ after setting "Live" actually sets the data
1447-
start = Timestamp("2014-04-01")
1448-
t1 = Timestamp("2014-04-23 12:42:38.883082")
1449-
t2 = Timestamp("2014-04-24 01:33:30.040039")
1450-
1451-
dti = date_range(start, periods=1)
1452-
orig = DataFrame(index=dti, columns=["timenow", "Live"])
1453-
1454-
df = orig.copy()
1455-
indexer_al(df)[start, "timenow"] = t1
1456-
1457-
df["Live"] = True
1458-
1459-
df.at[start, "timenow"] = t2
1460-
assert df.iloc[0, 0] == t2
1461-
1462-
14631405
def test_setitem_positional_with_casting():
14641406
# GH#45070 case where in __setitem__ we get a KeyError, then when
14651407
# we fallback we *also* get a ValueError if we try to set inplace.
@@ -1479,19 +1421,6 @@ def test_setitem_positional_float_into_int_coerces():
14791421
tm.assert_series_equal(ser, expected)
14801422

14811423

1482-
@pytest.mark.xfail(reason="unwanted upcast")
1483-
def test_15231():
1484-
df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
1485-
df.loc[2] = Series({"a": 5, "b": 6})
1486-
assert (df.dtypes == np.int64).all()
1487-
1488-
df.loc[3] = Series({"a": 7})
1489-
1490-
# df["a"] doesn't have any NaNs, should not have been cast
1491-
exp_dtypes = Series([np.int64, np.float64], dtype=object, index=["a", "b"])
1492-
tm.assert_series_equal(df.dtypes, exp_dtypes)
1493-
1494-
14951424
@pytest.mark.xfail(reason="Fails to upcast")
14961425
def test_32878_complex_itemsize():
14971426
# TODO: when fixed, put adjacent to test_32878_int_itemsize
@@ -1507,73 +1436,6 @@ def test_32878_complex_itemsize():
15071436
tm.assert_series_equal(ser, expected)
15081437

15091438

1510-
@pytest.mark.xfail(reason="Unnecessarily upcasts to float64")
1511-
def test_iloc_setitem_unnecesssary_float_upcasting():
1512-
# GH#12255
1513-
df = DataFrame(
1514-
{
1515-
0: np.array([1, 3], dtype=np.float32),
1516-
1: np.array([2, 4], dtype=np.float32),
1517-
2: ["a", "b"],
1518-
}
1519-
)
1520-
orig = df.copy()
1521-
1522-
values = df[0].values.reshape(2, 1)
1523-
df.iloc[:, 0:1] = values
1524-
1525-
tm.assert_frame_equal(df, orig)
1526-
1527-
1528-
@pytest.mark.xfail(reason="unwanted casting to dt64")
1529-
def test_12499():
1530-
# TODO: OP in GH#12499 used np.datetime64("NaT") instead of pd.NaT,
1531-
# which has consequences for the expected df["two"] (though I think at
1532-
# the time it might not have because of a separate bug). See if it makes
1533-
# a difference which one we use here.
1534-
ts = Timestamp("2016-03-01 03:13:22.98986", tz="UTC")
1535-
1536-
data = [{"one": 0, "two": ts}]
1537-
orig = DataFrame(data)
1538-
df = orig.copy()
1539-
df.loc[1] = [np.nan, NaT]
1540-
1541-
expected = DataFrame(
1542-
{"one": [0, np.nan], "two": Series([ts, NaT], dtype="datetime64[ns, UTC]")}
1543-
)
1544-
tm.assert_frame_equal(df, expected)
1545-
1546-
data = [{"one": 0, "two": ts}]
1547-
df = orig.copy()
1548-
df.loc[1, :] = [np.nan, NaT]
1549-
tm.assert_frame_equal(df, expected)
1550-
1551-
1552-
@pytest.mark.xfail(reason="Too many columns cast to float64")
1553-
def test_20476():
1554-
mi = MultiIndex.from_product([["A", "B"], ["a", "b", "c"]])
1555-
df = DataFrame(-1, index=range(3), columns=mi)
1556-
filler = DataFrame([[1, 2, 3.0]] * 3, index=range(3), columns=["a", "b", "c"])
1557-
df["A"] = filler
1558-
1559-
expected = DataFrame(
1560-
{
1561-
0: [1, 1, 1],
1562-
1: [2, 2, 2],
1563-
2: [3.0, 3.0, 3.0],
1564-
3: [-1, -1, -1],
1565-
4: [-1, -1, -1],
1566-
5: [-1, -1, -1],
1567-
}
1568-
)
1569-
expected.columns = mi
1570-
exp_dtypes = Series(
1571-
[np.dtype(np.int64)] * 2 + [np.dtype(np.float64)] + [np.dtype(np.int64)] * 3,
1572-
index=mi,
1573-
)
1574-
tm.assert_series_equal(df.dtypes, exp_dtypes)
1575-
1576-
15771439
def test_setitem_int_as_positional_fallback_deprecation():
15781440
# GH#42215 deprecated falling back to positional on __setitem__ with an
15791441
# int not contained in the index

0 commit comments

Comments
 (0)