Skip to content

BUG: DataFrame.loc setitem changed dtype when setting more than one column #37798

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 8 commits into from
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ Indexing
^^^^^^^^
- Bug in :meth:`CategoricalIndex.get_indexer` failing to raise ``InvalidIndexError`` when non-unique (:issue:`38372`)
- Bug in inserting many new columns into a :class:`DataFrame` causing incorrect subsequent indexing behavior (:issue:`38380`)
- Bug in :meth:`DataFrame.loc` converting ``float32`` to ``float64`` when setting values for more than one column (:issue:`18415`)
- Bug in :meth:`DataFrame.iloc.__setitem__` and :meth:`DataFrame.loc.__setitem__` with mixed dtypes when setting with a dictionary value (:issue:`38335`)
-
-
Expand Down
8 changes: 6 additions & 2 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1728,15 +1728,19 @@ def _setitem_with_indexer_2d_value(self, indexer, value):
ilocs = self._ensure_iterable_column_indexer(indexer[1])

# GH#7551 Note that this coerces the dtype if we are mixed
value = np.array(value, dtype=object)
if not isinstance(value, np.ndarray):
value = np.array(value, dtype=object)
if len(ilocs) != value.shape[1]:
raise ValueError(
"Must have equal len keys and value when setting with an ndarray"
)

for i, loc in enumerate(ilocs):
# setting with a list, re-coerces
self._setitem_single_column(loc, value[:, i].tolist(), pi)
values = value[:, i]
if value.dtype == "object":
values = values.tolist()
self._setitem_single_column(loc, values, pi)

def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame", name: str):
ilocs = self._ensure_iterable_column_indexer(indexer[1])
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
DataFrame,
Index,
Interval,
MultiIndex,
NaT,
Period,
PeriodIndex,
Expand Down Expand Up @@ -324,6 +325,14 @@ def test_setitem_complete_column_with_array(self):
)
tm.assert_frame_equal(df, expected)

def test_loc_setitem_complete_column_float32(self):
# GH#18415
# Regression test: assigning a 2-D float32 ndarray to more than one column
# through ``.loc`` must leave the frame as float32 instead of upcasting it.
# Three float32 columns under a two-level column index, all zeros.
mi = MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2)])
df = DataFrame([[0, 0, 0], [0, 0, 0]], dtype=np.float32, columns=mi)
# Per the expected frame below, the key (1, slice(2, 3)) targets the first
# two columns, (1, 2) and (1, 3); the (2, 2) column keeps its zeros.
df.loc[:, (1, slice(2, 3))] = np.ones((2, 2), dtype=np.float32)
expected = DataFrame([[1, 1, 0], [1, 1, 0]], dtype=np.float32, columns=mi)
# NOTE(review): relies on assert_frame_equal comparing dtypes, so that a
# float64 result would fail against the float32 expectation — confirm.
tm.assert_frame_equal(df, expected)

@pytest.mark.parametrize("dtype", ["f8", "i8", "u8"])
def test_setitem_bool_with_numeric_index(self, dtype):
# GH#36319
Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -962,12 +962,14 @@ def test_margins_dtype(self):
# GH 17013

df = self.data.copy()
df[["D", "E", "F"]] = np.arange(len(df) * 3).reshape(len(df), 3)
df[["D", "E", "F"]] = np.arange(len(df) * 3, dtype="int64").reshape(len(df), 3)

mi_val = list(product(["bar", "foo"], ["one", "two"])) + [("All", "")]
mi = MultiIndex.from_tuples(mi_val, names=("A", "B"))
expected = DataFrame(
{"dull": [12, 21, 3, 9, 45], "shiny": [33, 0, 36, 51, 120]}, index=mi
{"dull": [12, 21, 3, 9, 45], "shiny": [33, 0, 36, 51, 120]},
index=mi,
dtype="int64",
).rename_axis("C", axis=1)
expected["All"] = expected["dull"] + expected["shiny"]

Expand Down