diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 5c61f259a4202..d248131bd1c76 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -406,7 +406,7 @@ Conversion
 - Bug in :meth:`Series.view` and :meth:`Index.view` when converting between datetime-like (``datetime64[ns]``, ``datetime64[ns, tz]``, ``timedelta64``, ``period``) dtypes (:issue:`39788`)
 - Bug in creating a :class:`DataFrame` from an empty ``np.recarray`` not retaining the original dtypes (:issue:`40121`)
 - Bug in :class:`DataFrame` failing to raise ``TypeError`` when constructing from a ``frozenset`` (:issue:`40163`)
--
+- Bug in :meth:`~DataFrame.update` unnecessarily changing the dtype of a column (:issue:`4094`)
 
 Strings
 ^^^^^^^
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index f0f8d813bba96..897bfb2e1c588 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6939,6 +6939,7 @@ def update(
         if not isinstance(other, DataFrame):
             other = DataFrame(other)
 
+        other_dtypes = other.dtypes
         other = other.reindex_like(self)
 
         for col in self.columns:
@@ -6963,7 +6964,19 @@ def update(
                 if mask.all():
                     continue
 
-            self[col] = expressions.where(mask, this, that)
+            col_array = expressions.where(mask, this, that)
+            col_dtype = self[col].dtype
+            # ``other_dtypes`` was captured before ``other`` was reindexed to
+            # self's columns, so ``col`` may be missing from it; guard the lookup
+            # instead of raising KeyError for columns that only exist in self.
+            keep_dtype = (
+                col in other_dtypes
+                and col_dtype == other_dtypes[col]
+                and col_dtype != col_array.dtype
+            )
+            if keep_dtype:
+                self[col] = Series(col_array, index=self.index, dtype=col_dtype)
+            else:
+                self[col] = col_array
 
     # ----------------------------------------------------------------------
     # Data reshaping
diff --git a/pandas/tests/frame/methods/test_update.py b/pandas/tests/frame/methods/test_update.py
index 408113e9bc417..6433691f7e884 100644
--- a/pandas/tests/frame/methods/test_update.py
+++ b/pandas/tests/frame/methods/test_update.py
@@ -146,3 +146,51 @@ def test_update_with_different_dtype(self):
         expected = DataFrame({"a": [1, 3], "b": [np.nan, 2], "c": ["foo", np.nan]})
 
         tm.assert_frame_equal(df, expected)
+
+    def test_update_with_subset_and_same_not_nullable_dtype(self, any_real_dtype):
+        # GH4094
+        df = DataFrame({"a": Series([1, 2, 3], dtype=any_real_dtype)})
+        update = df.copy()[:-1]
+        df.update(update)
+        assert df.a.dtype == any_real_dtype
+
+    def test_update_str_dtype(self, string_dtype):
+        # GH4094
+        df = DataFrame({"a": ["a", "b", "c"]}, dtype=string_dtype)
+        update = df.copy()
+        expected = df.copy()
+        df.update(update)
+        assert df.a.dtype == expected.a.dtype
+
+    def test_update_with_subset_str_dtype(self, string_dtype):
+        # GH4094
+        df = DataFrame({"a": ["a", "b", "c"]}, dtype=string_dtype)
+        update = df.copy()[:-1]
+        expected = df.copy()
+        df.update(update)
+        assert df.a.dtype == expected.a.dtype
+
+    def test_update_bool_dtype(self):
+        # GH4094
+        df = DataFrame({"a": [True, False, True]}, dtype=bool)
+        update = df.copy()
+        expected = df.copy()
+        df.update(update)
+        assert df.a.dtype == expected.a.dtype
+
+    def test_update_with_subset_bool_dtype(self):
+        # GH4094
+        df = DataFrame({"a": [True, False]}, dtype=bool)
+        update = DataFrame({"a": [False]}, dtype=bool)
+        expected = df.copy()
+        df.update(update)
+        assert df.a.dtype == expected.a.dtype
+
+    def test_update_column_missing_from_other(self):
+        # GH4094: a column absent from ``other`` must not raise KeyError
+        df = DataFrame({"a": [1.0, np.nan], "b": [np.nan, 2.0]})
+        update = DataFrame({"a": [3.0, 4.0]})
+        expected = DataFrame({"a": [1.0, 4.0], "b": [np.nan, 2.0]})
+        df.update(update, overwrite=False)
+        tm.assert_frame_equal(df, expected)