diff --git a/doc/source/whatsnew/v0.19.2.txt b/doc/source/whatsnew/v0.19.2.txt index dabc6036fc9ba..291b0ffde145e 100644 --- a/doc/source/whatsnew/v0.19.2.txt +++ b/doc/source/whatsnew/v0.19.2.txt @@ -78,7 +78,7 @@ Bug Fixes - Bug in clipboard functions on linux with python2 with unicode and separators (:issue:`13747`) - Bug in clipboard functions on Windows 10 and python 3 (:issue:`14362`, :issue:`12807`) - Bug in ``.to_clipboard()`` and Excel compat (:issue:`12529`) - +- Bug in ``DataFrame.combine_first()`` for integer columns whose indexes do not align exactly (:issue:`14687`) - Bug in ``pd.read_csv()`` in which the ``dtype`` parameter was not being respected for empty data (:issue:`14712`) - Bug in ``pd.read_csv()`` in which the ``nrows`` parameter was not being respected for large input when using the C engine for parsing (:issue:`7626`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0d4bcd781cf74..78d0f47d473c8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3665,10 +3665,8 @@ def combine(self, other, func, fill_value=None, overwrite=True): otherSeries[other_mask] = fill_value # if we have different dtypes, possibily promote - if notnull(series).all(): - new_dtype = this_dtype - otherSeries = otherSeries.astype(new_dtype) - else: + new_dtype = this_dtype + if not is_dtype_equal(this_dtype, other_dtype): new_dtype = _find_common_type([this_dtype, other_dtype]) if not is_dtype_equal(this_dtype, new_dtype): series = series.astype(new_dtype) diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index 5b5236843643d..c6b69dad3e6b5 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -725,3 +725,13 @@ def test_combine_first_period(self): exp = pd.DataFrame({'P': exp_dts}, index=[1, 2, 3, 4, 5, 7]) tm.assert_frame_equal(res, exp) self.assertEqual(res['P'].dtype, 'object') + + def test_combine_first_int(self): + # GH14687 - integer series that do not align exactly + + df1 = 
pd.DataFrame({'a': [0, 1, 3, 5]}, dtype='int64') + df2 = pd.DataFrame({'a': [1, 4]}, dtype='int64') + + res = df1.combine_first(df2) + tm.assert_frame_equal(res, df1) + self.assertEqual(res['a'].dtype, 'int64')