diff --git a/RELEASE.rst b/RELEASE.rst index 1970b00c05add..984dcfa0f1d39 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -38,6 +38,7 @@ pandas 0.11.1 - Fixed various issues with internal pprinting code, the repr() for various objects including TimeStamp and *Index now produces valid python code strings and can be used to recreate the object, (GH3038_), (GH3379_), (GH3251_) + - ``convert_objects`` now accepts a ``copy`` parameter (defaults to ``True``) - ``HDFStore`` - will retain index attributes (freq,tz,name) on recreation (GH3499_) @@ -97,6 +98,7 @@ pandas 0.11.1 - Raise a TypeError on invalid datetime/timedelta operations e.g. add datetimes, multiple timedelta x datetime - Fix ``.diff`` on datelike and timedelta operations (GH3100_) + - ``combine_first`` not returning the same dtype in cases where it can (GH3552_) .. _GH3164: https://github.com/pydata/pandas/issues/3164 .. _GH2786: https://github.com/pydata/pandas/issues/2786 @@ -129,6 +131,7 @@ pandas 0.11.1 .. _GH3499: https://github.com/pydata/pandas/issues/3499 .. _GH3495: https://github.com/pydata/pandas/issues/3495 .. _GH3492: https://github.com/pydata/pandas/issues/3492 +.. _GH3552: https://github.com/pydata/pandas/issues/3552 .. _GH3493: https://github.com/pydata/pandas/issues/3493 diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3fd95e161b41a..0ffdcb0e036ce 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1627,21 +1627,23 @@ def info(self, verbose=True, buf=None, max_cols=None): def dtypes(self): return self.apply(lambda x: x.dtype) - def convert_objects(self, convert_dates=True, convert_numeric=False): + def convert_objects(self, convert_dates=True, convert_numeric=False, copy=True): """ Attempt to infer better dtype for object columns - Always returns a copy (even if no object columns) Parameters ---------- convert_dates : if True, attempt to soft convert_dates, if 'coerce', force conversion (and non-convertibles get NaT) convert_numeric : if True attempt to coerce to numerbers (including strings), non-convertibles get NaN + copy : boolean, return a copy if True (True by default) Returns ------- converted : DataFrame """ - return self._constructor(self._data.convert(convert_dates=convert_dates, convert_numeric=convert_numeric)) + return self._constructor(self._data.convert(convert_dates=convert_dates, + convert_numeric=convert_numeric, + copy=copy)) #---------------------------------------------------------------------- # properties for index and columns @@ -3735,7 +3737,10 @@ def combine(self, other, func, fill_value=None, overwrite=True): result[col] = arr - return self._constructor(result, index=new_index, columns=new_columns) + # convert_objects just in case + return self._constructor(result, + index=new_index, + columns=new_columns).convert_objects(copy=False) def combine_first(self, other): """ @@ -4210,7 +4215,7 @@ def _apply_standard(self, func, axis, ignore_failures=False): if axis == 1: result = result.T - result = result.convert_objects() + result = result.convert_objects(copy=False) return result else: diff --git a/pandas/core/series.py b/pandas/core/series.py index d8e7ea74e0560..3509e226d46fb 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -933,10 +933,9 @@ def astype(self, dtype): return self._constructor(casted, index=self.index, name=self.name, dtype=casted.dtype) - def convert_objects(self, convert_dates=True, convert_numeric=True): + def convert_objects(self, convert_dates=True, convert_numeric=True, copy=True): """ Attempt to infer better dtype - Always return a copy Parameters ---------- @@ -946,6 +945,8 @@ def convert_objects(self, convert_dates=True, convert_numeric=True): convert_numeric : boolean, default True if True attempt to coerce to numbers (including strings), non-convertibles get NaN + copy : boolean, default True + if True return a copy even if not object dtype Returns ------- @@ -955,7 +956,7 @@ def convert_objects(self, convert_dates=True, convert_numeric=True): return Series(com._possibly_convert_objects(self.values, convert_dates=convert_dates, convert_numeric=convert_numeric), index=self.index, name=self.name) - return self.copy() + return self.copy() if copy else self def repeat(self, reps): """ diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index ed33be33ac02a..e893f83f6d640 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -125,7 +125,7 @@ def _consolidate_inplace(self): # do nothing when DataFrame calls this method pass - def convert_objects(self, convert_dates=True): + def convert_objects(self, convert_dates=True, convert_numeric=False, copy=True): # XXX return self diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 69225c40e36df..0c9dd21d2f645 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -7889,6 +7889,14 @@ def test_combine_first_mixed_bug(self): expected = DataFrame({ 'A' : [1,2,3,5,3,7.], 'B' : [np.nan,2,3,4,6,8] }) assert_frame_equal(result,expected) + # GH3552, return object dtype with bools + df1 = DataFrame([[np.nan, 3.,True], [-4.6, np.nan, True], [np.nan, 7., False]]) + df2 = DataFrame([[-42.6, np.nan, True], [-5., 1.6, False]], index=[1, 2]) + + result = df1.combine_first(df2)[2] + expected = Series([True,True,False]) + assert_series_equal(result,expected) + def test_update(self): df = DataFrame([[1.5, nan, 3.], [1.5, nan, 3.],