Skip to content

Commit 4670e9f

Browse files
committed
Merge pull request #3555 from jreback/GH3552
BUG: ``combine_first`` not returning the same dtype in cases where it can (GH3552)
2 parents a025546 + 59d47b1 commit 4670e9f

File tree

5 files changed

+26
-9
lines changed

5 files changed

+26
-9
lines changed

RELEASE.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ pandas 0.11.1
3838
- Fixed various issues with internal pprinting code, the repr() for various objects
3939
including TimeStamp and *Index now produces valid python code strings and
4040
can be used to recreate the object, (GH3038_), (GH3379_), (GH3251_)
41+
- ``convert_objects`` now accepts a ``copy`` parameter (defaults to ``True``)
4142
- ``HDFStore``
4243
4344
- will retain index attributes (freq,tz,name) on recreation (GH3499_)
@@ -97,6 +98,7 @@ pandas 0.11.1
9798
- Raise a TypeError on invalid datetime/timedelta operations
9899
e.g. add datetimes, multiple timedelta x datetime
99100
- Fix ``.diff`` on datelike and timedelta operations (GH3100_)
101+
- ``combine_first`` not returning the same dtype in cases where it can (GH3552_)
100102

101103
.. _GH3164: https://github.com/pydata/pandas/issues/3164
102104
.. _GH2786: https://github.com/pydata/pandas/issues/2786
@@ -129,6 +131,7 @@ pandas 0.11.1
129131
.. _GH3499: https://github.com/pydata/pandas/issues/3499
130132
.. _GH3495: https://github.com/pydata/pandas/issues/3495
131133
.. _GH3492: https://github.com/pydata/pandas/issues/3492
134+
.. _GH3552: https://github.com/pydata/pandas/issues/3552
132135
.. _GH3493: https://github.com/pydata/pandas/issues/3493
133136

134137

pandas/core/frame.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1627,21 +1627,23 @@ def info(self, verbose=True, buf=None, max_cols=None):
16271627
def dtypes(self):
16281628
return self.apply(lambda x: x.dtype)
16291629

1630-
def convert_objects(self, convert_dates=True, convert_numeric=False):
1630+
def convert_objects(self, convert_dates=True, convert_numeric=False, copy=True):
16311631
"""
16321632
Attempt to infer better dtype for object columns
1633-
Always returns a copy (even if no object columns)
16341633
16351634
Parameters
16361635
----------
16371636
convert_dates : if True, attempt to soft convert_dates, if 'coerce', force conversion (and non-convertibles get NaT)
16381637
convert_numeric : if True attempt to coerce to numbers (including strings), non-convertibles get NaN
1638+
copy : boolean, return a copy if True (True by default)
16391639
16401640
Returns
16411641
-------
16421642
converted : DataFrame
16431643
"""
1644-
return self._constructor(self._data.convert(convert_dates=convert_dates, convert_numeric=convert_numeric))
1644+
return self._constructor(self._data.convert(convert_dates=convert_dates,
1645+
convert_numeric=convert_numeric,
1646+
copy=copy))
16451647

16461648
#----------------------------------------------------------------------
16471649
# properties for index and columns
@@ -3735,7 +3737,10 @@ def combine(self, other, func, fill_value=None, overwrite=True):
37353737

37363738
result[col] = arr
37373739

3738-
return self._constructor(result, index=new_index, columns=new_columns)
3740+
# convert_objects so columns get back a more specific dtype where possible (GH3552)
3741+
return self._constructor(result,
3742+
index=new_index,
3743+
columns=new_columns).convert_objects(copy=False)
37393744

37403745
def combine_first(self, other):
37413746
"""
@@ -4210,7 +4215,7 @@ def _apply_standard(self, func, axis, ignore_failures=False):
42104215

42114216
if axis == 1:
42124217
result = result.T
4213-
result = result.convert_objects()
4218+
result = result.convert_objects(copy=False)
42144219

42154220
return result
42164221
else:

pandas/core/series.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -933,10 +933,9 @@ def astype(self, dtype):
933933
return self._constructor(casted, index=self.index, name=self.name,
934934
dtype=casted.dtype)
935935

936-
def convert_objects(self, convert_dates=True, convert_numeric=True):
936+
def convert_objects(self, convert_dates=True, convert_numeric=True, copy=True):
937937
"""
938938
Attempt to infer better dtype
939-
Always return a copy
940939
941940
Parameters
942941
----------
@@ -946,6 +945,8 @@ def convert_objects(self, convert_dates=True, convert_numeric=True):
946945
convert_numeric : boolean, default True
947946
if True attempt to coerce to numbers (including strings),
948947
non-convertibles get NaN
948+
copy : boolean, default True
949+
if True return a copy even if not object dtype
949950
950951
Returns
951952
-------
@@ -955,7 +956,7 @@ def convert_objects(self, convert_dates=True, convert_numeric=True):
955956
return Series(com._possibly_convert_objects(self.values,
956957
convert_dates=convert_dates, convert_numeric=convert_numeric),
957958
index=self.index, name=self.name)
958-
return self.copy()
959+
return self.copy() if copy else self
959960

960961
def repeat(self, reps):
961962
"""

pandas/sparse/frame.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ def _consolidate_inplace(self):
125125
# do nothing when DataFrame calls this method
126126
pass
127127

128-
def convert_objects(self, convert_dates=True):
128+
def convert_objects(self, convert_dates=True, convert_numeric=False, copy=True):
129129
# XXX
130130
return self
131131

pandas/tests/test_frame.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7889,6 +7889,14 @@ def test_combine_first_mixed_bug(self):
78897889
expected = DataFrame({ 'A' : [1,2,3,5,3,7.], 'B' : [np.nan,2,3,4,6,8] })
78907890
assert_frame_equal(result,expected)
78917891

7892+
# GH3552, bool columns previously came back as object dtype; they should stay bool
7893+
df1 = DataFrame([[np.nan, 3.,True], [-4.6, np.nan, True], [np.nan, 7., False]])
7894+
df2 = DataFrame([[-42.6, np.nan, True], [-5., 1.6, False]], index=[1, 2])
7895+
7896+
result = df1.combine_first(df2)[2]
7897+
expected = Series([True,True,False])
7898+
assert_series_equal(result,expected)
7899+
78927900
def test_update(self):
78937901
df = DataFrame([[1.5, nan, 3.],
78947902
[1.5, nan, 3.],

0 commit comments

Comments
 (0)