Skip to content

BUG: combine_first not returning the same dtype in cases where it can (GH3552) #3555

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 9, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ pandas 0.11.1
- Fixed various issues with internal pprinting code, the repr() for various objects
including Timestamp and *Index now produces valid Python code strings and
can be used to recreate the object, (GH3038_), (GH3379_), (GH3251_)
- ``convert_objects`` now accepts a ``copy`` parameter (defaults to ``True``)
- ``HDFStore``

- will retain index attributes (freq,tz,name) on recreation (GH3499_)
Expand Down Expand Up @@ -97,6 +98,7 @@ pandas 0.11.1
- Raise a TypeError on invalid datetime/timedelta operations
e.g. add datetimes, multiply timedelta x datetime
- Fix ``.diff`` on datelike and timedelta operations (GH3100_)
- ``combine_first`` not returning the same dtype in cases where it can (GH3552_)

.. _GH3164: https://github.com/pydata/pandas/issues/3164
.. _GH2786: https://github.com/pydata/pandas/issues/2786
Expand Down Expand Up @@ -129,6 +131,7 @@ pandas 0.11.1
.. _GH3499: https://github.com/pydata/pandas/issues/3499
.. _GH3495: https://github.com/pydata/pandas/issues/3495
.. _GH3492: https://github.com/pydata/pandas/issues/3492
.. _GH3552: https://github.com/pydata/pandas/issues/3552
.. _GH3493: https://github.com/pydata/pandas/issues/3493


Expand Down
15 changes: 10 additions & 5 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1627,21 +1627,23 @@ def info(self, verbose=True, buf=None, max_cols=None):
def dtypes(self):
return self.apply(lambda x: x.dtype)

def convert_objects(self, convert_dates=True, convert_numeric=False):
def convert_objects(self, convert_dates=True, convert_numeric=False, copy=True):
"""
Attempt to infer better dtype for object columns
Always returns a copy (even if no object columns)

Parameters
----------
convert_dates : if True, attempt to soft convert_dates, if 'coerce', force conversion (and non-convertibles get NaT)
convert_numeric : if True attempt to coerce to numbers (including strings), non-convertibles get NaN
copy : boolean, return a copy if True (True by default)

Returns
-------
converted : DataFrame
"""
return self._constructor(self._data.convert(convert_dates=convert_dates, convert_numeric=convert_numeric))
return self._constructor(self._data.convert(convert_dates=convert_dates,
convert_numeric=convert_numeric,
copy=copy))

#----------------------------------------------------------------------
# properties for index and columns
Expand Down Expand Up @@ -3735,7 +3737,10 @@ def combine(self, other, func, fill_value=None, overwrite=True):

result[col] = arr

return self._constructor(result, index=new_index, columns=new_columns)
# convert_objects just in case
return self._constructor(result,
index=new_index,
columns=new_columns).convert_objects(copy=False)

def combine_first(self, other):
"""
Expand Down Expand Up @@ -4210,7 +4215,7 @@ def _apply_standard(self, func, axis, ignore_failures=False):

if axis == 1:
result = result.T
result = result.convert_objects()
result = result.convert_objects(copy=False)

return result
else:
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -933,10 +933,9 @@ def astype(self, dtype):
return self._constructor(casted, index=self.index, name=self.name,
dtype=casted.dtype)

def convert_objects(self, convert_dates=True, convert_numeric=True):
def convert_objects(self, convert_dates=True, convert_numeric=True, copy=True):
"""
Attempt to infer better dtype
Always return a copy

Parameters
----------
Expand All @@ -946,6 +945,8 @@ def convert_objects(self, convert_dates=True, convert_numeric=True):
convert_numeric : boolean, default True
if True attempt to coerce to numbers (including strings),
non-convertibles get NaN
copy : boolean, default True
if True return a copy even if not object dtype

Returns
-------
Expand All @@ -955,7 +956,7 @@ def convert_objects(self, convert_dates=True, convert_numeric=True):
return Series(com._possibly_convert_objects(self.values,
convert_dates=convert_dates, convert_numeric=convert_numeric),
index=self.index, name=self.name)
return self.copy()
return self.copy() if copy else self

def repeat(self, reps):
"""
Expand Down
2 changes: 1 addition & 1 deletion pandas/sparse/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def _consolidate_inplace(self):
# do nothing when DataFrame calls this method
pass

def convert_objects(self, convert_dates=True):
def convert_objects(self, convert_dates=True, convert_numeric=False, copy=True):
# XXX
return self

Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -7889,6 +7889,14 @@ def test_combine_first_mixed_bug(self):
expected = DataFrame({ 'A' : [1,2,3,5,3,7.], 'B' : [np.nan,2,3,4,6,8] })
assert_frame_equal(result,expected)

# GH3552, combine_first should preserve bool dtype (previously returned object dtype with bools)
df1 = DataFrame([[np.nan, 3.,True], [-4.6, np.nan, True], [np.nan, 7., False]])
df2 = DataFrame([[-42.6, np.nan, True], [-5., 1.6, False]], index=[1, 2])

result = df1.combine_first(df2)[2]
expected = Series([True,True,False])
assert_series_equal(result,expected)

def test_update(self):
df = DataFrame([[1.5, nan, 3.],
[1.5, nan, 3.],
Expand Down