diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 6dc730cae37f7..ed8d41738c62c 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -48,6 +48,7 @@ Other API Changes - :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`) - `tseries.frequencies.get_freq_group()` and `tseries.frequencies.DAYS` are removed from the public API (:issue:`18034`) - :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`) +- :func:`DataFrame.unstack` will now default to filling with ``np.nan`` for ``object`` columns (:issue:`12815`) .. _whatsnew_0220.deprecations: diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index eae283e9bc00d..f91624bae7af8 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -320,6 +320,7 @@ def maybe_promote(dtype, fill_value=np.nan): fill_value = iNaT else: dtype = np.object_ + fill_value = np.nan else: dtype = np.object_ diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index d795aa835b00a..04dcea2b9d533 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -783,3 +783,26 @@ def test_stack_preserve_categorical_dtype(self): expected = Series([10, 11, 12], index=midx) tm.assert_series_equal(result, expected) + + +def test_unstack_fill_frame_object(): + # GH12815 Test unstacking with object.
+ data = pd.Series(['a', 'b', 'c', 'a'], dtype='object') + data.index = pd.MultiIndex.from_tuples( + [('x', 'a'), ('x', 'b'), ('y', 'b'), ('z', 'a')]) + + # By default missing values will be NaN + result = data.unstack() + expected = pd.DataFrame( + {'a': ['a', np.nan, 'a'], 'b': ['b', 'c', np.nan]}, + index=list('xyz') + ) + assert_frame_equal(result, expected) + + # Fill with any value replaces missing values as expected + result = data.unstack(fill_value='d') + expected = pd.DataFrame( + {'a': ['a', 'd', 'a'], 'b': ['b', 'c', 'd']}, + index=list('xyz') + ) + assert_frame_equal(result, expected)