diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 8048b5fea9e66..47b331adea362 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -395,4 +395,5 @@ Bug Fixes - Bug in `read_msgpack` where DataFrame to decode has duplicate column names (:issue:`9618`) - Bug in ``io.common.get_filepath_or_buffer`` which caused reading of valid S3 files to fail if the bucket also contained keys for which the user does not have read permission (:issue:`10604`) - Bug in vectorised setting of timestamp columns with python ``datetime.date`` and numpy ``datetime64`` (:issue:`10408`, :issue:`10412`) +- Bug in ``pd.DataFrame`` when constructing an empty DataFrame (only ``index`` and ``columns`` given) with a string dtype such as ``str`` or ``'U5'``; the result now has ``object`` dtype (:issue:`9428`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a7ecb74a67485..d52a859086aae 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -322,6 +322,8 @@ def _init_dict(self, data, index, columns, dtype=None): if dtype is None: # 1783 v = np.empty(len(index), dtype=object) + elif np.issubdtype(dtype, np.flexible): + v = np.empty(len(index), dtype=object) else: v = np.empty(len(index), dtype=dtype) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 4e78e1549fb0e..2a7022da4fdc4 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -3617,6 +3617,20 @@ def test_constructor_column_duplicates(self): [('a', [8]), ('a', [5]), ('b', [6])], columns=['b', 'a', 'a']) + def test_constructor_empty_with_string_dtype(self): + # GH 9428 + expected = DataFrame(index=[0, 1], columns=[0, 1], dtype=object) + + df = DataFrame(index=[0, 1], columns=[0, 1], dtype=str) + assert_frame_equal(df, expected) + df = DataFrame(index=[0, 1], columns=[0, 1], dtype=np.str_) + assert_frame_equal(df, expected) + df = DataFrame(index=[0, 1], columns=[0, 1], dtype=np.unicode_) + assert_frame_equal(df, expected) + df = DataFrame(index=[0, 1], columns=[0, 1], dtype='U5') + assert_frame_equal(df, expected) 
+ + def test_column_dups_operations(self): def check(result, expected=None):