diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index bf67ff6525005..982cfaeecf6f0 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -324,6 +324,7 @@ Numeric Conversion ^^^^^^^^^^ +- Bug in :meth:`Series.to_dict` and in :meth:`DataFrame.to_dict` with ``orient='records'`` not returning python native types (:issue:`25969`) - - diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 520c0cf100784..669bfe08d42b0 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -71,6 +71,7 @@ is_datetime64_dtype, is_datetime64_ns_dtype, is_datetime64tz_dtype, + is_datetime_or_timedelta_dtype, is_dtype_equal, is_extension_array_dtype, is_float, @@ -170,6 +171,29 @@ def maybe_box_datetimelike(value: Scalar, dtype: Optional[Dtype] = None) -> Scal return value +def maybe_box_native(value: Scalar) -> Scalar: + """ + If passed a scalar cast the scalar to a python native type. + + Parameters + ---------- + value : scalar or Series + + Returns + ------- + scalar or Series + """ + if is_datetime_or_timedelta_dtype(value): + value = maybe_box_datetimelike(value) + elif is_float(value): + value = float(value) + elif is_integer(value): + value = int(value) + elif is_bool(value): + value = bool(value) + return value + + def maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar: """ Convert a Timedelta or Timestamp to timedelta64 or datetime64 for setting diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f1328b0eeee5e..3fe330f659513 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -95,7 +95,7 @@ find_common_type, infer_dtype_from_scalar, invalidate_string_dtypes, - maybe_box_datetimelike, + maybe_box_native, maybe_convert_platform, maybe_downcast_to_dtype, maybe_infer_to_datetimelike, @@ -1655,7 +1655,7 @@ def to_dict(self, orient: str = "dict", into=dict): ( "data", [ - list(map(maybe_box_datetimelike, t)) + list(map(maybe_box_native, t)) for t in self.itertuples(index=False, name=None) 
], ), @@ -1663,7 +1663,7 @@ def to_dict(self, orient: str = "dict", into=dict): ) elif orient == "series": - return into_c((k, maybe_box_datetimelike(v)) for k, v in self.items()) + return into_c((k, v) for k, v in self.items()) elif orient == "records": columns = self.columns.tolist() @@ -1672,8 +1672,7 @@ def to_dict(self, orient: str = "dict", into=dict): for row in self.itertuples(index=False, name=None) ) return [ - into_c((k, maybe_box_datetimelike(v)) for k, v in row.items()) - for row in rows + into_c((k, maybe_box_native(v)) for k, v in row.items()) for row in rows ] elif orient == "index": diff --git a/pandas/core/series.py b/pandas/core/series.py index 0e1d2f5dafb3f..cbb66918a661b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -58,6 +58,7 @@ from pandas.core.dtypes.cast import ( convert_dtypes, + maybe_box_native, maybe_cast_to_extension_array, validate_numeric_casting, ) @@ -1591,7 +1592,7 @@ def to_dict(self, into=dict): """ # GH16122 into_c = com.standardize_mapping(into) - return into_c(self.items()) + return into_c((k, maybe_box_native(v)) for k, v in self.items()) def to_frame(self, name=None) -> DataFrame: """ diff --git a/pandas/tests/dtypes/cast/test_maybe_box_native.py b/pandas/tests/dtypes/cast/test_maybe_box_native.py new file mode 100644 index 0000000000000..3f62f31dac219 --- /dev/null +++ b/pandas/tests/dtypes/cast/test_maybe_box_native.py @@ -0,0 +1,40 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas.core.dtypes.cast import maybe_box_native + +from pandas import ( + Interval, + Period, + Timedelta, + Timestamp, +) + + +@pytest.mark.parametrize( + "obj,expected_dtype", + [ + (b"\x00\x10", bytes), + (int(4), int), + (np.uint(4), int), + (np.int32(-4), int), + (np.uint8(4), int), + (float(454.98), float), + (np.float16(0.4), float), + (np.float64(1.4), float), + (np.bool_(False), bool), + (datetime(2005, 2, 25), datetime), + (np.datetime64("2005-02-25"), Timestamp), + 
(Timestamp("2005-02-25"), Timestamp), + (np.timedelta64(1, "D"), Timedelta), + (Timedelta(1, "D"), Timedelta), + (Interval(0, 1), Interval), + (Period("4Q2005"), Period), + ], +) +def test_maybe_box_native(obj, expected_dtype): + boxed_obj = maybe_box_native(obj) + result_dtype = type(boxed_obj) + assert result_dtype is expected_dtype diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py index c17ad197ac09a..6d0d4e045e491 100644 --- a/pandas/tests/frame/methods/test_to_dict.py +++ b/pandas/tests/frame/methods/test_to_dict.py @@ -263,31 +263,44 @@ def test_to_dict_wide(self): expected = {f"A_{i:d}": i for i in range(256)} assert result == expected - def test_to_dict_orient_dtype(self): - # GH22620 & GH21256 - - df = DataFrame( - { - "bool": [True, True, False], - "datetime": [ + @pytest.mark.parametrize( + "data,dtype", + ( + ([True, True, False], bool), + [ + [ datetime(2018, 1, 1), datetime(2019, 2, 2), datetime(2020, 3, 3), ], - "float": [1.0, 2.0, 3.0], - "int": [1, 2, 3], - "str": ["X", "Y", "Z"], - } - ) + Timestamp, + ], + [[1.0, 2.0, 3.0], float], + [[1, 2, 3], int], + [["X", "Y", "Z"], str], + ), + ) + def test_to_dict_orient_dtype(self, data, dtype): + # GH22620 & GH21256 - expected = { - "int": int, - "float": float, - "str": str, - "datetime": Timestamp, - "bool": bool, - } + df = DataFrame({"a": data}) + d = df.to_dict(orient="records") + assert all(type(record["a"]) is dtype for record in d) + + @pytest.mark.parametrize( + "data,expected_dtype", + ( + [np.uint64(2), int], + [np.int64(-9), int], + [np.float64(1.1), float], + [np.bool_(True), bool], + [np.datetime64("2005-02-25"), Timestamp], + ), + ) + def test_to_dict_scalar_constructor_orient_dtype(self, data, expected_dtype): + # GH22620 & GH21256 - for df_dict in df.to_dict("records"): - result = {col: type(df_dict[col]) for col in list(df.columns)} - assert result == expected + df = DataFrame({"a": data}, index=[0]) + d = df.to_dict(orient="records") + 
result = type(d[0]["a"]) + assert result is expected_dtype diff --git a/pandas/tests/series/methods/test_to_dict.py b/pandas/tests/series/methods/test_to_dict.py index 47badb0a1bb52..4c3d9592eebe3 100644 --- a/pandas/tests/series/methods/test_to_dict.py +++ b/pandas/tests/series/methods/test_to_dict.py @@ -1,5 +1,6 @@ import collections +import numpy as np import pytest from pandas import Series @@ -20,3 +21,18 @@ def test_to_dict(self, mapping, datetime_series): from_method = Series(datetime_series.to_dict(collections.Counter)) from_constructor = Series(collections.Counter(datetime_series.items())) tm.assert_series_equal(from_method, from_constructor) + + @pytest.mark.parametrize( + "input", + ( + {"a": np.int64(64), "b": 10}, + {"a": np.int64(64), "b": 10, "c": "ABC"}, + {"a": np.uint64(64), "b": 10, "c": "ABC"}, + ), + ) + def test_to_dict_return_types(self, input): + # GH25969 + + d = Series(input).to_dict() + assert isinstance(d["a"], int) + assert isinstance(d["b"], int)