From 4b52a2aa4bcec417144eae2b2842b8f2100b6cdb Mon Sep 17 00:00:00 2001 From: 5j9 <5j9@users.noreply.github.com> Date: Tue, 6 Sep 2022 19:47:57 +0430 Subject: [PATCH 1/4] make sure conversion is not lossy --- pandas/core/dtypes/cast.py | 3 +++ pandas/tests/series/test_constructors.py | 12 ++++++++++++ 2 files changed, 15 insertions(+) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 4244217da7865..ae54454a0e2b4 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1851,6 +1851,9 @@ def maybe_cast_to_integer_array( # doesn't handle `uint64` correctly. arr = np.asarray(arr) + if np.issubdtype(arr.dtype, str) and (casted.astype(str) == arr).all(): + return casted + if is_unsigned_integer_dtype(dtype) and (arr < 0).any(): raise OverflowError("Trying to coerce negative values to unsigned integers") diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index f79714ae6455c..231c52bfad3b2 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1880,6 +1880,18 @@ def test_constructor_bool_dtype_missing_values(self): expected = Series(True, index=[0], dtype="bool") tm.assert_series_equal(result, expected) + def test_constructor_int64_dtype(self, any_int_dtype): + # GH#44923 + result = Series(["0", "1", "2"], dtype=any_int_dtype) + expected = Series([0, 1, 2], dtype=any_int_dtype) + tm.assert_series_equal(result, expected) + + def test_constructor_raise_on_lossy_conversion_of_strings(self): + with pytest.raises( + ValueError, match="values cannot be losslessly cast to int8" + ): + Series(["128"], dtype="int8") + def test_constructor_dtype_timedelta_alternative_construct(self): # GH#35465 result = Series([1000000, 200000, 3000000], dtype="timedelta64[ns]") From 8e9b004b0f858fa356e688c70b4ed37afa6afcf0 Mon Sep 17 00:00:00 2001 From: 5j9 <5j9@users.noreply.github.com> Date: Wed, 7 Sep 2022 04:40:12 +0430 Subject: [PATCH 2/4] add whatsnew and ref issue in test --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/tests/series/test_constructors.py | 1 + 2 files changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index c479c59082464..df07e9179d72b 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -1022,6 +1022,7 @@ Conversion ^^^^^^^^^^ - Bug in :meth:`DataFrame.astype` not preserving subclasses (:issue:`40810`) - Bug in constructing a :class:`Series` from a float-containing list or a floating-dtype ndarray-like (e.g. ``dask.Array``) and an integer dtype raising instead of casting like we would with an ``np.ndarray`` (:issue:`40110`) +- Bug in constructing :class:`Series` with ``int64`` dtype from a string list raising instead of casting (:issue:`44923`) - Bug in :meth:`Float64Index.astype` to unsigned integer dtype incorrectly casting to ``np.int64`` dtype (:issue:`45309`) - Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` from floating dtype to unsigned integer dtype failing to raise in the presence of negative values (:issue:`45151`) - Bug in :func:`array` with ``FloatingDtype`` and values containing float-castable strings incorrectly raising (:issue:`45424`) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 231c52bfad3b2..48abe3020b8c5 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1887,6 +1887,7 @@ def test_constructor_int64_dtype(self, any_int_dtype): tm.assert_series_equal(result, expected) def test_constructor_raise_on_lossy_conversion_of_strings(self): + # GH#44923 with pytest.raises( ValueError, match="values cannot be losslessly cast to int8" ): From 1c6da7993b48ea4643895e2209a204352b9c49eb Mon Sep 17 00:00:00 2001 From: 5j9 <5j9@users.noreply.github.com> Date: Wed, 7 Sep 2022 04:55:22 +0430 Subject: [PATCH 3/4] raise earlier Note the comment at the last line: # No known cases that get here --- pandas/core/dtypes/cast.py | 6 ++++-- pandas/tests/series/test_constructors.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index ae54454a0e2b4..f543efe20bf3c 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1851,8 +1851,10 @@ def maybe_cast_to_integer_array( # doesn't handle `uint64` correctly. arr = np.asarray(arr) - if np.issubdtype(arr.dtype, str) and (casted.astype(str) == arr).all(): - return casted + if np.issubdtype(arr.dtype, str): + if (casted.astype(str) == arr).all(): + return casted + raise ValueError(f"string values cannot be losslessly cast to {dtype}") if is_unsigned_integer_dtype(dtype) and (arr < 0).any(): raise OverflowError("Trying to coerce negative values to unsigned integers") diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 48abe3020b8c5..9a42dd4d90f71 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1889,7 +1889,7 @@ def test_constructor_int64_dtype(self, any_int_dtype): def test_constructor_raise_on_lossy_conversion_of_strings(self): # GH#44923 with pytest.raises( - ValueError, match="values cannot be losslessly cast to int8" + ValueError, match="string values cannot be losslessly cast to int8" ): Series(["128"], dtype="int8") From 18e32d258a3978aba91fff00e979d195c73b91a7 Mon Sep 17 00:00:00 2001 From: 5j9 <5j9@users.noreply.github.com> Date: Wed, 7 Sep 2022 15:21:55 +0430 Subject: [PATCH 4/4] mv whatsnew entry to 1.6.0.rst --- doc/source/whatsnew/v1.5.0.rst | 1 - doc/source/whatsnew/v1.6.0.rst | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index df07e9179d72b..c479c59082464 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -1022,7 +1022,6 @@ Conversion ^^^^^^^^^^ - Bug in :meth:`DataFrame.astype` not preserving subclasses (:issue:`40810`) - Bug in constructing a :class:`Series` from a float-containing list or a floating-dtype ndarray-like (e.g. ``dask.Array``) and an integer dtype raising instead of casting like we would with an ``np.ndarray`` (:issue:`40110`) -- Bug in constructing :class:`Series` with ``int64`` dtype from a string list raising instead of casting (:issue:`44923`) - Bug in :meth:`Float64Index.astype` to unsigned integer dtype incorrectly casting to ``np.int64`` dtype (:issue:`45309`) - Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` from floating dtype to unsigned integer dtype failing to raise in the presence of negative values (:issue:`45151`) - Bug in :func:`array` with ``FloatingDtype`` and values containing float-castable strings incorrectly raising (:issue:`45424`) diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index c393b8a57f805..5629c0729e76e 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -138,7 +138,7 @@ Numeric Conversion ^^^^^^^^^^ -- +- Bug in constructing :class:`Series` with ``int64`` dtype from a string list raising instead of casting (:issue:`44923`) - Strings