From 777e80132abb52ab5189b8fee3f8bbb7cb2c45eb Mon Sep 17 00:00:00 2001 From: ganevgv Date: Sat, 9 Nov 2019 20:16:59 +0000 Subject: [PATCH 1/5] add test for .unique() preserving dtype --- pandas/tests/series/test_dtypes.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 6ee120f3bec64..0276dfccc9c7b 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -518,3 +518,30 @@ def test_values_compatibility(self, data): result = pd.Series(data).values expected = np.array(data.astype(object)) tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "data, uniques, dtype", + [ + ([1, 2, 2], [1, 2], "int8"), + ([1, 2, 2], [1, 2], "int16"), + ([1, 2, 2], [1, 2], "int32"), + ([1, 2, 2], [1, 2], "int64"), + ([1, 2, 2], [1, 2], "uint8"), + ([1, 2, 2], [1, 2], "uint16"), + ([1, 2, 2], [1, 2], "uint32"), + ([1, 2, 2], [1, 2], "uint64"), + ([1, 2, 2], [1.0, 2.0], "float16"), + ([1, 2, 2], [1.0, 2.0], "float32"), + ([1, 2, 2], [1.0, 2.0], "float64"), + ([1, 2, 2], [1.0, 2.0], "complex64"), + ([1, 2, 2], [1.0, 2.0], "complex128"), + ([True, True, False], [True, False], "bool"), + (["A", "A", "B"], ["A", "B"], "object"), + ], + ) + def test_unique_preserve_dtype(self, data, uniques, dtype): + # GH 15442 + result = Series(data, dtype=dtype).unique() + expected = np.array(uniques, dtype=dtype) + + tm.assert_numpy_array_equal(result, expected) From 09a602d882aaa6f8c524d9c7fa8b16196fe272f4 Mon Sep 17 00:00:00 2001 From: ganevgv Date: Sat, 9 Nov 2019 20:17:59 +0000 Subject: [PATCH 2/5] move test --- pandas/tests/series/test_dtypes.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 0276dfccc9c7b..c96cf4b2d6bef 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -506,19 +506,6 @@ def test_is_homogeneous_type(self): assert Series([1, 2])._is_homogeneous_type assert Series(pd.Categorical([1, 2]))._is_homogeneous_type - @pytest.mark.parametrize( - "data", - [ - pd.period_range("2000", periods=4), - pd.IntervalIndex.from_breaks([1, 2, 3, 4]), - ], - ) - def test_values_compatibility(self, data): - # https://github.com/pandas-dev/pandas/issues/23995 - result = pd.Series(data).values - expected = np.array(data.astype(object)) - tm.assert_numpy_array_equal(result, expected) - @pytest.mark.parametrize( "data, uniques, dtype", [ @@ -545,3 +532,16 @@ def test_unique_preserve_dtype(self, data, uniques, dtype): expected = np.array(uniques, dtype=dtype) tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "data", + [ + pd.period_range("2000", periods=4), + pd.IntervalIndex.from_breaks([1, 2, 3, 4]), + ], + ) + def test_values_compatibility(self, data): + # https://github.com/pandas-dev/pandas/issues/23995 + result = pd.Series(data).values + expected = np.array(data.astype(object)) + tm.assert_numpy_array_equal(result, expected) From 6af0391a331e4f13d16322e8f66df83fac04555b Mon Sep 17 00:00:00 2001 From: ganevgv Date: Mon, 11 Nov 2019 23:14:15 +0000 Subject: [PATCH 3/5] use np.sctypes to extract all dtypes --- pandas/tests/series/test_dtypes.py | 27 --------------------------- pandas/tests/test_algos.py | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 27 deletions(-) diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index c96cf4b2d6bef..6ee120f3bec64 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -506,33 +506,6 @@ def test_is_homogeneous_type(self): assert Series([1, 2])._is_homogeneous_type assert Series(pd.Categorical([1, 2]))._is_homogeneous_type - @pytest.mark.parametrize( - "data, uniques, dtype", - [ - ([1, 2, 2], [1, 2], "int8"), - ([1, 2, 2], [1, 2], "int16"), - ([1, 2, 2], [1, 2], "int32"), - ([1, 2, 2], [1, 2], "int64"), - ([1, 2, 2], [1, 2], "uint8"), - ([1, 2, 2], [1, 2], "uint16"), - ([1, 2, 2], [1, 2], "uint32"), - ([1, 2, 2], [1, 2], "uint64"), - ([1, 2, 2], [1.0, 2.0], "float16"), - ([1, 2, 2], [1.0, 2.0], "float32"), - ([1, 2, 2], [1.0, 2.0], "float64"), - ([1, 2, 2], [1.0, 2.0], "complex64"), - ([1, 2, 2], [1.0, 2.0], "complex128"), - ([True, True, False], [True, False], "bool"), - (["A", "A", "B"], ["A", "B"], "object"), - ], - ) - def test_unique_preserve_dtype(self, data, uniques, dtype): - # GH 15442 - result = Series(data, dtype=dtype).unique() - expected = np.array(uniques, dtype=dtype) - - tm.assert_numpy_array_equal(result, expected) - @pytest.mark.parametrize( "data", [ diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 9dd88fd5dd25b..da4b9f19c6a8d 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -353,6 +353,25 @@ def test_on_index_object(self): tm.assert_almost_equal(result, expected) + @pytest.mark.parametrize( + "data, uniques, dtype_list", + [ + ([1, 2, 2], [1, 2], np.sctypes["int"]), + ([1, 2, 2], [1, 2], np.sctypes["uint"]), + ([1, 2, 2], [1.0, 2.0], np.sctypes["float"]), + ([1, 2, 2], [1.0, 2.0], np.sctypes["complex"]), + ([True, True, False], [True, False], np.sctypes["others"]), # bool, object + ], + ) + def test_dtype_preservation(self, data, uniques, dtype_list): + # GH 15442 + for dtype in dtype_list: + if dtype not in [bytes, str, np.void]: + result = Series(data, dtype=dtype).unique() + expected = np.array(uniques, dtype=dtype) + + tm.assert_numpy_array_equal(result, expected) + def test_datetime64_dtype_array_returned(self): # GH 9431 expected = np_array_datetime64_compat( From 0dea22de0af29a9e4fe7e6de6e67981e8fa665da Mon Sep 17 00:00:00 2001 From: ganevgv Date: Wed, 13 Nov 2019 17:33:14 +0000 Subject: [PATCH 4/5] use any_numpy_dtype to extract dtypes --- pandas/tests/test_algos.py | 51 ++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index da4b9f19c6a8d..d5b724a8d0946 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -12,6 +12,15 @@ from pandas.core.dtypes.dtypes import CategoricalDtype as CDT +from pandas.core.dtypes.common import ( + is_integer_dtype, + is_float_dtype, + is_complex_dtype, + is_bool_dtype, + is_object_dtype, +) +from pandas.conftest import BYTES_DTYPES, STRING_DTYPES + import pandas as pd from pandas import ( Categorical, @@ -353,24 +362,34 @@ def test_on_index_object(self): tm.assert_almost_equal(result, expected) - @pytest.mark.parametrize( - "data, uniques, dtype_list", - [ - ([1, 2, 2], [1, 2], np.sctypes["int"]), - ([1, 2, 2], [1, 2], np.sctypes["uint"]), - ([1, 2, 2], [1.0, 2.0], np.sctypes["float"]), - ([1, 2, 2], [1.0, 2.0], np.sctypes["complex"]), - ([True, True, False], [True, False], np.sctypes["others"]), # bool, object - ], - ) - def test_dtype_preservation(self, data, uniques, dtype_list): + def test_dtype_preservation(self, any_numpy_dtype): # GH 15442 - for dtype in dtype_list: - if dtype not in [bytes, str, np.void]: - result = Series(data, dtype=dtype).unique() - expected = np.array(uniques, dtype=dtype) + if any_numpy_dtype in (BYTES_DTYPES + STRING_DTYPES): + pytest.skip("skip string dtype") + elif is_integer_dtype(any_numpy_dtype): + data = [1, 2, 2] + uniques = [1, 2] + elif is_float_dtype(any_numpy_dtype): + data = [1, 2, 2] + uniques = [1.0, 2.0] + elif is_complex_dtype(any_numpy_dtype): + data = [complex(1, 0), complex(2, 0), complex(2, 0)] + uniques = [complex(1, 0), complex(2, 0)] + elif is_bool_dtype(any_numpy_dtype): + data = [True, True, False] + uniques = [True, False] + elif is_object_dtype(any_numpy_dtype): + data = ["A", "B", "B"] + uniques = ["A", "B"] + else: + # datetime64[ns]/M8[ns]/timedelta64[ns]/m8[ns] tested elsewhere + data = [1, 2, 2] + uniques = [1, 2] - tm.assert_numpy_array_equal(result, expected) + result = Series(data, dtype=any_numpy_dtype).unique() + expected = np.array(uniques, dtype=any_numpy_dtype) + + tm.assert_numpy_array_equal(result, expected) def test_datetime64_dtype_array_returned(self): # GH 9431 From 14793aff30fdc0080e49dd782dfedba61fe58f53 Mon Sep 17 00:00:00 2001 From: ganevgv Date: Wed, 13 Nov 2019 18:39:05 +0000 Subject: [PATCH 5/5] sort imports using isort --- pandas/tests/test_algos.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index d5b724a8d0946..da06b5440c38c 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -10,16 +10,14 @@ from pandas.compat.numpy import np_array_datetime64_compat import pandas.util._test_decorators as td -from pandas.core.dtypes.dtypes import CategoricalDtype as CDT - from pandas.core.dtypes.common import ( - is_integer_dtype, - is_float_dtype, - is_complex_dtype, is_bool_dtype, + is_complex_dtype, + is_float_dtype, + is_integer_dtype, is_object_dtype, ) -from pandas.conftest import BYTES_DTYPES, STRING_DTYPES +from pandas.core.dtypes.dtypes import CategoricalDtype as CDT import pandas as pd from pandas import ( @@ -32,6 +30,7 @@ Timestamp, compat, ) +from pandas.conftest import BYTES_DTYPES, STRING_DTYPES import pandas.core.algorithms as algos from pandas.core.arrays import DatetimeArray import pandas.core.common as com