From 09e84724da1a7b7e9d3da4e839f5ee3dd9ed8efc Mon Sep 17 00:00:00 2001 From: Shirish Kadam Date: Wed, 24 Oct 2018 20:05:09 +0530 Subject: [PATCH 01/13] Added EA check and passed through _from_sequence --- pandas/core/groupby/groupby.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 025be781d9ee8..e0446ef5a3ebe 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -27,6 +27,7 @@ class providing the base-class of operations. from pandas.core.dtypes.common import ( is_numeric_dtype, + is_extension_array_dtype, is_scalar, ensure_float) from pandas.core.dtypes.cast import maybe_downcast_to_dtype @@ -758,6 +759,18 @@ def _try_cast(self, result, obj, numeric_only=False): else: dtype = obj.dtype + if is_extension_array_dtype(obj.dtype): + # The function can return something of any type, so check + # if the type is compatible with the calling EA. + try: + result = obj.values._from_sequence(result) + except Exception: + # https://github.com/pandas-dev/pandas/issues/22850 + # pandas has no control over what 3rd-party ExtensionArrays + # do in _values_from_sequence. We still want ops to work + # though, so we catch any regular Exception. 
+ pass + if not is_scalar(result): if numeric_only and is_numeric_dtype(dtype) or not numeric_only: result = maybe_downcast_to_dtype(result, dtype) From 27ce8281db00069d6cdaa0934a9a966db0cc582b Mon Sep 17 00:00:00 2001 From: Shirish Kadam Date: Wed, 24 Oct 2018 22:42:54 +0530 Subject: [PATCH 02/13] Added test case to check extension arrays dtype --- pandas/tests/groupby/test_grouping.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index e7c0881b11871..3408fb05cead5 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -282,6 +282,13 @@ def test_groupby_categorical_index_and_columns(self, observed): expected = DataFrame(data=expected_data.T, index=expected_columns) assert_frame_equal(result, expected) + def test_groupby_extension_array(self): + df = pd.DataFrame({'Int': pd.Series([1, 2, 3], dtype='Int64'), + 'A': [1, 2, 1]}) + result = df.groupby('A').Int.sum() + assert result is not None + assert result.dtype.name == 'Int64' + def test_grouper_getting_correct_binner(self): # GH 10063 From 28b45f23a8d9b56e233045595d7599e4b9b63fa8 Mon Sep 17 00:00:00 2001 From: Shirish Kadam Date: Thu, 25 Oct 2018 01:42:16 +0530 Subject: [PATCH 03/13] BUG: GH23227 Fixed --- pandas/core/groupby/groupby.py | 2 +- pandas/tests/groupby/test_grouping.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index e0446ef5a3ebe..75a05c957c38b 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -759,7 +759,7 @@ def _try_cast(self, result, obj, numeric_only=False): else: dtype = obj.dtype - if is_extension_array_dtype(obj.dtype): + if is_extension_array_dtype(dtype): # The function can return something of any type, so check # if the type is compatible with the calling EA. 
try: diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 3408fb05cead5..fbfa9548d9664 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -283,6 +283,9 @@ def test_groupby_categorical_index_and_columns(self, observed): assert_frame_equal(result, expected) def test_groupby_extension_array(self): + + # GH23227 + # groupby on an extension array should return the extension array type df = pd.DataFrame({'Int': pd.Series([1, 2, 3], dtype='Int64'), 'A': [1, 2, 1]}) result = df.groupby('A').Int.sum() From 9243612a737925523fd579866a6f5153193aa188 Mon Sep 17 00:00:00 2001 From: Shirish Kadam Date: Sat, 27 Oct 2018 19:49:24 +0530 Subject: [PATCH 04/13] Fixed arrays tests to check dtypes --- pandas/core/groupby/groupby.py | 25 +++++++++++----------- pandas/tests/arrays/test_integer.py | 30 ++++++++++++++++++--------- pandas/tests/groupby/test_grouping.py | 10 --------- pandas/tests/sparse/test_groupby.py | 2 +- 4 files changed, 33 insertions(+), 34 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 75a05c957c38b..5f627b24ef25e 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -759,20 +759,19 @@ def _try_cast(self, result, obj, numeric_only=False): else: dtype = obj.dtype - if is_extension_array_dtype(dtype): - # The function can return something of any type, so check - # if the type is compatible with the calling EA. - try: - result = obj.values._from_sequence(result) - except Exception: - # https://github.com/pandas-dev/pandas/issues/22850 - # pandas has no control over what 3rd-party ExtensionArrays - # do in _values_from_sequence. We still want ops to work - # though, so we catch any regular Exception. 
- pass - if not is_scalar(result): - if numeric_only and is_numeric_dtype(dtype) or not numeric_only: + if is_extension_array_dtype(dtype): + # The function can return something of any type, so check + # if the type is compatible with the calling EA. + try: + result = obj.values._from_sequence(result) + except Exception: + # https://github.com/pandas-dev/pandas/issues/22850 + # pandas has no control over what 3rd-party ExtensionArrays + # do in _values_from_sequence. We still want ops to work + # though, so we catch any regular Exception. + pass + elif numeric_only and is_numeric_dtype(dtype) or not numeric_only: result = maybe_downcast_to_dtype(result, dtype) return result diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index e6dae0ffaec28..99f6141004a04 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -651,11 +651,9 @@ def test_preserve_dtypes(op): # groupby result = getattr(df.groupby("A"), op)() - expected = pd.DataFrame({ - "B": np.array([1.0, 3.0]), - "C": np.array([1, 3], dtype="int64") - }, index=pd.Index(['a', 'b'], name='A')) - tm.assert_frame_equal(result, expected) + + assert result.dtypes['B'].name == 'float64' + assert result.dtypes['C'].name == 'Int64' @pytest.mark.parametrize('op', ['mean']) @@ -674,11 +672,23 @@ def test_reduce_to_float(op): # groupby result = getattr(df.groupby("A"), op)() - expected = pd.DataFrame({ - "B": np.array([1.0, 3.0]), - "C": np.array([1, 3], dtype="float64") - }, index=pd.Index(['a', 'b'], name='A')) - tm.assert_frame_equal(result, expected) + + assert result.dtypes['B'].name == 'float64' + assert result.dtypes['C'].name == 'Int64' + + +@pytest.mark.parametrize('op', ['sum']) +def test_groupby_extension_array(op): + # GH23227 + # groupby on an extension array should return the extension array type + df = pd.DataFrame({ + 'Int': pd.Series([1, 2, 3], dtype='Int64'), + 'A': [1, 2, 1] + }) + + result = getattr(df.groupby('A').Int, op)() + 
assert result is not None + assert result.dtype.name == 'Int64' def test_astype_nansafe(): diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index fbfa9548d9664..e7c0881b11871 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -282,16 +282,6 @@ def test_groupby_categorical_index_and_columns(self, observed): expected = DataFrame(data=expected_data.T, index=expected_columns) assert_frame_equal(result, expected) - def test_groupby_extension_array(self): - - # GH23227 - # groupby on an extension array should return the extension array type - df = pd.DataFrame({'Int': pd.Series([1, 2, 3], dtype='Int64'), - 'A': [1, 2, 1]}) - result = df.groupby('A').Int.sum() - assert result is not None - assert result.dtype.name == 'Int64' - def test_grouper_getting_correct_binner(self): # GH 10063 diff --git a/pandas/tests/sparse/test_groupby.py b/pandas/tests/sparse/test_groupby.py index 1d2129312fb1b..e4409b9ae0807 100644 --- a/pandas/tests/sparse/test_groupby.py +++ b/pandas/tests/sparse/test_groupby.py @@ -55,5 +55,5 @@ def test_groupby_includes_fill_value(fill_value): sdf = df.to_sparse(fill_value=fill_value) result = sdf.groupby('a').sum() expected = df.groupby('a').sum() - tm.assert_frame_equal(result, expected, + tm.assert_frame_equal(result, expected.to_sparse(fill_value=fill_value), check_index_type=False) From 14666a1d85406f9da58381402492a36cee88ba32 Mon Sep 17 00:00:00 2001 From: Shirish Kadam Date: Sat, 27 Oct 2018 20:44:17 +0530 Subject: [PATCH 05/13] Fixed sparse test cases --- pandas/core/groupby/groupby.py | 4 +++- pandas/tests/sparse/test_groupby.py | 12 ++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index c39654d1d6349..d24e5a76dfc70 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -27,7 +27,9 @@ class providing the base-class of operations. 
) from pandas.core.config import option_context from pandas.core.dtypes.cast import maybe_downcast_to_dtype -from pandas.core.dtypes.common import ensure_float, is_numeric_dtype, is_scalar, is_extension_array_dtype +from pandas.core.dtypes.common import ( + ensure_float, is_numeric_dtype, is_scalar, is_extension_array_dtype +) from pandas.core.dtypes.missing import isna, notna from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame diff --git a/pandas/tests/sparse/test_groupby.py b/pandas/tests/sparse/test_groupby.py index e4409b9ae0807..ecc001702ddf7 100644 --- a/pandas/tests/sparse/test_groupby.py +++ b/pandas/tests/sparse/test_groupby.py @@ -26,9 +26,9 @@ def test_first_last_nth(self): # TODO: shouldn't these all be spares or not? tm.assert_frame_equal(sparse_grouped.first(), - dense_grouped.first()) + dense_grouped.first().to_sparse()) tm.assert_frame_equal(sparse_grouped.last(), - dense_grouped.last()) + dense_grouped.last().to_sparse()) tm.assert_frame_equal(sparse_grouped.nth(1), dense_grouped.nth(1).to_sparse()) @@ -36,15 +36,15 @@ def test_aggfuncs(self): sparse_grouped = self.sparse.groupby('A') dense_grouped = self.dense.groupby('A') - tm.assert_frame_equal(sparse_grouped.mean(), - dense_grouped.mean()) + tm.assert_frame_equal(sparse_grouped.mean().to_sparse(), + dense_grouped.mean().to_sparse()) # ToDo: sparse sum includes str column # tm.assert_frame_equal(sparse_grouped.sum(), # dense_grouped.sum()) - tm.assert_frame_equal(sparse_grouped.count(), - dense_grouped.count()) + tm.assert_frame_equal(sparse_grouped.count().to_sparse(), + dense_grouped.count().to_sparse()) @pytest.mark.parametrize("fill_value", [0, np.nan]) From 44287de1b489825feb139504504881677c3c2f1c Mon Sep 17 00:00:00 2001 From: Shirish Kadam Date: Sat, 27 Oct 2018 22:54:21 +0530 Subject: [PATCH 06/13] Fixed test case to re-sample categorical data for timedelta --- pandas/tests/test_resample.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 5cd31e08e0a9b..7e8aaa41a378b 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -1576,6 +1576,7 @@ def test_resample_categorical_data_with_timedeltaindex(self): 'Group': ['A', 'A']}, index=pd.to_timedelta([0, 10], unit='s')) expected = expected.reindex(['Group_obj', 'Group'], axis=1) + expected['Group'] = expected['Group_obj'].astype('category') tm.assert_frame_equal(result, expected) def test_resample_daily_anchored(self): From d54c7438f2a8caf70d8545be2ac17b6aaa5fdac4 Mon Sep 17 00:00:00 2001 From: Shirish Kadam Date: Thu, 1 Nov 2018 18:49:23 +0530 Subject: [PATCH 07/13] test case dataframe assertion added --- pandas/core/groupby/groupby.py | 2 +- pandas/tests/arrays/test_integer.py | 21 +++++---------------- 2 files changed, 6 insertions(+), 17 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index d24e5a76dfc70..e20b1c65f6499 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -28,7 +28,7 @@ class providing the base-class of operations. 
from pandas.core.config import option_context from pandas.core.dtypes.cast import maybe_downcast_to_dtype from pandas.core.dtypes.common import ( - ensure_float, is_numeric_dtype, is_scalar, is_extension_array_dtype + ensure_float, is_extension_array_dtype, is_numeric_dtype, is_scalar ) from pandas.core.dtypes.missing import isna, notna from pandas.core.frame import DataFrame diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 42d0fc2844c47..05b84f1808536 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -674,22 +674,11 @@ def test_reduce_to_float(op): # groupby result = getattr(df.groupby("A"), op)() - assert result.dtypes['B'].name == 'float64' - assert result.dtypes['C'].name == 'Int64' - - -@pytest.mark.parametrize('op', ['sum']) -def test_groupby_extension_array(op): - # GH23227 - # groupby on an extension array should return the extension array type - df = pd.DataFrame({ - 'Int': pd.Series([1, 2, 3], dtype='Int64'), - 'A': [1, 2, 1] - }) - - result = getattr(df.groupby('A').Int, op)() - assert result is not None - assert result.dtype.name == 'Int64' + expected = pd.DataFrame({ + "B": np.array([1.0, 3.0]), + "C": integer_array([1, 3], dtype="Int64") + }, index=pd.Index(['a', 'b'], name='A')) + tm.assert_frame_equal(result, expected) def test_astype_nansafe(): From 05eb70c3d7bcf233f163befb57524da2efec525a Mon Sep 17 00:00:00 2001 From: Shirish Kadam Date: Thu, 1 Nov 2018 19:57:42 +0530 Subject: [PATCH 08/13] Added whatsnew in v0.24.0 --- doc/source/whatsnew/v0.24.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 768868d585721..bab76a9b12298 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -825,6 +825,7 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your - :func:`ExtensionArray.isna` is allowed to return an ``ExtensionArray`` 
(:issue:`22325`). - Support for reduction operations such as ``sum``, ``mean`` via opt-in base class method override (:issue:`22762`) - :meth:`Series.unstack` no longer converts extension arrays to object-dtype ndarrays. The output ``DataFrame`` will now have the same dtype as the input. This changes behavior for Categorical and Sparse data (:issue:`23077`). +- Bug when grouping :meth:`Dataframe.groupby()` on ``ExtensionArray`` not returning the actual ``ExtensionArray`` dtype (:issue:`23227`:). .. _whatsnew_0240.api.incompatibilities: From c1d8416571452e6cb429df102bc4bbe25b1116ff Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 1 Nov 2018 15:25:57 -0700 Subject: [PATCH 09/13] Fixed isort --- pandas/core/groupby/groupby.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 14a3f553fd481..ea7507799fa9a 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -25,8 +25,7 @@ class providing the base-class of operations. 
from pandas.core.dtypes.cast import maybe_downcast_to_dtype from pandas.core.dtypes.common import ( - ensure_float, is_extension_array_dtype, is_numeric_dtype, is_scalar -) + ensure_float, is_extension_array_dtype, is_numeric_dtype, is_scalar) from pandas.core.dtypes.missing import isna, notna import pandas.core.algorithms as algorithms From fc5d2f296f21d9494ba238b594e9a26a8f5e46b2 Mon Sep 17 00:00:00 2001 From: Shirish Kadam Date: Fri, 2 Nov 2018 14:40:05 +0530 Subject: [PATCH 10/13] Updated whatsnew for 0.24.0 --- doc/source/whatsnew/v0.24.0.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index b625a60ebdc4f..8c7a1f91d052f 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -850,7 +850,7 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your - :func:`ExtensionArray.isna` is allowed to return an ``ExtensionArray`` (:issue:`22325`). - Support for reduction operations such as ``sum``, ``mean`` via opt-in base class method override (:issue:`22762`) - :meth:`Series.unstack` no longer converts extension arrays to object-dtype ndarrays. The output ``DataFrame`` will now have the same dtype as the input. This changes behavior for Categorical and Sparse data (:issue:`23077`). -- Bug when grouping :meth:`Dataframe.groupby()` on ``ExtensionArray`` not returning the actual ``ExtensionArray`` dtype (:issue:`23227`:). +- Bug when grouping :meth:`Dataframe.groupby()` and aggregating on ``ExtensionArray`` it won't returning the actual ``ExtensionArray`` dtype (:issue:`23227`:). .. _whatsnew_0240.api.incompatibilities: @@ -1085,6 +1085,7 @@ Categorical - Bug when indexing with a boolean-valued ``Categorical``. 
Now a boolean-valued ``Categorical`` is treated as a boolean mask (:issue:`22665`) - Constructing a :class:`CategoricalIndex` with empty values and boolean categories was raising a ``ValueError`` after a change to dtype coercion (:issue:`22702`). - Bug in :meth:`Categorical.take` with a user-provided ``fill_value`` not encoding the ``fill_value``, which could result in a ``ValueError``, incorrect results, or a segmentation fault (:issue:`23296`). +- Bug when resampling :meth:`Dataframe.resample()` and aggregating on categorical data, the categorical dtype was getting lost. Datetimelike ^^^^^^^^^^^^ From 2859c7004afdccf9e10de21c8dc17546b6ed5bdc Mon Sep 17 00:00:00 2001 From: Shirish Kadam Date: Mon, 5 Nov 2018 12:56:45 +0530 Subject: [PATCH 11/13] Updated whatsnew for 0.24.0, applied suggestions to sparse/test_group --- doc/source/whatsnew/v0.24.0.txt | 4 ++-- pandas/tests/sparse/test_groupby.py | 13 ++++++------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 8c7a1f91d052f..1a046e2bf0715 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -850,7 +850,7 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your - :func:`ExtensionArray.isna` is allowed to return an ``ExtensionArray`` (:issue:`22325`). - Support for reduction operations such as ``sum``, ``mean`` via opt-in base class method override (:issue:`22762`) - :meth:`Series.unstack` no longer converts extension arrays to object-dtype ndarrays. The output ``DataFrame`` will now have the same dtype as the input. This changes behavior for Categorical and Sparse data (:issue:`23077`). -- Bug when grouping :meth:`Dataframe.groupby()` and aggregating on ``ExtensionArray`` it won't returning the actual ``ExtensionArray`` dtype (:issue:`23227`:). 
+- Bug when grouping :meth:`Dataframe.groupby()` and aggregating on ``ExtensionArray`` it was not returning the actual ``ExtensionArray`` dtype (:issue:`23227`:). .. _whatsnew_0240.api.incompatibilities: @@ -1085,7 +1085,7 @@ Categorical - Bug when indexing with a boolean-valued ``Categorical``. Now a boolean-valued ``Categorical`` is treated as a boolean mask (:issue:`22665`) - Constructing a :class:`CategoricalIndex` with empty values and boolean categories was raising a ``ValueError`` after a change to dtype coercion (:issue:`22702`). - Bug in :meth:`Categorical.take` with a user-provided ``fill_value`` not encoding the ``fill_value``, which could result in a ``ValueError``, incorrect results, or a segmentation fault (:issue:`23296`). -- Bug when resampling :meth:`Dataframe.resample()` and aggregating on categorical data, the categorical dtype was getting lost. +- Bug when resampling :meth:`Dataframe.resample()` and aggregating on categorical data, the categorical dtype was getting lost. (:issue:`23227`:) Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/tests/sparse/test_groupby.py b/pandas/tests/sparse/test_groupby.py index ecc001702ddf7..ffb9673a37158 100644 --- a/pandas/tests/sparse/test_groupby.py +++ b/pandas/tests/sparse/test_groupby.py @@ -22,15 +22,15 @@ def setup_method(self, method): def test_first_last_nth(self): # tests for first / last / nth sparse_grouped = self.sparse.groupby('A') - dense_grouped = self.dense.groupby('A') + dense_grouped = self.dense.to_sparse().groupby('A') # TODO: shouldn't these all be spares or not? 
tm.assert_frame_equal(sparse_grouped.first(), - dense_grouped.first().to_sparse()) + dense_grouped.first()) tm.assert_frame_equal(sparse_grouped.last(), - dense_grouped.last().to_sparse()) + dense_grouped.last()) tm.assert_frame_equal(sparse_grouped.nth(1), - dense_grouped.nth(1).to_sparse()) + dense_grouped.nth(1)) def test_aggfuncs(self): sparse_grouped = self.sparse.groupby('A') @@ -54,6 +54,5 @@ def test_groupby_includes_fill_value(fill_value): 'b': [fill_value, 1, fill_value, fill_value]}) sdf = df.to_sparse(fill_value=fill_value) result = sdf.groupby('a').sum() - expected = df.groupby('a').sum() - tm.assert_frame_equal(result, expected.to_sparse(fill_value=fill_value), - check_index_type=False) + expected = df.groupby('a').sum().to_sparse(fill_value=fill_value) + tm.assert_frame_equal(result, expected, check_index_type=False) From 07ffcc718a482651135819c1d1ad89d4fdfb5199 Mon Sep 17 00:00:00 2001 From: Shirish Kadam Date: Mon, 5 Nov 2018 19:11:53 +0530 Subject: [PATCH 12/13] Updated whatsnew for 0.24.0, applied suggestions to sparse/test_groupby and arrays/test_integer --- doc/source/whatsnew/v0.24.0.txt | 4 ++-- pandas/tests/arrays/test_integer.py | 7 +++++-- pandas/tests/sparse/test_groupby.py | 22 +++++++++++++++------- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 1a046e2bf0715..62069dfcb2262 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -850,7 +850,7 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your - :func:`ExtensionArray.isna` is allowed to return an ``ExtensionArray`` (:issue:`22325`). - Support for reduction operations such as ``sum``, ``mean`` via opt-in base class method override (:issue:`22762`) - :meth:`Series.unstack` no longer converts extension arrays to object-dtype ndarrays. The output ``DataFrame`` will now have the same dtype as the input. 
This changes behavior for Categorical and Sparse data (:issue:`23077`). -- Bug when grouping :meth:`Dataframe.groupby()` and aggregating on ``ExtensionArray`` it was not returning the actual ``ExtensionArray`` dtype (:issue:`23227`:). +- Bug in :meth:`DataFrame.groupby` where aggregating on an ``ExtensionArray`` column did not return the actual ``ExtensionArray`` dtype (:issue:`23227`). .. _whatsnew_0240.api.incompatibilities: @@ -1085,7 +1085,7 @@ Categorical - Bug when indexing with a boolean-valued ``Categorical``. Now a boolean-valued ``Categorical`` is treated as a boolean mask (:issue:`22665`) - Constructing a :class:`CategoricalIndex` with empty values and boolean categories was raising a ``ValueError`` after a change to dtype coercion (:issue:`22702`). - Bug in :meth:`Categorical.take` with a user-provided ``fill_value`` not encoding the ``fill_value``, which could result in a ``ValueError``, incorrect results, or a segmentation fault (:issue:`23296`). -- Bug when resampling :meth:`Dataframe.resample()` and aggregating on categorical data, the categorical dtype was getting lost. (:issue:`23227`:) +- Bug in :meth:`DataFrame.resample` where aggregating on categorical data lost the categorical dtype
(:issue:`23227`) Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 2bc0e982c2227..24bc8ffe2e5a5 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -651,8 +651,11 @@ def test_preserve_dtypes(op): # groupby result = getattr(df.groupby("A"), op)() - assert result.dtypes['B'].name == 'float64' - assert result.dtypes['C'].name == 'Int64' + expected = pd.DataFrame({ + "B": np.array([1.0, 3.0]), + "C": integer_array([1, 3], dtype="Int64") + }, index=pd.Index(['a', 'b'], name='A')) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize('op', ['mean']) diff --git a/pandas/tests/sparse/test_groupby.py b/pandas/tests/sparse/test_groupby.py index ffb9673a37158..61d7ba4cbade0 100644 --- a/pandas/tests/sparse/test_groupby.py +++ b/pandas/tests/sparse/test_groupby.py @@ -22,15 +22,23 @@ def setup_method(self, method): def test_first_last_nth(self): # tests for first / last / nth sparse_grouped = self.sparse.groupby('A') - dense_grouped = self.dense.to_sparse().groupby('A') + dense_grouped = self.dense.groupby('A') + + sparse_grouped_first = sparse_grouped.first() + sparse_grouped_last = sparse_grouped.last() + sparse_grouped_nth = sparse_grouped.nth(1) + + dense_grouped_first = dense_grouped.first().to_sparse() + dense_grouped_last = dense_grouped.last().to_sparse() + dense_grouped_nth = dense_grouped.nth(1).to_sparse() # TODO: shouldn't these all be spares or not? 
- tm.assert_frame_equal(sparse_grouped.first(), - dense_grouped.first()) - tm.assert_frame_equal(sparse_grouped.last(), - dense_grouped.last()) - tm.assert_frame_equal(sparse_grouped.nth(1), - dense_grouped.nth(1)) + tm.assert_frame_equal(sparse_grouped_first, + dense_grouped_first) + tm.assert_frame_equal(sparse_grouped_last, + dense_grouped_last) + tm.assert_frame_equal(sparse_grouped_nth, + dense_grouped_nth) def test_aggfuncs(self): sparse_grouped = self.sparse.groupby('A') From 2487fd1b028fc87396ac0eb3eba2ef4dcccf39d1 Mon Sep 17 00:00:00 2001 From: Shirish Kadam Date: Mon, 5 Nov 2018 19:24:15 +0530 Subject: [PATCH 13/13] Applied suggestions to sparse/test_groupby --- pandas/tests/sparse/test_groupby.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/tests/sparse/test_groupby.py b/pandas/tests/sparse/test_groupby.py index 61d7ba4cbade0..d0ff2a02c4046 100644 --- a/pandas/tests/sparse/test_groupby.py +++ b/pandas/tests/sparse/test_groupby.py @@ -44,15 +44,19 @@ def test_aggfuncs(self): sparse_grouped = self.sparse.groupby('A') dense_grouped = self.dense.groupby('A') - tm.assert_frame_equal(sparse_grouped.mean().to_sparse(), - dense_grouped.mean().to_sparse()) + result = sparse_grouped.mean().to_sparse() + expected = dense_grouped.mean().to_sparse() + + tm.assert_frame_equal(result, expected) # ToDo: sparse sum includes str column # tm.assert_frame_equal(sparse_grouped.sum(), # dense_grouped.sum()) - tm.assert_frame_equal(sparse_grouped.count().to_sparse(), - dense_grouped.count().to_sparse()) + result = sparse_grouped.count().to_sparse() + expected = dense_grouped.count().to_sparse() + + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("fill_value", [0, np.nan])