From dd7919818077418b3591544f193cda77e3c302e5 Mon Sep 17 00:00:00 2001 From: Mauro Silberberg Date: Wed, 8 Dec 2021 15:09:12 -0300 Subject: [PATCH 1/3] BUG: iterating on a subset of columns in a GroupBy object (#44821) Fixes issue #44821. When trying to iterate on a subset of columns in a GroupBy object, it returned all columns, instead of the selected subset. GroupBy.__iter__ used self.obj instead of self._selected_obj (see PR #6570). --- pandas/core/groupby/groupby.py | 2 +- pandas/tests/groupby/test_indexing.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index e11d420ada29f..c5cd84fd00d13 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -758,7 +758,7 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: Generator yielding sequence of (name, subsetted object) for each group """ - return self.grouper.get_iterator(self.obj, axis=self.axis) + return self.grouper.get_iterator(self._selected_obj, axis=self.axis) # To track operations that expand dimensions, like ohlc diff --git a/pandas/tests/groupby/test_indexing.py b/pandas/tests/groupby/test_indexing.py index b9f71fd4ed96a..e45a7b652376e 100644 --- a/pandas/tests/groupby/test_indexing.py +++ b/pandas/tests/groupby/test_indexing.py @@ -285,3 +285,14 @@ def test_column_axis(column_group_df): expected = column_group_df.iloc[:, [1, 3]] tm.assert_frame_equal(result, expected) + + +def test_columns_on_iter(): + # GitHub issue #44821 + df = pd.DataFrame({k: range(10) for k in "ABC"}) + + # Group-by and select columns + cols = ["A", "B"] + for _, dg in df.groupby(df.A < 4)[cols]: + tm.assert_index_equal(dg.columns, pd.Index(cols)) + assert "C" not in dg.columns From 7f8005defc6b512cf5915d0d559734cf71244047 Mon Sep 17 00:00:00 2001 From: Mauro Silberberg Date: Sun, 19 Dec 2021 16:42:08 -0300 Subject: [PATCH 2/3] BUG: fix test in rolling groupby (#44821) Fixes test due to changes in 
GroupBy.__iter__ (see #44821). As the column `c` wasn't selected on the manual computation, it failed when trying to set it as an index. --- pandas/tests/window/test_base_indexer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index a7ad409683ec8..5593aa8351c69 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -437,7 +437,7 @@ def test_rolling_groupby_with_fixed_forward_many(group_keys, window_size): result = df.groupby("a")["b"].rolling(window=indexer, min_periods=1).sum() result.index.names = ["a", "c"] - groups = df.groupby("a")[["a", "b"]] + groups = df.groupby("a")[["a", "b", "c"]] manual = concat( [ g.assign( From 832de29326ef8c02b1a774b30e43518644e93de6 Mon Sep 17 00:00:00 2001 From: Mauro Silberberg Date: Mon, 20 Dec 2021 12:28:46 -0300 Subject: [PATCH 3/3] Updated whatsnew 1.4.0 --- doc/source/whatsnew/v1.4.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index caf3a4281561f..f98234b66646d 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -797,6 +797,7 @@ Groupby/resample/rolling - Bug in :meth:`GroupBy.nth` failing on ``axis=1`` (:issue:`43926`) - Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not respecting right bound on centered datetime-like windows, if the index contain duplicates (:issue:`3944`) - Bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` when using a :class:`pandas.api.indexers.BaseIndexer` subclass that returned unequal start and end arrays would segfault instead of raising a ``ValueError`` (:issue:`44470`) +- Fixed bug in :meth:`GroupBy.__iter__` after selecting a subset of columns in a :class:`GroupBy` object, which returned all columns instead of the chosen subset (:issue:`44821`) Reshaping ^^^^^^^^^