Skip to content

Commit ccf9519

Browse files
authored
BUG: Sampling over selected groupbys does not reflect the selection (#40039)
1 parent bff366a commit ccf9519

File tree

3 files changed

+13
-1
lines changed

3 files changed

+13
-1
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,7 @@ Groupby/resample/rolling
514514
- Bug in :meth:`DataFrameGroupBy.apply` where a :class:`MultiIndex` would be created instead of an :class:`Index` if a :class:`core.window.rolling.RollingGroupby` object was created (:issue:`39732`)
515515
- Bug in :meth:`DataFrameGroupBy.sample` where an error was raised when ``weights`` was specified and the index was an :class:`Int64Index` (:issue:`39927`)
516516
- Bug in :meth:`DataFrameGroupBy.aggregate` and :meth:`.Resampler.aggregate` where a ``SpecificationError`` would sometimes be raised when passed a dictionary and columns were missing; a ``KeyError`` is now always raised instead (:issue:`40004`)
517+
- Bug in :meth:`DataFrameGroupBy.sample` where column selection was not applied to the sample result (:issue:`39928`)
517518
-
518519

519520
Reshaping

pandas/core/groupby/groupby.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -3098,11 +3098,12 @@ def sample(
30983098
if random_state is not None:
30993099
random_state = com.random_state(random_state)
31003100

3101+
group_iterator = self.grouper.get_iterator(self._selected_obj, self.axis)
31013102
samples = [
31023103
obj.sample(
31033104
n=n, frac=frac, replace=replace, weights=w, random_state=random_state
31043105
)
3105-
for (_, obj), w in zip(self, ws)
3106+
for (_, obj), w in zip(group_iterator, ws)
31063107
]
31073108

31083109
return concat(samples, axis=self.axis)

pandas/tests/groupby/test_sample.py

+10
Original file line numberDiff line numberDiff line change
@@ -132,3 +132,13 @@ def test_groupby_sample_with_weights(index, expected_index):
132132
result = df.groupby("a")["b"].sample(n=2, replace=True, weights=[1, 0, 1, 0])
133133
expected = Series(values, name="b", index=Index(expected_index))
134134
tm.assert_series_equal(result, expected)
135+
136+
137+
def test_groupby_sample_with_selections():
138+
# GH 39928
139+
values = [1] * 10 + [2] * 10
140+
df = DataFrame({"a": values, "b": values, "c": values})
141+
142+
result = df.groupby("a")[["b", "c"]].sample(n=None, frac=None)
143+
expected = DataFrame({"b": [1, 2], "c": [1, 2]}, index=result.index)
144+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)