Skip to content

Commit 30013ee

Browse files
author
Kei
committed
Move calculation of observed grouper to when initialising groupby
1 parent baa1b28 commit 30013ee

File tree

2 files changed

+26
-36
lines changed

2 files changed

+26
-36
lines changed

pandas/core/groupby/generic.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2040,12 +2040,13 @@ def _gotitem(self, key, ndim: int, subset=None):
20402040
group_keys=self.group_keys,
20412041
observed=self.observed,
20422042
dropna=self.dropna,
2043+
observed_grouper=self.observed_grouper,
2044+
observed_exclusions=self.observed_exclusions,
20432045
)
20442046
elif ndim == 1:
20452047
if subset is None:
20462048
subset = self.obj[key]
20472049

2048-
orig_obj = self.orig_obj if not self.observed else None
20492050
return SeriesGroupBy(
20502051
subset,
20512052
self.keys,
@@ -2058,7 +2059,8 @@ def _gotitem(self, key, ndim: int, subset=None):
20582059
group_keys=self.group_keys,
20592060
observed=self.observed,
20602061
dropna=self.dropna,
2061-
orig_obj=orig_obj,
2062+
observed_grouper=self.observed_grouper,
2063+
observed_exclusions=self.observed_exclusions,
20622064
)
20632065

20642066
raise AssertionError("invalid ndim for _gotitem")

pandas/core/groupby/groupby.py

Lines changed: 22 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -586,9 +586,10 @@ class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin):
586586
"keys",
587587
"level",
588588
"obj",
589-
"orig_obj",
590589
"observed",
591590
"sort",
591+
"observed_grouper",
592+
"observed_exclusions",
592593
}
593594

594595
_grouper: ops.BaseGrouper
@@ -1107,7 +1108,8 @@ def __init__(
11071108
group_keys: bool = True,
11081109
observed: bool = False,
11091110
dropna: bool = True,
1110-
orig_obj: NDFrameT | None = None,
1111+
observed_grouper: ops.BaseGrouper | None = None,
1112+
observed_exclusions: frozenset[Hashable] | None = None,
11111113
) -> None:
11121114
self._selection = selection
11131115

@@ -1119,8 +1121,8 @@ def __init__(
11191121
self.sort = sort
11201122
self.group_keys = group_keys
11211123
self.dropna = dropna
1122-
self.orig_obj = obj if orig_obj is None else orig_obj
11231124

1125+
orig_obj = obj
11241126
if grouper is None:
11251127
grouper, exclusions, obj = get_grouper(
11261128
obj,
@@ -1136,6 +1138,21 @@ def __init__(
11361138
self._grouper = grouper
11371139
self.exclusions = frozenset(exclusions) if exclusions else frozenset()
11381140

1141+
if not observed and observed_grouper is None:
1142+
observed_grouper, observed_exclusions, _ = get_grouper(
1143+
orig_obj,
1144+
self.keys,
1145+
level=self.level,
1146+
sort=self.sort,
1147+
observed=True,
1148+
dropna=self.dropna,
1149+
)
1150+
1151+
self.observed_grouper = observed_grouper
1152+
self.observed_exclusions = (
1153+
frozenset(observed_exclusions) if observed_exclusions else frozenset()
1154+
)
1155+
11391156
def __getattr__(self, attr: str):
11401157
if attr in self._internal_names_set:
11411158
return object.__getattribute__(self, attr)
@@ -1887,44 +1904,15 @@ def _transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):
18871904
func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs
18881905
)
18891906

1890-
grouper, exclusions, obj = get_grouper(
1891-
self.orig_obj,
1892-
self.keys,
1893-
level=self.level,
1894-
sort=self.sort,
1895-
observed=True,
1896-
dropna=self.dropna,
1897-
)
1898-
exclusions = frozenset(exclusions) if exclusions else frozenset()
1899-
obj_has_not_changed = self.orig_obj.equals(self.obj)
1900-
19011907
with (
19021908
com.temp_setattr(self, "observed", True),
1903-
com.temp_setattr(self, "_grouper", grouper),
1904-
com.temp_setattr(self, "exclusions", exclusions),
1905-
com.temp_setattr(self, "obj", obj, condition=obj_has_not_changed),
1909+
com.temp_setattr(self, "_grouper", self.observed_grouper),
1910+
com.temp_setattr(self, "exclusions", self.observed_exclusions),
19061911
):
19071912
return self._reduction_kernel_transform(
19081913
func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs
19091914
)
19101915

1911-
# with com.temp_setattr(self, "as_index", True):
1912-
# # GH#49834 - result needs groups in the index for
1913-
# # _wrap_transform_fast_result
1914-
# if func in ["idxmin", "idxmax"]:
1915-
# func = cast(Literal["idxmin", "idxmax"], func)
1916-
# result = self._idxmax_idxmin(func, True, *args, **kwargs)
1917-
# else:
1918-
# if engine is not None:
1919-
# kwargs["engine"] = engine
1920-
# kwargs["engine_kwargs"] = engine_kwargs
1921-
# result = getattr(self, func)(*args, **kwargs)
1922-
1923-
# print("result with observed = False\n", result.to_string())
1924-
# r = self._wrap_transform_fast_result(result)
1925-
# print("reindexed result", r.to_string())
1926-
# return r
1927-
19281916
@final
19291917
def _reduction_kernel_transform(
19301918
self, func, *args, engine=None, engine_kwargs=None, **kwargs

0 commit comments

Comments
 (0)