Skip to content

Commit 5f27fb7

Browse files
committed
CLN/TYP: Groupby agg methods
1 parent 1722c05 commit 5f27fb7

File tree

2 files changed

+92
-76
lines changed

2 files changed

+92
-76
lines changed

pandas/core/groupby/groupby.py

Lines changed: 89 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,7 @@ class providing the base-class of operations.
3535

3636
from pandas._libs import Timestamp
3737
import pandas._libs.groupby as libgroupby
38-
from pandas._typing import FrameOrSeries, Scalar
39-
from pandas.compat import set_function_name
38+
from pandas._typing import AnyArrayLike, FrameOrSeries, Scalar
4039
from pandas.compat.numpy import function as nv
4140
from pandas.errors import AbstractMethodError
4241
from pandas.util._decorators import Appender, Substitution, cache_readonly, doc
@@ -192,6 +191,22 @@ class providing the base-class of operations.
192191
""",
193192
)
194193

194+
_groupby_agg_method_template = """
195+
Compute {fname} of group values.
196+
Parameters
197+
----------
198+
numeric_only : bool, default {no}
199+
Include only float, int, boolean columns. If None, will attempt to use
200+
everything, then use only numeric data.
201+
min_count : int, default {mc}
202+
The required number of valid values to perform the operation. If fewer
203+
than ``min_count`` non-NA values are present the result will be NA.
204+
Returns
205+
-------
206+
Series or DataFrame
207+
Computed {fname} of values within each group.
208+
"""
209+
195210
_pipe_template = """
196211
Apply a function `func` with arguments to this %(klass)s object and return
197212
the function's result.
@@ -945,6 +960,37 @@ def _wrap_transformed_output(self, output: Mapping[base.OutputKey, np.ndarray]):
945960
def _wrap_applied_output(self, keys, values, not_indexed_same: bool = False):
946961
raise AbstractMethodError(self)
947962

963+
def _agg_general(
964+
self,
965+
numeric_only: bool = True,
966+
min_count: int = -1,
967+
*,
968+
alias: str,
969+
npfunc: Callable,
970+
):
971+
self._set_group_selection()
972+
973+
# try a cython aggregation if we can
974+
try:
975+
return self._cython_agg_general(
976+
how=alias, alt=npfunc, numeric_only=numeric_only, min_count=min_count,
977+
)
978+
except DataError:
979+
pass
980+
except NotImplementedError as err:
981+
if "function is not implemented for this dtype" in str(
982+
err
983+
) or "category dtype not supported" in str(err):
984+
# raised in _get_cython_function, in some cases can
985+
# be trimmed by implementing cython funcs for more dtypes
986+
pass
987+
else:
988+
raise
989+
990+
# apply a non-cython aggregation
991+
result = self.aggregate(lambda x: npfunc(x, axis=self.axis))
992+
return result
993+
948994
def _cython_agg_general(
949995
self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1
950996
):
@@ -1438,73 +1484,35 @@ def size(self):
14381484
result = self._obj_1d_constructor(result)
14391485
return self._reindex_output(result, fill_value=0)
14401486

1441-
@classmethod
1442-
def _add_numeric_operations(cls):
1443-
"""
1444-
Add numeric operations to the GroupBy generically.
1445-
"""
1446-
1447-
def groupby_function(
1448-
name: str,
1449-
alias: str,
1450-
npfunc,
1451-
numeric_only: bool = True,
1452-
min_count: int = -1,
1453-
):
1487+
@doc(_groupby_agg_method_template, fname="sum", no=True, mc=0)
1488+
def sum(self, numeric_only: bool = True, min_count: int = 0):
1489+
return self._agg_general(
1490+
numeric_only=numeric_only, min_count=min_count, alias="add", npfunc=np.sum
1491+
)
14541492

1455-
_local_template = """
1456-
Compute %(f)s of group values.
1457-
1458-
Parameters
1459-
----------
1460-
numeric_only : bool, default %(no)s
1461-
Include only float, int, boolean columns. If None, will attempt to use
1462-
everything, then use only numeric data.
1463-
min_count : int, default %(mc)s
1464-
The required number of valid values to perform the operation. If fewer
1465-
than ``min_count`` non-NA values are present the result will be NA.
1466-
1467-
Returns
1468-
-------
1469-
Series or DataFrame
1470-
Computed %(f)s of values within each group.
1471-
"""
1493+
@doc(_groupby_agg_method_template, fname="prod", no=True, mc=0)
1494+
def prod(self, numeric_only: bool = True, min_count: int = 0):
1495+
return self._agg_general(
1496+
numeric_only=numeric_only, min_count=min_count, alias="prod", npfunc=np.prod
1497+
)
14721498

1473-
@Substitution(name="groupby", f=name, no=numeric_only, mc=min_count)
1474-
@Appender(_common_see_also)
1475-
@Appender(_local_template)
1476-
def func(self, numeric_only=numeric_only, min_count=min_count):
1477-
self._set_group_selection()
1499+
@doc(_groupby_agg_method_template, fname="min", no=False, mc=-1)
1500+
def min(self, numeric_only: bool = False, min_count: int = -1):
1501+
return self._agg_general(
1502+
numeric_only=numeric_only, min_count=min_count, alias="min", npfunc=np.min
1503+
)
14781504

1479-
# try a cython aggregation if we can
1480-
try:
1481-
return self._cython_agg_general(
1482-
how=alias,
1483-
alt=npfunc,
1484-
numeric_only=numeric_only,
1485-
min_count=min_count,
1486-
)
1487-
except DataError:
1488-
pass
1489-
except NotImplementedError as err:
1490-
if "function is not implemented for this dtype" in str(
1491-
err
1492-
) or "category dtype not supported" in str(err):
1493-
# raised in _get_cython_function, in some cases can
1494-
# be trimmed by implementing cython funcs for more dtypes
1495-
pass
1496-
else:
1497-
raise
1498-
1499-
# apply a non-cython aggregation
1500-
result = self.aggregate(lambda x: npfunc(x, axis=self.axis))
1501-
return result
1502-
1503-
set_function_name(func, name, cls)
1504-
1505-
return func
1505+
@doc(_groupby_agg_method_template, fname="max", no=False, mc=-1)
1506+
def max(self, numeric_only: bool = False, min_count: int = -1):
1507+
return self._agg_general(
1508+
numeric_only=numeric_only, min_count=min_count, alias="max", npfunc=np.max
1509+
)
15061510

1511+
@doc(_groupby_agg_method_template, fname="first", no=False, mc=-1)
1512+
def first(self, numeric_only: bool = False, min_count: int = -1):
15071513
def first_compat(obj: FrameOrSeries, axis: int = 0):
1514+
"""Helper function for first item that isn't NA.
1515+
"""
15081516
def first(x: Series):
15091517
x = x.array[notna(x.array)]
15101518
if len(x) == 0:
@@ -1518,7 +1526,18 @@ def first(x: Series):
15181526
else:
15191527
raise TypeError(type(obj))
15201528

1529+
return self._agg_general(
1530+
numeric_only=numeric_only,
1531+
min_count=min_count,
1532+
alias="first",
1533+
npfunc=first_compat,
1534+
)
1535+
1536+
@doc(_groupby_agg_method_template, fname="last", no=False, mc=-1)
1537+
def last(self, numeric_only: bool = False, min_count: int = -1):
15211538
def last_compat(obj: FrameOrSeries, axis: int = 0):
1539+
"""Helper function for last item that isn't NA.
1540+
"""
15221541
def last(x: Series):
15231542
x = x.array[notna(x.array)]
15241543
if len(x) == 0:
@@ -1532,12 +1551,12 @@ def last(x: Series):
15321551
else:
15331552
raise TypeError(type(obj))
15341553

1535-
cls.sum = groupby_function("sum", "add", np.sum, min_count=0)
1536-
cls.prod = groupby_function("prod", "prod", np.prod, min_count=0)
1537-
cls.min = groupby_function("min", "min", np.min, numeric_only=False)
1538-
cls.max = groupby_function("max", "max", np.max, numeric_only=False)
1539-
cls.first = groupby_function("first", "first", first_compat, numeric_only=False)
1540-
cls.last = groupby_function("last", "last", last_compat, numeric_only=False)
1554+
return self._agg_general(
1555+
numeric_only=numeric_only,
1556+
min_count=min_count,
1557+
alias="last",
1558+
npfunc=last_compat,
1559+
)
15411560

15421561
@Substitution(name="groupby")
15431562
@Appender(_common_see_also)
@@ -2637,9 +2656,6 @@ def _reindex_output(
26372656
return output.reset_index(drop=True)
26382657

26392658

2640-
GroupBy._add_numeric_operations()
2641-
2642-
26432659
@doc(GroupBy)
26442660
def get_groupby(
26452661
obj: NDFrame,

pandas/util/_decorators.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ def wrapper(*args, **kwargs) -> Callable[..., Any]:
329329
return decorate
330330

331331

332-
def doc(*args: Union[str, Callable], **kwargs: str) -> Callable[[F], F]:
332+
def doc(*args: Union[str, Callable], **kwargs) -> Callable[[F], F]:
333333
"""
334334
A decorator that takes docstring templates, concatenates them and performs string
335335
substitution on them.
@@ -345,8 +345,8 @@ def doc(*args: Union[str, Callable], **kwargs: str) -> Callable[[F], F]:
345345
*args : str or callable
346346
The string / docstring / docstring template to be appended in order
347347
after default docstring under function.
348-
**kwargs : str
349-
The string which would be used to format docstring template.
348+
**kwargs
349+
The objects used to format the docstring template.
350350
"""
351351

352352
def decorator(func: F) -> F:

0 commit comments

Comments
 (0)