Skip to content

Commit 05c9a74

Browse files
committed
Merge pull request #5604 from gdraps/groupby-dispatch-part-2
Expand groupby dispatch whitelist (GH5480)
2 parents b8c6756 + b5a07bc commit 05c9a74

File tree

2 files changed: +96 additions, -15 deletions

2 files changed: +96 additions, -15 deletions

pandas/core/groupby.py

Lines changed: 29 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -50,13 +50,30 @@
5050
# forwarding methods from NDFrames
5151
_plotting_methods = frozenset(['plot', 'boxplot', 'hist'])
5252

53-
_apply_whitelist = frozenset(['last', 'first',
54-
'mean', 'sum', 'min', 'max',
55-
'cumsum', 'cumprod', 'cummin', 'cummax',
56-
'resample',
57-
'describe',
58-
'rank', 'quantile', 'count',
59-
'fillna', 'dtype']) | _plotting_methods
53+
_common_apply_whitelist = frozenset([
54+
'last', 'first',
55+
'head', 'tail', 'median',
56+
'mean', 'sum', 'min', 'max',
57+
'cumsum', 'cumprod', 'cummin', 'cummax', 'cumcount',
58+
'resample',
59+
'describe',
60+
'rank', 'quantile', 'count',
61+
'fillna',
62+
'mad',
63+
'any', 'all',
64+
'irow', 'take',
65+
'shift', 'tshift',
66+
'ffill', 'bfill',
67+
'pct_change', 'skew',
68+
'corr', 'cov',
69+
]) | _plotting_methods
70+
71+
_series_apply_whitelist = \
72+
(_common_apply_whitelist - set(['boxplot'])) | \
73+
frozenset(['dtype', 'value_counts'])
74+
75+
_dataframe_apply_whitelist = \
76+
_common_apply_whitelist | frozenset(['dtypes', 'corrwith'])
6077

6178

6279
class GroupByError(Exception):
@@ -185,6 +202,7 @@ class GroupBy(PandasObject):
185202
len(grouped) : int
186203
Number of groups
187204
"""
205+
_apply_whitelist = _common_apply_whitelist
188206

189207
def __init__(self, obj, keys=None, axis=0, level=None,
190208
grouper=None, exclusions=None, selection=None, as_index=True,
@@ -252,7 +270,7 @@ def _selection_list(self):
252270
return self._selection
253271

254272
def _local_dir(self):
255-
return sorted(set(self.obj._local_dir() + list(_apply_whitelist)))
273+
return sorted(set(self.obj._local_dir() + list(self._apply_whitelist)))
256274

257275
def __getattr__(self, attr):
258276
if attr in self.obj:
@@ -268,7 +286,7 @@ def __getitem__(self, key):
268286
raise NotImplementedError
269287

270288
def _make_wrapper(self, name):
271-
if name not in _apply_whitelist:
289+
if name not in self._apply_whitelist:
272290
is_callable = callable(getattr(self.obj, name, None))
273291
kind = ' callable ' if is_callable else ' '
274292
msg = ("Cannot access{0}attribute {1!r} of {2!r} objects, try "
@@ -1605,6 +1623,7 @@ def _convert_grouper(axis, grouper):
16051623

16061624

16071625
class SeriesGroupBy(GroupBy):
1626+
_apply_whitelist = _series_apply_whitelist
16081627

16091628
def aggregate(self, func_or_funcs, *args, **kwargs):
16101629
"""
@@ -2401,6 +2420,7 @@ def add_indices():
24012420

24022421

24032422
class DataFrameGroupBy(NDFrameGroupBy):
2423+
_apply_whitelist = _dataframe_apply_whitelist
24042424

24052425
_block_agg_axis = 1
24062426

pandas/tests/test_groupby.py

Lines changed: 67 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -3221,10 +3221,67 @@ def test_groupby_whitelist(self):
32213221
'letters': Series(random_letters)})
32223222
s = df.floats
32233223

3224-
blacklist = ['eval', 'query', 'abs', 'shift', 'tshift', 'where',
3225-
'mask', 'align', 'groupby', 'clip', 'astype',
3226-
'at', 'combine', 'consolidate', 'convert_objects',
3227-
'corr', 'corr_with', 'cov']
3224+
df_whitelist = frozenset([
3225+
'last', 'first',
3226+
'mean', 'sum', 'min', 'max',
3227+
'head', 'tail',
3228+
'cumsum', 'cumprod', 'cummin', 'cummax', 'cumcount',
3229+
'resample',
3230+
'describe',
3231+
'rank', 'quantile', 'count',
3232+
'fillna',
3233+
'mad',
3234+
'any', 'all',
3235+
'irow', 'take',
3236+
'shift', 'tshift',
3237+
'ffill', 'bfill',
3238+
'pct_change', 'skew',
3239+
'plot', 'boxplot', 'hist',
3240+
'median', 'dtypes',
3241+
'corrwith', 'corr', 'cov',
3242+
])
3243+
s_whitelist = frozenset([
3244+
'last', 'first',
3245+
'mean', 'sum', 'min', 'max',
3246+
'head', 'tail',
3247+
'cumsum', 'cumprod', 'cummin', 'cummax', 'cumcount',
3248+
'resample',
3249+
'describe',
3250+
'rank', 'quantile', 'count',
3251+
'fillna',
3252+
'mad',
3253+
'any', 'all',
3254+
'irow', 'take',
3255+
'shift', 'tshift',
3256+
'ffill', 'bfill',
3257+
'pct_change', 'skew',
3258+
'plot', 'hist',
3259+
'median', 'dtype',
3260+
'corr', 'cov',
3261+
'value_counts',
3262+
])
3263+
3264+
for obj, whitelist in zip((df, s),
3265+
(df_whitelist, s_whitelist)):
3266+
gb = obj.groupby(df.letters)
3267+
self.assertEqual(whitelist, gb._apply_whitelist)
3268+
for m in whitelist:
3269+
getattr(gb, m)
3270+
3271+
def test_groupby_blacklist(self):
3272+
from string import ascii_lowercase
3273+
letters = np.array(list(ascii_lowercase))
3274+
N = 10
3275+
random_letters = letters.take(np.random.randint(0, 26, N))
3276+
df = DataFrame({'floats': N / 10 * Series(np.random.random(N)),
3277+
'letters': Series(random_letters)})
3278+
s = df.floats
3279+
3280+
blacklist = [
3281+
'eval', 'query', 'abs', 'where',
3282+
'mask', 'align', 'groupby', 'clip', 'astype',
3283+
'at', 'combine', 'consolidate', 'convert_objects',
3284+
]
32283285
to_methods = [method for method in dir(df) if method.startswith('to_')]
32293286

32303287
blacklist.extend(to_methods)
@@ -3319,8 +3376,12 @@ def test_tab_completion(self):
33193376
'groups','hist','indices','last','max','mean','median',
33203377
'min','name','ngroups','nth','ohlc','plot', 'prod',
33213378
'size','std','sum','transform','var', 'count', 'head', 'describe',
3322-
'cummax', 'dtype', 'quantile', 'rank', 'cumprod', 'tail',
3323-
'resample', 'cummin', 'fillna', 'cumsum', 'cumcount'])
3379+
'cummax', 'quantile', 'rank', 'cumprod', 'tail',
3380+
'resample', 'cummin', 'fillna', 'cumsum', 'cumcount',
3381+
'all', 'shift', 'skew', 'bfill', 'irow', 'ffill',
3382+
'take', 'tshift', 'pct_change', 'any', 'mad', 'corr', 'corrwith',
3383+
'cov', 'dtypes',
3384+
])
33243385
self.assertEqual(results, expected)
33253386

33263387
def assert_fp_equal(a, b):

0 commit comments

Comments
 (0)