-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
WIP: ENH: pivot/groupby index with nan #12607
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4132,7 +4132,7 @@ def clip_lower(self, threshold, axis=None): | |
return self.where(subset, threshold, axis=axis) | ||
|
||
def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, | ||
group_keys=True, squeeze=False, **kwargs): | ||
group_keys=True, squeeze=False, dropna=True, **kwargs): | ||
""" | ||
Group series using mapper (dict or key function, apply given function | ||
to group, return result as series) or by a series of columns. | ||
|
@@ -4164,6 +4164,10 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, | |
squeeze : boolean, default False | ||
reduce the dimensionality of the return type if possible, | ||
otherwise return a consistent type | ||
dropna : boolean, default True | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Review comment: "versionadded tag". There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reply: "done" |
||
drop NaN in the grouping values | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Review comment: "0.20.0". There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reply: "done" |
||
.. versionadded:: 0.20.0 | ||
|
||
Examples | ||
-------- | ||
|
@@ -4188,7 +4192,7 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, | |
axis = self._get_axis_number(axis) | ||
return groupby(self, by=by, axis=axis, level=level, as_index=as_index, | ||
sort=sort, group_keys=group_keys, squeeze=squeeze, | ||
**kwargs) | ||
dropna=dropna, **kwargs) | ||
|
||
def asfreq(self, freq, method=None, how=None, normalize=False, | ||
fill_value=None): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -359,8 +359,8 @@ class _GroupBy(PandasObject, SelectionMixin): | |
|
||
def __init__(self, obj, keys=None, axis=0, level=None, | ||
grouper=None, exclusions=None, selection=None, as_index=True, | ||
sort=True, group_keys=True, squeeze=False, **kwargs): | ||
|
||
sort=True, group_keys=True, squeeze=False, dropna=True, | ||
**kwargs): | ||
self._selection = selection | ||
|
||
if isinstance(obj, NDFrame): | ||
|
@@ -380,13 +380,15 @@ def __init__(self, obj, keys=None, axis=0, level=None, | |
self.group_keys = group_keys | ||
self.squeeze = squeeze | ||
self.mutated = kwargs.pop('mutated', False) | ||
self.dropna = dropna | ||
|
||
if grouper is None: | ||
grouper, exclusions, obj = _get_grouper(obj, keys, | ||
axis=axis, | ||
level=level, | ||
sort=sort, | ||
mutated=self.mutated) | ||
mutated=self.mutated, | ||
dropna=dropna) | ||
|
||
self.obj = obj | ||
self.axis = obj._get_axis_number(axis) | ||
|
@@ -968,6 +970,10 @@ class GroupBy(_GroupBy): | |
List of columns to exclude | ||
name : string | ||
Most users should ignore this | ||
dropna : boolean, default True | ||
drop NaN in the grouping values | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Review comment: "same". There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reply: "done" |
||
|
||
.. versionadded:: 0.20.0 | ||
|
||
Notes | ||
----- | ||
|
@@ -2324,6 +2330,10 @@ class Grouping(object): | |
level : | ||
in_axis : if the Grouping is a column in self.obj and hence among | ||
Groupby.exclusions list | ||
dropna : boolean, default True | ||
drop NaN in the grouping values | ||
|
||
.. versionadded:: 0.20.0 | ||
|
||
Returns | ||
------- | ||
|
@@ -2337,7 +2347,7 @@ class Grouping(object): | |
""" | ||
|
||
def __init__(self, index, grouper=None, obj=None, name=None, level=None, | ||
sort=True, in_axis=False): | ||
sort=True, in_axis=False, dropna=True): | ||
|
||
self.name = name | ||
self.level = level | ||
|
@@ -2346,6 +2356,7 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None, | |
self.sort = sort | ||
self.obj = obj | ||
self.in_axis = in_axis | ||
self.dropna = dropna | ||
|
||
# right place for this? | ||
if isinstance(grouper, (Series, Index)) and name is None: | ||
|
@@ -2396,7 +2407,6 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None, | |
|
||
# a passed Grouper like | ||
elif isinstance(self.grouper, Grouper): | ||
|
||
# get the new grouper | ||
grouper = self.grouper._get_binner_for_grouping(self.obj) | ||
self.obj = self.grouper.obj | ||
|
@@ -2433,6 +2443,11 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None, | |
from pandas import to_timedelta | ||
self.grouper = to_timedelta(self.grouper) | ||
|
||
# convert None to NaN if we are going to keep them | ||
if not dropna: | ||
if not isinstance(self.grouper, Index): | ||
self.grouper[np.equal(self.grouper, None)] = np.NaN | ||
|
||
def __repr__(self): | ||
return 'Grouping({0})'.format(self.name) | ||
|
||
|
@@ -2466,7 +2481,7 @@ def group_index(self): | |
def _make_labels(self): | ||
if self._labels is None or self._group_index is None: | ||
labels, uniques = algorithms.factorize( | ||
self.grouper, sort=self.sort) | ||
self.grouper, sort=self.sort, dropna=self.dropna) | ||
uniques = Index(uniques, name=self.name) | ||
self._labels = labels | ||
self._group_index = uniques | ||
|
@@ -2478,7 +2493,7 @@ def groups(self): | |
|
||
|
||
def _get_grouper(obj, key=None, axis=0, level=None, sort=True, | ||
mutated=False): | ||
mutated=False, dropna=True): | ||
""" | ||
create and return a BaseGrouper, which is an internal | ||
mapping of how to create the grouper indexers. | ||
|
@@ -2631,7 +2646,8 @@ def is_in_obj(gpr): | |
name=name, | ||
level=level, | ||
sort=sort, | ||
in_axis=in_axis) \ | ||
in_axis=in_axis, | ||
dropna=dropna) \ | ||
if not isinstance(gpr, Grouping) else gpr | ||
|
||
groupings.append(ping) | ||
|
@@ -3386,7 +3402,6 @@ def _post_process_cython_aggregate(self, obj): | |
return obj | ||
|
||
def aggregate(self, arg, *args, **kwargs): | ||
|
||
_level = kwargs.pop('_level', None) | ||
result, how = self._aggregate(arg, _level=_level, *args, **kwargs) | ||
if how is None: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
add version added directive
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done!