-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
BUG: groupby.transform(name) validates name is an aggregation #27597
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
784e621
4ad587e
b6216af
15597f5
0f77cbf
880b2d8
36a15f2
0c77a37
09703f8
8eeb01a
c0a71ce
2ce2bb7
d4bafef
57e2122
f942e55
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -39,8 +39,58 @@ Backwards incompatible API changes | |
|
||
.. _whatsnew_1000.api.other: | ||
|
||
- :class:`pandas.core.groupby.GroupBy.transform` now raises on invalid operation names (:issue:`27489`). | ||
- | ||
Groupby.transform(str) validates name is an aggregation | ||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
|
||
In previous releases, :meth:`DataFrameGroupBy.transform` and | ||
:meth:`SeriesGroupBy.transform` did not validate that the function name | ||
passed was actually the name of an aggregation. As a result, users might get a | ||
cryptic error or worse, erroneous results. Starting with this release, these | ||
methods will raise if the name of a non-aggregation is passed to them. There | ||
is no change in the behavior associated with passing a callable. | ||
|
||
Users who relied on :meth:`DataFrameGroupBy.transform` or :meth:`SeriesGroupBy.transform` | ||
for transformations such as :meth:`DataFrameGroupBy.rank`, :meth:`DataFrameGroupBy.ffill`, | ||
etc., should instead call these methods directly | ||
(:issue:`27597`) (:issue:`14274`) (:issue:`19354`) (:issue:`22509`). | ||
|
||
.. ipython:: python | ||
|
||
df = pd.DataFrame([0, 1, 100, 99]) | ||
labels = [0, 0, 1, 1] | ||
g = df.groupby(labels) | ||
|
||
*Previous behavior*: | ||
|
||
.. code-block:: ipython | ||
|
||
In [1]: g.transform('ers >= Decepticons') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. just pass it a name like 'foo' |
||
AttributeError: 'DataFrameGroupBy' object has no attribute 'ers >= Decepticons' | ||
|
||
g.transform('rank') | ||
Out[14]: | ||
0 | ||
0 1.0 | ||
1 1.0 | ||
2 2.0 | ||
3 2.0 | ||
|
||
g.rank() | ||
Out[15]: | ||
0 | ||
0 1.0 | ||
1 2.0 | ||
2 2.0 | ||
3 1.0 | ||
|
||
*New behavior*: | ||
|
||
.. ipython:: python | ||
:okexcept: | ||
|
||
g.transform('ers >= Decepticons') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use foo, & make this a code-block (so we don't have the long traceback) put the 'rank' in its own ipython block; I would also show .rank() or at least indicate that they are now the same. |
||
g.transform('rank') | ||
|
||
|
||
Other API changes | ||
^^^^^^^^^^^^^^^^^ | ||
|
@@ -78,6 +128,7 @@ Performance improvements | |
Bug fixes | ||
~~~~~~~~~ | ||
|
||
- | ||
|
||
Categorical | ||
^^^^^^^^^^^ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -241,8 +241,9 @@ class providing the base-class of operations. | |
|
||
Parameters | ||
---------- | ||
f : function | ||
Function to apply to each group | ||
func : callable or str | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. leave this as f, otherwise this is an api change |
||
Callable to apply to each group OR | ||
name of an aggregation function. | ||
|
||
Returns | ||
------- | ||
|
@@ -257,6 +258,10 @@ class providing the base-class of operations. | |
Each group is endowed with the attribute 'name' in case you need to know | ||
which group you are working on. | ||
|
||
If `func` is the name of an aggregation, the resulting value for | ||
each group is replicated along the row axis to produce an output | ||
with the same shape as the input. | ||
|
||
The current implementation imposes three requirements on f: | ||
|
||
* f must return a value that either has the same shape as the input | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -581,7 +581,7 @@ def test_cython_transform_series(op, args, targop): | |
# print(data.head()) | ||
expected = data.groupby(labels).transform(targop) | ||
|
||
tm.assert_series_equal(expected, data.groupby(labels).transform(op, *args)) | ||
tm.assert_series_equal(expected, getattr(data.groupby(labels), op)(*args)) | ||
tm.assert_series_equal(expected, getattr(data.groupby(labels), op)(*args)) | ||
|
||
|
||
|
@@ -632,7 +632,7 @@ def test_cython_transform_series(op, args, targop): | |
) | ||
def test_groupby_cum_skipna(op, skipna, input, exp): | ||
df = pd.DataFrame(input) | ||
result = df.groupby("key")["value"].transform(op, skipna=skipna) | ||
result = getattr(df.groupby("key")["value"], op)(skipna=skipna) | ||
if isinstance(exp, dict): | ||
expected = exp[(op, skipna)] | ||
else: | ||
|
@@ -710,20 +710,17 @@ def test_cython_transform_frame(op, args, targop): | |
expected = gb.apply(targop) | ||
|
||
expected = expected.sort_index(axis=1) | ||
tm.assert_frame_equal(expected, gb.transform(op, *args).sort_index(axis=1)) | ||
tm.assert_frame_equal(expected, getattr(gb, op)(*args).sort_index(axis=1)) | ||
# individual columns | ||
for c in df: | ||
if c not in ["float", "int", "float_missing"] and op != "shift": | ||
msg = "No numeric types to aggregate" | ||
with pytest.raises(DataError, match=msg): | ||
gb[c].transform(op) | ||
with pytest.raises(DataError, match=msg): | ||
getattr(gb[c], op)() | ||
else: | ||
expected = gb[c].apply(targop) | ||
expected.name = c | ||
tm.assert_series_equal(expected, gb[c].transform(op, *args)) | ||
tm.assert_series_equal(expected, getattr(gb[c], op)(*args)) | ||
tm.assert_series_equal(expected, getattr(gb[c], op)(*args)) | ||
|
||
|
||
|
@@ -765,7 +762,7 @@ def test_transform_with_non_scalar_group(): | |
), | ||
], | ||
) | ||
@pytest.mark.parametrize("agg_func", ["count", "rank", "size"]) | ||
@pytest.mark.parametrize("agg_func", ["count", "size"]) | ||
def test_transform_numeric_ret(cols, exp, comp_func, agg_func): | ||
if agg_func == "size" and isinstance(cols, list): | ||
pytest.xfail("'size' transformation not supported with NDFrameGroupy") | ||
|
@@ -1007,17 +1004,19 @@ def test_transform_invalid_name_raises(): | |
# GH#27486 | ||
df = DataFrame(dict(a=[0, 1, 1, 2])) | ||
g = df.groupby(["a", "b", "b", "c"]) | ||
with pytest.raises(ValueError, match="not a valid function name"): | ||
with pytest.raises(ValueError, match="exclusively"): | ||
g.transform("some_arbitrary_name") | ||
|
||
# method exists on the object, but is not a valid transformation/agg | ||
# make sure the error suggests using the method directly. | ||
assert hasattr(g, "aggregate") # make sure the method exists | ||
with pytest.raises(ValueError, match="not a valid function name"): | ||
with pytest.raises(ValueError, match="exclusively.+you should try"): | ||
g.transform("aggregate") | ||
|
||
# Test SeriesGroupBy | ||
g = df["a"].groupby(["a", "b", "b", "c"]) | ||
with pytest.raises(ValueError, match="not a valid function name"): | ||
ser = Series(range(4)) | ||
g = ser.groupby(["a", "b", "b", "c"]) | ||
with pytest.raises(ValueError, match="exclusively"): | ||
g.transform("some_arbitrary_name") | ||
|
||
|
||
|
@@ -1052,6 +1051,20 @@ def test_transform_agg_by_name(reduction_func, obj): | |
assert len(set(DataFrame(result).iloc[-3:, -1])) == 1 | ||
|
||
|
||
def test_transform_transformation_by_name(transformation_func): | ||
"""Make sure g.transform('name') raises a helpful error for non-agg | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add the issue refences numbers as a comment |
||
""" | ||
func = transformation_func | ||
obj = DataFrame( | ||
dict(a=[0, 0, 0, 1, 1, 1], b=range(6)), index=["A", "B", "C", "D", "E", "F"] | ||
) | ||
g = obj.groupby(np.repeat([0, 1], 3)) | ||
|
||
match = "exclusively for.+you should try" | ||
with pytest.raises(ValueError, match=match): | ||
g.transform(func) | ||
|
||
|
||
def test_transform_lambda_with_datetimetz(): | ||
# GH 27496 | ||
df = DataFrame( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
remove the: As a result.... sentence.
rised -> raise