diff --git a/pandas/core/apply.py b/pandas/core/apply.py index bb39e18caeaa2..110aeb0ffb22d 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -136,6 +136,10 @@ def f(x): self.orig_f: AggFuncType = func self.f: AggFuncType = f + # For deprecation warning on apply using agg + self.apply_used_agg = False + self.agg_used_apply = False + @abc.abstractmethod def apply(self) -> DataFrame | Series: pass @@ -514,7 +518,9 @@ def apply_multiple(self) -> DataFrame | Series: result: Series, DataFrame, or None Result when self.f is a list-like or dict-like, None otherwise. """ - return self.obj.aggregate(self.f, self.axis, *self.args, **self.kwargs) + self.apply_used_agg = True + _, result = self.obj._aggregate(self.f, self.axis, *self.args, **self.kwargs) + return result def normalize_dictlike_arg( self, how: str, obj: DataFrame | Series, func: AggFuncTypeDict @@ -711,7 +717,8 @@ def agg(self): result = result.T if result is not None else result if result is None: - result = self.obj.apply(self.orig_f, axis, args=self.args, **self.kwargs) + self.agg_used_apply = True + _, result = obj._apply(self.orig_f, axis, args=self.args, **self.kwargs) return result diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index 7cf34635ce9c1..5c80505a3e820 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -361,6 +361,12 @@ def reconstruct(result): elif self.ndim == 1: # ufunc(series, ...) inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs) + if method == "apply": + # Avoid deprecation warnings from an internal call + method = "_apply" + elif method == "agg": + # Avoid deprecation warnings from an internal call + method = "_agg" result = getattr(ufunc, method)(*inputs, **kwargs) else: # ufunc(dataframe) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 48b18a33f9c9f..73a3d2667b13b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8516,6 +8516,19 @@ def _gotitem( examples=_agg_examples_doc, ) def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs): + used_apply, result = self._aggregate(func, axis, *args, **kwargs) + if used_apply: + warnings.warn( + "pandas internally used apply() to compute part of the result. " + "In a future version, agg() will be used internally instead, " + "possibly resulting in a different behavior.", + FutureWarning, + stacklevel=2, + ) + return result + + def _aggregate(self, func=None, axis: Axis = 0, *args, **kwargs): + """Method for internal calls to aggregate.""" from pandas.core.apply import frame_apply axis = self._get_axis_number(axis) @@ -8537,7 +8550,7 @@ def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs): result_in_dict = relabel_result(result, func, columns, order) result = DataFrame(result_in_dict, index=columns) - return result + return op.agg_used_apply, result agg = aggregate @@ -8702,6 +8715,27 @@ def apply( 1 1 2 2 1 2 """ + used_agg, result = self._apply(func, axis, raw, result_type, args, **kwargs) + if used_agg: + warnings.warn( + "pandas internally used aggregate() to compute part of the result. " + "In a future version, apply() will be used internally instead, " + "possibly resulting in a different behavior.", + FutureWarning, + stacklevel=2, + ) + return result + + def _apply( + self, + func: AggFuncType, + axis: Axis = 0, + raw: bool = False, + result_type=None, + args=(), + **kwargs, + ): + """For internal calls to apply to avoid deprecation warnings.""" from pandas.core.apply import frame_apply op = frame_apply( @@ -8713,7 +8747,8 @@ def apply( args=args, kwargs=kwargs, ) - return op.apply() + result = op.apply() + return op.apply_used_agg, result def applymap( self, func: PythonFuncType, na_action: str | None = None, **kwargs diff --git a/pandas/core/series.py b/pandas/core/series.py index ce986f2dd8038..de7183fedd774 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4209,6 +4209,20 @@ def _gotitem(self, key, ndim, subset=None) -> Series: examples=_agg_examples_doc, ) def aggregate(self, func=None, axis=0, *args, **kwargs): + used_apply, result = self._aggregate(func, axis, *args, **kwargs) + if used_apply: + warnings.warn( + "pandas internally used apply() to compute part of the result. " + "In a future version, agg() will be used internally instead, " + "possibly resulting in a different behavior.", + FutureWarning, + stacklevel=2, + ) + return result + + agg = aggregate + + def _aggregate(self, func=None, axis=0, *args, **kwargs): # Validate the axis parameter self._get_axis_number(axis) @@ -4218,9 +4232,7 @@ def aggregate(self, func=None, axis=0, *args, **kwargs): op = SeriesApply(self, func, convert_dtype=False, args=args, kwargs=kwargs) result = op.agg() - return result - - agg = aggregate + return op.agg_used_apply, result @doc( _shared_docs["transform"], @@ -4347,7 +4359,27 @@ def apply( Helsinki 2.484907 dtype: float64 """ - return SeriesApply(self, func, convert_dtype, args, kwargs).apply() + used_agg, result = self._apply(func, convert_dtype, args, **kwargs) + if used_agg: + warnings.warn( + "pandas internally used aggregate() to compute part of the result. " + "In a future version, apply() will be used internally instead, " + "possibly resulting in a different behavior.", + FutureWarning, + stacklevel=2, + ) + return result + + def _apply( + self, + func: AggFuncType, + convert_dtype: bool = True, + args: tuple[Any, ...] = (), + **kwargs, + ) -> tuple[bool, DataFrame | Series]: + op = SeriesApply(self, func, convert_dtype, args, kwargs) + result = op.apply() + return op.apply_used_agg, result def _reduce( self, diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 62983b5327a26..8cfcbb400cf9b 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -59,7 +59,8 @@ def test_apply_axis1_with_ea(): def test_agg_axis1_duplicate_index(data, dtype): # GH 42380 expected = DataFrame([[data], [data]], index=["a", "a"], dtype=dtype) - result = expected.agg(lambda x: x, axis=1) + with tm.assert_produces_warning(FutureWarning, match="used apply"): + result = expected.agg(lambda x: x, axis=1) tm.assert_frame_equal(result, expected) @@ -651,7 +652,12 @@ def apply_list(row): return [2 * row["A"], 2 * row["C"], 2 * row["B"]] df = DataFrame(np.zeros((4, 4)), columns=list("ABCD")) - result = getattr(df, op)(apply_list, axis=1) + if op == "apply": + klass, msg = None, None + else: + klass, msg = FutureWarning, "used apply" + with tm.assert_produces_warning(klass, match=msg): + result = getattr(df, op)(apply_list, axis=1) expected = Series( [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]] ) @@ -1008,7 +1014,8 @@ def test_agg_transform(axis, float_frame): tm.assert_frame_equal(result, expected) # list-like - result = float_frame.apply([np.sqrt], axis=axis) + with tm.assert_produces_warning(FutureWarning, match="used aggregate"): + result = float_frame.apply([np.sqrt], axis=axis) expected = f_sqrt.copy() if axis in {0, "index"}: expected.columns = MultiIndex.from_product([float_frame.columns, ["sqrt"]]) @@ -1019,7 +1026,8 @@ def test_agg_transform(axis, float_frame): # multiple items in list # these are in the order as if we are applying both # functions per series and then concatting - result = float_frame.apply([np.abs, np.sqrt], axis=axis) + with tm.assert_produces_warning(FutureWarning, match="used aggregate"): + result = float_frame.apply([np.abs, np.sqrt], axis=axis) expected = zip_frames([f_abs, f_sqrt], axis=other_axis) if axis in {0, "index"}: expected.columns = MultiIndex.from_product( @@ -1218,24 +1226,29 @@ def test_non_callable_aggregates(how): df = DataFrame( {"A": [None, 2, 3], "B": [1.0, np.nan, 3.0], "C": ["foo", None, "bar"]} ) - - # Function aggregate - result = getattr(df, how)({"A": "count"}) + if how == "apply": + klass, msg = FutureWarning, "used aggregate" + else: + klass, msg = None, None + with tm.assert_produces_warning(klass, match=msg): + result = getattr(df, how)({"A": "count"}) expected = Series({"A": 2}) tm.assert_series_equal(result, expected) # Non-function aggregate - result = getattr(df, how)({"A": "size"}) + with tm.assert_produces_warning(klass, match=msg): + result = getattr(df, how)({"A": "size"}) expected = Series({"A": 3}) tm.assert_series_equal(result, expected) # Mix function and non-function aggs - result1 = getattr(df, how)(["count", "size"]) - result2 = getattr(df, how)( - {"A": ["count", "size"], "B": ["count", "size"], "C": ["count", "size"]} - ) + with tm.assert_produces_warning(klass, match=msg): + result1 = getattr(df, how)(["count", "size"]) + result2 = getattr(df, how)( + {"A": ["count", "size"], "B": ["count", "size"], "C": ["count", "size"]} + ) expected = DataFrame( { "A": {"count": 2, "size": 3}, @@ -1277,7 +1290,8 @@ def test_agg_listlike_result(): def func(group_col): return list(group_col.dropna().unique()) - result = df.agg(func) + with tm.assert_produces_warning(FutureWarning, match="used apply"): + result = df.agg(func) expected = Series([[2, 3], [1.5], ["foo", "bar"]], index=["A", "B", "C"]) tm.assert_series_equal(result, expected) @@ -1310,7 +1324,8 @@ def f(x, a, b, c=3): else: expected = Series([4.0, 8.0]) - result = df.agg(f, axis, *args, **kwargs) + with tm.assert_produces_warning(FutureWarning, match="used apply"): + result = df.agg(f, axis, *args, **kwargs) tm.assert_series_equal(result, expected) @@ -1398,7 +1413,8 @@ def test_apply_empty_list_reduce(): def test_apply_no_suffix_index(): # GH36189 pdf = DataFrame([[4, 9]] * 3, columns=["A", "B"]) - result = pdf.apply(["sum", lambda x: x.sum(), lambda x: x.sum()]) + with tm.assert_produces_warning(FutureWarning, match="used aggregate"): + result = pdf.apply(["sum", lambda x: x.sum(), lambda x: x.sum()]) expected = DataFrame( {"A": [12, 12, 12], "B": [27, 27, 27]}, index=["sum", "", ""] ) diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 2af340f0c1bb9..c56a7667e3e0b 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -235,12 +235,14 @@ def test_transform(string_series): tm.assert_series_equal(result, expected) # list-like - result = string_series.apply([np.sqrt]) + with tm.assert_produces_warning(FutureWarning, match="used aggregate"): + result = string_series.apply([np.sqrt]) expected = f_sqrt.to_frame().copy() expected.columns = ["sqrt"] tm.assert_frame_equal(result, expected) - result = string_series.apply(["sqrt"]) + with tm.assert_produces_warning(FutureWarning, match="used aggregate"): + result = string_series.apply(["sqrt"]) tm.assert_frame_equal(result, expected) # multiple items in list @@ -248,7 +250,8 @@ def test_transform(string_series): # series and then concatting expected = concat([f_sqrt, f_abs], axis=1) expected.columns = ["sqrt", "absolute"] - result = string_series.apply([np.sqrt, np.abs]) + with tm.assert_produces_warning(FutureWarning, match="used aggregate"): + result = string_series.apply([np.sqrt, np.abs]) tm.assert_frame_equal(result, expected) # dict, provide renaming @@ -256,7 +259,8 @@ def test_transform(string_series): expected.columns = ["foo", "bar"] expected = expected.unstack().rename("series") - result = string_series.apply({"foo": np.sqrt, "bar": np.abs}) + with tm.assert_produces_warning(FutureWarning, match="used agg"): + result = string_series.apply({"foo": np.sqrt, "bar": np.abs}) tm.assert_series_equal(result.reindex_like(expected), expected) @@ -364,18 +368,19 @@ def test_with_nested_series(datetime_series): def test_replicate_describe(string_series): # this also tests a result set that is all scalars expected = string_series.describe() - result = string_series.apply( - { - "count": "count", - "mean": "mean", - "std": "std", - "min": "min", - "25%": lambda x: x.quantile(0.25), - "50%": "median", - "75%": lambda x: x.quantile(0.75), - "max": "max", - } - ) + with tm.assert_produces_warning(FutureWarning, match="used agg"): + result = string_series.apply( + { + "count": "count", + "mean": "mean", + "std": "std", + "min": "min", + "25%": lambda x: x.quantile(0.25), + "50%": "median", + "75%": lambda x: x.quantile(0.75), + "max": "max", + } + ) tm.assert_series_equal(result, expected) @@ -402,7 +407,13 @@ def test_non_callable_aggregates(how): assert result == expected # test when mixed w/ callable reducers - result = getattr(s, how)(["size", "count", "mean"]) + if how == "apply": + klass = FutureWarning + msg = "used aggregate" + else: + klass, msg = None, None + with tm.assert_produces_warning(klass, match=msg): + result = getattr(s, how)(["size", "count", "mean"]) expected = Series({"size": 3.0, "count": 2.0, "mean": 1.5}) tm.assert_series_equal(result, expected) @@ -410,7 +421,8 @@ def test_non_callable_aggregates(how): def test_series_apply_no_suffix_index(): # GH36189 s = Series([4] * 3) - result = s.apply(["sum", lambda x: x.sum(), lambda x: x.sum()]) + with tm.assert_produces_warning(FutureWarning, match="used agg"): + result = s.apply(["sum", lambda x: x.sum(), lambda x: x.sum()]) expected = Series([12, 12, 12], index=["sum", "", ""]) tm.assert_series_equal(result, expected) @@ -797,7 +809,13 @@ def test_apply_listlike_reducer(string_series, ops, names, how): # GH 39140 expected = Series({name: op(string_series) for name, op in zip(names, ops)}) expected.name = "series" - result = getattr(string_series, how)(ops) + if how == "apply": + klass = FutureWarning + msg = "used aggregate" + else: + klass, msg = None, None + with tm.assert_produces_warning(klass, match=msg): + result = getattr(string_series, how)(ops) tm.assert_series_equal(result, expected) @@ -815,7 +833,13 @@ def test_apply_dictlike_reducer(string_series, ops, how): # GH 39140 expected = Series({name: op(string_series) for name, op in ops.items()}) expected.name = string_series.name - result = getattr(string_series, how)(ops) + if how == "apply": + klass = FutureWarning + msg = "used aggregate" + else: + klass, msg = None, None + with tm.assert_produces_warning(klass, match=msg): + result = getattr(string_series, how)(ops) tm.assert_series_equal(result, expected) @@ -833,7 +857,8 @@ def test_apply_listlike_transformer(string_series, ops, names): with np.errstate(all="ignore"): expected = concat([op(string_series) for op in ops], axis=1) expected.columns = names - result = string_series.apply(ops) + with tm.assert_produces_warning(FutureWarning, match="used aggregate"): + result = string_series.apply(ops) tm.assert_frame_equal(result, expected) @@ -851,5 +876,6 @@ def test_apply_dictlike_transformer(string_series, ops): with np.errstate(all="ignore"): expected = concat({name: op(string_series) for name, op in ops.items()}) expected.name = string_series.name - result = string_series.apply(ops) + with tm.assert_produces_warning(FutureWarning, match="used aggregate"): + result = string_series.apply(ops) tm.assert_series_equal(result, expected)