pandas-dev · mroeschke · Jul 5, 2022 · Jun 29, 2022 · Jul 1, 2022
diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
@@ -275,6 +275,7 @@ Other enhancements
 - :class:`.DataError`, :class:`.SpecificationError`, :class:`.SettingWithCopyError`, :class:`.SettingWithCopyWarning`, :class:`.NumExprClobberingError`, :class:`.UndefinedVariableError`, and :class:`.IndexingError` are now exposed in ``pandas.errors`` (:issue:`27656`)
 - Added ``check_like`` argument to :func:`testing.assert_series_equal` (:issue:`47247`)
 - Allow reading compressed SAS files with :func:`read_sas` (e.g., ``.sas7bdat.gz`` files)
+- :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_150.notable_bug_fixes:
@@ -766,7 +767,8 @@ Other Deprecations
 - Deprecated the argument ``na_sentinel`` in :func:`factorize`, :meth:`Index.factorize`, and :meth:`.ExtensionArray.factorize`; pass ``use_na_sentinel=True`` instead to use the sentinel ``-1`` for NaN values and ``use_na_sentinel=False`` instead of ``na_sentinel=None`` to encode NaN values (:issue:`46910`)
 - Deprecated :meth:`DataFrameGroupBy.transform` not aligning the result when the UDF returned DataFrame (:issue:`45648`)
 - Clarified warning from :func:`to_datetime` when delimited dates can't be parsed in accordance to specified ``dayfirst`` argument (:issue:`46210`)
-
+- Deprecated :class:`Series` and :class:`Resampler` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) raising a ``NotImplementedError`` when the dtype is non-numeric and ``numeric_only=True`` is provided; this will raise a ``TypeError`` in a future version (:issue:`47500`)
+- Deprecated :meth:`Series.rank` returning an empty result when the dtype is non-numeric and ``numeric_only=True`` is provided; this will raise a ``TypeError`` in a future version (:issue:`47500`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_150.performance:

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -8689,6 +8689,15 @@ def ranker(data):
                     )
 
         if numeric_only:
+            if self.ndim == 1 and not is_numeric_dtype(self.dtype):
+                # GH#47500
+                warnings.warn(
+                    f"Calling Series.rank with numeric_only={numeric_only} and dtype "
+                    f"{self.dtype} is deprecated and will raise a TypeError in a "
+                    "future version of pandas",
+                    category=FutureWarning,
+                    stacklevel=find_stack_level(),
+                )
             data = self._get_numeric_data()
         else:
             data = self

@@ -1145,7 +1145,7 @@ def _cython_transform(
     ) -> DataFrame:
         assert axis == 0  # handled by caller
         # TODO: no tests with self.ndim == 1 for DataFrameGroupBy
-        numeric_only_bool = self._resolve_numeric_only(numeric_only, axis)
+        numeric_only_bool = self._resolve_numeric_only(how, numeric_only, axis)
 
         # With self.axis == 0, we have multi-block tests
         #  e.g. test_rank_min_int, test_cython_transform_frame

@@ -1291,7 +1291,7 @@ def _wrap_applied_output(
         raise AbstractMethodError(self)
 
     def _resolve_numeric_only(
-        self, numeric_only: bool | lib.NoDefault, axis: int
+        self, how: str, numeric_only: bool | lib.NoDefault, axis: int
     ) -> bool:
         """
         Determine subclass-specific default value for 'numeric_only'.
@@ -1328,6 +1328,20 @@ def _resolve_numeric_only(
             else:
                 numeric_only = False
 
+        if numeric_only and self.obj.ndim == 1 and not is_numeric_dtype(self.obj.dtype):
+            # GH#47500
+            how = "sum" if how == "add" else how
+            warnings.warn(
+                f"{type(self).__name__}.{how} called with "
+                f"numeric_only={numeric_only} and dtype {self.obj.dtype}. This will "
+                "raise a TypeError in a future version of pandas",
+                category=FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+            raise NotImplementedError(
+                f"{type(self).__name__}.{how} does not implement numeric_only"
+            )
+
         return numeric_only
 
     def _maybe_warn_numeric_only_depr(
@@ -1704,7 +1718,7 @@ def _cython_agg_general(
     ):
         # Note: we never get here with how="ohlc" for DataFrameGroupBy;
         #  that goes through SeriesGroupBy
-        numeric_only_bool = self._resolve_numeric_only(numeric_only, axis=0)
+        numeric_only_bool = self._resolve_numeric_only(how, numeric_only, axis=0)
 
         data = self._get_data_to_aggregate()
         is_ser = data.ndim == 1
@@ -2100,7 +2114,7 @@ def mean(
         2    4.0
         Name: B, dtype: float64
         """
-        numeric_only_bool = self._resolve_numeric_only(numeric_only, axis=0)
+        numeric_only_bool = self._resolve_numeric_only("mean", numeric_only, axis=0)
 
         if maybe_use_numba(engine):
             from pandas.core._numba.kernels import sliding_mean
@@ -2134,7 +2148,7 @@ def median(self, numeric_only: bool | lib.NoDefault = lib.no_default):
         Series or DataFrame
             Median of values within each group.
         """
-        numeric_only_bool = self._resolve_numeric_only(numeric_only, axis=0)
+        numeric_only_bool = self._resolve_numeric_only("median", numeric_only, axis=0)
 
         result = self._cython_agg_general(
             "median",
@@ -2196,10 +2210,15 @@ def std(
             return np.sqrt(self._numba_agg_general(sliding_var, engine_kwargs, ddof))
         else:
             # Resolve numeric_only so that var doesn't warn
-            numeric_only_bool = self._resolve_numeric_only(numeric_only, axis=0)
-            if numeric_only_bool and self.obj.ndim == 1:
-                raise NotImplementedError(
-                    f"{type(self).__name__}.std does not implement numeric_only."
+            numeric_only_bool = self._resolve_numeric_only("std", numeric_only, axis=0)
+            if (
+                numeric_only_bool
+                and self.obj.ndim == 1
+                and not is_numeric_dtype(self.obj.dtype)
+            ):
+                raise TypeError(
+                    f"{type(self).__name__}.std called with "
+                    f"numeric_only={numeric_only} and dtype {self.obj.dtype}"
                 )
             result = self._get_cythonized_result(
                 libgroupby.group_var,
@@ -2264,7 +2283,7 @@ def var(
 
             return self._numba_agg_general(sliding_var, engine_kwargs, ddof)
         else:
-            numeric_only_bool = self._resolve_numeric_only(numeric_only, axis=0)
+            numeric_only_bool = self._resolve_numeric_only("var", numeric_only, axis=0)
             if ddof == 1:
                 return self._cython_agg_general(
                     "var",
@@ -2304,10 +2323,15 @@ def sem(self, ddof: int = 1, numeric_only: bool | lib.NoDefault = lib.no_default
             Standard error of the mean of values within each group.
         """
         # Reolve numeric_only so that std doesn't warn
-        numeric_only_bool = self._resolve_numeric_only(numeric_only, axis=0)
-        if numeric_only_bool and self.obj.ndim == 1:
-            raise NotImplementedError(
-                f"{type(self).__name__}.sem does not implement numeric_only."
+        numeric_only_bool = self._resolve_numeric_only("sem", numeric_only, axis=0)
+        if (
+            numeric_only_bool
+            and self.obj.ndim == 1
+            and not is_numeric_dtype(self.obj.dtype)
+        ):
+            raise TypeError(
+                f"{type(self).__name__}.sem called with "
+                f"numeric_only={numeric_only} and dtype {self.obj.dtype}"
             )
         result = self.std(ddof=ddof, numeric_only=numeric_only_bool)
         self._maybe_warn_numeric_only_depr("sem", result, numeric_only)
@@ -3179,10 +3203,15 @@ def quantile(
         a    2.0
         b    3.0
         """
-        numeric_only_bool = self._resolve_numeric_only(numeric_only, axis=0)
-        if numeric_only_bool and self.obj.ndim == 1:
-            raise NotImplementedError(
-                f"{type(self).__name__}.quantile does not implement numeric_only"
+        numeric_only_bool = self._resolve_numeric_only("quantile", numeric_only, axis=0)
+        if (
+            numeric_only_bool
+            and self.obj.ndim == 1
+            and not is_numeric_dtype(self.obj.dtype)
+        ):
+            raise TypeError(
+                f"{type(self).__name__}.quantile called with "
+                f"numeric_only={numeric_only} and dtype {self.obj.dtype}"
             )
 
         def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, np.dtype | None]:
@@ -3671,7 +3700,8 @@ def _get_cythonized_result(
         -------
         `Series` or `DataFrame`  with filled values
         """
-        numeric_only_bool = self._resolve_numeric_only(numeric_only, axis=0)
+        how = base_func.__name__
+        numeric_only_bool = self._resolve_numeric_only(how, numeric_only, axis=0)
 
         if post_processing and not callable(post_processing):
             raise ValueError("'post_processing' must be a callable!")
@@ -3682,7 +3712,6 @@ def _get_cythonized_result(
 
         ids, _, ngroups = grouper.group_info
 
-        how = base_func.__name__
         base_func = partial(base_func, labels=ids)
 
         def blk_func(values: ArrayLike) -> ArrayLike:

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -80,6 +80,7 @@
     is_integer,
     is_iterator,
     is_list_like,
+    is_numeric_dtype,
     is_object_dtype,
     is_scalar,
     pandas_dtype,
@@ -4616,10 +4617,17 @@ def _reduce(
 
         else:
             # dispatch to numpy arrays
-            if numeric_only:
+            if numeric_only and not is_numeric_dtype(self.dtype):
                 kwd_name = "numeric_only"
                 if name in ["any", "all"]:
                     kwd_name = "bool_only"
+                # GH#47500 - change to TypeError to match other methods
+                warnings.warn(
+                    f"Calling Series.{name} with {kwd_name}={numeric_only} and "
+                    f"dtype {self.dtype} will raise a TypeError in the future",
+                    FutureWarning,
+                    stacklevel=find_stack_level(),
+                )
                 raise NotImplementedError(
                     f"Series.{name} does not implement {kwd_name}."
                 )

diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py
@@ -92,8 +92,9 @@ def test_cython_agg_boolean():
 def test_cython_agg_nothing_to_agg():
     frame = DataFrame({"a": np.random.randint(0, 5, 50), "b": ["foo", "bar"] * 25})
 
-    with pytest.raises(NotImplementedError, match="does not implement"):
-        frame.groupby("a")["b"].mean(numeric_only=True)
+    with tm.assert_produces_warning(FutureWarning, match="This will raise a TypeError"):
+        with pytest.raises(NotImplementedError, match="does not implement"):
+            frame.groupby("a")["b"].mean(numeric_only=True)
 
     with pytest.raises(TypeError, match="Could not convert (foo|bar)*"):
         frame.groupby("a")["b"].mean()
@@ -114,8 +115,9 @@ def test_cython_agg_nothing_to_agg_with_dates():
             "dates": pd.date_range("now", periods=50, freq="T"),
         }
     )
-    with pytest.raises(NotImplementedError, match="does not implement"):
-        frame.groupby("b").dates.mean(numeric_only=True)
+    with tm.assert_produces_warning(FutureWarning, match="This will raise a TypeError"):
+        with pytest.raises(NotImplementedError, match="does not implement"):
+            frame.groupby("b").dates.mean(numeric_only=True)
 
 
 def test_cython_agg_frame_columns():