diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 0004bc92b349e..51ca5e32d3467 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -271,7 +271,7 @@ Removal of prior version deprecations/changes - Changed the behavior of :class:`Series` constructor, it will no longer infer a datetime64 or timedelta64 dtype from string entries (:issue:`41731`) - Changed behavior of :class:`Index` constructor when passed a ``SparseArray`` or ``SparseDtype`` to retain that dtype instead of casting to ``numpy.ndarray`` (:issue:`43930`) - Changed behavior of :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``; object-dtype columns with all-bool values will no longer be included, manually cast to ``bool`` dtype first (:issue:`46188`) -- +- Enforced deprecation of :meth:`DataFrame.mean` and :meth:`DataFrame.median` dropping datetime64 and datetime64tz columns when ``numeric_only=None``; these columns are now included in the result (:issue:`29941`) .. --------------------------------------------------------------------------- .. 
_whatsnew_200.performance: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index fe6fda34a89ef..db436a4415842 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -124,7 +124,6 @@ is_1d_only_ea_dtype, is_bool_dtype, is_dataclass, - is_datetime64_any_dtype, is_dict_like, is_dtype_equal, is_extension_array_dtype, @@ -10739,29 +10738,6 @@ def _reduce( assert filter_type is None or filter_type == "bool", filter_type out_dtype = "bool" if filter_type == "bool" else None - if numeric_only is None and name in ["mean", "median"]: - own_dtypes = [arr.dtype for arr in self._mgr.arrays] - - dtype_is_dt = np.array( - [is_datetime64_any_dtype(dtype) for dtype in own_dtypes], - dtype=bool, - ) - if dtype_is_dt.any(): - warnings.warn( - "DataFrame.mean and DataFrame.median with numeric_only=None " - "will include datetime64 and datetime64tz columns in a " - "future version.", - FutureWarning, - stacklevel=find_stack_level(), - ) - # Non-copy equivalent to - # dt64_cols = self.dtypes.apply(is_datetime64_any_dtype) - # cols = self.columns[~dt64_cols] - # self = self[cols] - predicate = lambda x: not is_datetime64_any_dtype(x.dtype) - mgr = self._mgr._get_data_subset(predicate) - self = type(self)(mgr) - # TODO: Make other agg func handle axis=None properly GH#21597 axis = self._get_axis_number(axis) labels = self._get_agg_axis(axis) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 6a4af7a8cd32e..a7e0a0df021a8 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -74,15 +74,12 @@ def assert_stat_op_calc( f = getattr(frame, opname) if check_dates: - expected_warning = FutureWarning if opname in ["mean", "median"] else None df = DataFrame({"b": date_range("1/1/2001", periods=2)}) - with tm.assert_produces_warning(expected_warning): - result = getattr(df, opname)() + result = getattr(df, opname)() assert isinstance(result, Series) df["a"] = range(len(df)) - with 
tm.assert_produces_warning(expected_warning): - result = getattr(df, opname)() + result = getattr(df, opname)() assert isinstance(result, Series) assert len(result) @@ -384,21 +381,16 @@ def test_nunique(self): def test_mean_mixed_datetime_numeric(self, tz): # https://github.com/pandas-dev/pandas/issues/24752 df = DataFrame({"A": [1, 1], "B": [Timestamp("2000", tz=tz)] * 2}) - with tm.assert_produces_warning(FutureWarning): - result = df.mean() - expected = Series([1.0], index=["A"]) + result = df.mean() + expected = Series([1.0, Timestamp("2000", tz=tz)], index=["A", "B"]) tm.assert_series_equal(result, expected) @pytest.mark.parametrize("tz", [None, "UTC"]) - def test_mean_excludes_datetimes(self, tz): + def test_mean_only_datetimes(self, tz): # https://github.com/pandas-dev/pandas/issues/24752 - # Our long-term desired behavior is unclear, but the behavior in - # 0.24.0rc1 was buggy. df = DataFrame({"A": [Timestamp("2000", tz=tz)] * 2}) - with tm.assert_produces_warning(FutureWarning): - result = df.mean() - - expected = Series(dtype=np.float64) + result = df.mean() + expected = Series([Timestamp("2000", tz=tz)], index=["A"]) tm.assert_series_equal(result, expected) def test_mean_mixed_string_decimal(self): @@ -864,10 +856,8 @@ def test_mean_datetimelike(self): expected = Series({"A": 1.0}) tm.assert_series_equal(result, expected) - with tm.assert_produces_warning(FutureWarning): - # in the future datetime columns will be included - result = df.mean() - expected = Series({"A": 1.0, "C": df.loc[1, "C"]}) + result = df.mean() + expected = Series({"A": 1.0, "B": Timestamp("2016-01-02"), "C": df.loc[1, "C"]}) tm.assert_series_equal(result, expected) def test_mean_datetimelike_numeric_only_false(self):