From 434e2cd70bb88c80709d5a4d1a02466105e0f22c Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 13 Jan 2019 15:40:23 -0800 Subject: [PATCH 01/18] implement+test mean for datetimelike EA/Index/Series --- pandas/core/arrays/datetimelike.py | 28 ++++++++ pandas/core/indexes/datetimelike.py | 1 + pandas/core/series.py | 3 + pandas/tests/indexes/datetimes/test_ops.py | 21 ++++++ .../tests/reductions/test_stat_reductions.py | 68 +++++++++++++++++++ 5 files changed, 121 insertions(+) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 8b5445bedd46c..5c5312dc7adad 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1434,6 +1434,34 @@ def max(self, axis=None, skipna=True, *args, **kwargs): # Don't have to worry about NA `result`, since no NA went in. return self._box_func(result) + def mean(self, axis=None, skipna=True): + """ + Return the mean value of the Array or maximum along + an axis. + + See Also + -------- + numpy.ndarray.mean + Series.mean : Return the mean value in a Series. + """ + nv.validate_minmax_axis(axis) + + mask = self.isna() + if skipna: + values = self[~mask] + elif mask.any(): + return NaT + else: + values = self + + if not len(values): + # short-circut for empty max / min + return NaT + + result = nanops.nanmean(values.view('i8'), skipna=skipna) + # Don't have to worry about NA `result`, since no NA went in. + return self._box_func(result) + # ------------------------------------------------------------------- # Shared Constructor Helpers diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index cfca5d1b7d2cc..a2f1102edc309 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -72,6 +72,7 @@ class DatetimeIndexOpsMixin(ExtensionOpsMixin): _maybe_mask_results = ea_passthrough( DatetimeLikeArrayMixin._maybe_mask_results) __iter__ = ea_passthrough(DatetimeLikeArrayMixin.__iter__) + mean = ea_passthrough(DatetimeLikeArrayMixin.mean) @property def _eadata(self): diff --git a/pandas/core/series.py b/pandas/core/series.py index 52b60339a7d68..259ee11cb83be 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3546,6 +3546,9 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, elif is_datetime64_dtype(delegate): # use DatetimeIndex implementation to handle skipna correctly delegate = DatetimeIndex(delegate) + elif is_timedelta64_dtype(delegate) and hasattr(TimedeltaIndex, name): + # use TimedeltaIndex to handle skipna correctly + delegate = TimedeltaIndex(delegate) # dispatch to numpy arrays elif isinstance(delegate, np.ndarray): diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 4be4372f65dcc..83cf24725881e 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -47,6 +47,27 @@ def test_ops_properties_basic(self): assert s.day == 10 pytest.raises(AttributeError, lambda: s.weekday) + # TODO: figure out where in tests.reductions this belongs + def test_mean(self, tz_naive_fixture): + tz = tz_naive_fixture + idx1 = pd.DatetimeIndex(['2011-01-01', '2011-01-02', + '2011-01-03'], tz=tz) + assert idx1.mean() == pd.Timestamp('2011-01-02', tz=tz) + assert idx1._data.mean() == pd.Timestamp('2011-01-02', tz=tz) + + idx2 = pd.DatetimeIndex(['2011-01-01', '2011-01-02', pd.NaT, + '2011-01-03'], tz=tz) + assert idx2.mean(skipna=False) is pd.NaT + assert idx2._data.mean(skipna=False) is pd.NaT + assert idx2.mean(skipna=True) == idx2[1] + assert idx2._data.mean(skipna=True) == idx2[1] + + idx3 = pd.DatetimeIndex([]) + assert idx3.mean() is pd.NaT + assert idx3._data.mean() is pd.NaT + assert idx3.mean(skipna=False) is pd.NaT + assert idx3._data.mean(skipna=False) is pd.NaT + def test_minmax_tz(self, tz_naive_fixture): tz = tz_naive_fixture # monotonic diff --git a/pandas/tests/reductions/test_stat_reductions.py b/pandas/tests/reductions/test_stat_reductions.py index 11ecd03f6c7e1..e6319892ab4aa 100644 --- a/pandas/tests/reductions/test_stat_reductions.py +++ b/pandas/tests/reductions/test_stat_reductions.py @@ -12,6 +12,74 @@ import pandas as pd from pandas import DataFrame, Series, compat import pandas.util.testing as tm +from pandas.core.arrays import ( + DatetimeArrayMixin as DatetimeArray, + TimedeltaArrayMixin as TimedeltaArray, + PeriodArray) + + +class TestDatetimeLikeStatReductions(object): + @pytest.mark.parametrize('box', [Series, pd.Index, DatetimeArray]) + @pytest.mark.parametrize('tz', [None, 'US/Mountain']) + def test_dt64_mean(self, tz, box): + dti = pd.date_range('2001-01-01', periods=11, tz=tz) + # shuffle so that we are not just working with monotone-increasing + dti = dti.take([4, 1, 3, 10, 9, 7, 8, 5, 0, 2, 6]) + dtarr = dti._data + + obj = box(dtarr) + assert obj.mean() == pd.Timestamp('2001-01-06', tz=tz) + assert obj.mean(skipna=False) == pd.Timestamp('2001-01-06', tz=tz) + + # dtarr[-2] will be the first date 2001-01-1 + dtarr[-2] = pd.NaT + + obj = box(dtarr) + assert obj.mean() == pd.Timestamp('2001-01-06 07:12:00', tz=tz) + assert obj.mean(skipna=False) is pd.NaT + + @pytest.mark.parametrize('box', [Series, pd.Index, PeriodArray]) + def test_period_mean(self, box): + dti = pd.date_range('2001-01-01', periods=11) + # shuffle so that we are not just working with monotone-increasing + dti = dti.take([4, 1, 3, 10, 9, 7, 8, 5, 0, 2, 6]) + + # use hourly frequency to avoid rounding errors in expected results + # TODO: flesh this out with different frequencies + parr = dti._data.to_period('H') + obj = box(parr) + assert obj.mean() == pd.Period('2001-01-06', freq='H') + assert obj.mean(skipna=False) == pd.Period('2001-01-06', freq='H') + + # parr[-2] will be the first date 2001-01-1 + parr[-2] = pd.NaT + + # with rounding, we get the period containing the result + # for the dt64 case above + obj = box(parr) + assert obj.mean() == pd.Period('2001-01-06 07:00', freq='H') + assert obj.mean(skipna=False) is pd.NaT + + @pytest.mark.parametrize('box', [Series, pd.Index, TimedeltaArray]) + def test_td64_mean(self, box): + tdi = pd.TimedeltaIndex([0, 3, -2, -7, 1, 2, -1, 3, 5, -2, 4], + unit='D') + + tdarr = tdi._data + obj = box(tdarr) + + result = obj.mean() + expected = np.array(tdarr).mean() + assert result == expected + + tdarr[0] = pd.NaT + assert obj.mean(skipna=False) is pd.NaT + + result2 = obj.mean(skipna=True) + assert result2 == tdi[1:].mean() + + # exact equality fails by 1 nanosecond + assert result2.round('us') == (result * 11. / 10).round('us') class TestSeriesStatReductions(object): From 30eeb64adb4ce7ab6bc473903154028d04459018 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 13 Jan 2019 18:00:39 -0800 Subject: [PATCH 02/18] update imports --- pandas/tests/reductions/test_stat_reductions.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/tests/reductions/test_stat_reductions.py b/pandas/tests/reductions/test_stat_reductions.py index e6319892ab4aa..a23986cd9478b 100644 --- a/pandas/tests/reductions/test_stat_reductions.py +++ b/pandas/tests/reductions/test_stat_reductions.py @@ -12,10 +12,7 @@ import pandas as pd from pandas import DataFrame, Series, compat import pandas.util.testing as tm -from pandas.core.arrays import ( - DatetimeArrayMixin as DatetimeArray, - TimedeltaArrayMixin as TimedeltaArray, - PeriodArray) +from pandas.core.arrays import DatetimeArray, TimedeltaArray, PeriodArray class TestDatetimeLikeStatReductions(object): From d48e2ef990e1c52c38802b24665fed0147c1cf26 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 13 Jan 2019 21:46:52 -0800 Subject: [PATCH 03/18] isort fixup --- pandas/tests/reductions/test_stat_reductions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reductions/test_stat_reductions.py b/pandas/tests/reductions/test_stat_reductions.py index a23986cd9478b..c897088264f55 100644 --- a/pandas/tests/reductions/test_stat_reductions.py +++ b/pandas/tests/reductions/test_stat_reductions.py @@ -11,8 +11,8 @@ import pandas as pd from pandas import DataFrame, Series, compat +from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray import pandas.util.testing as tm -from pandas.core.arrays import DatetimeArray, TimedeltaArray, PeriodArray class TestDatetimeLikeStatReductions(object): From 231458d75eb3e4683584fa4ec39f4600af18b74e Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 14 Jan 2019 08:38:59 -0800 Subject: [PATCH 04/18] params for docstring --- pandas/core/arrays/datetimelike.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 619b190448a9d..ad15d9618360e 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1440,6 +1440,13 @@ def mean(self, axis=None, skipna=True): Return the mean value of the Array or maximum along an axis. + Parameters + ---------- + axis : None + Dummy parameter to match NumPy signature + skipna : bool, default True + Whether to ignore any NaT elements + See Also -------- numpy.ndarray.mean From 1129e8c60c61199320cc8e958896424614b627ac Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 14 Jan 2019 08:56:42 -0800 Subject: [PATCH 05/18] test for numeric_only --- pandas/tests/frame/test_analytics.py | 33 ++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 9f64b71ea455c..910d74a03aed0 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1304,6 +1304,39 @@ def test_mean_corner(self, float_frame, float_string_frame): means = float_frame.mean(0) assert means['bool'] == float_frame['bool'].values.mean() + def test_mean_datetimelike(self): + # GH#24757 check that datetimelike are excluded by default, handled + # correctly with numeric_only=True + + df = pd.DataFrame({ + 'A': np.arange(3), + 'B': pd.date_range('2016-01-01', periods=3), + 'C': pd.timedelta_range('1D', periods=3), + 'D': pd.period_range('2016', periods=3, freq='A') + }) + result = df.mean(numeric_only=True) + expected = pd.Series({'A': 1.}) + tm.assert_series_equal(result, expected) + + result = df.mean() + expected = pd.Series({ + 'A': 1., + 'B': df.loc[1, 'B'], + 'C': df.loc[1, 'C'] + }) + tm.assert_series_equal(result, expected) + + # FIXME: df.mean(numeric_only=False) raises TypeError because + # it casts to object-dtype and tries to add Timestamps. + # result = df.mean(numeric_only=False) + # expected = pd.Series({ + # 'A': 1, + # 'B': df.loc[1, 'B'], + # 'C': df.loc[1, 'C'], + # 'D': df.loc[1, 'D'] + # }) + # tm.assert_series_equal(result, expected) + def test_stats_mixed_type(self, float_string_frame): # don't blow up float_string_frame.std(1) From aba90ecbfbb2671e1342b618102cab158b1e9aed Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 16 Jan 2019 06:43:07 -0800 Subject: [PATCH 06/18] copy/paste fixup --- pandas/core/arrays/datetimelike.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index ad15d9618360e..9423d3d4dd19c 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1437,8 +1437,7 @@ def max(self, axis=None, skipna=True, *args, **kwargs): def mean(self, axis=None, skipna=True): """ - Return the mean value of the Array or maximum along - an axis. + Return the mean value of the Array or mean along an axis. Parameters ---------- From ccb790bdd3f46b73ab9a19daa66153be80289936 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 16 Jan 2019 18:47:29 -0800 Subject: [PATCH 07/18] Disable for PeriodArray --- pandas/core/arrays/datetimelike.py | 8 ++++++++ .../tests/reductions/test_stat_reductions.py | 19 ++++++++++++------- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 9423d3d4dd19c..56f0f7722d287 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1451,6 +1451,14 @@ def mean(self, axis=None, skipna=True): numpy.ndarray.mean Series.mean : Return the mean value in a Series. """ + if is_period_dtype(self): + # See discussion in GH#24757 + raise NotImplementedError( + "mean is not implemented for {cls} since the meaning may be " + "ambiguous. An alternative is " + "obj.to_timestamp(how='start').mean()" + .format(cls=type(self).__name__)) + nv.validate_minmax_axis(axis) mask = self.isna() diff --git a/pandas/tests/reductions/test_stat_reductions.py b/pandas/tests/reductions/test_stat_reductions.py index 79a46bd1a8b42..a29f5fcb813ca 100644 --- a/pandas/tests/reductions/test_stat_reductions.py +++ b/pandas/tests/reductions/test_stat_reductions.py @@ -39,6 +39,7 @@ def test_dt64_mean(self, tz_naive_fixture, box): @pytest.mark.parametrize('box', [Series, pd.Index, PeriodArray]) def test_period_mean(self, box): + # GH#24757 dti = pd.date_range('2001-01-01', periods=11) # shuffle so that we are not just working with monotone-increasing dti = dti.take([4, 1, 3, 10, 9, 7, 8, 5, 0, 2, 6]) @@ -47,17 +48,21 @@ def test_period_mean(self, box): # TODO: flesh this out with different frequencies parr = dti._data.to_period('H') obj = box(parr) - assert obj.mean() == pd.Period('2001-01-06', freq='H') - assert obj.mean(skipna=False) == pd.Period('2001-01-06', freq='H') + with pytest.raises(NotImplementedError, match="ambiguous"): + obj.mean() + with pytest.raises(NotImplementedError, match="ambiguous"): + obj.mean(skipna=True) # parr[-2] will be the first date 2001-01-1 parr[-2] = pd.NaT - # with rounding, we get the period containing the result - # for the dt64 case above - obj = box(parr) - assert obj.mean() == pd.Period('2001-01-06 07:00', freq='H') - assert obj.mean(skipna=False) is pd.NaT + with pytest.raises(NotImplementedError, match="ambiguous"): + obj.mean() + with pytest.raises(NotImplementedError, match="ambiguous"): + obj.mean(skipna=True) + + assert obj.mean() == pd.Period('2001-01-06', freq='H') + assert obj.mean(skipna=False) == pd.Period('2001-01-06', freq='H') @pytest.mark.parametrize('box', [Series, pd.Index, TimedeltaArray]) def test_td64_mean(self, box): From 4f4cb6dc06ec565185b82daa0f94e1cb7e1bb4ee Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 16 Jan 2019 20:51:16 -0800 Subject: [PATCH 08/18] Delete assertions missed in previous commit --- pandas/tests/reductions/test_stat_reductions.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/tests/reductions/test_stat_reductions.py b/pandas/tests/reductions/test_stat_reductions.py index a29f5fcb813ca..988b00be2a898 100644 --- a/pandas/tests/reductions/test_stat_reductions.py +++ b/pandas/tests/reductions/test_stat_reductions.py @@ -61,9 +61,6 @@ def test_period_mean(self, box): with pytest.raises(NotImplementedError, match="ambiguous"): obj.mean(skipna=True) - assert obj.mean() == pd.Period('2001-01-06', freq='H') - assert obj.mean(skipna=False) == pd.Period('2001-01-06', freq='H') - @pytest.mark.parametrize('box', [Series, pd.Index, TimedeltaArray]) def test_td64_mean(self, box): tdi = pd.TimedeltaIndex([0, 3, -2, -7, 1, 2, -1, 3, 5, -2, 4], From 5fb1db9db73355d4215d54df1ab20c23c20865b8 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 17 Jan 2019 15:55:37 -0800 Subject: [PATCH 09/18] xfail numeric_only=False case --- pandas/tests/frame/test_analytics.py | 29 ++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 0d9d16920bc52..7d093dfc1f3ea 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1320,16 +1320,25 @@ def test_mean_datetimelike(self): }) tm.assert_series_equal(result, expected) - # FIXME: df.mean(numeric_only=False) raises TypeError because - # it casts to object-dtype and tries to add Timestamps. - # result = df.mean(numeric_only=False) - # expected = pd.Series({ - # 'A': 1, - # 'B': df.loc[1, 'B'], - # 'C': df.loc[1, 'C'], - # 'D': df.loc[1, 'D'] - # }) - # tm.assert_series_equal(result, expected) + @pytest.mark.xfail(reason="casts to object-dtype and then tries to " + "add timestamps", + raises=TypeError, strict=True) + def test_mean_datetimelike_numeric_only_false(self): + df = pd.DataFrame({ + 'A': np.arange(3), + 'B': pd.date_range('2016-01-01', periods=3), + 'C': pd.timedelta_range('1D', periods=3), + 'D': pd.period_range('2016', periods=3, freq='A') + }) + + result = df.mean(numeric_only=False) + expected = pd.Series({ + 'A': 1, + 'B': df.loc[1, 'B'], + 'C': df.loc[1, 'C'], + 'D': df.loc[1, 'D'] + }) + tm.assert_series_equal(result, expected) def test_stats_mixed_type(self, float_string_frame): # don't blow up From 1b24e7da3ec5f104a4d65de61c6d395187c6e668 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 29 Jan 2019 10:33:10 -0800 Subject: [PATCH 10/18] add todo comment --- pandas/core/series.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/series.py b/pandas/core/series.py index 2d4e3ed17db3f..42b9ef99e65b8 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3618,6 +3618,7 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, delegate = DatetimeIndex(delegate) elif is_timedelta64_dtype(delegate) and hasattr(TimedeltaIndex, name): # use TimedeltaIndex to handle skipna correctly + # TODO: remove hasattr check after TimedeltaIndex has `std` method delegate = TimedeltaIndex(delegate) # dispatch to numpy arrays From a49da376932e3846d3c1222ce4dda6930c78f4cb Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 29 Jan 2019 14:00:59 -0800 Subject: [PATCH 11/18] dont expect datetime in dataframe.mean --- pandas/tests/frame/test_analytics.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index fcbec0d89675b..b5d29c4b06e7b 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1333,7 +1333,6 @@ def test_mean_datetimelike(self): result = df.mean() expected = pd.Series({ 'A': 1., - 'B': df.loc[1, 'B'], 'C': df.loc[1, 'C'] }) tm.assert_series_equal(result, expected) From 58bca3684f631bd836e07dcebcacb831a8ed61c6 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 28 Feb 2019 09:29:43 -0800 Subject: [PATCH 12/18] whatsnew --- doc/source/whatsnew/v0.25.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index a6f7395f5177e..525808c2bf4e5 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -24,6 +24,7 @@ Other Enhancements - :meth:`DataFrame.at_time` and :meth:`Series.at_time` now support :meth:`datetime.time` objects with timezones (:issue:`24043`) - :meth:`DataFrame.set_index` now works for instances of ``abc.Iterator``, provided their output is of the same length as the calling frame (:issue:`22484`, :issue:`24984`) - :meth:`DatetimeIndex.union` now supports the ``sort`` argument. The behaviour of the sort parameter matches that of :meth:`Index.union` (:issue:`24994`) +- :class:`DatetimeIndex` and :class:`TimedeltaIndex` now have a `mean` method (:issue:`24757`) - .. _whatsnew_0250.api_breaking: From 4df0b1c14fc1faaa935143140356627993084c7f Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 3 Mar 2019 09:07:02 -0800 Subject: [PATCH 13/18] add versionadded --- pandas/core/arrays/datetimelike.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index af0ff5d88a33a..3093a9cfafb62 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1439,6 +1439,8 @@ def mean(self, axis=None, skipna=True): """ Return the mean value of the Array or mean along an axis. + .. versionadded:: 0.25.0 + Parameters ---------- axis : None From d2f5e6fdd1ab6a3a330141b7da6307fab4aa9cd6 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 8 Mar 2019 17:01:47 -0800 Subject: [PATCH 14/18] change NotImplementedError to TypeError --- pandas/core/arrays/datetimelike.py | 6 +++++- pandas/tests/reductions/test_stat_reductions.py | 8 ++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 3093a9cfafb62..9dd5ecab4e9c1 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1452,10 +1452,14 @@ def mean(self, axis=None, skipna=True): -------- numpy.ndarray.mean Series.mean : Return the mean value in a Series. + + Notes + ----- + mean is only defined for Datetime and Timedelta dtypes, not for Period. """ if is_period_dtype(self): # See discussion in GH#24757 - raise NotImplementedError( + raise TypeError( "mean is not implemented for {cls} since the meaning may be " "ambiguous. An alternative is " "obj.to_timestamp(how='start').mean()" diff --git a/pandas/tests/reductions/test_stat_reductions.py b/pandas/tests/reductions/test_stat_reductions.py index 988b00be2a898..7face09ddf6f2 100644 --- a/pandas/tests/reductions/test_stat_reductions.py +++ b/pandas/tests/reductions/test_stat_reductions.py @@ -48,17 +48,17 @@ def test_period_mean(self, box): # TODO: flesh this out with different frequencies parr = dti._data.to_period('H') obj = box(parr) - with pytest.raises(NotImplementedError, match="ambiguous"): + with pytest.raises(TypeError, match="ambiguous"): obj.mean() - with pytest.raises(NotImplementedError, match="ambiguous"): + with pytest.raises(TypeError, match="ambiguous"): obj.mean(skipna=True) # parr[-2] will be the first date 2001-01-1 parr[-2] = pd.NaT - with pytest.raises(NotImplementedError, match="ambiguous"): + with pytest.raises(TypeError, match="ambiguous"): obj.mean() - with pytest.raises(NotImplementedError, match="ambiguous"): + with pytest.raises(TypeError, match="ambiguous"): obj.mean(skipna=True) @pytest.mark.parametrize('box', [Series, pd.Index, TimedeltaArray]) From de9025c050791f77358d8692ead1572d9936ad51 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 8 Mar 2019 17:06:26 -0800 Subject: [PATCH 15/18] add mean to class docstrings and docs/source/reference/indexing.rst --- doc/source/reference/indexing.rst | 14 ++++++++++++++ pandas/core/indexes/datetimes.py | 1 + pandas/core/indexes/timedeltas.py | 1 + 3 files changed, 16 insertions(+) diff --git a/doc/source/reference/indexing.rst b/doc/source/reference/indexing.rst index 680cb7e3dac91..67043ee1f6dee 100644 --- a/doc/source/reference/indexing.rst +++ b/doc/source/reference/indexing.rst @@ -403,6 +403,13 @@ Conversion DatetimeIndex.to_series DatetimeIndex.to_frame +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + DatetimeIndex.mean + TimedeltaIndex -------------- .. autosummary:: @@ -435,6 +442,13 @@ Conversion TimedeltaIndex.ceil TimedeltaIndex.to_frame +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + TimedeltaIndex.mean + .. currentmodule:: pandas PeriodIndex diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index b8d052ce7be04..90e220f2ad2e8 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -207,6 +207,7 @@ class DatetimeIndex(DatetimeIndexOpsMixin, Int64Index, DatetimeDelegateMixin): to_frame month_name day_name + mean Notes ----- diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 830925535dab1..755389f03f3e0 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -130,6 +130,7 @@ class TimedeltaIndex(DatetimeIndexOpsMixin, dtl.TimelikeOps, Int64Index, floor ceil to_frame + mean See Also --------- From 5682b65ff9f1903bb844c31865e1991115bc92c9 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 14 May 2019 16:33:44 +0200 Subject: [PATCH 16/18] remove axis keyword for now --- pandas/core/arrays/datetimelike.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 7466a7f123255..6a2cdd5938c93 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1438,16 +1438,14 @@ def max(self, axis=None, skipna=True, *args, **kwargs): # Don't have to worry about NA `result`, since no NA went in. return self._box_func(result) - def mean(self, axis=None, skipna=True): + def mean(self, skipna=True): """ - Return the mean value of the Array or mean along an axis. + Return the mean value of the Array. .. versionadded:: 0.25.0 Parameters ---------- - axis : None - Dummy parameter to match NumPy signature skipna : bool, default True Whether to ignore any NaT elements @@ -1463,13 +1461,11 @@ def mean(self, axis=None, skipna=True): if is_period_dtype(self): # See discussion in GH#24757 raise TypeError( - "mean is not implemented for {cls} since the meaning may be " + "mean is not implemented for {cls} since the meaning is " "ambiguous. An alternative is " "obj.to_timestamp(how='start').mean()" .format(cls=type(self).__name__)) - nv.validate_minmax_axis(axis) - mask = self.isna() if skipna: values = self[~mask] From 450c8ce6663fb58c498ee9465697367169a2cbec Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 16 May 2019 08:34:29 +0200 Subject: [PATCH 17/18] don't pass axis to methods --- pandas/core/arrays/datetimelike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 6a2cdd5938c93..276f698ba3d24 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1382,7 +1382,7 @@ def _ensure_localized(self, arg, ambiguous='raise', nonexistent='raise', def _reduce(self, name, axis=0, skipna=True, **kwargs): op = getattr(self, name, None) if op: - return op(axis=axis, skipna=skipna, **kwargs) + return op(skipna=skipna, **kwargs) else: return super()._reduce(name, skipna, **kwargs) From 3e31ca1e7e1dfa4a0acb42fd51e94676453d792d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 16 May 2019 13:31:29 +0200 Subject: [PATCH 18/18] add returns to docstring --- pandas/core/arrays/datetimelike.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 276f698ba3d24..61594be8ec385 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1449,6 +1449,10 @@ def mean(self, skipna=True): skipna : bool, default True Whether to ignore any NaT elements + Returns + ------- + scalar (Timestamp or Timedelta) + See Also -------- numpy.ndarray.mean