diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 31b6bb0d5575d..4acd77222efd7 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -865,4 +865,5 @@ Bug Fixes - Bug in ``to_json`` which was causing segmentation fault when serializing 0-rank ndarray (:issue:`9576`) - Bug in plotting functions may raise ``IndexError`` when plotted on ``GridSpec`` (:issue:`10819`) - Bug in plot result may show unnecessary minor ticklabels (:issue:`10657`) -- Bug when constructing ``DataFrame`` where passing a dictionary with only scalar values and specifying columns did not raise an error (:issue:`10856`) +- Bug in ``groupby`` causing incorrect computation of aggregations on ``DataFrame`` with ``NaT`` (e.g. ``first``, ``last``, ``min``) (:issue:`10590`) +- Bug when constructing ``DataFrame`` where passing a dictionary with only scalar values and specifying columns did not raise an error (:issue:`10856`) \ No newline at end of file diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 534117b8e9249..1a40b73de8dd2 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1532,6 +1532,7 @@ def aggregate(self, values, how, axis=0): if is_datetime_or_timedelta_dtype(values.dtype): values = values.view('int64') + values[values == tslib.iNaT] = np.nan # GH 7754 is_numeric = True elif is_bool_dtype(values.dtype): @@ -2761,9 +2762,7 @@ def _cython_agg_blocks(self, how, numeric_only=True): for block in data.blocks: - values = block._try_operate(block.values) - - result, _ = self.grouper.aggregate(values, how, axis=agg_axis) + result, _ = self.grouper.aggregate(block.values, how, axis=agg_axis) # see if we can cast the block back to the original dtype result = block._try_coerce_and_cast_result(result) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 41703b3b5a3b7..ec6ab4e0d2ab1 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -5413,6 +5413,24 @@ def 
test_func(x): expected = DataFrame() tm.assert_frame_equal(result, expected) + def test_first_last_max_min_on_time_data(self): + # GH 10295 + # Verify that NaT is not in the result of max, min, first and last on + # DataFrame with datetime or timedelta values. + from datetime import timedelta as td + df_test=DataFrame({'dt':[nan,'2015-07-24 10:10','2015-07-25 11:11','2015-07-23 12:12',nan], + 'td':[nan,td(days=1),td(days=2),td(days=3),nan]}) + df_test.dt=pd.to_datetime(df_test.dt) + df_test['group']='A' + df_ref=df_test[df_test.dt.notnull()] + + grouped_test=df_test.groupby('group') + grouped_ref=df_ref.groupby('group') + + assert_frame_equal(grouped_ref.max(),grouped_test.max()) + assert_frame_equal(grouped_ref.min(),grouped_test.min()) + assert_frame_equal(grouped_ref.first(),grouped_test.first()) + assert_frame_equal(grouped_ref.last(),grouped_test.last()) def assert_fp_equal(a, b): assert (np.abs(a - b) < 1e-12).all()