diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 0e567972e7823..e168c2191d47b 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -846,6 +846,8 @@ Groupby/resample/rolling - Bug in :meth:`GroupBy.cummin` and :meth:`GroupBy.cummax` incorrectly rounding integer values near the ``int64`` implementations bounds (:issue:`40767`) - Bug in :meth:`.GroupBy.rank` with nullable dtypes incorrectly raising ``TypeError`` (:issue:`41010`) - Bug in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` computing wrong result with nullable data types too large to roundtrip when casting to float (:issue:`37493`) +- Bug in :meth:`DataFrame.rolling` returning mean zero for all ``NaN`` window with ``min_periods=0`` if calculation is not numerical stable (:issue:`41053`) +- Bug in :meth:`DataFrame.rolling` returning sum not zero for all ``NaN`` window with ``min_periods=0`` if calculation is not numerical stable (:issue:`41053`) Reshaping ^^^^^^^^^ diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 8c8629ad6f032..8d6f899d6f3ca 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -73,7 +73,9 @@ cdef inline float64_t calc_sum(int64_t minp, int64_t nobs, float64_t sum_x) nogi cdef: float64_t result - if nobs >= minp: + if nobs == 0 == minp: + result = 0 + elif nobs >= minp: result = sum_x else: result = NaN @@ -170,7 +172,7 @@ cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs, cdef: float64_t result - if nobs >= minp: + if nobs >= minp and nobs > 0: result = sum_x / nobs if neg_ct == 0 and result < 0: # all positive diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 9abae632e5da3..28465e3a979a7 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -1357,3 +1357,57 @@ def test_rolling_std_small_values(): result = s.rolling(2).std() expected = Series([np.nan, 7.071068e-9, 7.071068e-9]) tm.assert_series_equal(result, expected, atol=1.0e-15, rtol=1.0e-15) + + +@pytest.mark.parametrize( + "start, exp_values", + [ + (1, [0.03, 0.0155, 0.0155, 0.011, 0.01025]), + (2, [0.001, 0.001, 0.0015, 0.00366666]), + ], +) +def test_rolling_mean_all_nan_window_floating_artifacts(start, exp_values): + # GH#41053 + df = DataFrame( + [ + 0.03, + 0.03, + 0.001, + np.NaN, + 0.002, + 0.008, + np.NaN, + np.NaN, + np.NaN, + np.NaN, + np.NaN, + np.NaN, + 0.005, + 0.2, + ] + ) + + values = exp_values + [ + 0.00366666, + 0.005, + 0.005, + 0.008, + np.NaN, + np.NaN, + 0.005, + 0.102500, + ] + expected = DataFrame( + values, + index=list(range(start, len(values) + start)), + ) + result = df.iloc[start:].rolling(5, min_periods=0).mean() + tm.assert_frame_equal(result, expected) + + +def test_rolling_sum_all_nan_window_floating_artifacts(): + # GH#41053 + df = DataFrame([0.002, 0.008, 0.005, np.NaN, np.NaN, np.NaN]) + result = df.rolling(3, min_periods=0).sum() + expected = DataFrame([0.002, 0.010, 0.015, 0.013, 0.005, 0.0]) + tm.assert_frame_equal(result, expected)