From 89abf89b2c43c71f830eefd602c741e418018403 Mon Sep 17 00:00:00 2001 From: Guillem Borrell Date: Wed, 3 May 2017 13:03:39 +0200 Subject: [PATCH 01/13] Rolling window quantile defaults fixed Now the rolling window quantile returns the interpolated value like np.percentile and Series.quantile. --- pandas/core/window.pyx | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/pandas/core/window.pyx b/pandas/core/window.pyx index 3bb8abe26c781..9735ad0f9e865 100644 --- a/pandas/core/window.pyx +++ b/pandas/core/window.pyx @@ -1348,6 +1348,7 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win, bint is_variable ndarray[int64_t] start, end ndarray[double_t] output + double qlow, qhigh, vlow, vhigh if quantile < 0.0 or quantile > 1.0: raise ValueError("quantile value {0} not in [0, 1]".format(quantile)) @@ -1391,7 +1392,19 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win, if nobs >= minp: idx = int(quantile * (nobs - 1)) - output[i] = skiplist.get(idx) + + # Exactly last point + if idx == nobs - 1: + output[i] = skiplist.get(idx) + + # Interpolated percentile + else: + qlow = (<double> idx) / (<double>(nobs - 1)) + qhigh = (<double> (idx + 1)) / (<double>(nobs - 1)) + vlow = skiplist.get(idx) + vhigh = skiplist.get(idx+1) + + output[i] = vlow + (vhigh - vlow)*(quantile - qlow)/(qhigh - qlow) else: output[i] = NaN From 4ae3d887925a26541e38a21d911c5dd4df2127e1 Mon Sep 17 00:00:00 2001 From: Guillem Borrell Date: Wed, 3 May 2017 13:50:27 +0200 Subject: [PATCH 02/13] Fixed rolling window tests too --- pandas/tests/test_window.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index d3e427dfb4c7b..e1a011e422a9e 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -1079,7 +1079,18 @@ def scoreatpercentile(a, per): values = np.sort(a, axis=0) idx = per / 1.
* (values.shape[0] - 1) - return values[int(idx)] + + if idx == values.shape[0] - 1: + retval = values[-1] + + else: + qlow = int(idx) / (values.shape[0] - 1) + qhig = (int(idx) + 1) / (values.shape[0] - 1) + vlow = values[int(idx)] + vhig = values[int(idx + 1)] + retval = vlow + (vhig - vlow)*(per - qlow)/(qhig - qlow) + + return retval for q in qs: @@ -3514,7 +3525,7 @@ def test_ragged_quantile(self): result = df.rolling(window='2s', min_periods=1).quantile(0.5) expected = df.copy() - expected['B'] = [0.0, 1, 1.0, 3.0, 3.0] + expected['B'] = [0.0, 1, 1.5, 3.0, 3.5] tm.assert_frame_equal(result, expected) def test_ragged_std(self): From 4927e2f47e926f68651bec691b7c3992e31384a2 Mon Sep 17 00:00:00 2001 From: Guillem Borrell Date: Wed, 3 May 2017 13:53:00 +0200 Subject: [PATCH 03/13] Corrected small glitch in test --- pandas/tests/test_window.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index e1a011e422a9e..139105d413b2e 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -1080,7 +1080,7 @@ def scoreatpercentile(a, per): idx = per / 1. * (values.shape[0] - 1) - if idx == values.shape[0] - 1: + if int(idx) == values.shape[0] - 1: retval = values[-1] else: From 2237503c0d01a79d2f7ebe739d04ad94a941ea26 Mon Sep 17 00:00:00 2001 From: Guillem Borrell Date: Wed, 3 May 2017 15:03:45 +0200 Subject: [PATCH 04/13] Pass linter and add the example from #9413 as test. 
--- pandas/core/window.pyx | 6 +++--- pandas/tests/test_window.py | 20 +++++++++++++++++--- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/pandas/core/window.pyx b/pandas/core/window.pyx index 9735ad0f9e865..c8ba85b2a69aa 100644 --- a/pandas/core/window.pyx +++ b/pandas/core/window.pyx @@ -1397,14 +1397,14 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win, if idx == nobs - 1: output[i] = skiplist.get(idx) - # Interpolated percentile + # Interpolated quantile else: qlow = (<double> idx) / (<double>(nobs - 1)) qhigh = (<double> (idx + 1)) / (<double>(nobs - 1)) vlow = skiplist.get(idx) - vhigh = skiplist.get(idx+1) + vhigh = skiplist.get(idx + 1) - output[i] = vlow + (vhigh - vlow)*(quantile - qlow)/(qhigh - qlow) + output[i] = vlow + (vhigh - vlow) * (quantile - qlow) / (qhigh - qlow) else: output[i] = NaN diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 139105d413b2e..9faaf8660b86a 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -1088,8 +1088,8 @@ def scoreatpercentile(a, per): qhig = (int(idx) + 1) / (values.shape[0] - 1) vlow = values[int(idx)] vhig = values[int(idx + 1)] - retval = vlow + (vhig - vlow)*(per - qlow)/(qhig - qlow) - + retval = vlow + (vhig - vlow) * (per - qlow) / (qhig - qlow) + return retval for q in qs: @@ -1105,6 +1105,20 @@ def alt(x): self._check_moment_func(f, alt, name='quantile', quantile=q) + def test_rolling_quantile_np_percentile(self): + row = 10 + col = 5 + idx = pd.date_range(20100101, periods=row, freq='B') + df = pd.DataFrame(np.random.rand(row*col).reshape((row, -1)), index=idx) + + df_quantile = df.quantile([0.25, 0.5, 0.75], axis=0) + np_percentile = np.percentile(df, [25, 50, 75], axis=0) + + print(np_percentile) + print(df_quantile.values) + + tm.assert_almost_equal(df_quantile.values, np_percentile) + def test_rolling_quantile_param(self): ser = Series([0.0, .1, .5, .9, 1.0]) @@ -1116,7 +1130,7 @@ def test_rolling_quantile_param(self): with
pytest.raises(TypeError): ser.rolling(3).quantile('foo') - + def test_rolling_apply(self): # suppress warnings about empty slices, as we are deliberately testing # with a 0-length Series From 332e976a13ec43e6714c64bb5bdae55e8873caee Mon Sep 17 00:00:00 2001 From: Guillem Borrell Date: Wed, 3 May 2017 15:05:49 +0200 Subject: [PATCH 05/13] Deleted printed values for debugging. --- pandas/tests/test_window.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 9faaf8660b86a..ee3587be7ba4b 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -1114,9 +1114,6 @@ def test_rolling_quantile_np_percentile(self): df_quantile = df.quantile([0.25, 0.5, 0.75], axis=0) np_percentile = np.percentile(df, [25, 50, 75], axis=0) - print(np_percentile) - print(df_quantile.values) - tm.assert_almost_equal(df_quantile.values, np_percentile) def test_rolling_quantile_param(self): From ba5c7518548adcd2ea5cdff21ad8ba6fbbdd69b0 Mon Sep 17 00:00:00 2001 From: Guillem Borrell Date: Wed, 3 May 2017 15:09:12 +0200 Subject: [PATCH 06/13] Added a whatsnew entry --- doc/source/whatsnew/v0.20.1.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.1.txt b/doc/source/whatsnew/v0.20.1.txt index 504f8004bc8a6..ce843d56c5ae2 100644 --- a/doc/source/whatsnew/v0.20.1.txt +++ b/doc/source/whatsnew/v0.20.1.txt @@ -78,7 +78,7 @@ Reshaping Numeric ^^^^^^^ - +- Bug in ``.rolling.quantile()`` which incorrectly used different defaults than Series.quantile() and DataFrame.quantile() Other From b1894d8a6a76343cafe790c67761ef7c3a897443 Mon Sep 17 00:00:00 2001 From: Guillem Borrell Date: Wed, 3 May 2017 15:11:35 +0200 Subject: [PATCH 07/13] Pass linter. 
--- pandas/tests/test_window.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index ee3587be7ba4b..9047100c260d1 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -1109,13 +1109,14 @@ def test_rolling_quantile_np_percentile(self): row = 10 col = 5 idx = pd.date_range(20100101, periods=row, freq='B') - df = pd.DataFrame(np.random.rand(row*col).reshape((row, -1)), index=idx) + df = pd.DataFrame(np.random.rand(row * col).reshape((row, -1)), + index=idx) df_quantile = df.quantile([0.25, 0.5, 0.75], axis=0) np_percentile = np.percentile(df, [25, 50, 75], axis=0) tm.assert_almost_equal(df_quantile.values, np_percentile) - + def test_rolling_quantile_param(self): ser = Series([0.0, .1, .5, .9, 1.0]) @@ -1127,7 +1128,7 @@ def test_rolling_quantile_param(self): with pytest.raises(TypeError): ser.rolling(3).quantile('foo') - + def test_rolling_apply(self): # suppress warnings about empty slices, as we are deliberately testing # with a 0-length Series From 1378ffcb7805b78937ee3f34098ae1b394e50447 Mon Sep 17 00:00:00 2001 From: Guillem Borrell Date: Wed, 3 May 2017 15:15:58 +0200 Subject: [PATCH 08/13] Added comment for the test related with issue #9413 --- pandas/tests/test_window.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 9047100c260d1..6af38588dbcfc 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -1106,6 +1106,7 @@ def alt(x): self._check_moment_func(f, alt, name='quantile', quantile=q) def test_rolling_quantile_np_percentile(self): + # #9431 row = 10 col = 5 idx = pd.date_range(20100101, periods=row, freq='B') From 54bc3205b1a3bc117b7e4f0a2d5254dcf3c7225a Mon Sep 17 00:00:00 2001 From: Guillem Borrell Date: Wed, 3 May 2017 15:21:29 +0200 Subject: [PATCH 09/13] Added examples of #16211 as tests --- pandas/tests/test_window.py | 9 +++++++++ 1 file changed, 9 
insertions(+) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 6af38588dbcfc..7af6449a4ca23 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -1118,6 +1118,15 @@ def test_rolling_quantile_np_percentile(self): tm.assert_almost_equal(df_quantile.values, np_percentile) + def test_rolling_quantile_series(self): + # #16211 + arr = np.arange(100) + s = pd.Series(arr) + q1 = s.quantile(0.1) + q2 = s.rolling(100).quantile(0.1).iloc[-1] + + tm.assert_almost_equal(q1, q2) + def test_rolling_quantile_param(self): ser = Series([0.0, .1, .5, .9, 1.0]) From 88c7b85f4776017910ddebf1ea72b256d87695f2 Mon Sep 17 00:00:00 2001 From: Guillem Borrell Date: Wed, 3 May 2017 15:24:31 +0200 Subject: [PATCH 10/13] Referred to the wrong issue in the comments --- pandas/tests/test_window.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 7af6449a4ca23..68868eaf64bef 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -1106,7 +1106,7 @@ def alt(x): self._check_moment_func(f, alt, name='quantile', quantile=q) def test_rolling_quantile_np_percentile(self): - # #9431 + # #9413 row = 10 col = 5 idx = pd.date_range(20100101, periods=row, freq='B') From df161a4dc7e25f36804d2fe486cfeb607118fbfa Mon Sep 17 00:00:00 2001 From: Guillem Borrell Date: Wed, 3 May 2017 17:29:37 +0200 Subject: [PATCH 11/13] Coerce the dataframe values to a np array. 
--- pandas/tests/test_window.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 68868eaf64bef..3b01ee3f739e9 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -1116,7 +1116,7 @@ def test_rolling_quantile_np_percentile(self): df_quantile = df.quantile([0.25, 0.5, 0.75], axis=0) np_percentile = np.percentile(df, [25, 50, 75], axis=0) - tm.assert_almost_equal(df_quantile.values, np_percentile) + tm.assert_almost_equal(np.array(df_quantile.values), np_percentile) def test_rolling_quantile_series(self): # #16211 From 89942c20b150830ca8320dcc17231c52ef4860b6 Mon Sep 17 00:00:00 2001 From: Guillem Borrell Date: Thu, 4 May 2017 10:53:28 +0200 Subject: [PATCH 12/13] Fixed tests in Python2 --- pandas/core/window.pyx | 4 ++-- pandas/tests/test_window.py | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/core/window.pyx b/pandas/core/window.pyx index c8ba85b2a69aa..275ff26a7eb35 100644 --- a/pandas/core/window.pyx +++ b/pandas/core/window.pyx @@ -1403,8 +1403,8 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win, qhigh = (<double> (idx + 1)) / (<double>(nobs - 1)) vlow = skiplist.get(idx) vhigh = skiplist.get(idx + 1) - - output[i] = vlow + (vhigh - vlow) * (quantile - qlow) / (qhigh - qlow) + output[i] = vlow + (vhigh - vlow) * \ + (quantile - qlow) / (qhigh - qlow) else: output[i] = NaN diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 3b01ee3f739e9..383223c75cce1 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -1078,16 +1078,16 @@ def test_rolling_quantile(self): def scoreatpercentile(a, per): values = np.sort(a, axis=0) - idx = per / 1. * (values.shape[0] - 1) + idx = int(per / 1.
* (values.shape[0] - 1)) - if int(idx) == values.shape[0] - 1: + if idx == values.shape[0] - 1: retval = values[-1] else: - qlow = int(idx) / (values.shape[0] - 1) - qhig = (int(idx) + 1) / (values.shape[0] - 1) - vlow = values[int(idx)] - vhig = values[int(idx + 1)] + qlow = float(idx) / float(values.shape[0] - 1) + qhig = float(idx + 1) / float(values.shape[0] - 1) + vlow = values[idx] + vhig = values[idx + 1] retval = vlow + (vhig - vlow) * (per - qlow) / (qhig - qlow) return retval From 130cf4139052782763ee66ee9d50546f61b83191 Mon Sep 17 00:00:00 2001 From: Guillem Borrell Date: Thu, 4 May 2017 11:37:47 +0200 Subject: [PATCH 13/13] Coerce type to pass CI. --- pandas/tests/test_window.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 383223c75cce1..82e7c83734944 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -1116,7 +1116,7 @@ def test_rolling_quantile_np_percentile(self): df_quantile = df.quantile([0.25, 0.5, 0.75], axis=0) np_percentile = np.percentile(df, [25, 50, 75], axis=0) - tm.assert_almost_equal(np.array(df_quantile.values), np_percentile) + tm.assert_almost_equal(df_quantile.values, np.array(np_percentile)) def test_rolling_quantile_series(self): # #16211