From 91b08fa75e14344cca9969365b5f413a3f94fe1b Mon Sep 17 00:00:00 2001 From: y-p Date: Sun, 24 Mar 2013 23:12:54 +0200 Subject: [PATCH 01/10] =?UTF-8?q?TST:=20add=20rolling=5Fmean=20test,=20val?= =?UTF-8?q?idate=20against=20numpy.convolve=20based=20rolling=5Fmean?= =?UTF-8?q?=E2=80=94=20y-p?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pandas/stats/tests/test_moments.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py index 88dfcaf5ce7ae..93327a2e8b920 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/stats/tests/test_moments.py @@ -768,6 +768,21 @@ def _check_expanding(self, func, static_comp, has_min_periods=True, preserve_nan=preserve_nan) self._check_expanding_structures(func) + def test_rolling_mean_edges(self): + # GH2803 + # actually, covers edge handling more generally + def movingaverage(interval, window_size): + window = np.ones(int(window_size))/float(window_size) + return np.convolve(interval, window, 'same') + + nitems = 25 + for win in range(1,nitems,1): + ser = Series(range(nitems)) + df = DataFrame(index=range(len(ser))) + df['rm'] = mom.rolling_mean(ser, win, center=True, min_periods=1) + df['ma'] = movingaverage(ser, win) + tm.assert_almost_equal(df['rm'] , df['ma']) + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], From 978d4dbf405996d3f07a9065be2b4107af5742fb Mon Sep 17 00:00:00 2001 From: y-p Date: Mon, 18 Mar 2013 13:07:30 +0200 Subject: [PATCH 02/10] BUG: rolling_X functions mishandle edges when center = True --- pandas/stats/moments.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py index e53916f113e1b..8bb02d591625c 100644 --- a/pandas/stats/moments.py +++ b/pandas/stats/moments.py @@ -285,6 +285,34 @@ def _rolling_moment(arg, window, func, minp, 
axis=0, freq=None, rs = return_hook(result) if center: rs = _center_window(rs, window, axis) + # GH2953, fixup edges + if window > 2: + if values.ndim > 1: + # TODO: handle mi vectorized case + pass + else: + # there's an ambiguity on what constitutes + # the "center" when window is even + # we Just close ranks with numpy , see test case + if window % 2 == 0 : + nahead = (window-1)//2 or 1 + else: + nahead = (window)//2 + + # fixup the head + tip = np.append(np.zeros(nahead+1),values[:(2*nahead+1)]) + rs[:nahead+1] = calc(tip)[-(nahead+1):][:nahead+1] + + # fixup the tail + tip = np.append(values[-(2*nahead+1):],np.zeros(nahead)) + rs[-(nahead):] = calc(tip)[-(nahead):] + + if minp > 0: + d = minp - nahead-1 + if d > 0: + rs[:d] = NaN + rs[-(d):] = NaN + return rs From 0c7af788bffdf11253659101adcd2bde42671b5d Mon Sep 17 00:00:00 2001 From: y-p Date: Sun, 24 Mar 2013 14:05:43 +0200 Subject: [PATCH 03/10] ENH: translate min_periods before passing into _rolling_moment minp is relevant for fixing up edges, not just down in the cython code --- pandas/stats/moments.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py index 8bb02d591625c..393de1ce0532d 100644 --- a/pandas/stats/moments.py +++ b/pandas/stats/moments.py @@ -525,12 +525,11 @@ def _rolling_func(func, desc, check_minp=_use_window): @Substitution(desc, _unary_arg, _type_of_input) @Appender(_doc_template) @wraps(func) + def f(arg, window, min_periods=None, freq=None, center=False, time_rule=None, **kwargs): - def call_cython(arg, window, minp, **kwds): - minp = check_minp(minp, window) - return func(arg, window, minp, **kwds) - return _rolling_moment(arg, window, call_cython, min_periods, + min_periods = check_minp(min_periods, window) + return _rolling_moment(arg, window, func, min_periods, freq=freq, center=center, time_rule=time_rule, **kwargs) From 3bf982a5ef6ad78cd801b551ec7ac8f7a1a093c3 Mon Sep 17 00:00:00 2001 From: y-p Date: Sun, 24
Mar 2013 15:38:30 +0200 Subject: [PATCH 04/10] TST: modify check_moment to accommodate expected center=True behaviour --- pandas/stats/tests/test_moments.py | 96 ++++++++++++++++++++++-------- 1 file changed, 70 insertions(+), 26 deletions(-) diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py index 93327a2e8b920..e9609b05e7a8d 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/stats/tests/test_moments.py @@ -396,23 +396,30 @@ def _check_ndarray(self, func, static_comp, window=50, assert_almost_equal(result[-1], static_comp(arr[10:-10])) if has_center: + win = 20 if has_min_periods: - result = func(arr, 20, min_periods=15, center=True) - expected = func(arr, 20, min_periods=15) + result = func(arr, win, min_periods=15, center=True) + expected = func(arr, win, min_periods=15) else: - result = func(arr, 20, center=True) - expected = func(arr, 20) + result = func(arr, win, center=True) + expected = func(arr, win) assert_almost_equal(result[1], expected[10]) - if fill_value is None: - self.assert_(np.isnan(result[-9:]).all()) - else: - self.assert_((result[-9:] == 0).all()) + # fill_Value only specified by rolling_Count + # which assumes the old 0 append at end + # behavior, no longer true + + # if fill_value is None: + # self.assert_(np.isnan(result[-9:]).all()) + # else: + # self.assert_((result[-9:] == 0).all()) + + if has_min_periods: self.assert_(np.isnan(expected[23])) self.assert_(np.isnan(result[14])) - self.assert_(np.isnan(expected[-5])) self.assert_(np.isnan(result[-14])) + self.assert_(np.isnan(expected[-5])) def _check_structures(self, func, static_comp, has_min_periods=True, has_time_rule=True, @@ -451,28 +458,65 @@ def _check_structures(self, func, static_comp, trunc_frame.apply(static_comp)) if has_center: - if has_min_periods: - minp = 10 - series_xp = func(self.series, 25, min_periods=minp).shift(-12) - frame_xp = func(self.frame, 25, min_periods=minp).shift(-12) - - series_rs = func(self.series, 25,
min_periods=minp, - center=True) - frame_rs = func(self.frame, 25, min_periods=minp, - center=True) + # the code in _rolling_function tries avoids np.append + # because of the perf hit of copying, but we can do + # that for testing and simplfy things + minp = 10 + win =25 + + def participating(i,win): + return [x for x in range(i-win//2,i+(win+1)//2)] + + # validate + self.assertEqual(participating(0,3),[-1,0,1]) + self.assertEqual(participating(1,3),[0,1,2]) + self.assertEqual(participating(0,4),[-2,-1,0,1]) + self.assertEqual(participating(1,4),[-1,0,1,2]) + + def get_v(s,f,i,win=win,minp=minp): + _is = np.array(participating(i,win)) + in_range_mask = np.array([ x>=0 and x< len(s) for x in _is ]) + def f_(i,data): + # print( data) + vals = np.array( list(data) ) + if has_min_periods: + return f(vals,win,min_periods=minp)[-1] + else: + return f(vals,win)[-1] + + if all(in_range_mask): # middle + return f_(i,s.take(_is)) + + elif sum(in_range_mask) < minp: + return np.NaN + return "minp_nan" + else: + lpad = np.sum([_is<0]) + rpad = np.sum([_is>= len(s)]) + + _in_is = np.ma.array(_is, mask=~in_range_mask).compressed() + vals = np.array([0] * lpad + list(s.take(_in_is)) + [0]* rpad) + # print( i,lpad,rpad) + # print "is",_is + # print "in_is",_in_is + # # print "vs", vs + # print "vals", vals + return f_(i,vals) + return "edge" + + series_xp = Series(( [get_v(self.series,func,i,win,minp) for i in range(len(self.series))] )) + # frame_xp = func(self.frame, win, min_periods=minp).shift(-(win//2)) + if has_min_periods: + series_rs = func(self.series, win, min_periods=minp, center=True) else: - series_xp = func(self.series, 25).shift(-12) - frame_xp = func(self.frame, 25).shift(-12) - - series_rs = func(self.series, 25, center=True) - frame_rs = func(self.frame, 25, center=True) + series_rs = func(self.series, win, center=True) if fill_value is not None: series_xp = series_xp.fillna(fill_value) - frame_xp = frame_xp.fillna(fill_value) - assert_series_equal(series_xp, 
series_rs) - assert_frame_equal(frame_xp, frame_rs) + # frame_xp = frame_xp.fillna(fill_value) + np.array_equal(series_xp, series_rs) + # assert_frame_equal(frame_xp, frame_rs) def test_legacy_time_rule_arg(self): from StringIO import StringIO From 115c2377817c41c5cc611ece22799236e4b4facd Mon Sep 17 00:00:00 2001 From: y-p Date: Sun, 24 Mar 2013 23:01:05 +0200 Subject: [PATCH 05/10] ENH: make padding value a param of _rolling_moment --- pandas/stats/moments.py | 27 ++++++++++++++++++--------- pandas/stats/tests/test_moments.py | 2 +- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py index 393de1ce0532d..5d8d90c22f206 100644 --- a/pandas/stats/moments.py +++ b/pandas/stats/moments.py @@ -42,6 +42,8 @@ freq : None or string alias / date offset object, default=None Frequency to conform to before computing statistic time_rule is a legacy alias for freq +pad_val: Value to use for parts of the window which fall outside the array + bounds. (applies only for center=True, currently) Returns ------- @@ -251,7 +253,7 @@ def rolling_corr_pairwise(df, window, min_periods=None): def _rolling_moment(arg, window, func, minp, axis=0, freq=None, - center=False, time_rule=None, **kwargs): + center=False, time_rule=None, pad_val=np.NaN, **kwargs): """ Rolling statistical measure using supplied function. Designed to be used with passed-in Cython array-based functions. 
@@ -300,18 +302,25 @@ def _rolling_moment(arg, window, func, minp, axis=0, freq=None, nahead = (window)//2 # fixup the head - tip = np.append(np.zeros(nahead+1),values[:(2*nahead+1)]) + pad = np.empty(nahead+1) + pad[:] = pad_val + tip = np.append(pad,values[:(2*nahead+1)]) rs[:nahead+1] = calc(tip)[-(nahead+1):][:nahead+1] # fixup the tail - tip = np.append(values[-(2*nahead+1):],np.zeros(nahead)) + pad = np.empty(nahead) + pad[:] = pad_val + tip = np.append(values[-(2*nahead+1):], pad) rs[-(nahead):] = calc(tip)[-(nahead):] if minp > 0: - d = minp - nahead-1 - if d > 0: - rs[:d] = NaN - rs[-(d):] = NaN + ld = minp - nahead-1 + rd = ld-1 if window % 2 == 0 else ld + rd = rd if rd>=0 else 0 + if ld > 0: + rs[:ld] = NaN + if rd > 0: + rs[-(rd):] = NaN return rs @@ -527,11 +536,11 @@ def _rolling_func(func, desc, check_minp=_use_window): @wraps(func) def f(arg, window, min_periods=None, freq=None, center=False, - time_rule=None, **kwargs): + time_rule=None, pad_val=np.NaN, **kwargs): min_periods = check_minp(min_periods, window) return _rolling_moment(arg, window, func, min_periods, freq=freq, center=center, - time_rule=time_rule, **kwargs) + time_rule=time_rule,pad_val=pad_val, **kwargs) return f diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py index e9609b05e7a8d..51d58fd7f6796 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/stats/tests/test_moments.py @@ -823,7 +823,7 @@ def movingaverage(interval, window_size): for win in range(1,nitems,1): ser = Series(range(nitems)) df = DataFrame(index=range(len(ser))) - df['rm'] = mom.rolling_mean(ser, win, center=True, min_periods=1) + df['rm'] = mom.rolling_mean(ser, win, center=True, min_periods=1,pad_val=0) df['ma'] = movingaverage(ser, win) tm.assert_almost_equal(df['rm'] , df['ma']) From 8bc35c2775e472f0acd47cf65c2eb1c39323dc16 Mon Sep 17 00:00:00 2001 From: jreback Date: Mon, 25 Mar 2013 06:18:56 +0200 Subject: [PATCH 06/10] Fixes to roll_skew, roll_kurt --- 
pandas/algos.pyx | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/pandas/algos.pyx b/pandas/algos.pyx index cac9c5ccc7a6d..829059c56b705 100644 --- a/pandas/algos.pyx +++ b/pandas/algos.pyx @@ -1104,8 +1104,11 @@ def roll_skew(ndarray[double_t] input, int win, int minp): R = sqrt(B) - output[i] = ((sqrt(nobs * (nobs - 1.)) * C) / - ((nobs-2) * R * R * R)) + if R != 0 and nobs > 2: + output[i] = ((sqrt(nobs * (nobs - 1.)) * C) / + ((nobs-2) * R * R * R)) + else: + output[i] = NaN else: output[i] = NaN @@ -1173,10 +1176,12 @@ def roll_kurt(ndarray[double_t] input, R = R * A D = xxxx / nobs - R - 6*B*A*A - 4*C*A - K = (nobs * nobs - 1.)*D/(B*B) - 3*((nobs-1.)**2) - K = K / ((nobs - 2.)*(nobs-3.)) - - output[i] = K + if B != 0 and nobs > 3: + K = (nobs * nobs - 1.)*D/(B*B) - 3*((nobs-1.)**2) + K = K / ((nobs - 2.)*(nobs-3.)) + output[i] = K + else: + output[i] = NaN else: output[i] = NaN From 4b2adb6aa36851d474336e78aae3c75dae2ec0d3 Mon Sep 17 00:00:00 2001 From: jreback Date: Mon, 25 Mar 2013 06:36:49 +0200 Subject: [PATCH 07/10] generalize to multi dimensional case --- pandas/stats/moments.py | 76 +++++++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 33 deletions(-) diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py index 5d8d90c22f206..ef784f57e1eaa 100644 --- a/pandas/stats/moments.py +++ b/pandas/stats/moments.py @@ -279,48 +279,58 @@ def _rolling_moment(arg, window, func, minp, axis=0, freq=None, calc = lambda x: func(x, window, minp=minp, **kwargs) return_hook, values = _process_data_structure(arg) # actually calculate the moment. Faster way to do this? 
- if values.ndim > 1: - result = np.apply_along_axis(calc, axis, values) - else: - result = calc(values) + def calc(x): + _calc = lambda x: func(x, window, minp=minp, **kwargs) + if x.ndim > 1: + return np.apply_along_axis(_calc, axis, x) + else: + return _calc(x) + result = calc(values) rs = return_hook(result) + if center: rs = _center_window(rs, window, axis) # GH2953, fixup edges if window > 2: - if values.ndim > 1: - # TODO: handle mi vectorized case - pass + # there's an ambiguity on what constitutes + # the "center" when window is even + # we Just close ranks with numpy , see test case + if window % 2 == 0 : + nahead = (window-1)//2 or 1 + else: + nahead = (window)//2 + + # fixup the head + shape = list(values.shape) + shape[0] = nahead+1 + pad = np.empty(tuple(shape)) + if len(shape)>1: + pad[:,:] = pad_val else: - # there's an ambiguity on what constitutes - # the "center" when window is even - # we Just close ranks with numpy , see test case - if window % 2 == 0 : - nahead = (window-1)//2 or 1 - else: - nahead = (window)//2 - - # fixup the head - pad = np.empty(nahead+1) pad[:] = pad_val - tip = np.append(pad,values[:(2*nahead+1)]) - rs[:nahead+1] = calc(tip)[-(nahead+1):][:nahead+1] - - # fixup the tail - pad = np.empty(nahead) + tip = np.append(pad,values[:(2*nahead+1)]) + rs[:nahead+1] = calc(tip)[-(nahead+1):][:nahead+1] + + # fixup the tail + shape = list(values.shape) + shape[0] = nahead + pad = np.empty(tuple(shape)) + if len(shape)>1: + pad[:,:] = pad_val + else: pad[:] = pad_val - tip = np.append(values[-(2*nahead+1):], pad) - rs[-(nahead):] = calc(tip)[-(nahead):] - - if minp > 0: - ld = minp - nahead-1 - rd = ld-1 if window % 2 == 0 else ld - rd = rd if rd>=0 else 0 - if ld > 0: - rs[:ld] = NaN - if rd > 0: - rs[-(rd):] = NaN + tip = np.append(values[-(2*nahead+1):], pad) + rs[-(nahead):] = calc(tip)[-(nahead):] + + if minp > 0: + ld = minp - nahead-1 + rd = ld-1 if window % 2 == 0 else ld + rd = rd if rd>=0 else 0 + if ld > 0: + rs[:ld] = NaN 
+ if rd > 0: + rs[-(rd):] = NaN return rs From a9614ff08ae438b6fa1546bea61098fe89aa15b0 Mon Sep 17 00:00:00 2001 From: y-p Date: Sat, 30 Mar 2013 17:09:02 +0300 Subject: [PATCH 08/10] TST: bring back test_moments tests for frames --- pandas/stats/tests/test_moments.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py index 51d58fd7f6796..9d01512bb74cb 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/stats/tests/test_moments.py @@ -505,18 +505,20 @@ def f_(i,data): return "edge" series_xp = Series(( [get_v(self.series,func,i,win,minp) for i in range(len(self.series))] )) - # frame_xp = func(self.frame, win, min_periods=minp).shift(-(win//2)) + frame_xp = func(self.frame, win, min_periods=minp).shift(-(win//2)) if has_min_periods: series_rs = func(self.series, win, min_periods=minp, center=True) + frame_rs = func(self.frame, win, min_periods=minp, center=True) else: series_rs = func(self.series, win, center=True) + frame_rs = func(self.frame, win, center=True) if fill_value is not None: series_xp = series_xp.fillna(fill_value) - # frame_xp = frame_xp.fillna(fill_value) + frame_xp = frame_xp.fillna(fill_value) np.array_equal(series_xp, series_rs) - # assert_frame_equal(frame_xp, frame_rs) + assert_frame_equal(frame_xp, frame_rs) def test_legacy_time_rule_arg(self): from StringIO import StringIO From 394bfe367be60c37a171d6c2142c625c30017189 Mon Sep 17 00:00:00 2001 From: y-p Date: Sun, 31 Mar 2013 15:25:55 +0300 Subject: [PATCH 09/10] Refactor, Sort out the 2d case --- pandas/stats/moments.py | 111 ++++++++++++++++++++--------- pandas/stats/tests/test_moments.py | 2 +- 2 files changed, 77 insertions(+), 36 deletions(-) diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py index ef784f57e1eaa..55582919309f5 100644 --- a/pandas/stats/moments.py +++ b/pandas/stats/moments.py @@ -277,64 +277,105 @@ def _rolling_moment(arg, window, func, minp, axis=0, 
freq=None, """ arg = _conv_timerule(arg, freq, time_rule) calc = lambda x: func(x, window, minp=minp, **kwargs) + # strips the pandas object into a callback and an ndarray + # the callback takes back an ndarray and reconstitutes the + # pandas object with the new data return_hook, values = _process_data_structure(arg) # actually calculate the moment. Faster way to do this? - def calc(x): + def calc(x,axis): _calc = lambda x: func(x, window, minp=minp, **kwargs) if x.ndim > 1: return np.apply_along_axis(_calc, axis, x) else: return _calc(x) - result = calc(values) - rs = return_hook(result) + def fwd(x): + """ + reshapes a 1d/2d ndarray into a 2d array so the processing can + be done on a fixed case. + """ + v = x.view() + if v.ndim > 1: + v = v if axis == 1 else v.T + else: + v = v.reshape((1,len(v))) + return v + def bkwd(x): + if arg.ndim > 1: + x = x if axis == 1 else x.T + else: + x = x[0] + return x + + def pad(values,nahead,pos="head",window=window,axis=axis,pad_val=pad_val): + v = values + + tip = np.empty((v.shape[0],2*nahead+1+nahead+1),dtype=v.dtype) + if pos == "head": + tip[:,:nahead+1] = pad_val + tip[:,nahead+1:] = v[:,:(2*nahead+1)] + elif pos == "tail": + tip[:,-(nahead+1):] = pad_val + tip[:,:-(nahead+1)] = v[:,-(2*nahead+1):] + else: + raise NotImplementedError() + + return tip + +# window = 5 /4 +# print pad(mkdf(10,5).values,window=5) +# print pad(mkdf(10,5).values,window=5,axis=1) +# print pad(mkdf(10,5).irow(0).values,window=5) + result = calc(values,axis) if center: - rs = _center_window(rs, window, axis) # GH2953, fixup edges if window > 2: - # there's an ambiguity on what constitutes + result = _center_window(result, window, axis) + result = fwd(result) + values = fwd(values) + # with the data always in a consistent alignment + # we can always apply the func along the same axis=1 + # and eliminate special cases + + # there's an ambiguity in what constitutes # the "center" when window is even - # we Just close ranks with numpy , see test 
case + # we Just close ranks with numpy, so a window of len 4 [0..3] + # 2 is the "center" slot + if window % 2 == 0 : nahead = (window-1)//2 or 1 else: nahead = (window)//2 + tip = pad(values,nahead,'head') + head = calc(tip,axis=1)[:,-(nahead+1):][:,:nahead+1] - # fixup the head - shape = list(values.shape) - shape[0] = nahead+1 - pad = np.empty(tuple(shape)) - if len(shape)>1: - pad[:,:] = pad_val - else: - pad[:] = pad_val - tip = np.append(pad,values[:(2*nahead+1)]) - rs[:nahead+1] = calc(tip)[-(nahead+1):][:nahead+1] - - # fixup the tail - shape = list(values.shape) - shape[0] = nahead - pad = np.empty(tuple(shape)) - if len(shape)>1: - pad[:,:] = pad_val - else: - pad[:] = pad_val - tip = np.append(values[-(2*nahead+1):], pad) - rs[-(nahead):] = calc(tip)[-(nahead):] + tip = pad(values,nahead,'tail') + tail = calc(tip,axis=1)[:,-(nahead+1):][:,:nahead] + + result[:,-(nahead):] = tail + result[:,:nahead+1] = head if minp > 0: ld = minp - nahead-1 rd = ld-1 if window % 2 == 0 else ld - rd = rd if rd>=0 else 0 if ld > 0: - rs[:ld] = NaN - if rd > 0: - rs[-(rd):] = NaN - - return rs - - + result[:,:ld] = NaN + if rd >0: + result[:,-(rd):] = NaN + + result =bkwd(result) + + # rebuild the correct pandas object using the new data + return return_hook(result) +# TODO: test window=2 +# from pandas.stats import moments as mom +# print list(mom.rolling_mean(Series(np.ones(10)),3,min_periods=1,pad_val=0,center=True).values) +# print list(mom.rolling_mean(Series(np.ones(10)),4,min_periods=1,pad_val=0,center=True).values) +# print list(mom.rolling_mean(Series(np.ones(10)),5,min_periods=1,pad_val=0,center=True).values) +# mom.rolling_mean(DataFrame(np.ones((3,10))),3,axis=0,min_periods=1,pad_val=0,center=True) +# mom.rolling_mean(DataFrame(np.ones((3,10))),3,axis=1,min_periods=1,pad_val=0,center=True) +# mom.rolling_mean(Series(range(25)),3,axis=0,min_periods=1,pad_val=0,center=True) def _center_window(rs, window, axis): if axis > rs.ndim-1: raise ValueError("Requested 
axis is larger then no. of argument dimensions") diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py index 9d01512bb74cb..1e7ee16251b96 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/stats/tests/test_moments.py @@ -487,7 +487,7 @@ def f_(i,data): if all(in_range_mask): # middle return f_(i,s.take(_is)) - elif sum(in_range_mask) < minp: + elif minp and sum(in_range_mask) < minp: return np.NaN return "minp_nan" else: From c2867582935e6a46e1115c06731b3fdeb289c9e0 Mon Sep 17 00:00:00 2001 From: y-p Date: Mon, 1 Apr 2013 00:52:19 +0300 Subject: [PATCH 10/10] Rework tests --- pandas/stats/tests/test_moments.py | 77 +++++++++++++++++++----------- 1 file changed, 48 insertions(+), 29 deletions(-) diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py index 1e7ee16251b96..fde1427644a56 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/stats/tests/test_moments.py @@ -457,12 +457,8 @@ def _check_structures(self, func, static_comp, assert_almost_equal(frame_result.xs(last_date), trunc_frame.apply(static_comp)) - if has_center: - # the code in _rolling_function tries avoids np.append - # because of the perf hit of copying, but we can do - # that for testing and simplfy things - minp = 10 - win =25 + + def test_centered_rolling_moment(self): def participating(i,win): return [x for x in range(i-win//2,i+(win+1)//2)] @@ -473,13 +469,13 @@ def participating(i,win): self.assertEqual(participating(0,4),[-2,-1,0,1]) self.assertEqual(participating(1,4),[-1,0,1,2]) - def get_v(s,f,i,win=win,minp=minp): + def get_v(s,f,i,win,minp,pad_val=np.nan): _is = np.array(participating(i,win)) in_range_mask = np.array([ x>=0 and x< len(s) for x in _is ]) def f_(i,data): # print( data) vals = np.array( list(data) ) - if has_min_periods: + if minp: return f(vals,win,min_periods=minp)[-1] else: return f(vals,win)[-1] @@ -495,30 +491,53 @@ def f_(i,data): rpad = np.sum([_is>= len(s)]) _in_is = np.ma.array(_is, 
mask=~in_range_mask).compressed() - vals = np.array([0] * lpad + list(s.take(_in_is)) + [0]* rpad) - # print( i,lpad,rpad) - # print "is",_is - # print "in_is",_in_is - # # print "vs", vs - # print "vals", vals + vals = np.array([pad_val] * lpad + list(s.take(_in_is)) + [pad_val]* rpad) return f_(i,vals) return "edge" - series_xp = Series(( [get_v(self.series,func,i,win,minp) for i in range(len(self.series))] )) - frame_xp = func(self.frame, win, min_periods=minp).shift(-(win//2)) - - if has_min_periods: - series_rs = func(self.series, win, min_periods=minp, center=True) - frame_rs = func(self.frame, win, min_periods=minp, center=True) - else: - series_rs = func(self.series, win, center=True) - frame_rs = func(self.frame, win, center=True) - - if fill_value is not None: - series_xp = series_xp.fillna(fill_value) - frame_xp = frame_xp.fillna(fill_value) - np.array_equal(series_xp, series_rs) - assert_frame_equal(frame_xp, frame_rs) + def build_series(data,func,win,minp,pad_val=np.nan): + return Series(( [get_v(data,func,i,win,minp,pad_val=pad_val) for i in range(len(data))] )) + + N,K = 20,5 + for win in range(3,N-1): + for minp in range(1,win+1): + func = mom.rolling_mean + + # self.series = Series(np.ones(N)) + self.series = Series(randn(N)) + series_xp =build_series(self.series,func,win,minp,pad_val=0) + + # frame_xp = DataFrame(np.ones((N,K)), copy=True) + self.frame= DataFrame(randn(N,K), copy=True) + f = lambda i: func(self.frame.icol(i), win, min_periods=minp, pad_val=0, center=True) + data =[f(i) for i in range(len(self.frame.columns))] + frame_xp = DataFrame(np.array(data).T) + series_rs = func(self.series, win, min_periods=minp, pad_val=0, center=True) + frame_rs = func(self.frame, win, min_periods=minp, pad_val=0,center=True) + + assert_series_equal(series_xp, series_rs) + assert_frame_equal(frame_xp, frame_rs) + + N,K = 20,5 + minp=None + for win in range(3,N-1): + func = mom.rolling_count + # self.series = Series(np.ones(N)) + self.series = 
Series(randn(N)) + series_xp =build_series(self.series,func,win,minp) + + # frame_xp = DataFrame(np.ones((N,K)), copy=True) + self.frame= DataFrame(randn(N,K), copy=True) + + f = lambda i: func(self.frame.icol(i), win, center=True) + data =[f(i) for i in range(len(self.frame.columns))] + frame_xp = DataFrame(np.array(data).T) + + series_rs = func(self.series, win,center=True) + frame_rs = func(self.frame, win,center=True) + + assert_series_equal(series_xp, series_rs) + assert_frame_equal(frame_xp, frame_rs) def test_legacy_time_rule_arg(self): from StringIO import StringIO