From 73f88234c4ea272f21160d1187db8e7c955b85b8 Mon Sep 17 00:00:00 2001 From: Bran Yang Date: Thu, 18 Feb 2016 12:07:03 +0800 Subject: [PATCH 1/5] Fix #12373: rolling functions raise ValueError on float32 data --- doc/source/whatsnew/v0.18.0.txt | 2 + pandas/core/window.py | 44 +++++- pandas/tests/test_window.py | 228 ++++++++++++++++++++++++++++++++ 3 files changed, 267 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index 7f253ae437d9f..320520582f1e2 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -1200,3 +1200,5 @@ Bug Fixes - Bug when initializing categorical series with a scalar value. (:issue:`12336`) - Bug when specifying a UTC ``DatetimeIndex`` by setting ``utc=True`` in ``.to_datetime`` (:issue:`11934`) - Bug when increasing the buffer size of CSV reader in ``read_csv`` (:issue:`12494`) + +- Bug in ``.rolling`` in which apply on float32 data will raise a ``ValueError`` (:issue:`12373`) diff --git a/pandas/core/window.py b/pandas/core/window.py index 9c8490f608996..cb586b6ffeef5 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -149,16 +149,17 @@ def _prep_values(self, values=None, kill_inf=True, how=None): if values is None: values = getattr(self._selected_obj, 'values', self._selected_obj) - # coerce dtypes as appropriate + # GH #12373 : rolling functions error on float32 data + # make sure the data is coerced to float64 if com.is_float_dtype(values.dtype): - pass + values = com._ensure_float64(values) elif com.is_integer_dtype(values.dtype): - values = values.astype(float) + values = com._ensure_float64(values) elif com.is_timedelta64_dtype(values.dtype): - values = values.view('i8').astype(float) + values = com._ensure_float64(values.view('i8')) else: try: - values = values.astype(float) + values = com._ensure_float64(values) except (ValueError, TypeError): raise TypeError("cannot handle this type -> {0}" "".format(values.dtype)) @@ -457,7 +458,9 @@ def _apply(self, func, window=None, center=None, check_minp=None, how=None, def func(arg, window, min_periods=None): minp = check_minp(min_periods, window) - return cfunc(arg, window, minp, **kwargs) + # GH #12373: rolling functions error on float32 data + return cfunc(com._ensure_float64(arg), + window, minp, **kwargs) # calculation function if center: @@ -494,8 +497,31 @@ def count(self): obj = self._convert_freq() window = self._get_window() window = min(window, len(obj)) if not self.center else window + + # GH #12373: rolling functions raise ValueError on float32 data + # enables count for timedelta/datatime64 dtypes + def maybe_i8conversion1d(obj): + if com.needs_i8_conversion(obj): + obj = obj.view('i8').astype('float64') + return obj + + def maybe_i8conversion(obj): + if isinstance(obj, (np.ndarray, pd.Series)): + return maybe_i8conversion1d(obj) + elif isinstance(obj, pd.DataFrame): + print('here') + values = obj.values.copy() + values = maybe_i8conversion1d(values) + + result = pd.DataFrame(values, + index=obj.index, + columns=obj.columns) + return result + else: + return obj + try: - converted = np.isfinite(obj).astype(float) + converted = np.isfinite(maybe_i8conversion(obj)).astype(float) except TypeError: converted = np.isfinite(obj.astype(float)).astype(float) result = self._constructor(converted, window=window, min_periods=0, @@ -657,6 +683,10 @@ def cov(self, other=None, pairwise=None, ddof=1, **kwargs): window = self._get_window(other) def _get_cov(X, Y): + # GH #12373 : rolling functions error on float32 data + # to avoid potential overflow, cast the data to float64 + X = X.astype('float64') + Y = Y.astype('float64') mean = lambda x: x.rolling(window, self.min_periods, center=self.center).mean(**kwargs) count = (X + Y).rolling(window=window, diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index cc4a6ba61306d..6f8db83cae055 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -16,6 +16,7 @@ assert_frame_equal, assert_panel_equal, assert_index_equal) import pandas.core.datetools as datetools +import pandas.core.common as com import pandas.stats.moments as mom import pandas.core.window as rwindow from pandas.core.base import SpecificationError @@ -289,6 +290,233 @@ def test_deprecations(self): mom.rolling_mean(Series(np.ones(10)), 3, center=True, axis=0) +# GH #12373 : rolling functions error on float32 data +# make sure rolling functions works for different dtypes +class TestDtype(Base): + dtype = None + window = 2 + # the nan value, timedelta uses tslib.iNaT + naval = np.nan + # Do we construct DataFrame for testing. + include_df = True + + # Function Name : (function, result_dtype, expectation_dtype) + funcs = { + 'count': (lambda v: v.count(), 'float64', 'float64'), + 'max': (lambda v: v.max(), 'float64', 'float64'), + 'min': (lambda v: v.min(), 'float64', 'float64'), + 'sum': (lambda v: v.sum(), 'float64', 'float64'), + 'mean': (lambda v: v.mean(), 'float64', 'float64'), + 'std': (lambda v: v.std(), 'float64', 'float64'), + 'var': (lambda v: v.var(), 'float64', 'float64'), + 'median': (lambda v: v.median(), 'float64', 'float64') + } + + def get_expects(self): + expects = { + 'sr1': { + 'count': Series([1, 2, 2, 2, 2]), + 'max': Series([self.naval, 1, 2, 3, 4]), + 'min': Series([self.naval, 0, 1, 2, 3]), + 'sum': Series([self.naval, 1, 3, 5, 7]), + 'mean': Series([self.naval, .5, 1.5, 2.5, 3.5]), + 'std': Series([self.naval] + [np.sqrt(.5)] * 4), + 'var': Series([self.naval, .5, .5, .5, .5]), + 'median': Series([self.naval, .5, 1.5, 2.5, 3.5]) + }, + 'sr2': { + 'count': Series([1, 2, 2, 2, 2]), + 'max': Series([self.naval, 10, 8, 6, 4]), + 'min': Series([self.naval, 8, 6, 4, 2]), + 'sum': Series([self.naval, 18, 14, 10, 6]), + 'mean': Series([self.naval, 9, 7, 5, 3]), + 'std': Series([self.naval] + [np.sqrt(2)] * 4), + 'var': Series([self.naval, 2, 2, 2, 2]), + 'median': Series([self.naval, 9, 7, 5, 3]) + }, + 'df': { + 'count': DataFrame({0: Series([1, 2, 2, 2, 2]), + 1: Series([1, 2, 2, 2, 2])}), + 'max': DataFrame({0: Series([self.naval, 2, 4, 6, 8]), + 1: Series([self.naval, 3, 5, 7, 9])}), + 'min': DataFrame({0: Series([self.naval, 0, 2, 4, 6]), + 1: Series([self.naval, 1, 3, 5, 7])}), + 'sum': DataFrame({0: Series([self.naval, 2, 6, 10, 14]), + 1: Series([self.naval, 4, 8, 12, 16])}), + 'mean': DataFrame({0: Series([self.naval, 1, 3, 5, 7]), + 1: Series([self.naval, 2, 4, 6, 8])}), + 'std': DataFrame({0: Series([self.naval] + [np.sqrt(2)] * 4), + 1: Series([self.naval] + [np.sqrt(2)] * 4)}), + 'var': DataFrame({0: Series([self.naval, 2, 2, 2, 2]), + 1: Series([self.naval, 2, 2, 2, 2])}), + 'median': DataFrame({0: Series([self.naval, 1, 3, 5, 7]), + 1: Series([self.naval, 2, 4, 6, 8])}), + } + } + return expects + + def _create_dtype_data(self, dtype): + sr1 = Series(range(5), dtype=dtype) + sr2 = Series(range(10, 0, -2), dtype=dtype) + + data = { + 'sr1': sr1, + 'sr2': sr2 + } + if self.include_df: + df = DataFrame(np.arange(10).reshape((5, 2)), dtype=dtype) + data['df'] = df + + return data + + def _create_data(self): + super(TestDtype, self)._create_data() + self.data = self._create_dtype_data(self.dtype) + self.expects = self.get_expects() + + def setUp(self): + self._create_data() + + def _cast_result(self, result, from_dtype, to_dtype): + if com.needs_i8_conversion(from_dtype): + if isinstance(result, Series): + result = result.view('i8') + elif isinstance(result, DataFrame): + final = [] + for idx in result: + final.append(Series(result[idx].view('i8'))) + result = pd.concat(final, axis=1).reindex( + columns=result.columns) + return result.astype(to_dtype) + + def test_dtypes(self): + for f_name, d_name in product(self.funcs.keys(), self.data.keys()): + # Specify if the results and expectations + # need to be coerced to a given dtype + # once we changed the return value for roll_, + # we should change coerce behavior here accordingly + f, res_dtype, exp_dtype = self.funcs[f_name] + d = self.data[d_name] + assert_equal = assert_series_equal if isinstance( + d, Series) else assert_frame_equal + exp = self.expects[d_name][f_name] + if exp_dtype: + exp = exp.astype(com.pandas_dtype(exp_dtype)) + + roll = d.rolling(window=self.window) + result = f(roll) + if res_dtype: + result = self._cast_result(result, + self.dtype, + com.pandas_dtype(res_dtype)) + assert_equal(result, exp) + + +class TestDtype_object(TestDtype): + dtype = object + + +class TestDtype_int8(TestDtype): + dtype = np.int8 + + +class TestDtype_int16(TestDtype): + dtype = np.int16 + + +class TestDtype_int32(TestDtype): + dtype = np.int32 + + +class TestDtype_int64(TestDtype): + dtype = np.int64 + + +class TestDtype_uint8(TestDtype): + dtype = np.uint8 + + +class TestDtype_uint16(TestDtype): + dtype = np.uint16 + + +class TestDtype_uint32(TestDtype): + dtype = np.uint32 + + +class TestDtype_uint64(TestDtype): + dtype = np.uint64 + + +class TestDtype_float16(TestDtype): + dtype = np.float16 + + +class TestDtype_float32(TestDtype): + dtype = np.float32 + + +class TestDtype_float64(TestDtype): + dtype = np.float64 + + +class TestDtype_timedelta(TestDtype): + dtype = np.dtype('m8[ns]') + import pandas.tslib as tslib + naval = tslib.iNaT + + funcs = { + 'count': (lambda v: v.count(), None, 'float64'), + 'max': (lambda v: v.max(), None, 'm8[ns]'), + 'min': (lambda v: v.min(), None, 'm8[ns]'), + 'sum': (lambda v: v.sum(), None, 'm8[ns]'), + 'mean': (lambda v: v.mean(), None, 'm8[ns]'), + 'std': (lambda v: v.std(), None, 'm8[ns]'), + 'var': (lambda v: v.var(), None, 'm8[ns]'), + 'median': (lambda v: v.median(), None, 'm8[ns]') + } + + +class TestDtype_datetime64(TestDtype): + dtype = np.dtype('M8[ns]') + + # Rolling functions apply to datetime64 returns float64 + # So we do not need to coerce the results, just coerce expects + funcs = { + 'count': (lambda v: v.count(), None, 'float64'), + 'max': (lambda v: v.max(), None, 'float64'), + 'min': (lambda v: v.min(), None, 'float64'), + 'sum': (lambda v: v.sum(), None, 'float64'), + 'mean': (lambda v: v.mean(), None, 'float64'), + 'std': (lambda v: v.std(), None, 'float64'), + 'var': (lambda v: v.var(), None, 'float64'), + 'median': (lambda v: v.median(), None, 'float64') + } + + +class TestDtype_datetime64UTC(TestDtype): + dtype = 'datetime64[ns, UTC]' + # Set to True once dtype='datetime64[ns, UTC]' is available + # when constructing a DataFrame + include_df = False + + funcs = { + 'count': (lambda v: v.count(), None, 'float64'), + 'max': (lambda v: v.max(), None, 'float64'), + 'min': (lambda v: v.min(), None, 'float64'), + 'sum': (lambda v: v.sum(), None, 'float64'), + 'mean': (lambda v: v.mean(), None, 'float64'), + 'std': (lambda v: v.std(), None, 'float64'), + 'var': (lambda v: v.var(), None, 'float64'), + 'median': (lambda v: v.median(), None, 'float64') + } + + +class TestDtype_category(TestDtype): + dtype = 'category' + include_df = False + + class TestMoments(Base): def setUp(self): From 9a17990d56166872760aaeeb63caf2d67b942c55 Mon Sep 17 00:00:00 2001 From: Bran Yang Date: Thu, 3 Mar 2016 00:08:25 +0800 Subject: [PATCH 2/5] reverse changes for `count`, refactor datatimelike tests --- pandas/core/window.py | 24 +-------------- pandas/tests/test_window.py | 59 ++++++++----------------------------- 2 files changed, 14 insertions(+), 69 deletions(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index cb586b6ffeef5..fc71c59afcdac 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -498,30 +498,8 @@ def count(self): window = self._get_window() window = min(window, len(obj)) if not self.center else window - # GH #12373: rolling functions raise ValueError on float32 data - # enables count for timedelta/datatime64 dtypes - def maybe_i8conversion1d(obj): - if com.needs_i8_conversion(obj): - obj = obj.view('i8').astype('float64') - return obj - - def maybe_i8conversion(obj): - if isinstance(obj, (np.ndarray, pd.Series)): - return maybe_i8conversion1d(obj) - elif isinstance(obj, pd.DataFrame): - print('here') - values = obj.values.copy() - values = maybe_i8conversion1d(values) - - result = pd.DataFrame(values, - index=obj.index, - columns=obj.columns) - return result - else: - return obj - try: - converted = np.isfinite(maybe_i8conversion(obj)).astype(float) + converted = np.isfinite(obj).astype(float) except TypeError: converted = np.isfinite(obj.astype(float)).astype(float) result = self._constructor(converted, window=window, min_periods=0, diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 6f8db83cae055..754919e8b7e06 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -460,60 +460,27 @@ class TestDtype_float64(TestDtype): dtype = np.float64 -class TestDtype_timedelta(TestDtype): - dtype = np.dtype('m8[ns]') - import pandas.tslib as tslib - naval = tslib.iNaT - - funcs = { - 'count': (lambda v: v.count(), None, 'float64'), - 'max': (lambda v: v.max(), None, 'm8[ns]'), - 'min': (lambda v: v.min(), None, 'm8[ns]'), - 'sum': (lambda v: v.sum(), None, 'm8[ns]'), - 'mean': (lambda v: v.mean(), None, 'm8[ns]'), - 'std': (lambda v: v.std(), None, 'm8[ns]'), - 'var': (lambda v: v.var(), None, 'm8[ns]'), - 'median': (lambda v: v.median(), None, 'm8[ns]') - } +class TestDtype_category(TestDtype): + dtype = 'category' + include_df = False -class TestDtype_datetime64(TestDtype): +class TestDatetimeLikeDtype(TestDtype): dtype = np.dtype('M8[ns]') - # Rolling functions apply to datetime64 returns float64 - # So we do not need to coerce the results, just coerce expects - funcs = { - 'count': (lambda v: v.count(), None, 'float64'), - 'max': (lambda v: v.max(), None, 'float64'), - 'min': (lambda v: v.min(), None, 'float64'), - 'sum': (lambda v: v.sum(), None, 'float64'), - 'mean': (lambda v: v.mean(), None, 'float64'), - 'std': (lambda v: v.std(), None, 'float64'), - 'var': (lambda v: v.var(), None, 'float64'), - 'median': (lambda v: v.median(), None, 'float64') - } - + def test_dtypes(self): + with tm.assertRaises(TypeError): + super(TestDatetimeLikeDtype, self).test_dtypes() -class TestDtype_datetime64UTC(TestDtype): - dtype = 'datetime64[ns, UTC]' - # Set to True once dtype='datetime64[ns, UTC]' is available - # when constructing a DataFrame - include_df = False - funcs = { - 'count': (lambda v: v.count(), None, 'float64'), - 'max': (lambda v: v.max(), None, 'float64'), - 'min': (lambda v: v.min(), None, 'float64'), - 'sum': (lambda v: v.sum(), None, 'float64'), - 'mean': (lambda v: v.mean(), None, 'float64'), - 'std': (lambda v: v.std(), None, 'float64'), - 'var': (lambda v: v.var(), None, 'float64'), - 'median': (lambda v: v.median(), None, 'float64') - } +class TestDtype_timedelta(TestDatetimeLikeDtype): + dtype = np.dtype('m8[ns]') -class TestDtype_category(TestDtype): - dtype = 'category' +class TestDtype_datetime64UTC(TestDatetimeLikeDtype): + dtype = 'datetime64[ns, UTC]' + # Turn this to false once DataFrame constructor accept + # 'datetime64[ns, UTC]' as dtype include_df = False From d20adfa87a63b84a43d01b1aa0bd30c12186a2d6 Mon Sep 17 00:00:00 2001 From: Bran Yang Date: Thu, 3 Mar 2016 22:19:54 +0800 Subject: [PATCH 3/5] skip TestDatetimeLikeDtype tests. --- pandas/tests/test_window.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 754919e8b7e06..875705ba2bbbf 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -297,8 +297,6 @@ class TestDtype(Base): window = 2 # the nan value, timedelta uses tslib.iNaT naval = np.nan - # Do we construct DataFrame for testing. - include_df = True # Function Name : (function, result_dtype, expectation_dtype) funcs = { @@ -358,14 +356,13 @@ def get_expects(self): def _create_dtype_data(self, dtype): sr1 = Series(range(5), dtype=dtype) sr2 = Series(range(10, 0, -2), dtype=dtype) + df = DataFrame(np.arange(10).reshape((5, 2)), dtype=dtype) data = { 'sr1': sr1, - 'sr2': sr2 + 'sr2': sr2, + 'df': df } - if self.include_df: - df = DataFrame(np.arange(10).reshape((5, 2)), dtype=dtype) - data['df'] = df return data @@ -464,10 +461,25 @@ class TestDtype_category(TestDtype): dtype = 'category' include_df = False + def _create_dtype_data(self, dtype): + sr1 = Series(range(5), dtype=dtype) + sr2 = Series(range(10, 0, -2), dtype=dtype) + + data = { + 'sr1': sr1, + 'sr2': sr2 + } + + return data + class TestDatetimeLikeDtype(TestDtype): dtype = np.dtype('M8[ns]') + # GH #12373: rolling functions raise ValueError on float32 data + def setUp(self): + raise nose.SkipTest("Skip rolling on DatetimeLike dtypes.") + def test_dtypes(self): with tm.assertRaises(TypeError): super(TestDatetimeLikeDtype, self).test_dtypes() @@ -479,9 +491,6 @@ class TestDtype_timedelta(TestDatetimeLikeDtype): class TestDtype_datetime64UTC(TestDatetimeLikeDtype): dtype = 'datetime64[ns, UTC]' - # Turn this to false once DataFrame constructor accept - # 'datetime64[ns, UTC]' as dtype - include_df = False class TestMoments(Base): From d26a6801b71cb9594332ae0bfd8a2205f33dc270 Mon Sep 17 00:00:00 2001 From: Bran Yang Date: Sun, 6 Mar 2016 00:24:40 +0800 Subject: [PATCH 4/5] remove type cast in tests --- pandas/tests/test_window.py | 106 ++++++++++++++---------------------- 1 file changed, 41 insertions(+), 65 deletions(-) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 875705ba2bbbf..b34e9101131d6 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -295,60 +295,57 @@ def test_deprecations(self): class TestDtype(Base): dtype = None window = 2 - # the nan value, timedelta uses tslib.iNaT - naval = np.nan - # Function Name : (function, result_dtype, expectation_dtype) funcs = { - 'count': (lambda v: v.count(), 'float64', 'float64'), - 'max': (lambda v: v.max(), 'float64', 'float64'), - 'min': (lambda v: v.min(), 'float64', 'float64'), - 'sum': (lambda v: v.sum(), 'float64', 'float64'), - 'mean': (lambda v: v.mean(), 'float64', 'float64'), - 'std': (lambda v: v.std(), 'float64', 'float64'), - 'var': (lambda v: v.var(), 'float64', 'float64'), - 'median': (lambda v: v.median(), 'float64', 'float64') + 'count': lambda v: v.count(), + 'max': lambda v: v.max(), + 'min': lambda v: v.min(), + 'sum': lambda v: v.sum(), + 'mean': lambda v: v.mean(), + 'std': lambda v: v.std(), + 'var': lambda v: v.var(), + 'median': lambda v: v.median() } def get_expects(self): expects = { 'sr1': { - 'count': Series([1, 2, 2, 2, 2]), - 'max': Series([self.naval, 1, 2, 3, 4]), - 'min': Series([self.naval, 0, 1, 2, 3]), - 'sum': Series([self.naval, 1, 3, 5, 7]), - 'mean': Series([self.naval, .5, 1.5, 2.5, 3.5]), - 'std': Series([self.naval] + [np.sqrt(.5)] * 4), - 'var': Series([self.naval, .5, .5, .5, .5]), - 'median': Series([self.naval, .5, 1.5, 2.5, 3.5]) + 'count': Series([1, 2, 2, 2, 2], dtype='float64'), + 'max': Series([np.nan, 1, 2, 3, 4], dtype='float64'), + 'min': Series([np.nan, 0, 1, 2, 3], dtype='float64'), + 'sum': Series([np.nan, 1, 3, 5, 7], dtype='float64'), + 'mean': Series([np.nan, .5, 1.5, 2.5, 3.5], dtype='float64'), + 'std': Series([np.nan] + [np.sqrt(.5)] * 4, dtype='float64'), + 'var': Series([np.nan, .5, .5, .5, .5], dtype='float64'), + 'median': Series([np.nan, .5, 1.5, 2.5, 3.5], dtype='float64') }, 'sr2': { - 'count': Series([1, 2, 2, 2, 2]), - 'max': Series([self.naval, 10, 8, 6, 4]), - 'min': Series([self.naval, 8, 6, 4, 2]), - 'sum': Series([self.naval, 18, 14, 10, 6]), - 'mean': Series([self.naval, 9, 7, 5, 3]), - 'std': Series([self.naval] + [np.sqrt(2)] * 4), - 'var': Series([self.naval, 2, 2, 2, 2]), - 'median': Series([self.naval, 9, 7, 5, 3]) + 'count': Series([1, 2, 2, 2, 2], dtype='float64'), + 'max': Series([np.nan, 10, 8, 6, 4], dtype='float64'), + 'min': Series([np.nan, 8, 6, 4, 2], dtype='float64'), + 'sum': Series([np.nan, 18, 14, 10, 6], dtype='float64'), + 'mean': Series([np.nan, 9, 7, 5, 3], dtype='float64'), + 'std': Series([np.nan] + [np.sqrt(2)] * 4, dtype='float64'), + 'var': Series([np.nan, 2, 2, 2, 2], dtype='float64'), + 'median': Series([np.nan, 9, 7, 5, 3], dtype='float64') }, 'df': { 'count': DataFrame({0: Series([1, 2, 2, 2, 2]), - 1: Series([1, 2, 2, 2, 2])}), - 'max': DataFrame({0: Series([self.naval, 2, 4, 6, 8]), - 1: Series([self.naval, 3, 5, 7, 9])}), - 'min': DataFrame({0: Series([self.naval, 0, 2, 4, 6]), - 1: Series([self.naval, 1, 3, 5, 7])}), - 'sum': DataFrame({0: Series([self.naval, 2, 6, 10, 14]), - 1: Series([self.naval, 4, 8, 12, 16])}), - 'mean': DataFrame({0: Series([self.naval, 1, 3, 5, 7]), - 1: Series([self.naval, 2, 4, 6, 8])}), - 'std': DataFrame({0: Series([self.naval] + [np.sqrt(2)] * 4), - 1: Series([self.naval] + [np.sqrt(2)] * 4)}), - 'var': DataFrame({0: Series([self.naval, 2, 2, 2, 2]), - 1: Series([self.naval, 2, 2, 2, 2])}), - 'median': DataFrame({0: Series([self.naval, 1, 3, 5, 7]), - 1: Series([self.naval, 2, 4, 6, 8])}), + 1: Series([1, 2, 2, 2, 2])}, dtype='float64'), + 'max': DataFrame({0: Series([np.nan, 2, 4, 6, 8]), + 1: Series([np.nan, 3, 5, 7, 9])}, dtype='float64'), + 'min': DataFrame({0: Series([np.nan, 0, 2, 4, 6]), + 1: Series([np.nan, 1, 3, 5, 7])}, dtype='float64'), + 'sum': DataFrame({0: Series([np.nan, 2, 6, 10, 14]), + 1: Series([np.nan, 4, 8, 12, 16])}, dtype='float64'), + 'mean': DataFrame({0: Series([np.nan, 1, 3, 5, 7]), + 1: Series([np.nan, 2, 4, 6, 8])}, dtype='float64'), + 'std': DataFrame({0: Series([np.nan] + [np.sqrt(2)] * 4), + 1: Series([np.nan] + [np.sqrt(2)] * 4)}, dtype='float64'), + 'var': DataFrame({0: Series([np.nan, 2, 2, 2, 2]), + 1: Series([np.nan, 2, 2, 2, 2])}, dtype='float64'), + 'median': DataFrame({0: Series([np.nan, 1, 3, 5, 7]), + 1: Series([np.nan, 2, 4, 6, 8])}, dtype='float64'), } } return expects @@ -374,38 +371,17 @@ def _create_data(self): def setUp(self): self._create_data() - def _cast_result(self, result, from_dtype, to_dtype): - if com.needs_i8_conversion(from_dtype): - if isinstance(result, Series): - result = result.view('i8') - elif isinstance(result, DataFrame): - final = [] - for idx in result: - final.append(Series(result[idx].view('i8'))) - result = pd.concat(final, axis=1).reindex( - columns=result.columns) - return result.astype(to_dtype) - def test_dtypes(self): for f_name, d_name in product(self.funcs.keys(), self.data.keys()): - # Specify if the results and expectations - # need to be coerced to a given dtype - # once we changed the return value for roll_, - # we should change coerce behavior here accordingly - f, res_dtype, exp_dtype = self.funcs[f_name] + f = self.funcs[f_name] d = self.data[d_name] assert_equal = assert_series_equal if isinstance( d, Series) else assert_frame_equal exp = self.expects[d_name][f_name] - if exp_dtype: - exp = exp.astype(com.pandas_dtype(exp_dtype)) roll = d.rolling(window=self.window) result = f(roll) - if res_dtype: - result = self._cast_result(result, - self.dtype, - com.pandas_dtype(res_dtype)) + assert_equal(result, exp) From 9241d76c2429c935e3c6aad2075f4d3b8fdf2d99 Mon Sep 17 00:00:00 2001 From: Bran Yang Date: Sun, 6 Mar 2016 01:07:20 +0800 Subject: [PATCH 5/5] style change --- pandas/tests/test_window.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index b34e9101131d6..223879ab1cdba 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -16,7 +16,6 @@ assert_frame_equal, assert_panel_equal, assert_index_equal) import pandas.core.datetools as datetools -import pandas.core.common as com import pandas.stats.moments as mom import pandas.core.window as rwindow from pandas.core.base import SpecificationError @@ -331,21 +330,29 @@ def get_expects(self): }, 'df': { 'count': DataFrame({0: Series([1, 2, 2, 2, 2]), - 1: Series([1, 2, 2, 2, 2])}, dtype='float64'), + 1: Series([1, 2, 2, 2, 2])}, + dtype='float64'), 'max': DataFrame({0: Series([np.nan, 2, 4, 6, 8]), - 1: Series([np.nan, 3, 5, 7, 9])}, dtype='float64'), + 1: Series([np.nan, 3, 5, 7, 9])}, + dtype='float64'), 'min': DataFrame({0: Series([np.nan, 0, 2, 4, 6]), - 1: Series([np.nan, 1, 3, 5, 7])}, dtype='float64'), + 1: Series([np.nan, 1, 3, 5, 7])}, + dtype='float64'), 'sum': DataFrame({0: Series([np.nan, 2, 6, 10, 14]), - 1: Series([np.nan, 4, 8, 12, 16])}, dtype='float64'), + 1: Series([np.nan, 4, 8, 12, 16])}, + dtype='float64'), 'mean': DataFrame({0: Series([np.nan, 1, 3, 5, 7]), - 1: Series([np.nan, 2, 4, 6, 8])}, dtype='float64'), + 1: Series([np.nan, 2, 4, 6, 8])}, + dtype='float64'), 'std': DataFrame({0: Series([np.nan] + [np.sqrt(2)] * 4), - 1: Series([np.nan] + [np.sqrt(2)] * 4)}, dtype='float64'), + 1: Series([np.nan] + [np.sqrt(2)] * 4)}, + dtype='float64'), 'var': DataFrame({0: Series([np.nan, 2, 2, 2, 2]), - 1: Series([np.nan, 2, 2, 2, 2])}, dtype='float64'), + 1: Series([np.nan, 2, 2, 2, 2])}, + dtype='float64'), 'median': DataFrame({0: Series([np.nan, 1, 3, 5, 7]), - 1: Series([np.nan, 2, 4, 6, 8])}, dtype='float64'), + 1: Series([np.nan, 2, 4, 6, 8])}, + dtype='float64'), } } return expects