From 61583fc32ae9f94f3e692f845e4930f0b6772007 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 24 May 2017 06:31:43 -0400 Subject: [PATCH] ENH: add nthreads option to feather-format IO closes #16359 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/io/feather_format.py | 13 +++++++++++-- pandas/tests/io/test_feather.py | 10 ++++++++-- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 4cb55ec6b117b..13e0d677b79e8 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -35,6 +35,7 @@ Other Enhancements - ``RangeIndex.append`` now returns a ``RangeIndex`` object when possible (:issue:`16212`) - :func:`to_pickle` has gained a protocol parameter (:issue:`16252`). By default, this parameter is set to `HIGHEST_PROTOCOL `__ - :func:`api.types.infer_dtype` now infers decimals. (:issue: `15690`) +- :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`) .. _whatsnew_0210.api_breaking: diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index 8bdb23fc1ae6a..86d58caa5e816 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -43,6 +43,7 @@ def to_feather(df, path): df : DataFrame path : string File path + """ path = _stringify_path(path) if not isinstance(df, DataFrame): @@ -83,7 +84,7 @@ def to_feather(df, path): feather.write_dataframe(df, path) -def read_feather(path): +def read_feather(path, nthreads=1): """ Load a feather-format object from the file path @@ -93,6 +94,10 @@ def read_feather(path): ---------- path : string File path + nthreads : int, default 1 + Number of CPU threads to use when reading to pandas.DataFrame + + .. 
versionadded:: 0.21.0 Returns ------- @@ -102,4 +107,8 @@ def read_feather(path): feather = _try_import() path = _stringify_path(path) - return feather.read_dataframe(path) + + if feather.__version__ < LooseVersion('0.4.0'): + return feather.read_dataframe(path) + + return feather.read_dataframe(path, nthreads=nthreads) diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index 42ad9d3e0d8fe..dadfe7ca87e48 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -27,11 +27,11 @@ def check_error_on_write(self, df, exc): with ensure_clean() as path: to_feather(df, path) - def check_round_trip(self, df): + def check_round_trip(self, df, **kwargs): with ensure_clean() as path: to_feather(df, path) - result = read_feather(path) + result = read_feather(path, **kwargs) assert_frame_equal(result, df) def test_error(self): @@ -98,6 +98,12 @@ def test_unsupported_other(self): df = pd.DataFrame({'a': pd.period_range('2013', freq='M', periods=3)}) self.check_error_on_write(df, ValueError) + @pytest.mark.skipif(fv < '0.4.0', reason='new in 0.4.0') + def test_rw_nthreads(self): + + df = pd.DataFrame({'A': np.arange(100000)}) + self.check_round_trip(df, nthreads=2) + def test_write_with_index(self): df = pd.DataFrame({'A': [1, 2, 3]})