From 92c79e165b891eda43aaad8b640a8c09ac244444 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Fri, 12 Feb 2021 22:04:06 -0500 Subject: [PATCH 1/2] TST: extend check for leaked files --- ci/deps/actions-37-db.yaml | 2 +- ci/deps/actions-37-minimum_versions.yaml | 1 + ci/deps/actions-38-locale.yaml | 2 +- ci/deps/azure-macos-37.yaml | 2 +- ci/deps/azure-windows-37.yaml | 2 +- doc/source/getting_started/install.rst | 11 ++ doc/source/whatsnew/v1.3.0.rst | 2 + pandas/compat/_optional.py | 2 +- pandas/tests/frame/test_api.py | 2 +- pandas/tests/io/conftest.py | 16 +++ pandas/tests/io/excel/conftest.py | 20 +--- pandas/tests/io/excel/test_readers.py | 2 - pandas/tests/io/formats/test_format.py | 2 +- .../io/parser/common/test_file_buffer_url.py | 7 +- .../io/parser/common/test_read_errors.py | 3 +- pandas/tests/io/sas/test_xport.py | 9 +- pandas/tests/io/test_common.py | 10 ++ .../tests/resample/test_resampler_grouper.py | 2 +- pandas/util/_test_decorators.py | 100 +++++++++++------- setup.cfg | 1 + 20 files changed, 121 insertions(+), 77 deletions(-) diff --git a/ci/deps/actions-37-db.yaml b/ci/deps/actions-37-db.yaml index 5381caaa242cf..dc6658cc45690 100644 --- a/ci/deps/actions-37-db.yaml +++ b/ci/deps/actions-37-db.yaml @@ -51,4 +51,4 @@ dependencies: - brotlipy - coverage - pandas-datareader - - pyxlsb + - pyxlsb>=1.0.8 diff --git a/ci/deps/actions-37-minimum_versions.yaml b/ci/deps/actions-37-minimum_versions.yaml index e14e51a36be31..aa5eec5e19b92 100644 --- a/ci/deps/actions-37-minimum_versions.yaml +++ b/ci/deps/actions-37-minimum_versions.yaml @@ -24,6 +24,7 @@ dependencies: - python-dateutil=2.7.3 - pytz=2017.3 - pyarrow=0.15 + - pyxlsb>=1.0.8 - scipy=1.2 - xlrd=1.2.0 - xlsxwriter=1.0.2 diff --git a/ci/deps/actions-38-locale.yaml b/ci/deps/actions-38-locale.yaml index 629804c71e726..4838fb1ded447 100644 --- a/ci/deps/actions-38-locale.yaml +++ b/ci/deps/actions-38-locale.yaml @@ -38,4 +38,4 @@ dependencies: - pyarrow=1.0.0 - pip - pip: - - pyxlsb + - pyxlsb>=1.0.8 diff --git a/ci/deps/azure-macos-37.yaml b/ci/deps/azure-macos-37.yaml index d667adddda859..3e34af1a666c8 100644 --- a/ci/deps/azure-macos-37.yaml +++ b/ci/deps/azure-macos-37.yaml @@ -33,4 +33,4 @@ dependencies: - pip: - cython>=0.29.21 - pyreadstat - - pyxlsb + - pyxlsb>=1.0.8 diff --git a/ci/deps/azure-windows-37.yaml b/ci/deps/azure-windows-37.yaml index e7ac4c783b855..e9ec808524a73 100644 --- a/ci/deps/azure-windows-37.yaml +++ b/ci/deps/azure-windows-37.yaml @@ -39,4 +39,4 @@ dependencies: - pyreadstat - pip - pip: - - pyxlsb + - pyxlsb>=1.0.8 diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index a9c3d637a41e3..821c6e85297fe 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -275,6 +275,17 @@ Dependency Minimum Version Notes SciPy 1.12.0 Miscellaneous statistical functions numba 0.46.0 Alternative execution engine for rolling operations (see :ref:`Enhancing Performance `) +openpyxl 2.6.0 Reading / writing for xlsx files +pandas-gbq 0.12.0 Google Big Query access +psycopg2 2.7 PostgreSQL engine for sqlalchemy +pyarrow 0.15.0 Parquet, ORC, and feather reading / writing +pymysql 0.8.1 MySQL engine for sqlalchemy +pyreadstat SPSS files (.sav) reading +pyxlsb 1.0.8 Reading for xlsb files +qtpy Clipboard I/O +s3fs 0.4.0 Amazon S3 access +tabulate 0.8.7 Printing in Markdown-friendly format (see `tabulate`_) +>>>>>>> TST: extend check for leaked files xarray 0.12.3 pandas-like API for N-dimensional data ========================= ================== ============================================================= diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 63902b53ea36d..3a50c804b0630 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -431,6 +431,8 @@ Optional libraries below the lowest tested version may still work, but are not c +-----------------+-----------------+---------+ | pytables | 3.5.1 | | +-----------------+-----------------+---------+ +| pyxlsb | 1.0.8 | X | ++-----------------+-----------------+---------+ | s3fs | 0.4.0 | | +-----------------+-----------------+---------+ | scipy | 1.2.0 | | diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index a26da75d921ef..9652b918044cd 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -21,7 +21,7 @@ "pandas_gbq": "0.12.0", "pyarrow": "0.15.0", "pytest": "5.0.1", - "pyxlsb": "1.0.6", + "pyxlsb": "1.0.8", "s3fs": "0.4.0", "scipy": "1.2.0", "sqlalchemy": "1.2.8", diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 76cfd77d254f2..aa138b58348f5 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -269,7 +269,7 @@ def _check_f(base, f): _check_f(d.copy(), f) @async_mark() - @td.check_file_leaks + @td.check_file_leaks() async def test_tab_complete_warning(self, ip, frame_or_series): # GH 16409 pytest.importorskip("IPython", minversion="6.0.0") diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py index 5d4705dbe7d77..83ed90336549a 100644 --- a/pandas/tests/io/conftest.py +++ b/pandas/tests/io/conftest.py @@ -6,6 +6,8 @@ import pytest +import pandas.util._test_decorators as td + import pandas._testing as tm from pandas.io.parsers import read_csv @@ -163,3 +165,17 @@ def add_tips_files(bucket_name): while cli.list_buckets()["Buckets"] and timeout > 0: time.sleep(0.1) timeout -= 0.1 + + +@pytest.fixture(autouse=True) +def check_for_file_leaks(request): + """ + Fixture to run around every test to ensure that we are not leaking files. + + See also + -------- + _test_decorators.check_file_leaks + """ + # GH#30162 + with td.check_file_leaks(ignore_connections=True): + yield diff --git a/pandas/tests/io/excel/conftest.py b/pandas/tests/io/excel/conftest.py index 0455e0d61ad97..958b9b5f777fd 100644 --- a/pandas/tests/io/excel/conftest.py +++ b/pandas/tests/io/excel/conftest.py @@ -43,23 +43,9 @@ def read_ext(request): return request.param +# override auto-fixture to also check connections @pytest.fixture(autouse=True) -def check_for_file_leaks(): - """ - Fixture to run around every test to ensure that we are not leaking files. - - See also - -------- - _test_decorators.check_file_leaks - """ +def check_for_file_leaks(request): # GH#30162 - psutil = td.safe_import("psutil") - if not psutil: - yield - - else: - proc = psutil.Process() - flist = proc.open_files() + with td.check_file_leaks(ignore_connections=False): yield - flist2 = proc.open_files() - assert flist == flist2 diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 382c8412ab050..cb4c3e76135da 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -797,7 +797,6 @@ def test_read_from_pathlib_path(self, read_ext): tm.assert_frame_equal(expected, actual) @td.skip_if_no("py.path") - @td.check_file_leaks def test_read_from_py_localpath(self, read_ext): # GH12655 @@ -811,7 +810,6 @@ def test_read_from_py_localpath(self, read_ext): tm.assert_frame_equal(expected, actual) - @td.check_file_leaks def test_close_from_py_localpath(self, read_ext): # GH31467 diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 17445b2f134d3..28a90f3fcccf1 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -3266,7 +3266,7 @@ def test_format_percentiles_integer_idx(): assert result == expected -@td.check_file_leaks +@td.check_file_leaks() def test_repr_html_ipython_config(ip): code = textwrap.dedent( """\ diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py index 2a3d7328aa662..d8305f578b6e6 100644 --- a/pandas/tests/io/parser/common/test_file_buffer_url.py +++ b/pandas/tests/io/parser/common/test_file_buffer_url.py @@ -417,15 +417,12 @@ def test_file_descriptor_leak(all_parsers): parser = all_parsers with tm.ensure_clean() as path: - - def test(): + with td.check_file_leaks(): with pytest.raises(EmptyDataError, match="No columns to parse from file"): parser.read_csv(path) - td.check_file_leaks(test)() - -@td.check_file_leaks +@td.check_file_leaks() def test_memory_map(all_parsers, csv_dir_path): mmap_file = os.path.join(csv_dir_path, "test_mmap.csv") parser = all_parsers diff --git a/pandas/tests/io/parser/common/test_read_errors.py b/pandas/tests/io/parser/common/test_read_errors.py index 4e3d99af685ec..90c914dad4f2f 100644 --- a/pandas/tests/io/parser/common/test_read_errors.py +++ b/pandas/tests/io/parser/common/test_read_errors.py @@ -2,6 +2,7 @@ Tests that work on both the Python and C engines but do not have a specific classification into the other test modules. """ + import codecs import csv from io import StringIO @@ -217,7 +218,7 @@ def test_null_byte_char(all_parsers): parser.read_csv(StringIO(data), names=names) -@td.check_file_leaks +@td.check_file_leaks() def test_open_file(all_parsers): # GH 39024 parser = all_parsers diff --git a/pandas/tests/io/sas/test_xport.py b/pandas/tests/io/sas/test_xport.py index a8713f5bf36c9..69ec6f4451703 100644 --- a/pandas/tests/io/sas/test_xport.py +++ b/pandas/tests/io/sas/test_xport.py @@ -31,7 +31,7 @@ def setup_method(self, datapath): self.file03 = os.path.join(self.dirpath, "DRXFCD_G.xpt") self.file04 = os.path.join(self.dirpath, "paxraw_d_short.xpt") - with td.file_leak_context(): + with td.check_file_leaks(): yield def test1_basic(self): @@ -129,10 +129,9 @@ def test2_binary(self): numeric_as_float(data_csv) with open(self.file02, "rb") as fd: - with td.file_leak_context(): - # GH#35693 ensure that if we pass an open file, we - # dont incorrectly close it in read_sas - data = read_sas(fd, format="xport") + # GH#35693 ensure that if we pass an open file, we + # dont incorrectly close it in read_sas + data = read_sas(fd, format="xport") tm.assert_frame_equal(data, data_csv) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index d882eb930137b..00ceeec3e9ab4 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -524,3 +524,13 @@ def test_bad_encdoing_errors(): with tm.ensure_clean() as path: with pytest.raises(ValueError, match="Invalid value for `encoding_errors`"): icom.get_handle(path, "w", errors="bad") + + +def test_resource_warnings(): + msg = '[ResourceWarning("unclosed file *)]' + with tm.ensure_clean("_resource_test") as path: + with pytest.raises(AssertionError, match=msg): + with td.check_file_leaks(): + handle = open(path) + # prevent the auto fixture from throwing an AssertionError + handle.close() diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 999d8a6c90ba2..51259b04fb4aa 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -23,7 +23,7 @@ @async_mark() -@td.check_file_leaks +@td.check_file_leaks() async def test_tab_complete_ipython6_warning(ip): from IPython.core.completer import provisionalcompleter diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index 752ed43849d2b..0aef33a90f61a 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -23,14 +23,11 @@ def test_foo(): For more information, refer to the ``pytest`` documentation on ``skipif``. """ -from contextlib import contextmanager -from distutils.version import LooseVersion import locale -from typing import ( - Callable, - Optional, -) import warnings +from contextlib import ContextDecorator +from distutils.version import LooseVersion +from typing import Callable, Optional import numpy as np import pytest @@ -42,11 +39,7 @@ def test_foo(): is_platform_windows, ) from pandas.compat._optional import import_optional_dependency - -from pandas.core.computation.expressions import ( - NUMEXPR_INSTALLED, - USE_NUMEXPR, -) +from pandas.core.computation.expressions import NUMEXPR_INSTALLED, USE_NUMEXPR def safe_import(mod_name: str, min_version: Optional[str] = None): @@ -245,38 +238,67 @@ def documented_fixture(fixture): return documented_fixture -def check_file_leaks(func) -> Callable: - """ - Decorate a test function to check that we are not leaking file descriptors. +class check_file_leaks(ContextDecorator): """ - with file_leak_context(): - return func + Use psutil and ResourceWarning to identify forgotten resources. - -@contextmanager -def file_leak_context(): - """ - ContextManager analogue to check_file_leaks. + ResourceWarnings that contain the string 'ssl' are ignored as they are very likely + caused by boto3 (GH#17058). """ - psutil = safe_import("psutil") - if not psutil: - yield - else: - proc = psutil.Process() - flist = proc.open_files() - conns = proc.connections() - - yield - - flist2 = proc.open_files() - # on some builds open_files includes file position, which we _dont_ - # expect to remain unchanged, so we need to compare excluding that - flist_ex = [(x.path, x.fd) for x in flist] - flist2_ex = [(x.path, x.fd) for x in flist2] - assert flist2_ex == flist_ex, (flist2, flist) - conns2 = proc.connections() - assert conns2 == conns, (conns2, conns) + def __init__(self, ignore_connections: bool = False): + super().__init__() + self.ignore_connections = ignore_connections + + def __enter__(self): + # catch warnings + self.catcher = warnings.catch_warnings(record=True) + self.record = self.catcher.__enter__() + + # get files and connections + self.psutil = safe_import("psutil") + if self.psutil: + self.proc = self.psutil.Process() + self.flist = self.proc.open_files() + self.conns = self.proc.connections() + + return self + + def __exit__(self, *exc): + self.catcher.__exit__(*exc) + + # re-throw warnings + fields = ("category", "source", "filename", "lineno") + for message in self.record: + warnings.warn_explicit( + message.message, **{field: getattr(message, field) for field in fields} + ) + + # assert no non-ssl ResourceWarnings + messages = [ + warn.message + for warn in self.record + if issubclass(warn.category, ResourceWarning) + and "ssl" not in str(warn.message) + ] + assert not messages, f"{messages}" + + # psutil + if self.psutil: + flist2 = self.proc.open_files() + + # on some builds open_files includes file position, which we _dont_ + # expect to remain unchanged, so we need to compare excluding that + flist_ex = {(x.path, x.fd) for x in self.flist} + flist2_ex = {(x.path, x.fd) for x in flist2} + assert ( + flist2_ex == flist_ex + ), f"{flist_ex - flist2_ex} {flist2_ex - flist_ex}" + + if not self.ignore_connections: + conns = set(self.conns) + conns2 = set(self.proc.connections()) + assert conns2 == conns, f"{conns - conns2} {conns2 - conns}" def async_mark(): diff --git a/setup.cfg b/setup.cfg index a0b6a0cdfc260..11742d62c8a4c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -118,6 +118,7 @@ xfail_strict = True filterwarnings = error:Sparse:FutureWarning error:The SparseArray:FutureWarning + always::ResourceWarning junit_family = xunit2 [codespell] From 6c62d418bdb93830542555bb673c546310c245fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Tue, 30 Mar 2021 22:37:33 -0400 Subject: [PATCH 2/2] run tests sequentially --- ci/run_tests.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ci/run_tests.sh b/ci/run_tests.sh index d73940c1010ad..37cc5db7a08bb 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -1,5 +1,7 @@ #!/bin/bash -e +PYTEST_WORKERS=0 + # Workaround for pytest-xdist (it collects different tests in the workers if PYTHONHASHSEED is not set) # https://github.com/pytest-dev/pytest/issues/920 # https://github.com/pytest-dev/pytest/issues/1075 @@ -19,7 +21,7 @@ if [[ $(uname) == "Linux" && -z $DISPLAY ]]; then XVFB="xvfb-run " fi -PYTEST_CMD="${XVFB}pytest -m \"$PATTERN\" -n $PYTEST_WORKERS --dist=loadfile -s --strict-markers --durations=30 --junitxml=test-data.xml $TEST_ARGS $COVERAGE pandas" +PYTEST_CMD="${XVFB}pytest -m \"$PATTERN\" -n 0 --dist=no -s --strict-markers --durations=30 --junitxml=test-data.xml $TEST_ARGS $COVERAGE pandas" if [[ $(uname) != "Linux" && $(uname) != "Darwin" ]]; then # GH#37455 windows py38 build appears to be running out of memory @@ -30,7 +32,7 @@ fi echo $PYTEST_CMD sh -c "$PYTEST_CMD" -PYTEST_AM_CMD="PANDAS_DATA_MANAGER=array pytest -m \"$PATTERN and arraymanager\" -n $PYTEST_WORKERS --dist=loadfile -s --strict-markers --durations=30 --junitxml=test-data.xml $TEST_ARGS $COVERAGE pandas" +PYTEST_AM_CMD="PANDAS_DATA_MANAGER=array pytest -m \"$PATTERN and arraymanager\" -n 0 --dist=no -s --strict-markers --durations=30 --junitxml=test-data.xml $TEST_ARGS $COVERAGE pandas" echo $PYTEST_AM_CMD sh -c "$PYTEST_AM_CMD"