From ff03d4bc2385530aab700aa6ebe0679d530f64d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sat, 28 Jan 2023 21:47:31 -0500 Subject: [PATCH 1/3] TYP: added missing __init__ in pandas/core/methods --- .pre-commit-config.yaml | 2 +- pandas/core/methods/__init__.py | 0 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 pandas/core/methods/__init__.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 118d77ff71151..4124b2a863df8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -135,7 +135,7 @@ repos: types: [python] stages: [manual] additional_dependencies: &pyright_dependencies - - pyright@1.1.276 + - pyright@1.1.291 - id: pyright_reportGeneralTypeIssues # note: assumes python env is setup and activated name: pyright reportGeneralTypeIssues diff --git a/pandas/core/methods/__init__.py b/pandas/core/methods/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d From ee55bd802d4788c5c56248b610c2223663a2cf40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sun, 29 Jan 2023 12:00:30 -0500 Subject: [PATCH 2/3] fix most typing issues --- .pre-commit-config.yaml | 2 +- pandas/_testing/contexts.py | 9 ++++++++- pandas/core/frame.py | 2 +- pandas/core/window/common.py | 2 +- pandas/io/parsers/base_parser.py | 8 ++++---- pandas/io/parsers/c_parser_wrapper.py | 3 +-- 6 files changed, 16 insertions(+), 10 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4124b2a863df8..56ce92cfa53cd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -135,7 +135,7 @@ repos: types: [python] stages: [manual] additional_dependencies: &pyright_dependencies - - pyright@1.1.291 + - pyright@1.1.284 - id: pyright_reportGeneralTypeIssues # note: assumes python env is setup and activated name: pyright reportGeneralTypeIssues diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py index d0de085788782..2d7863e8f185f 100644 --- 
a/pandas/_testing/contexts.py +++ b/pandas/_testing/contexts.py @@ -14,6 +14,11 @@ import numpy as np +from pandas._typing import ( + BaseBuffer, + CompressionOptions, + FilePath, +) from pandas.compat import PYPY from pandas.errors import ChainedAssignmentError @@ -23,7 +28,9 @@ @contextmanager -def decompress_file(path, compression) -> Generator[IO[bytes], None, None]: +def decompress_file( + path: FilePath | BaseBuffer, compression: CompressionOptions +) -> Generator[IO[bytes], None, None]: """ Open a compressed file and return a file object. diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1d86c81745a6a..8cff186330adc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9514,7 +9514,7 @@ def _append( def join( self, - other: DataFrame | Series | list[DataFrame | Series], + other: DataFrame | Series | Iterable[DataFrame | Series], on: IndexLabel | None = None, how: MergeHow = "left", lsuffix: str = "", diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index ff1c3c1784bd6..51d9f70c1d018 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -23,7 +23,7 @@ def flex_binary_moment(arg1, arg2, f, pairwise: bool = False): elif isinstance(arg1, ABCDataFrame): from pandas import DataFrame - def dataframe_from_int_dict(data, frame_template): + def dataframe_from_int_dict(data, frame_template) -> DataFrame: result = DataFrame(data, index=frame_template.index) if len(result.columns) > 0: result.columns = frame_template.columns[result.columns] diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 6272f213ccef1..c2d9a6954f717 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -111,7 +111,7 @@ def __init__(self, kwds) -> None: self.index_col = kwds.get("index_col", None) self.unnamed_cols: set = set() self.index_names: Sequence[Hashable] | None = None - self.col_names = None + self.col_names: list | None = None self.parse_dates = 
_validate_parse_dates_arg(kwds.pop("parse_dates", False)) self._parse_date_cols: Iterable = [] @@ -269,9 +269,9 @@ def _should_parse_dates(self, i: int) -> bool: def _extract_multi_indexer_columns( self, header, - index_names: list | None, + index_names: Sequence[Hashable] | None, passed_names: bool = False, - ): + ) -> tuple[list, Sequence[Hashable] | None, list | None, bool]: """ Extract and return the names, index_names, col_names if the column names are a MultiIndex. @@ -1004,7 +1004,7 @@ def _validate_usecols_arg(self, usecols): return usecols, usecols_dtype return usecols, None - def _clean_index_names(self, columns, index_col): + def _clean_index_names(self, columns, index_col) -> tuple[list | None, list, list]: if not is_index_col(index_col): return None, columns, index_col diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py index 551518b623836..dbc7658e11631 100644 --- a/pandas/io/parsers/c_parser_wrapper.py +++ b/pandas/io/parsers/c_parser_wrapper.py @@ -104,8 +104,7 @@ def __init__(self, src: ReadCsvBuffer[str], **kwds) -> None: # error: Cannot determine type of 'names' if self.names is None: # type: ignore[has-type] - # error: Cannot determine type of 'names' - self.names = list(range(self._reader.table_width)) # type: ignore[has-type] + self.names = list(range(self._reader.table_width)) # gh-9755 # From 19ada4336598559974c0611737837ecc14959c9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sun, 29 Jan 2023 15:48:12 -0500 Subject: [PATCH 3/3] use Sequence[Hashable] more consistently --- pandas/io/parsers/base_parser.py | 8 +++++--- pandas/io/parsers/python_parser.py | 3 +-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index c2d9a6954f717..6d2f569ddb753 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -106,12 +106,12 @@ class BadLineHandleMethod(Enum): def 
__init__(self, kwds) -> None: self.names = kwds.get("names") - self.orig_names: list | None = None + self.orig_names: Sequence[Hashable] | None = None self.index_col = kwds.get("index_col", None) self.unnamed_cols: set = set() self.index_names: Sequence[Hashable] | None = None - self.col_names: list | None = None + self.col_names: Sequence[Hashable] | None = None self.parse_dates = _validate_parse_dates_arg(kwds.pop("parse_dates", False)) self._parse_date_cols: Iterable = [] @@ -271,7 +271,9 @@ def _extract_multi_indexer_columns( header, index_names: Sequence[Hashable] | None, passed_names: bool = False, - ) -> tuple[list, Sequence[Hashable] | None, list | None, bool]: + ) -> tuple[ + Sequence[Hashable], Sequence[Hashable] | None, Sequence[Hashable] | None, bool + ]: """ Extract and return the names, index_names, col_names if the column names are a MultiIndex. diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index e97aac82a50ff..62a4e80147780 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -126,7 +126,6 @@ def __init__(self, f: ReadCsvBuffer[str] | list, **kwds) -> None: # Now self.columns has the set of columns that we will process. # The original set is stored in self.original_columns. # error: Cannot determine type of 'index_names' - self.columns: list[Hashable] ( self.columns, self.index_names, @@ -915,7 +914,7 @@ def _clear_buffer(self) -> None: _implicit_index = False def _get_index_name( - self, columns: list[Hashable] + self, columns: Sequence[Hashable] ) -> tuple[Sequence[Hashable] | None, list[Hashable], list[Hashable]]: """ Try several cases to get lines: