GH: 624 - Added dtype_backend to all read_* functions (#655)

ramvikrams · Dr-Irv · web-flow · commit c67da6325b26 · 2023-04-24T13:56:01.000-04:00
* Added `dtype_backend` to all functions

* updated

* Update test_io.py

* Update test_io.py

* Update test_io.py

* Update test_io.py

* Update test_io.py

* Update test_io.py

* Update test_io.py

* Update test_io.py

* Update tests/test_io.py

Co-authored-by: Irv Lustig <irv@princeton.com>

* corrected the tests

* Update series.pyi

* added the comment from `test_read_sql_table`

---------

Co-authored-by: Irv Lustig <irv@princeton.com>
diff --git a/pandas-stubs/core/generic.pyi b/pandas-stubs/core/generic.pyi
@@ -29,6 +29,7 @@ from pandas._typing import (
     CSVQuoting,
     Dtype,
     DtypeArg,
+    DtypeBackend,
     FilePath,
     FileWriteMode,
     FillnaOptions,
@@ -373,6 +374,7 @@ class NDFrame(PandasObject, indexing.IndexingMixin):
         convert_string: _bool = ...,
         convert_integer: _bool = ...,
         convert_boolean: _bool = ...,
+        dtype_backend: DtypeBackend = ...,
     ) -> NDFrameT: ...
     def fillna(
         self,
diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi
@@ -96,6 +96,7 @@ from pandas._typing import (
     CategoryDtypeArg,
     ComplexDtypeArg,
     CompressionOptions,
+    DtypeBackend,
     DtypeObj,
     FilePath,
     FillnaOptions,
@@ -1133,6 +1134,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
         convert_string: _bool = ...,
         convert_integer: _bool = ...,
         convert_boolean: _bool = ...,
+        dtype_backend: DtypeBackend = ...,
     ) -> Series[S1]: ...
     @overload
     def ffill(
diff --git a/pandas-stubs/core/tools/numeric.pyi b/pandas-stubs/core/tools/numeric.pyi
@@ -7,7 +7,9 @@ import numpy as np
 import pandas as pd
 from typing_extensions import TypeAlias
 
+from pandas._libs.lib import NoDefault
 from pandas._typing import (
+    DtypeBackend,
     IgnoreRaiseCoerce,
     Scalar,
     npt,
@@ -20,22 +22,26 @@ def to_numeric(
     arg: Scalar,
     errors: Literal["raise", "coerce"] = ...,
     downcast: _Downcast = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> float: ...
 @overload
 def to_numeric(
     arg: Scalar,
     errors: Literal["ignore"],
     downcast: _Downcast = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> Scalar: ...
 @overload
 def to_numeric(
     arg: list | tuple | np.ndarray,
     errors: IgnoreRaiseCoerce = ...,
     downcast: _Downcast = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> npt.NDArray: ...
 @overload
 def to_numeric(
     arg: pd.Series,
     errors: IgnoreRaiseCoerce = ...,
     downcast: _Downcast = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> pd.Series: ...
diff --git a/pandas-stubs/io/clipboards.pyi b/pandas-stubs/io/clipboards.pyi
@@ -12,11 +12,13 @@ from typing import (
 
 from pandas.core.frame import DataFrame
 
+from pandas._libs.lib import NoDefault
 from pandas._typing import (
     CompressionOptions,
     CSVEngine,
     CSVQuoting,
     DtypeArg,
+    DtypeBackend,
     ListLikeHashable,
     StorageOptions,
     UsecolsArgType,
@@ -28,6 +30,7 @@ from pandas.io.parsers import TextFileReader
 def read_clipboard(
     sep: str | None = ...,
     *,
+    dtype_backend: DtypeBackend | NoDefault = ...,
     delimiter: str | None = ...,
     header: int | Sequence[int] | Literal["infer"] | None = ...,
     names: ListLikeHashable | None = ...,
@@ -85,6 +88,7 @@ def read_clipboard(
 def read_clipboard(
     sep: str | None = ...,
     *,
+    dtype_backend: DtypeBackend | NoDefault = ...,
     delimiter: str | None = ...,
     header: int | Sequence[int] | Literal["infer"] | None = ...,
     names: ListLikeHashable | None = ...,
@@ -142,6 +146,7 @@ def read_clipboard(
 def read_clipboard(
     sep: str | None = ...,
     *,
+    dtype_backend: DtypeBackend | NoDefault = ...,
     delimiter: str | None = ...,
     header: int | Sequence[int] | Literal["infer"] | None = ...,
     names: ListLikeHashable | None = ...,
diff --git a/pandas-stubs/io/excel/_base.pyi b/pandas-stubs/io/excel/_base.pyi
@@ -19,8 +19,10 @@ import pyxlsb.workbook
 from typing_extensions import Self
 from xlrd.book import Book
 
+from pandas._libs.lib import NoDefault
 from pandas._typing import (
     Dtype,
+    DtypeBackend,
     FilePath,
     ListLikeHashable,
     ReadBuffer,
@@ -66,6 +68,7 @@ def read_excel(
     comment: str | None = ...,
     skipfooter: int = ...,
     storage_options: StorageOptions = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> dict[int | str, DataFrame]: ...
 @overload
 def read_excel(
@@ -104,6 +107,7 @@ def read_excel(
     comment: str | None = ...,
     skipfooter: int = ...,
     storage_options: StorageOptions = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> DataFrame: ...
 
 class ExcelWriter:
diff --git a/pandas-stubs/io/feather_format.pyi b/pandas-stubs/io/feather_format.pyi
@@ -1,6 +1,8 @@
 from pandas import DataFrame
 
+from pandas._libs.lib import NoDefault
 from pandas._typing import (
+    DtypeBackend,
     FilePath,
     HashableT,
     ReadBuffer,
@@ -12,4 +14,5 @@ def read_feather(
     columns: list[HashableT] | None = ...,
     use_threads: bool = ...,
     storage_options: StorageOptions = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> DataFrame: ...
diff --git a/pandas-stubs/io/html.pyi b/pandas-stubs/io/html.pyi
@@ -12,7 +12,9 @@ from typing import (
 
 from pandas.core.frame import DataFrame
 
+from pandas._libs.lib import NoDefault
 from pandas._typing import (
+    DtypeBackend,
     FilePath,
     HashableT1,
     HashableT2,
@@ -49,4 +51,5 @@ def read_html(
     keep_default_na: bool = ...,
     displayed_only: bool = ...,
     extract_links: Literal["header", "footer", "body", "all"] | None = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> list[DataFrame]: ...
diff --git a/pandas-stubs/io/json/_json.pyi b/pandas-stubs/io/json/_json.pyi
@@ -9,9 +9,11 @@ from typing import (
 from pandas.core.frame import DataFrame
 from pandas.core.series import Series
 
+from pandas._libs.lib import NoDefault
 from pandas._typing import (
     CompressionOptions,
     DtypeArg,
+    DtypeBackend,
     FilePath,
     HashableT,
     JsonFrameOrient,
@@ -43,6 +45,7 @@ def read_json(
     compression: CompressionOptions = ...,
     nrows: int | None = ...,
     storage_options: StorageOptions = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> JsonReader[Series]: ...
 @overload
 def read_json(
@@ -66,6 +69,7 @@ def read_json(
     compression: CompressionOptions = ...,
     nrows: int | None = ...,
     storage_options: StorageOptions = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> JsonReader[DataFrame]: ...
 @overload
 def read_json(
@@ -89,6 +93,7 @@ def read_json(
     compression: CompressionOptions = ...,
     nrows: int | None = ...,
     storage_options: StorageOptions = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> Series: ...
 @overload
 def read_json(
@@ -112,6 +117,7 @@ def read_json(
     compression: CompressionOptions = ...,
     nrows: int | None = ...,
     storage_options: StorageOptions = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> DataFrame: ...
 
 class JsonReader(abc.Iterator, Generic[NDFrameT]):
diff --git a/pandas-stubs/io/orc.pyi b/pandas-stubs/io/orc.pyi
@@ -2,7 +2,9 @@ from typing import Any
 
 from pandas import DataFrame
 
+from pandas._libs.lib import NoDefault
 from pandas._typing import (
+    DtypeBackend,
     FilePath,
     HashableT,
     ReadBuffer,
@@ -11,5 +13,6 @@ from pandas._typing import (
 def read_orc(
     path: FilePath | ReadBuffer[bytes],
     columns: list[HashableT] | None = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
     **kwargs: Any,
 ) -> DataFrame: ...
diff --git a/pandas-stubs/io/parsers/readers.pyi b/pandas-stubs/io/parsers/readers.pyi
@@ -18,11 +18,13 @@ from typing import (
 from pandas.core.frame import DataFrame
 from typing_extensions import Self
 
+from pandas._libs.lib import NoDefault
 from pandas._typing import (
     CompressionOptions,
     CSVEngine,
     CSVQuoting,
     DtypeArg,
+    DtypeBackend,
     FilePath,
     ListLikeHashable,
     ReadCsvBuffer,
@@ -91,6 +93,7 @@ def read_csv(
     memory_map: bool = ...,
     float_precision: Literal["high", "legacy", "round_trip"] | None = ...,
     storage_options: StorageOptions | None = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> TextFileReader: ...
 @overload
 def read_csv(
@@ -151,6 +154,7 @@ def read_csv(
     memory_map: bool = ...,
     float_precision: Literal["high", "legacy", "round_trip"] | None = ...,
     storage_options: StorageOptions | None = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> TextFileReader: ...
 @overload
 def read_csv(
@@ -211,6 +215,7 @@ def read_csv(
     memory_map: bool = ...,
     float_precision: Literal["high", "legacy", "round_trip"] | None = ...,
     storage_options: StorageOptions | None = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> DataFrame: ...
 @overload
 def read_table(
@@ -396,6 +401,7 @@ def read_fwf(
     colspecs: Sequence[tuple[int, int]] | Literal["infer"] | None = ...,
     widths: Sequence[int] | None = ...,
     infer_nrows: int = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
     iterator: Literal[True],
     chunksize: int | None = ...,
     **kwds: Any,
@@ -407,6 +413,7 @@ def read_fwf(
     colspecs: Sequence[tuple[int, int]] | Literal["infer"] | None = ...,
     widths: Sequence[int] | None = ...,
     infer_nrows: int = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
     iterator: bool = ...,
     chunksize: int,
     **kwds: Any,
@@ -418,6 +425,7 @@ def read_fwf(
     colspecs: Sequence[tuple[int, int]] | Literal["infer"] | None = ...,
     widths: Sequence[int] | None = ...,
     infer_nrows: int = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
     iterator: Literal[False] = ...,
     chunksize: None = ...,
     **kwds: Any,
diff --git a/pandas-stubs/io/spss.pyi b/pandas-stubs/io/spss.pyi
@@ -1,6 +1,8 @@
 from pandas.core.frame import DataFrame
 
+from pandas._libs.lib import NoDefault
 from pandas._typing import (
+    DtypeBackend,
     FilePath,
     HashableT,
 )
@@ -9,4 +11,5 @@ def read_spss(
     path: FilePath,
     usecols: list[HashableT] | None = ...,
     convert_categoricals: bool = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> DataFrame: ...
diff --git a/pandas-stubs/io/sql.pyi b/pandas-stubs/io/sql.pyi
@@ -40,6 +40,7 @@ def read_sql_table(
     columns: list[str] | None = ...,
     *,
     chunksize: int,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> Generator[DataFrame, None, None]: ...
 @overload
 def read_sql_table(
@@ -51,6 +52,7 @@ def read_sql_table(
     parse_dates: list[str] | dict[str, str] | dict[str, dict[str, Any]] | None = ...,
     columns: list[str] | None = ...,
     chunksize: None = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> DataFrame: ...
 @overload
 def read_sql_query(
@@ -63,6 +65,7 @@ def read_sql_query(
     *,
     chunksize: int,
     dtype: DtypeArg | None = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> Generator[DataFrame, None, None]: ...
 @overload
 def read_sql_query(
@@ -74,6 +77,7 @@ def read_sql_query(
     parse_dates: list[str] | dict[str, str] | dict[str, dict[str, Any]] | None = ...,
     chunksize: None = ...,
     dtype: DtypeArg | None = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> DataFrame: ...
 @overload
 def read_sql(
diff --git a/pandas-stubs/io/xml.pyi b/pandas-stubs/io/xml.pyi
@@ -2,10 +2,12 @@ from collections.abc import Sequence
 
 from pandas.core.frame import DataFrame
 
+from pandas._libs.lib import NoDefault
 from pandas._typing import (
     CompressionOptions,
     ConvertersArg,
     DtypeArg,
+    DtypeBackend,
     FilePath,
     ParseDatesArg,
     ReadBuffer,
@@ -31,4 +33,5 @@ def read_xml(
     iterparse: dict[str, list[str]] | None = ...,
     compression: CompressionOptions = ...,
     storage_options: StorageOptions = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> DataFrame: ...
diff --git a/tests/test_frame.py b/tests/test_frame.py
@@ -2597,3 +2597,9 @@ def test_suffix_prefix_index() -> None:
     check(
         assert_type(df.add_prefix("_col", axis="columns"), pd.DataFrame), pd.DataFrame
     )
+
+
+def test_convert_dtypes_dtype_backend() -> None:
+    df = pd.DataFrame({"A": [1, 2, 3, 4], "B": [3, 4, 5, 6]})
+    dfn = df.convert_dtypes(dtype_backend="numpy_nullable")
+    check(assert_type(dfn, pd.DataFrame), pd.DataFrame)
diff --git a/tests/test_io.py b/tests/test_io.py
diff --git a/tests/test_series.py b/tests/test_series.py