Skip to content

Commit 34efdbc

Browse files
committed
ENH: Synchronize io/stata with pandas master
Synchronize and remove classes not part of the public API
1 parent 30a87ca commit 34efdbc

File tree

2 files changed

+128
-57
lines changed

2 files changed

+128
-57
lines changed

pandas-stubs/io/stata.pyi

Lines changed: 35 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ from typing import (
66
Hashable,
77
Literal,
88
Sequence,
9+
overload,
910
)
1011

1112
import numpy as np
@@ -22,6 +23,7 @@ from pandas._typing import (
2223
WriteBuffer,
2324
)
2425

26+
@overload
2527
def read_stata(
2628
path: FilePath | ReadBuffer[bytes],
2729
convert_dates: bool = ...,
@@ -32,57 +34,46 @@ def read_stata(
3234
columns: list[HashableT] | None = ...,
3335
order_categoricals: bool = ...,
3436
chunksize: int | None = ...,
35-
iterator: bool = ...,
37+
*,
38+
iterator: Literal[True],
3639
compression: CompressionOptions = ...,
3740
storage_options: StorageOptions = ...,
38-
) -> DataFrame | StataReader: ...
39-
40-
stata_epoch: datetime.datetime = ...
41-
excessive_string_length_error: str
41+
) -> StataReader: ...
42+
@overload
43+
def read_stata(
44+
path: FilePath | ReadBuffer[bytes],
45+
convert_dates: bool,
46+
convert_categoricals: bool,
47+
index_col: str | None,
48+
convert_missing: bool,
49+
preserve_dtypes: bool,
50+
columns: list[HashableT] | None,
51+
order_categoricals: bool,
52+
chunksize: int | None,
53+
iterator: Literal[True],
54+
compression: CompressionOptions = ...,
55+
storage_options: StorageOptions = ...,
56+
) -> StataReader: ...
57+
@overload
58+
def read_stata(
59+
path: FilePath | ReadBuffer[bytes],
60+
convert_dates: bool = ...,
61+
convert_categoricals: bool = ...,
62+
index_col: str | None = ...,
63+
convert_missing: bool = ...,
64+
preserve_dtypes: bool = ...,
65+
columns: list[HashableT] | None = ...,
66+
order_categoricals: bool = ...,
67+
chunksize: int | None = ...,
68+
iterator: Literal[False] = ...,
69+
compression: CompressionOptions = ...,
70+
storage_options: StorageOptions = ...,
71+
) -> DataFrame: ...
4272

4373
class PossiblePrecisionLoss(Warning): ...
44-
45-
precision_loss_doc: str
46-
4774
class ValueLabelTypeMismatch(Warning): ...
48-
49-
value_label_mismatch_doc: str
50-
5175
class InvalidColumnName(Warning): ...
5276

53-
invalid_name_doc: str
54-
55-
class StataValueLabel:
56-
labname: Hashable = ...
57-
value_labels: list[tuple[float, str]] = ...
58-
text_len: int = ...
59-
off: npt.NDArray[np.int32] = ...
60-
val: npt.NDArray[np.int32] = ...
61-
txt: list[bytes] = ...
62-
n: int = ...
63-
len: int = ...
64-
def __init__(
65-
self, catarray: pd.Series, encoding: Literal["latin-1", "utf-8"] = ...
66-
) -> None: ...
67-
def generate_value_label(self, byteorder: str) -> bytes: ...
68-
69-
class StataMissingValue:
70-
MISSING_VALUES: dict[float, str] = ...
71-
bases: tuple[int, int, int] = ...
72-
float32_base: bytes = ...
73-
increment: int = ...
74-
int_value: int = ...
75-
float64_base: bytes = ...
76-
BASE_MISSING_VALUES: dict[str, int] = ...
77-
def __init__(self, value: float) -> None: ...
78-
def __eq__(self, other: object) -> bool: ...
79-
@property
80-
def string(self) -> str: ...
81-
@property
82-
def value(self) -> float: ...
83-
@classmethod
84-
def get_base_missing_value(cls, dtype): ...
85-
8677
class StataParser:
8778
DTYPE_MAP: dict[int, np.dtype] = ...
8879
DTYPE_MAP_XML: dict[int, np.dtype] = ...
@@ -160,19 +151,6 @@ class StataWriter(StataParser):
160151
) -> None: ...
161152
def write_file(self) -> None: ...
162153

163-
class StataStrLWriter:
164-
df: DataFrame = ...
165-
columns: Sequence[str] = ...
166-
def __init__(
167-
self,
168-
df: DataFrame,
169-
columns: Sequence[str],
170-
version: int = ...,
171-
byteorder: str | None = ...,
172-
) -> None: ...
173-
def generate_table(self) -> tuple[dict[str, tuple[int, int]], DataFrame]: ...
174-
def generate_blob(self, gso_table: dict[str, tuple[int, int]]) -> bytes: ...
175-
176154
class StataWriter117(StataWriter):
177155
def __init__(
178156
self,

tests/test_io.py

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
from __future__ import annotations
2+
3+
from contextlib import contextmanager
4+
from pathlib import Path
5+
import tempfile
6+
from typing import (
7+
IO,
8+
Any,
9+
)
10+
import uuid
11+
12+
import pandas as pd
13+
from pandas import DataFrame
14+
from typing_extensions import assert_type
15+
16+
from tests import check
17+
18+
from pandas.io.stata import (
19+
StataReader,
20+
read_stata,
21+
)
22+
23+
DF = DataFrame({"a": [1, 2, 3], "b": [0.0, 0.0, 0.0]})
24+
25+
26+
@contextmanager
def ensure_clean(filename=None, return_filelike: bool = False, **kwargs: Any):
    """
    Create a temporary file and remove it when the context exits.

    The file is created under ``tempfile.gettempdir()`` with a random UUID
    name. ``tempfile.mkstemp`` is deliberately not used so that no file handle
    is held by default: if the code using the returned path wants to delete
    the file itself, Windows requires that no program has a handle to it.

    Parameters
    ----------
    filename : str (optional)
        Suffix appended to the random UUID to form the file name.
    return_filelike : bool (default False)
        If True, yield an open file object instead of the path string. The
        file object is always closed on exit.
    **kwargs
        Additional keywords passed to open(). ``mode`` defaults to ``"w+b"``.

    Yields
    ------
    str or IO
        The path as a string, or the open file object when
        ``return_filelike`` is True.
    """
    suffix = "" if filename is None else filename
    path = Path(tempfile.gettempdir()) / (str(uuid.uuid4()) + suffix)

    path.touch()

    handle_or_str: str | IO = str(path)
    try:
        # Open inside the try block so the touched file is still removed when
        # open() itself fails (for example on an invalid ``mode``).
        if return_filelike:
            kwargs.setdefault("mode", "w+b")
            handle_or_str = open(path, **kwargs)
        yield handle_or_str
    finally:
        # Close before unlinking: an open handle blocks deletion on Windows.
        if not isinstance(handle_or_str, str):
            handle_or_str.close()
        if path.is_file():
            path.unlink()
64+
65+
66+
def test_read_stata_df():
    """A default ``read_stata`` call is typed as returning a ``DataFrame``."""
    with ensure_clean() as stata_file:
        DF.to_stata(stata_file)
        frame = read_stata(stata_file)
        # assert_type pins the static type; check receives the same expected type.
        check(assert_type(frame, pd.DataFrame), pd.DataFrame)
70+
71+
72+
def test_read_stata_iterator_positional():
    """Passing ``iterator=True`` positionally is typed as returning a ``StataReader``."""
    with ensure_clean() as path:
        str_path = str(path)
        DF.to_stata(str_path)
        # Positional order: path, convert_dates, convert_categoricals,
        # index_col, convert_missing, preserve_dtypes, columns,
        # order_categoricals, chunksize, iterator.
        reader = read_stata(
            str_path, False, False, None, False, False, None, False, 2, True
        )
        try:
            check(assert_type(reader, StataReader), StataReader)
        finally:
            # Close the reader so no file handle is left open when
            # ensure_clean unlinks the file (Windows refuses to delete a file
            # that is still open).
            reader.close()
85+
86+
87+
def test_read_stata_iterator():
    """Passing ``iterator=True`` by keyword is typed as returning a ``StataReader``."""
    with ensure_clean() as path:
        str_path = str(path)
        DF.to_stata(str_path)
        reader = read_stata(str_path, iterator=True)
        try:
            check(assert_type(reader, StataReader), StataReader)
        finally:
            # Close the reader so no file handle is left open when
            # ensure_clean unlinks the file (Windows refuses to delete a file
            # that is still open).
            reader.close()

0 commit comments

Comments
 (0)