TYP: incorrect annotations in internals.construction #40061

Merged
2 commits merged on Feb 27, 2021
pandas/core/internals/construction.py: 66 changes (33 additions & 33 deletions)
@@ -22,10 +22,9 @@

 from pandas._libs import lib
 from pandas._typing import (
-    Axis,
+    ArrayLike,
     DtypeObj,
     Manager,
-    Scalar,
 )

 from pandas.core.dtypes.cast import (
@@ -336,7 +335,7 @@ def nested_data_to_arrays(
     # By the time we get here we have already checked treat_as_nested(data)

     if is_named_tuple(data[0]) and columns is None:
-        columns = data[0]._fields
+        columns = ensure_index(data[0]._fields)

     arrays, columns = to_arrays(data, columns, dtype=dtype)
     columns = ensure_index(columns)
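
For context on the branch above: when the data is a list of namedtuples and no columns are given, the column labels come from the namedtuple's _fields, and with this change they are wrapped in an Index right away. A minimal sketch of the observable behaviour through the public constructor (the names Point, records and df are illustrative, not part of the patch):

from collections import namedtuple

import pandas as pd

Point = namedtuple("Point", ["x", "y"])
records = [Point(1, 2), Point(3, 4)]

# Column labels are taken from Point._fields when none are supplied.
df = pd.DataFrame(records)
print(df.columns.tolist())  # ['x', 'y']
print(type(df.columns))     # <class 'pandas.core.indexes.base.Index'>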
@@ -574,7 +573,7 @@ def dataclasses_to_dicts(data):
 # Conversion of Inputs to Arrays


-def to_arrays(data, columns, dtype: Optional[DtypeObj] = None):
+def to_arrays(data, columns: Optional[Index], dtype: Optional[DtypeObj] = None):
     """
     Return list of arrays, columns.
     """
@@ -605,48 +604,48 @@ def to_arrays(data, columns: Optional[Index], dtype: Optional[DtypeObj] = None):

     elif isinstance(data, np.ndarray) and data.dtype.names is not None:
         # e.g. recarray
-        columns = list(data.dtype.names)
+        columns = Index(list(data.dtype.names))
         arrays = [data[k] for k in columns]
         return arrays, columns

     if isinstance(data[0], (list, tuple)):
-        content, columns = _list_to_arrays(data, columns)
+        content = _list_to_arrays(data)
     elif isinstance(data[0], abc.Mapping):
         content, columns = _list_of_dict_to_arrays(data, columns)
     elif isinstance(data[0], ABCSeries):
         content, columns = _list_of_series_to_arrays(data, columns)
     else:
         # last ditch effort
         data = [tuple(x) for x in data]
-        content, columns = _list_to_arrays(data, columns)
+        content = _list_to_arrays(data)

     content, columns = _finalize_columns_and_data(content, columns, dtype)
     return content, columns


-def _list_to_arrays(
-    data: List[Scalar],
-    columns: Union[Index, List],
-) -> Tuple[List[Scalar], Union[Index, List[Axis]]]:
+def _list_to_arrays(data: List[Union[Tuple, List]]) -> np.ndarray:
+    # Returned np.ndarray has ndim = 2
+    # Note: we already check len(data) > 0 before getting here
     if isinstance(data[0], tuple):
         content = lib.to_object_array_tuples(data)
     else:
         # list of lists
         content = lib.to_object_array(data)
-    return content, columns
+    return content


 def _list_of_series_to_arrays(
     data: List,
-    columns: Union[Index, List],
-) -> Tuple[List[Scalar], Union[Index, List[Axis]]]:
+    columns: Optional[Index],
+) -> Tuple[np.ndarray, Index]:
+    # returned np.ndarray has ndim == 2

     if columns is None:
         # We know pass_data is non-empty because data[0] is a Series
         pass_data = [x for x in data if isinstance(x, (ABCSeries, ABCDataFrame))]
         columns = get_objs_combined_axis(pass_data, sort=False)

-    indexer_cache: Dict[int, Scalar] = {}
+    indexer_cache: Dict[int, np.ndarray] = {}

     aligned_values = []
     for s in data:
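
These helpers are reached through the public DataFrame constructor, and the dispatch above sends list-of-tuples/lists to _list_to_arrays, list-of-dicts to _list_of_dict_to_arrays, list-of-Series to _list_of_series_to_arrays, and record arrays through the dtype.names branch. A rough sketch of inputs that exercise each branch (illustrative only, not part of the diff):

import numpy as np
import pandas as pd

# list of tuples or lists -> _list_to_arrays (2D object ndarray, positional columns)
pd.DataFrame([(1, 2), (3, 4)])

# list of dicts -> _list_of_dict_to_arrays (columns inferred from the keys)
pd.DataFrame([{"a": 1, "b": 2}, {"a": 3, "b": 4}])

# list of Series -> _list_of_series_to_arrays (columns from the combined Series indexes)
pd.DataFrame([pd.Series({"a": 1, "b": 2}), pd.Series({"a": 3, "b": 4})])

# structured/record array -> columns taken from data.dtype.names
rec = np.array([(1, 2.0)], dtype=[("i", "i8"), ("f", "f8")])
pd.DataFrame(rec)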
@@ -669,8 +668,8 @@ def _list_of_series_to_arrays(

 def _list_of_dict_to_arrays(
     data: List[Dict],
-    columns: Union[Index, List],
-) -> Tuple[List[Scalar], Union[Index, List[Axis]]]:
+    columns: Optional[Index],
+) -> Tuple[np.ndarray, Index]:
     """
     Convert list of dicts to numpy arrays

@@ -687,13 +686,14 @@ def _list_of_dict_to_arrays(

     Returns
     -------
-    tuple
-        arrays, columns
+    content : np.ndarray[object, ndim=2]
+    columns : Index
     """
     if columns is None:
         gen = (list(x.keys()) for x in data)
         sort = not any(isinstance(d, dict) for d in data)
         columns = lib.fast_unique_multiple_list_gen(gen, sort=sort)
+        columns = ensure_index(columns)

     # assure that they are of the base dict class and not of derived
     # classes
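
As the hunk above shows, column labels for a list of dicts come from lib.fast_unique_multiple_list_gen over the keys, sorted only when none of the records is a plain dict, and they are now passed through ensure_index. A small illustration of the resulting order via the public constructor (assuming ordinary Python 3.7+ dicts):

import pandas as pd

# Plain dicts: keys are kept in order of first appearance, not sorted.
df = pd.DataFrame([{"b": 1, "a": 2}, {"c": 3}])
print(df.columns.tolist())  # ['b', 'a', 'c']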
@@ -704,10 +704,10 @@


 def _finalize_columns_and_data(
-    content: np.ndarray,
-    columns: Optional[Union[Index, List]],
+    content: np.ndarray,  # ndim == 2
+    columns: Optional[Index],
     dtype: Optional[DtypeObj],
-) -> Tuple[List[np.ndarray], Union[Index, List[Axis]]]:
+) -> Tuple[List[np.ndarray], Index]:
     """
     Ensure we have valid columns, cast object dtypes if possible.
     """
@@ -725,21 +725,21 @@


 def _validate_or_indexify_columns(
-    content: List, columns: Optional[Union[Index, List]]
-) -> Union[Index, List[Axis]]:
+    content: List[np.ndarray], columns: Optional[Index]
+) -> Index:
     """
     If columns is None, make numbers as column names; Otherwise, validate that
     columns have valid length.

     Parameters
     ----------
-    content: list of data
-    columns: Iterable or None
+    content : list of np.ndarrays
+    columns : Index or None

     Returns
     -------
-    columns: If columns is Iterable, return as is; If columns is None, assign
-        positional column index value as columns.
+    Index
+        If columns is None, assign positional column index value as columns.

     Raises
     ------
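
The docstring above describes the two paths of _validate_or_indexify_columns: with columns=None the labels become positional integers, and a length mismatch raises (surfaced as a ValueError from the DataFrame constructor). Observed through the public constructor, roughly:

import pandas as pd

# No columns given: positional integer labels are assigned.
df = pd.DataFrame([[1, 2, 3], [4, 5, 6]])
print(df.columns.tolist())  # [0, 1, 2]

# Wrong number of labels: surfaced as a ValueError.
try:
    pd.DataFrame([[1, 2, 3]], columns=["a", "b"])
except ValueError as err:
    print(err)  # e.g. "2 columns passed, passed data had 3 columns"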
@@ -783,19 +783,19 @@ def _validate_or_indexify_columns(


 def _convert_object_array(
-    content: List[Scalar], dtype: Optional[DtypeObj] = None
-) -> List[Scalar]:
+    content: List[np.ndarray], dtype: Optional[DtypeObj]
+) -> List[ArrayLike]:
     """
     Internal function to convert object array.

     Parameters
     ----------
-    content: list of processed data records
-    dtype: np.dtype, default is None
+    content: List[np.ndarray]
+    dtype: np.dtype or ExtensionDtype

     Returns
     -------
-    arrays: casted content if not object dtype, otherwise return as is in list.
+    List[ArrayLike]
     """
     # provide soft conversion of object dtypes
     def convert(arr):
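
The body of _convert_object_array is truncated above, but the "soft conversion of object dtypes" it performs is visible through the constructor: object columns built from nested lists are inferred to concrete dtypes, while an explicit dtype=object leaves them alone. A sketch of the observable behaviour:

import pandas as pd

# Nested-list input is first read into a 2D object array, then soft-converted per column.
df = pd.DataFrame([[1, 2.5], [3, 4.5]])
print(df.dtypes.tolist())  # [dtype('int64'), dtype('float64')]

# Requesting object dtype skips the conversion and keeps object columns.
df_obj = pd.DataFrame([[1, 2.5], [3, 4.5]], dtype=object)
print(df_obj.dtypes.tolist())  # [dtype('O'), dtype('O')]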