From eaee55fe2f1c4e15837a367cad520e57780e25f7 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 25 Feb 2021 12:13:32 -0800 Subject: [PATCH] TYP: incorrect annotations in internals.construction --- pandas/core/internals/construction.py | 66 +++++++++++++-------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 9903dab9976c4..3290d4b38124a 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -22,10 +22,9 @@ from pandas._libs import lib from pandas._typing import ( - Axis, + ArrayLike, DtypeObj, Manager, - Scalar, ) from pandas.core.dtypes.cast import ( @@ -336,7 +335,7 @@ def nested_data_to_arrays( # By the time we get here we have already checked treat_as_nested(data) if is_named_tuple(data[0]) and columns is None: - columns = data[0]._fields + columns = ensure_index(data[0]._fields) arrays, columns = to_arrays(data, columns, dtype=dtype) columns = ensure_index(columns) @@ -574,7 +573,7 @@ def dataclasses_to_dicts(data): # Conversion of Inputs to Arrays -def to_arrays(data, columns, dtype: Optional[DtypeObj] = None): +def to_arrays(data, columns: Optional[Index], dtype: Optional[DtypeObj] = None): """ Return list of arrays, columns. """ @@ -605,12 +604,12 @@ def to_arrays(data, columns, dtype: Optional[DtypeObj] = None): elif isinstance(data, np.ndarray) and data.dtype.names is not None: # e.g. recarray - columns = list(data.dtype.names) + columns = Index(list(data.dtype.names)) arrays = [data[k] for k in columns] return arrays, columns if isinstance(data[0], (list, tuple)): - content, columns = _list_to_arrays(data, columns) + content = _list_to_arrays(data) elif isinstance(data[0], abc.Mapping): content, columns = _list_of_dict_to_arrays(data, columns) elif isinstance(data[0], ABCSeries): @@ -618,35 +617,35 @@ def to_arrays(data, columns, dtype: Optional[DtypeObj] = None): else: # last ditch effort data = [tuple(x) for x in data] - content, columns = _list_to_arrays(data, columns) + content = _list_to_arrays(data) content, columns = _finalize_columns_and_data(content, columns, dtype) return content, columns -def _list_to_arrays( - data: List[Scalar], - columns: Union[Index, List], -) -> Tuple[List[Scalar], Union[Index, List[Axis]]]: +def _list_to_arrays(data: List[Union[Tuple, List]]) -> np.ndarray: + # Returned np.ndarray has ndim = 2 # Note: we already check len(data) > 0 before getting hre if isinstance(data[0], tuple): content = lib.to_object_array_tuples(data) else: # list of lists content = lib.to_object_array(data) - return content, columns + return content def _list_of_series_to_arrays( data: List, - columns: Union[Index, List], -) -> Tuple[List[Scalar], Union[Index, List[Axis]]]: + columns: Optional[Index], +) -> Tuple[np.ndarray, Index]: + # returned np.ndarray has ndim == 2 + if columns is None: # We know pass_data is non-empty because data[0] is a Series pass_data = [x for x in data if isinstance(x, (ABCSeries, ABCDataFrame))] columns = get_objs_combined_axis(pass_data, sort=False) - indexer_cache: Dict[int, Scalar] = {} + indexer_cache: Dict[int, np.ndarray] = {} aligned_values = [] for s in data: @@ -669,8 +668,8 @@ def _list_of_series_to_arrays( def _list_of_dict_to_arrays( data: List[Dict], - columns: Union[Index, List], -) -> Tuple[List[Scalar], Union[Index, List[Axis]]]: + columns: Optional[Index], +) -> Tuple[np.ndarray, Index]: """ Convert list of dicts to numpy arrays @@ -687,13 +686,14 @@ def _list_of_dict_to_arrays( Returns ------- - tuple - arrays, columns + content : np.ndarray[object, ndim=2] + columns : Index """ if columns is None: gen = (list(x.keys()) for x in data) sort = not any(isinstance(d, dict) for d in data) columns = lib.fast_unique_multiple_list_gen(gen, sort=sort) + columns = ensure_index(columns) # assure that they are of the base dict class and not of derived # classes @@ -704,10 +704,10 @@ def _list_of_dict_to_arrays( def _finalize_columns_and_data( - content: np.ndarray, - columns: Optional[Union[Index, List]], + content: np.ndarray, # ndim == 2 + columns: Optional[Index], dtype: Optional[DtypeObj], -) -> Tuple[List[np.ndarray], Union[Index, List[Axis]]]: +) -> Tuple[List[np.ndarray], Index]: """ Ensure we have valid columns, cast object dtypes if possible. """ @@ -725,21 +725,21 @@ def _finalize_columns_and_data( def _validate_or_indexify_columns( - content: List, columns: Optional[Union[Index, List]] -) -> Union[Index, List[Axis]]: + content: List[np.ndarray], columns: Optional[Index] +) -> Index: """ If columns is None, make numbers as column names; Otherwise, validate that columns have valid length. Parameters ---------- - content: list of data - columns: Iterable or None + content : list of np.ndarrays + columns : Index or None Returns ------- - columns: If columns is Iterable, return as is; If columns is None, assign - positional column index value as columns. + Index + If columns is None, assign positional column index value as columns. Raises ------ @@ -783,19 +783,19 @@ def _validate_or_indexify_columns( def _convert_object_array( - content: List[Scalar], dtype: Optional[DtypeObj] = None -) -> List[Scalar]: + content: List[np.ndarray], dtype: Optional[DtypeObj] +) -> List[ArrayLike]: """ Internal function to convert object array. Parameters ---------- - content: list of processed data records - dtype: np.dtype, default is None + content: List[np.ndarray] + dtype: np.dtype or ExtensionDtype Returns ------- - arrays: casted content if not object dtype, otherwise return as is in list. + List[ArrayLike] """ # provide soft conversion of object dtypes def convert(arr):