22
22
23
23
from pandas ._libs import lib
24
24
from pandas ._typing import (
25
- Axis ,
25
+ ArrayLike ,
26
26
DtypeObj ,
27
27
Manager ,
28
- Scalar ,
29
28
)
30
29
31
30
from pandas .core .dtypes .cast import (
@@ -339,7 +338,7 @@ def nested_data_to_arrays(
339
338
# By the time we get here we have already checked treat_as_nested(data)
340
339
341
340
if is_named_tuple (data [0 ]) and columns is None :
342
- columns = data [0 ]._fields
341
+ columns = ensure_index ( data [0 ]._fields )
343
342
344
343
arrays , columns = to_arrays (data , columns , dtype = dtype )
345
344
columns = ensure_index (columns )
@@ -577,7 +576,7 @@ def dataclasses_to_dicts(data):
577
576
# Conversion of Inputs to Arrays
578
577
579
578
580
- def to_arrays (data , columns , dtype : Optional [DtypeObj ] = None ):
579
+ def to_arrays (data , columns : Optional [ Index ] , dtype : Optional [DtypeObj ] = None ):
581
580
"""
582
581
Return list of arrays, columns.
583
582
"""
@@ -608,48 +607,48 @@ def to_arrays(data, columns, dtype: Optional[DtypeObj] = None):
608
607
609
608
elif isinstance (data , np .ndarray ) and data .dtype .names is not None :
610
609
# e.g. recarray
611
- columns = list (data .dtype .names )
610
+ columns = Index ( list (data .dtype .names ) )
612
611
arrays = [data [k ] for k in columns ]
613
612
return arrays , columns
614
613
615
614
if isinstance (data [0 ], (list , tuple )):
616
- content , columns = _list_to_arrays (data , columns )
615
+ content = _list_to_arrays (data )
617
616
elif isinstance (data [0 ], abc .Mapping ):
618
617
content , columns = _list_of_dict_to_arrays (data , columns )
619
618
elif isinstance (data [0 ], ABCSeries ):
620
619
content , columns = _list_of_series_to_arrays (data , columns )
621
620
else :
622
621
# last ditch effort
623
622
data = [tuple (x ) for x in data ]
624
- content , columns = _list_to_arrays (data , columns )
623
+ content = _list_to_arrays (data )
625
624
626
625
content , columns = _finalize_columns_and_data (content , columns , dtype )
627
626
return content , columns
628
627
629
628
630
- def _list_to_arrays (
631
- data : List [Scalar ],
632
- columns : Union [Index , List ],
633
- ) -> Tuple [List [Scalar ], Union [Index , List [Axis ]]]:
629
+ def _list_to_arrays (data : List [Union [Tuple , List ]]) -> np .ndarray :
630
+ # Returned np.ndarray has ndim == 2
634
631
# Note: we already check len(data) > 0 before getting here
635
632
if isinstance (data [0 ], tuple ):
636
633
content = lib .to_object_array_tuples (data )
637
634
else :
638
635
# list of lists
639
636
content = lib .to_object_array (data )
640
- return content , columns
637
+ return content
641
638
642
639
643
640
def _list_of_series_to_arrays (
644
641
data : List ,
645
- columns : Union [Index , List ],
646
- ) -> Tuple [List [Scalar ], Union [Index , List [Axis ]]]:
642
+ columns : Optional [Index ],
643
+ ) -> Tuple [np .ndarray , Index ]:
644
+ # returned np.ndarray has ndim == 2
645
+
647
646
if columns is None :
648
647
# We know pass_data is non-empty because data[0] is a Series
649
648
pass_data = [x for x in data if isinstance (x , (ABCSeries , ABCDataFrame ))]
650
649
columns = get_objs_combined_axis (pass_data , sort = False )
651
650
652
- indexer_cache : Dict [int , Scalar ] = {}
651
+ indexer_cache : Dict [int , np . ndarray ] = {}
653
652
654
653
aligned_values = []
655
654
for s in data :
@@ -672,8 +671,8 @@ def _list_of_series_to_arrays(
672
671
673
672
def _list_of_dict_to_arrays (
674
673
data : List [Dict ],
675
- columns : Union [Index , List ],
676
- ) -> Tuple [List [ Scalar ], Union [ Index , List [ Axis ]] ]:
674
+ columns : Optional [Index ],
675
+ ) -> Tuple [np . ndarray , Index ]:
677
676
"""
678
677
Convert list of dicts to numpy arrays
679
678
@@ -690,13 +689,14 @@ def _list_of_dict_to_arrays(
690
689
691
690
Returns
692
691
-------
693
- tuple
694
- arrays, columns
692
+ content : np.ndarray[object, ndim=2]
693
+ columns : Index
695
694
"""
696
695
if columns is None :
697
696
gen = (list (x .keys ()) for x in data )
698
697
sort = not any (isinstance (d , dict ) for d in data )
699
698
columns = lib .fast_unique_multiple_list_gen (gen , sort = sort )
699
+ columns = ensure_index (columns )
700
700
701
701
# assure that they are of the base dict class and not of derived
702
702
# classes
@@ -707,10 +707,10 @@ def _list_of_dict_to_arrays(
707
707
708
708
709
709
def _finalize_columns_and_data (
710
- content : np .ndarray ,
711
- columns : Optional [Union [ Index , List ] ],
710
+ content : np .ndarray , # ndim == 2
711
+ columns : Optional [Index ],
712
712
dtype : Optional [DtypeObj ],
713
- ) -> Tuple [List [np .ndarray ], Union [ Index , List [ Axis ]] ]:
713
+ ) -> Tuple [List [np .ndarray ], Index ]:
714
714
"""
715
715
Ensure we have valid columns, cast object dtypes if possible.
716
716
"""
@@ -728,21 +728,21 @@ def _finalize_columns_and_data(
728
728
729
729
730
730
def _validate_or_indexify_columns (
731
- content : List , columns : Optional [Union [ Index , List ] ]
732
- ) -> Union [ Index , List [ Axis ]] :
731
+ content : List [ np . ndarray ] , columns : Optional [Index ]
732
+ ) -> Index :
733
733
"""
734
734
If columns is None, use positional integers as column names; otherwise, validate that
735
735
columns have valid length.
736
736
737
737
Parameters
738
738
----------
739
- content: list of data
740
- columns: Iterable or None
739
+ content : list of np.ndarrays
740
+ columns : Index or None
741
741
742
742
Returns
743
743
-------
744
- columns: If columns is Iterable, return as is; If columns is None, assign
745
- positional column index value as columns.
744
+ Index
745
+ If columns is None, assign positional column index value as columns.
746
746
747
747
Raises
748
748
------
@@ -786,19 +786,19 @@ def _validate_or_indexify_columns(
786
786
787
787
788
788
def _convert_object_array (
789
- content : List [Scalar ], dtype : Optional [DtypeObj ] = None
790
- ) -> List [Scalar ]:
789
+ content : List [np . ndarray ], dtype : Optional [DtypeObj ]
790
+ ) -> List [ArrayLike ]:
791
791
"""
792
792
Internal function to convert object array.
793
793
794
794
Parameters
795
795
----------
796
- content: list of processed data records
797
- dtype: np.dtype, default is None
796
+ content: List[np.ndarray]
797
+ dtype: np.dtype or ExtensionDtype
798
798
799
799
Returns
800
800
-------
801
- arrays: casted content if not object dtype, otherwise return as is in list.
801
+ List[ArrayLike]
802
802
"""
803
803
# provide soft conversion of object dtypes
804
804
def convert (arr ):
0 commit comments