Skip to content

Commit 2548837

Browse files
authored
REF: reorder_arrays handle missing (#43104)
1 parent 8f150e5 commit 2548837

File tree

2 files changed

+44 additions, -10 deletions

pandas/core/frame.py

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2019,6 +2019,8 @@ def from_records(
20192019
2 1 c
20202020
3 0 d
20212021
"""
2022+
result_index = None
2023+
20222024
# Make a copy of the input columns so we can modify it
20232025
if columns is not None:
20242026
columns = ensure_index(columns)
@@ -2060,8 +2062,21 @@ def from_records(
20602062
arr_columns_list.append(k)
20612063
arrays.append(v)
20622064

2065+
if len(arrays):
2066+
length = len(arrays[0])
2067+
elif index is not None:
2068+
length = len(index)
2069+
else:
2070+
length = 0
2071+
20632072
arr_columns = Index(arr_columns_list)
2064-
arrays, arr_columns = reorder_arrays(arrays, arr_columns, columns)
2073+
if len(arrays) == 0 and index is None and length == 0:
2074+
# for backward compat use an object Index instead of RangeIndex
2075+
result_index = Index([])
2076+
2077+
arrays, arr_columns = reorder_arrays(
2078+
arrays, arr_columns, columns, length
2079+
)
20652080

20662081
elif isinstance(data, (np.ndarray, DataFrame)):
20672082
arrays, columns = to_arrays(data, columns)
@@ -2088,7 +2103,6 @@ def from_records(
20882103
else:
20892104
exclude = set(exclude)
20902105

2091-
result_index = None
20922106
if index is not None:
20932107
if isinstance(index, str) or not hasattr(index, "__iter__"):
20942108
i = columns.get_loc(index)

pandas/core/internals/construction.py

Lines changed: 28 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -180,14 +180,14 @@ def rec_array_to_mgr(
180180
# create the manager
181181

182182
# error: Argument 1 to "reorder_arrays" has incompatible type "List[ndarray]";
183-
# expected "List[ExtensionArray]"
183+
# expected "List[Union[ExtensionArray, ndarray]]"
184184
arrays, arr_columns = reorder_arrays(
185-
new_arrays, arr_columns, columns # type: ignore[arg-type]
185+
new_arrays, arr_columns, columns, len(index) # type: ignore[arg-type]
186186
)
187187
if columns is None:
188188
columns = arr_columns
189189

190-
mgr = arrays_to_mgr(arrays, arr_columns, index, columns, dtype=dtype, typ=typ)
190+
mgr = arrays_to_mgr(arrays, columns, index, columns, dtype=dtype, typ=typ)
191191

192192
if copy:
193193
mgr = mgr.copy()
@@ -655,13 +655,33 @@ def _extract_index(data) -> Index:
655655

656656

657657
def reorder_arrays(
658-
arrays: list[ArrayLike], arr_columns: Index, columns: Index | None
658+
arrays: list[ArrayLike], arr_columns: Index, columns: Index | None, length: int
659659
) -> tuple[list[ArrayLike], Index]:
660+
"""
661+
Pre-emptively (cheaply) reindex arrays with new columns.
662+
"""
660663
# reorder according to the columns
661-
if columns is not None and len(columns) and len(arr_columns):
662-
indexer = ensure_index(arr_columns).get_indexer(columns)
663-
arr_columns = ensure_index([arr_columns[i] for i in indexer])
664-
arrays = [arrays[i] for i in indexer]
664+
if columns is not None:
665+
if not columns.equals(arr_columns):
666+
# if they are equal, there is nothing to do
667+
new_arrays: list[ArrayLike | None]
668+
new_arrays = [None] * len(columns)
669+
indexer = arr_columns.get_indexer(columns)
670+
for i, k in enumerate(indexer):
671+
if k == -1:
672+
# by convention default is all-NaN object dtype
673+
arr = np.empty(length, dtype=object)
674+
arr.fill(np.nan)
675+
else:
676+
arr = arrays[k]
677+
new_arrays[i] = arr
678+
679+
# Incompatible types in assignment (expression has type
680+
# "List[Union[ExtensionArray, ndarray[Any, Any], None]]", variable
681+
# has type "List[Union[ExtensionArray, ndarray[Any, Any]]]")
682+
arrays = new_arrays # type: ignore[assignment]
683+
arr_columns = columns
684+
665685
return arrays, arr_columns
666686

667687

0 commit comments

Comments
 (0)