Skip to content

Commit 19c7f75

Browse files
fix creation empty + turn into method
1 parent ee495e5 commit 19c7f75

File tree

1 file changed

+38
-39
lines changed

1 file changed

+38
-39
lines changed

pandas/core/dtypes/concat.py

+38-39
Original file line numberDiff line numberDiff line change
@@ -45,39 +45,40 @@ def __init__(self, n: int):
4545
def shape(self):
4646
return (self.n,)
4747

48+
def to_array(self, dtype: DtypeObj, fill_value=lib.no_default) -> ArrayLike:
49+
"""
50+
Helper method to create the actual all-NA array from this NullArrayProxy
51+
object.
52+
53+
Parameters
54+
----------
55+
dtype : np.dtype or ExtensionDtype
56+
The dtype for the resulting array.
57+
fill_value : scalar NA-like value
58+
By default uses the ExtensionDtype's na_value or np.nan. For numpy
59+
arrays, this can be overridden to be something else (eg None).
60+
61+
Returns
62+
-------
63+
np.ndarray or ExtensionArray
64+
"""
65+
if is_extension_array_dtype(dtype):
66+
empty = dtype.construct_array_type()._from_sequence([], dtype=dtype)
67+
indexer = -np.ones(self.n, dtype=np.intp)
68+
return empty.take(indexer, allow_fill=True)
69+
else:
70+
# when introducing missing values, int becomes float, bool becomes object
71+
if is_integer_dtype(dtype):
72+
dtype = np.dtype("float64")
73+
elif is_bool_dtype(dtype):
74+
dtype = np.dtype(object)
4875

49-
def _array_from_proxy(arr, dtype: DtypeObj, fill_value=lib.no_default):
50-
"""
51-
Helper function to create the actual all-NA array from the NullArrayProxy object.
52-
53-
Parameters
54-
----------
55-
arr : NullArrayProxy
56-
dtype : the dtype for the resulting array
57-
fill_value : scalar NA-like value
58-
By default uses the ExtensionDtype's na_value or np.nan. For numpy
59-
arrays, this can be overridden to be something else (eg None).
60-
61-
Returns
62-
-------
63-
np.ndarray or ExtensionArray
64-
"""
65-
if is_extension_array_dtype(dtype):
66-
return dtype.construct_array_type()._from_sequence(
67-
[dtype.na_value] * arr.n, dtype=dtype
68-
)
69-
else:
70-
if is_integer_dtype(dtype):
71-
dtype = np.dtype("float64")
72-
elif is_bool_dtype(dtype):
73-
dtype = np.dtype(object)
74-
75-
if fill_value is lib.no_default:
76-
fill_value = na_value_for_dtype(dtype)
76+
if fill_value is lib.no_default:
77+
fill_value = na_value_for_dtype(dtype)
7778

78-
arr = np.empty(arr.n, dtype=dtype)
79-
arr.fill(fill_value)
80-
return ensure_wrapped_if_datetimelike(arr)
79+
arr = np.empty(self.n, dtype=dtype)
80+
arr.fill(fill_value)
81+
return ensure_wrapped_if_datetimelike(arr)
8182

8283

8384
def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
@@ -86,7 +87,7 @@ def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
8687
cases.
8788
"""
8889
if isinstance(arr, NullArrayProxy):
89-
return _array_from_proxy(arr, dtype)
90+
return arr.to_array(dtype)
9091

9192
if (
9293
is_categorical_dtype(arr.dtype)
@@ -194,7 +195,7 @@ def is_nonempty(x) -> bool:
194195
return np.concatenate(to_concat, axis=axis)
195196

196197

197-
def concat_arrays(to_concat):
198+
def concat_arrays(to_concat) -> ArrayLike:
198199
"""
199200
Alternative for concat_compat but specialized for use in the ArrayManager.
200201
@@ -225,9 +226,7 @@ def concat_arrays(to_concat):
225226
else:
226227
target_dtype = to_concat_no_proxy[0].dtype
227228
to_concat = [
228-
_array_from_proxy(arr, target_dtype)
229-
if isinstance(arr, NullArrayProxy)
230-
else arr
229+
arr.to_array(target_dtype) if isinstance(arr, NullArrayProxy) else arr
231230
for arr in to_concat
232231
]
233232

@@ -247,7 +246,7 @@ def concat_arrays(to_concat):
247246
else:
248247
target_dtype = to_concat_no_proxy[0].dtype
249248
to_concat = [
250-
_array_from_proxy(arr, target_dtype) if isinstance(arr, NullArrayProxy) else arr
249+
arr.to_array(target_dtype) if isinstance(arr, NullArrayProxy) else arr
251250
for arr in to_concat
252251
]
253252

@@ -462,7 +461,7 @@ def _concat_datetime(to_concat, axis=0):
462461
# ensure_wrapped_if_datetimelike ensures that astype(object) wraps
463462
# in Timestamp/Timedelta
464463
to_concat = [
465-
_array_from_proxy(arr, dtype=object, fill_value=None)
464+
arr.to_array(object, fill_value=None)
466465
if isinstance(arr, NullArrayProxy)
467466
else arr
468467
for arr in to_concat
@@ -475,7 +474,7 @@ def _concat_datetime(to_concat, axis=0):
475474
to_concat = [x.reshape(1, -1) if x.ndim == 1 else x for x in to_concat]
476475
else:
477476
to_concat = [
478-
_array_from_proxy(arr, dtype=to_concat_no_proxy[0].dtype)
477+
arr.to_array(to_concat_no_proxy[0].dtype)
479478
if isinstance(arr, NullArrayProxy)
480479
else arr
481480
for arr in to_concat

0 commit comments

Comments
 (0)