Skip to content

Commit 620506d

Browse files
committed
BUG: move nan-handling to tslib and clean up
1 parent f925f83 commit 620506d

File tree

3 files changed

+43
-21
lines changed

3 files changed

+43
-21
lines changed

pandas/_libs/tslib.pyx

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -296,17 +296,34 @@ def format_array_from_datetime(ndarray[int64_t] values, object tz=None,
296296
return result
297297

298298

299-
def array_with_unit_to_datetime(ndarray values, object unit,
299+
def array_with_unit_to_datetime(ndarray values, ndarray mask, object unit,
300300
str errors='coerce'):
301301
"""
302-
convert the ndarray according to the unit
302+
Convert the ndarray to datetime according to the time unit.
303+
304+
This function converts an array of objects into a numpy array of
305+
datetime64[ns]. It returns the converted array
306+
and also returns the timezone offset
307+
303308
if errors:
304309
- raise: return converted values or raise OutOfBoundsDatetime
305310
if out of range on the conversion or
306311
ValueError for other conversions (e.g. a string)
307312
- ignore: return non-convertible values as the same unit
308313
- coerce: NaT for non-convertibles
309314
315+
Parameters
316+
----------
317+
values : ndarray of object
318+
Date-like objects to convert
319+
mask : ndarray of bool
320+
Not-a-time mask for nullable integer types conversion,
321+
can be None
322+
unit : object
323+
Time unit to use during conversion
324+
errors : str, default 'coerce'
325+
Error behavior when parsing
326+
310327
Returns
311328
-------
312329
result : ndarray of m8 values
@@ -316,7 +333,6 @@ def array_with_unit_to_datetime(ndarray values, object unit,
316333
Py_ssize_t i, j, n=len(values)
317334
int64_t m
318335
ndarray[float64_t] fvalues
319-
ndarray mask
320336
bint is_ignore = errors=='ignore'
321337
bint is_coerce = errors=='coerce'
322338
bint is_raise = errors=='raise'
@@ -329,9 +345,13 @@ def array_with_unit_to_datetime(ndarray values, object unit,
329345

330346
if unit == 'ns':
331347
if issubclass(values.dtype.type, np.integer):
332-
return values.astype('M8[ns]'), tz
333-
# This will return a tz
334-
return array_to_datetime(values.astype(object), errors=errors)
348+
result = values.astype('M8[ns]')
349+
else:
350+
result, tz = array_to_datetime(values.astype(object), errors=errors)
351+
if mask is not None:
352+
iresult = result.view('i8')
353+
iresult[mask] = NPY_NAT
354+
return result, tz
335355

336356
m = cast_from_unit(None, unit)
337357

@@ -343,7 +363,9 @@ def array_with_unit_to_datetime(ndarray values, object unit,
343363
if values.dtype.kind == "i":
344364
# Note: this condition makes the casting="same_kind" redundant
345365
iresult = values.astype('i8', casting='same_kind', copy=False)
346-
mask = iresult == NPY_NAT
366+
# If no mask, fill mask by comparing to NPY_NAT constant
367+
if mask is None:
368+
mask = iresult == NPY_NAT
347369
iresult[mask] = 0
348370
fvalues = iresult.astype('f8') * m
349371
need_to_iterate = False

pandas/core/tools/datetimes.py

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@
3838
)
3939
from pandas.core.dtypes.missing import notna
4040

41-
from pandas._typing import ArrayLike
4241
from pandas.arrays import IntegerArray
4342
from pandas.core import algorithms
4443
from pandas.core.algorithms import unique
@@ -319,22 +318,20 @@ def _convert_listlike_datetimes(
319318
if format is not None:
320319
raise ValueError("cannot specify both format and unit")
321320
arg = getattr(arg, "_values", arg)
321+
322322
# GH 30050 pass an ndarray to tslib.array_with_unit_to_datetime
323323
# because it expects an ndarray argument
324324
if isinstance(arg, IntegerArray):
325-
# Send only non-na values to array_with_unit_to_datetime
326-
mask_na = arg.isna()
327-
result_np, tz_parsed = tslib.array_with_unit_to_datetime(
328-
np.compress(np.logical_not(mask_na), arg), unit, errors=errors
329-
)
330-
# Insert na values back in proper positions
331-
ins_index = np.ravel(np.argwhere(mask_na))
332-
ins_index -= range(ins_index.shape[0])
333-
result = np.insert(result_np, ins_index, None)
325+
# Explicitly pass NaT mask to array_with_unit_to_datetime
326+
mask = arg.isna()
327+
arg = arg._ndarray_values
334328
else:
335-
result, tz_parsed = tslib.array_with_unit_to_datetime(
336-
arg, unit, errors=errors
337-
)
329+
mask = None
330+
331+
result, tz_parsed = tslib.array_with_unit_to_datetime(
332+
arg, mask, unit, errors=errors
333+
)
334+
338335
if errors == "ignore":
339336
from pandas import Index
340337

pandas/tests/indexes/datetimes/test_tools.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2293,10 +2293,11 @@ def test_should_cache_errors(unique_share, check_count, err_message):
22932293
tools.should_cache(arg, unique_share, check_count)
22942294

22952295

2296-
def test_intarray_to_datetime():
2296+
def test_nullable_integer_to_datetime():
22972297
# Test for #30050
22982298
ser = pd.Series([1, 2, None, 2 ** 61, None])
22992299
ser = ser.astype("Int64")
2300+
ser_copy = ser.copy()
23002301

23012302
res = pd.to_datetime(ser, unit="ns")
23022303

@@ -2310,3 +2311,5 @@ def test_intarray_to_datetime():
23102311
]
23112312
)
23122313
tm.assert_series_equal(res, expected)
2314+
# Check that ser isn't mutated
2315+
tm.assert_series_equal(ser, ser_copy)

0 commit comments

Comments
 (0)