Skip to content

Commit b804372

Browse files
AlexKirko authored and TomAugspurger committed
BUG: to_datetime with unit with Int64 (#30241)
1 parent 2b1b3da commit b804372

File tree

4 files changed

+68
-9
lines changed

4 files changed

+68
-9
lines changed

doc/source/whatsnew/v1.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -802,6 +802,7 @@ Datetimelike
802802
- Bug in :meth:`DataFrame.append` would remove the timezone-awareness of new data (:issue:`30238`)
803803
- Bug in :meth:`Series.cummin` and :meth:`Series.cummax` with timezone-aware dtype incorrectly dropping its timezone (:issue:`15553`)
804804
- Bug in :class:`DatetimeArray`, :class:`TimedeltaArray`, and :class:`PeriodArray` where inplace addition and subtraction did not actually operate inplace (:issue:`24115`)
805+
- Bug in :func:`pandas.to_datetime` when called with ``Series`` storing ``IntegerArray`` raising ``TypeError`` instead of returning ``Series`` (:issue:`30050`)
805806

806807
Timedelta
807808
^^^^^^^^^

pandas/_libs/tslib.pyx

Lines changed: 29 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -296,17 +296,34 @@ def format_array_from_datetime(ndarray[int64_t] values, object tz=None,
296296
return result
297297

298298

299-
def array_with_unit_to_datetime(ndarray values, object unit,
299+
def array_with_unit_to_datetime(ndarray values, ndarray mask, object unit,
300300
str errors='coerce'):
301301
"""
302-
convert the ndarray according to the unit
302+
Convert the ndarray to datetime according to the time unit.
303+
304+
This function converts an array of objects into a numpy array of
305+
datetime64[ns]. It returns the converted array
306+
and also returns the timezone offset
307+
303308
if errors:
304309
- raise: return converted values or raise OutOfBoundsDatetime
305310
if out of range on the conversion or
306311
ValueError for other conversions (e.g. a string)
307312
- ignore: return non-convertible values as the same unit
308313
- coerce: NaT for non-convertibles
309314
315+
Parameters
316+
----------
317+
values : ndarray of object
318+
Date-like objects to convert
319+
mask : ndarray of bool
320+
Not-a-time mask for nullable integer types conversion,
321+
can be None
322+
unit : object
323+
Time unit to use during conversion
324+
errors : str, default 'coerce'
325+
Error behavior when parsing
326+
310327
Returns
311328
-------
312329
result : ndarray of m8 values
@@ -316,7 +333,6 @@ def array_with_unit_to_datetime(ndarray values, object unit,
316333
Py_ssize_t i, j, n=len(values)
317334
int64_t m
318335
ndarray[float64_t] fvalues
319-
ndarray mask
320336
bint is_ignore = errors=='ignore'
321337
bint is_coerce = errors=='coerce'
322338
bint is_raise = errors=='raise'
@@ -329,9 +345,13 @@ def array_with_unit_to_datetime(ndarray values, object unit,
329345

330346
if unit == 'ns':
331347
if issubclass(values.dtype.type, np.integer):
332-
return values.astype('M8[ns]'), tz
333-
# This will return a tz
334-
return array_to_datetime(values.astype(object), errors=errors)
348+
result = values.astype('M8[ns]')
349+
else:
350+
result, tz = array_to_datetime(values.astype(object), errors=errors)
351+
if mask is not None:
352+
iresult = result.view('i8')
353+
iresult[mask] = NPY_NAT
354+
return result, tz
335355

336356
m = cast_from_unit(None, unit)
337357

@@ -343,7 +363,9 @@ def array_with_unit_to_datetime(ndarray values, object unit,
343363
if values.dtype.kind == "i":
344364
# Note: this condition makes the casting="same_kind" redundant
345365
iresult = values.astype('i8', casting='same_kind', copy=False)
346-
mask = iresult == NPY_NAT
366+
# If no mask, fill mask by comparing to NPY_NAT constant
367+
if mask is None:
368+
mask = iresult == NPY_NAT
347369
iresult[mask] = 0
348370
fvalues = iresult.astype('f8') * m
349371
need_to_iterate = False

pandas/core/tools/datetimes.py

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,7 @@
3838
)
3939
from pandas.core.dtypes.missing import notna
4040

41+
from pandas.arrays import IntegerArray
4142
from pandas.core import algorithms
4243
from pandas.core.algorithms import unique
4344

@@ -316,8 +317,21 @@ def _convert_listlike_datetimes(
316317
elif unit is not None:
317318
if format is not None:
318319
raise ValueError("cannot specify both format and unit")
319-
arg = getattr(arg, "values", arg)
320-
result, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors)
320+
arg = getattr(arg, "_values", arg)
321+
322+
# GH 30050 pass an ndarray to tslib.array_with_unit_to_datetime
323+
# because it expects an ndarray argument
324+
if isinstance(arg, IntegerArray):
325+
# Explicitly pass NaT mask to array_with_unit_to_datetime
326+
mask = arg.isna()
327+
arg = arg._ndarray_values
328+
else:
329+
mask = None
330+
331+
result, tz_parsed = tslib.array_with_unit_to_datetime(
332+
arg, mask, unit, errors=errors
333+
)
334+
321335
if errors == "ignore":
322336
from pandas import Index
323337

pandas/tests/indexes/datetimes/test_tools.py

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2291,3 +2291,25 @@ def test_should_cache_errors(unique_share, check_count, err_message):
22912291

22922292
with pytest.raises(AssertionError, match=err_message):
22932293
tools.should_cache(arg, unique_share, check_count)
2294+
2295+
2296+
def test_nullable_integer_to_datetime():
2297+
# Test for #30050
2298+
ser = pd.Series([1, 2, None, 2 ** 61, None])
2299+
ser = ser.astype("Int64")
2300+
ser_copy = ser.copy()
2301+
2302+
res = pd.to_datetime(ser, unit="ns")
2303+
2304+
expected = pd.Series(
2305+
[
2306+
np.datetime64("1970-01-01 00:00:00.000000001"),
2307+
np.datetime64("1970-01-01 00:00:00.000000002"),
2308+
np.datetime64("NaT"),
2309+
np.datetime64("2043-01-25 23:56:49.213693952"),
2310+
np.datetime64("NaT"),
2311+
]
2312+
)
2313+
tm.assert_series_equal(res, expected)
2314+
# Check that ser isn't mutated
2315+
tm.assert_series_equal(ser, ser_copy)

0 commit comments

Comments
 (0)