Skip to content

Commit a1eae0e

Browse files
authored
BUG: Fix mixed datetime dtype inference (#33749)
1 parent f7681d3 commit a1eae0e

File tree

5 files changed

+58
-5
lines changed

5 files changed

+58
-5
lines changed

doc/source/whatsnew/v1.1.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -759,6 +759,7 @@ Datetimelike
759759
- Bug in :meth:`DatetimeIndex.to_period` not inferring the frequency when called with no arguments (:issue:`33358`)
760760
- Bug in :meth:`DatetimeIndex.tz_localize` incorrectly retaining ``freq`` in some cases where the original freq is no longer valid (:issue:`30511`)
761761
- Bug in :meth:`DatetimeIndex.intersection` losing ``freq`` and timezone in some cases (:issue:`33604`)
762+
- Bug in :meth:`DatetimeIndex.get_indexer` where incorrect output would be returned for mixed datetime-like targets (:issue:`33741`)
762763
- Bug in :class:`DatetimeIndex` addition and subtraction with some types of :class:`DateOffset` objects incorrectly retaining an invalid ``freq`` attribute (:issue:`33779`)
763764
- Bug in :class:`DatetimeIndex` where setting the ``freq`` attribute on an index could silently change the ``freq`` attribute on another index viewing the same data (:issue:`33552`)
764765
- Bug in :meth:`DataFrame.min`/:meth:`DataFrame.max` not returning consistent results with :meth:`Series.min`/:meth:`Series.max` when called on objects initialized with empty :func:`pd.to_datetime`

pandas/_libs/lib.pyx

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1380,8 +1380,10 @@ def infer_dtype(value: object, skipna: bool = True) -> str:
13801380
return "mixed-integer"
13811381

13821382
elif PyDateTime_Check(val):
1383-
if is_datetime_array(values):
1383+
if is_datetime_array(values, skipna=skipna):
13841384
return "datetime"
1385+
elif is_date_array(values, skipna=skipna):
1386+
return "date"
13851387

13861388
elif PyDate_Check(val):
13871389
if is_date_array(values, skipna=skipna):
@@ -1752,10 +1754,10 @@ cdef class DatetimeValidator(TemporalValidator):
17521754
return is_null_datetime64(value)
17531755

17541756

1755-
cpdef bint is_datetime_array(ndarray values):
1757+
cpdef bint is_datetime_array(ndarray values, bint skipna=True):
17561758
cdef:
17571759
DatetimeValidator validator = DatetimeValidator(len(values),
1758-
skipna=True)
1760+
skipna=skipna)
17591761
return validator.validate(values)
17601762

17611763

pandas/core/indexes/base.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4701,7 +4701,10 @@ def _maybe_promote(self, other: "Index"):
47014701
"""
47024702

47034703
if self.inferred_type == "date" and isinstance(other, ABCDatetimeIndex):
4704-
return type(other)(self), other
4704+
try:
4705+
return type(other)(self), other
4706+
except OutOfBoundsDatetime:
4707+
return self, other
47054708
elif self.inferred_type == "timedelta" and isinstance(other, ABCTimedeltaIndex):
47064709
# TODO: we don't have tests that get here
47074710
return type(other)(self), other

pandas/tests/dtypes/test_inference.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1106,6 +1106,21 @@ def test_date(self):
11061106
result = lib.infer_dtype(dates, skipna=True)
11071107
assert result == "date"
11081108

1109+
@pytest.mark.parametrize(
1110+
"values",
1111+
[
1112+
[date(2020, 1, 1), pd.Timestamp("2020-01-01")],
1113+
[pd.Timestamp("2020-01-01"), date(2020, 1, 1)],
1114+
[date(2020, 1, 1), pd.NaT],
1115+
[pd.NaT, date(2020, 1, 1)],
1116+
],
1117+
)
1118+
@pytest.mark.parametrize("skipna", [True, False])
1119+
def test_infer_dtype_date_order_invariant(self, values, skipna):
1120+
# https://github.com/pandas-dev/pandas/issues/33741
1121+
result = lib.infer_dtype(values, skipna=skipna)
1122+
assert result == "date"
1123+
11091124
def test_is_numeric_array(self):
11101125

11111126
assert lib.is_float_array(np.array([1, 2.0]))

pandas/tests/indexes/datetimes/test_indexing.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from datetime import datetime, time, timedelta
1+
from datetime import date, datetime, time, timedelta
22

33
import numpy as np
44
import pytest
@@ -575,6 +575,38 @@ def test_get_indexer(self):
575575
with pytest.raises(ValueError, match="abbreviation w/o a number"):
576576
idx.get_indexer(idx[[0]], method="nearest", tolerance="foo")
577577

578+
@pytest.mark.parametrize(
579+
"target",
580+
[
581+
[date(2020, 1, 1), pd.Timestamp("2020-01-02")],
582+
[pd.Timestamp("2020-01-01"), date(2020, 1, 2)],
583+
],
584+
)
585+
def test_get_indexer_mixed_dtypes(self, target):
586+
# https://github.com/pandas-dev/pandas/issues/33741
587+
values = pd.DatetimeIndex(
588+
[pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-02")]
589+
)
590+
result = values.get_indexer(target)
591+
expected = np.array([0, 1], dtype=np.intp)
592+
tm.assert_numpy_array_equal(result, expected)
593+
594+
@pytest.mark.parametrize(
595+
"target, positions",
596+
[
597+
([date(9999, 1, 1), pd.Timestamp("2020-01-01")], [-1, 0]),
598+
([pd.Timestamp("2020-01-01"), date(9999, 1, 1)], [0, -1]),
599+
([date(9999, 1, 1), date(9999, 1, 1)], [-1, -1]),
600+
],
601+
)
602+
def test_get_indexer_out_of_bounds_date(self, target, positions):
603+
values = pd.DatetimeIndex(
604+
[pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-02")]
605+
)
606+
result = values.get_indexer(target)
607+
expected = np.array(positions, dtype=np.intp)
608+
tm.assert_numpy_array_equal(result, expected)
609+
578610

579611
class TestMaybeCastSliceBound:
580612
def test_maybe_cast_slice_bounds_empty(self):

0 commit comments

Comments
 (0)