diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 605e2135edc9f..337e131f0a2c9 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -376,7 +376,8 @@ cpdef array_to_datetime( bint dayfirst=False, bint yearfirst=False, bint utc=False, - bint require_iso8601=False + bint require_iso8601=False, + bint allow_mixed=False, ): """ Converts a 1D array of date-like values to a numpy array of either: @@ -405,6 +406,8 @@ cpdef array_to_datetime( indicator whether the dates should be UTC require_iso8601 : bool, default False indicator whether the datetime string should be iso8601 + allow_mixed : bool, default False + Whether to allow mixed datetimes and integers. Returns ------- @@ -597,7 +600,7 @@ cpdef array_to_datetime( return ignore_errors_out_of_bounds_fallback(values), tz_out except TypeError: - return array_to_datetime_object(values, errors, dayfirst, yearfirst) + return _array_to_datetime_object(values, errors, dayfirst, yearfirst) if seen_datetime and seen_integer: # we have mixed datetimes & integers @@ -609,10 +612,12 @@ cpdef array_to_datetime( val = values[i] if is_integer_object(val) or is_float_object(val): result[i] = NPY_NAT + elif allow_mixed: + pass elif is_raise: raise ValueError("mixed datetimes and integers in passed array") else: - return array_to_datetime_object(values, errors, dayfirst, yearfirst) + return _array_to_datetime_object(values, errors, dayfirst, yearfirst) if seen_datetime_offset and not utc_convert: # GH#17697 @@ -623,7 +628,7 @@ cpdef array_to_datetime( # (with individual dateutil.tzoffsets) are returned is_same_offsets = len(out_tzoffset_vals) == 1 if not is_same_offsets: - return array_to_datetime_object(values, errors, dayfirst, yearfirst) + return _array_to_datetime_object(values, errors, dayfirst, yearfirst) else: tz_offset = out_tzoffset_vals.pop() tz_out = pytz.FixedOffset(tz_offset / 60.) @@ -670,7 +675,7 @@ cdef ignore_errors_out_of_bounds_fallback(ndarray[object] values): @cython.wraparound(False) @cython.boundscheck(False) -cdef array_to_datetime_object( +cdef _array_to_datetime_object( ndarray[object] values, str errors, bint dayfirst=False, diff --git a/pandas/core/construction.py b/pandas/core/construction.py index f5f49e0e5fc20..d0fe5b5ab0c19 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -669,6 +669,8 @@ def _try_cast( subarr = arr else: subarr = maybe_cast_to_datetime(arr, dtype) + if dtype is not None and dtype.kind == "M": + return subarr if not isinstance(subarr, ABCExtensionArray): subarr = construct_1d_ndarray_preserving_na(subarr, dtype, copy=copy) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 0917cf1787d5b..a6b4befb8ea5c 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1583,10 +1583,24 @@ def maybe_cast_to_datetime( value = to_timedelta(value, errors="raise")._values except OutOfBoundsDatetime: raise - except ValueError: + except ValueError as err: # TODO(GH#40048): only catch dateutil's ParserError # once we can reliably import it in all supported versions - pass + if "mixed datetimes and integers in passed array" in str(err): + # We need to catch this in array_to_datetime, otherwise + # we end up going through numpy which will lose nanoseconds + # from Timestamps + try: + i8vals, tz = tslib.array_to_datetime( + value, allow_mixed=True + ) + except ValueError: + pass + else: + from pandas.core.arrays import DatetimeArray + + dta = DatetimeArray(i8vals).tz_localize(tz) + value = dta # coerce datetimelike to object elif is_datetime64_dtype( diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 20536c7a94695..fa6c9a7a5b7b7 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -232,15 +232,26 @@ def ndarray_to_mgr(values, index, columns, dtype: Optional[DtypeObj], copy: bool values = _prep_ndarray(values, copy=copy) if dtype is not None and not is_dtype_equal(values.dtype, dtype): - try: - values = construct_1d_ndarray_preserving_na( - values.ravel(), dtype=dtype, copy=False - ).reshape(values.shape) - except Exception as orig: - # e.g. ValueError when trying to cast object dtype to float64 - raise ValueError( - f"failed to cast to '{dtype}' (Exception was: {orig})" - ) from orig + shape = values.shape + flat = values.ravel() + + if not is_integer_dtype(dtype): + # TODO: skipping integer_dtype is needed to keep the tests passing, + # not clear it is correct + # Note: we really only need _try_cast, but keeping to exposed funcs + values = sanitize_array( + flat, None, dtype=dtype, copy=copy, raise_cast_failure=True + ) + else: + try: + values = construct_1d_ndarray_preserving_na( + flat, dtype=dtype, copy=False + ) + except Exception as err: + # e.g. ValueError when trying to cast object dtype to float64 + msg = f"failed to cast to '{dtype}' (Exception was: {err})" + raise ValueError(msg) from err + values = values.reshape(shape) # _prep_ndarray ensures that values.ndim == 2 at this point index, columns = _get_axes( diff --git a/pandas/tests/base/test_constructors.py b/pandas/tests/base/test_constructors.py index b042e29986c80..ceb882ff9c963 100644 --- a/pandas/tests/base/test_constructors.py +++ b/pandas/tests/base/test_constructors.py @@ -124,9 +124,7 @@ class TestConstruction: [ Series, lambda x, **kwargs: DataFrame({"a": x}, **kwargs)["a"], - pytest.param( - lambda x, **kwargs: DataFrame(x, **kwargs)[0], marks=pytest.mark.xfail - ), + lambda x, **kwargs: DataFrame(x, **kwargs)[0], Index, ], ) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index afc7ccb516c7f..5fb805ecd77f6 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1100,7 +1100,8 @@ def test_constructor_more(self, float_frame): # can't cast mat = np.array(["foo", "bar"], dtype=object).reshape(2, 1) - with pytest.raises(ValueError, match="cast"): + msg = "could not convert string to float: 'foo'" + with pytest.raises(ValueError, match=msg): DataFrame(mat, index=[0, 1], columns=[0], dtype=float) dm = DataFrame(DataFrame(float_frame._series)) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index c2d0bf5975059..63c9b4d899622 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -760,6 +760,14 @@ def test_constructor_datelike_coercion(self): result = df.loc["216"] assert result.dtype == object + def test_constructor_mixed_int_and_timestamp(self, frame_or_series): + # specifically Timestamp with nanos, not datetimes + objs = [Timestamp(9), 10, NaT.value] + result = frame_or_series(objs, dtype="M8[ns]") + + expected = frame_or_series([Timestamp(9), Timestamp(10), NaT]) + tm.assert_equal(result, expected) + def test_constructor_datetimes_with_nulls(self): # gh-15869 for arr in [