From 0a252b5b8a9251f6728e7cd238a96cb60ba2d358 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 11 Jun 2022 08:27:34 -0700 Subject: [PATCH] ENH: Timestamp.replace support non-nano --- pandas/_libs/tslibs/conversion.pxd | 3 +- pandas/_libs/tslibs/conversion.pyx | 55 ++++++++++++------ pandas/_libs/tslibs/np_datetime.pxd | 4 ++ pandas/_libs/tslibs/np_datetime.pyx | 58 +++++++++++-------- pandas/_libs/tslibs/timestamps.pyx | 15 ++--- .../tests/scalar/timestamp/test_unary_ops.py | 11 ++++ 6 files changed, 94 insertions(+), 52 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index fb0c7d71ad58f..637a84998751f 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -27,7 +27,8 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, int32_t nanos=*) cdef _TSObject convert_datetime_to_tsobject(datetime ts, tzinfo tz, - int32_t nanos=*) + int32_t nanos=*, + NPY_DATETIMEUNIT reso=*) cdef int64_t get_datetime64_nanos(object val) except? -1 diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 4e1fcbbcdcc61..6cbc06830471e 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -31,6 +31,7 @@ from cpython.datetime cimport ( import_datetime() from pandas._libs.tslibs.base cimport ABCTimestamp +from pandas._libs.tslibs.dtypes cimport periods_per_second from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, NPY_FR_ns, @@ -40,11 +41,14 @@ from pandas._libs.tslibs.np_datetime cimport ( dtstruct_to_dt64, get_datetime64_unit, get_datetime64_value, + get_implementation_bounds, get_unit_from_dtype, npy_datetime, npy_datetimestruct, + npy_datetimestruct_to_datetime, pandas_datetime_to_datetimestruct, pydatetime_to_dt64, + pydatetime_to_dtstruct, string_to_dts, ) @@ -307,11 +311,15 @@ cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso): if obj.value != NPY_NAT: # check_overflows needs to run after _localize_tso check_dts_bounds(&obj.dts, reso) - check_overflows(obj) + check_overflows(obj, reso) -cdef _TSObject convert_datetime_to_tsobject(datetime ts, tzinfo tz, - int32_t nanos=0): +cdef _TSObject convert_datetime_to_tsobject( + datetime ts, + tzinfo tz, + int32_t nanos=0, + NPY_DATETIMEUNIT reso=NPY_FR_ns, +): """ Convert a datetime (or Timestamp) input `ts`, along with optional timezone object `tz` to a _TSObject. @@ -327,6 +335,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, tzinfo tz, timezone for the timezone-aware output nanos : int32_t, default is 0 nanoseconds supplement the precision of the datetime input ts + reso : NPY_DATETIMEUNIT, default NPY_FR_ns Returns ------- @@ -334,6 +343,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, tzinfo tz, """ cdef: _TSObject obj = _TSObject() + int64_t pps obj.fold = ts.fold if tz is not None: @@ -342,34 +352,35 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, tzinfo tz, if ts.tzinfo is not None: # Convert the current timezone to the passed timezone ts = ts.astimezone(tz) - obj.value = pydatetime_to_dt64(ts, &obj.dts) + pydatetime_to_dtstruct(ts, &obj.dts) obj.tzinfo = ts.tzinfo elif not is_utc(tz): ts = _localize_pydatetime(ts, tz) - obj.value = pydatetime_to_dt64(ts, &obj.dts) + pydatetime_to_dtstruct(ts, &obj.dts) obj.tzinfo = ts.tzinfo else: # UTC - obj.value = pydatetime_to_dt64(ts, &obj.dts) + pydatetime_to_dtstruct(ts, &obj.dts) obj.tzinfo = tz else: - obj.value = pydatetime_to_dt64(ts, &obj.dts) + pydatetime_to_dtstruct(ts, &obj.dts) obj.tzinfo = ts.tzinfo - if obj.tzinfo is not None and not is_utc(obj.tzinfo): - offset = get_utcoffset(obj.tzinfo, ts) - obj.value -= int(offset.total_seconds() * 1e9) - if isinstance(ts, ABCTimestamp): - obj.value += ts.nanosecond obj.dts.ps = ts.nanosecond * 1000 if nanos: - obj.value += nanos obj.dts.ps = nanos * 1000 - check_dts_bounds(&obj.dts) - check_overflows(obj) + obj.value = npy_datetimestruct_to_datetime(reso, &obj.dts) + + if obj.tzinfo is not None and not is_utc(obj.tzinfo): + offset = get_utcoffset(obj.tzinfo, ts) + pps = periods_per_second(reso) + obj.value -= int(offset.total_seconds() * pps) + + check_dts_bounds(&obj.dts, reso) + check_overflows(obj, reso) return obj @@ -401,7 +412,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts, obj.tzinfo = pytz.FixedOffset(tzoffset) obj.value = tz_localize_to_utc_single(value, obj.tzinfo) if tz is None: - check_overflows(obj) + check_overflows(obj, NPY_FR_ns) return obj cdef: @@ -515,13 +526,14 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit, return convert_datetime_to_tsobject(dt, tz) -cdef inline check_overflows(_TSObject obj): +cdef inline check_overflows(_TSObject obj, NPY_DATETIMEUNIT reso=NPY_FR_ns): """ Check that we haven't silently overflowed in timezone conversion Parameters ---------- obj : _TSObject + reso : NPY_DATETIMEUNIT, default NPY_FR_ns Returns ------- @@ -532,7 +544,12 @@ cdef inline check_overflows(_TSObject obj): OutOfBoundsDatetime """ # GH#12677 - if obj.dts.year == 1677: + cdef: + npy_datetimestruct lb, ub + + get_implementation_bounds(reso, &lb, &ub) + + if obj.dts.year == lb.year: if not (obj.value < 0): from pandas._libs.tslibs.timestamps import Timestamp fmt = (f"{obj.dts.year}-{obj.dts.month:02d}-{obj.dts.day:02d} " @@ -540,7 +557,7 @@ cdef inline check_overflows(_TSObject obj): raise OutOfBoundsDatetime( f"Converting {fmt} underflows past {Timestamp.min}" ) - elif obj.dts.year == 2262: + elif obj.dts.year == ub.year: if not (obj.value > 0): from pandas._libs.tslibs.timestamps import Timestamp fmt = (f"{obj.dts.year}-{obj.dts.month:02d}-{obj.dts.day:02d} " diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index d4dbcbe2acd6e..2f775912da141 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -79,6 +79,7 @@ cdef int64_t dtstruct_to_dt64(npy_datetimestruct* dts) nogil cdef void dt64_to_dtstruct(int64_t dt64, npy_datetimestruct* out) nogil cdef int64_t pydatetime_to_dt64(datetime val, npy_datetimestruct *dts) +cdef void pydatetime_to_dtstruct(datetime dt, npy_datetimestruct *dts) cdef int64_t pydate_to_dt64(date val, npy_datetimestruct *dts) cdef void pydate_to_dtstruct(date val, npy_datetimestruct *dts) @@ -104,3 +105,6 @@ cpdef cnp.ndarray astype_overflowsafe( ) cdef bint cmp_dtstructs(npy_datetimestruct* left, npy_datetimestruct* right, int op) +cdef get_implementation_bounds( + NPY_DATETIMEUNIT reso, npy_datetimestruct *lower, npy_datetimestruct *upper +) diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index cf967509a84c0..24ba329120a51 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -167,6 +167,26 @@ class OutOfBoundsTimedelta(ValueError): pass +cdef get_implementation_bounds(NPY_DATETIMEUNIT reso, npy_datetimestruct *lower, npy_datetimestruct *upper): + if reso == NPY_FR_ns: + upper[0] = _NS_MAX_DTS + lower[0] = _NS_MIN_DTS + elif reso == NPY_FR_us: + upper[0] = _US_MAX_DTS + lower[0] = _US_MIN_DTS + elif reso == NPY_FR_ms: + upper[0] = _MS_MAX_DTS + lower[0] = _MS_MIN_DTS + elif reso == NPY_FR_s: + upper[0] = _S_MAX_DTS + lower[0] = _S_MIN_DTS + elif reso == NPY_FR_m: + upper[0] = _M_MAX_DTS + lower[0] = _M_MIN_DTS + else: + raise NotImplementedError(reso) + + cdef check_dts_bounds(npy_datetimestruct *dts, NPY_DATETIMEUNIT unit=NPY_FR_ns): """Raises OutOfBoundsDatetime if the given date is outside the range that can be represented by nanosecond-resolution 64-bit integers.""" @@ -174,23 +194,7 @@ cdef check_dts_bounds(npy_datetimestruct *dts, NPY_DATETIMEUNIT unit=NPY_FR_ns): bint error = False npy_datetimestruct cmp_upper, cmp_lower - if unit == NPY_FR_ns: - cmp_upper = _NS_MAX_DTS - cmp_lower = _NS_MIN_DTS - elif unit == NPY_FR_us: - cmp_upper = _US_MAX_DTS - cmp_lower = _US_MIN_DTS - elif unit == NPY_FR_ms: - cmp_upper = _MS_MAX_DTS - cmp_lower = _MS_MIN_DTS - elif unit == NPY_FR_s: - cmp_upper = _S_MAX_DTS - cmp_lower = _S_MIN_DTS - elif unit == NPY_FR_m: - cmp_upper = _M_MAX_DTS - cmp_lower = _M_MIN_DTS - else: - raise NotImplementedError(unit) + get_implementation_bounds(unit, &cmp_lower, &cmp_upper) if cmp_npy_datetimestruct(dts, &cmp_lower) == -1: error = True @@ -229,19 +233,23 @@ def py_td64_to_tdstruct(int64_t td64, NPY_DATETIMEUNIT unit): return tds # <- returned as a dict to python +cdef inline void pydatetime_to_dtstruct(datetime dt, npy_datetimestruct *dts): + dts.year = PyDateTime_GET_YEAR(dt) + dts.month = PyDateTime_GET_MONTH(dt) + dts.day = PyDateTime_GET_DAY(dt) + dts.hour = PyDateTime_DATE_GET_HOUR(dt) + dts.min = PyDateTime_DATE_GET_MINUTE(dt) + dts.sec = PyDateTime_DATE_GET_SECOND(dt) + dts.us = PyDateTime_DATE_GET_MICROSECOND(dt) + dts.ps = dts.as = 0 + + cdef inline int64_t pydatetime_to_dt64(datetime val, npy_datetimestruct *dts): """ Note we are assuming that the datetime object is timezone-naive. """ - dts.year = PyDateTime_GET_YEAR(val) - dts.month = PyDateTime_GET_MONTH(val) - dts.day = PyDateTime_GET_DAY(val) - dts.hour = PyDateTime_DATE_GET_HOUR(val) - dts.min = PyDateTime_DATE_GET_MINUTE(val) - dts.sec = PyDateTime_DATE_GET_SECOND(val) - dts.us = PyDateTime_DATE_GET_MICROSECOND(val) - dts.ps = dts.as = 0 + pydatetime_to_dtstruct(val, dts) return dtstruct_to_dt64(dts) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index c6bae70d04a98..afa97c32fec30 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -2156,9 +2156,6 @@ default 'raise' datetime ts_input tzinfo_type tzobj - if self._reso != NPY_FR_ns: - raise NotImplementedError(self._reso) - # set to naive if needed tzobj = self.tzinfo value = self.value @@ -2171,7 +2168,7 @@ default 'raise' value = tz_convert_from_utc_single(value, tzobj, reso=self._reso) # setup components - dt64_to_dtstruct(value, &dts) + pandas_datetime_to_datetimestruct(value, self._reso, &dts) dts.ps = self.nanosecond * 1000 # replace @@ -2218,12 +2215,16 @@ default 'raise' 'fold': fold} ts_input = datetime(**kwargs) - ts = convert_datetime_to_tsobject(ts_input, tzobj) + ts = convert_datetime_to_tsobject(ts_input, tzobj, nanos=0, reso=self._reso) + # TODO: passing nanos=dts.ps // 1000 causes a RecursionError in + # TestTimestampConstructors.test_constructor; not clear why value = ts.value + (dts.ps // 1000) if value != NPY_NAT: - check_dts_bounds(&dts) + check_dts_bounds(&dts, self._reso) - return create_timestamp_from_ts(value, dts, tzobj, self._freq, fold) + return create_timestamp_from_ts( + value, dts, tzobj, self._freq, fold, reso=self._reso + ) def to_julian_date(self) -> np.float64: """ diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 5f07cabd51ca1..35065a3c9877c 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -19,6 +19,7 @@ iNaT, to_offset, ) +from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG import pandas.util._test_decorators as td @@ -338,6 +339,16 @@ def checker(res, ts, nanos): # -------------------------------------------------------------- # Timestamp.replace + def test_replace_non_nano(self): + ts = Timestamp._from_value_and_reso( + 91514880000000000, NpyDatetimeUnit.NPY_FR_us.value, None + ) + assert ts.to_pydatetime() == datetime(4869, 12, 28) + + result = ts.replace(year=4900) + assert result._reso == ts._reso + assert result.to_pydatetime() == datetime(4900, 12, 28) + def test_replace_naive(self): # GH#14621, GH#7825 ts = Timestamp("2016-01-01 09:00:00")