Skip to content

ENH: Timestamp.replace support non-nano #47312

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pandas/_libs/tslibs/conversion.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
int32_t nanos=*)

cdef _TSObject convert_datetime_to_tsobject(datetime ts, tzinfo tz,
int32_t nanos=*)
int32_t nanos=*,
NPY_DATETIMEUNIT reso=*)

cdef int64_t get_datetime64_nanos(object val) except? -1

Expand Down
55 changes: 36 additions & 19 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ from cpython.datetime cimport (
import_datetime()

from pandas._libs.tslibs.base cimport ABCTimestamp
from pandas._libs.tslibs.dtypes cimport periods_per_second
from pandas._libs.tslibs.np_datetime cimport (
NPY_DATETIMEUNIT,
NPY_FR_ns,
Expand All @@ -40,11 +41,14 @@ from pandas._libs.tslibs.np_datetime cimport (
dtstruct_to_dt64,
get_datetime64_unit,
get_datetime64_value,
get_implementation_bounds,
get_unit_from_dtype,
npy_datetime,
npy_datetimestruct,
npy_datetimestruct_to_datetime,
pandas_datetime_to_datetimestruct,
pydatetime_to_dt64,
pydatetime_to_dtstruct,
string_to_dts,
)

Expand Down Expand Up @@ -307,11 +311,15 @@ cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso):
if obj.value != NPY_NAT:
# check_overflows needs to run after _localize_tso
check_dts_bounds(&obj.dts, reso)
check_overflows(obj)
check_overflows(obj, reso)


cdef _TSObject convert_datetime_to_tsobject(datetime ts, tzinfo tz,
int32_t nanos=0):
cdef _TSObject convert_datetime_to_tsobject(
datetime ts,
tzinfo tz,
int32_t nanos=0,
NPY_DATETIMEUNIT reso=NPY_FR_ns,
):
"""
Convert a datetime (or Timestamp) input `ts`, along with optional timezone
object `tz` to a _TSObject.
Expand All @@ -327,13 +335,15 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, tzinfo tz,
timezone for the timezone-aware output
nanos : int32_t, default is 0
nanoseconds supplement the precision of the datetime input ts
reso : NPY_DATETIMEUNIT, default NPY_FR_ns

Returns
-------
obj : _TSObject
"""
cdef:
_TSObject obj = _TSObject()
int64_t pps

obj.fold = ts.fold
if tz is not None:
Expand All @@ -342,34 +352,35 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, tzinfo tz,
if ts.tzinfo is not None:
# Convert the current timezone to the passed timezone
ts = ts.astimezone(tz)
obj.value = pydatetime_to_dt64(ts, &obj.dts)
pydatetime_to_dtstruct(ts, &obj.dts)
obj.tzinfo = ts.tzinfo
elif not is_utc(tz):
ts = _localize_pydatetime(ts, tz)
obj.value = pydatetime_to_dt64(ts, &obj.dts)
pydatetime_to_dtstruct(ts, &obj.dts)
obj.tzinfo = ts.tzinfo
else:
# UTC
obj.value = pydatetime_to_dt64(ts, &obj.dts)
pydatetime_to_dtstruct(ts, &obj.dts)
obj.tzinfo = tz
else:
obj.value = pydatetime_to_dt64(ts, &obj.dts)
pydatetime_to_dtstruct(ts, &obj.dts)
obj.tzinfo = ts.tzinfo

if obj.tzinfo is not None and not is_utc(obj.tzinfo):
offset = get_utcoffset(obj.tzinfo, ts)
obj.value -= int(offset.total_seconds() * 1e9)

if isinstance(ts, ABCTimestamp):
obj.value += <int64_t>ts.nanosecond
obj.dts.ps = ts.nanosecond * 1000

if nanos:
obj.value += nanos
obj.dts.ps = nanos * 1000

check_dts_bounds(&obj.dts)
check_overflows(obj)
obj.value = npy_datetimestruct_to_datetime(reso, &obj.dts)

if obj.tzinfo is not None and not is_utc(obj.tzinfo):
offset = get_utcoffset(obj.tzinfo, ts)
pps = periods_per_second(reso)
obj.value -= int(offset.total_seconds() * pps)

check_dts_bounds(&obj.dts, reso)
check_overflows(obj, reso)
return obj


Expand Down Expand Up @@ -401,7 +412,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts,
obj.tzinfo = pytz.FixedOffset(tzoffset)
obj.value = tz_localize_to_utc_single(value, obj.tzinfo)
if tz is None:
check_overflows(obj)
check_overflows(obj, NPY_FR_ns)
return obj

cdef:
Expand Down Expand Up @@ -515,13 +526,14 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit,
return convert_datetime_to_tsobject(dt, tz)


cdef inline check_overflows(_TSObject obj):
cdef inline check_overflows(_TSObject obj, NPY_DATETIMEUNIT reso=NPY_FR_ns):
"""
Check that we haven't silently overflowed in timezone conversion

Parameters
----------
obj : _TSObject
reso : NPY_DATETIMEUNIT, default NPY_FR_ns

Returns
-------
Expand All @@ -532,15 +544,20 @@ cdef inline check_overflows(_TSObject obj):
OutOfBoundsDatetime
"""
# GH#12677
if obj.dts.year == 1677:
cdef:
npy_datetimestruct lb, ub

get_implementation_bounds(reso, &lb, &ub)

if obj.dts.year == lb.year:
if not (obj.value < 0):
from pandas._libs.tslibs.timestamps import Timestamp
fmt = (f"{obj.dts.year}-{obj.dts.month:02d}-{obj.dts.day:02d} "
f"{obj.dts.hour:02d}:{obj.dts.min:02d}:{obj.dts.sec:02d}")
raise OutOfBoundsDatetime(
f"Converting {fmt} underflows past {Timestamp.min}"
)
elif obj.dts.year == 2262:
elif obj.dts.year == ub.year:
if not (obj.value > 0):
from pandas._libs.tslibs.timestamps import Timestamp
fmt = (f"{obj.dts.year}-{obj.dts.month:02d}-{obj.dts.day:02d} "
Expand Down
4 changes: 4 additions & 0 deletions pandas/_libs/tslibs/np_datetime.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ cdef int64_t dtstruct_to_dt64(npy_datetimestruct* dts) nogil
cdef void dt64_to_dtstruct(int64_t dt64, npy_datetimestruct* out) nogil

cdef int64_t pydatetime_to_dt64(datetime val, npy_datetimestruct *dts)
cdef void pydatetime_to_dtstruct(datetime dt, npy_datetimestruct *dts)
cdef int64_t pydate_to_dt64(date val, npy_datetimestruct *dts)
cdef void pydate_to_dtstruct(date val, npy_datetimestruct *dts)

Expand All @@ -104,3 +105,6 @@ cpdef cnp.ndarray astype_overflowsafe(
)

cdef bint cmp_dtstructs(npy_datetimestruct* left, npy_datetimestruct* right, int op)
cdef get_implementation_bounds(
NPY_DATETIMEUNIT reso, npy_datetimestruct *lower, npy_datetimestruct *upper
)
58 changes: 33 additions & 25 deletions pandas/_libs/tslibs/np_datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -167,30 +167,34 @@ class OutOfBoundsTimedelta(ValueError):
pass


cdef get_implementation_bounds(
    NPY_DATETIMEUNIT reso, npy_datetimestruct *lower, npy_datetimestruct *upper
):
    """
    Fill ``lower`` and ``upper`` with the bounding datetimestructs for ``reso``.

    Parameters
    ----------
    reso : NPY_DATETIMEUNIT
        Resolution to look up. Handled values are NPY_FR_ns, NPY_FR_us,
        NPY_FR_ms, NPY_FR_s and NPY_FR_m.
    lower : npy_datetimestruct*
        Output argument; receives the ``_<UNIT>_MIN_DTS`` constant for ``reso``.
    upper : npy_datetimestruct*
        Output argument; receives the ``_<UNIT>_MAX_DTS`` constant for ``reso``.

    Raises
    ------
    NotImplementedError
        If ``reso`` is not one of the handled resolutions.
    """
    # Each branch copies the module-level min/max struct pair for one unit
    # into the caller-provided output structs.
    if reso == NPY_FR_ns:
        lower[0] = _NS_MIN_DTS
        upper[0] = _NS_MAX_DTS
    elif reso == NPY_FR_us:
        lower[0] = _US_MIN_DTS
        upper[0] = _US_MAX_DTS
    elif reso == NPY_FR_ms:
        lower[0] = _MS_MIN_DTS
        upper[0] = _MS_MAX_DTS
    elif reso == NPY_FR_s:
        lower[0] = _S_MIN_DTS
        upper[0] = _S_MAX_DTS
    elif reso == NPY_FR_m:
        lower[0] = _M_MIN_DTS
        upper[0] = _M_MAX_DTS
    else:
        raise NotImplementedError(reso)


cdef check_dts_bounds(npy_datetimestruct *dts, NPY_DATETIMEUNIT unit=NPY_FR_ns):
"""Raises OutOfBoundsDatetime if the given date is outside the range that
can be represented by nanosecond-resolution 64-bit integers."""
cdef:
bint error = False
npy_datetimestruct cmp_upper, cmp_lower

if unit == NPY_FR_ns:
cmp_upper = _NS_MAX_DTS
cmp_lower = _NS_MIN_DTS
elif unit == NPY_FR_us:
cmp_upper = _US_MAX_DTS
cmp_lower = _US_MIN_DTS
elif unit == NPY_FR_ms:
cmp_upper = _MS_MAX_DTS
cmp_lower = _MS_MIN_DTS
elif unit == NPY_FR_s:
cmp_upper = _S_MAX_DTS
cmp_lower = _S_MIN_DTS
elif unit == NPY_FR_m:
cmp_upper = _M_MAX_DTS
cmp_lower = _M_MIN_DTS
else:
raise NotImplementedError(unit)
get_implementation_bounds(unit, &cmp_lower, &cmp_upper)

if cmp_npy_datetimestruct(dts, &cmp_lower) == -1:
error = True
Expand Down Expand Up @@ -229,19 +233,23 @@ def py_td64_to_tdstruct(int64_t td64, NPY_DATETIMEUNIT unit):
return tds # <- returned as a dict to python


cdef inline void pydatetime_to_dtstruct(datetime dt, npy_datetimestruct *dts):
    """
    Copy the date and time fields of ``dt`` into ``dts``.

    Year through microsecond are read with the CPython datetime accessor
    macros; the picosecond and attosecond fields are set to zero. Only these
    fields of ``dt`` are read here.
    """
    # Calendar fields
    dts.year = PyDateTime_GET_YEAR(dt)
    dts.month = PyDateTime_GET_MONTH(dt)
    dts.day = PyDateTime_GET_DAY(dt)

    # Clock fields, down to microseconds
    dts.hour = PyDateTime_DATE_GET_HOUR(dt)
    dts.min = PyDateTime_DATE_GET_MINUTE(dt)
    dts.sec = PyDateTime_DATE_GET_SECOND(dt)
    dts.us = PyDateTime_DATE_GET_MICROSECOND(dt)

    # Sub-microsecond fields are not populated from ``dt``; zero them.
    dts.ps = 0
    dts.as = 0


cdef inline int64_t pydatetime_to_dt64(datetime val,
npy_datetimestruct *dts):
"""
Note we are assuming that the datetime object is timezone-naive.
"""
dts.year = PyDateTime_GET_YEAR(val)
dts.month = PyDateTime_GET_MONTH(val)
dts.day = PyDateTime_GET_DAY(val)
dts.hour = PyDateTime_DATE_GET_HOUR(val)
dts.min = PyDateTime_DATE_GET_MINUTE(val)
dts.sec = PyDateTime_DATE_GET_SECOND(val)
dts.us = PyDateTime_DATE_GET_MICROSECOND(val)
dts.ps = dts.as = 0
pydatetime_to_dtstruct(val, dts)
return dtstruct_to_dt64(dts)


Expand Down
15 changes: 8 additions & 7 deletions pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2156,9 +2156,6 @@ default 'raise'
datetime ts_input
tzinfo_type tzobj

if self._reso != NPY_FR_ns:
raise NotImplementedError(self._reso)

# set to naive if needed
tzobj = self.tzinfo
value = self.value
Expand All @@ -2171,7 +2168,7 @@ default 'raise'
value = tz_convert_from_utc_single(value, tzobj, reso=self._reso)

# setup components
dt64_to_dtstruct(value, &dts)
pandas_datetime_to_datetimestruct(value, self._reso, &dts)
dts.ps = self.nanosecond * 1000

# replace
Expand Down Expand Up @@ -2218,12 +2215,16 @@ default 'raise'
'fold': fold}
ts_input = datetime(**kwargs)

ts = convert_datetime_to_tsobject(ts_input, tzobj)
ts = convert_datetime_to_tsobject(ts_input, tzobj, nanos=0, reso=self._reso)
# TODO: passing nanos=dts.ps // 1000 causes a RecursionError in
# TestTimestampConstructors.test_constructor; not clear why
value = ts.value + (dts.ps // 1000)
if value != NPY_NAT:
check_dts_bounds(&dts)
check_dts_bounds(&dts, self._reso)

return create_timestamp_from_ts(value, dts, tzobj, self._freq, fold)
return create_timestamp_from_ts(
value, dts, tzobj, self._freq, fold, reso=self._reso
)

def to_julian_date(self) -> np.float64:
"""
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/scalar/timestamp/test_unary_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
iNaT,
to_offset,
)
from pandas._libs.tslibs.dtypes import NpyDatetimeUnit
from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG
import pandas.util._test_decorators as td

Expand Down Expand Up @@ -338,6 +339,16 @@ def checker(res, ts, nanos):
# --------------------------------------------------------------
# Timestamp.replace

def test_replace_non_nano(self):
    """Timestamp.replace on a microsecond-resolution value keeps its resolution."""
    # Build a Timestamp directly from an integer value at microsecond
    # resolution, with no timezone.
    us_reso = NpyDatetimeUnit.NPY_FR_us.value
    original = Timestamp._from_value_and_reso(91514880000000000, us_reso, None)
    assert original.to_pydatetime() == datetime(4869, 12, 28)

    # Replacing only the year must keep the resolution and the other fields.
    replaced = original.replace(year=4900)
    assert replaced._reso == original._reso
    assert replaced.to_pydatetime() == datetime(4900, 12, 28)

def test_replace_naive(self):
# GH#14621, GH#7825
ts = Timestamp("2016-01-01 09:00:00")
Expand Down