From 74cca61fce97b804c27146eef980f47ad7fe1667 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 29 Jan 2020 13:34:50 -0800 Subject: [PATCH 1/6] share parts of get_string_slice --- pandas/core/indexes/datetimelike.py | 64 +++++++++++++++++++++++++++-- pandas/core/indexes/datetimes.py | 39 ++---------------- pandas/core/indexes/period.py | 39 ++++-------------- pandas/core/indexes/timedeltas.py | 1 + 4 files changed, 73 insertions(+), 70 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index b87dd0f02252f..0093bd63a2c20 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -1,8 +1,9 @@ """ Base and utility classes for tseries type pandas objects. """ +from datetime import datetime import operator -from typing import Any, List, Optional, Set +from typing import Any, List, Optional, Set, Union import numpy as np @@ -31,7 +32,7 @@ from pandas.core import algorithms from pandas.core.accessor import PandasDelegate -from pandas.core.arrays import DatetimeArray, ExtensionArray, TimedeltaArray +from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin from pandas.core.base import _shared_docs import pandas.core.indexes.base as ibase @@ -90,7 +91,7 @@ class DatetimeIndexOpsMixin(ExtensionIndex): Common ops mixin to support a unified interface datetimelike Index. """ - _data: ExtensionArray + _data: Union[DatetimeArray, TimedeltaArray, PeriodArray] freq: Optional[DateOffset] freqstr: Optional[str] _resolution: int @@ -376,6 +377,7 @@ def _format_attrs(self): return attrs # -------------------------------------------------------------------- + # Indexing Methods def _convert_scalar_indexer(self, key, kind=None): """ @@ -402,6 +404,60 @@ def _convert_scalar_indexer(self, key, kind=None): return super()._convert_scalar_indexer(key, kind=kind) + def _validate_partial_date_slice(self, reso: str): + raise NotImplementedError + + def _parsed_string_to_bounds(self, reso: str, parsed: datetime): + raise NotImplementedError + + def _partial_date_slice( + self, reso: str, parsed: datetime, use_lhs: bool = True, use_rhs: bool = True + ): + """ + Parameters + ---------- + reso : str + parsed : datetime + use_lhs : bool, default True + use_rhs : bool, default True + + Returns + ------- + slice or ndarray[intp] + """ + self._validate_partial_date_slice(reso) + + t1, t2 = self._parsed_string_to_bounds(reso, parsed) + i8vals = self.asi8 + unbox = self._data._unbox_scalar # type: ignore + + if self.is_monotonic: + + # we are out of range + if len(self) and ( + (use_lhs and t1 < self[0] and t2 < self[0]) + or ((use_rhs and t1 > self[-1] and t2 > self[-1])) + ): + raise KeyError + + # TODO: does this depend on being monotonic _increasing_? + # If so, DTI will also be affected. + + # a monotonic (sorted) series can be sliced + # Use asi8.searchsorted to avoid re-validating Periods/Timestamps + left = i8vals.searchsorted(unbox(t1), side="left") if use_lhs else None + right = i8vals.searchsorted(unbox(t2), side="right") if use_rhs else None + return slice(left, right) + + else: + lhs_mask = (i8vals >= unbox(t1)) if use_lhs else True + rhs_mask = (i8vals <= unbox(t2)) if use_rhs else True + + # try to find a the dates + return (lhs_mask & rhs_mask).nonzero()[0] + + # -------------------------------------------------------------------- + __add__ = make_wrapped_arith_op("__add__") __radd__ = make_wrapped_arith_op("__radd__") __sub__ = make_wrapped_arith_op("__sub__") @@ -955,7 +1011,7 @@ class DatetimelikeDelegateMixin(PandasDelegate): _raw_methods: Set[str] = set() # raw_properties : dispatch properties that shouldn't be boxed in an Index _raw_properties: Set[str] = set() - _data: ExtensionArray + _data: Union[DatetimeArray, TimedeltaArray, PeriodArray] def _delegate_property_get(self, name, *args, **kwargs): result = getattr(self._data, name) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 3afd1ff35806d..ceb98075b8fd9 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -219,6 +219,7 @@ class DatetimeIndex(DatetimeTimedeltaMixin, DatetimeDelegateMixin): _infer_as_myclass = True tz: Optional[tzinfo] + _data: DatetimeArray # -------------------------------------------------------------------- # Constructors @@ -580,19 +581,9 @@ def _parsed_string_to_bounds(self, reso, parsed): end = end.tz_localize(self.tz) return start, end - def _partial_date_slice( - self, reso: str, parsed, use_lhs: bool = True, use_rhs: bool = True - ): - """ - Parameters - ---------- - reso : str - use_lhs : bool, default True - use_rhs : bool, default True - """ - is_monotonic = self.is_monotonic + def _validate_partial_date_slice(self, reso: str): if ( - is_monotonic + self.is_monotonic and reso in ["day", "hour", "minute", "second"] and self._resolution >= Resolution.get_reso(reso) ): @@ -607,30 +598,6 @@ def _partial_date_slice( # _parsed_string_to_bounds allows it. raise KeyError - t1, t2 = self._parsed_string_to_bounds(reso, parsed) - stamps = self.asi8 - - if is_monotonic: - - # we are out of range - if len(stamps) and ( - (use_lhs and t1.value < stamps[0] and t2.value < stamps[0]) - or ((use_rhs and t1.value > stamps[-1] and t2.value > stamps[-1])) - ): - raise KeyError - - # a monotonic (sorted) series can be sliced - left = stamps.searchsorted(t1.value, side="left") if use_lhs else None - right = stamps.searchsorted(t2.value, side="right") if use_rhs else None - - return slice(left, right) - - lhs_mask = (stamps >= t1.value) if use_lhs else True - rhs_mask = (stamps <= t2.value) if use_rhs else True - - # try to find a the dates - return (lhs_mask & rhs_mask).nonzero()[0] - def _maybe_promote(self, other): if other.inferred_type == "date": other = DatetimeIndex(other) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 1e18c16d02784..abacd87320fb2 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -652,9 +652,7 @@ def _parsed_string_to_bounds(self, reso: str, parsed: datetime): iv = Period(parsed, freq=(grp, 1)) return (iv.asfreq(self.freq, how="start"), iv.asfreq(self.freq, how="end")) - def _get_string_slice(self, key: str, use_lhs: bool = True, use_rhs: bool = True): - # TODO: Check for non-True use_lhs/use_rhs - parsed, reso = parse_time_string(key, self.freq) + def _validate_partial_date_slice(self, reso: str): grp = resolution.Resolution.get_freq_group(reso) freqn = resolution.get_freq_group(self.freq) @@ -662,35 +660,16 @@ def _get_string_slice(self, key: str, use_lhs: bool = True, use_rhs: bool = True # TODO: we used to also check for # reso in ["day", "hour", "minute", "second"] # why is that check not needed? - raise ValueError(key) - - t1, t2 = self._parsed_string_to_bounds(reso, parsed) - i8vals = self.asi8 - - if self.is_monotonic: - - # we are out of range - if len(self) and ( - (use_lhs and t1 < self[0] and t2 < self[0]) - or ((use_rhs and t1 > self[-1] and t2 > self[-1])) - ): - raise KeyError(key) - - # TODO: does this depend on being monotonic _increasing_? - # If so, DTI will also be affected. + raise ValueError - # a monotonic (sorted) series can be sliced - # Use asi8.searchsorted to avoid re-validating Periods - left = i8vals.searchsorted(t1.ordinal, side="left") if use_lhs else None - right = i8vals.searchsorted(t2.ordinal, side="right") if use_rhs else None - return slice(left, right) - - else: - lhs_mask = (i8vals >= t1.ordinal) if use_lhs else True - rhs_mask = (i8vals <= t2.ordinal) if use_rhs else True + def _get_string_slice(self, key: str, use_lhs: bool = True, use_rhs: bool = True): + # TODO: Check for non-True use_lhs/use_rhs + parsed, reso = parse_time_string(key, self.freq) - # try to find a the dates - return (lhs_mask & rhs_mask).nonzero()[0] + try: + return self._partial_date_slice(reso, parsed, use_lhs, use_rhs) + except KeyError: + raise KeyError(key) def _convert_tolerance(self, tolerance, target): tolerance = DatetimeIndexOpsMixin._convert_tolerance(self, tolerance, target) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 1257e410b4125..8bfcfcd977469 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -134,6 +134,7 @@ class TimedeltaIndex( _attributes = ["name", "freq"] _is_numeric_dtype = True _infer_as_myclass = True + _data: TimedeltaArray # ------------------------------------------------------------------- # Constructors From 76e24818824c8cd88fa2c9f4f08ec8ce4afc54ff Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 31 Jan 2020 10:52:50 -0800 Subject: [PATCH 2/6] cln --- pandas/core/indexes/timedeltas.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 2710cc7f0545f..08a07e8d30348 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -124,7 +124,6 @@ class TimedeltaIndex(DatetimeTimedeltaMixin, dtl.TimelikeOps): _attributes = ["name", "freq"] _is_numeric_dtype = True _infer_as_myclass = True - _data: TimedeltaArray _data: TimedeltaArray From 7579b440525b2f3c20f0a5df4f6cb9da14cb2b9e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 2 Feb 2020 18:04:30 -0800 Subject: [PATCH 3/6] merge fixup --- pandas/core/series.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index bfe9969daaa8e..040fcf392733b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -23,6 +23,7 @@ from pandas._config import get_option from pandas._libs import lib, properties, reshape, tslibs +from pandas._libs.index import validate_numeric_casting from pandas._typing import Label from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution @@ -1022,7 +1023,7 @@ def __setitem__(self, key, value): def _set_with_engine(self, key, value): # fails with AttributeError for IntervalIndex loc = self.index._engine.get_loc(key) - libindex.validate_numeric_casting(self.dtype, value) + validate_numeric_casting(self.dtype, value) self._values[loc] = value def _set_with(self, key, value): @@ -1105,7 +1106,7 @@ def _set_value(self, label, value, takeable: bool = False): self._values[label] = value else: loc = self.index.get_loc(label) - libindex.validate_numeric_casting(self.dtype, value) + validate_numeric_casting(self.dtype, value) self._values[loc] = value except KeyError: From ed25ee747f10cce1433204bd34a57c58993a17b0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 4 Feb 2020 15:35:15 -0800 Subject: [PATCH 4/6] unignore --- pandas/core/indexes/datetimelike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 3f172cf4c3d82..a6bfd3845bcbd 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -436,7 +436,7 @@ def _partial_date_slice( t1, t2 = self._parsed_string_to_bounds(reso, parsed) i8vals = self.asi8 - unbox = self._data._unbox_scalar # type: ignore + unbox = self._data._unbox_scalar if self.is_monotonic: From 9814e0ac72c8d1c850aea7a30b1144778753696d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 6 Feb 2020 15:27:39 -0800 Subject: [PATCH 5/6] revert unnecessary --- pandas/core/indexes/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 0dc3883c7cfdb..b67d0dcea0ac6 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -189,8 +189,8 @@ class DatetimeIndex(DatetimeTimedeltaMixin): _is_numeric_dtype = False _infer_as_myclass = True - tz: Optional[tzinfo] _data: DatetimeArray + tz: Optional[tzinfo] # -------------------------------------------------------------------- # Constructors From 66b94c33bb40f1bfb241d31975c2ad2f303c4441 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 6 Feb 2020 17:51:34 -0800 Subject: [PATCH 6/6] comment cleanup --- pandas/core/indexes/datetimelike.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index b773d80a614f1..d622baf515b82 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -442,15 +442,14 @@ def _partial_date_slice( if self.is_monotonic: - # we are out of range if len(self) and ( (use_lhs and t1 < self[0] and t2 < self[0]) or ((use_rhs and t1 > self[-1] and t2 > self[-1])) ): + # we are out of range raise KeyError # TODO: does this depend on being monotonic _increasing_? - # If so, DTI will also be affected. # a monotonic (sorted) series can be sliced # Use asi8.searchsorted to avoid re-validating Periods/Timestamps @@ -462,7 +461,7 @@ def _partial_date_slice( lhs_mask = (i8vals >= unbox(t1)) if use_lhs else True rhs_mask = (i8vals <= unbox(t2)) if use_rhs else True - # try to find a the dates + # try to find the dates return (lhs_mask & rhs_mask).nonzero()[0] # --------------------------------------------------------------------