From 9e57d6d212a4edf9227c137cf56ca92239dfb4f5 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Tue, 21 Dec 2021 13:39:55 -0500 Subject: [PATCH 1/8] fix column_arrays for array manager --- pandas/core/internals/array_manager.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 09f16a2ddab67..06849bffff5ca 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -794,7 +794,14 @@ def column_arrays(self) -> list[ArrayLike]: """ Used in the JSON C code to access column arrays. """ - return self.arrays + + def convert_array(arr: ArrayLike) -> ArrayLike: + if isinstance(arr, ExtensionArray): + return arr.to_numpy() + else: + return arr + + return [convert_array(arr) for arr in self.arrays] def iset( self, loc: int | slice | np.ndarray, value: ArrayLike, inplace: bool = False From 84b119faea9b3409c5f1a1afe388b77bfff29e45 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Wed, 23 Feb 2022 07:24:01 -0500 Subject: [PATCH 2/8] remove dead code in arrays/interval.py --- pandas/core/arrays/interval.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index d23910c37b52b..d809ad90ad1b5 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -1664,12 +1664,8 @@ def _from_combined(self, combined: np.ndarray) -> IntervalArray: nc = combined.view("i8").reshape(-1, 2) dtype = self._left.dtype - if needs_i8_conversion(dtype): - new_left = type(self._left)._from_sequence(nc[:, 0], dtype=dtype) - new_right = type(self._right)._from_sequence(nc[:, 1], dtype=dtype) - else: - new_left = nc[:, 0].view(dtype) - new_right = nc[:, 1].view(dtype) + new_left = nc[:, 0].view(dtype) + new_right = nc[:, 1].view(dtype) return self._shallow_copy(left=new_left, right=new_right) def unique(self) -> IntervalArray: From 64fedddc8d03f8b6ebfad6ce5827c9d778f0a1b7 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Fri, 25 Feb 2022 14:51:00 -0500 Subject: [PATCH 3/8] Undo Revert "remove dead code in arrays/interval.py" This reverts commit 84b119faea9b3409c5f1a1afe388b77bfff29e45. --- pandas/core/arrays/interval.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index d809ad90ad1b5..d23910c37b52b 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -1664,8 +1664,12 @@ def _from_combined(self, combined: np.ndarray) -> IntervalArray: nc = combined.view("i8").reshape(-1, 2) dtype = self._left.dtype - new_left = nc[:, 0].view(dtype) - new_right = nc[:, 1].view(dtype) + if needs_i8_conversion(dtype): + new_left = type(self._left)._from_sequence(nc[:, 0], dtype=dtype) + new_right = type(self._right)._from_sequence(nc[:, 1], dtype=dtype) + else: + new_left = nc[:, 0].view(dtype) + new_right = nc[:, 1].view(dtype) return self._shallow_copy(left=new_left, right=new_right) def unique(self) -> IntervalArray: From 39b3b98e14c401b8498220cddca3e033ee0af061 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sun, 27 Feb 2022 15:53:04 -0500 Subject: [PATCH 4/8] remove NaTType as possible result of Timestamp and Timedelta constructor --- pandas/_libs/tslibs/timedeltas.pyi | 6 ++---- pandas/_libs/tslibs/timestamps.pyi | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyi b/pandas/_libs/tslibs/timedeltas.pyi index d8369f0cc90f9..da95a833e0545 100644 --- a/pandas/_libs/tslibs/timedeltas.pyi +++ b/pandas/_libs/tslibs/timedeltas.pyi @@ -33,14 +33,12 @@ class Timedelta(timedelta): max: ClassVar[Timedelta] resolution: ClassVar[Timedelta] value: int # np.int64 - - # error: "__new__" must return a class instance (got "Union[Timedelta, NaTType]") - def __new__( # type: ignore[misc] + def __new__( cls: Type[_S], value=..., unit: str = ..., **kwargs: int | float | np.integer | np.floating, - ) -> _S | NaTType: ... + ) -> _S: ... @property def days(self) -> int: ... @property diff --git a/pandas/_libs/tslibs/timestamps.pyi b/pandas/_libs/tslibs/timestamps.pyi index ecddd83322bbf..9d84311690747 100644 --- a/pandas/_libs/tslibs/timestamps.pyi +++ b/pandas/_libs/tslibs/timestamps.pyi @@ -31,9 +31,7 @@ class Timestamp(datetime): resolution: ClassVar[Timedelta] value: int # np.int64 - - # error: "__new__" must return a class instance (got "Union[Timestamp, NaTType]") - def __new__( # type: ignore[misc] + def __new__( cls: type[_DatetimeT], ts_input: int | np.integer @@ -56,7 +54,7 @@ class Timestamp(datetime): tzinfo: _tzinfo | None = ..., *, fold: int | None = ..., - ) -> _DatetimeT | NaTType: ... + ) -> _DatetimeT: ... def _set_freq(self, freq: BaseOffset | None) -> None: ... @property def year(self) -> int: ... From 031397cdcc010f20ce123a86762cf02bdaaf5de6 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Wed, 2 Mar 2022 05:49:36 -0500 Subject: [PATCH 5/8] add comments with ref to PR for reasoning --- pandas/_libs/tslibs/timedeltas.pyi | 3 +++ pandas/_libs/tslibs/timestamps.pyi | 3 +++ 2 files changed, 6 insertions(+) diff --git a/pandas/_libs/tslibs/timedeltas.pyi b/pandas/_libs/tslibs/timedeltas.pyi index da95a833e0545..f298d285a7207 100644 --- a/pandas/_libs/tslibs/timedeltas.pyi +++ b/pandas/_libs/tslibs/timedeltas.pyi @@ -39,6 +39,9 @@ class Timedelta(timedelta): unit: str = ..., **kwargs: int | float | np.integer | np.floating, ) -> _S: ... + # GH 46171 + # While Timedelta can return pd.NaT, having the constructor return + # a Union with NaTType makes things awkward for users of pandas @property def days(self) -> int: ... @property diff --git a/pandas/_libs/tslibs/timestamps.pyi b/pandas/_libs/tslibs/timestamps.pyi index 9d84311690747..c7711f1ec2e33 100644 --- a/pandas/_libs/tslibs/timestamps.pyi +++ b/pandas/_libs/tslibs/timestamps.pyi @@ -55,6 +55,9 @@ class Timestamp(datetime): *, fold: int | None = ..., ) -> _DatetimeT: ... + # GH 46171 + # While Timestamp can return pd.NaT, having the constructor return + # a Union with NaTType makes things awkward for users of pandas def _set_freq(self, freq: BaseOffset | None) -> None: ... @property def year(self) -> int: ... From 773e80818b8b08cf7f69019fd66ea6c218d95dc2 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Wed, 2 Mar 2022 20:07:23 -0500 Subject: [PATCH 6/8] don't make NaTType a subclass of datetime --- pandas/_libs/tslibs/nattype.pyi | 82 +++++++------------- pandas/_libs/tslibs/timedeltas.pyi | 9 +-- pandas/_libs/tslibs/timestamps.pyi | 6 +- pandas/core/arrays/datetimes.py | 1 - pandas/io/formats/format.py | 7 +- pandas/io/sas/sas_xport.py | 3 +- pandas/tests/resample/test_datetime_index.py | 11 ++- 7 files changed, 45 insertions(+), 74 deletions(-) diff --git a/pandas/_libs/tslibs/nattype.pyi b/pandas/_libs/tslibs/nattype.pyi index 8b409935b8fb8..efadd8f0220b3 100644 --- a/pandas/_libs/tslibs/nattype.pyi +++ b/pandas/_libs/tslibs/nattype.pyi @@ -3,7 +3,10 @@ from datetime import ( timedelta, tzinfo as _tzinfo, ) -from typing import Any +from typing import ( + Any, + Union, +) import numpy as np @@ -15,7 +18,12 @@ nat_strings: set[str] def is_null_datetimelike(val: object, inat_is_null: bool = ...) -> bool: ... -class NaTType(datetime): +_NaTComparisonTypes = Union[datetime, timedelta, Period, np.datetime64, np.timedelta64] + +class _NatComparison: + def __call__(self, other: _NaTComparisonTypes) -> bool: ... + +class NaTType: value: np.int64 def asm8(self) -> np.datetime64: ... def to_datetime64(self) -> np.datetime64: ... @@ -54,17 +62,11 @@ class NaTType(datetime): def weekofyear(self) -> float: ... def day_name(self) -> float: ... def month_name(self) -> float: ... - # error: Return type "float" of "weekday" incompatible with return - # type "int" in supertype "date" - def weekday(self) -> float: ... # type: ignore[override] - # error: Return type "float" of "isoweekday" incompatible with return - # type "int" in supertype "date" - def isoweekday(self) -> float: ... # type: ignore[override] + def weekday(self) -> float: ... + def isoweekday(self) -> float: ... def total_seconds(self) -> float: ... - # error: Signature of "today" incompatible with supertype "datetime" - def today(self, *args, **kwargs) -> NaTType: ... # type: ignore[override] - # error: Signature of "today" incompatible with supertype "datetime" - def now(self, *args, **kwargs) -> NaTType: ... # type: ignore[override] + def today(self, *args, **kwargs) -> NaTType: ... + def now(self, *args, **kwargs) -> NaTType: ... def to_pydatetime(self) -> NaTType: ... def date(self) -> NaTType: ... def round(self) -> NaTType: ... @@ -72,8 +74,7 @@ class NaTType(datetime): def ceil(self) -> NaTType: ... def tz_convert(self) -> NaTType: ... def tz_localize(self) -> NaTType: ... - # error: Signature of "replace" incompatible with supertype "datetime" - def replace( # type: ignore[override] + def replace( self, year: int | None = ..., month: int | None = ..., @@ -86,38 +87,24 @@ class NaTType(datetime): tzinfo: _tzinfo | None = ..., fold: int | None = ..., ) -> NaTType: ... - # error: Return type "float" of "year" incompatible with return - # type "int" in supertype "date" @property - def year(self) -> float: ... # type: ignore[override] + def year(self) -> float: ... @property def quarter(self) -> float: ... - # error: Return type "float" of "month" incompatible with return - # type "int" in supertype "date" @property - def month(self) -> float: ... # type: ignore[override] - # error: Return type "float" of "day" incompatible with return - # type "int" in supertype "date" + def month(self) -> float: ... @property - def day(self) -> float: ... # type: ignore[override] - # error: Return type "float" of "hour" incompatible with return - # type "int" in supertype "date" + def day(self) -> float: ... @property - def hour(self) -> float: ... # type: ignore[override] - # error: Return type "float" of "minute" incompatible with return - # type "int" in supertype "date" + def hour(self) -> float: ... @property - def minute(self) -> float: ... # type: ignore[override] - # error: Return type "float" of "second" incompatible with return - # type "int" in supertype "date" + def minute(self) -> float: ... @property - def second(self) -> float: ... # type: ignore[override] + def second(self) -> float: ... @property def millisecond(self) -> float: ... - # error: Return type "float" of "microsecond" incompatible with return - # type "int" in supertype "date" @property - def microsecond(self) -> float: ... # type: ignore[override] + def microsecond(self) -> float: ... @property def nanosecond(self) -> float: ... # inject Timedelta properties @@ -132,24 +119,7 @@ class NaTType(datetime): def qyear(self) -> float: ... def __eq__(self, other: Any) -> bool: ... def __ne__(self, other: Any) -> bool: ... - # https://github.com/python/mypy/issues/9015 - # error: Argument 1 of "__lt__" is incompatible with supertype "date"; - # supertype defines the argument type as "date" - def __lt__( # type: ignore[override] - self, other: datetime | timedelta | Period | np.datetime64 | np.timedelta64 - ) -> bool: ... - # error: Argument 1 of "__le__" is incompatible with supertype "date"; - # supertype defines the argument type as "date" - def __le__( # type: ignore[override] - self, other: datetime | timedelta | Period | np.datetime64 | np.timedelta64 - ) -> bool: ... - # error: Argument 1 of "__gt__" is incompatible with supertype "date"; - # supertype defines the argument type as "date" - def __gt__( # type: ignore[override] - self, other: datetime | timedelta | Period | np.datetime64 | np.timedelta64 - ) -> bool: ... - # error: Argument 1 of "__ge__" is incompatible with supertype "date"; - # supertype defines the argument type as "date" - def __ge__( # type: ignore[override] - self, other: datetime | timedelta | Period | np.datetime64 | np.timedelta64 - ) -> bool: ... + __lt__: _NatComparison + __le__: _NatComparison + __gt__: _NatComparison + __ge__: _NatComparison diff --git a/pandas/_libs/tslibs/timedeltas.pyi b/pandas/_libs/tslibs/timedeltas.pyi index f298d285a7207..035f83c77d9a6 100644 --- a/pandas/_libs/tslibs/timedeltas.pyi +++ b/pandas/_libs/tslibs/timedeltas.pyi @@ -7,12 +7,7 @@ from typing import ( ) import numpy as np - -from pandas._libs.tslibs import ( - NaTType, - Tick, -) -from pandas._typing import npt +import numpy.typing as npt _S = TypeVar("_S", bound=timedelta) @@ -26,7 +21,7 @@ def array_to_timedelta64( errors: str = ..., ) -> np.ndarray: ... # np.ndarray[m8ns] def parse_timedelta_unit(unit: str | None) -> str: ... -def delta_to_nanoseconds(delta: Tick | np.timedelta64 | timedelta | int) -> int: ... +def delta_to_nanoseconds(delta: np.timedelta64 | timedelta | int) -> int: ... class Timedelta(timedelta): min: ClassVar[Timedelta] diff --git a/pandas/_libs/tslibs/timestamps.pyi b/pandas/_libs/tslibs/timestamps.pyi index c7711f1ec2e33..4831921932f71 100644 --- a/pandas/_libs/tslibs/timestamps.pyi +++ b/pandas/_libs/tslibs/timestamps.pyi @@ -144,10 +144,10 @@ class Timestamp(datetime): def __add__(self: _DatetimeT, other: timedelta | np.timedelta64) -> _DatetimeT: ... def __radd__(self: _DatetimeT, other: timedelta) -> _DatetimeT: ... @overload # type: ignore - def __sub__(self, other: datetime) -> timedelta: ... + def __sub__(self, other: datetime) -> Timedelta: ... @overload # TODO: other can also be Tick (but it cannot be resolved) - def __sub__(self, other: timedelta | np.timedelta64) -> datetime: ... + def __sub__(self: _DatetimeT, other: timedelta | np.timedelta64) -> _DatetimeT: ... def __hash__(self) -> int: ... def weekday(self) -> int: ... def isoweekday(self) -> int: ... @@ -206,3 +206,5 @@ class Timestamp(datetime): def to_numpy( self, dtype: np.dtype | None = ..., copy: bool = ... ) -> np.datetime64: ... + @property + def _date_repr(self) -> str: ... diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 01347401c67c5..b55e203fb409f 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -775,7 +775,6 @@ def _add_offset(self, offset) -> DatetimeArray: def _sub_datetimelike_scalar(self, other): # subtract a datetime from myself, yielding a ndarray[timedelta64[ns]] assert isinstance(other, (datetime, np.datetime64)) - assert other is not NaT other = Timestamp(other) # error: Non-overlapping identity check (left operand type: "Timestamp", # right operand type: "NaTType") diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 4bc45e290ce4a..baa9254461d62 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1767,16 +1767,13 @@ def _format_datetime64_dateonly( nat_rep: str = "NaT", date_format: str | None = None, ) -> str: - if x is NaT: + if x is NaT or isinstance(x, NaTType): return nat_rep if date_format: return x.strftime(date_format) else: - # error: Item "NaTType" of "Union[NaTType, Any]" has no attribute "_date_repr" - # The underlying problem here is that mypy doesn't understand that NaT - # is a singleton, so that the check above excludes it here. - return x._date_repr # type: ignore[union-attr] + return x._date_repr def get_format_datetime64( diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index eefb619b0fd9f..f25951456e5b0 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -16,6 +16,7 @@ import numpy as np +from pandas._libs.tslibs import NaTType from pandas._typing import ( FilePath, ReadBuffer, @@ -139,7 +140,7 @@ """ -def _parse_date(datestr: str) -> datetime: +def _parse_date(datestr: str) -> datetime | NaTType: """Given a date in xport format, return Python date.""" try: # e.g. "16FEB11:10:07:55" diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 877467ea9350b..2a8d5f9b7c37f 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1,12 +1,17 @@ from datetime import datetime from functools import partial from io import StringIO +from typing import ( + List, + Union, +) import numpy as np import pytest import pytz from pandas._libs import lib +from pandas._libs.tslibs import NaTType from pandas.errors import UnsupportedFunctionCall import pandas as pd @@ -1286,7 +1291,7 @@ def test_resample_consistency(): tm.assert_series_equal(s10_2, rl) -dates1 = [ +dates1: List[Union[datetime, NaTType]] = [ datetime(2014, 10, 1), datetime(2014, 9, 3), datetime(2014, 11, 5), @@ -1295,7 +1300,9 @@ def test_resample_consistency(): datetime(2014, 7, 15), ] -dates2 = dates1[:2] + [pd.NaT] + dates1[2:4] + [pd.NaT] + dates1[4:] +dates2: List[Union[datetime, NaTType]] = ( + dates1[:2] + [pd.NaT] + dates1[2:4] + [pd.NaT] + dates1[4:] +) dates3 = [pd.NaT] + dates1 + [pd.NaT] # type: ignore[operator] From 5a7b312c2617bee5671dd811c93a1753c5e104f1 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Thu, 3 Mar 2022 09:27:40 -0500 Subject: [PATCH 7/8] put back code in core/arrays/datetimes.py --- pandas/core/arrays/datetimes.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index b55e203fb409f..6ac0d14d4d583 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -775,6 +775,9 @@ def _add_offset(self, offset) -> DatetimeArray: def _sub_datetimelike_scalar(self, other): # subtract a datetime from myself, yielding a ndarray[timedelta64[ns]] assert isinstance(other, (datetime, np.datetime64)) + # error: Non-overlapping identity check (left operand type: "Union[datetime, + # datetime64]", right operand type: "NaTType") [comparison-overlap] + assert other is not NaT # type: ignore[comparison-overlap] other = Timestamp(other) # error: Non-overlapping identity check (left operand type: "Timestamp", # right operand type: "NaTType") From e79d2536094c0b5c64b50910343670d46965f1df Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Mon, 7 Mar 2022 12:49:06 -0500 Subject: [PATCH 8/8] Add DatetimeNaTType type Change test for NaT in io/formats/format.py --- pandas/_typing.py | 5 +++++ pandas/io/formats/format.py | 2 +- pandas/io/sas/sas_xport.py | 4 ++-- pandas/tests/resample/test_datetime_index.py | 11 ++++------- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index b897a4e8fe199..cabf0e8275d08 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -35,6 +35,7 @@ import numpy.typing as npt from pandas._libs import ( + NaTType, Period, Timedelta, Timestamp, @@ -308,3 +309,7 @@ def closed(self) -> bool: # Interval closed type IntervalClosedType = Literal["left", "right", "both", "neither"] + +# datetime and NaTType + +DatetimeNaTType = Union[datetime, "NaTType"] diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index baa9254461d62..609fc2a45aa21 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1767,7 +1767,7 @@ def _format_datetime64_dateonly( nat_rep: str = "NaT", date_format: str | None = None, ) -> str: - if x is NaT or isinstance(x, NaTType): + if isinstance(x, NaTType): return nat_rep if date_format: diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index f25951456e5b0..3b33529eb4aca 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -16,8 +16,8 @@ import numpy as np -from pandas._libs.tslibs import NaTType from pandas._typing import ( + DatetimeNaTType, FilePath, ReadBuffer, ) @@ -140,7 +140,7 @@ """ -def _parse_date(datestr: str) -> datetime | NaTType: +def _parse_date(datestr: str) -> DatetimeNaTType: """Given a date in xport format, return Python date.""" try: # e.g. "16FEB11:10:07:55" diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 2a8d5f9b7c37f..8a96643b9834f 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1,17 +1,14 @@ from datetime import datetime from functools import partial from io import StringIO -from typing import ( - List, - Union, -) +from typing import List import numpy as np import pytest import pytz from pandas._libs import lib -from pandas._libs.tslibs import NaTType +from pandas._typing import DatetimeNaTType from pandas.errors import UnsupportedFunctionCall import pandas as pd @@ -1291,7 +1288,7 @@ def test_resample_consistency(): tm.assert_series_equal(s10_2, rl) -dates1: List[Union[datetime, NaTType]] = [ +dates1: List[DatetimeNaTType] = [ datetime(2014, 10, 1), datetime(2014, 9, 3), datetime(2014, 11, 5), @@ -1300,7 +1297,7 @@ def test_resample_consistency(): datetime(2014, 7, 15), ] -dates2: List[Union[datetime, NaTType]] = ( +dates2: List[DatetimeNaTType] = ( dates1[:2] + [pd.NaT] + dates1[2:4] + [pd.NaT] + dates1[4:] ) dates3 = [pd.NaT] + dates1 + [pd.NaT] # type: ignore[operator]