
REF: back DatetimeBlock, TimedeltaBlock by DTA/TDA #40456


Merged · 16 commits · Mar 30, 2021
10 changes: 8 additions & 2 deletions pandas/core/array_algos/quantile.py
@@ -133,8 +133,14 @@ def _quantile_ea_compat(

if not is_sparse(orig.dtype):
# shape[0] should be 1 as long as EAs are 1D
assert result.shape == (1, len(qs)), result.shape
result = type(orig)._from_factorized(result[0], orig)

if orig.ndim == 2:
# i.e. DatetimeArray
result = type(orig)._from_factorized(result, orig)

else:
assert result.shape == (1, len(qs)), result.shape
result = type(orig)._from_factorized(result[0], orig)

# error: Incompatible return value type (got "ndarray", expected "ExtensionArray")
return result # type: ignore[return-value]
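
For context, a minimal sketch of how this branch can be exercised from the public API, assuming datetime blocks now reach `_quantile_ea_compat` with a 2D DatetimeArray, as the `orig.ndim == 2` check above suggests:

```python
import pandas as pd

df = pd.DataFrame({"ts": pd.date_range("2021-01-01", periods=5)})
# With DatetimeBlock backed by a 2D DatetimeArray, quantile_compat hands the
# block values to _quantile_ea_compat, and the new orig.ndim == 2 branch
# rebuilds the result without assuming a (1, len(qs)) shape.
print(df.quantile(0.5, numeric_only=False))
```
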
9 changes: 8 additions & 1 deletion pandas/core/array_algos/take.py
@@ -3,6 +3,7 @@
import functools
from typing import (
TYPE_CHECKING,
cast,
overload,
)

@@ -21,6 +22,7 @@
from pandas.core.construction import ensure_wrapped_if_datetimelike

if TYPE_CHECKING:
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
from pandas.core.arrays.base import ExtensionArray


@@ -89,7 +91,12 @@ def take_nd(

if not isinstance(arr, np.ndarray):
# i.e. ExtensionArray,
# includes for EA to catch DatetimeArray, TimedeltaArray
if arr.ndim == 2:
# e.g. DatetimeArray, TimedeltaArray
arr = cast("NDArrayBackedExtensionArray", arr)
return arr.take(
indexer, fill_value=fill_value, allow_fill=allow_fill, axis=axis
)
return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)

arr = np.asarray(arr)
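
As a rough illustration of the `ExtensionArray.take` contract this dispatch relies on (the `axis` keyword is specific to the 2D ndarray-backed arrays held by blocks; the public 1D behavior is unchanged):

```python
import pandas as pd

arr = pd.array(pd.date_range("2021-01-01", periods=3))  # DatetimeArray
# 1D EA take with allow_fill/fill_value; the new branch above additionally
# passes axis=... when the array is a 2D ndarray-backed EA inside a block.
print(arr.take([0, -1, 1], allow_fill=True, fill_value=pd.NaT))
```
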
22 changes: 21 additions & 1 deletion pandas/core/arrays/_mixins.py
@@ -24,7 +24,10 @@
cache_readonly,
doc,
)
from pandas.util._validators import validate_fillna_kwargs
from pandas.util._validators import (
validate_bool_kwarg,
validate_fillna_kwargs,
)

from pandas.core.dtypes.common import is_dtype_equal
from pandas.core.dtypes.missing import array_equivalent
@@ -39,6 +42,7 @@
from pandas.core.arrays.base import ExtensionArray
from pandas.core.construction import extract_array
from pandas.core.indexers import check_array_indexer
from pandas.core.sorting import nargminmax

NDArrayBackedExtensionArrayT = TypeVar(
"NDArrayBackedExtensionArrayT", bound="NDArrayBackedExtensionArray"
@@ -189,6 +193,22 @@ def equals(self, other) -> bool:
def _values_for_argsort(self):
return self._ndarray

# Signature of "argmin" incompatible with supertype "ExtensionArray"
def argmin(self, axis: int = 0, skipna: bool = True): # type:ignore[override]
# override base class by adding axis keyword
validate_bool_kwarg(skipna, "skipna")
if not skipna and self.isna().any():
raise NotImplementedError
return nargminmax(self, "argmin", axis=axis)

# Signature of "argmax" incompatible with supertype "ExtensionArray"
def argmax(self, axis: int = 0, skipna: bool = True): # type:ignore[override]
# override base class by adding axis keyword
validate_bool_kwarg(skipna, "skipna")
if not skipna and self.isna().any():
raise NotImplementedError
return nargminmax(self, "argmax", axis=axis)

def copy(self: NDArrayBackedExtensionArrayT) -> NDArrayBackedExtensionArrayT:
new_data = self._ndarray.copy()
return self._from_backing_data(new_data)
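
A small sanity check of the argmin/argmax behavior these overrides keep for the public 1D case, assuming a build that includes this change; the added `axis` keyword only matters for the 2D arrays held by blocks:

```python
import pandas as pd

arr = pd.array(pd.to_datetime(["2021-03-01", "2021-01-01", "2021-02-01"]))
# For 1D ndarray-backed EAs the overrides reduce to the usual behavior;
# with skipna=False and missing values present they raise NotImplementedError.
print(arr.argmin())  # 1
print(arr.argmax())  # 0
```
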
3 changes: 3 additions & 0 deletions pandas/core/frame.py
@@ -9544,6 +9544,9 @@ def func(values: np.ndarray):

def blk_func(values, axis=1):
if isinstance(values, ExtensionArray):
if values.ndim == 2:
Contributor: isn't this just axis=axis ?

Member Author: sure. next pass


# i.e. DatetimeArray, TimedeltaArray
return values._reduce(name, axis=1, skipna=skipna, **kwds)
return values._reduce(name, skipna=skipna, **kwds)
else:
return op(values, axis=axis, skipna=skipna, **kwds)
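
A user-level example of the code path above, assuming datetime and timedelta blocks are now 2D ExtensionArrays, so that blk_func must forward the axis (hard-coded to 1 here; per the review comment this could simply be axis=axis) to `_reduce`:

```python
import pandas as pd

df = pd.DataFrame(
    {
        "a": pd.date_range("2021-01-01", periods=3),
        "b": pd.timedelta_range("1 day", periods=3),
    }
)
# Column-wise reductions on datetime/timedelta columns hit blk_func with a
# 2D ExtensionArray and reduce along axis=1, one result per column.
print(df.min())
```
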
5 changes: 4 additions & 1 deletion pandas/core/groupby/ops.py
@@ -72,6 +72,7 @@
maybe_fill,
)

from pandas.core.arrays import ExtensionArray
from pandas.core.base import SelectionMixin
import pandas.core.common as com
from pandas.core.frame import DataFrame
@@ -267,7 +268,9 @@ def apply(self, f: F, data: FrameOrSeries, axis: int = 0):
group_keys = self._get_group_keys()
result_values = None

if data.ndim == 2 and np.any(data.dtypes.apply(is_extension_array_dtype)):
if data.ndim == 2 and any(
isinstance(x, ExtensionArray) for x in data._iter_column_arrays()
):
# calling splitter.fast_apply will raise TypeError via apply_frame_axis0
# if we pass EA instead of ndarray
# TODO: can we have a workaround for EAs backed by ndarray?
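
The reason the dtype-based check is replaced: datetime64[ns] is not an extension dtype, but after this change the column arrays behind such blocks are ExtensionArray instances. A quick illustration using public accessors:

```python
import pandas as pd
from pandas.api.types import is_extension_array_dtype

df = pd.DataFrame({"ts": pd.date_range("2021-01-01", periods=3)})
print(is_extension_array_dtype(df["ts"].dtype))  # False: plain datetime64[ns]
print(type(df["ts"].array).__name__)             # DatetimeArray
```
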
9 changes: 6 additions & 3 deletions pandas/core/internals/array_manager.py
@@ -493,9 +493,12 @@ def apply_with_block(self: T, f, align_keys=None, swap_axis=True, **kwargs) -> T
if isinstance(applied, list):
applied = applied[0]
arr = applied.values
if self.ndim == 2:
if isinstance(arr, np.ndarray):
arr = arr[0, :]
if self.ndim == 2 and arr.ndim == 2:
assert len(arr) == 1
# error: Invalid index type "Tuple[int, slice]" for
# "Union[ndarray, ExtensionArray]"; expected type
# "Union[int, slice, ndarray]"
arr = arr[0, :] # type: ignore[index]
result_arrays.append(arr)

return type(self)(result_arrays, self._axes)
87 changes: 35 additions & 52 deletions pandas/core/internals/blocks.py
@@ -27,7 +27,6 @@
writers,
)
from pandas._libs.internals import BlockPlacement
from pandas._libs.tslibs import conversion
from pandas._typing import (
ArrayLike,
Dtype,
@@ -47,7 +46,6 @@
maybe_downcast_numeric,
maybe_downcast_to_dtype,
maybe_upcast,
sanitize_to_nanoseconds,
soft_convert_objects,
)
from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -938,7 +936,11 @@ def setitem(self, indexer, value):
return self.coerce_to_target_dtype(value).setitem(indexer, value)

if self.dtype.kind in ["m", "M"]:
arr = self.array_values.T
arr = self.values
if self.ndim > 1:
# Dont transpose with ndim=1 bc we would fail to invalidate
# arr.freq
arr = arr.T
arr[indexer] = value
return self

@@ -1172,6 +1174,7 @@ def _interpolate_with_fill(
limit_area=limit_area,
)

values = maybe_coerce_values(values)
Contributor: prob can have maybe_coerce_values do downcasting (maybe)

blocks = [self.make_block_same_class(values)]
return self._maybe_downcast(blocks, downcast)

@@ -1227,6 +1230,7 @@ def func(yvalues: np.ndarray) -> np.ndarray:

# interp each column independently
interp_values = np.apply_along_axis(func, axis, data)
interp_values = maybe_coerce_values(interp_values)

blocks = [self.make_block_same_class(interp_values)]
return self._maybe_downcast(blocks, downcast)
@@ -1788,27 +1792,32 @@ class NDArrayBackedExtensionBlock(HybridMixin, Block):
Block backed by an NDArrayBackedExtensionArray
"""

values: NDArrayBackedExtensionArray

@property
def is_view(self) -> bool:
""" return a boolean if I am possibly a view """
# check the ndarray values of the DatetimeIndex values
return self.values._ndarray.base is not None

def internal_values(self):
# Override to return DatetimeArray and TimedeltaArray
return self.array_values
return self.values

def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray:
"""
return object dtype as boxed values, such as Timestamps/Timedelta
"""
values = self.array_values
values = self.values
if is_object_dtype(dtype):
# DTA/TDA constructor and astype can handle 2D
# error: "Callable[..., Any]" has no attribute "astype"
values = values.astype(object) # type: ignore[attr-defined]
values = values.astype(object)
# TODO(EA2D): reshape not needed with 2D EAs
return np.asarray(values).reshape(self.shape)

def iget(self, key):
# GH#31649 we need to wrap scalars in Timestamp/Timedelta
# TODO(EA2D): this can be removed if we ever have 2D EA
# error: "Callable[..., Any]" has no attribute "reshape"
return self.array_values.reshape(self.shape)[key] # type: ignore[attr-defined]
return self.values.reshape(self.shape)[key]

def putmask(self, mask, new) -> List[Block]:
mask = extract_bool_array(mask)
@@ -1817,16 +1826,13 @@ def putmask(self, mask, new) -> List[Block]:
return self.astype(object).putmask(mask, new)

# TODO(EA2D): reshape unnecessary with 2D EAs
# error: "Callable[..., Any]" has no attribute "reshape"
arr = self.array_values.reshape(self.shape) # type: ignore[attr-defined]
arr = cast("NDArrayBackedExtensionArray", arr)
arr = self.values.reshape(self.shape)
arr.T.putmask(mask, new)
return [self]

def where(self, other, cond, errors="raise") -> List[Block]:
# TODO(EA2D): reshape unnecessary with 2D EAs
# error: "Callable[..., Any]" has no attribute "reshape"
arr = self.array_values.reshape(self.shape) # type: ignore[attr-defined]
arr = self.values.reshape(self.shape)

cond = extract_bool_array(cond)

@@ -1837,7 +1843,6 @@

# TODO(EA2D): reshape not needed with 2D EAs
res_values = res_values.reshape(self.values.shape)
res_values = maybe_coerce_values(res_values)
nb = self.make_block_same_class(res_values)
return [nb]

@@ -1862,19 +1867,15 @@ def diff(self, n: int, axis: int = 0) -> List[Block]:
by apply.
"""
# TODO(EA2D): reshape not necessary with 2D EAs
# error: "Callable[..., Any]" has no attribute "reshape"
values = self.array_values.reshape(self.shape) # type: ignore[attr-defined]
values = self.values.reshape(self.shape)

new_values = values - values.shift(n, axis=axis)
new_values = maybe_coerce_values(new_values)
return [self.make_block(new_values)]

def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> List[Block]:
# TODO(EA2D) this is unnecessary if these blocks are backed by 2D EA
# error: "Callable[..., Any]" has no attribute "reshape"
values = self.array_values.reshape(self.shape) # type: ignore[attr-defined]
# TODO(EA2D) this is unnecessary if these blocks are backed by 2D EAs
values = self.values.reshape(self.shape)
new_values = values.shift(periods, fill_value=fill_value, axis=axis)
new_values = maybe_coerce_values(new_values)
return [self.make_block_same_class(new_values)]

def fillna(
@@ -1887,38 +1888,27 @@ def fillna(
# TODO: don't special-case td64
return self.astype(object).fillna(value, limit, inplace, downcast)

values = self.array_values
# error: "Callable[..., Any]" has no attribute "copy"
values = values if inplace else values.copy() # type: ignore[attr-defined]
# error: "Callable[..., Any]" has no attribute "fillna"
new_values = values.fillna( # type: ignore[attr-defined]
value=value, limit=limit
)
new_values = maybe_coerce_values(new_values)
values = self.values
values = values if inplace else values.copy()
new_values = values.fillna(value=value, limit=limit)
return [self.make_block_same_class(values=new_values)]


class DatetimeLikeBlockMixin(NDArrayBackedExtensionBlock):
"""Mixin class for DatetimeBlock, DatetimeTZBlock, and TimedeltaBlock."""

values: Union[DatetimeArray, TimedeltaArray]

is_numeric = False

@cache_readonly
def array_values(self):
return ensure_wrapped_if_datetimelike(self.values)
return self.values


class DatetimeBlock(DatetimeLikeBlockMixin):
__slots__ = ()

def set_inplace(self, locs, values):
"""
See Block.set.__doc__
"""
values = conversion.ensure_datetime64ns(values, copy=False)

self.values[locs] = values


class DatetimeTZBlock(ExtensionBlock, DatetimeLikeBlockMixin):
""" implement a datetime64 block with a tz attribute """
@@ -1936,13 +1926,10 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeLikeBlockMixin):
putmask = DatetimeLikeBlockMixin.putmask
fillna = DatetimeLikeBlockMixin.fillna

array_values = ExtensionBlock.array_values

@property
def is_view(self) -> bool:
""" return a boolean if I am possibly a view """
# check the ndarray values of the DatetimeIndex values
return self.values._data.base is not None
# error: Incompatible types in assignment (expression has type
# "Callable[[NDArrayBackedExtensionBlock], bool]", base class "ExtensionBlock"
# defined the type as "bool") [assignment]
is_view = NDArrayBackedExtensionBlock.is_view # type: ignore[assignment]


class TimeDeltaBlock(DatetimeLikeBlockMixin):
@@ -2029,15 +2016,11 @@ def maybe_coerce_values(values) -> ArrayLike:
values = extract_array(values, extract_numpy=True)

if isinstance(values, np.ndarray):
values = sanitize_to_nanoseconds(values)
values = ensure_wrapped_if_datetimelike(values)

if issubclass(values.dtype.type, str):
values = np.array(values, dtype=object)

elif isinstance(values.dtype, np.dtype):
# i.e. not datetime64tz, extract DTA/TDA -> ndarray
values = values._data

return values
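
To see the net effect of this file's changes, a quick probe using internal attributes (`_mgr` and `blocks` are not public and may change), assuming a development build that includes this PR: `Block.values` for datetime-like blocks is now the DatetimeArray/TimedeltaArray itself rather than a raw ndarray, which is why `array_values` and the `ignore[attr-defined]` workarounds above can be dropped.

```python
import pandas as pd

df = pd.DataFrame({"ts": pd.date_range("2021-01-01", periods=3)})
blk = df._mgr.blocks[0]           # internal API, shown for illustration only
print(type(blk.values).__name__)  # DatetimeArray with this PR, ndarray before
print(blk.values.ndim)            # 2: one row per column held by the block
```
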


11 changes: 9 additions & 2 deletions pandas/core/internals/concat.py
@@ -423,10 +423,17 @@ def _concatenate_join_units(
concat_values = concat_values.copy()
else:
concat_values = concat_values.copy()
elif any(isinstance(t, ExtensionArray) for t in to_concat):
elif any(isinstance(t, ExtensionArray) and t.ndim == 1 for t in to_concat):
# concatting with at least one EA means we are concatting a single column
# the non-EA values are 2D arrays with shape (1, n)
to_concat = [t if isinstance(t, ExtensionArray) else t[0, :] for t in to_concat]
# error: Invalid index type "Tuple[int, slice]" for
# "Union[ExtensionArray, ndarray]"; expected type "Union[int, slice, ndarray]"
to_concat = [
t
if (isinstance(t, ExtensionArray) and t.ndim == 1)
else t[0, :] # type: ignore[index]
for t in to_concat
]
concat_values = concat_compat(to_concat, axis=0, ea_compat_axis=True)
concat_values = ensure_block_shape(concat_values, 2)
