diff --git a/pandas/core/array_algos/quantile.py b/pandas/core/array_algos/quantile.py index 5f9e67a484d24..efa36a5bd3ae9 100644 --- a/pandas/core/array_algos/quantile.py +++ b/pandas/core/array_algos/quantile.py @@ -133,8 +133,14 @@ def _quantile_ea_compat( if not is_sparse(orig.dtype): # shape[0] should be 1 as long as EAs are 1D - assert result.shape == (1, len(qs)), result.shape - result = type(orig)._from_factorized(result[0], orig) + + if orig.ndim == 2: + # i.e. DatetimeArray + result = type(orig)._from_factorized(result, orig) + + else: + assert result.shape == (1, len(qs)), result.shape + result = type(orig)._from_factorized(result[0], orig) # error: Incompatible return value type (got "ndarray", expected "ExtensionArray") return result # type: ignore[return-value] diff --git a/pandas/core/array_algos/take.py b/pandas/core/array_algos/take.py index 6dfdc99f4fd9c..8318a02d5d214 100644 --- a/pandas/core/array_algos/take.py +++ b/pandas/core/array_algos/take.py @@ -3,6 +3,7 @@ import functools from typing import ( TYPE_CHECKING, + cast, overload, ) @@ -21,6 +22,7 @@ from pandas.core.construction import ensure_wrapped_if_datetimelike if TYPE_CHECKING: + from pandas.core.arrays._mixins import NDArrayBackedExtensionArray from pandas.core.arrays.base import ExtensionArray @@ -89,7 +91,12 @@ def take_nd( if not isinstance(arr, np.ndarray): # i.e. ExtensionArray, - # includes for EA to catch DatetimeArray, TimedeltaArray + if arr.ndim == 2: + # e.g. 
DatetimeArray, TimedeltaArray + arr = cast("NDArrayBackedExtensionArray", arr) + return arr.take( + indexer, fill_value=fill_value, allow_fill=allow_fill, axis=axis + ) return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) arr = np.asarray(arr) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 678e532f05772..427b3106ea10c 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -24,7 +24,10 @@ cache_readonly, doc, ) -from pandas.util._validators import validate_fillna_kwargs +from pandas.util._validators import ( + validate_bool_kwarg, + validate_fillna_kwargs, +) from pandas.core.dtypes.common import is_dtype_equal from pandas.core.dtypes.missing import array_equivalent @@ -39,6 +42,7 @@ from pandas.core.arrays.base import ExtensionArray from pandas.core.construction import extract_array from pandas.core.indexers import check_array_indexer +from pandas.core.sorting import nargminmax NDArrayBackedExtensionArrayT = TypeVar( "NDArrayBackedExtensionArrayT", bound="NDArrayBackedExtensionArray" @@ -189,6 +193,22 @@ def equals(self, other) -> bool: def _values_for_argsort(self): return self._ndarray + # Signature of "argmin" incompatible with supertype "ExtensionArray" + def argmin(self, axis: int = 0, skipna: bool = True): # type:ignore[override] + # override base class by adding axis keyword + validate_bool_kwarg(skipna, "skipna") + if not skipna and self.isna().any(): + raise NotImplementedError + return nargminmax(self, "argmin", axis=axis) + + # Signature of "argmax" incompatible with supertype "ExtensionArray" + def argmax(self, axis: int = 0, skipna: bool = True): # type:ignore[override] + # override base class by adding axis keyword + validate_bool_kwarg(skipna, "skipna") + if not skipna and self.isna().any(): + raise NotImplementedError + return nargminmax(self, "argmax", axis=axis) + def copy(self: NDArrayBackedExtensionArrayT) -> NDArrayBackedExtensionArrayT: new_data = 
self._ndarray.copy() return self._from_backing_data(new_data) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 46501c97cf38a..510bdfcb0079f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9544,6 +9544,9 @@ def func(values: np.ndarray): def blk_func(values, axis=1): if isinstance(values, ExtensionArray): + if values.ndim == 2: + # i.e. DatetimeArray, TimedeltaArray + return values._reduce(name, axis=1, skipna=skipna, **kwds) return values._reduce(name, skipna=skipna, **kwds) else: return op(values, axis=axis, skipna=skipna, **kwds) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 746e27c31f423..5442f90a25580 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -72,6 +72,7 @@ maybe_fill, ) +from pandas.core.arrays import ExtensionArray from pandas.core.base import SelectionMixin import pandas.core.common as com from pandas.core.frame import DataFrame @@ -267,7 +268,9 @@ def apply(self, f: F, data: FrameOrSeries, axis: int = 0): group_keys = self._get_group_keys() result_values = None - if data.ndim == 2 and np.any(data.dtypes.apply(is_extension_array_dtype)): + if data.ndim == 2 and any( + isinstance(x, ExtensionArray) for x in data._iter_column_arrays() + ): # calling splitter.fast_apply will raise TypeError via apply_frame_axis0 # if we pass EA instead of ndarray # TODO: can we have a workaround for EAs backed by ndarray? 
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 950d229c45f9e..dacacb4329d12 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -493,9 +493,12 @@ def apply_with_block(self: T, f, align_keys=None, swap_axis=True, **kwargs) -> T if isinstance(applied, list): applied = applied[0] arr = applied.values - if self.ndim == 2: - if isinstance(arr, np.ndarray): - arr = arr[0, :] + if self.ndim == 2 and arr.ndim == 2: + assert len(arr) == 1 + # error: Invalid index type "Tuple[int, slice]" for + # "Union[ndarray, ExtensionArray]"; expected type + # "Union[int, slice, ndarray]" + arr = arr[0, :] # type: ignore[index] result_arrays.append(arr) return type(self)(result_arrays, self._axes) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 09e214237b736..a77ea61d9e6de 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -27,7 +27,6 @@ writers, ) from pandas._libs.internals import BlockPlacement -from pandas._libs.tslibs import conversion from pandas._typing import ( ArrayLike, Dtype, @@ -47,7 +46,6 @@ maybe_downcast_numeric, maybe_downcast_to_dtype, maybe_upcast, - sanitize_to_nanoseconds, soft_convert_objects, ) from pandas.core.dtypes.common import ( @@ -938,7 +936,11 @@ def setitem(self, indexer, value): return self.coerce_to_target_dtype(value).setitem(indexer, value) if self.dtype.kind in ["m", "M"]: - arr = self.array_values.T + arr = self.values + if self.ndim > 1: + # Don't transpose with ndim=1 because we would fail to invalidate + # arr.freq + arr = arr.T arr[indexer] = value return self @@ -1172,6 +1174,7 @@ def _interpolate_with_fill( limit_area=limit_area, ) + values = maybe_coerce_values(values) blocks = [self.make_block_same_class(values)] return self._maybe_downcast(blocks, downcast) @@ -1227,6 +1230,7 @@ def func(yvalues: np.ndarray) -> np.ndarray: # interp each column independently interp_values = 
np.apply_along_axis(func, axis, data) + interp_values = maybe_coerce_values(interp_values) blocks = [self.make_block_same_class(interp_values)] return self._maybe_downcast(blocks, downcast) @@ -1788,27 +1792,32 @@ class NDArrayBackedExtensionBlock(HybridMixin, Block): Block backed by an NDArrayBackedExtensionArray """ + values: NDArrayBackedExtensionArray + + @property + def is_view(self) -> bool: + """ return a boolean if I am possibly a view """ + # check the ndarray values of the DatetimeIndex values + return self.values._ndarray.base is not None + def internal_values(self): # Override to return DatetimeArray and TimedeltaArray - return self.array_values + return self.values def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray: """ return object dtype as boxed values, such as Timestamps/Timedelta """ - values = self.array_values + values = self.values if is_object_dtype(dtype): - # DTA/TDA constructor and astype can handle 2D - # error: "Callable[..., Any]" has no attribute "astype" - values = values.astype(object) # type: ignore[attr-defined] + values = values.astype(object) # TODO(EA2D): reshape not needed with 2D EAs return np.asarray(values).reshape(self.shape) def iget(self, key): # GH#31649 we need to wrap scalars in Timestamp/Timedelta # TODO(EA2D): this can be removed if we ever have 2D EA - # error: "Callable[..., Any]" has no attribute "reshape" - return self.array_values.reshape(self.shape)[key] # type: ignore[attr-defined] + return self.values.reshape(self.shape)[key] def putmask(self, mask, new) -> List[Block]: mask = extract_bool_array(mask) @@ -1817,16 +1826,13 @@ def putmask(self, mask, new) -> List[Block]: return self.astype(object).putmask(mask, new) # TODO(EA2D): reshape unnecessary with 2D EAs - # error: "Callable[..., Any]" has no attribute "reshape" - arr = self.array_values.reshape(self.shape) # type: ignore[attr-defined] - arr = cast("NDArrayBackedExtensionArray", arr) + arr = self.values.reshape(self.shape) 
arr.T.putmask(mask, new) return [self] def where(self, other, cond, errors="raise") -> List[Block]: # TODO(EA2D): reshape unnecessary with 2D EAs - # error: "Callable[..., Any]" has no attribute "reshape" - arr = self.array_values.reshape(self.shape) # type: ignore[attr-defined] + arr = self.values.reshape(self.shape) cond = extract_bool_array(cond) @@ -1837,7 +1843,6 @@ def where(self, other, cond, errors="raise") -> List[Block]: # TODO(EA2D): reshape not needed with 2D EAs res_values = res_values.reshape(self.values.shape) - res_values = maybe_coerce_values(res_values) nb = self.make_block_same_class(res_values) return [nb] @@ -1862,19 +1867,15 @@ def diff(self, n: int, axis: int = 0) -> List[Block]: by apply. """ # TODO(EA2D): reshape not necessary with 2D EAs - # error: "Callable[..., Any]" has no attribute "reshape" - values = self.array_values.reshape(self.shape) # type: ignore[attr-defined] + values = self.values.reshape(self.shape) new_values = values - values.shift(n, axis=axis) - new_values = maybe_coerce_values(new_values) return [self.make_block(new_values)] def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> List[Block]: - # TODO(EA2D) this is unnecessary if these blocks are backed by 2D EA - # error: "Callable[..., Any]" has no attribute "reshape" - values = self.array_values.reshape(self.shape) # type: ignore[attr-defined] + # TODO(EA2D) this is unnecessary if these blocks are backed by 2D EAs + values = self.values.reshape(self.shape) new_values = values.shift(periods, fill_value=fill_value, axis=axis) - new_values = maybe_coerce_values(new_values) return [self.make_block_same_class(new_values)] def fillna( @@ -1887,38 +1888,27 @@ def fillna( # TODO: don't special-case td64 return self.astype(object).fillna(value, limit, inplace, downcast) - values = self.array_values - # error: "Callable[..., Any]" has no attribute "copy" - values = values if inplace else values.copy() # type: ignore[attr-defined] - # error: "Callable[..., Any]" 
has no attribute "fillna" - new_values = values.fillna( # type: ignore[attr-defined] - value=value, limit=limit - ) - new_values = maybe_coerce_values(new_values) + values = self.values + values = values if inplace else values.copy() + new_values = values.fillna(value=value, limit=limit) return [self.make_block_same_class(values=new_values)] class DatetimeLikeBlockMixin(NDArrayBackedExtensionBlock): """Mixin class for DatetimeBlock, DatetimeTZBlock, and TimedeltaBlock.""" + values: Union[DatetimeArray, TimedeltaArray] + is_numeric = False @cache_readonly def array_values(self): - return ensure_wrapped_if_datetimelike(self.values) + return self.values class DatetimeBlock(DatetimeLikeBlockMixin): __slots__ = () - def set_inplace(self, locs, values): - """ - See Block.set.__doc__ - """ - values = conversion.ensure_datetime64ns(values, copy=False) - - self.values[locs] = values - class DatetimeTZBlock(ExtensionBlock, DatetimeLikeBlockMixin): """ implement a datetime64 block with a tz attribute """ @@ -1936,13 +1926,10 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeLikeBlockMixin): putmask = DatetimeLikeBlockMixin.putmask fillna = DatetimeLikeBlockMixin.fillna - array_values = ExtensionBlock.array_values - - @property - def is_view(self) -> bool: - """ return a boolean if I am possibly a view """ - # check the ndarray values of the DatetimeIndex values - return self.values._data.base is not None + # error: Incompatible types in assignment (expression has type + # "Callable[[NDArrayBackedExtensionBlock], bool]", base class "ExtensionBlock" + # defined the type as "bool") [assignment] + is_view = NDArrayBackedExtensionBlock.is_view # type: ignore[assignment] class TimeDeltaBlock(DatetimeLikeBlockMixin): @@ -2029,15 +2016,11 @@ def maybe_coerce_values(values) -> ArrayLike: values = extract_array(values, extract_numpy=True) if isinstance(values, np.ndarray): - values = sanitize_to_nanoseconds(values) + values = ensure_wrapped_if_datetimelike(values) if 
issubclass(values.dtype.type, str): values = np.array(values, dtype=object) - elif isinstance(values.dtype, np.dtype): - # i.e. not datetime64tz, extract DTA/TDA -> ndarray - values = values._data - return values diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index d5e549ec874da..02d582c439ea2 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -423,10 +423,17 @@ def _concatenate_join_units( concat_values = concat_values.copy() else: concat_values = concat_values.copy() - elif any(isinstance(t, ExtensionArray) for t in to_concat): + elif any(isinstance(t, ExtensionArray) and t.ndim == 1 for t in to_concat): # concatting with at least one EA means we are concatting a single column # the non-EA values are 2D arrays with shape (1, n) - to_concat = [t if isinstance(t, ExtensionArray) else t[0, :] for t in to_concat] + # error: Invalid index type "Tuple[int, slice]" for + # "Union[ExtensionArray, ndarray]"; expected type "Union[int, slice, ndarray]" + to_concat = [ + t + if (isinstance(t, ExtensionArray) and t.ndim == 1) + else t[0, :] # type: ignore[index] + for t in to_concat + ] concat_values = concat_compat(to_concat, axis=0, ea_compat_axis=True) concat_values = ensure_block_shape(concat_values, 2) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 856bd7d159c71..6024c083fcc6b 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -398,7 +398,7 @@ def nargsort( return indexer -def nargminmax(values, method: str): +def nargminmax(values, method: str, axis: int = 0): """ Implementation of np.argmin/argmax but for ExtensionArray and which handles missing values. 
@@ -407,6 +407,7 @@ def nargminmax(values, method: str): ---------- values : ExtensionArray method : {"argmax", "argmin"} + axis : int, default 0 Returns ------- @@ -418,7 +419,23 @@ def nargminmax(values, method: str): mask = np.asarray(isna(values)) values = values._values_for_argsort() - idx = np.arange(len(values)) + if values.ndim > 1: + if mask.any(): + if axis == 1: + zipped = zip(values, mask) + else: + zipped = zip(values.T, mask.T) + return np.array([_nanargminmax(v, m, func) for v, m in zipped]) + return func(values, axis=axis) + + return _nanargminmax(values, mask, func) + + +def _nanargminmax(values, mask, func) -> int: + """ + See nargminmax.__doc__. + """ + idx = np.arange(values.shape[0]) non_nans = values[~mask] non_nan_idx = idx[~mask] diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 99931123b0c81..8658bb654b787 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3963,7 +3963,9 @@ def _create_axes( typ = klass._get_atom(data_converted) kind = _dtype_to_kind(data_converted.dtype.name) - tz = _get_tz(data_converted.tz) if hasattr(data_converted, "tz") else None + tz = None + if getattr(data_converted, "tz", None) is not None: + tz = _get_tz(data_converted.tz) meta = metadata = ordered = None if is_categorical_dtype(data_converted.dtype): diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index f75b3800f623f..215b51dd88ef4 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1968,6 +1968,7 @@ def test_operators_datetimelike_with_timezones(self): td1 = Series(pd.timedelta_range("1 days 1 min", periods=5, freq="H")) td2 = td1.copy() td2.iloc[1] = np.nan + assert td2._values.freq is None result = dt1 + td1[0] exp = (dt1.dt.tz_localize(None) + td1[0]).dt.tz_localize(tz) diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index a4fedd9a4c5da..6c1161294dd17 100644 --- 
a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -30,6 +30,7 @@ import numpy as np +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike from pandas.core.dtypes.common import pandas_dtype import pandas as pd @@ -217,11 +218,9 @@ def _values_for_factorize(self): return frozen, () def _values_for_argsort(self): - # Disable NumPy's shape inference by including an empty tuple... - # If all the elements of self are the same size P, NumPy will - # cast them to an (N, P) array, instead of an (N,) array of tuples. - frozen = [()] + [tuple(x.items()) for x in self] - return np.array(frozen, dtype=object)[1:] + # Bypass NumPy's shape inference to get a (N,) array of tuples. + frozen = [tuple(x.items()) for x in self] + return construct_1d_object_array_from_listlike(frozen) def make_data(): diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index b294c97409951..616405c01cc2a 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1003,6 +1003,43 @@ def test_idxmax_mixed_dtype(self): expected = Series([0, 2, 0], index=[1, 2, 3]) tm.assert_series_equal(result, expected) + # with NaTs + df.loc[0, 3] = pd.NaT + result = df.idxmax() + expected = Series([1, 0, 2], index=[1, 2, 3]) + tm.assert_series_equal(result, expected) + + result = df.idxmin() + expected = Series([0, 2, 1], index=[1, 2, 3]) + tm.assert_series_equal(result, expected) + + # with multi-column dt64 block + df[4] = dti[::-1] + df._consolidate_inplace() + + result = df.idxmax() + expected = Series([1, 0, 2, 0], index=[1, 2, 3, 4]) + tm.assert_series_equal(result, expected) + + result = df.idxmin() + expected = Series([0, 2, 1, 2], index=[1, 2, 3, 4]) + tm.assert_series_equal(result, expected) + + def test_idxmax_dt64_multicolumn_axis1(self): + dti = date_range("2016-01-01", periods=3) + df = DataFrame({3: dti, 4: dti[::-1]}) + df.iloc[0, 0] = pd.NaT + + 
df._consolidate_inplace() + + result = df.idxmax(axis=1) + expected = Series([4, 3, 3]) + tm.assert_series_equal(result, expected) + + result = df.idxmin(axis=1) + expected = Series([4, 3, 4]) + tm.assert_series_equal(result, expected) + # ---------------------------------------------------------------------- # Logical reductions diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index eb54887cea277..117612696df11 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1008,11 +1008,8 @@ def test_apply_function_with_indexing_return_column(): tm.assert_frame_equal(result, expected) -def test_apply_with_timezones_aware(using_array_manager, request): +def test_apply_with_timezones_aware(): # GH: 27212 - if not using_array_manager: - request.node.add_marker(pytest.mark.xfail(reason="GH-34998")) - dates = ["2001-01-01"] * 2 + ["2001-01-02"] * 2 + ["2001-01-03"] * 2 index_no_tz = pd.DatetimeIndex(dates) index_tz = pd.DatetimeIndex(dates, tz="UTC") diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 12247e2445295..de508b8cd78ec 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -840,12 +840,7 @@ def test_omit_nuisance(df): # won't work with axis = 1 grouped = df.groupby({"A": 0, "C": 0, "D": 1, "E": 1}, axis=1) - msg = "|".join( - [ - "reduction operation 'sum' not allowed for this dtype", - "'DatetimeArray' does not implement reduction 'sum'", - ] - ) + msg = "'DatetimeArray' does not implement reduction 'sum'" with pytest.raises(TypeError, match=msg): grouped.agg(lambda x: x.sum(0, numeric_only=False)) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index fc06b85b1f954..c242623520b75 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -27,7 +27,10 @@ ) import pandas._testing as tm import pandas.core.algorithms as 
algos -from pandas.core.arrays import SparseArray +from pandas.core.arrays import ( + DatetimeArray, + SparseArray, +) from pandas.core.internals import ( BlockManager, SingleBlockManager, @@ -438,13 +441,26 @@ def test_copy(self, mgr): cp = mgr.copy(deep=True) for blk, cp_blk in zip(mgr.blocks, cp.blocks): + bvals = blk.values + cpvals = cp_blk.values + + tm.assert_equal(cpvals, bvals) + + if isinstance(cpvals, np.ndarray): + lbase = cpvals.base + rbase = bvals.base + else: + lbase = cpvals._ndarray.base + rbase = bvals._ndarray.base + # copy assertion we either have a None for a base or in case of # some blocks it is an array (e.g. datetimetz), but was copied - tm.assert_equal(cp_blk.values, blk.values) - if not isinstance(cp_blk.values, np.ndarray): - assert cp_blk.values._data.base is not blk.values._data.base + if isinstance(cpvals, DatetimeArray): + assert (lbase is None and rbase is None) or (lbase is not rbase) + elif not isinstance(cpvals, np.ndarray): + assert lbase is not rbase else: - assert cp_blk.values.base is None and blk.values.base is None + assert lbase is None and rbase is None def test_sparse(self): mgr = create_mgr("a: sparse-1; b: sparse-2")