From 0ae017e0afd077d2825b51b6d0e376798862af0b Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 30 Dec 2020 14:52:47 -0800 Subject: [PATCH 1/3] avoid unnecessary check --- pandas/core/dtypes/concat.py | 2 +- pandas/core/frame.py | 6 +++--- pandas/core/internals/blocks.py | 12 +++++++----- pandas/core/internals/concat.py | 17 ++++++++++------- pandas/core/tools/numeric.py | 2 +- pandas/tests/plotting/frame/test_frame.py | 4 ++-- 6 files changed, 24 insertions(+), 19 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index a9355e30cd3c2..aea9029972de6 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -89,7 +89,7 @@ def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike: # wrap datetime-likes in EA to ensure astype(object) gives Timestamp/Timedelta # this can happen when concat_compat is called directly on arrays (when arrays # are not coming from Index/Series._values), eg in BlockManager.quantile - arr = array(arr) + arr = ensure_wrapped_if_datetimelike(arr) if is_extension_array_dtype(dtype): if isinstance(arr, np.ndarray): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cc89823cd7817..03d439bd461da 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9600,7 +9600,7 @@ def _from_nested_dict(data) -> collections.defaultdict: return new_data -def _reindex_for_setitem(value, index: Index): +def _reindex_for_setitem(value: FrameOrSeriesUnion, index: Index) -> ArrayLike: # reindex if necessary if value.index.equals(index) or not len(index): @@ -9608,7 +9608,7 @@ def _reindex_for_setitem(value, index: Index): # GH#4107 try: - value = value.reindex(index)._values + reindexed_value = value.reindex(index)._values except ValueError as err: # raised in MultiIndex.from_tuples, see test_insert_error_msmgs if not value.index.is_unique: @@ -9618,7 +9618,7 @@ def _reindex_for_setitem(value, index: Index): raise TypeError( "incompatible index of inserted column with frame index" ) from err - return value + return reindexed_value def _maybe_atleast_2d(value): diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 38976ee632419..17054934bc9e7 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2583,7 +2583,7 @@ def _block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike: return values -def safe_reshape(arr, new_shape: Shape): +def safe_reshape(arr: ArrayLike, new_shape: Shape) -> ArrayLike: """ If possible, reshape `arr` to have shape `new_shape`, with a couple of exceptions (see gh-13012): @@ -2595,11 +2595,13 @@ def safe_reshape(arr, new_shape: Shape): Parameters ---------- - arr : array-like, object to be reshaped - new_shape : int or tuple of ints, the new shape + arr : np.ndarray or ExtensionArray + new_shape : Tuple[int] + + Returns + ------- + np.ndarray or ExtensionArray """ - if isinstance(arr, ABCSeries): - arr = arr._values if not is_extension_array_dtype(arr.dtype): # Note: this will include TimedeltaArray and tz-naive DatetimeArray # TODO(EA2D): special case will be unnecessary with 2D EAs diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index dd3a04ccb38e2..013e52248f5c4 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -5,7 +5,7 @@ import numpy as np from pandas._libs import NaT, internals as libinternals -from pandas._typing import DtypeObj, Shape +from pandas._typing import ArrayLike, DtypeObj, Shape from pandas.util._decorators import cache_readonly from pandas.core.dtypes.cast import maybe_promote @@ -29,11 +29,12 @@ from pandas.core.internals.managers import BlockManager if TYPE_CHECKING: + from pandas import Index from pandas.core.arrays.sparse.dtype import SparseDtype def concatenate_block_managers( - mgrs_indexers, axes, concat_axis: int, copy: bool + mgrs_indexers, axes: List["Index"], concat_axis: int, copy: bool ) -> BlockManager: """ Concatenate block managers into one. @@ -96,7 +97,7 @@ def concatenate_block_managers( return BlockManager(blocks, axes) -def _get_mgr_concatenation_plan(mgr, indexers): +def _get_mgr_concatenation_plan(mgr: BlockManager, indexers: Dict[int, np.ndarray]): """ Construct concatenation plan for given block manager and indexers. @@ -235,7 +236,7 @@ def is_na(self) -> bool: return isna_all(values_flat) - def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na): + def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike: if upcasted_na is None: # No upcasting is necessary fill_value = self.block.fill_value @@ -307,7 +308,9 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na): return values -def _concatenate_join_units(join_units, concat_axis, copy): +def _concatenate_join_units( + join_units: List[JoinUnit], concat_axis: int, copy: bool +) -> ArrayLike: """ Concatenate values from several join units along selected axis. """ @@ -513,7 +516,7 @@ def _is_uniform_reindex(join_units) -> bool: ) -def _trim_join_unit(join_unit, length): +def _trim_join_unit(join_unit: JoinUnit, length: int) -> JoinUnit: """ Reduce join_unit's shape along item axis to length. @@ -540,7 +543,7 @@ def _trim_join_unit(join_unit, length): return JoinUnit(block=extra_block, indexers=extra_indexers, shape=extra_shape) -def _combine_concat_plans(plans, concat_axis): +def _combine_concat_plans(plans, concat_axis: int): """ Combine multiple concatenation plans into one. diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index 1389aba9525d3..d8a5855d05dfd 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -171,7 +171,7 @@ def to_numeric(arg, errors="raise", downcast=None): if is_numeric_dtype(values_dtype): pass elif is_datetime_or_timedelta_dtype(values_dtype): - values = values.astype(np.int64) + values = values.view(np.int64) else: values = ensure_object(values) coerce_numeric = errors not in ("ignore", "raise") diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 31a16e21b7ac4..a6aa6c02d1a79 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -156,8 +156,8 @@ def test_nullable_int_plot(self): "A": [1, 2, 3, 4, 5], "B": [1.0, 2.0, 3.0, 4.0, 5.0], "C": [7, 5, np.nan, 3, 2], - "D": pd.to_datetime(dates, format="%Y"), - "E": pd.to_datetime(dates, format="%Y", utc=True), + "D": pd.to_datetime(dates, format="%Y").view("i8"), + "E": pd.to_datetime(dates, format="%Y", utc=True).view("i8"), }, dtype=np.int64, ) From 3d99f302e64c2cdeb397c11ed6699e576ad4ce22 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 30 Dec 2020 17:57:34 -0800 Subject: [PATCH 2/3] CLN: remove _maybe_reshape_where_args --- pandas/core/internals/blocks.py | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 17054934bc9e7..b934bedc1f01c 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1274,22 +1274,6 @@ def shift(self, periods: int, axis: int = 0, fill_value=None): return [self.make_block(new_values)] - def _maybe_reshape_where_args(self, values, other, cond, axis): - transpose = self.ndim == 2 - - cond = _extract_bool_array(cond) - - # If the default broadcasting would go in the wrong direction, then - # explicitly reshape other instead - if getattr(other, "ndim", 0) >= 1: - if values.ndim - 1 == other.ndim and axis == 1: - other = other.reshape(tuple(other.shape + (1,))) - elif transpose and values.ndim == self.ndim - 1: - # TODO(EA2D): not neceesssary with 2D EAs - cond = cond.T - - return other, cond - def where( self, other, cond, errors="raise", try_cast: bool = False, axis: int = 0 ) -> List["Block"]: @@ -1322,7 +1306,7 @@ def where( if transpose: values = values.T - other, cond = self._maybe_reshape_where_args(values, other, cond, axis) + cond = _extract_bool_array(cond) if cond.ravel("K").all(): result = values @@ -2081,7 +2065,7 @@ def where( # TODO(EA2D): reshape unnecessary with 2D EAs arr = self.array_values().reshape(self.shape) - other, cond = self._maybe_reshape_where_args(arr, other, cond, axis) + cond = _extract_bool_array(cond) try: res_values = arr.T.where(cond, other).T From 4850c5da891e2dd746690b33283166580d33c0f8 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 31 Dec 2020 08:19:36 -0800 Subject: [PATCH 3/3] update docstring --- pandas/core/internals/blocks.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index b934bedc1f01c..2fd78ce66a282 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2569,13 +2569,8 @@ def _block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike: def safe_reshape(arr: ArrayLike, new_shape: Shape) -> ArrayLike: """ - If possible, reshape `arr` to have shape `new_shape`, - with a couple of exceptions (see gh-13012): - - 1) If `arr` is a ExtensionArray or Index, `arr` will be - returned as is. - 2) If `arr` is a Series, the `_values` attribute will - be reshaped and returned. + Reshape `arr` to have shape `new_shape`, unless it is an ExtensionArray, + in which case it will be returned unchanged (see gh-13012). Parameters ----------