From 1781ee5b28376c1dc54e1191bd307f49220596ae Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 6 May 2021 10:47:17 -0700 Subject: [PATCH 1/2] REF: preserve Index dtype in BlockManager._combine --- pandas/core/internals/array_manager.py | 6 +----- pandas/core/internals/managers.py | 17 +++++++++++------ pandas/tests/generic/test_generic.py | 8 ++++---- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 71e6d14e6a716..dd02771f735a6 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -274,9 +274,6 @@ def apply( else: new_axes = self._axes - if len(result_arrays) == 0: - return self.make_empty(new_axes) - # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]"; # expected "List[Union[ndarray, ExtensionArray]]" return type(self)(result_arrays, new_axes) # type: ignore[arg-type] @@ -487,7 +484,7 @@ def _get_data_subset(self: T, predicate: Callable) -> T: indices = [i for i, arr in enumerate(self.arrays) if predicate(arr)] arrays = [self.arrays[i] for i in indices] # TODO copy? - new_axes = [self._axes[0], self._axes[1][np.array(indices, dtype="int64")]] + new_axes = [self._axes[0], self._axes[1][np.array(indices, dtype="intp")]] return type(self)(arrays, new_axes, verify_integrity=False) def get_bool_data(self: T, copy: bool = False) -> T: @@ -696,7 +693,6 @@ def _equal_values(self, other) -> bool: return True # TODO - # equals # to_dict diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 73f463997c085..a5b604868fb5d 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -345,9 +345,6 @@ def apply( if ignore_failures: return self._combine(result_blocks) - if len(result_blocks) == 0: - return self.make_empty(self.axes) - return type(self).from_blocks(result_blocks, self.axes) def where(self: T, other, cond, align: bool, errors: str) -> T: @@ -532,7 +529,15 @@ def _combine( ) -> T: """ return a new manager with the blocks """ if len(blocks) == 0: - return self.make_empty() + if self.ndim == 2: + # retain our own Index dtype + if index is not None: + axes = [self.items[:0], index] + else: + axes = [self.items[:0]] + self.axes[1:] + else: + axes = None + return self.make_empty(axes) # FIXME: optimization potential indexer = np.sort(np.concatenate([b.mgr_locs.as_array for b in blocks])) @@ -1233,7 +1238,7 @@ def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T: index = Index(range(result_blocks[0].values.shape[-1])) if ignore_failures: - return self._combine(result_blocks, index=index) + return self._combine(result_blocks, copy=False, index=index) return type(self).from_blocks(result_blocks, [self.axes[0], index]) @@ -1270,7 +1275,7 @@ def reduce( new_mgr = self._combine(res_blocks, copy=False, index=index) else: indexer = [] - new_mgr = type(self).from_blocks([], [Index([]), index]) + new_mgr = type(self).from_blocks([], [self.items[:0], index]) else: indexer = np.arange(self.shape[0]) new_mgr = type(self).from_blocks(res_blocks, [self.items, index]) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index d07f843f4acfc..771d31aa6865b 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -85,7 +85,7 @@ def test_rename(self): # multiple axes at once - def test_get_numeric_data(self, using_array_manager): + def test_get_numeric_data(self): n = 4 kwargs = { @@ -100,9 +100,9 @@ def test_get_numeric_data(self, using_array_manager): # non-inclusion result = o._get_bool_data() expected = self._construct(n, value="empty", **kwargs) - if using_array_manager and isinstance(o, DataFrame): - # INFO(ArrayManager) preserve the dtype of the columns Index - expected.columns = expected.columns.astype("int64") + if isinstance(o, DataFrame): + # preserve columns dtype + expected.columns = o.columns[:0] self._compare(result, expected) # get the bool data From 5dda642d69e79ac6d1310ba5b3eff58be440c44f Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 6 May 2021 11:44:40 -0700 Subject: [PATCH 2/2] mypy fixup --- pandas/core/internals/managers.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index a5b604868fb5d..cdb7d8a6ccd45 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -535,9 +535,8 @@ def _combine( axes = [self.items[:0], index] else: axes = [self.items[:0]] + self.axes[1:] - else: - axes = None - return self.make_empty(axes) + return self.make_empty(axes) + return self.make_empty() # FIXME: optimization potential indexer = np.sort(np.concatenate([b.mgr_locs.as_array for b in blocks]))