Skip to content

PERF/REF: require BlockPlacement in Block.__init__ #40361

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 33 commits into from
Mar 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
a8459ae
REF: de-duplicate Block.__init__
jbrockmendel Nov 28, 2020
cbb6c19
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Nov 28, 2020
e105769
deprecate allowing ndim=None
jbrockmendel Nov 28, 2020
766e481
catch warnings in parquet docs
jbrockmendel Nov 28, 2020
db17e6d
okwarning
jbrockmendel Nov 28, 2020
b93c82f
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Nov 29, 2020
5b70d6e
FutureWarning -> DeprecationWarning
jbrockmendel Nov 29, 2020
52c122c
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Dec 2, 2020
cb371ff
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Dec 3, 2020
16aa603
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Dec 9, 2020
724fc56
catch warnings from pyarrow
jbrockmendel Dec 9, 2020
c6ba826
post-merge fixup
jbrockmendel Dec 9, 2020
0a14bb5
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Dec 17, 2020
8dc0f94
update stacklevel
jbrockmendel Dec 17, 2020
60a8649
CLN: BlockManager.get_slice require only slice arg
jbrockmendel Mar 6, 2021
91436a5
Merge branch 'master' into ref-slice
jbrockmendel Mar 6, 2021
b17ad65
mypy fixup
jbrockmendel Mar 6, 2021
9fa658f
Merge branch 'master' into ref-slice
jbrockmendel Mar 6, 2021
5afce04
PERF: implement Index._getitem_slice
jbrockmendel Mar 6, 2021
f3d693c
raise instead of warn
jbrockmendel Mar 7, 2021
3e83444
Merge branch 'master' into ref-blk-init
jbrockmendel Mar 7, 2021
21a9438
Merge branch 'master' into ref-blk-init
jbrockmendel Mar 8, 2021
69097bc
Merge branch 'ref-slice' into perf-blk-init
jbrockmendel Mar 8, 2021
7e23233
Merge branch 'master' into perf-blk-init
jbrockmendel Mar 10, 2021
5aeab0a
Merge branch 'master' into perf-blk-init
jbrockmendel Mar 10, 2021
97a1e22
REF: require BlockPlacement in the Block constructor
jbrockmendel Mar 10, 2021
afdc559
mypy fixup
jbrockmendel Mar 11, 2021
935a292
mypy fixup
jbrockmendel Mar 11, 2021
aa0616e
Merge branch 'master' into perf-blk-init
jbrockmendel Mar 15, 2021
69c64b8
Merge branch 'master' into perf-blk-init
jbrockmendel Mar 16, 2021
8bb805e
Merge branch 'master' into perf-blk-init
jbrockmendel Mar 16, 2021
abdf059
mypy fixup
jbrockmendel Mar 16, 2021
809a508
Merge branch 'master' into perf-blk-init
jbrockmendel Mar 16, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
from pandas.core.dtypes.common import (
DT64NS_DTYPE,
TD64NS_DTYPE,
is_categorical_dtype,
is_dtype_equal,
is_float_dtype,
is_integer_dtype,
Expand All @@ -53,7 +52,10 @@
pandas_dtype,
)
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas.core.dtypes.generic import ABCMultiIndex
from pandas.core.dtypes.generic import (
ABCCategorical,
ABCMultiIndex,
)
from pandas.core.dtypes.missing import isna

from pandas.core import nanops
Expand Down Expand Up @@ -970,7 +972,7 @@ def sequence_to_td64ns(
elif not isinstance(data, (np.ndarray, ExtensionArray)):
# GH#24539 e.g. xarray, dask object
data = np.asarray(data)
elif is_categorical_dtype(data.dtype):
elif isinstance(data, ABCCategorical):
data = data.categories.take(data.codes, fill_value=NaT)._values
copy = False

Expand Down
34 changes: 22 additions & 12 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ def _simple_new(
obj._mgr_locs = placement
return obj

def __init__(self, values, placement, ndim: int):
def __init__(self, values, placement: BlockPlacement, ndim: int):
"""
Parameters
----------
Expand All @@ -183,8 +183,10 @@ def __init__(self, values, placement, ndim: int):
ndim : int
1 for SingleBlockManager/Series, 2 for BlockManager/DataFrame
"""
assert isinstance(ndim, int)
assert isinstance(placement, BlockPlacement)
self.ndim = ndim
self.mgr_locs = placement
self._mgr_locs = placement
self.values = values

@property
Expand Down Expand Up @@ -263,14 +265,12 @@ def fill_value(self):
return np.nan

@property
def mgr_locs(self):
def mgr_locs(self) -> BlockPlacement:
return self._mgr_locs

@mgr_locs.setter
def mgr_locs(self, new_mgr_locs):
if not isinstance(new_mgr_locs, libinternals.BlockPlacement):
new_mgr_locs = libinternals.BlockPlacement(new_mgr_locs)

def mgr_locs(self, new_mgr_locs: BlockPlacement):
assert isinstance(new_mgr_locs, BlockPlacement)
self._mgr_locs = new_mgr_locs

@final
Expand All @@ -289,7 +289,9 @@ def make_block(self, values, placement=None) -> Block:
return new_block(values, placement=placement, ndim=self.ndim)

@final
def make_block_same_class(self, values, placement=None) -> Block:
def make_block_same_class(
self, values, placement: Optional[BlockPlacement] = None
) -> Block:
""" Wrap given values in a block of same type as self. """
if placement is None:
placement = self._mgr_locs
Expand Down Expand Up @@ -1221,7 +1223,11 @@ def func(yvalues: np.ndarray) -> np.ndarray:
return self._maybe_downcast(blocks, downcast)

def take_nd(
self, indexer, axis: int, new_mgr_locs=None, fill_value=lib.no_default
self,
indexer,
axis: int,
new_mgr_locs: Optional[BlockPlacement] = None,
fill_value=lib.no_default,
) -> Block:
"""
Take values according to indexer and return them as a block.
Expand Down Expand Up @@ -1569,7 +1575,11 @@ def to_native_types(self, na_rep="nan", quoting=None, **kwargs):
return self.make_block(new_values)

def take_nd(
self, indexer, axis: int = 0, new_mgr_locs=None, fill_value=lib.no_default
self,
indexer,
axis: int = 0,
new_mgr_locs: Optional[BlockPlacement] = None,
fill_value=lib.no_default,
) -> Block:
"""
Take values according to indexer and return them as a block.
Expand Down Expand Up @@ -2258,8 +2268,8 @@ def check_ndim(values, placement: BlockPlacement, ndim: int):


def extract_pandas_array(
values: ArrayLike, dtype: Optional[DtypeObj], ndim: int
) -> Tuple[ArrayLike, Optional[DtypeObj]]:
values: Union[np.ndarray, ExtensionArray], dtype: Optional[DtypeObj], ndim: int
) -> Tuple[Union[np.ndarray, ExtensionArray], Optional[DtypeObj]]:
"""
Ensure that we don't allow PandasArray / PandasDtype in internals.
"""
Expand Down
32 changes: 20 additions & 12 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,8 @@ def make_empty(self: T, axes=None) -> T:
assert isinstance(self, SingleBlockManager) # for mypy
blk = self.blocks[0]
arr = blk.values[:0]
nb = blk.make_block_same_class(arr, placement=slice(0, 0))
bp = BlockPlacement(slice(0, 0))
nb = blk.make_block_same_class(arr, placement=bp)
blocks = [nb]
else:
blocks = []
Expand Down Expand Up @@ -786,7 +787,7 @@ def _combine(
new_blocks: List[Block] = []
for b in blocks:
b = b.copy(deep=copy)
b.mgr_locs = inv_indexer[b.mgr_locs.indexer]
b.mgr_locs = BlockPlacement(inv_indexer[b.mgr_locs.indexer])
new_blocks.append(b)

axes = list(self.axes)
Expand Down Expand Up @@ -1053,8 +1054,9 @@ def iget(self, i: int) -> SingleBlockManager:
values = block.iget(self.blklocs[i])

# shortcut for select a single-dim from a 2-dim BM
bp = BlockPlacement(slice(0, len(values)))
values = maybe_coerce_values(values)
nb = type(block)(values, placement=slice(0, len(values)), ndim=1)
nb = type(block)(values, placement=bp, ndim=1)
return SingleBlockManager(nb, self.axes[1])

def iget_values(self, i: int) -> ArrayLike:
Expand Down Expand Up @@ -1266,7 +1268,7 @@ def insert(
else:
new_mgr_locs = blk.mgr_locs.as_array.copy()
new_mgr_locs[new_mgr_locs >= loc] += 1
blk.mgr_locs = new_mgr_locs
blk.mgr_locs = BlockPlacement(new_mgr_locs)

# Accessing public blklocs ensures the public versions are initialized
if loc == self.blklocs.shape[0]:
Expand Down Expand Up @@ -1415,11 +1417,12 @@ def _slice_take_blocks_ax0(
# all(np.shares_memory(nb.values, blk.values) for nb in blocks)
return blocks
else:
bp = BlockPlacement(slice(0, sllen))
return [
blk.take_nd(
slobj,
axis=0,
new_mgr_locs=slice(0, sllen),
new_mgr_locs=bp,
fill_value=fill_value,
)
]
Expand Down Expand Up @@ -1456,7 +1459,7 @@ def _slice_take_blocks_ax0(
# item.
for mgr_loc in mgr_locs:
newblk = blk.copy(deep=False)
newblk.mgr_locs = slice(mgr_loc, mgr_loc + 1)
newblk.mgr_locs = BlockPlacement(slice(mgr_loc, mgr_loc + 1))
blocks.append(newblk)

else:
Expand Down Expand Up @@ -1655,12 +1658,15 @@ def getitem_mgr(self, indexer) -> SingleBlockManager:
# similar to get_slice, but not restricted to slice indexer
blk = self._block
array = blk._slice(indexer)
if array.ndim > blk.values.ndim:
if array.ndim > 1:
# This will be caught by Series._get_values
raise ValueError("dimension-expanding indexing not allowed")

block = blk.make_block_same_class(array, placement=slice(0, len(array)))
return type(self)(block, self.index[indexer])
bp = BlockPlacement(slice(0, len(array)))
block = blk.make_block_same_class(array, placement=bp)

new_idx = self.index[indexer]
return type(self)(block, new_idx)

def get_slice(self, slobj: slice, axis: int = 0) -> SingleBlockManager:
assert isinstance(slobj, slice), type(slobj)
Expand All @@ -1669,7 +1675,8 @@ def get_slice(self, slobj: slice, axis: int = 0) -> SingleBlockManager:

blk = self._block
array = blk._slice(slobj)
block = blk.make_block_same_class(array, placement=slice(0, len(array)))
bp = BlockPlacement(slice(0, len(array)))
block = blk.make_block_same_class(array, placement=bp)
new_index = self.index._getitem_slice(slobj)
return type(self)(block, new_index)

Expand Down Expand Up @@ -1733,7 +1740,7 @@ def set_values(self, values: ArrayLike):
valid for the current Block/SingleBlockManager (length, dtype, etc).
"""
self.blocks[0].values = values
self.blocks[0]._mgr_locs = libinternals.BlockPlacement(slice(len(values)))
self.blocks[0]._mgr_locs = BlockPlacement(slice(len(values)))


# --------------------------------------------------------------------
Expand Down Expand Up @@ -1985,7 +1992,8 @@ def _merge_blocks(
new_values = new_values[argsort]
new_mgr_locs = new_mgr_locs[argsort]

return [new_block(new_values, placement=new_mgr_locs, ndim=2)]
bp = BlockPlacement(new_mgr_locs)
return [new_block(new_values, placement=bp, ndim=2)]

# can't consolidate --> no merge
return blocks
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def _reset_block_mgr_locs(nbs: List[Block], locs):
Reset mgr_locs to correspond to our original DataFrame.
"""
for nb in nbs:
nblocs = locs.as_array[nb.mgr_locs.indexer]
nblocs = locs[nb.mgr_locs.indexer]
nb.mgr_locs = nblocs
# Assertions are disabled for performance, but should hold:
# assert len(nblocs) == nb.shape[0], (len(nblocs), nb.shape)
Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/extension/test_external_block.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

from pandas._libs.internals import BlockPlacement

import pandas as pd
from pandas.core.internals import BlockManager
from pandas.core.internals.blocks import ExtensionBlock
Expand All @@ -17,7 +19,8 @@ def df():
df1 = pd.DataFrame({"a": [1, 2, 3]})
blocks = df1._mgr.blocks
values = np.arange(3, dtype="int64")
custom_block = CustomBlock(values, placement=slice(1, 2), ndim=2)
bp = BlockPlacement(slice(1, 2))
custom_block = CustomBlock(values, placement=bp, ndim=2)
blocks = blocks + (custom_block,)
block_manager = BlockManager(blocks, [pd.Index(["a", "b"]), df1.index])
return pd.DataFrame(block_manager)
Expand Down
8 changes: 4 additions & 4 deletions pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,8 +327,8 @@ def test_duplicate_ref_loc_failure(self):

axes, blocks = tmp_mgr.axes, tmp_mgr.blocks

blocks[0].mgr_locs = np.array([0])
blocks[1].mgr_locs = np.array([0])
blocks[0].mgr_locs = BlockPlacement(np.array([0]))
blocks[1].mgr_locs = BlockPlacement(np.array([0]))

# test trying to create block manager with overlapping ref locs

Expand All @@ -338,8 +338,8 @@ def test_duplicate_ref_loc_failure(self):
mgr = BlockManager(blocks, axes)
mgr._rebuild_blknos_and_blklocs()

blocks[0].mgr_locs = np.array([0])
blocks[1].mgr_locs = np.array([1])
blocks[0].mgr_locs = BlockPlacement(np.array([0]))
blocks[1].mgr_locs = BlockPlacement(np.array([1]))
mgr = BlockManager(blocks, axes)
mgr.iget(1)

Expand Down