Skip to content

PERF: extract_array earlier in DataFrame construction #42774

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit on Jul 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,9 +420,9 @@ def extract_array(
return obj._values
return obj

obj = obj.array
obj = obj._values

if extract_numpy and isinstance(obj, ABCPandasArray):
elif extract_numpy and isinstance(obj, ABCPandasArray):
obj = obj.to_numpy()

return obj
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1920,11 +1920,11 @@ def get_block_type(values, dtype: DtypeObj | None = None):


def new_block(values, placement, *, ndim: int, klass=None) -> Block:
# caller is responsible for ensuring values is NOT a PandasArray

if not isinstance(placement, BlockPlacement):
placement = BlockPlacement(placement)

values, _ = extract_pandas_array(values, None, ndim)
check_ndim(values, placement, ndim)

if klass is None:
Expand Down
7 changes: 1 addition & 6 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1775,11 +1775,6 @@ def create_block_manager_from_blocks(
return mgr


# We define this here so we can override it in tests.extension.test_numpy
def _extract_array(obj):
return extract_array(obj, extract_numpy=True)


def create_block_manager_from_arrays(
arrays,
names: Index,
Expand All @@ -1791,7 +1786,7 @@ def create_block_manager_from_arrays(
# assert isinstance(axes, list)
# assert all(isinstance(x, Index) for x in axes)

arrays = [_extract_array(x) for x in arrays]
arrays = [extract_array(x, extract_numpy=True) for x in arrays]

try:
blocks = _form_blocks(arrays, names, axes, consolidate)
Expand Down
6 changes: 2 additions & 4 deletions pandas/core/strings/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from pandas.core.dtypes.missing import isna

from pandas.core.base import NoNewAttributesMixin
from pandas.core.construction import extract_array

if TYPE_CHECKING:
from pandas import (
Expand Down Expand Up @@ -213,10 +214,7 @@ def _validate(data):
# see _libs/lib.pyx for list of inferred types
allowed_types = ["string", "empty", "bytes", "mixed", "mixed-integer"]

# TODO: avoid kludge for tests.extension.test_numpy
from pandas.core.internals.managers import _extract_array

data = _extract_array(data)
data = extract_array(data)

values = getattr(data, "categories", data) # categorical / normal

Expand Down
18 changes: 1 addition & 17 deletions pandas/tests/extension/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,32 +23,17 @@
ExtensionDtype,
PandasDtype,
)
from pandas.core.dtypes.generic import ABCPandasArray

import pandas as pd
import pandas._testing as tm
from pandas.core.arrays.numpy_ import PandasArray
from pandas.core.internals import (
blocks,
managers,
)
from pandas.core.internals import blocks
from pandas.tests.extension import base

# TODO(ArrayManager) PandasArray
pytestmark = td.skip_array_manager_not_yet_implemented


def _extract_array_patched(obj):
if isinstance(obj, (pd.Index, pd.Series)):
obj = obj._values
if isinstance(obj, ABCPandasArray):
# TODO for reasons unclear, we get here in a couple of tests
# with PandasArray._typ *not* patched
obj = obj.to_numpy()

return obj


def _can_hold_element_patched(obj, element) -> bool:
if isinstance(element, PandasArray):
element = element.to_numpy()
Expand Down Expand Up @@ -98,7 +83,6 @@ def allow_in_pandas(monkeypatch):
"""
with monkeypatch.context() as m:
m.setattr(PandasArray, "_typ", "extension")
m.setattr(managers, "_extract_array", _extract_array_patched)
m.setattr(blocks, "can_hold_element", _can_hold_element_patched)
m.setattr(tm.asserters, "assert_attr_equal", _assert_attr_equal)
yield
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1376,9 +1376,11 @@ def test_make_block_no_pandas_array(block_maker):
# PandasArray, no dtype
result = block_maker(arr, slice(len(arr)), ndim=arr.ndim)
assert result.dtype.kind in ["i", "u"]
assert result.is_extension is False

if block_maker is make_block:
# new_block requires caller to unwrap PandasArray
assert result.is_extension is False

# PandasArray, PandasDtype
result = block_maker(arr, slice(len(arr)), dtype=arr.dtype, ndim=arr.ndim)
assert result.dtype.kind in ["i", "u"]
Expand Down