Skip to content

CLN: simplify take_2d_multi #29065

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits on Oct 18, 2019
79 changes: 32 additions & 47 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -1304,7 +1304,7 @@ def get_indexer(current_indexer, other_indexer):
return frame.sort_values(columns, ascending=ascending, kind="mergesort")


# ------- ## ---- #
# ---- #
# take #
# ---- #

Expand Down Expand Up @@ -1712,59 +1712,44 @@ def take_nd(
take_1d = take_nd


def take_2d_multi(
arr, indexer, out=None, fill_value=np.nan, mask_info=None, allow_fill=True
):
def take_2d_multi(arr, indexer, fill_value=np.nan):
"""
Specialized Cython take which sets NaN values in one pass
"""
if indexer is None or (indexer[0] is None and indexer[1] is None):
row_idx = np.arange(arr.shape[0], dtype=np.int64)
col_idx = np.arange(arr.shape[1], dtype=np.int64)
indexer = row_idx, col_idx
dtype, fill_value = arr.dtype, arr.dtype.type()
else:
row_idx, col_idx = indexer
if row_idx is None:
row_idx = np.arange(arr.shape[0], dtype=np.int64)
else:
row_idx = ensure_int64(row_idx)
if col_idx is None:
col_idx = np.arange(arr.shape[1], dtype=np.int64)
else:
col_idx = ensure_int64(col_idx)
indexer = row_idx, col_idx
if not allow_fill:
# This is only called from one place in DataFrame._reindex_multi,
# so we know indexer is well-behaved.
assert indexer is not None
assert indexer[0] is not None
assert indexer[1] is not None

row_idx, col_idx = indexer

row_idx = ensure_int64(row_idx)
col_idx = ensure_int64(col_idx)
indexer = row_idx, col_idx
mask_info = None

# check for promotion based on types only (do this first because
# it's faster than computing a mask)
dtype, fill_value = maybe_promote(arr.dtype, fill_value)
if dtype != arr.dtype:
# check if promotion is actually required based on indexer
row_mask = row_idx == -1
col_mask = col_idx == -1
row_needs = row_mask.any()
col_needs = col_mask.any()
mask_info = (row_mask, col_mask), (row_needs, col_needs)

if not (row_needs or col_needs):
# if not, then depromote, set fill_value to dummy
# (it won't be used but we don't want the cython code
# to crash when trying to cast it to dtype)
dtype, fill_value = arr.dtype, arr.dtype.type()
mask_info = None, False
else:
# check for promotion based on types only (do this first because
# it's faster than computing a mask)
dtype, fill_value = maybe_promote(arr.dtype, fill_value)
if dtype != arr.dtype and (out is None or out.dtype != dtype):
# check if promotion is actually required based on indexer
if mask_info is not None:
(row_mask, col_mask), (row_needs, col_needs) = mask_info
else:
row_mask = row_idx == -1
col_mask = col_idx == -1
row_needs = row_mask.any()
col_needs = col_mask.any()
mask_info = (row_mask, col_mask), (row_needs, col_needs)
if row_needs or col_needs:
if out is not None and out.dtype != dtype:
raise TypeError("Incompatible type for fill_value")
else:
# if not, then depromote, set fill_value to dummy
# (it won't be used but we don't want the cython code
# to crash when trying to cast it to dtype)
dtype, fill_value = arr.dtype, arr.dtype.type()

# at this point, it's guaranteed that dtype can hold both the arr values
# and the fill_value
if out is None:
out_shape = len(row_idx), len(col_idx)
out = np.empty(out_shape, dtype=dtype)
out_shape = len(row_idx), len(col_idx)
out = np.empty(out_shape, dtype=dtype)

func = _take_2d_multi_dict.get((arr.dtype.name, out.dtype.name), None)
if func is None and arr.dtype != out.dtype:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4606,7 +4606,7 @@ def _needs_reindex_multi(self, axes, method, level):
)

def _reindex_multi(self, axes, copy, fill_value):
return NotImplemented
raise AbstractMethodError(self)

def _reindex_with_indexers(
self, reindexers, fill_value=None, copy=False, allow_dups=False
Expand Down