From 4e046481a5c6526c5ee663a5548f187d858b433b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 16 Oct 2019 19:07:38 -0700 Subject: [PATCH 1/4] CLN: remove unused args/kwargs from take_2d_multi --- pandas/core/algorithms.py | 76 ++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 46 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 2e5ab0d182aff..3698edb37d8c0 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1712,59 +1712,43 @@ def take_nd( take_1d = take_nd -def take_2d_multi( - arr, indexer, out=None, fill_value=np.nan, mask_info=None, allow_fill=True -): +def take_2d_multi(arr, indexer, fill_value=np.nan): """ Specialized Cython take which sets NaN values in one pass """ - if indexer is None or (indexer[0] is None and indexer[1] is None): - row_idx = np.arange(arr.shape[0], dtype=np.int64) - col_idx = np.arange(arr.shape[1], dtype=np.int64) - indexer = row_idx, col_idx - dtype, fill_value = arr.dtype, arr.dtype.type() - else: - row_idx, col_idx = indexer - if row_idx is None: - row_idx = np.arange(arr.shape[0], dtype=np.int64) - else: - row_idx = ensure_int64(row_idx) - if col_idx is None: - col_idx = np.arange(arr.shape[1], dtype=np.int64) - else: - col_idx = ensure_int64(col_idx) - indexer = row_idx, col_idx - if not allow_fill: + # This is only called from one place in DataFrame._reindex_multi, + # so we know indexer is well-behaved. 
+ assert indexer is not None + assert indexer[0] is not None + assert indexer[1] is not None + + row_idx, col_idx = indexer + + row_idx = ensure_int64(row_idx) + col_idx = ensure_int64(col_idx) + indexer = row_idx, col_idx + + # check for promotion based on types only (do this first because + # it's faster than computing a mask) + dtype, fill_value = maybe_promote(arr.dtype, fill_value) + if dtype != arr.dtype: + # check if promotion is actually required based on indexer + row_mask = row_idx == -1 + col_mask = col_idx == -1 + row_needs = row_mask.any() + col_needs = col_mask.any() + mask_info = (row_mask, col_mask), (row_needs, col_needs) + + if not (row_needs or col_needs): + # if not, then depromote, set fill_value to dummy + # (it won't be used but we don't want the cython code + # to crash when trying to cast it to dtype) dtype, fill_value = arr.dtype, arr.dtype.type() - mask_info = None, False - else: - # check for promotion based on types only (do this first because - # it's faster than computing a mask) - dtype, fill_value = maybe_promote(arr.dtype, fill_value) - if dtype != arr.dtype and (out is None or out.dtype != dtype): - # check if promotion is actually required based on indexer - if mask_info is not None: - (row_mask, col_mask), (row_needs, col_needs) = mask_info - else: - row_mask = row_idx == -1 - col_mask = col_idx == -1 - row_needs = row_mask.any() - col_needs = col_mask.any() - mask_info = (row_mask, col_mask), (row_needs, col_needs) - if row_needs or col_needs: - if out is not None and out.dtype != dtype: - raise TypeError("Incompatible type for fill_value") - else: - # if not, then depromote, set fill_value to dummy - # (it won't be used but we don't want the cython code - # to crash when trying to cast it to dtype) - dtype, fill_value = arr.dtype, arr.dtype.type() # at this point, it's guaranteed that dtype can hold both the arr values # and the fill_value - if out is None: - out_shape = len(row_idx), len(col_idx) - out = np.empty(out_shape, 
dtype=dtype) + out_shape = len(row_idx), len(col_idx) + out = np.empty(out_shape, dtype=dtype) func = _take_2d_multi_dict.get((arr.dtype.name, out.dtype.name), None) if func is None and arr.dtype != out.dtype: From 4ba17e07351e2ac0ed90916d6f52cc07f6f5b0ae Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 17 Oct 2019 17:52:35 -0700 Subject: [PATCH 2/4] CLN --- pandas/core/algorithms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 3698edb37d8c0..eb553a1af1ef5 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1304,7 +1304,7 @@ def get_indexer(current_indexer, other_indexer): return frame.sort_values(columns, ascending=ascending, kind="mergesort") -# ------- ## ---- # +# ---- # # take # # ---- # From eca1893a4cfe1f9bd6459535149912d094c3f414 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 17 Oct 2019 17:53:58 -0700 Subject: [PATCH 3/4] define mask_info --- pandas/core/algorithms.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index eb553a1af1ef5..717c2eb26be8b 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1727,6 +1727,7 @@ def take_2d_multi(arr, indexer, fill_value=np.nan): row_idx = ensure_int64(row_idx) col_idx = ensure_int64(col_idx) indexer = row_idx, col_idx + mask_info = None # check for promotion based on types only (do this first because # it's faster than computing a mask) From 84ab36e3426f507980003bb4c732249409a9af2e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 17 Oct 2019 18:53:54 -0700 Subject: [PATCH 4/4] Fix NotImplemented --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e97772a418982..e3e59639de56b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4606,7 +4606,7 @@ def _needs_reindex_multi(self, axes, method, level): ) def 
_reindex_multi(self, axes, copy, fill_value): - return NotImplemented + raise AbstractMethodError(self) def _reindex_with_indexers( self, reindexers, fill_value=None, copy=False, allow_dups=False