Skip to content

Commit c6c273a

Browse files
authored
PERF: Fixes performance regression in DataFrame[bool_indexer] (#33924) (#34199)
* PERF: Remove unnecessary copies in sorting functions * PERF: Create array from list with given dtype=bool * Run black * Run tests * Run tests * Run tests * Fix imports * Add asv * Run black * Remove asv * Add requested changes * Run black * Delete newline * Fix whatsnew * Add requested changes * Fix * Fix * Fix typo * Fix * Update asv Co-authored-by: mproszewska <[email protected]>
1 parent 8daf274 commit c6c273a

File tree

3 files changed

+12
-9
lines changed

3 files changed

+12
-9
lines changed

asv_bench/benchmarks/indexing.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,9 +158,9 @@ def time_boolean_rows_boolean(self):
158158
class DataFrameNumericIndexing:
159159
def setup(self):
160160
self.idx_dupe = np.array(range(30)) * 99
161-
self.df = DataFrame(np.random.randn(10000, 5))
161+
self.df = DataFrame(np.random.randn(100000, 5))
162162
self.df_dup = concat([self.df, 2 * self.df, 3 * self.df])
163-
self.bool_indexer = [True] * 5000 + [False] * 5000
163+
self.bool_indexer = [True] * 50000 + [False] * 50000
164164

165165
def time_iloc_dups(self):
166166
self.df_dup.iloc[self.idx_dupe]

doc/source/whatsnew/v1.1.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -727,6 +727,7 @@ Performance improvements
727727
- Performance improvement in arithmetic operations between two :class:`DataFrame` objects (:issue:`32779`)
728728
- Performance improvement in :class:`pandas.core.groupby.RollingGroupby` (:issue:`34052`)
729729
- Performance improvement in arithmetic operations (sub, add, mul, div) for MultiIndex (:issue:`34297`)
730+
- Performance improvement in ``DataFrame[bool_indexer]`` when ``bool_indexer`` is a list (:issue:`33924`)
730731

731732
.. ---------------------------------------------------------------------------
732733

pandas/core/indexing.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from pandas.util._decorators import doc
99

1010
from pandas.core.dtypes.common import (
11+
is_array_like,
1112
is_hashable,
1213
is_integer,
1314
is_iterator,
@@ -22,6 +23,7 @@
2223
from pandas.core.dtypes.missing import _infer_fill_value, isna
2324

2425
import pandas.core.common as com
26+
from pandas.core.construction import array as pd_array
2527
from pandas.core.indexers import (
2628
check_array_indexer,
2729
is_list_like_indexer,
@@ -2167,15 +2169,15 @@ def check_bool_indexer(index: Index, key) -> np.ndarray:
21672169
"indexer (index of the boolean Series and of "
21682170
"the indexed object do not match)."
21692171
)
2170-
result = result.astype(bool)._values
2171-
elif is_object_dtype(key):
2172+
return result.astype(bool)._values
2173+
if is_object_dtype(key):
21722174
# key might be object-dtype bool, check_array_indexer needs bool array
21732175
result = np.asarray(result, dtype=bool)
2174-
result = check_array_indexer(index, result)
2175-
else:
2176-
result = check_array_indexer(index, result)
2177-
2178-
return result
2176+
elif not is_array_like(result):
2177+
# GH 33924
2178+
# key may contain nan elements, check_array_indexer needs bool array
2179+
result = pd_array(result, dtype=bool)
2180+
return check_array_indexer(index, result)
21792181

21802182

21812183
def convert_missing_indexer(indexer):

0 commit comments

Comments
 (0)