Skip to content

Commit f4f006c

Browse files
authored
PERF: use the libjoin fast paths in a few more cases (#56841)
* use libjoin.left_join_indexer_unique in more cases
* whatsnew
1 parent e6d0c1a commit f4f006c

File tree

2 files changed

+17
-22
lines changed

2 files changed

+17
-22
lines changed

doc/source/whatsnew/v2.3.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ Deprecations
101101

102102
Performance improvements
103103
~~~~~~~~~~~~~~~~~~~~~~~~
104+
- Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56841`)
104105
- Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`)
105106
- Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
106107
-

pandas/core/indexes/base.py

Lines changed: 16 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4996,35 +4996,29 @@ def _join_monotonic(
49964996
ridx: npt.NDArray[np.intp] | None
49974997
lidx: npt.NDArray[np.intp] | None
49984998

4999-
if self.is_unique and other.is_unique:
5000-
# We can perform much better than the general case
5001-
if how == "left":
4999+
if how == "left":
5000+
if other.is_unique:
5001+
# We can perform much better than the general case
50025002
join_index = self
50035003
lidx = None
50045004
ridx = self._left_indexer_unique(other)
5005-
elif how == "right":
5005+
else:
5006+
join_array, lidx, ridx = self._left_indexer(other)
5007+
join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
5008+
elif how == "right":
5009+
if self.is_unique:
5010+
# We can perform much better than the general case
50065011
join_index = other
50075012
lidx = other._left_indexer_unique(self)
50085013
ridx = None
5009-
elif how == "inner":
5010-
join_array, lidx, ridx = self._inner_indexer(other)
5011-
join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
5012-
elif how == "outer":
5013-
join_array, lidx, ridx = self._outer_indexer(other)
5014-
join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
5015-
else:
5016-
if how == "left":
5017-
join_array, lidx, ridx = self._left_indexer(other)
5018-
elif how == "right":
5014+
else:
50195015
join_array, ridx, lidx = other._left_indexer(self)
5020-
elif how == "inner":
5021-
join_array, lidx, ridx = self._inner_indexer(other)
5022-
elif how == "outer":
5023-
join_array, lidx, ridx = self._outer_indexer(other)
5024-
5025-
assert lidx is not None
5026-
assert ridx is not None
5027-
5016+
join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
5017+
elif how == "inner":
5018+
join_array, lidx, ridx = self._inner_indexer(other)
5019+
join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
5020+
elif how == "outer":
5021+
join_array, lidx, ridx = self._outer_indexer(other)
50285022
join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
50295023

50305024
lidx = None if lidx is None else ensure_platform_int(lidx)

0 commit comments

Comments
 (0)