-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
BUG: Index.union() inconsistent with non-unique Indexes #36299
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
b8f27e2
070abad
7a90556
dbcd0a7
7a9aec7
01d55c6
8463078
cddd7f9
3c6b079
90056b7
46b7c6c
9f55905
870c0ac
97fe91f
e724ade
920f9ff
c36ccff
396c70f
1062778
cf06418
6f04408
167d695
0b3b548
73d9ab3
561f835
59dbdf6
51131be
04817b4
a1887c7
fe41a6f
b63a732
fa49dfe
2a6f6ed
d80949c
48e041a
b57f00f
aa4533a
8fd90a3
091942a
446eb50
6b8fa64
484f4f8
5209bf0
6cabad8
b80fbdd
60bceec
2547f65
25885d4
fcc4635
ccaa0c1
f4ee466
20e62e6
e92ab7a
7ffa07a
76ded89
0af939d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2622,41 +2622,42 @@ def _union(self, other, sort): | |
lvals = self._values | ||
rvals = other._values | ||
|
||
if sort is None and self.is_monotonic and other.is_monotonic: | ||
try: | ||
# if sort is None and self.is_monotonic and other.is_monotonic: | ||
phofl marked this conversation as resolved.
Show resolved
Hide resolved
|
||
try: | ||
phofl marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if sort is None and self.is_monotonic and other.is_monotonic: | ||
result = self._outer_indexer(lvals, rvals)[0] | ||
except TypeError: | ||
# incomparable objects | ||
result = list(lvals) | ||
|
||
# worth making this faster? a very unusual case | ||
value_set = set(lvals) | ||
result.extend([x for x in rvals if x not in value_set]) | ||
result = Index(result)._values # do type inference here | ||
else: | ||
# find indexes of things in "other" that are not in "self" | ||
if self.is_unique: | ||
indexer = self.get_indexer(other) | ||
indexer = (indexer == -1).nonzero()[0] | ||
else: | ||
indexer = algos.unique1d(self.get_indexer_non_unique(other)[1]) | ||
|
||
if len(indexer) > 0: | ||
other_diff = algos.take_nd(rvals, indexer, allow_fill=False) | ||
result = concat_compat((lvals, other_diff)) | ||
|
||
else: | ||
result = lvals | ||
|
||
if sort is None: | ||
try: | ||
result = algos.safe_sort(result) | ||
except TypeError as err: | ||
warnings.warn( | ||
f"{err}, sort order is undefined for incomparable objects", | ||
RuntimeWarning, | ||
stacklevel=3, | ||
# We calculate the sorted union and resort it afterwards | ||
phofl marked this conversation as resolved.
Show resolved
Hide resolved
|
||
new_index_sorted = self._outer_indexer(np.sort(lvals), np.sort(rvals)) | ||
l_reindexer = self.unique().reindex(new_index_sorted[0])[1] | ||
if l_reindexer is None: | ||
result = lvals | ||
else: | ||
l_result = self.unique().take( | ||
np.sort(l_reindexer[l_reindexer != -1]) | ||
) | ||
r_reindexer = other.unique().reindex(new_index_sorted[0])[1] | ||
r_result = other.unique().take( | ||
np.sort(r_reindexer[new_index_sorted[1] == -1]) | ||
) | ||
result = np.append(l_result, r_result) | ||
except TypeError: | ||
# incomparable objects | ||
result = list(lvals) | ||
|
||
# worth making this faster? a very unusual case | ||
value_set = set(lvals) | ||
result.extend([x for x in rvals if x not in value_set]) | ||
result = Index(result)._values # do type inference here | ||
if sort is None and (not self.is_monotonic or not other.is_monotonic): | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
try: | ||
result = algos.safe_sort(result) | ||
except TypeError as err: | ||
warnings.warn( | ||
f"{err}, sort order is undefined for incomparable objects", | ||
RuntimeWarning, | ||
stacklevel=3, | ||
) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. potential separate PR: i think there are 4 places in this file where we do this safe_sort call, and we are not consistent in a) issuing this warning or b) catching the TypeError. Could be refactored into a small helper method. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will look into this in the coming days |
||
|
||
# for subclasses | ||
return self._wrap_setop_result(other, result) | ||
|
Uh oh!
There was an error while loading. Please reload this page.