Skip to content

Commit 213d59a

Browse files
Address some review comments
1 parent 0bf7416 commit 213d59a

File tree

5 files changed, with 32 additions and 56 deletions.

pandas/_libs/index.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ cdef class IndexEngine:
         cdef:
             ndarray values, x
             ndarray[int64_t] result, missing
-            set stargets
+            set stargets, remaining_stargets
             dict d = {}
             object val
             int count = 0, count_missing = 0

pandas/core/indexes/base.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2490,13 +2490,11 @@ def _union(self, other, sort):
                 result.extend([x for x in rvals if x not in value_set])
         else:
             # find indexes of things in "other" that are not in "self"
-            try:
+            if self.is_unique:
                 indexer = self.get_indexer(other)
-            except InvalidIndexError:
-                # duplicates
-                indexer = algos.unique1d(self.get_indexer_non_unique(other)[1])
-            else:
                 indexer, = (indexer == -1).nonzero()
+            else:
+                indexer = algos.unique1d(self.get_indexer_non_unique(other)[1])
 
             if len(indexer) > 0:
                 other_diff = algos.take_nd(rvals, indexer, allow_fill=False)

pandas/tests/indexing/test_categorical.py

Lines changed: 20 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -46,18 +46,6 @@ def setup_method(self, method):
                 "B": (Series([1, 1, 2, 1, 3, 2]).astype(CDT([3, 2, 1], ordered=False))),
             }
         ).set_index("B")
-        self.df5 = DataFrame(
-            {
-                "A": np.arange(3, dtype="int64"),
-                "B": Series(list("abc")).astype(CDT(list("cabe"))),
-            }
-        ).set_index("B")
-        self.df6 = DataFrame(
-            {
-                "A": np.arange(3, dtype="int64"),
-                "B": (Series([1, 3, 2]).astype(CDT([3, 2, 1], ordered=False))),
-            }
-        ).set_index("B")
 
     def test_loc_scalar(self):
         result = self.df.loc["a"]
@@ -573,67 +561,73 @@ def test_read_only_source(self):
         assert_frame_equal(rw_df.loc[1:3], ro_df.loc[1:3])
 
     def test_reindexing(self):
+        df = DataFrame(
+            {
+                "A": np.arange(3, dtype="int64"),
+                "B": Series(list("abc")).astype(CDT(list("cabe"))),
+            }
+        ).set_index("B")
 
         # reindexing
         # convert to a regular index
-        result = self.df5.reindex(["a", "b", "e"])
+        result = df.reindex(["a", "b", "e"])
         expected = DataFrame({"A": [0, 1, np.nan], "B": Series(list("abe"))}).set_index(
             "B"
         )
         assert_frame_equal(result, expected, check_index_type=True)
 
-        result = self.df5.reindex(["a", "b"])
+        result = df.reindex(["a", "b"])
         expected = DataFrame({"A": [0, 1], "B": Series(list("ab"))}).set_index("B")
         assert_frame_equal(result, expected, check_index_type=True)
 
-        result = self.df5.reindex(["e"])
+        result = df.reindex(["e"])
         expected = DataFrame({"A": [np.nan], "B": Series(["e"])}).set_index("B")
         assert_frame_equal(result, expected, check_index_type=True)
 
-        result = self.df5.reindex(["d"])
+        result = df.reindex(["d"])
         expected = DataFrame({"A": [np.nan], "B": Series(["d"])}).set_index("B")
         assert_frame_equal(result, expected, check_index_type=True)
 
         # since we are actually reindexing with a Categorical
         # then return a Categorical
         cats = list("cabe")
 
-        result = self.df5.reindex(Categorical(["a", "e"], categories=cats))
+        result = df.reindex(Categorical(["a", "e"], categories=cats))
         expected = DataFrame(
             {"A": [0, np.nan], "B": Series(list("ae")).astype(CDT(cats))}
         ).set_index("B")
         assert_frame_equal(result, expected, check_index_type=True)
 
-        result = self.df5.reindex(Categorical(["a"], categories=cats))
+        result = df.reindex(Categorical(["a"], categories=cats))
         expected = DataFrame(
             {"A": [0], "B": Series(list("a")).astype(CDT(cats))}
         ).set_index("B")
         assert_frame_equal(result, expected, check_index_type=True)
 
-        result = self.df5.reindex(["a", "b", "e"])
+        result = df.reindex(["a", "b", "e"])
         expected = DataFrame({"A": [0, 1, np.nan], "B": Series(list("abe"))}).set_index(
             "B"
         )
         assert_frame_equal(result, expected, check_index_type=True)
 
-        result = self.df5.reindex(["a", "b"])
+        result = df.reindex(["a", "b"])
         expected = DataFrame({"A": [0, 1], "B": Series(list("ab"))}).set_index("B")
         assert_frame_equal(result, expected, check_index_type=True)
 
-        result = self.df5.reindex(["e"])
+        result = df.reindex(["e"])
         expected = DataFrame({"A": [np.nan], "B": Series(["e"])}).set_index("B")
         assert_frame_equal(result, expected, check_index_type=True)
 
         # give back the type of categorical that we received
-        result = self.df5.reindex(
+        result = df.reindex(
             Categorical(["a", "e"], categories=cats, ordered=True)
         )
         expected = DataFrame(
             {"A": [0, np.nan], "B": Series(list("ae")).astype(CDT(cats, ordered=True))}
         ).set_index("B")
         assert_frame_equal(result, expected, check_index_type=True)
 
-        result = self.df5.reindex(Categorical(["a", "d"], categories=["a", "d"]))
+        result = df.reindex(Categorical(["a", "d"], categories=["a", "d"]))
         expected = DataFrame(
             {"A": [0, np.nan], "B": Series(list("ad")).astype(CDT(["a", "d"]))}
         ).set_index("B")
@@ -647,11 +641,11 @@ def test_reindexing(self):
         # args NotImplemented ATM
         msg = r"argument {} is not implemented for CategoricalIndex\.reindex"
         with pytest.raises(NotImplementedError, match=msg.format("method")):
-            self.df5.reindex(["a"], method="ffill")
+            df.reindex(["a"], method="ffill")
         with pytest.raises(NotImplementedError, match=msg.format("level")):
-            self.df5.reindex(["a"], level=1)
+            df.reindex(["a"], level=1)
         with pytest.raises(NotImplementedError, match=msg.format("limit")):
-            self.df5.reindex(["a"], limit=2)
+            df.reindex(["a"], limit=2)
 
     def test_loc_slice(self):
         # slicing

pandas/tests/series/test_operators.py

Lines changed: 7 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -274,41 +274,24 @@ def test_logical_ops_with_index(self, op):
         assert_series_equal(result, expected)
 
     @pytest.mark.parametrize(
-        "op",
+        "op, index_op",
         [
-            pytest.param(
-                ops.rand_,
-                marks=pytest.mark.xfail(
-                    reason="GH#22092 Index implementation returns Index",
-                    raises=AssertionError,
-                    strict=True,
-                ),
-            ),
-            pytest.param(
-                ops.ror_,
-                marks=pytest.mark.xfail(
-                    reason="GH#22092 Index implementation returns Index",
-                    raises=AssertionError,
-                    strict=True,
-                ),
-            ),
-            ops.rxor,
+            (ops.rand_, Index.intersection),
+            (ops.ror_, Index.union),
+            (ops.rxor, Index.symmetric_difference),
         ],
     )
-    def test_reversed_logical_ops_with_index(self, op):
+    def test_reversed_logical_ops_with_index(self, op, index_op):
         # GH#22092, GH#19792
         ser = Series([True, True, False, False])
         idx1 = Index([True, False, True, False])
         idx2 = Index([1, 0, 1, 0])
 
-        # symmetric_difference is only for rxor, but other 2 should fail
-        expected = idx1.symmetric_difference(ser)
-
+        expected = index_op(idx1, ser)
         result = op(ser, idx1)
         assert_index_equal(result, expected)
 
-        expected = idx2.symmetric_difference(ser)
-
+        expected = index_op(idx2, ser)
         result = op(ser, idx2)
         assert_index_equal(result, expected)
 

pandas/tests/test_base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -991,6 +991,7 @@ def test_bool_indexing(self, indexer_klass, indexer):
         tm.assert_series_equal(s[indexer_klass(indexer)], s.iloc[exp_idx])
 
     def test_get_indexer_non_unique_dtype_mismatch(self):
+        # GH 25459
         indexes, missing = pd.Index(["A", "B"]).get_indexer_non_unique(pd.Index([0]))
         tm.assert_numpy_array_equal(np.array([-1], dtype=np.intp), indexes)
         tm.assert_numpy_array_equal(np.array([0], dtype=np.int64), missing)

0 commit comments

Comments
 (0)