Skip to content

Commit 1cb5f69

Browse files
authored
BUG: Index.intersection casting to object instead of numeric (#38122)
1 parent 224d2e8 commit 1cb5f69

File tree

5 files changed

+58
-10
lines changed

5 files changed

+58
-10
lines changed

doc/source/whatsnew/v1.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -782,6 +782,7 @@ Other
782782
- Fixed metadata propagation in the :class:`Series.dt`, :class:`Series.str` accessors, :class:`DataFrame.duplicated`, :class:`DataFrame.stack`, :class:`DataFrame.unstack`, :class:`DataFrame.pivot`, :class:`DataFrame.append`, :class:`DataFrame.diff`, :class:`DataFrame.applymap` and :class:`DataFrame.update` methods (:issue:`28283`, :issue:`37381`)
783783
- Fixed metadata propagation when selecting columns with ``DataFrame.__getitem__`` (:issue:`28283`)
784784
- Bug in :meth:`Index.union` behaving differently depending on whether operand is an :class:`Index` or other list-like (:issue:`36384`)
785+
- Bug in :meth:`Index.intersection` with non-matching numeric dtypes casting to ``object`` dtype instead of minimal common dtype (:issue:`38122`)
785786
- Passing an array with 2 or more dimensions to the :class:`Series` constructor now raises the more specific ``ValueError`` rather than a bare ``Exception`` (:issue:`35744`)
786787
- Bug in ``dir`` where ``dir(obj)`` wouldn't show attributes defined on the instance for pandas objects (:issue:`37173`)
787788
- Bug in :meth:`RangeIndex.difference` returning :class:`Int64Index` in some cases where it should return :class:`RangeIndex` (:issue:`38028`)

pandas/core/indexes/base.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
from pandas.util._decorators import Appender, cache_readonly, doc
3434

3535
from pandas.core.dtypes.cast import (
36+
find_common_type,
3637
maybe_cast_to_integer_array,
3738
validate_numeric_casting,
3839
)
@@ -2826,8 +2827,9 @@ def intersection(self, other, sort=False):
28262827
return self._get_reconciled_name_object(other)
28272828

28282829
if not is_dtype_equal(self.dtype, other.dtype):
2829-
this = self.astype("O")
2830-
other = other.astype("O")
2830+
dtype = find_common_type([self.dtype, other.dtype])
2831+
this = self.astype(dtype)
2832+
other = other.astype(dtype)
28312833
return this.intersection(other, sort=sort)
28322834

28332835
result = self._intersection(other, sort=sort)

pandas/core/indexes/multi.py

+4-6
Original file line numberDiff line numberDiff line change
@@ -3717,16 +3717,14 @@ def _convert_can_do_setop(self, other):
37173717
if not isinstance(other, Index):
37183718

37193719
if len(other) == 0:
3720-
other = MultiIndex(
3721-
levels=[[]] * self.nlevels,
3722-
codes=[[]] * self.nlevels,
3723-
verify_integrity=False,
3724-
)
3720+
return self[:0], self.names
37253721
else:
37263722
msg = "other must be a MultiIndex or a list of tuples"
37273723
try:
37283724
other = MultiIndex.from_tuples(other)
3729-
except TypeError as err:
3725+
except (ValueError, TypeError) as err:
3726+
# ValueError raised by tuples_to_object_array if we
3727+
# have non-object dtype
37303728
raise TypeError(msg) from err
37313729
else:
37323730
result_names = get_unanimous_names(self, other)

pandas/tests/indexes/multi/test_setops.py

+19-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import pytest
33

44
import pandas as pd
5-
from pandas import MultiIndex, Series
5+
from pandas import Index, MultiIndex, Series
66
import pandas._testing as tm
77

88

@@ -294,6 +294,24 @@ def test_intersection(idx, sort):
294294
# assert result.equals(tuples)
295295

296296

297+
def test_intersection_non_object(idx, sort):
298+
other = Index(range(3), name="foo")
299+
300+
result = idx.intersection(other, sort=sort)
301+
expected = MultiIndex(levels=idx.levels, codes=[[]] * idx.nlevels, names=None)
302+
tm.assert_index_equal(result, expected, exact=True)
303+
304+
# if we pass a length-0 ndarray (i.e. no name), we retain our idx.names
305+
result = idx.intersection(np.asarray(other)[:0], sort=sort)
306+
expected = MultiIndex(levels=idx.levels, codes=[[]] * idx.nlevels, names=idx.names)
307+
tm.assert_index_equal(result, expected, exact=True)
308+
309+
msg = "other must be a MultiIndex or a list of tuples"
310+
with pytest.raises(TypeError, match=msg):
311+
# With non-zero length non-index, we try and fail to convert to tuples
312+
idx.intersection(np.asarray(other), sort=sort)
313+
314+
297315
def test_intersect_equal_sort():
298316
# GH-24959
299317
idx = pd.MultiIndex.from_product([[1, 0], ["a", "b"]])

pandas/tests/indexes/ranges/test_setops.py

+30-1
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,40 @@
33
import numpy as np
44
import pytest
55

6-
from pandas import Index, Int64Index, RangeIndex
6+
from pandas import Index, Int64Index, RangeIndex, UInt64Index
77
import pandas._testing as tm
88

99

1010
class TestRangeIndexSetOps:
11+
@pytest.mark.parametrize("klass", [RangeIndex, Int64Index, UInt64Index])
12+
def test_intersection_mismatched_dtype(self, klass):
13+
# check that we cast to float, not object
14+
index = RangeIndex(start=0, stop=20, step=2, name="foo")
15+
index = klass(index)
16+
17+
flt = index.astype(np.float64)
18+
19+
# because index.equals(flt), we go through the fastpath and get the original index (of type klass) back
20+
result = index.intersection(flt)
21+
tm.assert_index_equal(result, index, exact=True)
22+
23+
result = flt.intersection(index)
24+
tm.assert_index_equal(result, flt, exact=True)
25+
26+
# neither empty, not-equals
27+
result = index.intersection(flt[1:])
28+
tm.assert_index_equal(result, flt[1:], exact=True)
29+
30+
result = flt[1:].intersection(index)
31+
tm.assert_index_equal(result, flt[1:], exact=True)
32+
33+
# empty other
34+
result = index.intersection(flt[:0])
35+
tm.assert_index_equal(result, flt[:0], exact=True)
36+
37+
result = flt[:0].intersection(index)
38+
tm.assert_index_equal(result, flt[:0], exact=True)
39+
1140
def test_intersection(self, sort):
1241
# intersect with Int64Index
1342
index = RangeIndex(start=0, stop=20, step=2)

0 commit comments

Comments
 (0)