diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 049ccc0e6c4df..0702cb8c70955 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -771,6 +771,7 @@ Other - Fixed metadata propagation in the :class:`Series.dt`, :class:`Series.str` accessors, :class:`DataFrame.duplicated`, :class:`DataFrame.stack`, :class:`DataFrame.unstack`, :class:`DataFrame.pivot`, :class:`DataFrame.append`, :class:`DataFrame.diff`, :class:`DataFrame.applymap` and :class:`DataFrame.update` methods (:issue:`28283`, :issue:`37381`) - Fixed metadata propagation when selecting columns with ``DataFrame.__getitem__`` (:issue:`28283`) - Bug in :meth:`Index.union` behaving differently depending on whether operand is an :class:`Index` or other list-like (:issue:`36384`) +- Bug in :meth:`Index.intersection` with non-matching numeric dtypes casting to ``object`` dtype instead of minimal common dtype (:issue:`38122`) - Passing an array with 2 or more dimensions to the :class:`Series` constructor now raises the more specific ``ValueError`` rather than a bare ``Exception`` (:issue:`35744`) - Bug in ``dir`` where ``dir(obj)`` wouldn't show attributes defined on the instance for pandas objects (:issue:`37173`) - Bug in :meth:`RangeIndex.difference` returning :class:`Int64Index` in some cases where it should return :class:`RangeIndex` (:issue:`38028`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c49f3f9457161..467250f9ee5cf 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -33,6 +33,7 @@ from pandas.util._decorators import Appender, cache_readonly, doc from pandas.core.dtypes.cast import ( + find_common_type, maybe_cast_to_integer_array, validate_numeric_casting, ) @@ -2826,8 +2827,9 @@ def intersection(self, other, sort=False): return self._get_reconciled_name_object(other) if not is_dtype_equal(self.dtype, other.dtype): - this = self.astype("O") - other = other.astype("O") + dtype = find_common_type([self.dtype, other.dtype]) + this = self.astype(dtype) + other = other.astype(dtype) return this.intersection(other, sort=sort) result = self._intersection(other, sort=sort) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 9b4b459d9a122..8b773dbce6298 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3713,16 +3713,14 @@ def _convert_can_do_setop(self, other): if not isinstance(other, Index): if len(other) == 0: - other = MultiIndex( - levels=[[]] * self.nlevels, - codes=[[]] * self.nlevels, - verify_integrity=False, - ) + return self[:0], self.names else: msg = "other must be a MultiIndex or a list of tuples" try: other = MultiIndex.from_tuples(other) - except TypeError as err: + except (ValueError, TypeError) as err: + # ValueError raised by tupels_to_object_array if we + # have non-object dtype raise TypeError(msg) from err else: result_names = get_unanimous_names(self, other) diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index 4ac9a27069a3f..805da75813a4b 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -2,7 +2,7 @@ import pytest import pandas as pd -from pandas import MultiIndex, Series +from pandas import Index, MultiIndex, Series import pandas._testing as tm @@ -294,6 +294,24 @@ def test_intersection(idx, sort): # assert result.equals(tuples) +def test_intersection_non_object(idx, sort): + other = Index(range(3), name="foo") + + result = idx.intersection(other, sort=sort) + expected = MultiIndex(levels=idx.levels, codes=[[]] * idx.nlevels, names=None) + tm.assert_index_equal(result, expected, exact=True) + + # if we pass a length-0 ndarray (i.e. no name, we retain our idx.name) + result = idx.intersection(np.asarray(other)[:0], sort=sort) + expected = MultiIndex(levels=idx.levels, codes=[[]] * idx.nlevels, names=idx.names) + tm.assert_index_equal(result, expected, exact=True) + + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + # With non-zero length non-index, we try and fail to convert to tuples + idx.intersection(np.asarray(other), sort=sort) + + def test_intersect_equal_sort(): # GH-24959 idx = pd.MultiIndex.from_product([[1, 0], ["a", "b"]]) diff --git a/pandas/tests/indexes/ranges/test_setops.py b/pandas/tests/indexes/ranges/test_setops.py index 1fd41b017221b..5623b0904c0d5 100644 --- a/pandas/tests/indexes/ranges/test_setops.py +++ b/pandas/tests/indexes/ranges/test_setops.py @@ -3,11 +3,40 @@ import numpy as np import pytest -from pandas import Index, Int64Index, RangeIndex +from pandas import Index, Int64Index, RangeIndex, UInt64Index import pandas._testing as tm class TestRangeIndexSetOps: + @pytest.mark.parametrize("klass", [RangeIndex, Int64Index, UInt64Index]) + def test_intersection_mismatched_dtype(self, klass): + # check that we cast to float, not object + index = RangeIndex(start=0, stop=20, step=2, name="foo") + index = klass(index) + + flt = index.astype(np.float64) + + # bc index.equals(flt), we go through fastpath and get RangeIndex back + result = index.intersection(flt) + tm.assert_index_equal(result, index, exact=True) + + result = flt.intersection(index) + tm.assert_index_equal(result, flt, exact=True) + + # neither empty, not-equals + result = index.intersection(flt[1:]) + tm.assert_index_equal(result, flt[1:], exact=True) + + result = flt[1:].intersection(index) + tm.assert_index_equal(result, flt[1:], exact=True) + + # empty other + result = index.intersection(flt[:0]) + tm.assert_index_equal(result, flt[:0], exact=True) + + result = flt[:0].intersection(index) + tm.assert_index_equal(result, flt[:0], exact=True) + def test_intersection(self, sort): # intersect with Int64Index index = RangeIndex(start=0, stop=20, step=2)