diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index c9a1dbd0ae90d..e841818c8ae05 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -474,6 +474,7 @@ Reshaping - Bug in func :meth:`crosstab` when using multiple columns with ``margins=True`` and ``normalize=True`` (:issue:`35144`) - Bug in :meth:`DataFrame.agg` with ``func={'name':}`` incorrectly raising ``TypeError`` when ``DataFrame.columns==['Name']`` (:issue:`36212`) - Bug in :meth:`Series.transform` would give incorrect results or raise when the argument ``func`` was dictionary (:issue:`35811`) +- Bug in :meth:`DataFrame.join` returned a non-deterministic level order for the resulting :class:`MultiIndex` (:issue:`36910`) - Sparse diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 87dd15d5b142b..be65e59abad3e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3581,8 +3581,12 @@ def _join_multi(self, other, how, return_indexers=True): from pandas.core.reshape.merge import restore_dropped_levels_multijoin # figure out join names - self_names = set(com.not_none(*self.names)) - other_names = set(com.not_none(*other.names)) + self_names_list = list(com.not_none(*self.names)) + other_names_list = list(com.not_none(*other.names)) + self_names_order = self_names_list.index + other_names_order = other_names_list.index + self_names = set(self_names_list) + other_names = set(other_names_list) overlap = self_names & other_names # need at least 1 in common @@ -3592,8 +3596,8 @@ def _join_multi(self, other, how, return_indexers=True): if isinstance(self, MultiIndex) and isinstance(other, MultiIndex): # Drop the non-matching levels from left and right respectively - ldrop_names = list(self_names - overlap) - rdrop_names = list(other_names - overlap) + ldrop_names = sorted(self_names - overlap, key=self_names_order) + rdrop_names = sorted(other_names - overlap, key=other_names_order) # if only the order differs if not 
len(ldrop_names + rdrop_names): diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index d4d4c4190417e..3c0cb0426ae16 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -879,3 +879,20 @@ def _join_by_hand(a, b, how="left"): for col, s in b_re.items(): a_re[col] = s return a_re.reindex(columns=result_columns) + + +def test_join_inner_multiindex_deterministic_order(): + # GH: 36910 + left = pd.DataFrame( + data={"e": 5}, + index=pd.MultiIndex.from_tuples([(1, 2, 4)], names=("a", "b", "d")), + ) + right = pd.DataFrame( + data={"f": 6}, index=pd.MultiIndex.from_tuples([(2, 3)], names=("b", "c")) + ) + result = left.join(right, how="inner") + expected = pd.DataFrame( + {"e": [5], "f": [6]}, + index=pd.MultiIndex.from_tuples([(2, 1, 4, 3)], names=("b", "a", "d", "c")), + ) + tm.assert_frame_equal(result, expected)