From 460d84ee07cb27afa74321104639e906e853305f Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 25 Oct 2020 23:11:43 +0100 Subject: [PATCH 1/7] BUG: Fix inconsistent ordering between left and right in merge --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/reshape/merge.py | 6 +++--- pandas/tests/reshape/merge/test_merge.py | 10 ++++++++++ 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index a9b4ad2e5374a..60e32620f2bb6 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -507,6 +507,7 @@ Reshaping - Bug in :meth:`DataFrame.agg` with ``func={'name':}`` incorrectly raising ``TypeError`` when ``DataFrame.columns==['Name']`` (:issue:`36212`) - Bug in :meth:`Series.transform` would give incorrect results or raise when the argument ``func`` was dictionary (:issue:`35811`) - Bug in :func:`join` returned a non deterministic level-order for the resulting :class:`MultiIndex` (:issue:`36910`) +- Bug in :meth:`df.merge() ` returned inconsistent ordering in result for ``how=right`` and ``how=left`` (:issue:`35382`) - Sparse diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 5012be593820e..f48dc9f99b7a2 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1336,7 +1336,7 @@ def get_join_indexers( lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort, how=how) # preserve left frame order if how == 'left' and sort == False kwargs = copy.copy(kwargs) - if how == "left": + if how in ("left", "right"): kwargs["sort"] = sort join_func = { "inner": libjoin.inner_join, @@ -1861,8 +1861,8 @@ def _left_join_on_index(left_ax: Index, right_ax: Index, join_keys, sort: bool = return left_ax, None, right_indexer -def _right_outer_join(x, y, max_groups): - right_indexer, left_indexer = libjoin.left_outer_join(y, x, max_groups) +def _right_outer_join(x, y, max_groups, **kwargs): + right_indexer, left_indexer = 
libjoin.left_outer_join(y, x, max_groups, **kwargs) return left_indexer, right_indexer diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index c4c9b0e516192..96ac682976181 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2283,3 +2283,13 @@ def test_merge_join_categorical_multiindex(): expected = expected.drop(["Cat", "Int"], axis=1) result = a.join(b, on=["Cat1", "Int1"]) tm.assert_frame_equal(expected, result) + + +@pytest.mark.parametrize("how", ["left", "right"]) +def test_merge_same_order_left_right(how): + # GH: 35382 + df = pd.DataFrame({"a": [1, 0, 1]}) + + result = df.merge(df, on="a", how=how, sort=False) + expected = pd.DataFrame([1, 1, 0, 1, 1], columns=["a"]) + tm.assert_frame_equal(result, expected) From 9b298f22bdd595058761cc1661adff7f70bdc83a Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 25 Oct 2020 23:49:32 +0100 Subject: [PATCH 2/7] Fix pattern --- pandas/tests/reshape/merge/test_merge.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 96ac682976181..051a11924821f 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2288,8 +2288,8 @@ def test_merge_join_categorical_multiindex(): @pytest.mark.parametrize("how", ["left", "right"]) def test_merge_same_order_left_right(how): # GH: 35382 - df = pd.DataFrame({"a": [1, 0, 1]}) + df = DataFrame({"a": [1, 0, 1]}) result = df.merge(df, on="a", how=how, sort=False) - expected = pd.DataFrame([1, 1, 0, 1, 1], columns=["a"]) + expected = DataFrame([1, 1, 0, 1, 1], columns=["a"]) tm.assert_frame_equal(result, expected) From 492c75655d7d6b37f1ef3280bf53fdebeb00a34c Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 31 Oct 2020 23:33:53 +0100 Subject: [PATCH 3/7] Add more tests --- pandas/tests/reshape/merge/test_merge.py | 22 ++++++++++++---------- 1 
file changed, 12 insertions(+), 10 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 051a11924821f..3c0de41532961 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -601,6 +601,18 @@ def test_merge_nosort(self): assert (df.var3.unique() == result.var3.unique()).all() + @pytest.mark.parametrize( + ("sort", "values"), [(False, [1, 1, 0, 1, 1]), (True, [0, 1, 1, 1, 1])] + ) + @pytest.mark.parametrize("how", ["left", "right"]) + def test_merge_same_order_left_right(self, sort, values, how): + # GH: 35382 + df = DataFrame({"a": [1, 0, 1]}) + + result = df.merge(df, on="a", how=how, sort=sort) + expected = DataFrame(values, columns=["a"]) + tm.assert_frame_equal(result, expected) + def test_merge_nan_right(self): df1 = DataFrame({"i1": [0, 1], "i2": [0, 1]}) df2 = DataFrame({"i1": [0], "i3": [0]}) @@ -2283,13 +2295,3 @@ def test_merge_join_categorical_multiindex(): expected = expected.drop(["Cat", "Int"], axis=1) result = a.join(b, on=["Cat1", "Int1"]) tm.assert_frame_equal(expected, result) - - -@pytest.mark.parametrize("how", ["left", "right"]) -def test_merge_same_order_left_right(how): - # GH: 35382 - df = DataFrame({"a": [1, 0, 1]}) - - result = df.merge(df, on="a", how=how, sort=False) - expected = DataFrame([1, 1, 0, 1, 1], columns=["a"]) - tm.assert_frame_equal(result, expected) From a7153f1e4bda5e40425da4a29e5127062c59b161 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 19 Nov 2020 20:50:14 +0100 Subject: [PATCH 4/7] Change git reference --- pandas/tests/reshape/merge/test_merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 7d86ccdd1289d..b62d7e15348b5 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -606,7 +606,7 @@ def test_merge_nosort(self): ) @pytest.mark.parametrize("how", 
["left", "right"]) def test_merge_same_order_left_right(self, sort, values, how): - # GH: 35382 + # GH#35382 df = DataFrame({"a": [1, 0, 1]}) result = df.merge(df, on="a", how=how, sort=sort) From 813289c08ba57acf09658bd535962e3603f6e07c Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 19 Nov 2020 20:57:05 +0100 Subject: [PATCH 5/7] Use lambda expression --- pandas/core/reshape/merge.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 722774370f3b3..50448e90e763f 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1344,7 +1344,9 @@ def get_join_indexers( join_func = { "inner": libjoin.inner_join, "left": libjoin.left_outer_join, - "right": _right_outer_join, + "right": lambda x, y, count, **kwargs: libjoin.left_outer_join( + y, x, count, **kwargs + )[::-1], "outer": libjoin.full_outer_join, }[how] @@ -1864,11 +1866,6 @@ def _left_join_on_index(left_ax: Index, right_ax: Index, join_keys, sort: bool = return left_ax, None, right_indexer -def _right_outer_join(x, y, max_groups, **kwargs): - right_indexer, left_indexer = libjoin.left_outer_join(y, x, max_groups, **kwargs) - return left_indexer, right_indexer - - def _factorize_keys( lk: ArrayLike, rk: ArrayLike, sort: bool = True, how: str = "inner" ) -> Tuple[np.ndarray, np.ndarray, int]: From 50d69a02597604c34c608f10d15e8679ac549c92 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 19 Nov 2020 20:58:41 +0100 Subject: [PATCH 6/7] Change whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 7af6be5c8591f..dab433f912a01 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -712,7 +712,7 @@ Reshaping - Fixed regression in :func:`merge` on merging DatetimeIndex with empty DataFrame (:issue:`36895`) - Bug in :meth:`DataFrame.apply` not setting index of 
return value when ``func`` return type is ``dict`` (:issue:`37544`) - Bug in :func:`concat` resulting in a ``ValueError`` when at least one of both inputs had a non-unique index (:issue:`36263`) -- Bug in :meth:`df.merge() ` returned inconsistent ordering in result for ``how=right`` and ``how=left`` (:issue:`35382`) +- Bug in :meth:`df.merge() ` returning inconsistent ordering in result for ``how=right`` and ``how=left`` (:issue:`35382`) Sparse ^^^^^^ From 8f7c8d6534db0476b5aba390c3030682299edf64 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 21 Nov 2020 00:47:26 +0100 Subject: [PATCH 7/7] Improve whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index dab433f912a01..68b13c2fe28f5 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -712,7 +712,7 @@ Reshaping - Fixed regression in :func:`merge` on merging DatetimeIndex with empty DataFrame (:issue:`36895`) - Bug in :meth:`DataFrame.apply` not setting index of return value when ``func`` return type is ``dict`` (:issue:`37544`) - Bug in :func:`concat` resulting in a ``ValueError`` when at least one of both inputs had a non-unique index (:issue:`36263`) -- Bug in :meth:`df.merge() ` returning inconsistent ordering in result for ``how=right`` and ``how=left`` (:issue:`35382`) +- Bug in :meth:`DataFrame.merge` and :func:`merge` returning inconsistent ordering in result for ``how="right"`` and ``how="left"`` (:issue:`35382`) Sparse ^^^^^^