Skip to content

Commit 9e47bbe

Browse files
committed
BUG: fix _nsorted for frames whose index contains duplicated values
1 parent 62b4327 commit 9e47bbe

File tree

3 files changed

+43
-3
lines changed

3 files changed

+43
-3
lines changed

doc/source/whatsnew/v0.19.0.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,5 @@ Performance Improvements
8181

8282
Bug Fixes
8383
~~~~~~~~~
84+
85+
- Bug in ``DataFrame.nlargest`` and ``DataFrame.nsmallest`` when the DataFrame has an index with duplicated values. (:issue:`13412`)

pandas/core/frame.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3315,10 +3315,24 @@ def _nsorted(self, columns, n, method, keep):
33153315
if not com.is_list_like(columns):
33163316
columns = [columns]
33173317
columns = list(columns)
3318-
ser = getattr(self[columns[0]], method)(n, keep=keep)
3318+
3319+
df = self
3320+
index_unique = df.index.is_unique
3321+
3322+
if not index_unique:
3323+
df = df.reset_index()
3324+
3325+
ser = getattr(df[columns[0]], method)(n, keep=keep)
33193326
ascending = dict(nlargest=False, nsmallest=True)[method]
3320-
return self.loc[ser.index].sort_values(columns, ascending=ascending,
3321-
kind='mergesort')
3327+
df = df.loc[ser.index]
3328+
3329+
if not index_unique:
3330+
index_names = self.index.names
3331+
nl = self.index.nlevels
3332+
df = df.set_index(df.columns[:nl].tolist())
3333+
df.index = df.index.set_names(index_names)
3334+
3335+
return df.sort_values(columns, ascending=ascending, kind='mergesort')
33223336

33233337
def nlargest(self, n, columns, keep='first'):
33243338
"""Get the rows of a DataFrame sorted by the `n` largest

pandas/tests/frame/test_analytics.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1253,6 +1253,30 @@ def test_nsmallest_multiple_columns(self):
12531253
expected = df.sort_values(['a', 'c']).head(5)
12541254
tm.assert_frame_equal(result, expected)
12551255

1256+
def test_nsorted_duplicated_index(self):
1257+
# GH13412
1258+
df = pd.DataFrame({'a': [1, 2, 3, 4],
1259+
'b': [4, 3, 2, 1]}, index=[0, 0, 1, 1])
1260+
result = df.nlargest(1, 'a')
1261+
result2 = df.nlargest(2, 'a')
1262+
expected = df.sort_values('a', ascending=False).head(1)
1263+
expected2 = df.sort_values('a', ascending=False).head(2)
1264+
tm.assert_frame_equal(result, expected)
1265+
tm.assert_frame_equal(result2, expected2)
1266+
1267+
def test_nsorted_multiple_index(self):
1268+
# GH13412
1269+
df = pd.DataFrame({'a': [1, 2, 3, 4],
1270+
'b': [4, 3, 2, 1],
1271+
'c': [4, 3, 2, 1]}, index=[0, 0, 1, 1])
1272+
df = df.reset_index().set_index(['index', 'c'])
1273+
result = df.nlargest(1, 'a')
1274+
result2 = df.nlargest(2, 'a')
1275+
expected = df.sort_values('a', ascending=False).head(1)
1276+
expected2 = df.sort_values('a', ascending=False).head(2)
1277+
tm.assert_frame_equal(result, expected)
1278+
tm.assert_frame_equal(result2, expected2)
1279+
12561280
# ----------------------------------------------------------------------
12571281
# Isin
12581282

0 commit comments

Comments
 (0)