Skip to content

Commit 67ad556

Browse files
committed
Merge pull request #3459 from jreback/GH3455
BUG: GH3455 Duplicate indexes with getitem will return items in the correct order
2 parents 3c728a7 + 99c1f15 commit 67ad556

File tree

4 files changed

+41
-6
lines changed

4 files changed

+41
-6
lines changed

RELEASE.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,12 +52,15 @@ pandas 0.12.0
5252
columns (GH3437_)
5353
- ``.loc`` was not raising when passed an integer list (GH3449_)
5454
- Unordered time series selection was misbehaving when using label slicing (GH3448_)
55+
- Selecting with getitem on an object with duplicate index labels now returns the items in the requested order (GH3455_, GH3457_)
5556

5657
.. _GH3164: https://github.com/pydata/pandas/issues/3164
5758
.. _GH3251: https://github.com/pydata/pandas/issues/3251
5859
.. _GH3379: https://github.com/pydata/pandas/issues/3379
5960
.. _GH3038: https://github.com/pydata/pandas/issues/3038
6061
.. _GH3437: https://github.com/pydata/pandas/issues/3437
62+
.. _GH3455: https://github.com/pydata/pandas/issues/3455
63+
.. _GH3457: https://github.com/pydata/pandas/issues/3457
6164
.. _GH3448: https://github.com/pydata/pandas/issues/3448
6265
.. _GH3449: https://github.com/pydata/pandas/issues/3449
6366

pandas/core/indexing.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -563,26 +563,34 @@ def _convert_to_indexer(self, obj, axis=0):
563563
check = labels.levels[0].get_indexer(objarr)
564564
else:
565565
level = None
566-
# XXX
566+
567+
# unique index
567568
if labels.is_unique:
568569
indexer = check = labels.get_indexer(objarr)
570+
571+
# non-unique (dups)
569572
else:
570-
mask = np.zeros(len(labels), dtype=bool)
573+
indexer = []
574+
check = np.arange(len(labels))
571575
lvalues = labels.values
572576
for x in objarr:
573577
# ugh
574578
to_or = lib.map_infer(lvalues, x.__eq__)
575579
if not to_or.any():
576580
raise KeyError('%s not in index' % str(x))
577-
mask |= to_or
578581

579-
indexer = check = mask.nonzero()[0]
582+
# add the indices (as we want to take)
583+
indexer.extend(check[to_or])
584+
585+
indexer = Index(indexer)
586+
580587

581588
mask = check == -1
582589
if mask.any():
583590
raise KeyError('%s not in index' % objarr[mask])
584-
591+
585592
return indexer
593+
586594
else:
587595
return labels.get_loc(obj)
588596

pandas/tests/test_frame.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4621,7 +4621,6 @@ def test_to_csv_from_csv(self):
46214621
xp.columns = map(int,xp.columns)
46224622
assert_frame_equal(xp,rs)
46234623

4624-
46254624
@slow
46264625
def test_to_csv_moar(self):
46274626
from pandas.util.testing import makeCustomDataframe as mkdf
@@ -4935,6 +4934,21 @@ def test_to_csv_dups_cols(self):
49354934
with ensure_clean() as filename:
49364935
self.assertRaises(Exception, df.to_csv, filename)
49374936

4937+
# GH3457
4938+
from pandas.util.testing import makeCustomDataframe as mkdf
4939+
4940+
N=10
4941+
df= mkdf(N, 3)
4942+
df.columns = ['a','a','b']
4943+
4944+
with ensure_clean() as filename:
4945+
df.to_csv(filename)
4946+
4947+
# read_csv will rename the duplicated columns (the second 'a' comes back as 'a.1')
4948+
result = read_csv(filename,index_col=0)
4949+
result = result.rename(columns={ 'a.1' : 'a' })
4950+
assert_frame_equal(result,df)
4951+
49384952
def test_to_csv_chunking(self):
49394953

49404954
aa=DataFrame({'A':range(100000)})

pandas/tests/test_indexing.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -761,6 +761,16 @@ def test_setitem_iloc(self):
761761
expected = DataFrame(np.array([0,101,102,3,104,105,6,7,8]).reshape((3, 3)), index=["A", "B", "C"], columns=["A", "B", "C"])
762762
assert_frame_equal(df,expected)
763763

764+
def test_dups_fancy_indexing(self):
765+
766+
# GH 3455
767+
from pandas.util.testing import makeCustomDataframe as mkdf
768+
df= mkdf(10, 3)
769+
df.columns = ['a','a','b']
770+
cols = ['b','a']
771+
result = df[['b','a']].columns
772+
expected = Index(['b','a','a'])
773+
self.assert_(result.equals(expected))
764774

765775
if __name__ == '__main__':
766776
import nose

0 commit comments

Comments
 (0)