Merge pull request #3459 from jreback/GH3455

jreback · jreback · commit 67ad55623037 · 2013-04-25T07:38:06.000-07:00
BUG: GH3455 Duplicate indexes with getitem will return items in the correct order
diff --git a/RELEASE.rst b/RELEASE.rst
@@ -52,12 +52,15 @@ pandas 0.12.0
     columns (GH3437_)
   - ``.loc`` was not raising when passed an integer list (GH3449_)
   - Unordered time series selection was misbehaving when using label slicing (GH3448_)
+  - Duplicate indexes with getitem will return items in the correct order (GH3455_, GH3457_)
 
 .. _GH3164: https://github.com/pydata/pandas/issues/3164
 .. _GH3251: https://github.com/pydata/pandas/issues/3251
 .. _GH3379: https://github.com/pydata/pandas/issues/3379
 .. _GH3038: https://github.com/pydata/pandas/issues/3038
 .. _GH3437: https://github.com/pydata/pandas/issues/3437
+.. _GH3455: https://github.com/pydata/pandas/issues/3455
+.. _GH3457: https://github.com/pydata/pandas/issues/3457
 .. _GH3448: https://github.com/pydata/pandas/issues/3448
 .. _GH3449: https://github.com/pydata/pandas/issues/3449
 
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -563,26 +563,34 @@ def _convert_to_indexer(self, obj, axis=0):
                     check = labels.levels[0].get_indexer(objarr)
                 else:
                     level = None
-                    # XXX
+
+                    # unique index
                     if labels.is_unique:
                         indexer = check = labels.get_indexer(objarr)
+
+                    # non-unique (dups)
                     else:
-                        mask = np.zeros(len(labels), dtype=bool)
+                        indexer = []
+                        check   = np.arange(len(labels))
                         lvalues = labels.values
                         for x in objarr:
                             # ugh
                             to_or = lib.map_infer(lvalues, x.__eq__)
                             if not to_or.any():
                                 raise KeyError('%s not in index' % str(x))
-                            mask |= to_or
 
-                        indexer = check = mask.nonzero()[0]
+                            # add the indices (as we want to take)
+                            indexer.extend(check[to_or])
+
+                        indexer = Index(indexer)
+
 
                 mask = check == -1
                 if mask.any():
                     raise KeyError('%s not in index' % objarr[mask])
-
+            
                 return indexer
+
         else:
             return labels.get_loc(obj)
 
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -4621,7 +4621,6 @@ def test_to_csv_from_csv(self):
             xp.columns = map(int,xp.columns)
             assert_frame_equal(xp,rs)
 
-
     @slow
     def test_to_csv_moar(self):
         from pandas.util.testing import makeCustomDataframe as mkdf
@@ -4935,6 +4934,21 @@ def test_to_csv_dups_cols(self):
         with ensure_clean() as filename:
             self.assertRaises(Exception, df.to_csv, filename)
 
+        # GH3457
+        from pandas.util.testing import makeCustomDataframe as mkdf
+
+        N=10
+        df= mkdf(N, 3)
+        df.columns = ['a','a','b']
+
+        with ensure_clean() as filename:
+            df.to_csv(filename)
+
+            # read_csv will rename the duplicate columns (the second 'a' comes back as 'a.1')
+            result = read_csv(filename,index_col=0)
+            result = result.rename(columns={ 'a.1' : 'a' })
+            assert_frame_equal(result,df)
+
     def test_to_csv_chunking(self):
 
         aa=DataFrame({'A':range(100000)})
diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py
@@ -761,6 +761,16 @@ def test_setitem_iloc(self):
         expected = DataFrame(np.array([0,101,102,3,104,105,6,7,8]).reshape((3, 3)), index=["A", "B", "C"], columns=["A", "B", "C"])
         assert_frame_equal(df,expected)
 
+    def test_dups_fancy_indexing(self):
+
+        # GH 3455
+        from pandas.util.testing import makeCustomDataframe as mkdf
+        df= mkdf(10, 3)
+        df.columns = ['a','a','b']
+        cols = ['b','a']
+        result = df[['b','a']].columns
+        expected = Index(['b','a','a'])
+        self.assert_(result.equals(expected))
 
 if __name__ == '__main__':
     import nose