Skip to content

BUG/CLN: Allow the BlockManager to have a non-unique items (axis 0) #3509

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 2, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,20 @@ pandas 0.11.1
- Fix regression in a DataFrame apply with axis=1, objects were not being converted back
to base dtypes correctly (GH3480_)
- Fix issue when storing uint dtypes in an HDFStore. (GH3493_)
- Non-unique index support clarified (GH3468_)

- Fix assigning a new index to a duplicate index in a DataFrame would fail (GH3468_)
- Fix construction of a DataFrame with a duplicate index
- ref_locs support to allow duplicate indices across dtypes,
  allowing iget to always find the index (even across dtypes) (GH2194_)
- applymap on a DataFrame with a non-unique index now works
(removed warning) (GH2786_), and fix (GH3230_)
- Fix to_csv to handle non-unique columns (GH3495_)

.. _GH3164: https://github.com/pydata/pandas/issues/3164
.. _GH2786: https://github.com/pydata/pandas/issues/2786
.. _GH2194: https://github.com/pydata/pandas/issues/2194
.. _GH3230: https://github.com/pydata/pandas/issues/3230
.. _GH3251: https://github.com/pydata/pandas/issues/3251
.. _GH3379: https://github.com/pydata/pandas/issues/3379
.. _GH3480: https://github.com/pydata/pandas/issues/3480
Expand All @@ -75,8 +87,10 @@ pandas 0.11.1
.. _GH3455: https://github.com/pydata/pandas/issues/3455
.. _GH3457: https://github.com/pydata/pandas/issues/3457
.. _GH3461: https://github.com/pydata/pandas/issues/3461
.. _GH3468: https://github.com/pydata/pandas/issues/3468
.. _GH3448: https://github.com/pydata/pandas/issues/3448
.. _GH3449: https://github.com/pydata/pandas/issues/3449
.. _GH3495: https://github.com/pydata/pandas/issues/3495
.. _GH3493: https://github.com/pydata/pandas/issues/3493


Expand Down
1 change: 1 addition & 0 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1156,6 +1156,7 @@ def _default_index(n):
values = np.arange(n, dtype=np.int64)
result = values.view(Int64Index)
result.name = None
result.is_unique = True
return result


Expand Down
35 changes: 8 additions & 27 deletions pandas/core/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -820,21 +820,7 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
self.blocks = self.obj._data.blocks
ncols = sum(len(b.items) for b in self.blocks)
self.data =[None] * ncols

if self.obj.columns.is_unique:
self.colname_map = dict((k,i) for i,k in enumerate(self.obj.columns))
else:
ks = [set(x.items) for x in self.blocks]
u = len(reduce(lambda a,x: a.union(x),ks,set()))
t = sum(map(len,ks))
if u != t:
if len(set(self.cols)) != len(self.cols):
raise NotImplementedError("duplicate columns with differing dtypes are unsupported")
else:
# if columns are not unique and we acces this,
# we're doing it wrong
pass

self.column_map = self.obj._data.get_items_map()

if chunksize is None:
chunksize = (100000/ (len(self.cols) or 1)) or 1
Expand Down Expand Up @@ -1034,18 +1020,13 @@ def _save_chunk(self, start_i, end_i):

# create the data for a chunk
slicer = slice(start_i,end_i)
if self.obj.columns.is_unique:
for i in range(len(self.blocks)):
b = self.blocks[i]
d = b.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
for j, k in enumerate(b.items):
# self.data is a preallocated list
self.data[self.colname_map[k]] = d[j]
else:
# self.obj should contain a proper view of the dataframes
# with the specified ordering of cols if cols was specified
for i in range(len(self.obj.columns)):
self.data[i] = self.obj.icol(i).values[slicer].tolist()
for i in range(len(self.blocks)):
b = self.blocks[i]
d = b.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
for i, item in enumerate(b.items):

# self.data is a preallocated list
self.data[self.column_map[b][i]] = d[i]

ix = data_index.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)

Expand Down
3 changes: 0 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4261,9 +4261,6 @@ def infer(x):
if com.is_datetime64_dtype(x):
x = lib.map_infer(x, lib.Timestamp)
return lib.map_infer(x, func)
#GH2786
if not self.columns.is_unique:
raise ValueError("applymap does not support dataframes having duplicate column labels")
return self.apply(infer)

#----------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ def is_monotonic(self):
def is_lexsorted_for_tuple(self, tup):
    """Return whether the index is lexically sorted for the given tuple *tup*.

    The base Index is flat, so this unconditionally returns True;
    presumably subclasses with multiple levels override it with a
    real check — TODO confirm against the subclass implementations.
    """
    return True

@cache_readonly
@cache_readonly(allow_setting=True)
def is_unique(self):
return self._engine.is_unique

Expand Down
Loading