From c8a45cb67bc20672e5d6225ed69a435d14de531f Mon Sep 17 00:00:00 2001 From: y-p Date: Sun, 11 Nov 2012 02:07:13 +0200 Subject: [PATCH 1/5] TST: df with dupe cols should raise KeyError on accessing non-existent col via list --- pandas/tests/test_frame.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 0b36e8d39a00a..c30dde8cc3490 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -62,6 +62,15 @@ def test_getitem(self): self.assert_('random' not in self.frame) self.assertRaises(Exception, self.frame.__getitem__, 'random') + def test_getitem_dupe_cols(self): + df=DataFrame([[1,2,3],[4,5,6]],columns=['a','a','b']) + try: + df[['baf']] + except KeyError: + pass + else: + self.fail("Dataframe failed to raise KeyError") + def test_get(self): b = self.frame.get('B') assert_series_equal(b, self.frame['B']) From 65e716af386e309a1d54e45d426966181c89bd86 Mon Sep 17 00:00:00 2001 From: y-p Date: Sun, 11 Nov 2012 02:25:35 +0200 Subject: [PATCH 2/5] BUG: df with dupe cols should raise KeyError on accessing non-existent col via list #2218 --- pandas/core/frame.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 31c1a09f409c3..2bd4655371b70 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1796,13 +1796,18 @@ def _getitem_array(self, key): indexer = self.columns.get_indexer(key) mask = indexer == -1 if mask.any(): - raise KeyError("No column(s) named: %s" % str(key[mask])) + raise KeyError("No column(s) named: %s" % + com.pprint_thing(key[mask])) result = self.reindex(columns=key) if result.columns.name is None: result.columns.name = self.columns.name return result else: mask = self.columns.isin(key) + for k in key: + if k not in self.columns: + raise KeyError("No column(s) named: %s" % + com.pprint_thing(k)) return self.take(mask.nonzero()[0], axis=1) def _slice(self, slobj, axis=0): From 611cd0f53d3e1ddcbe38d026f047f20ac59ec4b0 Mon Sep 17 00:00:00 2001 From: y-p Date: Sun, 11 Nov 2012 01:44:16 +0200 Subject: [PATCH 3/5] TST: df.iteritems() should yield Series even with non-unique column labels --- pandas/tests/test_frame.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index c30dde8cc3490..01b5d6ae46fd4 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1145,6 +1145,11 @@ def test_get_value(self): expected = self.frame[col][idx] assert_almost_equal(result, expected) + def test_iteritems(self): + df=DataFrame([[1,2,3],[4,5,6]],columns=['a','a','b']) + for k,v in df.iteritems(): + self.assertEqual(type(v),Series) + def test_lookup(self): def alt(df, rows, cols): result = [] @@ -7458,6 +7463,7 @@ def __nonzero__(self): self.assert_(r0.all()) self.assert_(r1.all()) + if __name__ == '__main__': # unittest.main() import nose From 4a5b75b44b00483df9ff4816b6a39b5043f9e2d3 Mon Sep 17 00:00:00 2001 From: y-p Date: Sun, 11 Nov 2012 01:31:09 +0200 Subject: [PATCH 4/5] BUG: modify df.iteritems to support duplicate column labels #2219 --- pandas/core/frame.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2bd4655371b70..e4e9705e562d0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -633,7 +633,8 @@ def keys(self): def iteritems(self): """Iterator over (column, series) pairs""" - return ((k, self[k]) for k in self.columns) + for i, k in enumerate(self.columns): + yield (k,self.take([i],axis=1)[k]) def iterrows(self): """ From bd45d391bce7fa2310c7dbe4646d4b1666783332 Mon Sep 17 00:00:00 2001 From: y-p Date: Sun, 11 Nov 2012 02:43:59 +0200 Subject: [PATCH 5/5] ENH: warn user when invoking to_dict() on df with non-unique columns --- pandas/core/frame.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e4e9705e562d0..05d3713375481 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -837,6 +837,10 @@ def to_dict(self, outtype='dict'): ------- result : dict like {column -> {index -> value}} """ + import warnings + if not self.columns.is_unique: + warnings.warn("DataFrame columns are not unique, some " + "columns will be omitted.",UserWarning) if outtype.lower().startswith('d'): return dict((k, v.to_dict()) for k, v in self.iteritems()) elif outtype.lower().startswith('l'):