Skip to content

Commit d56d0e6

Browse files
committed
BUG: fix internal error in constructing DataFrame.values with duplicate column names. close #2236
1 parent 3001af2 commit d56d0e6

File tree

3 files changed

+27
-5
lines changed

3 files changed

+27
-5
lines changed

RELEASE.rst

+1
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ pandas 0.9.1
9999
- Many unicode formatting fixes (#2201)
100100
- Fix improper MultiIndex conversion issue when assigning
101101
e.g. DataFrame.index (#2200)
102+
- Fix conversion of mixed-type DataFrame to ndarray with duplicate columns (#2236)
102103
103104
pandas 0.9.0
104105
============

pandas/core/internals.py

+15-5
Original file line numberDiff line numberDiff line change
@@ -732,12 +732,22 @@ def _interleave(self, items):
732732

733733
# By construction, all of the items should be covered by one of the
734734
# blocks
735-
for block in self.blocks:
736-
indexer = items.get_indexer(block.items)
737-
assert((indexer != -1).all())
738-
result[indexer] = block.get_values(dtype)
739-
itemmask[indexer] = 1
735+
if items.is_unique:
736+
for block in self.blocks:
737+
indexer = items.get_indexer(block.items)
738+
assert((indexer != -1).all())
739+
result[indexer] = block.get_values(dtype)
740+
itemmask[indexer] = 1
741+
else:
742+
for block in self.blocks:
743+
mask = items.isin(block.items)
744+
indexer = mask.nonzero()[0]
745+
assert(len(indexer) == len(block.items))
746+
result[indexer] = block.get_values(dtype)
747+
itemmask[indexer] = 1
748+
740749
assert(itemmask.all())
750+
741751
return result
742752

743753
def xs(self, key, axis=1, copy=True):

pandas/tests/test_frame.py

+11
Original file line numberDiff line numberDiff line change
@@ -4078,6 +4078,17 @@ def test_as_matrix(self):
40784078
expected = self.frame.reindex(columns=['A', 'B']).values
40794079
assert_almost_equal(mat, expected)
40804080

4081+
def test_as_matrix_duplicates(self):
4082+
df = DataFrame([[1, 2, 'a', 'b'],
4083+
[1, 2, 'a', 'b']],
4084+
columns=['one', 'one', 'two', 'two'])
4085+
4086+
result = df.values
4087+
expected = np.array([[1, 2, 'a', 'b'], [1, 2, 'a', 'b']],
4088+
dtype=object)
4089+
4090+
self.assertTrue(np.array_equal(result, expected))
4091+
40814092
def test_values(self):
40824093
self.frame.values[:, 0] = 5.
40834094
self.assert_((self.frame.values[:, 0] == 5).all())

0 commit comments

Comments
 (0)