Skip to content

Commit 553bacc

Browse files
committed
ENH: can pass external arrays in lieu of column names to DataFrame.set_index, close #402
1 parent 4c31c83 commit 553bacc

File tree

4 files changed

+95
-73
lines changed

4 files changed

+95
-73
lines changed

RELEASE.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ pandas 0.8.0
4747
e.g. Series.describe and DataFrame.describe (GH #1092)
4848
- Can create MultiIndex by passing list of lists or list of arrays to Series,
4949
DataFrame constructor, etc. (#831)
50+
- Can pass arrays in addition to column names to DataFrame.set_index (#402)
5051

5152
**API Changes**
5253

pandas/core/frame.py

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2069,31 +2069,36 @@ def reindex_like(self, other, method=None, copy=True, limit=None):
20692069

20702070
truncate = generic.truncate
20712071

2072-
def set_index(self, col_or_cols, drop=True, inplace=False,
2073-
verify_integrity=True):
2072+
def set_index(self, keys, drop=True, inplace=False,
2073+
verify_integrity=False):
20742074
"""
20752075
Set the DataFrame index (row labels) using one or more existing
20762076
columns. By default yields a new object.
20772077
20782078
Parameters
20792079
----------
2080-
col_or_cols : column label or list of column labels
2080+
keys : column label or list of column labels / arrays
20812081
drop : boolean, default True
20822082
Delete columns to be used as the new index
20832083
inplace : boolean, default False
20842084
Modify the DataFrame in place (do not create a new object)
2085-
verify_integrity : boolean, default True
2085+
verify_integrity : boolean, default False
20862086
Check the new index for duplicates. Otherwise defer the check until
20872087
necessary. Setting to False will improve the performance of this
20882088
method
20892089
2090+
Examples
2091+
--------
2092+
indexed_df = df.set_index(['A', 'B'])
2093+
indexed_df2 = df.set_index(['A', [0, 1, 2, 0, 1, 2]])
2094+
indexed_df3 = df.set_index([[0, 1, 2, 0, 1, 2]])
2095+
20902096
Returns
20912097
-------
20922098
dataframe : DataFrame
20932099
"""
2094-
cols = col_or_cols
2095-
if not isinstance(col_or_cols, (list, tuple)):
2096-
cols = [col_or_cols]
2100+
if not isinstance(keys, (list, tuple)):
2101+
keys = [keys]
20972102

20982103
if inplace:
20992104
frame = self
@@ -2102,13 +2107,16 @@ def set_index(self, col_or_cols, drop=True, inplace=False,
21022107
frame = self.copy()
21032108

21042109
arrays = []
2105-
for col in cols:
2106-
level = frame[col]
2107-
if drop:
2108-
del frame[col]
2110+
for col in keys:
2111+
if isinstance(col, (list, Series, np.ndarray)):
2112+
level = col
2113+
else:
2114+
level = frame[col]
2115+
if drop:
2116+
del frame[col]
21092117
arrays.append(level)
21102118

2111-
index = MultiIndex.from_arrays(arrays, names=cols)
2119+
index = MultiIndex.from_arrays(arrays, names=keys)
21122120

21132121
if verify_integrity and not index.is_unique:
21142122
duplicates = index.get_duplicates()

pandas/src/tseries.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,7 @@ def fast_zip(list ndarrays):
369369
arr = ndarrays[j]
370370
it = <flatiter> PyArray_IterNew(arr)
371371
if len(arr) != n:
372-
raise ValueError('all arrays but be same length')
372+
raise ValueError('all arrays must be same length')
373373

374374
for i in range(n):
375375
val = PyArray_GETITEM(arr, PyArray_ITER_DATA(it))

pandas/tests/test_frame.py

Lines changed: 73 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1235,6 +1235,79 @@ def test_set_index(self):
12351235
self.assertRaises(Exception, setattr, self.mixed_frame, 'index',
12361236
idx[::2])
12371237

1238+
def test_set_index2(self):
1239+
df = DataFrame({'A' : ['foo', 'foo', 'foo', 'bar', 'bar'],
1240+
'B' : ['one', 'two', 'three', 'one', 'two'],
1241+
'C' : ['a', 'b', 'c', 'd', 'e'],
1242+
'D' : np.random.randn(5),
1243+
'E' : np.random.randn(5)})
1244+
1245+
# new object, single-column
1246+
result = df.set_index('C')
1247+
result_nodrop = df.set_index('C', drop=False)
1248+
1249+
index = Index(df['C'], name='C')
1250+
1251+
expected = df.ix[:, ['A', 'B', 'D', 'E']]
1252+
expected.index = index
1253+
1254+
expected_nodrop = df.copy()
1255+
expected_nodrop.index = index
1256+
1257+
assert_frame_equal(result, expected)
1258+
assert_frame_equal(result_nodrop, expected_nodrop)
1259+
self.assertEqual(result.index.name, index.name)
1260+
1261+
# inplace, single
1262+
df2 = df.copy()
1263+
df2.set_index('C', inplace=True)
1264+
assert_frame_equal(df2, expected)
1265+
1266+
df3 = df.copy()
1267+
df3.set_index('C', drop=False, inplace=True)
1268+
assert_frame_equal(df3, expected_nodrop)
1269+
1270+
# create new object, multi-column
1271+
result = df.set_index(['A', 'B'])
1272+
result_nodrop = df.set_index(['A', 'B'], drop=False)
1273+
1274+
index = MultiIndex.from_arrays([df['A'], df['B']], names=['A', 'B'])
1275+
1276+
expected = df.ix[:, ['C', 'D', 'E']]
1277+
expected.index = index
1278+
1279+
expected_nodrop = df.copy()
1280+
expected_nodrop.index = index
1281+
1282+
assert_frame_equal(result, expected)
1283+
assert_frame_equal(result_nodrop, expected_nodrop)
1284+
self.assertEqual(result.index.names, index.names)
1285+
1286+
# inplace
1287+
df2 = df.copy()
1288+
df2.set_index(['A', 'B'], inplace=True)
1289+
assert_frame_equal(df2, expected)
1290+
1291+
df3 = df.copy()
1292+
df3.set_index(['A', 'B'], drop=False, inplace=True)
1293+
assert_frame_equal(df3, expected_nodrop)
1294+
1295+
# corner case
1296+
self.assertRaises(Exception, df.set_index, 'A', verify_integrity=True)
1297+
1298+
def test_set_index_pass_arrays(self):
1299+
df = DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
1300+
'foo', 'bar', 'foo', 'foo'],
1301+
'B' : ['one', 'one', 'two', 'three',
1302+
'two', 'two', 'one', 'three'],
1303+
'C' : np.random.randn(8),
1304+
'D' : np.random.randn(8)})
1305+
1306+
# multiple columns
1307+
result = df.set_index(['A', df['B'].values], drop=False)
1308+
expected = df.set_index(['A', 'B'], drop=False)
1309+
assert_frame_equal(result, expected)
1310+
12381311
def test_set_columns(self):
12391312
cols = Index(np.arange(len(self.mixed_frame.columns)))
12401313
self.mixed_frame.columns = cols
@@ -3502,66 +3575,6 @@ def test_reindex_fill_value(self):
35023575
expected = df.reindex(range(15)).fillna(0)
35033576
assert_frame_equal(result, expected)
35043577

3505-
def test_set_index2(self):
3506-
df = DataFrame({'A' : ['foo', 'foo', 'foo', 'bar', 'bar'],
3507-
'B' : ['one', 'two', 'three', 'one', 'two'],
3508-
'C' : ['a', 'b', 'c', 'd', 'e'],
3509-
'D' : np.random.randn(5),
3510-
'E' : np.random.randn(5)})
3511-
3512-
# new object, single-column
3513-
result = df.set_index('C')
3514-
result_nodrop = df.set_index('C', drop=False)
3515-
3516-
index = Index(df['C'], name='C')
3517-
3518-
expected = df.ix[:, ['A', 'B', 'D', 'E']]
3519-
expected.index = index
3520-
3521-
expected_nodrop = df.copy()
3522-
expected_nodrop.index = index
3523-
3524-
assert_frame_equal(result, expected)
3525-
assert_frame_equal(result_nodrop, expected_nodrop)
3526-
self.assertEqual(result.index.name, index.name)
3527-
3528-
# inplace, single
3529-
df2 = df.copy()
3530-
df2.set_index('C', inplace=True)
3531-
assert_frame_equal(df2, expected)
3532-
3533-
df3 = df.copy()
3534-
df3.set_index('C', drop=False, inplace=True)
3535-
assert_frame_equal(df3, expected_nodrop)
3536-
3537-
# create new object, multi-column
3538-
result = df.set_index(['A', 'B'])
3539-
result_nodrop = df.set_index(['A', 'B'], drop=False)
3540-
3541-
index = MultiIndex.from_arrays([df['A'], df['B']], names=['A', 'B'])
3542-
3543-
expected = df.ix[:, ['C', 'D', 'E']]
3544-
expected.index = index
3545-
3546-
expected_nodrop = df.copy()
3547-
expected_nodrop.index = index
3548-
3549-
assert_frame_equal(result, expected)
3550-
assert_frame_equal(result_nodrop, expected_nodrop)
3551-
self.assertEqual(result.index.names, index.names)
3552-
3553-
# inplace
3554-
df2 = df.copy()
3555-
df2.set_index(['A', 'B'], inplace=True)
3556-
assert_frame_equal(df2, expected)
3557-
3558-
df3 = df.copy()
3559-
df3.set_index(['A', 'B'], drop=False, inplace=True)
3560-
assert_frame_equal(df3, expected_nodrop)
3561-
3562-
# corner case
3563-
self.assertRaises(Exception, df.set_index, 'A')
3564-
35653578
def test_align(self):
35663579
af, bf = self.frame.align(self.frame)
35673580
self.assert_(af._data is not self.frame._data)

0 commit comments

Comments
 (0)