Skip to content

BUG: Bug in user-facing take with negative indicies was incorrect #3027

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 12, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,8 @@ pandas 0.11.0
- Bug in value_counts of ``datetime64[ns]`` Series (GH3002_)
- Fixed printing of ``NaT` in an index
- Bug in idxmin/idxmax of ``datetime64[ns]`` Series with ``NaT`` (GH2982__)
- Bug in ``icol`` with negative indicies was incorrect producing incorrect return values (see GH2922_)
- Bug in ``icol, take`` with negative indicies was producing incorrect return
values (see GH2922_, GH2892_), also check for out-of-bounds indices (GH3029_)
- Bug in DataFrame column insertion when the column creation fails, existing frame is left in
an irrecoverable state (GH3010_)

Expand All @@ -160,6 +161,7 @@ pandas 0.11.0
.. _GH2807: https://github.com/pydata/pandas/issues/2807
.. _GH2849: https://github.com/pydata/pandas/issues/2849
.. _GH2898: https://github.com/pydata/pandas/issues/2898
.. _GH2892: https://github.com/pydata/pandas/issues/2892
.. _GH2909: https://github.com/pydata/pandas/issues/2909
.. _GH2922: https://github.com/pydata/pandas/issues/2922
.. _GH2931: https://github.com/pydata/pandas/issues/2931
Expand All @@ -170,6 +172,7 @@ pandas 0.11.0
.. _GH3002: https://github.com/pydata/pandas/issues/3002
.. _GH3010: https://github.com/pydata/pandas/issues/3010
.. _GH3012: https://github.com/pydata/pandas/issues/3012
.. _GH3029: https://github.com/pydata/pandas/issues/3029


pandas 0.10.1
Expand Down
16 changes: 7 additions & 9 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@
from pandas.core.generic import NDFrame
from pandas.core.index import Index, MultiIndex, _ensure_index
from pandas.core.indexing import (_NDFrameIndexer, _maybe_droplevels,
_is_index_slice, _check_bool_indexer)
_is_index_slice, _check_bool_indexer,
_maybe_convert_indices)
from pandas.core.internals import BlockManager, make_block, form_blocks
from pandas.core.series import Series, _radd_compat
import pandas.core.expressions as expressions
Expand Down Expand Up @@ -1928,11 +1929,6 @@ def _ixs(self, i, axis=0, copy=False):
label = self.columns[i]
if isinstance(label, Index):

# if we have negative indicies, translate to postive here
# (take doesen't deal properly with these)
l = len(self.columns)
i = [ v if v >= 0 else l+v for v in i ]

return self.take(i, axis=1)

values = self._data.iget(i)
Expand Down Expand Up @@ -2911,11 +2907,13 @@ def take(self, indices, axis=0):
-------
taken : DataFrame
"""
if isinstance(indices, list):
indices = np.array(indices)

# check/convert indicies here
indices = _maybe_convert_indices(indices, len(self._get_axis(axis)))

if self._is_mixed_type:
if axis == 0:
new_data = self._data.take(indices, axis=1)
new_data = self._data.take(indices, axis=1, verify=False)
return DataFrame(new_data)
else:
new_columns = self.columns.take(indices)
Expand Down
7 changes: 6 additions & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from pandas.core.index import MultiIndex
import pandas.core.indexing as indexing
from pandas.core.indexing import _maybe_convert_indices
from pandas.tseries.index import DatetimeIndex
import pandas.core.common as com
import pandas.lib as lib
Expand Down Expand Up @@ -943,12 +944,16 @@ def take(self, indices, axis=0):
-------
taken : type of caller
"""

# check/convert indicies here
indices = _maybe_convert_indices(indices, len(self._get_axis(axis)))

if axis == 0:
labels = self._get_axis(axis)
new_items = labels.take(indices)
new_data = self._data.reindex_axis(new_items, axis=0)
else:
new_data = self._data.take(indices, axis=axis)
new_data = self._data.take(indices, axis=axis, verify=False)
return self._constructor(new_data)

def tz_convert(self, tz, axis=0, copy=True):
Expand Down
14 changes: 14 additions & 0 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -913,6 +913,20 @@ def _is_series(obj):
return isinstance(obj, Series)


def _maybe_convert_indices(indices, n):
""" if we have negative indicies, translate to postive here
if have indicies that are out-of-bounds, raise an IndexError """
if isinstance(indices, list):
indices = np.array(indices)

mask = indices<0
if mask.any():
indices[mask] += n
mask = (indices>=n) | (indices<0)
if mask.any():
raise IndexError("indices are out-of-bounds")
return indices

def _maybe_convert_ix(*args):
"""
We likely want to take the cross-product
Expand Down
9 changes: 6 additions & 3 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import numpy as np

from pandas.core.index import Index, _ensure_index, _handle_legacy_indexes
from pandas.core.indexing import _check_slice_bounds
from pandas.core.indexing import _check_slice_bounds, _maybe_convert_indices
import pandas.core.common as com
import pandas.lib as lib
import pandas.tslib as tslib
Expand Down Expand Up @@ -1517,13 +1517,16 @@ def _make_na_block(self, items, ref_items, fill_value=np.nan):
na_block = make_block(block_values, items, ref_items)
return na_block

def take(self, indexer, axis=1):
def take(self, indexer, axis=1, verify=True):
if axis < 1:
raise AssertionError('axis must be at least 1, got %d' % axis)

indexer = com._ensure_platform_int(indexer)

n = len(self.axes[axis])

if verify:
indexer = _maybe_convert_indices(indexer, n)

if ((indexer == -1) | (indexer >= n)).any():
raise Exception('Indices must be nonzero and less than '
'the axis length')
Expand Down
46 changes: 45 additions & 1 deletion pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -8602,12 +8602,43 @@ def test_fillna_col_reordering(self):
self.assert_(df.columns.tolist() == filled.columns.tolist())

def test_take(self):

# homogeneous
#----------------------------------------
order = [3, 1, 2, 0]
for df in [self.frame]:

result = df.take(order, axis=0)
expected = df.reindex(df.index.take(order))
assert_frame_equal(result, expected)

# axis = 1
result = df.take(order, axis=1)
expected = df.ix[:, ['D', 'B', 'C', 'A']]
assert_frame_equal(result, expected, check_names=False)

# neg indicies
order = [2,1,-1]
for df in [self.frame]:

result = df.take(order, axis=0)
expected = df.reindex(df.index.take(order))
assert_frame_equal(result, expected)

# axis = 1
result = df.take(order, axis=1)
expected = df.ix[:, ['C', 'B', 'D']]
assert_frame_equal(result, expected, check_names=False)

# illegal indices
self.assertRaises(IndexError, df.take, [3,1,2,30], axis=0)
self.assertRaises(IndexError, df.take, [3,1,2,-31], axis=0)
self.assertRaises(IndexError, df.take, [3,1,2,5], axis=1)
self.assertRaises(IndexError, df.take, [3,1,2,-5], axis=1)

# mixed-dtype
#----------------------------------------
order = [4, 1, 2, 0, 3]
order = [4, 1, 2, 0, 3]
for df in [self.mixed_frame]:

result = df.take(order, axis=0)
Expand All @@ -8619,6 +8650,19 @@ def test_take(self):
expected = df.ix[:, ['foo', 'B', 'C', 'A', 'D']]
assert_frame_equal(result, expected)

# neg indicies
order = [4,1,-2]
for df in [self.mixed_frame]:

result = df.take(order, axis=0)
expected = df.reindex(df.index.take(order))
assert_frame_equal(result, expected)

# axis = 1
result = df.take(order, axis=1)
expected = df.ix[:, ['foo', 'B', 'D']]
assert_frame_equal(result, expected)

# by dtype
order = [1, 2, 0, 3]
for df in [self.mixed_float,self.mixed_int]:
Expand Down
6 changes: 5 additions & 1 deletion pandas/tests/test_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1013,7 +1013,11 @@ def test_take(self):
expected = self.panel.reindex(minor=['D', 'A', 'B', 'C'])
assert_panel_equal(result, expected)

self.assertRaises(Exception, self.panel.take, [3, -1, 1, 2], axis=2)
# neg indicies ok
expected = self.panel.reindex(minor=['D', 'D', 'B', 'C'])
result = self.panel.take([3, -1, 1, 2], axis=2)
assert_panel_equal(result, expected)

self.assertRaises(Exception, self.panel.take, [4, 0, 1, 2], axis=2)

def test_sort_index(self):
Expand Down