Skip to content

Commit f4bcfd4

Browse files
committed
Merge pull request #6299 from jreback/iloc_max
API: allow the iloc indexer to run off the end and not raise IndexError (GH6296)
2 parents 043d165 + 9f0dc3b commit f4bcfd4

File tree

10 files changed

+156
-56
lines changed

10 files changed

+156
-56
lines changed

doc/source/10min.rst

-19
Original file line numberDiff line numberDiff line change
@@ -273,25 +273,6 @@ For getting fast access to a scalar (equiv to the prior method)
273273
274274
df.iat[1,1]
275275
276-
There is one signficant departure from standard python/numpy slicing semantics.
277-
python/numpy allow slicing past the end of an array without an associated
278-
error.
279-
280-
.. ipython:: python
281-
282-
# these are allowed in python/numpy.
283-
x = list('abcdef')
284-
x[4:10]
285-
x[8:10]
286-
287-
Pandas will detect this and raise ``IndexError``, rather than return an empty
288-
structure.
289-
290-
::
291-
292-
>>> df.iloc[:,8:10]
293-
IndexError: out-of-bounds on slice (end)
294-
295276
Boolean Indexing
296277
~~~~~~~~~~~~~~~~
297278

doc/source/indexing.rst

+14-6
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,9 @@ of multi-axis indexing.
7777
See more at :ref:`Selection by Label <indexing.label>`
7878

7979
- ``.iloc`` is strictly integer position based (from ``0`` to ``length-1`` of
80-
the axis), will raise ``IndexError`` when the requested indicies are out of
81-
bounds. Allowed inputs are:
80+
the axis), will raise ``IndexError`` if a single index is requested and it
81+
is out-of-bounds, otherwise it will conform the bounds to the size of the object.
82+
Allowed inputs are:
8283

8384
- An integer e.g. ``5``
8485
- A list or array of integers ``[4, 3, 0]``
@@ -420,12 +421,19 @@ python/numpy allow slicing past the end of an array without an associated error.
420421
x[4:10]
421422
x[8:10]
422423
423-
Pandas will detect this and raise ``IndexError``, rather than return an empty structure.
424+
- As of v0.14.0, ``iloc`` will now accept out-of-bounds indexers, e.g. a value that exceeds the length of the object being
425+
indexed. These will be excluded. This will make pandas conform more with python/numpy indexing of out-of-bounds
426+
values. A single indexer that is out-of-bounds and drops the dimensions of the object will still raise
427+
``IndexError`` (:issue:`6296`). This could result in an empty axis (e.g. an empty DataFrame being returned).
424428

425-
::
429+
.. ipython:: python
426430
427-
>>> df.iloc[:,3:6]
428-
IndexError: out-of-bounds on slice (end)
431+
df = DataFrame(np.random.randn(5,2),columns=list('AB'))
432+
df
433+
df.iloc[[4,5,6]]
434+
df.iloc[4:6]
435+
df.iloc[:,2:3]
436+
df.iloc[:,1:3]
429437
430438
.. _indexing.basics.partial_setting:
431439

doc/source/release.rst

+4
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,10 @@ New features
5656
API Changes
5757
~~~~~~~~~~~
5858

59+
- ``iloc`` will now accept out-of-bounds indexers, e.g. a value that exceeds the length of the object being
60+
indexed. These will be excluded. This will make pandas conform more with python/numpy indexing of out-of-bounds
61+
values. A single indexer that is out-of-bounds and drops the dimensions of the object will still raise
62+
``IndexError`` (:issue:`6296`)
5963

6064
Experimental Features
6165
~~~~~~~~~~~~~~~~~~~~~

doc/source/v0.14.0.txt

+14
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,20 @@ Highlights include:
1515
API changes
1616
~~~~~~~~~~~
1717

18+
- ``iloc`` will now accept out-of-bounds indexers, e.g. a value that exceeds the length of the object being
19+
indexed. These will be excluded. This will make pandas conform more with python/numpy indexing of out-of-bounds
20+
values. A single indexer that is out-of-bounds and drops the dimensions of the object will still raise
21+
``IndexError`` (:issue:`6296`). This could result in an empty axis (e.g. an empty DataFrame being returned).
22+
23+
.. ipython:: python
24+
25+
df = DataFrame(np.random.randn(5,2),columns=list('AB'))
26+
df
27+
df.iloc[[4,5,6]]
28+
df.iloc[4:6]
29+
df.iloc[:,2:3]
30+
df.iloc[:,1:3]
31+
1832
Prior Version Deprecations/Changes
1933
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2034

pandas/core/generic.py

-8
Original file line numberDiff line numberDiff line change
@@ -1756,10 +1756,6 @@ def head(self, n=5):
17561756
l = len(self)
17571757
if l == 0 or n==0:
17581758
return self
1759-
if n > l:
1760-
n = l
1761-
elif n < -l:
1762-
n = -l
17631759
return self.iloc[:n]
17641760

17651761
def tail(self, n=5):
@@ -1769,10 +1765,6 @@ def tail(self, n=5):
17691765
l = len(self)
17701766
if l == 0 or n == 0:
17711767
return self
1772-
if n > l:
1773-
n = l
1774-
elif n < -l:
1775-
n = -l
17761768
return self.iloc[-n:]
17771769

17781770
#----------------------------------------------------------------------

pandas/core/index.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -621,9 +621,15 @@ def __getitem__(self, key):
621621
if com._is_bool_indexer(key):
622622
key = np.asarray(key)
623623

624-
result = arr_idx[key]
625-
if result.ndim > 1:
626-
return result
624+
try:
625+
result = arr_idx[key]
626+
if result.ndim > 1:
627+
return result
628+
except (IndexError):
629+
if not len(key):
630+
result = []
631+
else:
632+
raise
627633

628634
return Index(result, name=self.name)
629635

pandas/core/indexing.py

+44-1
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,29 @@ def _get_loc(self, key, axis=0):
7373
return self.obj._ixs(key, axis=axis)
7474

7575
def _slice(self, obj, axis=0, raise_on_error=False, typ=None):
76+
77+
# make out-of-bounds into bounds of the object
78+
if typ == 'iloc':
79+
ax = self.obj._get_axis(axis)
80+
l = len(ax)
81+
start = obj.start
82+
stop = obj.stop
83+
step = obj.step
84+
if start is not None:
85+
# degenerate to return nothing
86+
if start >= l:
87+
return self._getitem_axis(tuple(),axis=axis)
88+
89+
# equiv to a null slice
90+
elif start <= -l:
91+
start = None
92+
if stop is not None:
93+
if stop > l:
94+
stop = None
95+
elif stop <= -l:
96+
stop = None
97+
obj = slice(start,stop,step)
98+
7699
return self.obj._slice(obj, axis=axis, raise_on_error=raise_on_error,
77100
typ=typ)
78101

@@ -1188,14 +1211,23 @@ def _getitem_tuple(self, tup):
11881211
pass
11891212

11901213
retval = self.obj
1214+
axis=0
11911215
for i, key in enumerate(tup):
11921216
if i >= self.obj.ndim:
11931217
raise IndexingError('Too many indexers')
11941218

11951219
if _is_null_slice(key):
1220+
axis += 1
11961221
continue
11971222

1198-
retval = getattr(retval, self.name)._getitem_axis(key, axis=i)
1223+
retval = getattr(retval, self.name)._getitem_axis(key, axis=axis)
1224+
1225+
# if the dim was reduced, then pass a lower-dim the next time
1226+
if retval.ndim<self.ndim:
1227+
axis -= 1
1228+
1229+
# try to get for the next axis
1230+
axis += 1
11991231

12001232
return retval
12011233

@@ -1224,17 +1256,28 @@ def _getitem_axis(self, key, axis=0):
12241256
# a single integer or a list of integers
12251257
else:
12261258

1259+
ax = self.obj._get_axis(axis)
12271260
if _is_list_like(key):
12281261

1262+
# coerce the key to not exceed the maximum size of the index
1263+
arr = np.array(key)
1264+
l = len(ax)
1265+
if len(arr) and (arr.max() >= l or arr.min() <= -l):
1266+
key = arr[(arr>-l) & (arr<l)]
1267+
12291268
# force an actual list
12301269
key = list(key)
1270+
12311271
else:
12321272
key = self._convert_scalar_indexer(key, axis)
12331273

12341274
if not com.is_integer(key):
12351275
raise TypeError("Cannot index by location index with a "
12361276
"non-integer key")
12371277

1278+
if key > len(ax):
1279+
raise IndexError("single indexer is out-of-bounds")
1280+
12381281
return self._get_loc(key, axis=axis)
12391282

12401283
def _convert_to_indexer(self, obj, axis=0, is_setter=False):

pandas/core/internals.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3246,7 +3246,7 @@ def reindex_indexer(self, new_axis, indexer, axis=1, fill_value=None,
32463246
pandas-indexer with -1's only.
32473247
"""
32483248
# trying to reindex on an axis with duplicates
3249-
if not allow_dups and not self.axes[axis].is_unique:
3249+
if not allow_dups and not self.axes[axis].is_unique and len(indexer):
32503250
raise ValueError("cannot reindex from a duplicate axis")
32513251

32523252
if not self.is_consolidated():

pandas/tests/test_generic.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -873,7 +873,7 @@ def test_equals(self):
873873

874874
s2[0] = 9.9
875875
self.assert_(not s1.equals(s2))
876-
876+
877877
idx = MultiIndex.from_tuples([(0, 'a'), (1, 'b'), (2, 'c')])
878878
s1 = Series([1, 2, np.nan], index=idx)
879879
s2 = s1.copy()
@@ -900,17 +900,17 @@ def test_equals(self):
900900
# different dtype
901901
different = df1.copy()
902902
different['floats'] = different['floats'].astype('float32')
903-
self.assert_(not df1.equals(different))
903+
self.assert_(not df1.equals(different))
904904

905905
# different index
906906
different_index = -index
907907
different = df2.set_index(different_index)
908-
self.assert_(not df1.equals(different))
908+
self.assert_(not df1.equals(different))
909909

910910
# different columns
911911
different = df2.copy()
912912
different.columns = df2.columns[::-1]
913-
self.assert_(not df1.equals(different))
913+
self.assert_(not df1.equals(different))
914914

915915
# DatetimeIndex
916916
index = pd.date_range('2000-1-1', periods=10, freq='T')

pandas/tests/test_indexing.py

+66-14
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,72 @@ def test_repeated_getitem_dups(self):
339339
result = df.loc[:,0].loc['A']
340340
assert_series_equal(result,expected)
341341

342+
def test_iloc_exceeds_bounds(self):
343+
344+
# GH6296
345+
# iloc should allow indexers that exceed the bounds
346+
df = DataFrame(np.random.random_sample((20,5)), columns=list('ABCDE'))
347+
expected = df
348+
result = df.iloc[:,[0,1,2,3,4,5]]
349+
assert_frame_equal(result,expected)
350+
351+
result = df.iloc[[1,30]]
352+
expected = df.iloc[[1]]
353+
assert_frame_equal(result,expected)
354+
355+
result = df.iloc[[1,-30]]
356+
expected = df.iloc[[1]]
357+
assert_frame_equal(result,expected)
358+
359+
result = df.iloc[:,4:10]
360+
expected = df.iloc[:,4:]
361+
assert_frame_equal(result,expected)
362+
363+
result = df.iloc[:,-4:-10]
364+
expected = df.iloc[:,-4:]
365+
assert_frame_equal(result,expected)
366+
367+
result = df.iloc[[100]]
368+
expected = DataFrame(columns=df.columns)
369+
assert_frame_equal(result,expected)
370+
371+
# still raise on a single indexer
372+
def f():
373+
df.iloc[30]
374+
self.assertRaises(IndexError, f)
375+
376+
s = df['A']
377+
result = s.iloc[[100]]
378+
expected = Series()
379+
assert_series_equal(result,expected)
380+
381+
result = s.iloc[[-100]]
382+
expected = Series()
383+
assert_series_equal(result,expected)
384+
385+
# slice
386+
result = s.iloc[18:30]
387+
expected = s.iloc[18:]
388+
assert_series_equal(result,expected)
389+
390+
# doc example
391+
df = DataFrame(np.random.randn(5,2),columns=list('AB'))
392+
result = df.iloc[[4,5,6]]
393+
expected = df.iloc[[4]]
394+
assert_frame_equal(result,expected)
395+
396+
result = df.iloc[4:6]
397+
expected = df.iloc[[4]]
398+
assert_frame_equal(result,expected)
399+
400+
result = df.iloc[:,2:3]
401+
expected = DataFrame(index=df.index)
402+
assert_frame_equal(result,expected)
403+
404+
result = df.iloc[:,1:3]
405+
expected = df.iloc[:,[1]]
406+
assert_frame_equal(result,expected)
407+
342408
def test_iloc_getitem_int(self):
343409

344410
# integer
@@ -442,14 +508,6 @@ def test_iloc_getitem_multiindex(self):
442508
xp = df.xs('b',drop_level=False)
443509
assert_frame_equal(rs,xp)
444510

445-
def test_iloc_getitem_out_of_bounds(self):
446-
447-
# out-of-bounds slice
448-
self.assertRaises(IndexError, self.frame_ints.iloc.__getitem__, tuple([slice(None),slice(1,5,None)]))
449-
self.assertRaises(IndexError, self.frame_ints.iloc.__getitem__, tuple([slice(None),slice(-5,3,None)]))
450-
self.assertRaises(IndexError, self.frame_ints.iloc.__getitem__, tuple([slice(1,5,None)]))
451-
self.assertRaises(IndexError, self.frame_ints.iloc.__getitem__, tuple([slice(-5,3,None)]))
452-
453511
def test_iloc_setitem(self):
454512
df = self.frame_ints
455513

@@ -738,12 +796,6 @@ def test_iloc_getitem_frame(self):
738796
expected = df.ix[[2,4,6,8]]
739797
assert_frame_equal(result, expected)
740798

741-
# out-of-bounds slice
742-
self.assertRaises(IndexError, df.iloc.__getitem__, tuple([slice(None),slice(1,5,None)]))
743-
self.assertRaises(IndexError, df.iloc.__getitem__, tuple([slice(None),slice(-5,3,None)]))
744-
self.assertRaises(IndexError, df.iloc.__getitem__, tuple([slice(1,11,None)]))
745-
self.assertRaises(IndexError, df.iloc.__getitem__, tuple([slice(-11,3,None)]))
746-
747799
# try with labelled frame
748800
df = DataFrame(np.random.randn(10, 4), index=list('abcdefghij'), columns=list('ABCD'))
749801

0 commit comments

Comments
 (0)