Fix scalar iloc #15778

Closed · wants to merge 3 commits

2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.20.2.txt
@@ -46,7 +46,7 @@ Indexing
^^^^^^^^

- Bug in ``DataFrame.reset_index(level=)`` with single level index (:issue:`16263`)

- Bug in ``DataFrame.iloc`` with duplicate labels (:issue:`15686`)

I/O
^^^
21 changes: 21 additions & 0 deletions pandas/core/dtypes/cast.py
@@ -1026,3 +1026,24 @@ def find_common_type(types):
return np.object

return np.find_common_type(types, [])


def _maybe_convert_indexer(indexer, until):
"""
Convert slice, tuple, list or scalar "indexer" to 1-d array of indices,
using "until" as maximum for upwards open slices.
"""

if is_scalar(indexer):
return np.array([indexer], dtype=int)

if isinstance(indexer, np.ndarray):
if indexer.dtype == bool:
return np.where(indexer)[0]
return indexer

if isinstance(indexer, slice):
stop = until if indexer.stop is None else indexer.stop
return np.arange(stop, dtype=int)[indexer]

return np.array(indexer, dtype=int)
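
The new helper normalizes the different indexer shapes that ``iloc`` can receive (scalar, boolean mask, slice, list-like) into a flat array of integer positions. A minimal standalone sketch of the same rules, using a hypothetical ``convert_indexer`` name instead of importing the private pandas helper:

```python
import numpy as np

def convert_indexer(indexer, until):
    """Illustrative mirror of the conversion rules in the diff above."""
    if np.isscalar(indexer):
        # a single position becomes a one-element integer array
        return np.array([indexer], dtype=int)
    if isinstance(indexer, np.ndarray):
        # a boolean mask becomes the positions of its True entries
        if indexer.dtype == bool:
            return np.where(indexer)[0]
        return indexer
    if isinstance(indexer, slice):
        # an open-ended slice is bounded by `until`
        stop = until if indexer.stop is None else indexer.stop
        return np.arange(stop, dtype=int)[indexer]
    # anything else list-like is coerced to an integer array
    return np.array(indexer, dtype=int)

print(convert_indexer(2, until=4))                        # [2]
print(convert_indexer(np.array([True, False, True]), 4))  # [0 2]
print(convert_indexer(slice(1, None), until=4))           # [1 2 3]
```
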
138 changes: 105 additions & 33 deletions pandas/core/indexing.py
file mode 100755 → 100644
@@ -16,6 +16,7 @@
_is_unorderable_exception,
_ensure_platform_int)
from pandas.core.dtypes.missing import isnull, _infer_fill_value
from pandas.core.dtypes.cast import _maybe_convert_indexer

from pandas.core.index import Index, MultiIndex

@@ -81,6 +82,24 @@ def __getitem__(self, arg):
IndexSlice = _IndexSlice()


class InfoCleaner:
"""
A context manager which temporarily removes labels on the "info" axis,
replacing them with a RangeIndex, and then puts them back in place.
Used to unambiguously index by position.
"""
def __init__(self, obj):
self._obj = obj
self._info_axis = self._obj._AXIS_NAMES[self._obj._info_axis_number]

def __enter__(self):
self._old_col = getattr(self._obj, self._info_axis)
setattr(self._obj, self._info_axis, range(len(self._old_col)))

def __exit__(self, *args):
setattr(self._obj, self._info_axis, self._old_col)
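
The idea behind ``InfoCleaner`` is that, once the labels are swapped for a ``RangeIndex``, label-based and positional assignment coincide, so duplicated column labels can no longer make a single write fan out across columns. A simplified sketch of the same pattern on a plain ``DataFrame`` (it touches ``df.columns`` directly rather than the internal info-axis attributes, and the class name is only for illustration):

```python
import pandas as pd

class ColumnsAsPositions:
    """Temporarily replace a frame's columns with 0..n-1, then restore them."""

    def __init__(self, frame):
        self._frame = frame

    def __enter__(self):
        self._old_columns = self._frame.columns
        self._frame.columns = range(len(self._old_columns))
        return self._frame

    def __exit__(self, *exc):
        self._frame.columns = self._old_columns


df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'A'])  # duplicate labels
with ColumnsAsPositions(df):
    df[0] = [10, 30]   # unambiguous: only the first column is written
print(df)              # the second 'A' column keeps its values 2 and 4
```
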


class IndexingError(Exception):
pass

@@ -492,29 +511,10 @@ def _setitem_with_indexer(self, indexer, value):
else:
lplane_indexer = 0

def setter(item, v):
s = self.obj[item]
pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer

# perform the equivalent of a setitem on the info axis
# as we have a null slice or a slice with full bounds
# which means essentially reassign to the columns of a
# multi-dim object
# GH6149 (null slice), GH10408 (full bounds)
if (isinstance(pi, tuple) and
all(is_null_slice(idx) or
is_full_slice(idx, len(self.obj))
for idx in pi)):
s = v
else:
# set the item, possibly having a dtype change
s._consolidate_inplace()
s = s.copy()
s._data = s._data.setitem(indexer=pi, value=v)
s._maybe_update_cacher(clear=True)

# reset the sliced object if unique
self.obj[item] = s
setter_kwargs = {'items': labels,
'indexer': indexer,
'pi': plane_indexer[0] if lplane_indexer == 1
else plane_indexer}

def can_do_equal_len():
""" return True if we have an equal len settable """
@@ -542,7 +542,7 @@ def can_do_equal_len():
sub_indexer = list(indexer)
multiindex_indexer = isinstance(labels, MultiIndex)

for item in labels:
for idx, item in enumerate(labels):
if item in value:
sub_indexer[info_axis] = item
v = self._align_series(
@@ -551,7 +551,7 @@
else:
v = np.nan

setter(item, v)
self._setter(idx, v, force_loc=True, **setter_kwargs)

# we have an equal len ndarray/convertible to our labels
elif np.array(value).ndim == 2:
@@ -563,14 +563,15 @@
raise ValueError('Must have equal len keys and value '
'when setting with an ndarray')

for i, item in enumerate(labels):
for i in range(len(labels)):

# setting with a list, recoerces
setter(item, value[:, i].tolist())
self._setter(i, value[:, i].tolist(), force_loc=True,
**setter_kwargs)

# we have an equal len list/ndarray
elif can_do_equal_len():
setter(labels[0], value)
self._setter(0, value, **setter_kwargs)

# per label values
else:
@@ -579,13 +580,12 @@ def can_do_equal_len():
raise ValueError('Must have equal len keys and value '
'when setting with an iterable')

for item, v in zip(labels, value):
setter(item, v)
for i, v in zip(range(len(labels)), value):
self._setter(i, v, **setter_kwargs)
else:

# scalar
for item in labels:
setter(item, value)
for idx in range(len(labels)):
self._setter(idx, value, **setter_kwargs)

else:
if isinstance(indexer, tuple):
@@ -619,6 +619,47 @@ def can_do_equal_len():
value=value)
self.obj._maybe_update_cacher(clear=True)

def _setter(self, idx, v, items, pi, **kwargs):
"""
Set a single value on the underlying object. Label-based.

Parameters
----------
idx : int
The index of the desired element inside "items"

v : any
The value to assign to the specified location

items: list
A list of labels

pi: tuple or list-like
Components of original indexer preceding the info axis
"""
item = items[idx]
s = self.obj[item]

# perform the equivalent of a setitem on the info axis
# as we have a null slice or a slice with full bounds
# which means essentially reassign to the columns of a
# multi-dim object
# GH6149 (null slice), GH10408 (full bounds)
if (isinstance(pi, tuple) and
all(is_null_slice(ix) or
is_full_slice(ix, len(self.obj))
for ix in pi)):
s = v
else:
# set the item, possibly having a dtype change
s._consolidate_inplace()
s = s.copy()
s._data = s._data.setitem(indexer=pi, value=v)
s._maybe_update_cacher(clear=True)

# reset the sliced object if unique
self.obj[item] = s

def _align_series(self, indexer, ser, multiindex_indexer=False):
"""
Parameters
@@ -1766,6 +1807,37 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False):
raise ValueError("Can only index by location with a [%s]" %
self._valid_types)

def _setter(self, idx, v, indexer, force_loc=False, **kwargs):
"""
Set a single value on the underlying object. Position-based by default.

Parameters
----------
idx : int
The index of the desired element

v : any
The value to assign to the specified location

indexer: list
The original indexer

force_loc: bool
If True, use location-based indexing.

Other keyword arguments are forwarded to _NDFrameIndexer._setter()
"""

if force_loc:
super(_iLocIndexer, self)._setter(idx, v, **kwargs)
else:
info_axis = self.obj._info_axis_number
max_idx = len(self.obj._get_axis(info_axis))
kwargs['items'] = _maybe_convert_indexer(indexer[info_axis],
max_idx)
with InfoCleaner(self.obj):
super(_iLocIndexer, self)._setter(idx, v, **kwargs)


class _ScalarAccessIndexer(_NDFrameIndexer):
""" access scalars quickly """
33 changes: 31 additions & 2 deletions pandas/tests/indexing/test_iloc.py
@@ -288,15 +288,33 @@ def test_iloc_setitem_dups(self):
# iloc with a mask aligning from another iloc
df1 = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}])
df2 = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}])
df = concat([df1, df2], axis=1)
df_orig = concat([df1, df2], axis=1)
df = df_orig.copy()

# GH 15686
# iloc with mask, duplicated index and multiple blocks
expected = df.fillna(3)
expected['A'] = expected['A'].astype('float64')
expected.iloc[:, 0] = expected.iloc[:, 0].astype('float64')
inds = np.isnan(df.iloc[:, 0])
mask = inds[inds].index
df.iloc[mask, 0] = df.iloc[mask, 2]
tm.assert_frame_equal(df, expected)

# GH 15686
# iloc with scalar, duplicated index and multiple blocks
df = df_orig.copy()
expected = df.fillna(15)
df.iloc[0, 0] = 15
tm.assert_frame_equal(df, expected)

# GH 15686
# iloc with repeated value, duplicated index and multiple blocks
df = df_orig.copy()
expected = concat([DataFrame([{'A': 15, 'B': 1}, {'A': 15, 'B': 2}]),
df2], axis=1)
df.iloc[:, 0] = 15
tm.assert_frame_equal(df, expected)

# del a dup column across blocks
expected = DataFrame({0: [1, 2], 1: [3, 4]})
expected.columns = ['B', 'B']
@@ -314,6 +332,17 @@ def test_iloc_setitem_dups(self):
drop=True)
tm.assert_frame_equal(df, expected)

@pytest.mark.xfail(reason="BlockManager.setitem() broken")
def test_iloc_setitem_dups_mixed_df(self):
# GH 12991
df1 = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}])
df2 = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}])
df = concat([df1, df2], axis=1)

expected = df.fillna(15)
df.iloc[0, 0] = 15
tm.assert_frame_equal(df, expected)

def test_iloc_getitem_frame(self):
df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2),
columns=lrange(0, 8, 2))
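
Taken together, the new tests pin down the behavior from GH 15686: with duplicated column labels, a positional scalar assignment through ``iloc`` must touch only the addressed column. A small reproduction of that scenario, mirroring the test setup above:

```python
import pandas as pd

df1 = pd.DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}])
df2 = pd.DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}])
df = pd.concat([df1, df2], axis=1)   # columns A, B, A, B with duplicate labels

df.iloc[0, 0] = 15                   # positional: only row 0, column 0
print(df)
# With the fix, the second 'A' column still holds 3 and 4; only the NaN in
# the first 'A' column is replaced by 15 (matching df.fillna(15) here).
```
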