From f82bb652b233fc0ab5ab06eabc016066a1e28063 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 2 Feb 2020 15:14:25 -0800 Subject: [PATCH 1/3] REF: Move Loc-only methods to Loc --- pandas/core/indexing.py | 415 +++++++++++++++++++++------------------- 1 file changed, 216 insertions(+), 199 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 77003719360d9..2a6a040fb7ac1 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -711,40 +711,6 @@ def _convert_slice_indexer(self, key: slice, axis: int): ax = self.obj._get_axis(min(axis, self.ndim - 1)) return ax._convert_slice_indexer(key, kind=self.name) - def _has_valid_setitem_indexer(self, indexer) -> bool: - return True - - def _has_valid_positional_setitem_indexer(self, indexer) -> bool: - """ - Validate that a positional indexer cannot enlarge its target - will raise if needed, does not modify the indexer externally. - - Returns - ------- - bool - """ - if isinstance(indexer, dict): - raise IndexError(f"{self.name} cannot enlarge its target object") - else: - if not isinstance(indexer, tuple): - indexer = _tuplify(self.ndim, indexer) - for ax, i in zip(self.obj.axes, indexer): - if isinstance(i, slice): - # should check the stop slice? - pass - elif is_list_like_indexer(i): - # should check the elements? 
- pass - elif is_integer(i): - if i >= len(ax): - raise IndexError( - f"{self.name} cannot enlarge its target object" - ) - elif isinstance(i, dict): - raise IndexError(f"{self.name} cannot enlarge its target object") - - return True - def _setitem_with_indexer(self, indexer, value): self._has_valid_setitem_indexer(indexer) @@ -1232,80 +1198,6 @@ def _align_frame(self, indexer, df: ABCDataFrame): raise ValueError("Incompatible indexer with DataFrame") - def _getitem_tuple(self, tup: Tuple): - try: - return self._getitem_lowerdim(tup) - except IndexingError: - pass - - # no multi-index, so validate all of the indexers - self._has_valid_tuple(tup) - - # ugly hack for GH #836 - if self._multi_take_opportunity(tup): - return self._multi_take(tup) - - # no shortcut needed - retval = self.obj - for i, key in enumerate(tup): - if com.is_null_slice(key): - continue - - retval = getattr(retval, self.name)._getitem_axis(key, axis=i) - - return retval - - def _multi_take_opportunity(self, tup: Tuple) -> bool: - """ - Check whether there is the possibility to use ``_multi_take``. - - Currently the limit is that all axes being indexed, must be indexed with - list-likes. - - Parameters - ---------- - tup : tuple - Tuple of indexers, one per axis. - - Returns - ------- - bool - Whether the current indexing, - can be passed through `_multi_take`. - """ - if not all(is_list_like_indexer(x) for x in tup): - return False - - # just too complicated - if any(com.is_bool_indexer(x) for x in tup): - return False - - return True - - def _multi_take(self, tup: Tuple): - """ - Create the indexers for the passed tuple of keys, and - executes the take operation. This allows the take operation to be - executed all at once, rather than once for each dimension. - Improving efficiency. - - Parameters - ---------- - tup : tuple - Tuple of indexers, one per axis. 
- - Returns - ------- - values: same type as the object being indexed - """ - # GH 836 - o = self.obj - d = { - axis: self._get_listlike_indexer(key, axis) - for (key, axis) in zip(tup, o._AXIS_ORDERS) - } - return o._reindex_with_indexers(d, copy=True, allow_dups=True) - def _handle_lowerdim_multi_index_axis0(self, tup: Tuple): # we have an axis0 multi-index, handle or raise axis = self.axis or 0 @@ -1426,97 +1318,6 @@ def _getitem_nested_tuple(self, tup: Tuple): return obj - def _get_listlike_indexer(self, key, axis: int, raise_missing: bool = False): - """ - Transform a list-like of keys into a new index and an indexer. - - Parameters - ---------- - key : list-like - Targeted labels. - axis: int - Dimension on which the indexing is being made. - raise_missing: bool, default False - Whether to raise a KeyError if some labels were not found. - Will be removed in the future, and then this method will always behave as - if ``raise_missing=True``. - - Raises - ------ - KeyError - If at least one key was requested but none was found, and - raise_missing=True. - - Returns - ------- - keyarr: Index - New index (coinciding with 'key' if the axis is unique). - values : array-like - Indexer for the return object, -1 denotes keys not found. 
- """ - o = self.obj - ax = o._get_axis(axis) - - # Have the index compute an indexer or return None - # if it cannot handle: - indexer, keyarr = ax._convert_listlike_indexer(key, kind=self.name) - # We only act on all found values: - if indexer is not None and (indexer != -1).all(): - self._validate_read_indexer(key, indexer, axis, raise_missing=raise_missing) - return ax[indexer], indexer - - if ax.is_unique and not getattr(ax, "is_overlapping", False): - indexer = ax.get_indexer_for(key) - keyarr = ax.reindex(keyarr)[0] - else: - keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr) - - self._validate_read_indexer(keyarr, indexer, axis, raise_missing=raise_missing) - return keyarr, indexer - - def _getitem_iterable(self, key, axis: int): - """ - Index current object with an an iterable key. - - The iterable key can be a boolean indexer or a collection of keys. - - Parameters - ---------- - key : iterable - Targeted labels or boolean indexer. - axis: int - Dimension on which the indexing is being made. - - Raises - ------ - KeyError - If no key was found. Will change in the future to raise if not all - keys were found. - IndexingError - If the boolean indexer is unalignable with the object being - indexed. - - Returns - ------- - scalar, DataFrame, or Series: indexed value(s). 
- """ - # caller is responsible for ensuring non-None axis - self._validate_key(key, axis) - - labels = self.obj._get_axis(axis) - - if com.is_bool_indexer(key): - # A boolean indexer - key = check_bool_indexer(labels, key) - (inds,) = key.nonzero() - return self.obj._take_with_is_copy(inds, axis=axis) - else: - # A collection of keys - keyarr, indexer = self._get_listlike_indexer(key, axis, raise_missing=False) - return self.obj._reindex_with_indexers( - {axis: [keyarr, indexer]}, copy=True, allow_dups=True - ) - def _validate_read_indexer( self, key, indexer, axis: int, raise_missing: bool = False ): @@ -1600,9 +1401,15 @@ def __getitem__(self, key): def _is_scalar_access(self, key: Tuple): raise NotImplementedError() + def _getitem_tuple(self, tup: Tuple): + raise AbstractMethodError(self) + def _getitem_axis(self, key, axis: int): raise NotImplementedError() + def _has_valid_setitem_indexer(self, indexer) -> bool: + raise AbstractMethodError(self) + def _getbool_axis(self, key, axis: int): # caller is responsible for ensuring non-None axis labels = self.obj._get_axis(axis) @@ -1620,6 +1427,9 @@ class _LocIndexer(_LocationIndexer): "index is integers), listlike of labels, boolean" ) + # ------------------------------------------------------------------- + # Key Checks + @Appender(_LocationIndexer._validate_key.__doc__) def _validate_key(self, key, axis: int): @@ -1637,6 +1447,9 @@ def _validate_key(self, key, axis: int): if not is_list_like_indexer(key): self._convert_scalar_indexer(key, axis) + def _has_valid_setitem_indexer(self, indexer) -> bool: + return True + def _is_scalar_access(self, key: Tuple) -> bool: """ Returns @@ -1668,6 +1481,61 @@ def _is_scalar_access(self, key: Tuple) -> bool: return True + # ------------------------------------------------------------------- + # MultiIndex Handling + + def _multi_take_opportunity(self, tup: Tuple) -> bool: + """ + Check whether there is the possibility to use ``_multi_take``. 
+ + Currently the limit is that all axes being indexed, must be indexed with + list-likes. + + Parameters + ---------- + tup : tuple + Tuple of indexers, one per axis. + + Returns + ------- + bool + Whether the current indexing, + can be passed through `_multi_take`. + """ + if not all(is_list_like_indexer(x) for x in tup): + return False + + # just too complicated + if any(com.is_bool_indexer(x) for x in tup): + return False + + return True + + def _multi_take(self, tup: Tuple): + """ + Create the indexers for the passed tuple of keys, and + executes the take operation. This allows the take operation to be + executed all at once, rather than once for each dimension. + Improving efficiency. + + Parameters + ---------- + tup : tuple + Tuple of indexers, one per axis. + + Returns + ------- + values: same type as the object being indexed + """ + # GH 836 + d = { + axis: self._get_listlike_indexer(key, axis) + for (key, axis) in zip(tup, self.obj._AXIS_ORDERS) + } + return self.obj._reindex_with_indexers(d, copy=True, allow_dups=True) + + # ------------------------------------------------------------------- + def _get_partial_string_timestamp_match_key(self, key, labels): """ Translate any partial string timestamp matches in key, returning the @@ -1700,6 +1568,72 @@ def _get_partial_string_timestamp_match_key(self, key, labels): return key + def _getitem_iterable(self, key, axis: int): + """ + Index current object with an iterable key. + + The iterable key can be a boolean indexer or a collection of keys. + + Parameters + ---------- + key : iterable + Targeted labels or boolean indexer. + axis: int + Dimension on which the indexing is being made. + + Raises + ------ + KeyError + If no key was found. Will change in the future to raise if not all + keys were found. + IndexingError + If the boolean indexer is unalignable with the object being + indexed. + + Returns + ------- + scalar, DataFrame, or Series: indexed value(s). 
+ """ + # caller is responsible for ensuring non-None axis + self._validate_key(key, axis) + + labels = self.obj._get_axis(axis) + + if com.is_bool_indexer(key): + # A boolean indexer + key = check_bool_indexer(labels, key) + (inds,) = key.nonzero() + return self.obj._take_with_is_copy(inds, axis=axis) + else: + # A collection of keys + keyarr, indexer = self._get_listlike_indexer(key, axis, raise_missing=False) + return self.obj._reindex_with_indexers( + {axis: [keyarr, indexer]}, copy=True, allow_dups=True + ) + + def _getitem_tuple(self, tup: Tuple): + try: + return self._getitem_lowerdim(tup) + except IndexingError: + pass + + # no multi-index, so validate all of the indexers + self._has_valid_tuple(tup) + + # ugly hack for GH #836 + if self._multi_take_opportunity(tup): + return self._multi_take(tup) + + # no shortcut needed + retval = self.obj + for i, key in enumerate(tup): + if com.is_null_slice(key): + continue + + retval = getattr(retval, self.name)._getitem_axis(key, axis=i) + + return retval + def _getitem_axis(self, key, axis: int): key = item_from_zerodim(key) if is_iterator(key): @@ -1864,6 +1798,53 @@ def _convert_to_indexer(self, key, axis: int): return {"key": key} raise + def _get_listlike_indexer(self, key, axis: int, raise_missing: bool = False): + """ + Transform a list-like of keys into a new index and an indexer. + + Parameters + ---------- + key : list-like + Targeted labels. + axis: int + Dimension on which the indexing is being made. + raise_missing: bool, default False + Whether to raise a KeyError if some labels were not found. + Will be removed in the future, and then this method will always behave as + if ``raise_missing=True``. + + Raises + ------ + KeyError + If at least one key was requested but none was found, and + raise_missing=True. + + Returns + ------- + keyarr: Index + New index (coinciding with 'key' if the axis is unique). + values : array-like + Indexer for the return object, -1 denotes keys not found. 
+ """ + ax = self.obj._get_axis(axis) + + # Have the index compute an indexer or return None + # if it cannot handle: + indexer, keyarr = ax._convert_listlike_indexer(key, kind=self.name) + # We only act on all found values: + if indexer is not None and (indexer != -1).all(): + self._validate_read_indexer(key, indexer, axis, raise_missing=raise_missing) + return ax[indexer], indexer + + if ax.is_unique and not getattr(ax, "is_overlapping", False): + indexer = ax.get_indexer_for(key) + keyarr = ax.reindex(keyarr)[0] + else: + keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr) + + self._validate_read_indexer(keyarr, indexer, axis, raise_missing=raise_missing) + return keyarr, indexer + @Appender(IndexingMixin.iloc.__doc__) class _iLocIndexer(_LocationIndexer): @@ -1873,6 +1854,9 @@ class _iLocIndexer(_LocationIndexer): ) _takeable = True + # ------------------------------------------------------------------- + # Key Checks + def _validate_key(self, key, axis: int): if com.is_bool_indexer(key): if hasattr(key, "index") and isinstance(key.index, Index): @@ -1913,6 +1897,37 @@ def _validate_key(self, key, axis: int): def _has_valid_setitem_indexer(self, indexer): self._has_valid_positional_setitem_indexer(indexer) + def _has_valid_positional_setitem_indexer(self, indexer) -> bool: + """ + Validate that a positional indexer cannot enlarge its target + will raise if needed, does not modify the indexer externally. + + Returns + ------- + bool + """ + if isinstance(indexer, dict): + raise IndexError(f"{self.name} cannot enlarge its target object") + else: + if not isinstance(indexer, tuple): + indexer = _tuplify(self.ndim, indexer) + for ax, i in zip(self.obj.axes, indexer): + if isinstance(i, slice): + # should check the stop slice? + pass + elif is_list_like_indexer(i): + # should check the elements? 
+ pass + elif is_integer(i): + if i >= len(ax): + raise IndexError( + f"{self.name} cannot enlarge its target object" + ) + elif isinstance(i, dict): + raise IndexError(f"{self.name} cannot enlarge its target object") + + return True + def _is_scalar_access(self, key: Tuple) -> bool: """ Returns @@ -1956,6 +1971,8 @@ def _validate_integer(self, key: int, axis: int) -> None: if key >= len_axis or key < -len_axis: raise IndexError("single positional indexer is out-of-bounds") + # ------------------------------------------------------------------- + def _getitem_tuple(self, tup: Tuple): self._has_valid_tuple(tup) From 70a268991322dbfb2d42ceea456942f2edeb17aa Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 2 Feb 2020 16:36:41 -0800 Subject: [PATCH 2/3] rebase fixup --- pandas/core/series.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index bfe9969daaa8e..040fcf392733b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -23,6 +23,7 @@ from pandas._config import get_option from pandas._libs import lib, properties, reshape, tslibs +from pandas._libs.index import validate_numeric_casting from pandas._typing import Label from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution @@ -1022,7 +1023,7 @@ def __setitem__(self, key, value): def _set_with_engine(self, key, value): # fails with AttributeError for IntervalIndex loc = self.index._engine.get_loc(key) - libindex.validate_numeric_casting(self.dtype, value) + validate_numeric_casting(self.dtype, value) self._values[loc] = value def _set_with(self, key, value): @@ -1105,7 +1106,7 @@ def _set_value(self, label, value, takeable: bool = False): self._values[label] = value else: loc = self.index.get_loc(label) - libindex.validate_numeric_casting(self.dtype, value) + validate_numeric_casting(self.dtype, value) self._values[loc] = value except KeyError: From 
49d73804b4ffc2a56e533c49215e43dd1afa7cb8 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 4 Feb 2020 18:04:18 -0800 Subject: [PATCH 3/3] merge fixup --- pandas/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 7db82e23d2627..39a11f6f9af94 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1823,7 +1823,7 @@ def _get_listlike_indexer(self, key, axis: int, raise_missing: bool = False): # Have the index compute an indexer or return None # if it cannot handle: - indexer, keyarr = ax._convert_listlike_indexer(key, kind="loc") + indexer, keyarr = ax._convert_listlike_indexer(key) # We only act on all found values: if indexer is not None and (indexer != -1).all(): self._validate_read_indexer(key, indexer, axis, raise_missing=raise_missing)