diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index bd16495e472b1..384676ede15f2 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -42,6 +42,7 @@ from pandas.core.indexing import check_setitem_lengths from pandas.core.internals.arrays import extract_array import pandas.core.missing as missing +from pandas.core.nanops import nanpercentile from pandas.io.formats.printing import pprint_thing @@ -1438,7 +1439,7 @@ def _unstack(self, unstacker_func, new_columns, n_rows, fill_value): blocks = [make_block(new_values, placement=new_placement)] return blocks, mask - def quantile(self, qs, interpolation='linear', axis=0, axes=None): + def quantile(self, qs, interpolation='linear', axis=0): """ compute the quantiles of the @@ -1447,94 +1448,53 @@ def quantile(self, qs, interpolation='linear', axis=0, axes=None): qs: a scalar or list of the quantiles to be computed interpolation: type of interpolation, default 'linear' axis: axis to compute, default 0 - axes : BlockManager.axes Returns ------- - tuple of (axis, block) - + Block """ - kw = {'interpolation': interpolation} values = self.get_values() values, _ = self._try_coerce_args(values, values) - def _nanpercentile1D(values, mask, q, **kw): - # mask is Union[ExtensionArray, ndarray] - values = values[~mask] - - if len(values) == 0: - if lib.is_scalar(q): - return self._na_value - else: - return np.array([self._na_value] * len(q), - dtype=values.dtype) - - return np.percentile(values, q, **kw) - - def _nanpercentile(values, q, axis, **kw): - - mask = isna(self.values) - if not lib.is_scalar(mask) and mask.any(): - if self.ndim == 1: - return _nanpercentile1D(values, mask, q, **kw) - else: - # for nonconsolidatable blocks mask is 1D, but values 2D - if mask.ndim < values.ndim: - mask = mask.reshape(values.shape) - if axis == 0: - values = values.T - mask = mask.T - result = [_nanpercentile1D(val, m, q, **kw) for (val, m) - in zip(list(values), list(mask))] - 
result = np.array(result, dtype=values.dtype, copy=False).T - return result - else: - return np.percentile(values, q, axis=axis, **kw) - - from pandas import Float64Index is_empty = values.shape[axis] == 0 - if is_list_like(qs): - ax = Float64Index(qs) + orig_scalar = not is_list_like(qs) + if orig_scalar: + # make list-like, unpack later + qs = [qs] - if is_empty: - if self.ndim == 1: - result = self._na_value - else: - # create the array of na_values - # 2d len(values) * len(qs) - result = np.repeat(np.array([self._na_value] * len(qs)), - len(values)).reshape(len(values), - len(qs)) + if is_empty: + if self.ndim == 1: + result = self._na_value else: - result = _nanpercentile(values, np.array(qs) * 100, - axis=axis, **kw) - - result = np.array(result, copy=False) - if self.ndim > 1: - result = result.T - + # create the array of na_values + # 2d len(values) * len(qs) + result = np.repeat(np.array([self._na_value] * len(qs)), + len(values)).reshape(len(values), + len(qs)) else: + mask = isna(self.values) + result = nanpercentile(values, np.array(qs) * 100, + axis=axis, na_value=self._na_value, + mask=mask, ndim=self.ndim, + interpolation=interpolation) - if self.ndim == 1: - ax = Float64Index([qs]) - else: - ax = axes[0] + result = np.array(result, copy=False) + if self.ndim > 1: + result = result.T - if is_empty: - if self.ndim == 1: - result = self._na_value - else: - result = np.array([self._na_value] * len(self)) - else: - result = _nanpercentile(values, qs * 100, axis=axis, **kw) + if orig_scalar and not lib.is_scalar(result): + # result could be scalar in case with is_empty and self.ndim == 1 + assert result.shape[-1] == 1, result.shape + result = result[..., 0] + result = lib.item_from_zerodim(result) ndim = getattr(result, 'ndim', None) or 0 result = self._try_coerce_result(result) if lib.is_scalar(result): - return ax, self.make_block_scalar(result) - return ax, make_block(result, - placement=np.arange(len(result)), - ndim=ndim) + return 
self.make_block_scalar(result) + return make_block(result, + placement=np.arange(len(result)), + ndim=ndim) def _replace_coerce(self, to_replace, value, inplace=True, regex=False, convert=False, mask=None): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index eba49d18431ef..0ad0a994e8a95 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -16,7 +16,7 @@ maybe_promote) from pandas.core.dtypes.common import ( _NS_DTYPE, is_datetimelike_v_numeric, is_extension_array_dtype, - is_extension_type, is_numeric_v_string_like, is_scalar) + is_extension_type, is_list_like, is_numeric_v_string_like, is_scalar) import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.generic import ABCExtensionArray, ABCSeries from pandas.core.dtypes.missing import isna @@ -402,34 +402,47 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False, bm._consolidate_inplace() return bm - def reduction(self, f, axis=0, consolidate=True, transposed=False, - **kwargs): + def quantile(self, axis=0, consolidate=True, transposed=False, + interpolation='linear', qs=None, numeric_only=None): """ - iterate over the blocks, collect and create a new block manager. + Iterate over blocks applying quantile reduction. This routine is intended for reduction type operations and will do inference on the generated blocks. Parameters ---------- - f: the callable or function name to operate on at the block level axis: reduction axis, default 0 consolidate: boolean, default True. 
Join together blocks having same dtype transposed: boolean, default False we are holding transposed data + interpolation : type of interpolation, default 'linear' + qs : a scalar or list of the quantiles to be computed + numeric_only : ignored Returns ------- Block Manager (new object) - """ if consolidate: self._consolidate_inplace() + def get_axe(block, qs, axes): + from pandas import Float64Index + if is_list_like(qs): + ax = Float64Index(qs) + elif block.ndim == 1: + ax = Float64Index([qs]) + else: + ax = axes[0] + return ax + axes, blocks = [], [] for b in self.blocks: - axe, block = getattr(b, f)(axis=axis, axes=self.axes, **kwargs) + block = b.quantile(axis=axis, qs=qs, interpolation=interpolation) + + axe = get_axe(b, qs, axes=self.axes) axes.append(axe) blocks.append(block) @@ -496,9 +509,6 @@ def isna(self, func, **kwargs): def where(self, **kwargs): return self.apply('where', **kwargs) - def quantile(self, **kwargs): - return self.reduction('quantile', **kwargs) - def setitem(self, **kwargs): return self.apply('setitem', **kwargs) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index f95c133163ddb..89e191f171f97 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1194,3 +1194,75 @@ def f(x, y): nanle = make_nancomp(operator.le) naneq = make_nancomp(operator.eq) nanne = make_nancomp(operator.ne) + + +def _nanpercentile_1d(values, mask, q, na_value, interpolation): + """ + Wrapper for np.percentile that skips missing values, specialized to + 1-dimensional case. 
+ + Parameters + ---------- + values : array over which to find quantiles + mask : ndarray[bool] + locations in values that should be considered missing + q : scalar or array of quantile indices to find + na_value : scalar + value to return for empty or all-null values + interpolation : str + + Returns + ------- + quantiles : scalar or array + """ + # mask is Union[ExtensionArray, ndarray] + values = values[~mask] + + if len(values) == 0: + if lib.is_scalar(q): + return na_value + else: + return np.array([na_value] * len(q), + dtype=values.dtype) + + return np.percentile(values, q, interpolation=interpolation) + + +def nanpercentile(values, q, axis, na_value, mask, ndim, interpolation): + """ + Wrapper for np.percentile that skips missing values. + + Parameters + ---------- + values : array over which to find quantiles + q : scalar or array of quantile indices to find + axis : {0, 1} + na_value : scalar + value to return for empty or all-null values + mask : ndarray[bool] + locations in values that should be considered missing + ndim : {1, 2} + interpolation : str + + Returns + ------- + quantiles : scalar or array + """ + if not lib.is_scalar(mask) and mask.any(): + if ndim == 1: + return _nanpercentile_1d(values, mask, q, na_value, + interpolation=interpolation) + else: + # for nonconsolidatable blocks mask is 1D, but values 2D + if mask.ndim < values.ndim: + mask = mask.reshape(values.shape) + if axis == 0: + values = values.T + mask = mask.T + result = [_nanpercentile_1d(val, m, q, na_value, + interpolation=interpolation) + for (val, m) in zip(list(values), list(mask))] + result = np.array(result, dtype=values.dtype, copy=False).T + return result + else: + return np.percentile(values, q, axis=axis, interpolation=interpolation)