diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 9decd5e212cbf..e304684036766 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -10,6 +10,7 @@ from pandas.core.algorithms import factorize from pandas.core.base import PandasObject, PandasDelegate import pandas.core.common as com +from pandas.core.missing import interpolate_2d from pandas.util.decorators import cache_readonly, deprecate_kwarg from pandas.core.common import (ABCSeries, ABCIndexClass, ABCPeriodIndex, ABCCategoricalIndex, @@ -1312,7 +1313,7 @@ def fillna(self, value=None, method=None, limit=None): if method is not None: values = self.to_dense().reshape(-1, len(self)) - values = com.interpolate_2d( + values = interpolate_2d( values, method, 0, None, value).astype(self.categories.dtype)[0] values = _get_codes_for_values(values, self.categories) diff --git a/pandas/core/common.py b/pandas/core/common.py index d6aa6e6bb90cc..4490aaf58a002 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -1420,350 +1420,6 @@ def _fill_zeros(result, x, y, name, fill): return result -def _interp_wrapper(f, wrap_dtype, na_override=None): - def wrapper(arr, mask, limit=None): - view = arr.view(wrap_dtype) - f(view, mask, limit=limit) - return wrapper - - -_pad_1d_datetime = _interp_wrapper(algos.pad_inplace_int64, np.int64) -_pad_2d_datetime = _interp_wrapper(algos.pad_2d_inplace_int64, np.int64) -_backfill_1d_datetime = _interp_wrapper(algos.backfill_inplace_int64, - np.int64) -_backfill_2d_datetime = _interp_wrapper(algos.backfill_2d_inplace_int64, - np.int64) - - -def pad_1d(values, limit=None, mask=None, dtype=None): - - if dtype is None: - dtype = values.dtype - _method = None - if is_float_dtype(values): - _method = getattr(algos, 'pad_inplace_%s' % dtype.name, None) - elif dtype in _DATELIKE_DTYPES or is_datetime64_dtype(values): - _method = _pad_1d_datetime - elif is_integer_dtype(values): - values = _ensure_float64(values) - _method = algos.pad_inplace_float64 - elif values.dtype == np.object_: - _method = algos.pad_inplace_object - - if _method is None: - raise ValueError('Invalid dtype for pad_1d [%s]' % dtype.name) - - if mask is None: - mask = isnull(values) - mask = mask.view(np.uint8) - _method(values, mask, limit=limit) - return values - - -def backfill_1d(values, limit=None, mask=None, dtype=None): - - if dtype is None: - dtype = values.dtype - _method = None - if is_float_dtype(values): - _method = getattr(algos, 'backfill_inplace_%s' % dtype.name, None) - elif dtype in _DATELIKE_DTYPES or is_datetime64_dtype(values): - _method = _backfill_1d_datetime - elif is_integer_dtype(values): - values = _ensure_float64(values) - _method = algos.backfill_inplace_float64 - elif values.dtype == np.object_: - _method = algos.backfill_inplace_object - - if _method is None: - raise ValueError('Invalid dtype for backfill_1d [%s]' % dtype.name) - - if mask is None: - mask = isnull(values) - mask = mask.view(np.uint8) - - _method(values, mask, limit=limit) - return values - - -def pad_2d(values, limit=None, mask=None, dtype=None): - - if dtype is None: - dtype = values.dtype - _method = None - if is_float_dtype(values): - _method = getattr(algos, 'pad_2d_inplace_%s' % dtype.name, None) - elif dtype in _DATELIKE_DTYPES or is_datetime64_dtype(values): - _method = _pad_2d_datetime - elif is_integer_dtype(values): - values = _ensure_float64(values) - _method = algos.pad_2d_inplace_float64 - elif values.dtype == np.object_: - _method = algos.pad_2d_inplace_object - - if _method is None: - 
raise ValueError('Invalid dtype for pad_2d [%s]' % dtype.name) - - if mask is None: - mask = isnull(values) - mask = mask.view(np.uint8) - - if np.all(values.shape): - _method(values, mask, limit=limit) - else: - # for test coverage - pass - return values - - -def backfill_2d(values, limit=None, mask=None, dtype=None): - - if dtype is None: - dtype = values.dtype - _method = None - if is_float_dtype(values): - _method = getattr(algos, 'backfill_2d_inplace_%s' % dtype.name, None) - elif dtype in _DATELIKE_DTYPES or is_datetime64_dtype(values): - _method = _backfill_2d_datetime - elif is_integer_dtype(values): - values = _ensure_float64(values) - _method = algos.backfill_2d_inplace_float64 - elif values.dtype == np.object_: - _method = algos.backfill_2d_inplace_object - - if _method is None: - raise ValueError('Invalid dtype for backfill_2d [%s]' % dtype.name) - - if mask is None: - mask = isnull(values) - mask = mask.view(np.uint8) - - if np.all(values.shape): - _method(values, mask, limit=limit) - else: - # for test coverage - pass - return values - - -def _clean_interp_method(method, **kwargs): - order = kwargs.get('order') - valid = ['linear', 'time', 'index', 'values', 'nearest', 'zero', 'slinear', - 'quadratic', 'cubic', 'barycentric', 'polynomial', - 'krogh', 'piecewise_polynomial', - 'pchip', 'spline'] - if method in ('spline', 'polynomial') and order is None: - raise ValueError("You must specify the order of the spline or " - "polynomial.") - if method not in valid: - raise ValueError("method must be one of {0}." - "Got '{1}' instead.".format(valid, method)) - return method - - -def interpolate_1d(xvalues, yvalues, method='linear', limit=None, - limit_direction='forward', - fill_value=None, bounds_error=False, order=None, **kwargs): - """ - Logic for the 1-d interpolation. The result should be 1-d, inputs - xvalues and yvalues will each be 1-d arrays of the same length. - - Bounds_error is currently hardcoded to False since non-scipy ones don't - take it as an argumnet. - """ - # Treat the original, non-scipy methods first. - - invalid = isnull(yvalues) - valid = ~invalid - - if not valid.any(): - # have to call np.asarray(xvalues) since xvalues could be an Index - # which cant be mutated - result = np.empty_like(np.asarray(xvalues), dtype=np.float64) - result.fill(np.nan) - return result - - if valid.all(): - return yvalues - - if method == 'time': - if not getattr(xvalues, 'is_all_dates', None): - # if not issubclass(xvalues.dtype.type, np.datetime64): - raise ValueError('time-weighted interpolation only works ' - 'on Series or DataFrames with a ' - 'DatetimeIndex') - method = 'values' - - def _interp_limit(invalid, fw_limit, bw_limit): - "Get idx of values that won't be filled b/c they exceed the limits." - for x in np.where(invalid)[0]: - if invalid[max(0, x - fw_limit):x + bw_limit + 1].all(): - yield x - - valid_limit_directions = ['forward', 'backward', 'both'] - limit_direction = limit_direction.lower() - if limit_direction not in valid_limit_directions: - msg = 'Invalid limit_direction: expecting one of %r, got %r.' % ( - valid_limit_directions, limit_direction) - raise ValueError(msg) - - from pandas import Series - ys = Series(yvalues) - start_nans = set(range(ys.first_valid_index())) - end_nans = set(range(1 + ys.last_valid_index(), len(valid))) - - # This is a list of the indexes in the series whose yvalue is currently NaN, - # but whose interpolated yvalue will be overwritten with NaN after computing - # the interpolation. 
For each index in this list, one of these conditions is - # true of the corresponding NaN in the yvalues: - # - # a) It is one of a chain of NaNs at the beginning of the series, and either - # limit is not specified or limit_direction is 'forward'. - # b) It is one of a chain of NaNs at the end of the series, and limit is - # specified and limit_direction is 'backward' or 'both'. - # c) Limit is nonzero and it is further than limit from the nearest non-NaN - # value (with respect to the limit_direction setting). - # - # The default behavior is to fill forward with no limit, ignoring NaNs at - # the beginning (see issues #9218 and #10420) - violate_limit = sorted(start_nans) - - if limit: - if limit_direction == 'forward': - violate_limit = sorted(start_nans | set(_interp_limit(invalid, limit, 0))) - if limit_direction == 'backward': - violate_limit = sorted(end_nans | set(_interp_limit(invalid, 0, limit))) - if limit_direction == 'both': - violate_limit = sorted(_interp_limit(invalid, limit, limit)) - - xvalues = getattr(xvalues, 'values', xvalues) - yvalues = getattr(yvalues, 'values', yvalues) - result = yvalues.copy() - - if method in ['linear', 'time', 'index', 'values']: - if method in ('values', 'index'): - inds = np.asarray(xvalues) - # hack for DatetimeIndex, #1646 - if issubclass(inds.dtype.type, np.datetime64): - inds = inds.view(np.int64) - if inds.dtype == np.object_: - inds = lib.maybe_convert_objects(inds) - else: - inds = xvalues - result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid]) - result[violate_limit] = np.nan - return result - - sp_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic', - 'barycentric', 'krogh', 'spline', 'polynomial', - 'piecewise_polynomial', 'pchip'] - if method in sp_methods: - inds = np.asarray(xvalues) - # hack for DatetimeIndex, #1646 - if issubclass(inds.dtype.type, np.datetime64): - inds = inds.view(np.int64) - result[invalid] = _interpolate_scipy_wrapper( - inds[valid], yvalues[valid], inds[invalid], method=method, - fill_value=fill_value, - bounds_error=bounds_error, order=order, **kwargs) - result[violate_limit] = np.nan - return result - - -def _interpolate_scipy_wrapper(x, y, new_x, method, fill_value=None, - bounds_error=False, order=None, **kwargs): - """ - passed off to scipy.interpolate.interp1d. method is scipy's kind. - Returns an array interpolated at new_x. Add any new methods to - the list in _clean_interp_method - """ - try: - from scipy import interpolate - from pandas import DatetimeIndex - except ImportError: - raise ImportError('{0} interpolation requires Scipy'.format(method)) - - new_x = np.asarray(new_x) - - # ignores some kwargs that could be passed along. 
- alt_methods = { - 'barycentric': interpolate.barycentric_interpolate, - 'krogh': interpolate.krogh_interpolate, - 'piecewise_polynomial': interpolate.piecewise_polynomial_interpolate, - } - - if getattr(x, 'is_all_dates', False): - # GH 5975, scipy.interp1d can't hande datetime64s - x, new_x = x._values.astype('i8'), new_x.astype('i8') - - try: - alt_methods['pchip'] = interpolate.pchip_interpolate - except AttributeError: - if method == 'pchip': - raise ImportError("Your version of scipy does not support " - "PCHIP interpolation.") - - interp1d_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic', - 'polynomial'] - if method in interp1d_methods: - if method == 'polynomial': - method = order - terp = interpolate.interp1d(x, y, kind=method, fill_value=fill_value, - bounds_error=bounds_error) - new_y = terp(new_x) - elif method == 'spline': - # GH #10633 - if not order: - raise ValueError("order needs to be specified and greater than 0") - terp = interpolate.UnivariateSpline(x, y, k=order, **kwargs) - new_y = terp(new_x) - else: - # GH 7295: need to be able to write for some reason - # in some circumstances: check all three - if not x.flags.writeable: - x = x.copy() - if not y.flags.writeable: - y = y.copy() - if not new_x.flags.writeable: - new_x = new_x.copy() - method = alt_methods[method] - new_y = method(x, y, new_x, **kwargs) - return new_y - - -def interpolate_2d(values, method='pad', axis=0, limit=None, fill_value=None, dtype=None): - """ perform an actual interpolation of values, values will be make 2-d if - needed fills inplace, returns the result - """ - - transf = (lambda x: x) if axis == 0 else (lambda x: x.T) - - # reshape a 1 dim if needed - ndim = values.ndim - if values.ndim == 1: - if axis != 0: # pragma: no cover - raise AssertionError("cannot interpolate on a ndim == 1 with " - "axis != 0") - values = values.reshape(tuple((1,) + values.shape)) - - if fill_value is None: - mask = None - else: # todo create faster fill func without masking - mask = mask_missing(transf(values), fill_value) - - method = _clean_fill_method(method) - if method == 'pad': - values = transf(pad_2d(transf(values), limit=limit, mask=mask, dtype=dtype)) - else: - values = transf(backfill_2d(transf(values), limit=limit, mask=mask, dtype=dtype)) - - # reshape back - if ndim == 1: - values = values[0] - - return values - - def _consensus_name_attr(objs): name = objs[0].name for obj in objs[1:]: @@ -1772,14 +1428,6 @@ def _consensus_name_attr(objs): return name -_fill_methods = {'pad': pad_1d, 'backfill': backfill_1d} - - -def _get_fill_func(method): - method = _clean_fill_method(method) - return _fill_methods[method] - - #---------------------------------------------------------------------- # Lots of little utilities @@ -2815,31 +2463,6 @@ def _astype_nansafe(arr, dtype, copy=True): return arr.view(dtype) -def _clean_fill_method(method, allow_nearest=False): - if method is None: - return None - method = method.lower() - if method == 'ffill': - method = 'pad' - if method == 'bfill': - method = 'backfill' - - valid_methods = ['pad', 'backfill'] - expecting = 'pad (ffill) or backfill (bfill)' - if allow_nearest: - valid_methods.append('nearest') - expecting = 'pad (ffill), backfill (bfill) or nearest' - if method not in valid_methods: - msg = ('Invalid fill method. Expecting %s. 
Got %s' - % (expecting, method)) - raise ValueError(msg) - return method - - -def _clean_reindex_fill_method(method): - return _clean_fill_method(method, allow_nearest=True) - - def _all_none(*args): for arg in args: if arg is not None: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c18f4ec0a1f47..f46296bb6f70c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -16,6 +16,7 @@ from pandas.tseries.period import PeriodIndex from pandas.core.internals import BlockManager import pandas.core.common as com +import pandas.core.missing as mis import pandas.core.datetools as datetools from pandas import compat from pandas.compat import map, zip, lrange, string_types, isidentifier @@ -50,7 +51,7 @@ def _single_replace(self, to_replace, method, inplace, limit): orig_dtype = self.dtype result = self if inplace else self.copy() - fill_f = com._get_fill_func(method) + fill_f = mis._get_fill_func(method) mask = com.mask_missing(result.values, to_replace) values = fill_f(result.values, limit=limit, mask=mask) @@ -1928,7 +1929,7 @@ def reindex(self, *args, **kwargs): # construct the args axes, kwargs = self._construct_axes_from_arguments(args, kwargs) - method = com._clean_reindex_fill_method(kwargs.pop('method', None)) + method = mis._clean_reindex_fill_method(kwargs.pop('method', None)) level = kwargs.pop('level', None) copy = kwargs.pop('copy', True) limit = kwargs.pop('limit', None) @@ -2041,7 +2042,7 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True, axis_name = self._get_axis_name(axis) axis_values = self._get_axis(axis_name) - method = com._clean_reindex_fill_method(method) + method = mis._clean_reindex_fill_method(method) new_index, indexer = axis_values.reindex(labels, method, level, limit=limit) return self._reindex_with_indexers( @@ -2775,7 +2776,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, if axis is None: axis = 0 axis = self._get_axis_number(axis) - method = com._clean_fill_method(method) + method = mis._clean_fill_method(method) from pandas import DataFrame if value is None: @@ -2806,7 +2807,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, return self._constructor.from_dict(result).__finalize__(self) # 2d or less - method = com._clean_fill_method(method) + method = mis._clean_fill_method(method) new_data = self._data.interpolate(method=method, axis=axis, limit=limit, @@ -3749,7 +3750,7 @@ def align(self, other, join='outer', axis=None, level=None, copy=True, fill_value=None, method=None, limit=None, fill_axis=0, broadcast_axis=None): from pandas import DataFrame, Series - method = com._clean_fill_method(method) + method = mis._clean_fill_method(method) if broadcast_axis == 1 and self.ndim != other.ndim: if isinstance(self, Series): diff --git a/pandas/core/index.py b/pandas/core/index.py index 003e444672d85..855e3f013bfd3 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -19,6 +19,7 @@ from pandas.util.decorators import (Appender, Substitution, cache_readonly, deprecate, deprecate_kwarg) import pandas.core.common as com +from pandas.core.missing import _clean_reindex_fill_method from pandas.core.common import (isnull, array_equivalent, is_dtype_equal, is_object_dtype, is_datetimetz, ABCSeries, ABCCategorical, ABCPeriodIndex, _values_from_object, is_float, is_integer, is_iterator, is_categorical_dtype, @@ -1832,7 +1833,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): positions matches the corresponding target values. 
Missing values in the target are marked by -1. """ - method = com._clean_reindex_fill_method(method) + method = _clean_reindex_fill_method(method) target = _ensure_index(target) if tolerance is not None: tolerance = self._convert_tolerance(tolerance) @@ -3347,7 +3348,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): ------- (indexer, mask) : (ndarray, ndarray) """ - method = com._clean_reindex_fill_method(method) + method = _clean_reindex_fill_method(method) target = _ensure_index(target) if isinstance(target, CategoricalIndex): @@ -5131,7 +5132,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): ------- (indexer, mask) : (ndarray, ndarray) """ - method = com._clean_reindex_fill_method(method) + method = _clean_reindex_fill_method(method) target = _ensure_index(target) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index b3e7e82b5feb7..1b08140ebec09 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -25,6 +25,7 @@ from pandas.core.categorical import Categorical, maybe_to_categorical from pandas.tseries.index import DatetimeIndex import pandas.core.common as com +import pandas.core.missing as mis import pandas.core.convert as convert from pandas.sparse.array import _maybe_to_sparse, SparseArray import pandas.lib as lib @@ -852,7 +853,7 @@ def check_int_bool(self, inplace): # a fill na type method try: - m = com._clean_fill_method(method) + m = mis._clean_fill_method(method) except: m = None @@ -870,7 +871,7 @@ def check_int_bool(self, inplace): mgr=mgr) # try an interp method try: - m = com._clean_interp_method(method, **kwargs) + m = mis._clean_interp_method(method, **kwargs) except: m = None @@ -909,7 +910,7 @@ def _interpolate_with_fill(self, method='pad', axis=0, inplace=False, values = self.values if inplace else self.values.copy() values, _, fill_value, _ = self._try_coerce_args(values, fill_value) values = self._try_operate(values) - values = com.interpolate_2d(values, + values = mis.interpolate_2d(values, method=method, axis=axis, limit=limit, @@ -949,8 +950,8 @@ def func(x): # process a 1-d slice, returning it # should the axis argument be handled below in apply_along_axis? - # i.e. not an arg to com.interpolate_1d - return com.interpolate_1d(index, x, method=method, limit=limit, + # i.e. 
not an arg to mis.interpolate_1d + return mis.interpolate_1d(index, x, method=method, limit=limit, limit_direction=limit_direction, fill_value=fill_value, bounds_error=False, **kwargs) @@ -2357,7 +2358,7 @@ def make_block_same_class(self, values, placement, def interpolate(self, method='pad', axis=0, inplace=False, limit=None, fill_value=None, **kwargs): - values = com.interpolate_2d( + values = mis.interpolate_2d( self.values.to_dense(), method, axis, limit, fill_value) return self.make_block_same_class(values=values, placement=self.mgr_locs) @@ -3773,7 +3774,7 @@ def reindex(self, new_axis, indexer=None, method=None, fill_value=None, # fill if needed if method is not None or limit is not None: - new_values = com.interpolate_2d(new_values, method=method, + new_values = mis.interpolate_2d(new_values, method=method, limit=limit, fill_value=fill_value) if self._block.is_sparse: diff --git a/pandas/core/missing.py b/pandas/core/missing.py new file mode 100644 index 0000000000000..f1143ad808b91 --- /dev/null +++ b/pandas/core/missing.py @@ -0,0 +1,390 @@ +""" +Routines for filling missing data +""" + +from functools import partial + +import numpy as np + +import pandas as pd +import pandas.core.common as com +import pandas.algos as algos +import pandas.lib as lib +from pandas.compat import range + + +def _clean_fill_method(method, allow_nearest=False): + if method is None: + return None + method = method.lower() + if method == 'ffill': + method = 'pad' + if method == 'bfill': + method = 'backfill' + + valid_methods = ['pad', 'backfill'] + expecting = 'pad (ffill) or backfill (bfill)' + if allow_nearest: + valid_methods.append('nearest') + expecting = 'pad (ffill), backfill (bfill) or nearest' + if method not in valid_methods: + msg = ('Invalid fill method. Expecting %s. Got %s' + % (expecting, method)) + raise ValueError(msg) + return method + + +def _clean_interp_method(method, **kwargs): + order = kwargs.get('order') + valid = ['linear', 'time', 'index', 'values', 'nearest', 'zero', 'slinear', + 'quadratic', 'cubic', 'barycentric', 'polynomial', + 'krogh', 'piecewise_polynomial', + 'pchip', 'spline'] + if method in ('spline', 'polynomial') and order is None: + raise ValueError("You must specify the order of the spline or " + "polynomial.") + if method not in valid: + raise ValueError("method must be one of {0}." + "Got '{1}' instead.".format(valid, method)) + return method + + +def interpolate_1d(xvalues, yvalues, method='linear', limit=None, + limit_direction='forward', + fill_value=None, bounds_error=False, order=None, **kwargs): + """ + Logic for the 1-d interpolation. The result should be 1-d, inputs + xvalues and yvalues will each be 1-d arrays of the same length. + + Bounds_error is currently hardcoded to False since non-scipy ones don't + take it as an argument. + """ + # Treat the original, non-scipy methods first.
+ + invalid = com.isnull(yvalues) + valid = ~invalid + + if not valid.any(): + # have to call np.asarray(xvalues) since xvalues could be an Index + # which cant be mutated + result = np.empty_like(np.asarray(xvalues), dtype=np.float64) + result.fill(np.nan) + return result + + if valid.all(): + return yvalues + + if method == 'time': + if not getattr(xvalues, 'is_all_dates', None): + # if not issubclass(xvalues.dtype.type, np.datetime64): + raise ValueError('time-weighted interpolation only works ' + 'on Series or DataFrames with a ' + 'DatetimeIndex') + method = 'values' + + def _interp_limit(invalid, fw_limit, bw_limit): + "Get idx of values that won't be filled b/c they exceed the limits." + for x in np.where(invalid)[0]: + if invalid[max(0, x - fw_limit):x + bw_limit + 1].all(): + yield x + + valid_limit_directions = ['forward', 'backward', 'both'] + limit_direction = limit_direction.lower() + if limit_direction not in valid_limit_directions: + msg = 'Invalid limit_direction: expecting one of %r, got %r.' % ( + valid_limit_directions, limit_direction) + raise ValueError(msg) + + from pandas import Series + ys = Series(yvalues) + start_nans = set(range(ys.first_valid_index())) + end_nans = set(range(1 + ys.last_valid_index(), len(valid))) + + # This is a list of the indexes in the series whose yvalue is currently NaN, + # but whose interpolated yvalue will be overwritten with NaN after computing + # the interpolation. For each index in this list, one of these conditions is + # true of the corresponding NaN in the yvalues: + # + # a) It is one of a chain of NaNs at the beginning of the series, and either + # limit is not specified or limit_direction is 'forward'. + # b) It is one of a chain of NaNs at the end of the series, and limit is + # specified and limit_direction is 'backward' or 'both'. + # c) Limit is nonzero and it is further than limit from the nearest non-NaN + # value (with respect to the limit_direction setting). 
+ # + # The default behavior is to fill forward with no limit, ignoring NaNs at + # the beginning (see issues #9218 and #10420) + violate_limit = sorted(start_nans) + + if limit: + if limit_direction == 'forward': + violate_limit = sorted(start_nans | set(_interp_limit(invalid, limit, 0))) + if limit_direction == 'backward': + violate_limit = sorted(end_nans | set(_interp_limit(invalid, 0, limit))) + if limit_direction == 'both': + violate_limit = sorted(_interp_limit(invalid, limit, limit)) + + xvalues = getattr(xvalues, 'values', xvalues) + yvalues = getattr(yvalues, 'values', yvalues) + result = yvalues.copy() + + if method in ['linear', 'time', 'index', 'values']: + if method in ('values', 'index'): + inds = np.asarray(xvalues) + # hack for DatetimeIndex, #1646 + if issubclass(inds.dtype.type, np.datetime64): + inds = inds.view(np.int64) + if inds.dtype == np.object_: + inds = lib.maybe_convert_objects(inds) + else: + inds = xvalues + result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid]) + result[violate_limit] = np.nan + return result + + sp_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic', + 'barycentric', 'krogh', 'spline', 'polynomial', + 'piecewise_polynomial', 'pchip'] + if method in sp_methods: + inds = np.asarray(xvalues) + # hack for DatetimeIndex, #1646 + if issubclass(inds.dtype.type, np.datetime64): + inds = inds.view(np.int64) + result[invalid] = _interpolate_scipy_wrapper( + inds[valid], yvalues[valid], inds[invalid], method=method, + fill_value=fill_value, + bounds_error=bounds_error, order=order, **kwargs) + result[violate_limit] = np.nan + return result + + +def _interpolate_scipy_wrapper(x, y, new_x, method, fill_value=None, + bounds_error=False, order=None, **kwargs): + """ + passed off to scipy.interpolate.interp1d. method is scipy's kind. + Returns an array interpolated at new_x. Add any new methods to + the list in _clean_interp_method + """ + try: + from scipy import interpolate + from pandas import DatetimeIndex + except ImportError: + raise ImportError('{0} interpolation requires Scipy'.format(method)) + + new_x = np.asarray(new_x) + + # ignores some kwargs that could be passed along. 
+ alt_methods = { + 'barycentric': interpolate.barycentric_interpolate, + 'krogh': interpolate.krogh_interpolate, + 'piecewise_polynomial': interpolate.piecewise_polynomial_interpolate, + } + + if getattr(x, 'is_all_dates', False): + # GH 5975, scipy.interp1d can't handle datetime64s + x, new_x = x._values.astype('i8'), new_x.astype('i8') + + try: + alt_methods['pchip'] = interpolate.pchip_interpolate + except AttributeError: + if method == 'pchip': + raise ImportError("Your version of scipy does not support " + "PCHIP interpolation.") + + interp1d_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic', + 'polynomial'] + if method in interp1d_methods: + if method == 'polynomial': + method = order + terp = interpolate.interp1d(x, y, kind=method, fill_value=fill_value, + bounds_error=bounds_error) + new_y = terp(new_x) + elif method == 'spline': + # GH #10633 + if not order: + raise ValueError("order needs to be specified and greater than 0") + terp = interpolate.UnivariateSpline(x, y, k=order, **kwargs) + new_y = terp(new_x) + else: + # GH 7295: need to be able to write for some reason + # in some circumstances: check all three + if not x.flags.writeable: + x = x.copy() + if not y.flags.writeable: + y = y.copy() + if not new_x.flags.writeable: + new_x = new_x.copy() + method = alt_methods[method] + new_y = method(x, y, new_x, **kwargs) + return new_y + + +def interpolate_2d(values, method='pad', axis=0, limit=None, fill_value=None, dtype=None): + """ perform an actual interpolation of values, values will be made 2-d if + needed; fills inplace, returns the result + """ + + transf = (lambda x: x) if axis == 0 else (lambda x: x.T) + + # reshape a 1 dim if needed + ndim = values.ndim + if values.ndim == 1: + if axis != 0: # pragma: no cover + raise AssertionError("cannot interpolate on a ndim == 1 with " + "axis != 0") + values = values.reshape(tuple((1,) + values.shape)) + + if fill_value is None: + mask = None + else: # todo create faster fill func without masking + mask = com.mask_missing(transf(values), fill_value) + + method = _clean_fill_method(method) + if method == 'pad': + values = transf(pad_2d(transf(values), limit=limit, mask=mask, dtype=dtype)) + else: + values = transf(backfill_2d(transf(values), limit=limit, mask=mask, dtype=dtype)) + + # reshape back + if ndim == 1: + values = values[0] + + return values + + +def _interp_wrapper(f, wrap_dtype, na_override=None): + def wrapper(arr, mask, limit=None): + view = arr.view(wrap_dtype) + f(view, mask, limit=limit) + return wrapper + + +_pad_1d_datetime = _interp_wrapper(algos.pad_inplace_int64, np.int64) +_pad_2d_datetime = _interp_wrapper(algos.pad_2d_inplace_int64, np.int64) +_backfill_1d_datetime = _interp_wrapper(algos.backfill_inplace_int64, + np.int64) +_backfill_2d_datetime = _interp_wrapper(algos.backfill_2d_inplace_int64, + np.int64) + + +def pad_1d(values, limit=None, mask=None, dtype=None): + + if dtype is None: + dtype = values.dtype + _method = None + if com.is_float_dtype(values): + _method = getattr(algos, 'pad_inplace_%s' % dtype.name, None) + elif dtype in com._DATELIKE_DTYPES or com.is_datetime64_dtype(values): + _method = _pad_1d_datetime + elif com.is_integer_dtype(values): + values = com._ensure_float64(values) + _method = algos.pad_inplace_float64 + elif values.dtype == np.object_: + _method = algos.pad_inplace_object + + if _method is None: + raise ValueError('Invalid dtype for pad_1d [%s]' % dtype.name) + + if mask is None: + mask = com.isnull(values) + mask = mask.view(np.uint8) + _method(values, mask,
limit=limit) + return values + + +def backfill_1d(values, limit=None, mask=None, dtype=None): + + if dtype is None: + dtype = values.dtype + _method = None + if com.is_float_dtype(values): + _method = getattr(algos, 'backfill_inplace_%s' % dtype.name, None) + elif dtype in com._DATELIKE_DTYPES or com.is_datetime64_dtype(values): + _method = _backfill_1d_datetime + elif com.is_integer_dtype(values): + values = com._ensure_float64(values) + _method = algos.backfill_inplace_float64 + elif values.dtype == np.object_: + _method = algos.backfill_inplace_object + + if _method is None: + raise ValueError('Invalid dtype for backfill_1d [%s]' % dtype.name) + + if mask is None: + mask = com.isnull(values) + mask = mask.view(np.uint8) + + _method(values, mask, limit=limit) + return values + + +def pad_2d(values, limit=None, mask=None, dtype=None): + + if dtype is None: + dtype = values.dtype + _method = None + if com.is_float_dtype(values): + _method = getattr(algos, 'pad_2d_inplace_%s' % dtype.name, None) + elif dtype in com._DATELIKE_DTYPES or com.is_datetime64_dtype(values): + _method = _pad_2d_datetime + elif com.is_integer_dtype(values): + values = com._ensure_float64(values) + _method = algos.pad_2d_inplace_float64 + elif values.dtype == np.object_: + _method = algos.pad_2d_inplace_object + + if _method is None: + raise ValueError('Invalid dtype for pad_2d [%s]' % dtype.name) + + if mask is None: + mask = com.isnull(values) + mask = mask.view(np.uint8) + + if np.all(values.shape): + _method(values, mask, limit=limit) + else: + # for test coverage + pass + return values + + +def backfill_2d(values, limit=None, mask=None, dtype=None): + + if dtype is None: + dtype = values.dtype + _method = None + if com.is_float_dtype(values): + _method = getattr(algos, 'backfill_2d_inplace_%s' % dtype.name, None) + elif dtype in com._DATELIKE_DTYPES or com.is_datetime64_dtype(values): + _method = _backfill_2d_datetime + elif com.is_integer_dtype(values): + values = com._ensure_float64(values) + _method = algos.backfill_2d_inplace_float64 + elif values.dtype == np.object_: + _method = algos.backfill_2d_inplace_object + + if _method is None: + raise ValueError('Invalid dtype for backfill_2d [%s]' % dtype.name) + + if mask is None: + mask = com.isnull(values) + mask = mask.view(np.uint8) + + if np.all(values.shape): + _method(values, mask, limit=limit) + else: + # for test coverage + pass + return values + + +_fill_methods = {'pad': pad_1d, 'backfill': backfill_1d} + + +def _get_fill_func(method): + method = _clean_fill_method(method) + return _fill_methods[method] + + +def _clean_reindex_fill_method(method): + return _clean_fill_method(method, allow_nearest=True)
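Usage sketch (not part of the patch): a minimal, hedged illustration of what this refactor means in practice, assuming the diff is applied to a pandas checkout of this era. Public Series.fillna/Series.interpolate behaviour is unchanged; only the internal helpers move from pandas.core.common to pandas.core.missing. The sample Series values are illustrative, and the "mis" alias simply mirrors the "import pandas.core.missing as mis" used in generic.py and internals.py above.

    import numpy as np
    import pandas as pd
    import pandas.core.missing as mis  # new home of the fill/interpolation helpers

    s = pd.Series([1.0, np.nan, np.nan, 4.0])

    # Public API is untouched by the move:
    s.interpolate()            # linear interpolation, as before
    s.fillna(method='pad')     # forward fill, as before

    # Internal helpers now resolve from pandas.core.missing:
    mis._clean_fill_method('ffill')        # normalizes to 'pad'
    fill = mis._get_fill_func('pad')       # returns pad_1d
    fill(np.array([1.0, np.nan, 3.0]))     # fills in place -> [1., 1., 3.]
    mis.interpolate_2d(np.array([1.0, np.nan, 3.0]),
                       method='backfill')  # -> [1., 3., 3.]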