From 1759b836343508a4a3864bf12f8b4772a58a3910 Mon Sep 17 00:00:00 2001 From: Chris Warth Date: Tue, 29 Dec 2015 08:01:22 -0800 Subject: [PATCH 1/4] Add example usage to DataFrame.filter --- pandas/core/generic.py | 42 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 85f23b988778f..b0c873d6b101d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2090,7 +2090,7 @@ def _reindex_axis(self, new_index, fill_method, axis, copy): def filter(self, items=None, like=None, regex=None, axis=None): """ - Restrict the info axis to set of items or wildcard + Subset rows or columns of dataframe according to specified filters. Parameters ---------- @@ -2101,14 +2101,44 @@ def filter(self, items=None, like=None, regex=None, axis=None): regex : string (regular expression) Keep info axis with re.search(regex, col) == True axis : int or None - The axis to filter on. By default this is the info axis. The "info - axis" is the axis that is used when indexing with ``[]``. For - example, ``df = DataFrame({'a': [1, 2, 3, 4]]}); df['a']``. So, - the ``DataFrame`` columns are the info axis. + The axis to filter on. + + Examples + -------- + >>> df + one two three + mouse 1 2 3 + rabbit 4 5 6 + >>> # select columns by name + >>> df.filter(items=['one', 'three']) + one three + mouse 1 3 + rabbit 4 6 + >>> # select columns by regular expression + >>> df.filter(regex='e$', axis=1) + one three + mouse 1 3 + rabbit 4 6 + >>> # select rows containing 'm' + >>> df.filter(like='m', axis=0) + one two three + mouse 1 2 3 + >>> # select rows containing 'bbi' + >>> df.filter(like='bbi', axis=0) + one two three + rabbit 4 5 6 + + Returns + ------- + same type as input object with filtered info axis Notes ----- - Arguments are mutually exclusive, but this is not checked for + Arguments are mutually exclusive, but this is not checked for. + + ``axis`` defaults to the info axis that is used when indexing with ``[]``, + e.g. ``df = DataFrame({'a': [1, 2, 3, 4]]}); df['a']`` + In this instance the ``DataFrame`` columns are the info axis. """ import re From 910422fc1c113677f673baad15e2af9252d775a3 Mon Sep 17 00:00:00 2001 From: chris warth Date: Fri, 19 Feb 2016 10:40:21 -0800 Subject: [PATCH 2/4] update doc comments for dataframe.filter --- pandas/core/generic.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b0c873d6b101d..8388ea091d87f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2090,8 +2090,12 @@ def _reindex_axis(self, new_index, fill_method, axis, copy): def filter(self, items=None, like=None, regex=None, axis=None): """ - Subset rows or columns of dataframe according to specified filters. + Subset rows or columns of dataframe according to labels in the index. + Note that this routine does not filter a dataframe on its contents. The filter is + applied to the labels of the index. 
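For reference, a minimal sketch that reproduces the frame the doctest examples
in this docstring assume (the docstring itself never constructs it):

    >>> import pandas as pd
    >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]],
    ...                   index=['mouse', 'rabbit'],
    ...                   columns=['one', 'two', 'three'])
    >>> df
            one  two  three
    mouse     1    2      3
    rabbit    4    5      6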
+ This method is a thin veneer on top of :ref:`DateFrame Select ` + Parameters ---------- items : list-like @@ -2109,20 +2113,19 @@ def filter(self, items=None, like=None, regex=None, axis=None): one two three mouse 1 2 3 rabbit 4 5 6 + >>> # select columns by name >>> df.filter(items=['one', 'three']) one three mouse 1 3 rabbit 4 6 + >>> # select columns by regular expression >>> df.filter(regex='e$', axis=1) one three mouse 1 3 rabbit 4 6 - >>> # select rows containing 'm' - >>> df.filter(like='m', axis=0) - one two three - mouse 1 2 3 + >>> # select rows containing 'bbi' >>> df.filter(like='bbi', axis=0) one two three @@ -2134,11 +2137,9 @@ def filter(self, items=None, like=None, regex=None, axis=None): Notes ----- - Arguments are mutually exclusive, but this is not checked for. + The ``items``, ``like``, and ``regex`` parameters should be mutually exclusive, but this is not checked. - ``axis`` defaults to the info axis that is used when indexing with ``[]``, - e.g. ``df = DataFrame({'a': [1, 2, 3, 4]]}); df['a']`` - In this instance the ``DataFrame`` columns are the info axis. + ``axis`` defaults to the info axis that is used when indexing with ``[]``. """ import re From 5f9e21771d0182365d5a56a1068ba3f7c2fc5f81 Mon Sep 17 00:00:00 2001 From: chris warth Date: Fri, 19 Feb 2016 14:49:49 -0800 Subject: [PATCH 3/4] merge from upstream/master --- pandas/core/generic.py | 1428 ++++++++++++++++++++++++---------------- 1 file changed, 878 insertions(+), 550 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8388ea091d87f..14d788fdded7e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -15,6 +15,7 @@ from pandas.tseries.index import DatetimeIndex from pandas.tseries.period import PeriodIndex from pandas.core.internals import BlockManager +import pandas.core.algorithms as algos import pandas.core.common as com import pandas.core.missing as mis import pandas.core.datetools as datetools @@ -32,11 +33,10 @@ # goal is to be able to define the docs close to function, while still being # able to share _shared_docs = dict() -_shared_doc_kwargs = dict(axes='keywords for axes', - klass='NDFrame', +_shared_doc_kwargs = dict(axes='keywords for axes', klass='NDFrame', axes_single_arg='int or labels for object', args_transpose='axes to permute (int or label for' - ' object)') + ' object)') def is_dictlike(x): @@ -69,7 +69,6 @@ def _single_replace(self, to_replace, method, inplace, limit): class NDFrame(PandasObject): - """ N-dimensional analogue of DataFrame. 
Store multi-dimensional in a size-mutable, labeled data structure @@ -80,10 +79,10 @@ class NDFrame(PandasObject): axes : list copy : boolean, default False """ - _internal_names = ['_data', '_cacher', '_item_cache', '_cache', - 'is_copy', '_subtyp', '_index', - '_default_kind', '_default_fill_value', '_metadata', - '__array_struct__', '__array_interface__'] + _internal_names = ['_data', '_cacher', '_item_cache', '_cache', 'is_copy', + '_subtyp', '_index', '_default_kind', + '_default_fill_value', '_metadata', '__array_struct__', + '__array_interface__'] _internal_names_set = set(_internal_names) _accessors = frozenset([]) _metadata = [] @@ -123,8 +122,9 @@ def _init_mgr(self, mgr, axes=None, dtype=None, copy=False): """ passed a manager and a axes dict """ for a, axe in axes.items(): if axe is not None: - mgr = mgr.reindex_axis( - axe, axis=self._get_block_manager_axis(a), copy=False) + mgr = mgr.reindex_axis(axe, + axis=self._get_block_manager_axis(a), + copy=False) # make a copy if explicitly requested if copy: @@ -135,7 +135,7 @@ def _init_mgr(self, mgr, axes=None, dtype=None, copy=False): mgr = mgr.astype(dtype=dtype) return mgr - #---------------------------------------------------------------------- + # ---------------------------------------------------------------------- # Construction @property @@ -154,7 +154,7 @@ def __unicode__(self): def _dir_additions(self): """ add the string-like attributes from the info_axis """ return set([c for c in self._info_axis - if isinstance(c, string_types) and isidentifier(c)]) + if isinstance(c, string_types) and isidentifier(c)]) @property def _constructor_sliced(self): @@ -170,31 +170,32 @@ def _constructor_expanddim(self): """ raise NotImplementedError - #---------------------------------------------------------------------- + # ---------------------------------------------------------------------- # Axis @classmethod - def _setup_axes( - cls, axes, info_axis=None, stat_axis=None, aliases=None, slicers=None, - axes_are_reversed=False, build_axes=True, ns=None): - """ provide axes setup for the major PandasObjects - - axes : the names of the axes in order (lowest to highest) - info_axis_num : the axis of the selector dimension (int) - stat_axis_num : the number of axis for the default stats (int) - aliases : other names for a single axis (dict) - slicers : how axes slice to others (dict) - axes_are_reversed : boolean whether to treat passed axes as - reversed (DataFrame) - build_axes : setup the axis properties (default True) - """ + def _setup_axes(cls, axes, info_axis=None, stat_axis=None, aliases=None, + slicers=None, axes_are_reversed=False, build_axes=True, + ns=None): + """Provide axes setup for the major PandasObjects. 
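For grounding, a rough sketch of a concrete call site; the keyword names here
are recalled from the pandas source rather than guaranteed by this patch:

    DataFrame._setup_axes(['index', 'columns'], info_axis=1, stat_axis=0,
                          axes_are_reversed=True, aliases={'rows': 0})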
+ + Parameters + ---------- + axes : the names of the axes in order (lowest to highest) + info_axis_num : the axis of the selector dimension (int) + stat_axis_num : the number of axis for the default stats (int) + aliases : other names for a single axis (dict) + slicers : how axes slice to others (dict) + axes_are_reversed : boolean whether to treat passed axes as + reversed (DataFrame) + build_axes : setup the axis properties (default True) + """ cls._AXIS_ORDERS = axes cls._AXIS_NUMBERS = dict((a, i) for i, a in enumerate(axes)) cls._AXIS_LEN = len(axes) cls._AXIS_ALIASES = aliases or dict() - cls._AXIS_IALIASES = dict((v, k) - for k, v in cls._AXIS_ALIASES.items()) + cls._AXIS_IALIASES = dict((v, k) for k, v in cls._AXIS_ALIASES.items()) cls._AXIS_NAMES = dict(enumerate(axes)) cls._AXIS_SLICEMAP = slicers or None cls._AXIS_REVERSED = axes_are_reversed @@ -234,29 +235,31 @@ def set_axis(a, i): setattr(cls, k, v) def _construct_axes_dict(self, axes=None, **kwargs): - """ return an axes dictionary for myself """ + """Return an axes dictionary for myself.""" d = dict([(a, self._get_axis(a)) for a in (axes or self._AXIS_ORDERS)]) d.update(kwargs) return d @staticmethod def _construct_axes_dict_from(self, axes, **kwargs): - """ return an axes dictionary for the passed axes """ + """Return an axes dictionary for the passed axes.""" d = dict([(a, ax) for a, ax in zip(self._AXIS_ORDERS, axes)]) d.update(kwargs) return d def _construct_axes_dict_for_slice(self, axes=None, **kwargs): - """ return an axes dictionary for myself """ + """Return an axes dictionary for myself.""" d = dict([(self._AXIS_SLICEMAP[a], self._get_axis(a)) - for a in (axes or self._AXIS_ORDERS)]) + for a in (axes or self._AXIS_ORDERS)]) d.update(kwargs) return d def _construct_axes_from_arguments(self, args, kwargs, require_all=False): - """ construct and returns axes if supplied in args/kwargs - if require_all, raise if all axis arguments are not supplied - return a tuple of (axes, kwargs) """ + """Construct and returns axes if supplied in args/kwargs. + + If require_all, raise if all axis arguments are not supplied + return a tuple of (axes, kwargs). 
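A hedged doctest sketch of the resolution for a DataFrame (this is a private
helper, so the example is illustrative only):

    >>> axes, kwargs = df._construct_axes_from_arguments(
    ...     (['r1'],), {'columns': ['c1']})
    >>> sorted(axes.items())
    [('columns', ['c1']), ('index', ['r1'])]
    >>> kwargs
    {}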
+ """ # construct the args args = list(args) @@ -267,10 +270,8 @@ def _construct_axes_from_arguments(self, args, kwargs, require_all=False): if alias is not None: if a in kwargs: if alias in kwargs: - raise TypeError( - "arguments are mutually exclusive for [%s,%s]" % - (a, alias) - ) + raise TypeError("arguments are mutually exclusive " + "for [%s,%s]" % (a, alias)) continue if alias in kwargs: kwargs[a] = kwargs.pop(alias) @@ -280,10 +281,10 @@ def _construct_axes_from_arguments(self, args, kwargs, require_all=False): if a not in kwargs: try: kwargs[a] = args.pop(0) - except (IndexError): + except IndexError: if require_all: - raise TypeError( - "not enough/duplicate arguments specified!") + raise TypeError("not enough/duplicate arguments " + "specified!") axes = dict([(a, kwargs.pop(a, None)) for a in self._AXIS_ORDERS]) return axes, kwargs @@ -331,7 +332,7 @@ def _get_axis(self, axis): return getattr(self, name) def _get_block_manager_axis(self, axis): - """ map the axis to the block_manager axis """ + """Map the axis to the block_manager axis.""" axis = self._get_axis_number(axis) if self._AXIS_REVERSED: m = self._AXIS_LEN - 1 @@ -384,24 +385,24 @@ def _stat_axis(self): @property def shape(self): - "Return a tuple of axis dimensions" + """Return a tuple of axis dimensions""" return tuple(len(self._get_axis(a)) for a in self._AXIS_ORDERS) @property def axes(self): - "Return index label(s) of the internal NDFrame" + """Return index label(s) of the internal NDFrame""" # we do it this way because if we have reversed axes, then # the block manager shows then reversed return [self._get_axis(a) for a in self._AXIS_ORDERS] @property def ndim(self): - "Number of axes / array dimensions" + """Number of axes / array dimensions""" return self._data.ndim @property def size(self): - "number of elements in the NDFrame" + """number of elements in the NDFrame""" return np.prod(self.shape) def _expand_axes(self, key): @@ -418,7 +419,7 @@ def _expand_axes(self, key): def set_axis(self, axis, labels): """ public verson of axis assignment """ - setattr(self,self._get_axis_name(axis),labels) + setattr(self, self._get_axis_name(axis), labels) def _set_axis(self, axis, labels): self._data.set_axis(axis, labels) @@ -448,26 +449,26 @@ def _set_axis(self, axis, labels): def transpose(self, *args, **kwargs): # construct the args - axes, kwargs = self._construct_axes_from_arguments( - args, kwargs, require_all=True) + axes, kwargs = self._construct_axes_from_arguments(args, kwargs, + require_all=True) axes_names = tuple([self._get_axis_name(axes[a]) for a in self._AXIS_ORDERS]) axes_numbers = tuple([self._get_axis_number(axes[a]) - for a in self._AXIS_ORDERS]) + for a in self._AXIS_ORDERS]) # we must have unique axes if len(axes) != len(set(axes)): raise ValueError('Must specify %s unique axes' % self._AXIS_LEN) - new_axes = self._construct_axes_dict_from( - self, [self._get_axis(x) for x in axes_names]) + new_axes = self._construct_axes_dict_from(self, [self._get_axis(x) + for x in axes_names]) new_values = self.values.transpose(axes_numbers) if kwargs.pop('copy', None) or (len(args) and args[-1]): new_values = new_values.copy() if kwargs: raise TypeError('transpose() got an unexpected keyword ' - 'argument "{0}"'.format(list(kwargs.keys())[0])) + 'argument "{0}"'.format(list(kwargs.keys())[0])) return self._constructor(new_values, **new_axes).__finalize__(self) @@ -511,10 +512,10 @@ def pop(self, item): return result def squeeze(self): - """ squeeze length 1 dimensions """ + """Squeeze length 1 dimensions.""" 
try: return self.iloc[tuple([0 if len(a) == 1 else slice(None) - for a in self.axes])] + for a in self.axes])] except: return self @@ -537,7 +538,7 @@ def swaplevel(self, i, j, axis=0): result._data.set_axis(axis, labels.swaplevel(i, j)) return result - #---------------------------------------------------------------------- + # ---------------------------------------------------------------------- # Rename # TODO: define separate funcs for DataFrame, Series and Panel so you can @@ -545,13 +546,16 @@ def swaplevel(self, i, j, axis=0): _shared_docs['rename'] = """ Alter axes input function or functions. Function / dict values must be unique (1-to-1). Labels not contained in a dict / Series will be left - as-is. + as-is. Alternatively, change ``Series.name`` with a scalar + value (Series only). Parameters ---------- - %(axes)s : dict-like or function, optional - Transformation to apply to that axis values - + %(axes)s : scalar, list-like, dict-like or function, optional + Scalar or list-like will alter the ``Series.name`` attribute, + and raise on DataFrame or Panel. + dict-like or functions are transformations to apply to + that axis' values copy : boolean, default True Also copy underlying data inplace : boolean, default False @@ -561,6 +565,43 @@ def swaplevel(self, i, j, axis=0): Returns ------- renamed : %(klass)s (new object) + + See Also + -------- + pandas.NDFrame.rename_axis + + Examples + -------- + >>> s = pd.Series([1, 2, 3]) + >>> s + 0 1 + 1 2 + 2 3 + dtype: int64 + >>> s.rename("my_name") # scalar, changes Series.name + 0 1 + 1 2 + 2 3 + Name: my_name, dtype: int64 + >>> s.rename(lambda x: x ** 2) # function, changes labels + 0 1 + 1 2 + 4 3 + dtype: int64 + >>> s.rename({1: 3, 2: 5}) # mapping, changes labels + 0 1 + 3 2 + 5 3 + dtype: int64 + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + >>> df.rename(2) + ... + TypeError: 'int' object is not callable + >>> df.rename(index=str, columns={"A": "a", "B": "c"}) + a c + 0 1 4 + 1 2 5 + 2 3 6 """ @Appender(_shared_docs['rename'] % dict(axes='axes keywords for this' @@ -573,14 +614,15 @@ def rename(self, *args, **kwargs): if kwargs: raise TypeError('rename() got an unexpected keyword ' - 'argument "{0}"'.format(list(kwargs.keys())[0])) + 'argument "{0}"'.format(list(kwargs.keys())[0])) - if (com._count_not_none(*axes.values()) == 0): + if com._count_not_none(*axes.values()) == 0: raise TypeError('must pass an index to rename') # renamer function if passed a dict def _get_rename_function(mapper): if isinstance(mapper, (dict, ABCSeries)): + def f(x): if x in mapper: return mapper[x] @@ -615,12 +657,15 @@ def f(x): def rename_axis(self, mapper, axis=0, copy=True, inplace=False): """ Alter index and / or columns using input function or functions. + A scaler or list-like for ``mapper`` will alter the ``Index.name`` + or ``MultiIndex.names`` attribute. + A function or dict for ``mapper`` will alter the labels. Function / dict values must be unique (1-to-1). Labels not contained in a dict / Series will be left as-is. 
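A minimal sketch of the scalar case just described — the index's *name*
changes, not its labels:

    >>> pd.Series([1, 2, 3]).rename_axis('idx').index.name
    'idx'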
Parameters ---------- - mapper : dict-like or function, optional + mapper : scalar, list-like, dict-like or function, optional axis : int or string, default 0 copy : boolean, default True Also copy underlying data @@ -629,13 +674,90 @@ def rename_axis(self, mapper, axis=0, copy=True, inplace=False): Returns ------- renamed : type of caller + + See Also + -------- + pandas.NDFrame.rename + pandas.Index.rename + + Examples + -------- + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + >>> df.rename_axis("foo") # scalar, alters df.index.name + A B + foo + 0 1 4 + 1 2 5 + 2 3 6 + >>> df.rename_axis(lambda x: 2 * x) # function: alters labels + A B + 0 1 4 + 2 2 5 + 4 3 6 + >>> df.rename_axis({"A": "ehh", "C": "see"}, axis="columns") # mapping + ehh B + 0 1 4 + 1 2 5 + 2 3 6 + """ + is_scalar_or_list = ( + (not com.is_sequence(mapper) and not callable(mapper)) or + (com.is_list_like(mapper) and not com.is_dict_like(mapper)) + ) + + if is_scalar_or_list: + return self._set_axis_name(mapper, axis=axis) + else: + axis = self._get_axis_name(axis) + d = {'copy': copy, 'inplace': inplace} + d[axis] = mapper + return self.rename(**d) + + def _set_axis_name(self, name, axis=0): + """ + Alter the name or names of the axis, returning self. + + Parameters + ---------- + name : str or list of str + Name for the Index, or list of names for the MultiIndex + axis : int or str + 0 or 'index' for the index; 1 or 'columns' for the columns + + Returns + ------- + renamed : type of caller + + See Also + -------- + pandas.DataFrame.rename + pandas.Series.rename + pandas.Index.rename + + Examples + -------- + >>> df._set_axis_name("foo") + A + foo + 0 1 + 1 2 + 2 3 + >>> df.index = pd.MultiIndex.from_product([['A'], ['a', 'b', 'c']]) + >>> df._set_axis_name(["bar", "baz"]) + A + bar baz + A a 1 + b 2 + c 3 """ - axis = self._get_axis_name(axis) - d = {'copy': copy, 'inplace': inplace} - d[axis] = mapper - return self.rename(**d) + axis = self._get_axis_number(axis) + idx = self._get_axis(axis).set_names(name) + + renamed = self.copy(deep=True) + renamed.set_axis(axis, idx) + return renamed - #---------------------------------------------------------------------- + # ---------------------------------------------------------------------- # Comparisons def _indexed_same(self, other): @@ -664,14 +786,14 @@ def __invert__(self): def equals(self, other): """ - Determines if two NDFrame objects contain the same elements. NaNs in the - same location are considered equal. + Determines if two NDFrame objects contain the same elements. NaNs in + the same location are considered equal. """ if not isinstance(other, self._constructor): return False return self._data.equals(other._data) - #---------------------------------------------------------------------- + # ---------------------------------------------------------------------- # Iteration def __hash__(self): @@ -679,9 +801,7 @@ def __hash__(self): ' hashed'.format(self.__class__.__name__)) def __iter__(self): - """ - Iterate over infor axis - """ + """Iterate over infor axis""" return iter(self._info_axis) # can we get a better explanation of this? @@ -689,7 +809,8 @@ def keys(self): """Get the 'info axis' (see Indexing for more) This is index for Series, columns for DataFrame and major_axis for - Panel.""" + Panel. + """ return self._info_axis def iteritems(self): @@ -707,21 +828,21 @@ def iteritems(self): def iterkv(self, *args, **kwargs): "iteritems alias used to get around 2to3. 
Deprecated" warnings.warn("iterkv is deprecated and will be removed in a future " - "release, use ``iteritems`` instead.", - FutureWarning, stacklevel=2) + "release, use ``iteritems`` instead.", FutureWarning, + stacklevel=2) return self.iteritems(*args, **kwargs) def __len__(self): - """Returns length of info axis """ + """Returns length of info axis""" return len(self._info_axis) def __contains__(self, key): - """True if the key is in the info axis """ + """True if the key is in the info axis""" return key in self._info_axis @property def empty(self): - "True if NDFrame is entirely empty [no items]" + """True if NDFrame is entirely empty [no items]""" return not all(len(self._get_axis(a)) > 0 for a in self._AXIS_ORDERS) def __nonzero__(self): @@ -732,11 +853,12 @@ def __nonzero__(self): __bool__ = __nonzero__ def bool(self): - """ Return the bool of a single element PandasObject - This must be a boolean scalar value, either True or False + """Return the bool of a single element PandasObject. - Raise a ValueError if the PandasObject does not have exactly - 1 element, or that element is not boolean """ + This must be a boolean scalar value, either True or False. Raise a + ValueError if the PandasObject does not have exactly 1 element, or that + element is not boolean + """ v = self.squeeze() if isinstance(v, (bool, np.bool_)): return bool(v) @@ -749,10 +871,10 @@ def bool(self): def __abs__(self): return self.abs() - def __round__(self,decimals=0): + def __round__(self, decimals=0): return self.round(decimals) - #---------------------------------------------------------------------- + # ---------------------------------------------------------------------- # Array Interface def __array__(self, dtype=None): @@ -764,24 +886,24 @@ def __array_wrap__(self, result, context=None): # ideally we would define this to avoid the getattr checks, but # is slower - #@property - #def __array_interface__(self): + # @property + # def __array_interface__(self): # """ provide numpy array interface method """ # values = self.values # return dict(typestr=values.dtype.str,shape=values.shape,data=values) def to_dense(self): - "Return dense representation of NDFrame (as opposed to sparse)" + """Return dense representation of NDFrame (as opposed to sparse)""" # compat return self - #---------------------------------------------------------------------- + # ---------------------------------------------------------------------- # Picklability def __getstate__(self): meta = dict((k, getattr(self, k, None)) for k in self._metadata) - return dict(_data=self._data, _typ=self._typ, - _metadata=self._metadata, **meta) + return dict(_data=self._data, _typ=self._typ, _metadata=self._metadata, + **meta) def __setstate__(self, state): @@ -822,10 +944,10 @@ def __setstate__(self, state): self._item_cache = {} - #---------------------------------------------------------------------- + # ---------------------------------------------------------------------- # IO - #---------------------------------------------------------------------- + # ---------------------------------------------------------------------- # I/O Methods def to_json(self, path_or_buf=None, orient=None, date_format='epoch', @@ -886,17 +1008,14 @@ def to_json(self, path_or_buf=None, orient=None, date_format='epoch', """ from pandas.io import json - return json.to_json( - path_or_buf=path_or_buf, - obj=self, orient=orient, - date_format=date_format, - double_precision=double_precision, - force_ascii=force_ascii, - date_unit=date_unit, - 
default_handler=default_handler) + return json.to_json(path_or_buf=path_or_buf, obj=self, orient=orient, + date_format=date_format, + double_precision=double_precision, + force_ascii=force_ascii, date_unit=date_unit, + default_handler=default_handler) def to_hdf(self, path_or_buf, key, **kwargs): - """ activate the HDFStore + """Activate the HDFStore. Parameters ---------- @@ -940,7 +1059,7 @@ def to_hdf(self, path_or_buf, key, **kwargs): from pandas.io import pytables return pytables.to_hdf(path_or_buf, key, self, **kwargs) - def to_msgpack(self, path_or_buf=None, **kwargs): + def to_msgpack(self, path_or_buf=None, encoding='utf-8', **kwargs): """ msgpack (serialize) object to input file path @@ -958,7 +1077,8 @@ def to_msgpack(self, path_or_buf=None, **kwargs): """ from pandas.io import packers - return packers.to_msgpack(path_or_buf, self, **kwargs) + return packers.to_msgpack(path_or_buf, self, encoding=encoding, + **kwargs) def to_sql(self, name, con, flavor='sqlite', schema=None, if_exists='fail', index=True, index_label=None, chunksize=None, dtype=None): @@ -975,8 +1095,8 @@ def to_sql(self, name, con, flavor='sqlite', schema=None, if_exists='fail', If a DBAPI2 object, only sqlite3 is supported. flavor : {'sqlite', 'mysql'}, default 'sqlite' The flavor of SQL to use. Ignored when using SQLAlchemy engine. - 'mysql' is deprecated and will be removed in future versions, but it - will be further supported through SQLAlchemy engines. + 'mysql' is deprecated and will be removed in future versions, but + it will be further supported through SQLAlchemy engines. schema : string, default None Specify the schema (if database flavor supports this). If None, use default schema. @@ -999,14 +1119,13 @@ def to_sql(self, name, con, flavor='sqlite', schema=None, if_exists='fail', """ from pandas.io import sql - sql.to_sql( - self, name, con, flavor=flavor, schema=schema, if_exists=if_exists, - index=index, index_label=index_label, chunksize=chunksize, - dtype=dtype) + sql.to_sql(self, name, con, flavor=flavor, schema=schema, + if_exists=if_exists, index=index, index_label=index_label, + chunksize=chunksize, dtype=dtype) def to_pickle(self, path): """ - Pickle (serialize) object to input file path + Pickle (serialize) object to input file path. Parameters ---------- @@ -1041,12 +1160,109 @@ def to_clipboard(self, excel=None, sep=None, **kwargs): from pandas.io import clipboard clipboard.to_clipboard(self, excel=excel, sep=sep, **kwargs) - #---------------------------------------------------------------------- + def to_xarray(self): + """ + Return an xarray object from the pandas object. 
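One caveat worth noting: ``xarray`` is an optional dependency imported lazily
inside the method, so the examples below require it. A hedged round-trip
sketch, assuming xarray's ``Dataset.to_dataframe``:

    >>> df.to_xarray().to_dataframe()  # Dataset back to a DataFrame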
+ + Returns + ------- + a DataArray for a Series + a Dataset for a DataFrame + a DataArray for higher dims + + Examples + -------- + >>> df = pd.DataFrame({'A' : [1, 1, 2], + 'B' : ['foo', 'bar', 'foo'], + 'C' : np.arange(4.,7)}) + >>> df + A B C + 0 1 foo 4.0 + 1 1 bar 5.0 + 2 2 foo 6.0 + + >>> df.to_xarray() + + Dimensions: (index: 3) + Coordinates: + * index (index) int64 0 1 2 + Data variables: + A (index) int64 1 1 2 + B (index) object 'foo' 'bar' 'foo' + C (index) float64 4.0 5.0 6.0 + + >>> df = pd.DataFrame({'A' : [1, 1, 2], + 'B' : ['foo', 'bar', 'foo'], + 'C' : np.arange(4.,7)} + ).set_index(['B','A']) + >>> df + C + B A + foo 1 4.0 + bar 1 5.0 + foo 2 6.0 + + >>> df.to_xarray() + + Dimensions: (A: 2, B: 2) + Coordinates: + * B (B) object 'bar' 'foo' + * A (A) int64 1 2 + Data variables: + C (B, A) float64 5.0 nan 4.0 6.0 + + >>> p = pd.Panel(np.arange(24).reshape(4,3,2), + items=list('ABCD'), + major_axis=pd.date_range('20130101', periods=3), + minor_axis=['first', 'second']) + >>> p + + Dimensions: 4 (items) x 3 (major_axis) x 2 (minor_axis) + Items axis: A to D + Major_axis axis: 2013-01-01 00:00:00 to 2013-01-03 00:00:00 + Minor_axis axis: first to second + + >>> p.to_xarray() + + array([[[ 0, 1], + [ 2, 3], + [ 4, 5]], + [[ 6, 7], + [ 8, 9], + [10, 11]], + [[12, 13], + [14, 15], + [16, 17]], + [[18, 19], + [20, 21], + [22, 23]]]) + Coordinates: + * items (items) object 'A' 'B' 'C' 'D' + * major_axis (major_axis) datetime64[ns] 2013-01-01 2013-01-02 2013-01-03 # noqa + * minor_axis (minor_axis) object 'first' 'second' + + Notes + ----- + See the `xarray docs `__ + """ + import xarray + if self.ndim == 1: + return xarray.DataArray.from_series(self) + elif self.ndim == 2: + return xarray.Dataset.from_dataframe(self) + + # > 2 dims + coords = [(a, self._get_axis(a)) for a in self._AXIS_ORDERS] + return xarray.DataArray(self, + coords=coords, + ) + + # ---------------------------------------------------------------------- # Fancy Indexing @classmethod def _create_indexer(cls, name, indexer): - """ create an indexer like _name in the class """ + """Create an indexer like _name in the class.""" if getattr(cls, name, None) is None: iname = '_%s' % name @@ -1067,7 +1283,7 @@ def _indexer(self): def get(self, key, default=None): """ Get item from object for given key (DataFrame column, Panel slice, - etc.). Returns default value if not found + etc.). Returns default value if not found. Parameters ---------- @@ -1086,7 +1302,7 @@ def __getitem__(self, item): return self._get_item_cache(item) def _get_item_cache(self, item): - """ return the cached item, item represents a label indexer """ + """Return the cached item, item represents a label indexer.""" cache = self._item_cache res = cache.get(item) if res is None: @@ -1100,17 +1316,18 @@ def _get_item_cache(self, item): return res def _set_as_cached(self, item, cacher): - """ set the _cacher attribute on the calling object with - a weakref to cacher """ + """Set the _cacher attribute on the calling object with a weakref to + cacher. 
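A minimal sketch of the public ``get`` defined above — it mirrors
``dict.get``, returning ``default`` instead of raising:

    >>> df = pd.DataFrame({'a': [1, 2]})
    >>> df.get('b', default='missing')
    'missing'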
+ """ self._cacher = (item, weakref.ref(cacher)) def _reset_cacher(self): - """ reset the cacher """ - if hasattr(self,'_cacher'): + """Reset the cacher.""" + if hasattr(self, '_cacher'): del self._cacher def _iget_item_cache(self, item): - """ return the cached item, item represents a positional indexer """ + """Return the cached item, item represents a positional indexer.""" ax = self._info_axis if ax.is_unique: lower = self._get_item_cache(ax[item]) @@ -1122,9 +1339,7 @@ def _box_item_values(self, key, values): raise AbstractMethodError(self) def _maybe_cache_changed(self, item, value): - """ - the object has called back to us saying - maybe it has changed + """The object has called back to us saying maybe it has changed. numpy < 1.8 has an issue with object arrays and aliasing GH6026 @@ -1133,11 +1348,11 @@ def _maybe_cache_changed(self, item, value): @property def _is_cached(self): - """ boolean : return if I am cached """ + """Return boolean indicating if self is cached or not.""" return getattr(self, '_cacher', None) is not None def _get_cacher(self): - """ return my cacher or None """ + """return my cacher or None""" cacher = getattr(self, '_cacher', None) if cacher is not None: cacher = cacher[1]() @@ -1145,14 +1360,13 @@ def _get_cacher(self): @property def _is_view(self): - """ boolean : return if I am a view of another array """ + """Return boolean indicating if self is view of another array """ return self._data.is_view def _maybe_update_cacher(self, clear=False, verify_is_copy=True): """ - - see if we need to update our parent cacher - if clear, then clear our cache + See if we need to update our parent cacher if clear, then clear our + cache. Parameters ---------- @@ -1194,7 +1408,6 @@ def _slice(self, slobj, axis=0, kind=None): Construct a slice of this container. kind parameter is maintained for compatibility with Series slicing. - """ axis = self._get_block_manager_axis(axis) result = self._constructor(self._data.get_slice(slobj, axis=axis)) @@ -1202,7 +1415,7 @@ def _slice(self, slobj, axis=0, kind=None): # this could be a view # but only in a single-dtyped view slicable case - is_copy = axis!=0 or result._is_view + is_copy = axis != 0 or result._is_view result._set_is_copy(self, copy=is_copy) return result @@ -1221,18 +1434,20 @@ def _set_is_copy(self, ref=None, copy=True): def _check_is_chained_assignment_possible(self): """ - check if we are a view, have a cacher, and are of mixed type - if so, then force a setitem_copy check + Check if we are a view, have a cacher, and are of mixed type. + If so, then force a setitem_copy check. - should be called just near setting a value + Should be called just near setting a value - will return a boolean if it we are a view and are cached, but a single-dtype - meaning that the cacher should be updated following setting + Will return a boolean if it we are a view and are cached, but a + single-dtype meaning that the cacher should be updated following + setting. 
""" if self._is_view and self._is_cached: ref = self._get_cacher() if ref is not None and ref._is_mixed_type: - self._check_setitem_copy(stacklevel=4, t='referant', force=True) + self._check_setitem_copy(stacklevel=4, t='referant', + force=True) return True elif self.is_copy: self._check_setitem_copy(stacklevel=4, t='referant') @@ -1255,16 +1470,16 @@ def _check_setitem_copy(self, stacklevel=4, t='setting', force=False): user will see the error *at the level of setting* It is technically possible to figure out that we are setting on - a copy even WITH a multi-dtyped pandas object. In other words, some blocks - may be views while other are not. Currently _is_view will ALWAYS return False - for multi-blocks to avoid having to handle this case. + a copy even WITH a multi-dtyped pandas object. In other words, some + blocks may be views while other are not. Currently _is_view will ALWAYS + return False for multi-blocks to avoid having to handle this case. df = DataFrame(np.arange(0,9), columns=['count']) df['group'] = 'b' - # this technically need not raise SettingWithCopy if both are view (which is not - # generally guaranteed but is usually True - # however, this is in general not a good practice and we recommend using .loc + # This technically need not raise SettingWithCopy if both are view + # (which is not # generally guaranteed but is usually True. However, + # this is in general not a good practice and we recommend using .loc. df.iloc[0:5]['group'] = 'a' """ @@ -1302,15 +1517,19 @@ def _check_setitem_copy(self, stacklevel=4, t='setting', force=False): "A value is trying to be set on a copy of a slice from a " "DataFrame\n\n" "See the caveats in the documentation: " - "http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy") + "http://pandas.pydata.org/pandas-docs/stable/" + "indexing.html#indexing-view-versus-copy" + ) else: t = ("\n" "A value is trying to be set on a copy of a slice from a " "DataFrame.\n" - "Try using .loc[row_indexer,col_indexer] = value instead\n\n" - "See the caveats in the documentation: " - "http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy") + "Try using .loc[row_indexer,col_indexer] = value " + "instead\n\nSee the caveats in the documentation: " + "http://pandas.pydata.org/pandas-docs/stable/" + "indexing.html#indexing-view-versus-copy" + ) if value == 'raise': raise SettingWithCopyError(t) @@ -1334,7 +1553,7 @@ def __delitem__(self, key): # Allow shorthand to delete all columns whose first len(key) # elements match key: if not isinstance(key, tuple): - key = (key,) + key = (key, ) for col in self.columns: if isinstance(col, tuple) and col[:len(key)] == key: del self[col] @@ -1382,8 +1601,8 @@ def take(self, indices, axis=0, convert=True, is_copy=True): def xs(self, key, axis=0, level=None, copy=None, drop_level=True): """ - Returns a cross-section (row(s) or column(s)) from the Series/DataFrame. - Defaults to cross-section on the rows (axis=0). + Returns a cross-section (row(s) or column(s)) from the + Series/DataFrame. Defaults to cross-section on the rows (axis=0). Parameters ---------- @@ -1446,8 +1665,9 @@ def xs(self, key, axis=0, level=None, copy=None, drop_level=True): ----- xs is only for getting, not setting values. - MultiIndex Slicers is a generic way to get/set values on any level or levels - it is a superset of xs functionality, see :ref:`MultiIndex Slicers ` + MultiIndex Slicers is a generic way to get/set values on any level or + levels. 
It is a superset of xs functionality, see + :ref:`MultiIndex Slicers ` """ if copy is not None: @@ -1509,10 +1729,8 @@ def xs(self, key, axis=0, level=None, copy=None, drop_level=True): if not is_list_like(new_values) or self.ndim == 1: return _maybe_box_datetimelike(new_values) - result = Series(new_values, - index=self.columns, - name=self.index[loc], - copy=copy, + result = Series(new_values, index=self.columns, + name=self.index[loc], copy=copy, dtype=new_values.dtype) else: @@ -1555,7 +1773,7 @@ def select(self, crit, axis=0): def reindex_like(self, other, method=None, copy=True, limit=None, tolerance=None): - """ return an object with matching indicies to myself + """Return an object with matching indices to myself. Parameters ---------- @@ -1579,15 +1797,15 @@ def reindex_like(self, other, method=None, copy=True, limit=None, ------- reindexed : same as input """ - d = other._construct_axes_dict(axes=self._AXIS_ORDERS, - method=method, copy=copy, limit=limit, - tolerance=tolerance) + d = other._construct_axes_dict(axes=self._AXIS_ORDERS, method=method, + copy=copy, limit=limit, + tolerance=tolerance) return self.reindex(**d) def drop(self, labels, axis=0, level=None, inplace=False, errors='raise'): """ - Return new object with labels in requested axis removed + Return new object with labels in requested axis removed. Parameters ---------- @@ -1629,8 +1847,8 @@ def drop(self, labels, axis=0, level=None, inplace=False, errors='raise'): if level is not None: if not isinstance(axis, MultiIndex): raise AssertionError('axis must be a MultiIndex') - indexer = ~lib.ismember(axis.get_level_values(level).values, - set(labels)) + indexer = ~lib.ismember( + axis.get_level_values(level).values, set(labels)) else: indexer = ~axis.isin(labels) @@ -1646,7 +1864,7 @@ def drop(self, labels, axis=0, level=None, inplace=False, errors='raise'): def _update_inplace(self, result, verify_is_copy=True): """ - replace self internals with result. + Replace self internals with result. Parameters ---------- @@ -1659,7 +1877,7 @@ def _update_inplace(self, result, verify_is_copy=True): self._reset_cache() self._clear_item_cache() - self._data = getattr(result,'_data',result) + self._data = getattr(result, '_data', result) self._maybe_update_cacher(verify_is_copy=verify_is_copy) def add_prefix(self, prefix): @@ -1679,7 +1897,7 @@ def add_prefix(self, prefix): def add_suffix(self, suffix): """ - Concatenate suffix string with panel items names + Concatenate suffix string with panel items names. Parameters ---------- @@ -1702,14 +1920,16 @@ def add_suffix(self, suffix): by : string name or list of names which refer to the axis items axis : %(axes)s to direct sorting ascending : bool or list of bool - Sort ascending vs. descending. Specify list for multiple sort orders. - If this is a list of bools, must match the length of the by + Sort ascending vs. descending. Specify list for multiple sort + orders. If this is a list of bools, must match the length of + the by. inplace : bool if True, perform operation in-place kind : {`quicksort`, `mergesort`, `heapsort`} - Choice of sorting algorithm. See also ndarray.np.sort for more information. - `mergesort` is the only stable algorithm. For DataFrames, this option is - only applied when sorting on a single column or label. + Choice of sorting algorithm. See also ndarray.np.sort for more + information. `mergesort` is the only stable algorithm. For + DataFrames, this option is only applied when sorting on a single + column or label. 
na_position : {'first', 'last'} `first` puts NaNs at the beginning, `last` puts NaNs at the end @@ -1717,6 +1937,7 @@ def add_suffix(self, suffix): ------- sorted_obj : %(klass)s """ + def sort_values(self, by, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last'): raise AbstractMethodError(self) @@ -1734,14 +1955,15 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False, inplace : bool if True, perform operation in-place kind : {`quicksort`, `mergesort`, `heapsort`} - Choice of sorting algorithm. See also ndarray.np.sort for more information. - `mergesort` is the only stable algorithm. For DataFrames, this option is - only applied when sorting on a single column or label. + Choice of sorting algorithm. See also ndarray.np.sort for more + information. `mergesort` is the only stable algorithm. For + DataFrames, this option is only applied when sorting on a single + column or label. na_position : {'first', 'last'} `first` puts NaNs at the beginning, `last` puts NaNs at the end sort_remaining : bool - if true and sorting by level and index is multilevel, sort by other levels - too (in order) after sorting by specified level + if true and sorting by level and index is multilevel, sort by other + levels too (in order) after sorting by specified level Returns ------- @@ -1784,7 +2006,8 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, Please note: this is only applicable to DataFrames/Series with a monotonically increasing/decreasing index. * default: don't fill gaps - * pad / ffill: propagate last valid observation forward to next valid + * pad / ffill: propagate last valid observation forward to next + valid * backfill / bfill: use next valid observation to fill gap * nearest: use nearest valid observations to fill gap copy : boolean, default True @@ -1923,6 +2146,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, ------- reindexed : %(klass)s """ + # TODO: Decide if we care about having different examples for different # kinds @@ -1940,7 +2164,7 @@ def reindex(self, *args, **kwargs): if kwargs: raise TypeError('reindex() got an unexpected keyword ' - 'argument "{0}"'.format(list(kwargs.keys())[0])) + 'argument "{0}"'.format(list(kwargs.keys())[0])) self._consolidate_inplace() @@ -1960,12 +2184,12 @@ def reindex(self, *args, **kwargs): pass # perform the reindex on the axes - return self._reindex_axes(axes, level, limit, tolerance, - method, fill_value, copy).__finalize__(self) + return self._reindex_axes(axes, level, limit, tolerance, method, + fill_value, copy).__finalize__(self) - def _reindex_axes(self, axes, level, limit, tolerance, method, - fill_value, copy): - """ perform the reinxed for all the axes """ + def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, + copy): + """Perform the reindex for all the axes.""" obj = self for a in self._AXIS_ORDERS: labels = axes[a] @@ -1973,30 +2197,29 @@ def _reindex_axes(self, axes, level, limit, tolerance, method, continue ax = self._get_axis(a) - new_index, indexer = ax.reindex( - labels, level=level, limit=limit, tolerance=tolerance, - method=method) + new_index, indexer = ax.reindex(labels, level=level, limit=limit, + tolerance=tolerance, method=method) axis = self._get_axis_number(a) - obj = obj._reindex_with_indexers( - {axis: [new_index, indexer]}, - fill_value=fill_value, copy=copy, allow_dups=False) + obj = obj._reindex_with_indexers({axis: [new_index, indexer]}, + fill_value=fill_value, + copy=copy, allow_dups=False) return obj def 
_needs_reindex_multi(self, axes, method, level): - """ check if we do need a multi reindex """ + """Check if we do need a multi reindex.""" return ((com._count_not_none(*axes.values()) == self._AXIS_LEN) and method is None and level is None and not self._is_mixed_type) def _reindex_multi(self, axes, copy, fill_value): return NotImplemented - _shared_docs['reindex_axis'] = ( - """Conform input object to new index with optional filling logic, - placing NA/NaN in locations having no value in the previous index. A - new object is produced unless the new index is equivalent to the - current one and copy=False + _shared_docs[ + 'reindex_axis'] = ("""Conform input object to new index with optional + filling logic, placing NA/NaN in locations having no value in the + previous index. A new object is produced unless the new index is + equivalent to the current one and copy=False Parameters ---------- @@ -2007,7 +2230,8 @@ def _reindex_multi(self, axes, copy, fill_value): method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}, optional Method to use for filling holes in reindexed DataFrame: * default: don't fill gaps - * pad / ffill: propagate last valid observation forward to next valid + * pad / ffill: propagate last valid observation forward to next + valid * backfill / bfill: use next valid observation to fill gap * nearest: use nearest valid observations to fill gap copy : boolean, default True @@ -2028,7 +2252,7 @@ def _reindex_multi(self, axes, copy, fill_value): -------- >>> df.reindex_axis(['A', 'B', 'C'], axis=1) - See also + See Also -------- reindex, reindex_like @@ -2047,15 +2271,14 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True, method = mis._clean_reindex_fill_method(method) new_index, indexer = axis_values.reindex(labels, method, level, limit=limit) - return self._reindex_with_indexers( - {axis: [new_index, indexer]}, fill_value=fill_value, copy=copy) + return self._reindex_with_indexers({axis: [new_index, indexer]}, + fill_value=fill_value, copy=copy) - def _reindex_with_indexers(self, reindexers, - fill_value=np.nan, copy=False, + def _reindex_with_indexers(self, reindexers, fill_value=np.nan, copy=False, allow_dups=False): - """ allow_dups indicates an internal call here """ + """allow_dups indicates an internal call here """ - # reindex doing multiple operations on different axes if indiciated + # reindex doing multiple operations on different axes if indicated new_data = self._data for axis in sorted(reindexers.keys()): index, indexer = reindexers[axis] @@ -2090,12 +2313,8 @@ def _reindex_axis(self, new_index, fill_method, axis, copy): def filter(self, items=None, like=None, regex=None, axis=None): """ - Subset rows or columns of dataframe according to labels in the index. + Restrict the info axis to set of items or wildcard - Note that this routine does not filter a dataframe on its contents. The filter is - applied to the labels of the index. - This method is a thin veneer on top of :ref:`DateFrame Select ` - Parameters ---------- items : list-like @@ -2105,41 +2324,14 @@ def filter(self, items=None, like=None, regex=None, axis=None): regex : string (regular expression) Keep info axis with re.search(regex, col) == True axis : int or None - The axis to filter on. 
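A hedged equivalence sketch for ``reindex_axis`` above — it is the
positional-axis spelling of ``reindex`` (reusing the mouse/rabbit frame from
the filter examples):

    >>> df.reindex_axis(['one', 'two'], axis=1).equals(
    ...     df.reindex(columns=['one', 'two']))
    True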
- - Examples - -------- - >>> df - one two three - mouse 1 2 3 - rabbit 4 5 6 - - >>> # select columns by name - >>> df.filter(items=['one', 'three']) - one three - mouse 1 3 - rabbit 4 6 - - >>> # select columns by regular expression - >>> df.filter(regex='e$', axis=1) - one three - mouse 1 3 - rabbit 4 6 - - >>> # select rows containing 'bbi' - >>> df.filter(like='bbi', axis=0) - one two three - rabbit 4 5 6 - - Returns - ------- - same type as input object with filtered info axis + The axis to filter on. By default this is the info axis. The "info + axis" is the axis that is used when indexing with ``[]``. For + example, ``df = DataFrame({'a': [1, 2, 3, 4]]}); df['a']``. So, + the ``DataFrame`` columns are the info axis. Notes ----- - The ``items``, ``like``, and ``regex`` parameters should be mutually exclusive, but this is not checked. - - ``axis`` defaults to the info axis that is used when indexing with ``[]``. + Arguments are mutually exclusive, but this is not checked for """ import re @@ -2150,11 +2342,11 @@ def filter(self, items=None, like=None, regex=None, axis=None): axis_values = self._get_axis(axis_name) if items is not None: - return self.reindex(**{axis_name: [r for r in items - if r in axis_values]}) + return self.reindex(**{axis_name: + [r for r in items if r in axis_values]}) elif like: - matchf = lambda x: (like in x if isinstance(x, string_types) - else like in str(x)) + matchf = lambda x: (like in x if isinstance(x, string_types) else + like in str(x)) return self.select(matchf, axis=axis_name) elif regex: matcher = re.compile(regex) @@ -2167,22 +2359,18 @@ def head(self, n=5): """ Returns first n rows """ - l = len(self) - if l == 0 or n==0: - return self return self.iloc[:n] def tail(self, n=5): """ Returns last n rows """ - l = len(self) - if l == 0 or n == 0: - return self + if n == 0: + return self.iloc[0:0] return self.iloc[-n:] - - def sample(self, n=None, frac=None, replace=False, weights=None, random_state=None, axis=None): + def sample(self, n=None, frac=None, replace=False, weights=None, + random_state=None, axis=None): """ Returns a random sample of items from an axis of object. @@ -2287,22 +2475,28 @@ def sample(self, n=None, frac=None, replace=False, weights=None, random_state=No try: weights = self[weights] except KeyError: - raise KeyError("String passed to weights not a valid column") + raise KeyError("String passed to weights not a " + "valid column") else: - raise ValueError("Strings can only be passed to weights when sampling from rows on a DataFrame") + raise ValueError("Strings can only be passed to " + "weights when sampling from rows on " + "a DataFrame") else: - raise ValueError("Strings cannot be passed as weights when sampling from a Series or Panel.") + raise ValueError("Strings cannot be passed as weights " + "when sampling from a Series or Panel.") weights = pd.Series(weights, dtype='float64') if len(weights) != axis_length: - raise ValueError("Weights and axis to be sampled must be of same length") + raise ValueError("Weights and axis to be sampled must be of " + "same length") if (weights == np.inf).any() or (weights == -np.inf).any(): raise ValueError("weight vector may not include `inf` values") if (weights < 0).any(): - raise ValueError("weight vector many not include negative values") + raise ValueError("weight vector many not include negative " + "values") # If has nan, set to zero. 
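        # A minimal sketch of the weights path in user terms:
        #   df.sample(n=2, weights='one')         # weight rows by column 'one'
        #   df.sample(frac=0.5, random_state=1)   # reproducible half-sample
        # String weights are only valid for DataFrame rows, as enforced above;
        # NaN weights are zeroed next.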
weights = weights.fillna(0) @@ -2324,16 +2518,17 @@ def sample(self, n=None, frac=None, replace=False, weights=None, random_state=No elif n is None and frac is not None: n = int(round(frac * axis_length)) elif n is not None and frac is not None: - raise ValueError('Please enter a value for `frac` OR `n`, not both') + raise ValueError('Please enter a value for `frac` OR `n`, not ' + 'both') # Check for negative sizes if n < 0: - raise ValueError("A negative number of rows requested. Please provide positive value.") + raise ValueError("A negative number of rows requested. Please " + "provide positive value.") locs = rs.choice(axis_length, size=n, replace=replace, p=weights) return self.take(locs, axis=axis, is_copy=False) - _shared_docs['pipe'] = (""" Apply func(self, \*args, \*\*kwargs) @@ -2383,26 +2578,26 @@ def sample(self, n=None, frac=None, replace=False, weights=None, random_state=No pandas.DataFrame.apply pandas.DataFrame.applymap pandas.Series.map - """ - ) + """) + @Appender(_shared_docs['pipe'] % _shared_doc_kwargs) def pipe(self, func, *args, **kwargs): if isinstance(func, tuple): func, target = func if target in kwargs: - msg = '%s is both the pipe target and a keyword argument' % target - raise ValueError(msg) + raise ValueError('%s is both the pipe target and a keyword ' + 'argument' % target) kwargs[target] = self return func(*args, **kwargs) else: return func(self, *args, **kwargs) - #---------------------------------------------------------------------- + # ---------------------------------------------------------------------- # Attribute access def __finalize__(self, other, method=None, **kwargs): """ - propagate metadata from other to self + Propagate metadata from other to self. Parameters ---------- @@ -2421,12 +2616,12 @@ def __getattr__(self, name): """After regular attribute access, try looking up the name This allows simpler access to columns for interactive use. """ + # Note: obj.x will always call obj.__getattribute__('x') prior to # calling obj.__getattr__('x'). - if (name in self._internal_names_set - or name in self._metadata - or name in self._accessors): + if (name in self._internal_names_set or name in self._metadata or + name in self._accessors): return object.__getattribute__(self, name) else: if name in self._info_axis: @@ -2435,7 +2630,9 @@ def __getattr__(self, name): def __setattr__(self, name, value): """After regular attribute access, try setting the name - This allows simpler access to columns for interactive use.""" + This allows simpler access to columns for interactive use. + """ + # first try regular attribute access via __getattribute__, so that # e.g. ``obj.x`` and ``obj.x = 4`` will always reference/modify # the same attribute. @@ -2464,14 +2661,16 @@ def __setattr__(self, name, value): except (AttributeError, TypeError): object.__setattr__(self, name, value) - #---------------------------------------------------------------------- + # ---------------------------------------------------------------------- # Getting and setting elements - #---------------------------------------------------------------------- + # ---------------------------------------------------------------------- # Consolidation of internals def _protect_consolidate(self, f): - """ consolidate _data. 
if the blocks have changed, then clear the cache """ + """Consolidate _data -- if the blocks have changed, then clear the + cache + """ blocks_before = len(self._data.blocks) result = f() if len(self._data.blocks) != blocks_before: @@ -2479,9 +2678,11 @@ def _protect_consolidate(self, f): return result def _consolidate_inplace(self): - """ we are inplace consolidating; return None """ + """Consolidate data in place and return None""" + def f(): self._data = self._data.consolidate() + self._protect_consolidate(f) def consolidate(self, inplace=False): @@ -2534,8 +2735,8 @@ def _check_inplace_setting(self, value): except: pass - raise TypeError( - 'Cannot do inplace boolean setting on mixed-types with a non np.nan value') + raise TypeError('Cannot do inplace boolean setting on ' + 'mixed-types with a non np.nan value') return True @@ -2546,7 +2747,7 @@ def _get_numeric_data(self): def _get_bool_data(self): return self._constructor(self._data.get_bool_data()).__finalize__(self) - #---------------------------------------------------------------------- + # ---------------------------------------------------------------------- # Internal Interface Methods def as_matrix(self, columns=None): @@ -2609,7 +2810,7 @@ def values(self): @property def _values(self): - """ internal implementation """ + """internal implementation""" return self.values @property @@ -2618,22 +2819,22 @@ def _get_values(self): return self.as_matrix() def get_values(self): - """ same as values (but handles sparseness conversions) """ + """same as values (but handles sparseness conversions)""" return self.as_matrix() def get_dtype_counts(self): - """ Return the counts of dtypes in this object """ + """Return the counts of dtypes in this object.""" from pandas import Series return Series(self._data.get_dtype_counts()) def get_ftype_counts(self): - """ Return the counts of ftypes in this object """ + """Return the counts of ftypes in this object.""" from pandas import Series return Series(self._data.get_ftype_counts()) @property def dtypes(self): - """ Return the dtypes in this object """ + """Return the dtypes in this object.""" from pandas import Series return Series(self._data.get_dtypes(), index=self._info_axis, dtype=np.object_) @@ -2683,7 +2884,7 @@ def as_blocks(self, copy=True): @property def blocks(self): - "Internal property, property synonym for as_blocks()" + """Internal property, property synonym for as_blocks()""" return self.as_blocks() def astype(self, dtype, copy=True, raise_on_error=True, **kwargs): @@ -2702,8 +2903,8 @@ def astype(self, dtype, copy=True, raise_on_error=True, **kwargs): casted : type of caller """ - mgr = self._data.astype( - dtype=dtype, copy=copy, raise_on_error=raise_on_error, **kwargs) + mgr = self._data.astype(dtype=dtype, copy=copy, + raise_on_error=raise_on_error, **kwargs) return self._constructor(mgr).__finalize__(self) def copy(self, deep=True): @@ -2749,16 +2950,16 @@ def _convert(self, datetime=False, numeric=False, timedelta=False, converted : same as input object """ return self._constructor( - self._data.convert(datetime=datetime, - numeric=numeric, - timedelta=timedelta, - coerce=coerce, - copy=copy)).__finalize__(self) + self._data.convert(datetime=datetime, numeric=numeric, + timedelta=timedelta, coerce=coerce, + copy=copy)).__finalize__(self) # TODO: Remove in 0.18 or 2017, which ever is sooner def convert_objects(self, convert_dates=True, convert_numeric=False, convert_timedeltas=True, copy=True): """ + Deprecated. 
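A minimal sketch of the explicit converters named in the See Also below:

    >>> pd.to_numeric(pd.Series(['1', '2', '3']))
    0    1
    1    2
    2    3
    dtype: int64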
+ Attempt to infer better dtype for object columns Parameters @@ -2777,6 +2978,13 @@ def convert_objects(self, convert_dates=True, convert_numeric=False, conversion was done). Note: This is meant for internal use, and should not be confused with inplace. + See Also + -------- + pandas.to_datetime : Convert argument to datetime. + pandas.to_timedelta : Convert argument to timedelta. + pandas.to_numeric : Return a fixed frequency timedelta index, + with day as the default. + Returns ------- converted : same as input object @@ -2792,20 +3000,20 @@ def convert_objects(self, convert_dates=True, convert_numeric=False, convert_timedeltas=convert_timedeltas, copy=copy)).__finalize__(self) - #---------------------------------------------------------------------- + # ---------------------------------------------------------------------- # Filling NA's - _shared_docs['fillna'] = ( - """ + _shared_docs['fillna'] = (""" Fill NA/NaN values using the specified method Parameters ---------- value : scalar, dict, Series, or DataFrame - Value to use to fill holes (e.g. 0), alternately a dict/Series/DataFrame of - values specifying which value to use for each index (for a Series) or - column (for a DataFrame). (values not in the dict/Series/DataFrame will not be - filled). This value cannot be a list. + Value to use to fill holes (e.g. 0), alternately a + dict/Series/DataFrame of values specifying which value to use for + each index (for a Series) or column (for a DataFrame). (values not + in the dict/Series/DataFrame will not be filled). This value cannot + be a list. method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None Method to use for filling holes in reindexed Series pad / ffill: propagate last valid observation forward to next valid @@ -2827,15 +3035,14 @@ def convert_objects(self, convert_dates=True, convert_numeric=False, or the string 'infer' which will try to downcast to an appropriate equal type (e.g. 
float64 to int64 if possible) - See also + See Also -------- reindex, asfreq Returns ------- filled : %(klass)s - """ - ) + """) @Appender(_shared_docs['fillna'] % _shared_doc_kwargs) def fillna(self, value=None, method=None, axis=None, inplace=False, @@ -2868,9 +3075,8 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, # > 3d if self.ndim > 3: - raise NotImplementedError( - 'Cannot fillna with a method for > 3dims' - ) + raise NotImplementedError('Cannot fillna with a method for > ' + '3dims') # 3d elif self.ndim == 3: @@ -2882,12 +3088,9 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, # 2d or less method = mis._clean_fill_method(method) - new_data = self._data.interpolate(method=method, - axis=axis, - limit=limit, - inplace=inplace, - coerce=True, - downcast=downcast) + new_data = self._data.interpolate(method=method, axis=axis, + limit=limit, inplace=inplace, + coerce=True, downcast=downcast) else: if method is not None: raise ValueError('cannot specify both a fill method and value') @@ -2902,10 +3105,10 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, elif not com.is_list_like(value): pass else: - raise ValueError("invalid fill value with a %s" % type(value)) + raise ValueError("invalid fill value with a %s" % + type(value)) - new_data = self._data.fillna(value=value, - limit=limit, + new_data = self._data.fillna(value=value, limit=limit, inplace=inplace, downcast=downcast) @@ -2923,8 +3126,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, obj.fillna(v, limit=limit, inplace=True) return result elif not com.is_list_like(value): - new_data = self._data.fillna(value=value, - limit=limit, + new_data = self._data.fillna(value=value, limit=limit, inplace=inplace, downcast=downcast) elif isinstance(value, DataFrame) and self.ndim == 2: @@ -2938,12 +3140,12 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, return self._constructor(new_data).__finalize__(self) def ffill(self, axis=None, inplace=False, limit=None, downcast=None): - "Synonym for NDFrame.fillna(method='ffill')" + """Synonym for NDFrame.fillna(method='ffill')""" return self.fillna(method='ffill', axis=axis, inplace=inplace, limit=limit, downcast=downcast) def bfill(self, axis=None, inplace=False, limit=None, downcast=None): - "Synonym for NDFrame.fillna(method='bfill')" + """Synonym for NDFrame.fillna(method='bfill')""" return self.fillna(method='bfill', axis=axis, inplace=inplace, limit=limit, downcast=downcast) @@ -3013,7 +3215,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, The method to use when for replacement, when ``to_replace`` is a ``list``. 
- See also + See Also -------- NDFrame.reindex NDFrame.asfreq @@ -3120,8 +3322,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, if c in value and c in self: res[c] = res[c].replace(to_replace=src, value=value[c], - inplace=False, - regex=regex) + inplace=False, regex=regex) return None if inplace else res # {'A': NA} -> 0 @@ -3151,13 +3352,11 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, else: # [NA, ''] -> 0 new_data = self._data.replace(to_replace=to_replace, - value=value, - inplace=inplace, + value=value, inplace=inplace, regex=regex) elif to_replace is None: if not (com.is_re_compilable(regex) or - com.is_list_like(regex) or - is_dictlike(regex)): + com.is_list_like(regex) or is_dictlike(regex)): raise TypeError("'regex' must be a string or a compiled " "regular expression or a list or dict of " "strings or regular expressions, you " @@ -3174,14 +3373,14 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, for k, v in compat.iteritems(value): if k in self: new_data = new_data.replace(to_replace=to_replace, - value=v, - filter=[k], + value=v, filter=[k], inplace=inplace, regex=regex) elif not com.is_list_like(value): # NA -> 0 - new_data = self._data.replace(to_replace=to_replace, value=value, - inplace=inplace, regex=regex) + new_data = self._data.replace(to_replace=to_replace, + value=value, inplace=inplace, + regex=regex) else: msg = ('Invalid "to_replace" type: ' '{0!r}').format(type(to_replace).__name__) @@ -3197,8 +3396,8 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, """ Interpolate values according to different methods. - Please note that only ``method='linear'`` is supported for DataFrames/Series - with a MultiIndex. + Please note that only ``method='linear'`` is supported for + DataFrames/Series with a MultiIndex. Parameters ---------- @@ -3222,8 +3421,8 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, wrappers around the scipy interpolation methods of similar names. These use the actual numerical values of the index. See the scipy documentation for more on their behavior - `here `__ - `and here `__ + `here `__ # noqa + `and here `__ # noqa axis : {0, 1}, default 0 * 0: fill column-by-column @@ -3283,16 +3482,19 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, else: alt_ax = ax - if isinstance(_maybe_transposed_self.index, MultiIndex) and method != 'linear': + if (isinstance(_maybe_transposed_self.index, MultiIndex) and + method != 'linear'): raise ValueError("Only `method=linear` interpolation is supported " "on MultiIndexes.") - if _maybe_transposed_self._data.get_dtype_counts().get('object') == len(_maybe_transposed_self.T): + if _maybe_transposed_self._data.get_dtype_counts().get( + 'object') == len(_maybe_transposed_self.T): raise TypeError("Cannot interpolate with all NaNs.") # create/use the index if method == 'linear': - index = np.arange(len(_maybe_transposed_self._get_axis(alt_ax))) # prior default + # prior default + index = np.arange(len(_maybe_transposed_self._get_axis(alt_ax))) else: index = _maybe_transposed_self._get_axis(alt_ax) @@ -3300,17 +3502,13 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, raise NotImplementedError("Interpolation with NaNs in the index " "has not been implemented. 
Try filling " "those NaNs before interpolating.") - new_data = _maybe_transposed_self._data.interpolate( - method=method, - axis=ax, - index=index, - values=_maybe_transposed_self, - limit=limit, - limit_direction=limit_direction, - inplace=inplace, - downcast=downcast, - **kwargs - ) + data = _maybe_transposed_self._data + new_data = data.interpolate(method=method, axis=ax, index=index, + values=_maybe_transposed_self, limit=limit, + limit_direction=limit_direction, + inplace=inplace, downcast=downcast, + **kwargs) + if inplace: if axis == 1: new_data = self._constructor(new_data).T._data @@ -3321,14 +3519,14 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, res = res.T return res - #---------------------------------------------------------------------- + # ---------------------------------------------------------------------- # Action Methods def isnull(self): """ - Return a boolean same-sized object indicating if the values are null + Return a boolean same-sized object indicating if the values are null. - See also + See Also -------- notnull : boolean inverse of isnull """ @@ -3336,9 +3534,9 @@ def isnull(self): def notnull(self): """Return a boolean same-sized object indicating if the values are - not null + not null. - See also + See Also -------- isnull : boolean inverse of notnull """ @@ -3346,7 +3544,7 @@ def notnull(self): def clip(self, lower=None, upper=None, out=None, axis=None): """ - Trim values at input threshold(s) + Trim values at input threshold(s). Parameters ---------- @@ -3408,7 +3606,7 @@ def clip(self, lower=None, upper=None, out=None, axis=None): def clip_upper(self, threshold, axis=None): """ - Return copy of input with values above given value(s) truncated + Return copy of input with values above given value(s) truncated. Parameters ---------- @@ -3416,7 +3614,7 @@ def clip_upper(self, threshold, axis=None): axis : int or string axis name, optional Align object with threshold along the given axis. - See also + See Also -------- clip @@ -3432,7 +3630,7 @@ def clip_upper(self, threshold, axis=None): def clip_lower(self, threshold, axis=None): """ - Return copy of the input with values below given value(s) truncated + Return copy of the input with values below given value(s) truncated. Parameters ---------- @@ -3440,7 +3638,7 @@ def clip_lower(self, threshold, axis=None): axis : int or string axis name, optional Align object with threshold along the given axis. - See also + See Also -------- clip @@ -3458,7 +3656,7 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False): """ Group series using mapper (dict or key function, apply given function - to group, return result as series) or by a series of columns + to group, return result as series) or by a series of columns. Parameters ---------- @@ -3477,8 +3675,8 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, effectively "SQL-style" grouped output sort : boolean, default True Sort group keys. Get better performance by turning this off. - Note this does not influence the order of observations within each group. - groupby preserves the order of rows within each group. + Note this does not influence the order of observations within each + group. groupby preserves the order of rows within each group. 
group_keys : boolean, default True When calling apply, add group keys to index to identify pieces squeeze : boolean, default False @@ -3531,12 +3729,11 @@ def asfreq(self, freq, method=None, how=None, normalize=False): converted : type of caller """ from pandas.tseries.resample import asfreq - return asfreq(self, freq, method=method, how=how, - normalize=normalize) + return asfreq(self, freq, method=method, how=how, normalize=normalize) def at_time(self, time, asof=False): """ - Select values at particular time of day (e.g. 9:30AM) + Select values at particular time of day (e.g. 9:30AM). Parameters ---------- @@ -3555,7 +3752,7 @@ def at_time(self, time, asof=False): def between_time(self, start_time, end_time, include_start=True, include_end=True): """ - Select values between particular times of the day (e.g., 9:00-9:30 AM) + Select values between particular times of the day (e.g., 9:00-9:30 AM). Parameters ---------- @@ -3576,9 +3773,9 @@ def between_time(self, start_time, end_time, include_start=True, except AttributeError: raise TypeError('Index must be DatetimeIndex') - def resample(self, rule, how=None, axis=0, fill_method=None, - closed=None, label=None, convention='start', - kind=None, loffset=None, limit=None, base=0): + def resample(self, rule, how=None, axis=0, fill_method=None, closed=None, + label=None, convention='start', kind=None, loffset=None, + limit=None, base=0): """ Convenience method for frequency conversion and resampling of regular time-series data. @@ -3587,22 +3784,14 @@ def resample(self, rule, how=None, axis=0, fill_method=None, ---------- rule : string the offset string or object representing target conversion - how : string - method for down- or re-sampling, default to 'mean' for - downsampling axis : int, optional, default 0 - fill_method : string, default None - fill_method for upsampling closed : {'right', 'left'} Which side of bin interval is closed label : {'right', 'left'} Which bin edge label to label bucket with convention : {'start', 'end', 's', 'e'} - kind : "period"/"timestamp" loffset : timedelta Adjust the resampled time labels - limit : int, default None - Maximum size gap to when reindexing with fill_method base : int, default 0 For frequencies that evenly subdivide 1 day, the "origin" of the aggregated intervals. For example, for '5min' frequency, base could @@ -3631,7 +3820,7 @@ def resample(self, rule, how=None, axis=0, fill_method=None, Downsample the series into 3 minute bins and sum the values of the timestamps falling into a bin. - >>> series.resample('3T', how='sum') + >>> series.resample('3T').sum() 2000-01-01 00:00:00 3 2000-01-01 00:03:00 12 2000-01-01 00:06:00 21 @@ -3647,7 +3836,7 @@ def resample(self, rule, how=None, axis=0, fill_method=None, To include this value close the right side of the bin interval as illustrated in the example below this one. - >>> series.resample('3T', how='sum', label='right') + >>> series.resample('3T', label='right').sum() 2000-01-01 00:03:00 3 2000-01-01 00:06:00 12 2000-01-01 00:09:00 21 @@ -3656,7 +3845,7 @@ def resample(self, rule, how=None, axis=0, fill_method=None, Downsample the series into 3 minute bins as above, but close the right side of the bin interval. - >>> series.resample('3T', how='sum', label='right', closed='right') + >>> series.resample('3T', label='right', closed='right').sum() 2000-01-01 00:00:00 0 2000-01-01 00:03:00 6 2000-01-01 00:06:00 15 @@ -3665,7 +3854,7 @@ def resample(self, rule, how=None, axis=0, fill_method=None, Upsample the series into 30 second bins. 
- >>> series.resample('30S')[0:5] #select first 5 rows + >>> series.resample('30S').asfreq()[0:5] #select first 5 rows 2000-01-01 00:00:00 0 2000-01-01 00:00:30 NaN 2000-01-01 00:01:00 1 @@ -3676,7 +3865,7 @@ def resample(self, rule, how=None, axis=0, fill_method=None, Upsample the series into 30 second bins and fill the ``NaN`` values using the ``pad`` method. - >>> series.resample('30S', fill_method='pad')[0:5] + >>> series.resample('30S').pad()[0:5] 2000-01-01 00:00:00 0 2000-01-01 00:00:30 0 2000-01-01 00:01:00 1 @@ -3687,7 +3876,7 @@ def resample(self, rule, how=None, axis=0, fill_method=None, Upsample the series into 30 second bins and fill the ``NaN`` values using the ``bfill`` method. - >>> series.resample('30S', fill_method='bfill')[0:5] + >>> series.resample('30S').bfill()[0:5] 2000-01-01 00:00:00 0 2000-01-01 00:00:30 1 2000-01-01 00:01:00 1 @@ -3695,31 +3884,74 @@ def resample(self, rule, how=None, axis=0, fill_method=None, 2000-01-01 00:02:00 2 Freq: 30S, dtype: int64 - Pass a custom function to ``how``. + Pass a custom function via ``apply`` >>> def custom_resampler(array_like): ... return np.sum(array_like)+5 - >>> series.resample('3T', how=custom_resampler) + >>> series.resample('3T').apply(custom_resampler) 2000-01-01 00:00:00 8 2000-01-01 00:03:00 17 2000-01-01 00:06:00 26 Freq: 3T, dtype: int64 """ + from pandas.tseries.resample import resample - from pandas.tseries.resample import TimeGrouper axis = self._get_axis_number(axis) - sampler = TimeGrouper(rule, label=label, closed=closed, how=how, - axis=axis, kind=kind, loffset=loffset, - fill_method=fill_method, convention=convention, - limit=limit, base=base) - return sampler.resample(self).__finalize__(self) + r = resample(self, freq=rule, label=label, closed=closed, + axis=axis, kind=kind, loffset=loffset, + fill_method=fill_method, convention=convention, + limit=limit, base=base) + + # deprecation warnings + # but call methods anyhow + + if how is not None: + + # .resample(..., how='sum') + if isinstance(how, compat.string_types): + method = "{0}()".format(how) + + # .resample(..., how=lambda x: ....) + else: + method = ".apply()" + + # if we have both a how and fill_method, then show + # the following warning + if fill_method is None: + warnings.warn("how in .resample() is deprecated\n" + "the new syntax is " + ".resample(...).{method}".format( + method=method), + FutureWarning, stacklevel=2) + r = r.aggregate(how) + + if fill_method is not None: + + # show the prior function call + method = '.' + method if how is not None else '' + + args = "limit={0}".format(limit) if limit is not None else "" + warnings.warn("fill_method is deprecated to .resample()\n" + "the new syntax is .resample(...){method}" + ".{fill_method}({args})".format( + method=method, + fill_method=fill_method, + args=args), + FutureWarning, stacklevel=2) + + if how is not None: + r = getattr(r, fill_method)(limit=limit) + else: + r = r.aggregate(fill_method, limit=limit) + + return r def first(self, offset): """ Convenience method for subsetting initial periods of time series data - based on a date offset + based on a date offset. Parameters ---------- @@ -3727,7 +3959,7 @@ def first(self, offset): Examples -------- - ts.last('10D') -> First 10 days + ts.first('10D') -> First 10 days Returns ------- @@ -3754,7 +3986,7 @@ def first(self, offset): def last(self, offset): """ Convenience method for subsetting final periods of time series data - based on a date offset + based on a date offset. 
Parameters ---------- @@ -3782,8 +4014,67 @@ def last(self, offset): start = self.index.searchsorted(start_date, side='right') return self.ix[start:] - _shared_docs['align'] = ( + def rank(self, axis=0, method='average', numeric_only=None, + na_option='keep', ascending=True, pct=False): """ + Compute numerical data ranks (1 through n) along axis. Equal values are + assigned a rank that is the average of the ranks of those values + + Parameters + ---------- + axis: {0 or 'index', 1 or 'columns'}, default 0 + index to direct ranking + method : {'average', 'min', 'max', 'first', 'dense'} + * average: average rank of group + * min: lowest rank in group + * max: highest rank in group + * first: ranks assigned in order they appear in the array + * dense: like 'min', but rank always increases by 1 between groups + numeric_only : boolean, default None + Include only float, int, boolean data. Valid only for DataFrame or + Panel objects + na_option : {'keep', 'top', 'bottom'} + * keep: leave NA values where they are + * top: smallest rank if ascending + * bottom: smallest rank if descending + ascending : boolean, default True + False for ranks by high (1) to low (N) + pct : boolean, default False + Computes percentage rank of data + + Returns + ------- + ranks : same type as caller + """ + axis = self._get_axis_number(axis) + + if self.ndim > 2: + msg = "rank does not make sense when ndim > 2" + raise NotImplementedError(msg) + + def ranker(data): + ranks = algos.rank(data.values, axis=axis, method=method, + ascending=ascending, na_option=na_option, + pct=pct) + ranks = self._constructor(ranks, **data._construct_axes_dict()) + return ranks.__finalize__(self) + + # if numeric_only is None, and we can't get anything, we try with + # numeric_only=True + if numeric_only is None: + try: + return ranker(self) + except TypeError: + numeric_only = True + + if numeric_only: + data = self._get_numeric_data() + else: + data = self + + return ranker(data) + + _shared_docs['align'] = (""" Align two object on their axes with the specified join method for each axis Index @@ -3816,8 +4107,7 @@ def last(self, offset): ------- (left, right) : (%(klass)s, type of other) Aligned objects - """ - ) + """) @Appender(_shared_docs['align'] % _shared_doc_kwargs) def align(self, other, join='outer', axis=None, level=None, copy=True, @@ -3828,15 +4118,18 @@ def align(self, other, join='outer', axis=None, level=None, copy=True, if broadcast_axis == 1 and self.ndim != other.ndim: if isinstance(self, Series): - # this means other is a DataFrame, and we need to broadcast self - df = DataFrame(dict((c, self) for c in other.columns), - **other._construct_axes_dict()) - return df._align_frame(other, join=join, axis=axis, level=level, - copy=copy, fill_value=fill_value, - method=method, limit=limit, - fill_axis=fill_axis) + # this means other is a DataFrame, and we need to broadcast + # self + df = DataFrame( + dict((c, self) for c in other.columns), + **other._construct_axes_dict()) + return df._align_frame(other, join=join, axis=axis, + level=level, copy=copy, + fill_value=fill_value, method=method, + limit=limit, fill_axis=fill_axis) elif isinstance(other, Series): - # this means self is a DataFrame, and we need to broadcast other + # this means self is a DataFrame, and we need to broadcast + # other df = DataFrame(dict((c, other) for c in self.columns), **self._construct_axes_dict()) return self._align_frame(df, join=join, axis=axis, level=level, @@ -3869,15 +4162,13 @@ def _align_frame(self, other, join='outer', axis=None, 
level=None, if axis is None or axis == 0: if not self.index.equals(other.index): - join_index, ilidx, iridx = \ - self.index.join(other.index, how=join, level=level, - return_indexers=True) + join_index, ilidx, iridx = self.index.join( + other.index, how=join, level=level, return_indexers=True) if axis is None or axis == 1: if not self.columns.equals(other.columns): - join_columns, clidx, cridx = \ - self.columns.join(other.columns, how=join, level=level, - return_indexers=True) + join_columns, clidx, cridx = self.columns.join( + other.columns, how=join, level=level, return_indexers=True) left = self._reindex_with_indexers({0: [join_index, ilidx], 1: [join_columns, clidx]}, @@ -3906,7 +4197,7 @@ def _align_series(self, other, join='outer', axis=None, level=None, 'axis 0') # equal - if self.index.equals(other.index): + if self.index.equals(other.index): join_index, lidx, ridx = None, None, None else: join_index, lidx, ridx = self.index.join(other.index, how=join, @@ -3923,9 +4214,9 @@ def _align_series(self, other, join='outer', axis=None, level=None, join_index = self.index lidx, ridx = None, None if not self.index.equals(other.index): - join_index, lidx, ridx = \ - self.index.join(other.index, how=join, level=level, - return_indexers=True) + join_index, lidx, ridx = self.index.join( + other.index, how=join, level=level, + return_indexers=True) if lidx is not None: fdata = fdata.reindex_indexer(join_index, lidx, axis=1) @@ -3934,9 +4225,9 @@ def _align_series(self, other, join='outer', axis=None, level=None, join_index = self.columns lidx, ridx = None, None if not self.columns.equals(other.index): - join_index, lidx, ridx = \ - self.columns.join(other.index, how=join, level=level, - return_indexers=True) + join_index, lidx, ridx = self.columns.join( + other.index, how=join, level=level, + return_indexers=True) if lidx is not None: fdata = fdata.reindex_indexer(join_index, lidx, axis=0) @@ -3956,13 +4247,15 @@ def _align_series(self, other, join='outer', axis=None, level=None, # fill fill_na = notnull(fill_value) or (method is not None) if fill_na: - left = left.fillna(fill_value, method=method, limit=limit, axis=fill_axis) + left = left.fillna(fill_value, method=method, limit=limit, + axis=fill_axis) right = right.fillna(fill_value, method=method, limit=limit) - return (left.__finalize__(self), right.__finalize__(other)) + return left.__finalize__(self), right.__finalize__(other) _shared_docs['where'] = (""" Return an object of same shape as self and whose corresponding - entries are from self where cond is %(cond)s and otherwise are from other. + entries are from self where cond is %(cond)s and otherwise are from + other. 
Parameters ---------- @@ -3982,6 +4275,7 @@ def _align_series(self, other, join='outer', axis=None, level=None, ------- wh : same type as caller """) + @Appender(_shared_docs['where'] % dict(_shared_doc_kwargs, cond="True")) def where(self, cond, other=np.nan, inplace=False, axis=None, level=None, try_cast=False, raise_on_error=True): @@ -3993,8 +4287,8 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None, raise ValueError('where requires an ndarray like object for ' 'its condition') if cond.shape != self.shape: - raise ValueError( - 'Array conditional must be same shape as self') + raise ValueError('Array conditional must be same shape as ' + 'self') cond = self._constructor(cond, **self._construct_axes_dict()) if inplace: @@ -4009,9 +4303,8 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None, # align with me if other.ndim <= self.ndim: - _, other = self.align(other, join='left', - axis=axis, level=level, - fill_value=np.nan) + _, other = self.align(other, join='left', axis=axis, + level=level, fill_value=np.nan) # if we are NOT aligned, raise as we cannot where index if (axis is None and @@ -4021,9 +4314,8 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None, # slice me out of the other else: - raise NotImplemented( - "cannot align with a higher dimensional NDFrame" - ) + raise NotImplemented("cannot align with a higher dimensional " + "NDFrame") elif is_list_like(other): @@ -4053,7 +4345,9 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None, other = np.array(other) else: other = np.asarray(other) - other = np.asarray(other, dtype=np.common_type(other, new_other)) + other = np.asarray(other, + dtype=np.common_type(other, + new_other)) # we need to use the new dtype try_quick = False @@ -4101,8 +4395,8 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None, other = new_other else: - raise ValueError( - 'Length of replacements must equal series length') + raise ValueError('Length of replacements must equal ' + 'series length') else: raise ValueError('other must be the same shape as self ' @@ -4144,7 +4438,8 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None, def mask(self, cond, other=np.nan, inplace=False, axis=None, level=None, try_cast=False, raise_on_error=True): return self.where(~cond, other=other, inplace=inplace, axis=axis, - level=level, try_cast=try_cast, raise_on_error=raise_on_error) + level=level, try_cast=try_cast, + raise_on_error=raise_on_error) _shared_docs['shift'] = (""" Shift index by desired number of periods with an optional time freq @@ -4168,6 +4463,7 @@ def mask(self, cond, other=np.nan, inplace=False, axis=None, level=None, ------- shifted : %(klass)s """) + @Appender(_shared_docs['shift'] % _shared_doc_kwargs) def shift(self, periods=1, freq=None, axis=0): if periods == 0: @@ -4219,7 +4515,7 @@ def slice_shift(self, periods=1, axis=0): def tshift(self, periods=1, freq=None, axis=0): """ - Shift the time index, using the index's frequency if available + Shift the time index, using the index's frequency if available. 
Parameters ---------- @@ -4352,10 +4648,10 @@ def _tz_convert(ax, tz): if not hasattr(ax, 'tz_convert'): if len(ax) > 0: ax_name = self._get_axis_name(axis) - raise TypeError('%s is not a valid DatetimeIndex or PeriodIndex' % - ax_name) + raise TypeError('%s is not a valid DatetimeIndex or ' + 'PeriodIndex' % ax_name) else: - ax = DatetimeIndex([],tz=tz) + ax = DatetimeIndex([], tz=tz) else: ax = ax.tz_convert(tz) return ax @@ -4369,18 +4665,19 @@ def _tz_convert(ax, tz): else: if level not in (None, 0, ax.name): raise ValueError("The level {0} is not valid".format(level)) - ax = _tz_convert(ax, tz) + ax = _tz_convert(ax, tz) result = self._constructor(self._data, copy=copy) - result.set_axis(axis,ax) + result.set_axis(axis, ax) return result.__finalize__(self) @deprecate_kwarg(old_arg_name='infer_dst', new_arg_name='ambiguous', - mapping={True: 'infer', False: 'raise'}) + mapping={True: 'infer', + False: 'raise'}) def tz_localize(self, tz, axis=0, level=None, copy=True, ambiguous='raise'): """ - Localize tz-naive TimeSeries to target time zone + Localize tz-naive TimeSeries to target time zone. Parameters ---------- @@ -4392,11 +4689,14 @@ def tz_localize(self, tz, axis=0, level=None, copy=True, copy : boolean, default True Also make a copy of the underlying data ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' - - 'infer' will attempt to infer fall dst-transition hours based on order + - 'infer' will attempt to infer fall dst-transition hours based on + order - bool-ndarray where True signifies a DST time, False designates - a non-DST time (note that this flag is only applicable for ambiguous times) + a non-DST time (note that this flag is only applicable for + ambiguous times) - 'NaT' will return NaT where there are ambiguous times - - 'raise' will raise an AmbiguousTimeError if there are ambiguous times + - 'raise' will raise an AmbiguousTimeError if there are ambiguous + times infer_dst : boolean, default False (DEPRECATED) Attempt to infer fall dst-transition hours based on order @@ -4415,10 +4715,10 @@ def _tz_localize(ax, tz, ambiguous): if not hasattr(ax, 'tz_localize'): if len(ax) > 0: ax_name = self._get_axis_name(axis) - raise TypeError('%s is not a valid DatetimeIndex or PeriodIndex' % - ax_name) + raise TypeError('%s is not a valid DatetimeIndex or ' + 'PeriodIndex' % ax_name) else: - ax = DatetimeIndex([],tz=tz) + ax = DatetimeIndex([], tz=tz) else: ax = ax.tz_localize(tz, ambiguous=ambiguous) return ax @@ -4432,18 +4732,18 @@ def _tz_localize(ax, tz, ambiguous): else: if level not in (None, 0, ax.name): raise ValueError("The level {0} is not valid".format(level)) - ax = _tz_localize(ax, tz, ambiguous) + ax = _tz_localize(ax, tz, ambiguous) result = self._constructor(self._data, copy=copy) - result.set_axis(axis,ax) + result.set_axis(axis, ax) return result.__finalize__(self) - #---------------------------------------------------------------------- + # ---------------------------------------------------------------------- # Numeric Methods def abs(self): """ - Return an object with absolute value taken. Only applicable to objects - that are all numeric + Return an object with absolute value taken--only applicable to objects + that are all numeric. Returns ------- @@ -4463,8 +4763,8 @@ def abs(self): include, exclude : list-like, 'all', or None (default) Specify the form of the returned result. Either: - - None to both (default). The result will include only numeric-typed - columns or, if none are, only categorical columns. + - None to both (default). 
The result will include only + numeric-typed columns or, if none are, only categorical columns. - A list of dtypes or strings to be included/excluded. To select all numeric types use numpy numpy.number. To select categorical objects use type object. See also the select_dtypes @@ -4498,13 +4798,13 @@ def abs(self): The include, exclude arguments are ignored for Series. - See also + See Also -------- DataFrame.select_dtypes """ @Appender(_shared_docs['describe'] % _shared_doc_kwargs) - def describe(self, percentiles=None, include=None, exclude=None ): + def describe(self, percentiles=None, include=None, exclude=None): if self.ndim >= 3: msg = "describe is not implemented on on Panel or PanelND objects." raise NotImplementedError(msg) @@ -4531,20 +4831,20 @@ def pretty_name(x): def describe_numeric_1d(series, percentiles): stat_index = (['count', 'mean', 'std', 'min'] + - [pretty_name(x) for x in percentiles] + ['max']) + [pretty_name(x) for x in percentiles] + ['max']) d = ([series.count(), series.mean(), series.std(), series.min()] + [series.quantile(x) for x in percentiles] + [series.max()]) return pd.Series(d, index=stat_index, name=series.name) - def describe_categorical_1d(data): names = ['count', 'unique'] objcounts = data.value_counts() - result = [data.count(), len(objcounts[objcounts!=0])] + result = [data.count(), len(objcounts[objcounts != 0])] if result[1] > 0: top, freq = objcounts.index[0], objcounts.iloc[0] - if data.dtype == object or com.is_categorical_dtype(data.dtype): + if (data.dtype == object or + com.is_categorical_dtype(data.dtype)): names += ['top', 'freq'] result += [top, freq] @@ -4594,7 +4894,7 @@ def describe_1d(data, percentiles): return d def _check_percentile(self, q): - """ Validate percentiles. Used by describe and quantile """ + """Validate percentiles (used by describe and quantile).""" msg = ("percentiles should all be in the interval [0, 1]. " "Try {0} instead.") @@ -4643,8 +4943,8 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, else: data = self.fillna(method=fill_method, limit=limit, axis=axis) - rs = (data.div(data.shift(periods=periods, freq=freq, - axis=axis, **kwargs)) - 1) + rs = (data.div(data.shift(periods=periods, freq=freq, axis=axis, + **kwargs)) - 1) if freq is None: mask = com.isnull(_values_from_object(self)) np.putmask(rs.values, mask, np.nan) @@ -4661,7 +4961,7 @@ def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwargs): @classmethod def _add_numeric_operations(cls): - """ add the operations to the cls; evaluate the doc strings again """ + """Add the operations to the cls; evaluate the doc strings again""" axis_descr, name, name2 = _doc_parms(cls) @@ -4677,11 +4977,9 @@ def _add_numeric_operations(cls): @Substitution(outname='mad', desc="Return the mean absolute deviation of the values " "for the requested axis", - name1=name, - name2=name2, - axis_descr=axis_descr) + name1=name, name2=name2, axis_descr=axis_descr) @Appender(_num_doc) - def mad(self, axis=None, skipna=None, level=None): + def mad(self, axis=None, skipna=None, level=None): if skipna is None: skipna = True if axis is None: @@ -4696,58 +4994,51 @@ def mad(self, axis=None, skipna=None, level=None): else: demeaned = data.sub(data.mean(axis=1), axis=0) return np.abs(demeaned).mean(axis=axis, skipna=skipna) + cls.mad = mad cls.sem = _make_stat_function_ddof( 'sem', name, name2, axis_descr, - "Return unbiased standard error of the mean over " - "requested axis.\n\nNormalized by N-1 by default. 
" - "This can be changed using the ddof argument", + "Return unbiased standard error of the mean over requested " + "axis.\n\nNormalized by N-1 by default. This can be changed " + "using the ddof argument", nanops.nansem) cls.var = _make_stat_function_ddof( 'var', name, name2, axis_descr, - "Return unbiased variance over requested " - "axis.\n\nNormalized by N-1 by default. " - "This can be changed using the ddof argument", + "Return unbiased variance over requested axis.\n\nNormalized by " + "N-1 by default. This can be changed using the ddof argument", nanops.nanvar) cls.std = _make_stat_function_ddof( 'std', name, name2, axis_descr, - "Return unbiased standard deviation over requested " - "axis.\n\nNormalized by N-1 by default. " - "This can be changed using the ddof argument", + "Return sample standard deviation over requested axis." + "\n\nNormalized by N-1 by default. This can be changed using the " + "ddof argument", nanops.nanstd) @Substitution(outname='compounded', desc="Return the compound percentage of the values for " - "the requested axis", - name1=name, - name2=name2, + "the requested axis", name1=name, name2=name2, axis_descr=axis_descr) @Appender(_num_doc) def compound(self, axis=None, skipna=None, level=None): if skipna is None: skipna = True return (1 + self).prod(axis=axis, skipna=skipna, level=level) - 1 + cls.compound = compound cls.cummin = _make_cum_function( - 'min', name, name2, axis_descr, - "cumulative minimum", - lambda y, axis: np.minimum.accumulate(y, axis), - np.inf, np.nan) + 'min', name, name2, axis_descr, "cumulative minimum", + lambda y, axis: np.minimum.accumulate(y, axis), np.inf, np.nan) cls.cumsum = _make_cum_function( - 'sum', name, name2, axis_descr, - "cumulative sum", + 'sum', name, name2, axis_descr, "cumulative sum", lambda y, axis: y.cumsum(axis), 0., np.nan) cls.cumprod = _make_cum_function( - 'prod', name, name2, axis_descr, - "cumulative product", + 'prod', name, name2, axis_descr, "cumulative product", lambda y, axis: y.cumprod(axis), 1., np.nan) cls.cummax = _make_cum_function( - 'max', name, name2, axis_descr, - "cumulative max", - lambda y, axis: np.maximum.accumulate(y, axis), - -np.inf, np.nan) + 'max', name, name2, axis_descr, "cumulative max", + lambda y, axis: np.maximum.accumulate(y, axis), -np.inf, np.nan) cls.sum = _make_stat_function( 'sum', name, name2, axis_descr, @@ -4763,9 +5054,9 @@ def compound(self, axis=None, skipna=None, level=None): nanops.nanskew) cls.kurt = _make_stat_function( 'kurt', name, name2, axis_descr, - 'Return unbiased kurtosis over requested axis using Fisher''s ' - 'definition of\nkurtosis (kurtosis of normal == 0.0). Normalized ' - 'by N-1\n', + "Return unbiased kurtosis over requested axis using Fisher's " + "definition of\nkurtosis (kurtosis of normal == 0.0). Normalized " + "by N-1\n", nanops.nankurt) cls.kurtosis = cls.kurt cls.prod = _make_stat_function( @@ -4777,20 +5068,24 @@ def compound(self, axis=None, skipna=None, level=None): 'median', name, name2, axis_descr, 'Return the median of the values for the requested axis', nanops.nanmedian) - cls.max = _make_stat_function('max', name, name2, axis_descr, - """This method returns the maximum of the values in the object. If you - want the *index* of the maximum, use ``idxmax``. This is the - equivalent of the ``numpy.ndarray`` method ``argmax``.""", - nanops.nanmax) - cls.min = _make_stat_function('min', name, name2, axis_descr, - """This method returns the minimum of the values in the object. If you - want the *index* of the minimum, use ``idxmin``. 
This is the - equivalent of the ``numpy.ndarray`` method ``argmin``.""", - nanops.nanmin) + cls.max = _make_stat_function( + 'max', name, name2, axis_descr, + """This method returns the maximum of the values in the object. + If you want the *index* of the maximum, use ``idxmax``. This is + the equivalent of the ``numpy.ndarray`` method ``argmax``.""", + nanops.nanmax) + cls.min = _make_stat_function( + 'min', name, name2, axis_descr, + """This method returns the minimum of the values in the object. + If you want the *index* of the minimum, use ``idxmin``. This is + the equivalent of the ``numpy.ndarray`` method ``argmin``.""", + nanops.nanmin) @classmethod def _add_series_only_operations(cls): - """ add the series only operations to the cls; evaluate the doc strings again """ + """Add the series only operations to the cls; evaluate the doc + strings again. + """ axis_descr, name, name2 = _doc_parms(cls) @@ -4799,16 +5094,18 @@ def nanptp(values, axis=0, skipna=True): nmin = nanops.nanmin(values, axis, skipna) return nmax - nmin - cls.ptp = _make_stat_function('ptp', name, name2, axis_descr, - """ - Returns the difference between the maximum value and the minimum - value in the object. This is the equivalent of the ``numpy.ndarray`` - method ``ptp``.""", nanptp) - + cls.ptp = _make_stat_function( + 'ptp', name, name2, axis_descr, + """Returns the difference between the maximum value and the + minimum value in the object. This is the equivalent of the + ``numpy.ndarray`` method ``ptp``.""", + nanptp) @classmethod def _add_series_or_dataframe_operations(cls): - """ add the series or dataframe only operations to the cls; evaluate the doc strings again """ + """Add the series or dataframe only operations to the cls; evaluate + the doc strings again. + """ from pandas.core import window as rwindow @@ -4816,35 +5113,41 @@ def _add_series_or_dataframe_operations(cls): def rolling(self, window, min_periods=None, freq=None, center=False, win_type=None, axis=0): axis = self._get_axis_number(axis) - return rwindow.rolling(self, window=window, min_periods=min_periods, freq=freq, center=center, - win_type=win_type, axis=axis) + return rwindow.rolling(self, window=window, + min_periods=min_periods, freq=freq, + center=center, win_type=win_type, axis=axis) + cls.rolling = rolling @Appender(rwindow.expanding.__doc__) def expanding(self, min_periods=1, freq=None, center=False, axis=0): axis = self._get_axis_number(axis) - return rwindow.expanding(self, min_periods=min_periods, freq=freq, center=center, - axis=axis) + return rwindow.expanding(self, min_periods=min_periods, freq=freq, + center=center, axis=axis) + cls.expanding = expanding @Appender(rwindow.ewm.__doc__) - def ewm(self, com=None, span=None, halflife=None, min_periods=0, freq=None, - adjust=True, ignore_na=False, axis=0): + def ewm(self, com=None, span=None, halflife=None, min_periods=0, + freq=None, adjust=True, ignore_na=False, axis=0): axis = self._get_axis_number(axis) - return rwindow.ewm(self, com=com, span=span, halflife=halflife, min_periods=min_periods, - freq=freq, adjust=adjust, ignore_na=ignore_na, axis=axis) + return rwindow.ewm(self, com=com, span=span, halflife=halflife, + min_periods=min_periods, freq=freq, + adjust=adjust, ignore_na=ignore_na, axis=axis) + cls.ewm = ewm + def _doc_parms(cls): - """ return a tuple of the doc parms """ - axis_descr = "{%s}" % ', '.join([ - "{0} ({1})".format(a, i) for i, a in enumerate(cls._AXIS_ORDERS) - ]) + """Return a tuple of the doc parms.""" + axis_descr = "{%s}" % ', '.join(["{0} 
({1})".format(a, i) + for i, a in enumerate(cls._AXIS_ORDERS)]) name = (cls._constructor_sliced.__name__ if cls._AXIS_LEN > 1 else 'scalar') name2 = cls.__name__ return axis_descr, name, name2 + _num_doc = """ %(desc)s @@ -4923,12 +5226,30 @@ def _doc_parms(cls): ------- %(outname)s : %(name1)s\n""" -def _make_stat_function(name, name1, name2, axis_descr, desc, f): - @Substitution(outname=name, desc=desc, name1=name1, name2=name2, axis_descr=axis_descr) +def _validate_kwargs(fname, kwargs, *compat_args): + """ + Checks whether parameters passed to the + **kwargs argument in a 'stat' function 'fname' + are valid parameters as specified in *compat_args + + """ + list(map(kwargs.__delitem__, filter( + kwargs.__contains__, compat_args))) + if kwargs: + bad_arg = list(kwargs)[0] # first 'key' element + raise TypeError(("{fname}() got an unexpected " + "keyword argument '{arg}'". + format(fname=fname, arg=bad_arg))) + + +def _make_stat_function(name, name1, name2, axis_descr, desc, f): + @Substitution(outname=name, desc=desc, name1=name1, name2=name2, + axis_descr=axis_descr) @Appender(_num_doc) - def stat_func(self, axis=None, skipna=None, level=None, - numeric_only=None, **kwargs): + def stat_func(self, axis=None, skipna=None, level=None, numeric_only=None, + **kwargs): + _validate_kwargs(name, kwargs, 'out', 'dtype') if skipna is None: skipna = True if axis is None: @@ -4936,17 +5257,20 @@ def stat_func(self, axis=None, skipna=None, level=None, if level is not None: return self._agg_by_level(name, axis=axis, level=level, skipna=skipna) - return self._reduce(f, name, axis=axis, - skipna=skipna, numeric_only=numeric_only) + return self._reduce(f, name, axis=axis, skipna=skipna, + numeric_only=numeric_only) + stat_func.__name__ = name return stat_func -def _make_stat_function_ddof(name, name1, name2, axis_descr, desc, f): - @Substitution(outname=name, desc=desc, name1=name1, name2=name2, axis_descr=axis_descr) +def _make_stat_function_ddof(name, name1, name2, axis_descr, desc, f): + @Substitution(outname=name, desc=desc, name1=name1, name2=name2, + axis_descr=axis_descr) @Appender(_num_ddof_doc) def stat_func(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None, **kwargs): + _validate_kwargs(name, kwargs, 'out', 'dtype') if skipna is None: skipna = True if axis is None: @@ -4954,19 +5278,21 @@ def stat_func(self, axis=None, skipna=None, level=None, ddof=1, if level is not None: return self._agg_by_level(name, axis=axis, level=level, skipna=skipna, ddof=ddof) - return self._reduce(f, name, axis=axis, - numeric_only=numeric_only, + return self._reduce(f, name, axis=axis, numeric_only=numeric_only, skipna=skipna, ddof=ddof) + stat_func.__name__ = name return stat_func -def _make_cum_function(name, name1, name2, axis_descr, desc, accum_func, mask_a, mask_b): - @Substitution(outname=name, desc=desc, name1=name1, name2=name2, axis_descr=axis_descr) - @Appender("Return cumulative {0} over requested axis.".format(name) - + _cnum_doc) - def func(self, axis=None, dtype=None, out=None, skipna=True, - **kwargs): +def _make_cum_function(name, name1, name2, axis_descr, desc, accum_func, + mask_a, mask_b): + @Substitution(outname=name, desc=desc, name1=name1, name2=name2, + axis_descr=axis_descr) + @Appender("Return cumulative {0} over requested axis.".format(name) + + _cnum_doc) + def func(self, axis=None, dtype=None, out=None, skipna=True, **kwargs): + _validate_kwargs(name, kwargs, 'out', 'dtype') if axis is None: axis = self._stat_axis_number else: @@ -4974,8 +5300,8 @@ def func(self, 
axis=None, dtype=None, out=None, skipna=True,

         y = _values_from_object(self).copy()

-        if skipna and issubclass(y.dtype.type,
-                                 (np.datetime64, np.timedelta64)):
+        if (skipna and
+                issubclass(y.dtype.type, (np.datetime64, np.timedelta64))):
             result = accum_func(y, axis)
             mask = isnull(self)
             np.putmask(result, mask, pd.tslib.iNaT)
@@ -4994,26 +5320,28 @@ def func(self, axis=None, dtype=None, out=None, skipna=True,
     func.__name__ = name
     return func

-def _make_logical_function(name, name1, name2, axis_descr, desc, f):
-    @Substitution(outname=name, desc=desc, name1=name1, name2=name2, axis_descr=axis_descr)
+
+def _make_logical_function(name, name1, name2, axis_descr, desc, f):
+    @Substitution(outname=name, desc=desc, name1=name1, name2=name2,
+                  axis_descr=axis_descr)
     @Appender(_bool_doc)
-    def logical_func(self, axis=None, bool_only=None, skipna=None,
-                     level=None, **kwargs):
+    def logical_func(self, axis=None, bool_only=None, skipna=None, level=None,
+                     **kwargs):
+        _validate_kwargs(name, kwargs, 'out', 'dtype')
         if skipna is None:
             skipna = True
         if axis is None:
             axis = self._stat_axis_number
         if level is not None:
             if bool_only is not None:
-                raise NotImplementedError(
-                    "Option bool_only is not implemented with option "
-                    "level.")
+                raise NotImplementedError("Option bool_only is not "
+                                          "implemented with option level.")
             return self._agg_by_level(name, axis=axis, level=level,
-                                     skipna=skipna)
+                                      skipna=skipna)
         return self._reduce(f, axis=axis, skipna=skipna,
                             numeric_only=bool_only, filter_type='bool',
                             name=name)
+
    logical_func.__name__ = name
    return logical_func
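Patch 3 routes every generated stat, cumulative, and logical method through the new `_validate_kwargs` helper, so numpy-compatibility keywords such as `out` and `dtype` are silently discarded while any other stray keyword fails loudly. Below is a minimal standalone sketch of that pattern; `demo_sum` and `validate_kwargs` are hypothetical names for illustration, not part of the patch.

    # Sketch of the kwargs-validation pattern introduced by _validate_kwargs.
    def validate_kwargs(fname, kwargs, *compat_args):
        # Drop the numpy-compatibility arguments the caller may pass.
        for arg in compat_args:
            kwargs.pop(arg, None)
        # Anything left over is a genuinely unsupported keyword.
        if kwargs:
            bad_arg = next(iter(kwargs))
            raise TypeError("{fname}() got an unexpected keyword argument "
                            "'{arg}'".format(fname=fname, arg=bad_arg))

    def demo_sum(values, **kwargs):
        validate_kwargs('demo_sum', kwargs, 'out', 'dtype')
        return sum(values)

    demo_sum([1, 2, 3], out=None)       # fine: 'out' is silently discarded
    try:
        demo_sum([1, 2, 3], foo=1)      # unsupported keyword
    except TypeError as err:
        print(err)   # demo_sum() got an unexpected keyword argument 'foo'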
From e63d4583eade93c35a507127e4dabe605a262663 Mon Sep 17 00:00:00 2001
From: chris warth
Date: Fri, 19 Feb 2016 14:55:43 -0800
Subject: [PATCH 4/4] cleanup format problems identified by flake8

---
 pandas/core/generic.py | 55 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 45 insertions(+), 10 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 14d788fdded7e..f395b0a034332 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2312,28 +2312,63 @@ def _reindex_axis(self, new_index, fill_method, axis, copy):
         return self._constructor(new_data).__finalize__(self)

     def filter(self, items=None, like=None, regex=None, axis=None):
         """
-        Restrict the info axis to set of items or wildcard
+        Subset rows or columns of dataframe according to labels in the index.
+
+        Note that this routine does not filter a dataframe on its
+        contents. The filter is applied to the labels of the index.
+        This method is a thin veneer on top of :ref:`DataFrame Select
+        `

         Parameters
         ----------
         items : list-like
-            List of info axis to restrict to (must not all be present)
+            List of info axis to restrict to (must not all be present)
         like : string
-            Keep info axis where "arg in col == True"
+            Keep info axis where "arg in col == True"
         regex : string (regular expression)
-            Keep info axis with re.search(regex, col) == True
+            Keep info axis with re.search(regex, col) == True
         axis : int or None
-            The axis to filter on. By default this is the info axis. The "info
-            axis" is the axis that is used when indexing with ``[]``. For
-            example, ``df = DataFrame({'a': [1, 2, 3, 4]]}); df['a']``. So,
-            the ``DataFrame`` columns are the info axis.
+            The axis to filter on.
+
+        Examples
+        --------
+        >>> df
+        one  two  three
+        mouse     1    2      3
+        rabbit    4    5      6
+
+        >>> # select columns by name
+        >>> df.filter(items=['one', 'three'])
+        one  three
+        mouse     1      3
+        rabbit    4      6
+
+        >>> # select columns by regular expression
+        >>> df.filter(regex='e$', axis=1)
+        one  three
+        mouse     1      3
+        rabbit    4      6
+
+        >>> # select rows containing 'bbi'
+        >>> df.filter(like='bbi', axis=0)
+        one  two  three
+        rabbit    4    5      6
+
+        Returns
+        -------
+        same type as input object with filtered info axis

         Notes
         -----
-        Arguments are mutually exclusive, but this is not checked for
+        The ``items``, ``like``, and ``regex`` parameters should be
+        mutually exclusive, but this is not checked.

-        """
+        ``axis`` defaults to the info axis that is used when indexing
+        with ``[]``.
+        """
         import re

         if axis is None:
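For reference, a short hedged sketch of the migration that the resample deprecation shims in patch 3 push callers toward: the keyword spellings noted in the comments are the deprecated forms, and the chained method calls are the replacement. This assumes a pandas 0.18-era (or later) API, as described in the patch series above.

    import pandas as pd

    # Nine one-minute timestamps, mirroring the docstring example.
    index = pd.date_range('1/1/2000', periods=9, freq='T')
    series = pd.Series(range(9), index=index)

    # Deprecated: series.resample('3T', how='sum')
    # Replacement: aggregate through a method on the Resampler object.
    print(series.resample('3T').sum())

    # Deprecated: series.resample('30S', fill_method='bfill')
    # Replacement: upsample, then fill through a method call.
    print(series.resample('30S').bfill()[0:5])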