diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 572bb3668caf8..4a8d40893c9a3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3926,22 +3926,36 @@ def shift(self, periods=1, freq=None, axis=0): def set_index(self, keys, drop=True, append=False, inplace=False, verify_integrity=False): """ + Set the DataFrame index using existing columns. + Set the DataFrame index (row labels) using one or more existing - columns. By default yields a new object. + columns. The index can replace the existing index or expand on it. Parameters ---------- - keys : column label or list of column labels / arrays - drop : boolean, default True - Delete columns to be used as the new index - append : boolean, default False - Whether to append columns to existing index - inplace : boolean, default False - Modify the DataFrame in place (do not create a new object) - verify_integrity : boolean, default False + keys : label or list of label + Name or names of the columns that will be used as the index. + drop : bool, default True + Delete columns to be used as the new index. + append : bool, default False + Whether to append columns to existing index. + inplace : bool, default False + Modify the DataFrame in place (do not create a new object). + verify_integrity : bool, default False Check the new index for duplicates. Otherwise defer the check until necessary. Setting to False will improve the performance of this - method + method. + + Returns + ------- + DataFrame + Changed row labels. + + See Also + -------- + DataFrame.reset_index : Opposite of set_index. + DataFrame.reindex : Change to new indices or expand indices. + DataFrame.reindex_like : Change to same indices as other DataFrame. Returns ------- @@ -3951,22 +3965,23 @@ def set_index(self, keys, drop=True, append=False, inplace=False, -------- >>> df = pd.DataFrame({'month': [1, 4, 7, 10], ... 'year': [2012, 2014, 2013, 2014], - ... 
'sale':[55, 40, 84, 31]}) - month sale year - 0 1 55 2012 - 1 4 40 2014 - 2 7 84 2013 - 3 10 31 2014 + ... 'sale': [55, 40, 84, 31]}) + >>> df + month year sale + 0 1 2012 55 + 1 4 2014 40 + 2 7 2013 84 + 3 10 2014 31 Set the index to become the 'month' column: >>> df.set_index('month') - sale year + year sale month - 1 55 2012 - 4 40 2014 - 7 84 2013 - 10 31 2014 + 1 2012 55 + 4 2014 40 + 7 2013 84 + 10 2014 31 Create a multi-index using columns 'year' and 'month': @@ -4074,22 +4089,22 @@ def set_index(self, keys, drop=True, append=False, inplace=False, def reset_index(self, level=None, drop=False, inplace=False, col_level=0, col_fill=''): """ - For DataFrame with multi-level index, return new DataFrame with - labeling information in the columns under the index names, defaulting - to 'level_0', 'level_1', etc. if any are None. For a standard index, - the index name will be used (if set), otherwise a default 'index' or - 'level_0' (if 'index' is already taken) will be used. + Reset the index, or a level of it. + + Reset the index of the DataFrame, and use the default one instead. + If the DataFrame has a MultiIndex, this method can remove one or more + levels. Parameters ---------- level : int, str, tuple, or list, default None Only remove the given levels from the index. Removes all levels by - default - drop : boolean, default False + default. + drop : bool, default False Do not try to insert index into dataframe columns. This resets the index to the default integer index. - inplace : boolean, default False - Modify the DataFrame in place (do not create a new object) + inplace : bool, default False + Modify the DataFrame in place (do not create a new object). col_level : int or str, default 0 If the columns have multiple levels, determines which level the labels are inserted into. 
By default it is inserted into the first @@ -4100,13 +4115,20 @@ def reset_index(self, level=None, drop=False, inplace=False, col_level=0, Returns ------- - resetted : DataFrame + DataFrame + DataFrame with the new index. + + See Also + -------- + DataFrame.set_index : Opposite of reset_index. + DataFrame.reindex : Change to new indices or expand indices. + DataFrame.reindex_like : Change to same indices as other DataFrame. Examples -------- - >>> df = pd.DataFrame([('bird', 389.0), - ... ('bird', 24.0), - ... ('mammal', 80.5), + >>> df = pd.DataFrame([('bird', 389.0), + ... ('bird', 24.0), + ... ('mammal', 80.5), ... ('mammal', np.nan)], ... index=['falcon', 'parrot', 'lion', 'monkey'], ... columns=('class', 'max_speed')) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5bb364e1d1605..c6c9589bdc059 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3434,29 +3434,99 @@ def select(self, crit, axis=0): def reindex_like(self, other, method=None, copy=True, limit=None, tolerance=None): - """Return an object with matching indices to myself. + """ + Return an object with matching indices as other object. + + Conform the object to the same index on all axes. Optional + filling logic, placing NaN in locations having no value + in the previous index. A new object is produced unless the + new index is equivalent to the current one and copy=False. Parameters ---------- - other : Object - method : string or None - copy : boolean, default True + other : Object of the same data type + Its row and column indices are used to define the new indices + of this object. + method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'} + Method to use for filling holes in reindexed DataFrame. + Please note: this is only applicable to DataFrames/Series with a + monotonically increasing/decreasing index. 
+ + * None (default): don't fill gaps + * pad / ffill: propagate last valid observation forward to next + valid + * backfill / bfill: use next valid observation to fill gap + * nearest: use nearest valid observations to fill gap + + copy : bool, default True + Return a new object, even if the passed indexes are the same. limit : int, default None Maximum number of consecutive labels to fill for inexact matches. tolerance : optional - Maximum distance between labels of the other object and this - object for inexact matches. Can be list-like. + Maximum distance between original and new labels for inexact + matches. The values of the index at the matching locations must + satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like includes list, tuple, array, Series, and must be + the same size as the index and its dtype must exactly match the + index's type. .. versionadded:: 0.21.0 (list-like tolerance) + Returns + ------- + Series or DataFrame + Same type as caller, but with changed indices on each axis. + + See Also + -------- + DataFrame.set_index : Set row labels. + DataFrame.reset_index : Remove row labels or move them to new columns. + DataFrame.reindex : Change to new indices or expand indices. + Notes ----- - Like calling s.reindex(index=other.index, columns=other.columns, - method=...) + Same as calling + ``.reindex(index=other.index, columns=other.columns,...)``. - Returns - ------- - reindexed : same as input + Examples + -------- + >>> df1 = pd.DataFrame([[24.3, 75.7, 'high'], + ... [31, 87.8, 'high'], + ... [22, 71.6, 'medium'], + ... [35, 95, 'medium']], + ... columns=['temp_celsius', 'temp_fahrenheit', 'windspeed'], + ... index=pd.date_range(start='2014-02-12', + ...
end='2014-02-15', freq='D')) + + >>> df1 + temp_celsius temp_fahrenheit windspeed + 2014-02-12 24.3 75.7 high + 2014-02-13 31.0 87.8 high + 2014-02-14 22.0 71.6 medium + 2014-02-15 35.0 95.0 medium + + >>> df2 = pd.DataFrame([[28, 'low'], + ... [30, 'low'], + ... [35.1, 'medium']], + ... columns=['temp_celsius', 'windspeed'], + ... index=pd.DatetimeIndex(['2014-02-12', '2014-02-13', + ... '2014-02-15'])) + + >>> df2 + temp_celsius windspeed + 2014-02-12 28.0 low + 2014-02-13 30.0 low + 2014-02-15 35.1 medium + + >>> df2.reindex_like(df1) + temp_celsius temp_fahrenheit windspeed + 2014-02-12 28.0 NaN low + 2014-02-13 30.0 NaN low + 2014-02-14 NaN NaN NaN + 2014-02-15 35.1 NaN medium """ d = other._construct_axes_dict(axes=self._AXIS_ORDERS, method=method, copy=copy, limit=limit, @@ -3823,36 +3893,36 @@ def reindex(self, *args, **kwargs): Conform %(klass)s to new index with optional filling logic, placing NA/NaN in locations having no value in the previous index. A new object is produced unless the new index is equivalent to the current one and - copy=False + ``copy=False``. Parameters ---------- %(optional_labels)s - %(axes)s : array-like, optional (should be specified using keywords) - New labels / index to conform to. Preferably an Index object to - avoid duplicating data + %(axes)s : array-like, optional + New labels / index to conform to, should be specified using + keywords. Preferably an Index object to avoid duplicating data %(optional_axis)s - method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}, optional - method to use for filling holes in reindexed DataFrame. + method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'} + Method to use for filling holes in reindexed DataFrame. Please note: this is only applicable to DataFrames/Series with a monotonically increasing/decreasing index. 
- * default: don't fill gaps + * None (default): don't fill gaps * pad / ffill: propagate last valid observation forward to next valid * backfill / bfill: use next valid observation to fill gap * nearest: use nearest valid observations to fill gap - copy : boolean, default True - Return a new object, even if the passed indexes are the same + copy : bool, default True + Return a new object, even if the passed indexes are the same. level : int or name Broadcast across a level, matching Index values on the - passed MultiIndex level + passed MultiIndex level. fill_value : scalar, default np.NaN Value to use for missing values. Defaults to NaN, but can be any - "compatible" value + "compatible" value. limit : int, default None - Maximum number of consecutive elements to forward or backward fill + Maximum number of consecutive elements to forward or backward fill. tolerance : optional Maximum distance between original and new labels for inexact matches. The values of the index at the matching locations most @@ -3866,6 +3936,12 @@ def reindex(self, *args, **kwargs): .. versionadded:: 0.21.0 (list-like tolerance) + See Also + -------- + DataFrame.set_index : Set row labels. + DataFrame.reset_index : Remove row labels or move them to new columns. + DataFrame.reindex_like : Change to same indices as other DataFrame. + Examples -------- @@ -3957,12 +4033,12 @@ def reindex(self, *args, **kwargs): ... index=date_index) >>> df2 prices - 2010-01-01 100 - 2010-01-02 101 + 2010-01-01 100.0 + 2010-01-02 101.0 2010-01-03 NaN - 2010-01-04 100 - 2010-01-05 89 - 2010-01-06 88 + 2010-01-04 100.0 + 2010-01-05 89.0 + 2010-01-06 88.0 Suppose we decide to expand the dataframe to cover a wider date range. 
@@ -3973,12 +4049,12 @@ def reindex(self, *args, **kwargs): 2009-12-29 NaN 2009-12-30 NaN 2009-12-31 NaN - 2010-01-01 100 - 2010-01-02 101 + 2010-01-01 100.0 + 2010-01-02 101.0 2010-01-03 NaN - 2010-01-04 100 - 2010-01-05 89 - 2010-01-06 88 + 2010-01-04 100.0 + 2010-01-05 89.0 + 2010-01-06 88.0 2010-01-07 NaN The index entries that did not have a value in the original data frame @@ -3991,15 +4067,15 @@ def reindex(self, *args, **kwargs): >>> df2.reindex(date_index2, method='bfill') prices - 2009-12-29 100 - 2009-12-30 100 - 2009-12-31 100 - 2010-01-01 100 - 2010-01-02 101 + 2009-12-29 100.0 + 2009-12-30 100.0 + 2009-12-31 100.0 + 2010-01-01 100.0 + 2010-01-02 101.0 2010-01-03 NaN - 2010-01-04 100 - 2010-01-05 89 - 2010-01-06 88 + 2010-01-04 100.0 + 2010-01-05 89.0 + 2010-01-06 88.0 2010-01-07 NaN Please note that the ``NaN`` value present in the original dataframe @@ -4013,7 +4089,7 @@ def reindex(self, *args, **kwargs): Returns ------- - reindexed : %(klass)s + %(klass)s with changed index. """ # TODO: Decide if we care about having different examples for different # kinds @@ -4085,11 +4161,10 @@ def _needs_reindex_multi(self, axes, method, level): def _reindex_multi(self, axes, copy, fill_value): return NotImplemented - _shared_docs[ - 'reindex_axis'] = ("""Conform input object to new index with optional - filling logic, placing NA/NaN in locations having no value in the - previous index. A new object is produced unless the new index is - equivalent to the current one and copy=False + _shared_docs['reindex_axis'] = ("""Conform input object to new index + with optional filling logic, placing NA/NaN in locations having + no value in the previous index. A new object is produced unless + the new index is equivalent to the current one and copy=False. Parameters ---------- @@ -4126,17 +4201,20 @@ def _reindex_multi(self, axes, copy, fill_value): .. 
versionadded:: 0.21.0 (list-like tolerance) - Examples - -------- - >>> df.reindex_axis(['A', 'B', 'C'], axis=1) - See Also -------- - reindex, reindex_like + DataFrame.set_index : Set row labels. + DataFrame.reset_index : Remove row labels or move them to new columns. + DataFrame.reindex : Change to new indices or expand indices. + DataFrame.reindex_like : Change to same indices as other DataFrame. Returns ------- - reindexed : %(klass)s + %(klass)s + + Examples + -------- + >>> df.reindex_axis(['A', 'B', 'C'], axis=1) """) @Appender(_shared_docs['reindex_axis'] % _shared_doc_kwargs)