diff --git a/pandas/core/base.py b/pandas/core/base.py
index fd039480fc6f1..257b26b64e642 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1197,24 +1197,6 @@ def drop_duplicates(self, keep='first', inplace=False):
         else:
             return result
 
-    _shared_docs['duplicated'] = (
-        """Return boolean %(duplicated)s denoting duplicate values
-
-        Parameters
-        ----------
-        keep : {'first', 'last', False}, default 'first'
-            - ``first`` : Mark duplicates as ``True`` except for the first
-              occurrence.
-            - ``last`` : Mark duplicates as ``True`` except for the last
-              occurrence.
-            - False : Mark all duplicates as ``True``.
-
-        Returns
-        -------
-        duplicated : %(duplicated)s
-        """)
-
-    @Appender(_shared_docs['duplicated'] % _indexops_doc_kwargs)
     def duplicated(self, keep='first'):
         from pandas.core.algorithms import duplicated
         if isinstance(self, ABCIndexClass):
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index f69777af31c9c..de4ea5fcfaefa 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -4325,8 +4325,60 @@ def drop_duplicates(self, keep='first'):
         """
         return super(Index, self).drop_duplicates(keep=keep)
 
-    @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs)
     def duplicated(self, keep='first'):
+        """
+        Indicate duplicate index values.
+
+        Duplicated values are indicated as ``True`` values in the resulting
+        array. Either all duplicates, all except the first, or all except the
+        last occurrence of duplicates can be indicated.
+
+        Parameters
+        ----------
+        keep : {'first', 'last', False}, default 'first'
+            The value or values in a set of duplicates to leave unmarked.
+
+            - 'first' : Mark duplicates as ``True`` except for the first
+              occurrence.
+            - 'last' : Mark duplicates as ``True`` except for the last
+              occurrence.
+            - ``False`` : Mark all duplicates as ``True``.
+
+        Returns
+        -------
+        numpy.ndarray
+
+        See Also
+        --------
+        pandas.Series.duplicated : Equivalent method on pandas.Series
+        pandas.DataFrame.duplicated : Equivalent method on pandas.DataFrame
+        pandas.Index.drop_duplicates : Remove duplicate values from Index
+
+        Examples
+        --------
+        By default, for each set of duplicated values, the first occurrence is
+        set to False and all others to True:
+
+        >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama'])
+        >>> idx.duplicated()
+        array([False, False,  True, False,  True])
+
+        which is equivalent to
+
+        >>> idx.duplicated(keep='first')
+        array([False, False,  True, False,  True])
+
+        By using 'last', the last occurrence of each set of duplicated values
+        is set to False and all others to True:
+
+        >>> idx.duplicated(keep='last')
+        array([ True, False,  True, False, False])
+
+        By setting keep to ``False``, all duplicates are True:
+
+        >>> idx.duplicated(keep=False)
+        array([ True, False,  True, False,  True])
+        """
         return super(Index, self).duplicated(keep=keep)
 
     _index_shared_docs['fillna'] = """
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index 71d39ad812d20..332c6613a230c 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -399,7 +399,7 @@ def unique(self, level=None):
         return self._shallow_copy(result, categories=result.categories,
                                   ordered=result.ordered)
 
-    @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs)
+    @Appender(Index.duplicated.__doc__)
     def duplicated(self, keep='first'):
         from pandas._libs.hashtable import duplicated_int64
         codes = self.codes.astype('i8')
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index be64f6f4bfd0f..56f1f3c0bdd67 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -916,7 +916,7 @@ def f(k, stringify):
                      for k, stringify in zip(key, self._have_mixed_levels)])
         return hash_tuple(key)
 
-    @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs)
+    @Appender(Index.duplicated.__doc__)
     def duplicated(self, keep='first'):
         from pandas.core.sorting import get_group_index
         from pandas._libs.hashtable import duplicated_int64
diff --git a/pandas/core/series.py b/pandas/core/series.py
index d9ee1c856600f..07cfc671cbd28 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -1458,8 +1458,78 @@ def drop_duplicates(self, keep='first', inplace=False):
         """
         return super(Series, self).drop_duplicates(keep=keep, inplace=inplace)
 
-    @Appender(base._shared_docs['duplicated'] % _shared_doc_kwargs)
     def duplicated(self, keep='first'):
+        """
+        Indicate duplicate Series values.
+
+        Duplicated values are indicated as ``True`` values in the resulting
+        Series. Either all duplicates, all except the first, or all except the
+        last occurrence of duplicates can be indicated.
+
+        Parameters
+        ----------
+        keep : {'first', 'last', False}, default 'first'
+            - 'first' : Mark duplicates as ``True`` except for the first
+              occurrence.
+            - 'last' : Mark duplicates as ``True`` except for the last
+              occurrence.
+            - ``False`` : Mark all duplicates as ``True``.
+
+        Returns
+        -------
+        pandas.core.series.Series
+
+        See Also
+        --------
+        pandas.Index.duplicated : Equivalent method on pandas.Index
+        pandas.DataFrame.duplicated : Equivalent method on pandas.DataFrame
+        pandas.Series.drop_duplicates : Remove duplicate values from Series
+
+        Examples
+        --------
+        By default, for each set of duplicated values, the first occurrence is
+        set to False and all others to True:
+
+        >>> animals = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama'])
+        >>> animals.duplicated()
+        0    False
+        1    False
+        2     True
+        3    False
+        4     True
+        dtype: bool
+
+        which is equivalent to
+
+        >>> animals.duplicated(keep='first')
+        0    False
+        1    False
+        2     True
+        3    False
+        4     True
+        dtype: bool
+
+        By using 'last', the last occurrence of each set of duplicated values
+        is set to False and all others to True:
+
+        >>> animals.duplicated(keep='last')
+        0     True
+        1    False
+        2     True
+        3    False
+        4    False
+        dtype: bool
+
+        By setting keep to ``False``, all duplicates are True:
+
+        >>> animals.duplicated(keep=False)
+        0     True
+        1    False
+        2     True
+        3    False
+        4     True
+        dtype: bool
+        """
         return super(Series, self).duplicated(keep=keep)
 
     def idxmin(self, axis=None, skipna=True, *args, **kwargs):
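
A minimal, informal sanity check of the @Appender change (not part of the patch itself; it assumes a pandas build with this diff applied, and uses CategoricalIndex only as an illustration):

import pandas as pd

# CategoricalIndex keeps its own duplicated() implementation but now reuses
# the Index.duplicated docstring via @Appender(Index.duplicated.__doc__).
ci = pd.CategoricalIndex(['lama', 'cow', 'lama', 'beetle', 'lama'])
print(ci.duplicated())             # boolean ndarray; first occurrences are False
print(ci.duplicated(keep='last'))  # last occurrences are False
print(ci.duplicated(keep=False))   # every duplicated value is marked True

# The inherited docstring should start with the new summary line,
# "Indicate duplicate index values."
print(pd.CategoricalIndex.duplicated.__doc__.strip().splitlines()[0])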