pandas-dev · jorisvandenbossche · Aug 29, 2016 · Aug 12, 2016 · jreback · Aug 25, 2016
diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
@@ -457,7 +457,7 @@ API changes
 - ``Timestamp``, ``Period``, ``DatetimeIndex``, ``PeriodIndex`` and ``.dt`` accessor have gained a ``.is_leap_year`` property to check whether the date belongs to a leap year. (:issue:`13727`)
 - ``pd.read_hdf`` will now raise a ``ValueError`` instead of ``KeyError``, if a mode other than ``r``, ``r+`` and ``a`` is supplied. (:issue:`13623`)
 - ``DataFrame.values`` will now return ``float64`` with a ``DataFrame`` of mixed ``int64`` and ``uint64`` dtypes, conforming to ``np.find_common_type`` (:issue:`10364`, :issue:`13917`)
-
+- ``Series.unique()`` with datetime and timezone now returns an array of ``Timestamp`` with timezone (:issue:`13565`)
 
 
 
@@ -904,6 +904,35 @@ New Behavior:
    idx1.difference(idx2)
    idx1.symmetric_difference(idx2)
 
+.. _whatsnew_0190.api.unique_index:
+
+``Index.unique`` consistently returns ``Index``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``Index.unique()`` now returns unique values as an
+``Index`` of the appropriate ``dtype``. (:issue:`13395`)
+
+Previously, most ``Index`` classes returned ``np.ndarray``, and ``DatetimeIndex``,
+``TimedeltaIndex`` and ``PeriodIndex`` returned ``Index`` to keep metadata like timezone.
+
+Previous Behavior:
+
+.. code-block:: ipython
+
+   In [1]: pd.Index([1, 2, 3]).unique()
+   Out[1]: array([1, 2, 3])
+   In [2]: pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], tz='Asia/Tokyo').unique()
+   Out[2]: DatetimeIndex(['2011-01-01 00:00:00+09:00', '2011-01-02 00:00:00+09:00',
+                          '2011-01-03 00:00:00+09:00'],
+                         dtype='datetime64[ns, Asia/Tokyo]', freq=None)
+
+New Behavior:
+
+.. ipython:: python
+
+   pd.Index([1, 2, 3]).unique()
+   pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], tz='Asia/Tokyo').unique()
+
 .. _whatsnew_0190.api.autogenerated_chunksize_index:
 
 ``read_csv`` will progressively enumerate chunks
@@ -1181,6 +1210,7 @@ Bug Fixes
 - Bug in ``pd.read_csv``, ``pd.read_table``, ``pd.read_fwf``, ``pd.read_stata`` and ``pd.read_sas`` where files were opened by parsers but not closed if both ``chunksize`` and ``iterator`` were ``None``. (:issue:`13940`)
 - Bug in ``StataReader``, ``StataWriter``, ``XportReader`` and ``SAS7BDATReader`` where a file was not properly closed when an error was raised. (:issue:`13940`)
 
+
 - Bug in ``pd.pivot_table()`` where ``margins_name`` is ignored when ``aggfunc`` is a list (:issue:`13354`)
 - Bug in ``pd.Series.str.zfill``, ``center``, ``ljust``, ``rjust``, and ``pad`` when passing non-integers, did not raise ``TypeError`` (:issue:`13598`)
 - Bug in checking for any null objects in a ``TimedeltaIndex``, which always returned ``True`` (:issue:`13603`)
@@ -1248,7 +1278,6 @@ Bug Fixes
 
 - Bug in ``agg()`` function on groupby dataframe changes dtype of ``datetime64[ns]`` column to ``float64`` (:issue:`12821`)
 - Bug in using NumPy ufunc with ``PeriodIndex`` to add or subtract integer raise ``IncompatibleFrequency``. Note that using standard operator like ``+`` or ``-`` is recommended, because standard operators use more efficient path (:issue:`13980`)
-
 - Bug in operations on ``NaT`` returning ``float`` instead of ``datetime64[ns]`` (:issue:`12941`)
 - Bug in ``Series`` flexible arithmetic methods (like ``.add()``) raises ``ValueError`` when ``axis=None`` (:issue:`13894`)
 

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -7,8 +7,7 @@
 
 from pandas.types.missing import isnull
 from pandas.types.generic import ABCDataFrame, ABCSeries, ABCIndexClass
-from pandas.types.common import (is_object_dtype,
-                                 is_list_like, is_scalar)
+from pandas.types.common import is_object_dtype, is_list_like, is_scalar
 
 from pandas.core import common as com
 import pandas.core.nanops as nanops
@@ -21,7 +20,7 @@
 
 _shared_docs = dict()
 _indexops_doc_kwargs = dict(klass='IndexOpsMixin', inplace='',
-                            duplicated='IndexOpsMixin')
+                            unique='IndexOpsMixin', duplicated='IndexOpsMixin')
 
 
 class StringMixin(object):
@@ -952,21 +951,27 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
                               normalize=normalize, bins=bins, dropna=dropna)
         return result
 
-    def unique(self):
+    _shared_docs['unique'] = (
         """
-        Return array of unique values in the object. Significantly faster than
-        numpy.unique. Includes NA values.
+        Return %(unique)s of unique values in the object.
+        Significantly faster than numpy.unique. Includes NA values.
+        The order of the original is preserved.
 
         Returns
         -------
-        uniques : ndarray
-        """
-        from pandas.core.nanops import unique1d
-        values = self.values
-        if hasattr(values, 'unique'):
-            return values.unique()
+        uniques : %(unique)s
+        """)
 
-        return unique1d(values)
+    @Appender(_shared_docs['unique'] % _indexops_doc_kwargs)
+    def unique(self):
+        values = self._values
+
+        if hasattr(values, 'unique'):
+            result = values.unique()
+        else:
+            from pandas.core.nanops import unique1d
+            result = unique1d(values)
+        return result
 
     def nunique(self, dropna=True):
         """

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -18,6 +18,7 @@
                                  is_float_dtype,
                                  is_extension_type, is_datetimetz,
                                  is_datetimelike,
+                                 is_datetime64tz_dtype,
                                  is_timedelta64_dtype,
                                  is_list_like,
                                  is_hashable,
@@ -77,7 +78,7 @@
     axes='index', klass='Series', axes_single_arg="{0, 'index'}",
     inplace="""inplace : boolean, default False
         If True, performs operation inplace and returns None.""",
-    duplicated='Series',
+    unique='np.ndarray', duplicated='Series',
     optional_by='')
 
 
@@ -1231,6 +1232,15 @@ def mode(self):
         # TODO: Add option for bins like value_counts()
         return algos.mode(self)
 
+    @Appender(base._shared_docs['unique'] % _shared_doc_kwargs)
+    def unique(self):
+        result = super(Series, self).unique()
+        if is_datetime64tz_dtype(self.dtype):
+            # return an object array of Timestamp with tz
+            # TODO: this should return a DatetimeArray with tz in pandas 2.0
+            return result.asobject.values
+        return result
+
     @deprecate_kwarg('take_last', 'keep', mapping={True: 'last',
                                                    False: 'first'})
     @Appender(base._shared_docs['drop_duplicates'] % _shared_doc_kwargs)

diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py
@@ -60,7 +60,8 @@
 
 _unsortable_types = frozenset(('mixed', 'mixed-integer'))
 
-_index_doc_kwargs = dict(klass='Index', inplace='', duplicated='np.array')
+_index_doc_kwargs = dict(klass='Index', inplace='',
+                         unique='Index', duplicated='np.ndarray')
 _index_shared_docs = dict()
 
 
@@ -3217,6 +3218,11 @@ def drop(self, labels, errors='raise'):
             indexer = indexer[~mask]
         return self.delete(indexer)
 
+    @Appender(base._shared_docs['unique'] % _index_doc_kwargs)
+    def unique(self):
+        result = super(Index, self).unique()
+        return self._shallow_copy(result)
+
     @deprecate_kwarg('take_last', 'keep', mapping={True: 'last',
                                                    False: 'first'})
     @Appender(base._shared_docs['drop_duplicates'] % _index_doc_kwargs)

diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py
@@ -283,6 +283,14 @@ def _engine(self):
     def is_unique(self):
         return not self.duplicated().any()
 
+    @Appender(base._shared_docs['unique'] % ibase._index_doc_kwargs)
+    def unique(self):
+        result = base.IndexOpsMixin.unique(self)
+        # CategoricalIndex._shallow_copy keeps the original categories
+        # and ordered if not otherwise specified
+        return self._shallow_copy(result, categories=result.categories,
+                                  ordered=result.ordered)
+
     @deprecate_kwarg('take_last', 'keep', mapping={True: 'last',
                                                    False: 'first'})
     @Appender(base._shared_docs['duplicated'] % ibase._index_doc_kwargs)

diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py
@@ -395,6 +395,7 @@ def test_duplicates(self):
 
         expected = CategoricalIndex([0], name='foo')
         self.assert_index_equal(idx.drop_duplicates(), expected)
+        self.assert_index_equal(idx.unique(), expected)
 
     def test_get_indexer(self):
 

diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
@@ -1927,6 +1927,38 @@ def test_get_unique_index(self):
             self.assertTrue(result.unique)
             self.assert_index_equal(result, expected)
 
+    def test_unique(self):
+        mi = pd.MultiIndex.from_arrays([[1, 2, 1, 2], [1, 1, 1, 2]])
+
+        res = mi.unique()
+        exp = pd.MultiIndex.from_arrays([[1, 2, 2], [1, 1, 2]])
+        tm.assert_index_equal(res, exp)
+
+        mi = pd.MultiIndex.from_arrays([list('aaaa'), list('abab')])
+        res = mi.unique()
+        exp = pd.MultiIndex.from_arrays([list('aa'), list('ab')])
+        tm.assert_index_equal(res, exp)
+
+        mi = pd.MultiIndex.from_arrays([list('aaaa'), list('aaaa')])
+        res = mi.unique()
+        exp = pd.MultiIndex.from_arrays([['a'], ['a']])
+        tm.assert_index_equal(res, exp)
+
+    def test_unique_datetimelike(self):
+        idx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-01',
+                                 '2015-01-01', 'NaT', 'NaT'])
+        idx2 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-02',
+                                 '2015-01-02', 'NaT', '2015-01-01'],
+                                tz='Asia/Tokyo')
+        result = pd.MultiIndex.from_arrays([idx1, idx2]).unique()
+
+        eidx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', 'NaT', 'NaT'])
+        eidx2 = pd.DatetimeIndex(['2015-01-01', '2015-01-02',
+                                  'NaT', '2015-01-01'],
+                                 tz='Asia/Tokyo')
+        exp = pd.MultiIndex.from_arrays([eidx1, eidx2])
+        tm.assert_index_equal(result, exp)
+
     def test_tolist(self):
         result = self.index.tolist()
         exp = list(self.index.values)