From 158d925c277faff3cf8849c2fd2cce74d512a815 Mon Sep 17 00:00:00 2001
From: "H. Vetinari"
Date: Sat, 2 Feb 2019 00:42:59 +0100
Subject: [PATCH 01/15] Code & whatsnew

---
 doc/source/whatsnew/v0.25.0.rst   | 21 ++++++++++++++++++
 pandas/core/algorithms.py         | 37 +++++++++++++++++++++++++++++--
 pandas/core/arrays/categorical.py | 27 ++++++++++++++++++----
 3 files changed, 79 insertions(+), 6 deletions(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 09626be713c4f..43a07cd57b4dd 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -14,6 +14,27 @@
 These are the changes in pandas 0.25.0. See :ref:`release` for a full changelog
 including other versions of pandas.
 
+Enhancements
+~~~~~~~~~~~~
+
+.. _whatsnew_0250.enhancements.unique:
+
+Changes to the ``unique`` method
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The method :meth:`pandas.unique` now supports the keyword ``return_inverse``. If passed,
+the output becomes a tuple whose second component is an ndarray that contains, for each
+element of the input, its position in the returned array of unique values.
+
+.. ipython:: python
+
+    idx = pd.Index([1, 0, 0, 1])
+    uniques, inverse = pd.unique(idx, return_inverse=True)
+    uniques
+    inverse
+    reconstruct = pd.Index(uniques[inverse])
+    reconstruct.equals(idx)
+
 .. _whatsnew_0250.enhancements.other:
 
 Other Enhancements
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index b473a7aef929e..4e2a3e512a929 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -275,7 +275,7 @@ def match(to_match, values, na_sentinel=-1):
     return result
 
 
-def unique(values):
+def unique(values, return_inverse=False):
     """
     Hash table-based unique. Uniques are returned in order of
     appearance. This does NOT sort.
@@ -285,6 +285,13 @@ def unique(values):
     Parameters
     ----------
     values : 1d array-like
+    return_inverse : boolean, default False
+        Whether to return the inverse of the unique values. If True, the
+        output will be a tuple of two np.ndarray. The second component
+        contains the mapping between the indices of the elements in
+        `values` and their locations in the unique values.
+
+        .. versionadded:: 0.25.0
 
     Returns
     -------
@@ -348,19 +355,45 @@ def unique(values):
     >>> pd.unique([('a', 'b'), ('b', 'a'), ('a', 'c'), ('b', 'a')])
     array([('a', 'b'), ('b', 'a'), ('a', 'c')], dtype=object)
     """
+    from pandas import Index
 
     values = _ensure_arraylike(values)
 
     if is_extension_array_dtype(values):
         # Dispatch to extension dtype's unique.
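+        # A minimal sketch of the intended round-trip (values mirror the
+        # whatsnew example; illustrative only, not exercised in this branch):
+        #   uniques, inverse = unique([1, 0, 0, 1], return_inverse=True)
+        #   # uniques -> array([1, 0]); inverse -> array([0, 1, 1, 0])
+        #   # so uniques[inverse] reconstructs the original ordering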
+        if return_inverse:
+            # as long as return_inverse is not part of the EA.unique contract,
+            # test if this works
+            try:
+                # make sure that we're not calling from an Index/Series
+                # container, as these do not support return_inverse yet
+                ea_val = getattr(values, 'array', values)
+                result, inverse = ea_val.unique(return_inverse=return_inverse)
+
+                if is_categorical_dtype(values) and isinstance(values, Index):
+                    # pd.unique(CategoricalIndex) returns Index not Categorical
+                    result = Index(result)
+                return result, inverse
+            except TypeError:
+                msg = ('The Extension Array class for type {dtype} does not '
+                       'yet support the unique-method with '
+                       '"return_inverse=True".'.format(dtype=type(values)))
+                raise NotImplementedError(msg)
         return values.unique()
 
     original = values
     htable, _, values, dtype, ndtype = _get_hashtable_algo(values)
 
     table = htable(len(values))
-    uniques = table.unique(values)
+    if return_inverse:
+        uniques, inverse = table.unique(values, return_inverse=True)
+    else:
+        uniques = table.unique(values)
+
     uniques = _reconstruct_data(uniques, dtype, original)
+
+    if return_inverse:
+        return uniques, inverse
 
     return uniques
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 35b662eaae9a5..cbb0098fe815e 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2258,7 +2258,7 @@ def mode(self, dropna=True):
         codes = sorted(htable.mode_int64(ensure_int64(codes), dropna))
         return self._constructor(values=codes, dtype=self.dtype, fastpath=True)
 
-    def unique(self):
+    def unique(self, return_inverse=False):
         """
         Return the ``Categorical`` which ``categories`` and ``codes`` are
         unique. Unused categories are NOT returned.
@@ -2268,9 +2268,21 @@ def unique(self):
         - ordered category: values are sorted by appearance order, categories
           keeps existing order.
 
+        Parameters
+        ----------
+        return_inverse : boolean, default False
+            Whether to return the inverse of the unique values. If True, the
+            output will be a tuple of two np.ndarray. The second component
+            contains the mapping between the indices of the elements in the
+            calling Categorical and their locations in the unique values.
+
+            .. versionadded:: 0.25.0
+
         Returns
         -------
-        unique values : ``Categorical``
+        uniques : ``Categorical``
+        inverse : np.ndarray (if `return_inverse=True`)
+            The indices that map from `uniques` back to the calling
+            ``Categorical``.
 
         Examples
         --------
@@ -2302,7 +2314,10 @@ def unique(self):
         """
 
         # unlike np.unique, unique1d does not sort
-        unique_codes = unique1d(self.codes)
+        if return_inverse:
+            unique_codes, inverse = unique1d(self.codes, return_inverse=True)
+        else:
+            unique_codes = unique1d(self.codes, return_inverse=False)
         cat = self.copy()
 
         # keep nan in codes
@@ -2312,7 +2327,11 @@ def unique(self):
         take_codes = unique_codes[unique_codes != -1]
         if self.ordered:
             take_codes = np.sort(take_codes)
-        return cat.set_categories(cat.categories.take(take_codes))
+        result = cat.set_categories(cat.categories.take(take_codes))
+
+        if return_inverse:
+            return result, inverse
+        return result
 
     def _values_for_factorize(self):
         codes = self.codes.astype('int64')

From 49e42f1ca039b6484a87c99e8822c48061909a4d Mon Sep 17 00:00:00 2001
From: "H. 
Vetinari" Date: Sat, 2 Feb 2019 00:48:36 +0100 Subject: [PATCH 02/15] Extend and parametrize unique-test to all numpy dtypes --- pandas/tests/test_algos.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 3d28b17750540..4f2b3355521ec 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -324,14 +324,9 @@ def test_parametrized_factorize_na_value(self, data, na_value): class TestUnique(object): - def test_ints(self): - arr = np.random.randint(0, 100, size=50) - - result = algos.unique(arr) - assert isinstance(result, np.ndarray) - - def test_objects(self): - arr = np.random.randint(0, 100, size=50).astype('O') + def test_unique_all_dtypes(self, any_numpy_dtype): + dtype = any_numpy_dtype + arr = np.random.randint(0, 100, size=50).astype(dtype) result = algos.unique(arr) assert isinstance(result, np.ndarray) From 9285e8ce59e48090f26c08dff5f76cb80a38db32 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sat, 2 Feb 2019 00:49:23 +0100 Subject: [PATCH 03/15] Also test inverse in test_unique_all_dtypes --- pandas/tests/test_algos.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 4f2b3355521ec..0b82c77d1c80b 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -331,6 +331,16 @@ def test_unique_all_dtypes(self, any_numpy_dtype): result = algos.unique(arr) assert isinstance(result, np.ndarray) + # reuse result as expected outcome of return_inverse case + expected_uniques = result.copy() + + result_uniques, result_inverse = algos.unique(arr, return_inverse=True) + tm.assert_numpy_array_equal(result_uniques, expected_uniques) + + # reconstruction can only work if inverse is correct + reconstr = result_uniques[result_inverse] + tm.assert_numpy_array_equal(reconstr, arr, check_dtype=False) + def test_object_refcount_bug(self): lst = ['A', 'B', 'C', 'D', 'E'] for i in range(1000): From 45eff6715dce278071a13e69f931d7b0739beee1 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sat, 2 Feb 2019 00:51:18 +0100 Subject: [PATCH 04/15] Parametrize test_timedelta64_dtype_array_returned --- pandas/tests/test_algos.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 0b82c77d1c80b..b3c942c6e3783 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -384,24 +384,16 @@ def test_datetime64_dtype_array_returned(self): tm.assert_numpy_array_equal(result, expected) assert result.dtype == expected.dtype - def test_timedelta64_dtype_array_returned(self): + @pytest.mark.parametrize('box', [Index, Series, np.array]) + def test_timedelta64_dtype_array_returned(self, box): # GH 9431 expected = np.array([31200, 45678, 10000], dtype='m8[ns]') td_index = pd.to_timedelta([31200, 45678, 31200, 10000, 45678]) - result = algos.unique(td_index) - tm.assert_numpy_array_equal(result, expected) - assert result.dtype == expected.dtype + obj = box(td_index) - s = Series(td_index) - result = algos.unique(s) + result = algos.unique(obj) tm.assert_numpy_array_equal(result, expected) - assert result.dtype == expected.dtype - - arr = s.values - result = algos.unique(arr) - tm.assert_numpy_array_equal(result, expected) - assert result.dtype == expected.dtype def test_uint64_overflow(self): s = Series([1, 2, 2**63, 2**63], dtype=np.uint64) From de22c62cbe7acff1851441b25d7dd77992fd05cd Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Sat, 2 Feb 2019 00:52:39 +0100 Subject: [PATCH 05/15] Add inverse to test_timedelta64_dtype_array_returned --- pandas/tests/test_algos.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index b3c942c6e3783..296aa65a67461 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -28,6 +28,20 @@ from pandas.util.testing import assert_almost_equal +def assert_series_or_index_or_array_or_categorical_equal(left, right): + if isinstance(left, Series): + tm.assert_series_equal(left, right) + elif isinstance(left, Index): + tm.assert_index_equal(left, right) + elif isinstance(left, np.ndarray): + tm.assert_numpy_array_equal(left, right) + elif isinstance(left, Categorical): + tm.assert_categorical_equal(left, right) + else: + # will fail + assert isinstance(left, (Series, Index, np.ndarray, Categorical)) + + class TestMatch(object): def test_ints(self): @@ -395,6 +409,16 @@ def test_timedelta64_dtype_array_returned(self, box): result = algos.unique(obj) tm.assert_numpy_array_equal(result, expected) + # reuse result as expected outcome of return_inverse case + expected_uniques = result.copy() + + result_uniques, result_inverse = algos.unique(obj, return_inverse=True) + tm.assert_numpy_array_equal(result_uniques, expected_uniques) + + # reconstruction can only work if inverse is correct + reconstr = box(result_uniques[result_inverse]) + assert_series_or_index_or_array_or_categorical_equal(reconstr, obj) + def test_uint64_overflow(self): s = Series([1, 2, 2**63, 2**63], dtype=np.uint64) exp = np.array([1, 2, 2**63], dtype=np.uint64) From 2a4f4a7f82d98b93ec4aae761c2f2591409ed089 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sat, 2 Feb 2019 00:56:21 +0100 Subject: [PATCH 06/15] Add inverse to test_nan_in_object_array --- pandas/tests/test_algos.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 296aa65a67461..b37e8f004b1b5 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -430,6 +430,11 @@ def test_nan_in_object_array(self): expected = np.array(['a', np.nan, 'c'], dtype=object) tm.assert_numpy_array_equal(result, expected) + result_uniques, result_inverse = pd.unique(duplicated_items, + return_inverse=True) + expected_inverse = np.array([0, 1, 2, 2], dtype='int64') + tm.assert_numpy_array_equal(result_inverse, expected_inverse) + def test_categorical(self): # we are expecting to return in the order From 39a2e6418549b7dbfa362a4f0e62bf2c0abe3e8c Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Sat, 2 Feb 2019 01:19:42 +0100 Subject: [PATCH 07/15] Parametrize test_categorical --- pandas/tests/test_algos.py | 59 ++++++++++++-------------------------- 1 file changed, 19 insertions(+), 40 deletions(-) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index b37e8f004b1b5..129616c81b73e 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -435,49 +435,28 @@ def test_nan_in_object_array(self): expected_inverse = np.array([0, 1, 2, 2], dtype='int64') tm.assert_numpy_array_equal(result_inverse, expected_inverse) - def test_categorical(self): - - # we are expecting to return in the order - # of appearance - expected = Categorical(list('bac'), categories=list('bac')) - - # we are expecting to return in the order - # of the categories - expected_o = Categorical( - list('bac'), categories=list('abc'), ordered=True) + @pytest.mark.parametrize('ordered', [True, False]) + @pytest.mark.parametrize('box', [lambda x: x, Series, Index], + ids=['Categorical', 'Series', 'Index']) + @pytest.mark.parametrize('method', [lambda x, **kwargs: x.unique(**kwargs), + pd.unique], + ids=['classmethod', 'toplevel']) + def test_categorical(self, method, box, ordered): + + categories = list('abc') if ordered else list('bac') + expected = Categorical(list('bac'), categories=categories, + ordered=ordered) + + # Index.unique always returns Index + # pd.unique(Index) stays Index (only) for Categorical + expected = box(expected) if box == Index else expected # GH 15939 - c = Categorical(list('baabc')) - result = c.unique() - tm.assert_categorical_equal(result, expected) - - result = algos.unique(c) - tm.assert_categorical_equal(result, expected) - - c = Categorical(list('baabc'), ordered=True) - result = c.unique() - tm.assert_categorical_equal(result, expected_o) - - result = algos.unique(c) - tm.assert_categorical_equal(result, expected_o) + c = box(Categorical(list('baabc'), categories=categories, + ordered=ordered)) + result = method(c) - # Series of categorical dtype - s = Series(Categorical(list('baabc')), name='foo') - result = s.unique() - tm.assert_categorical_equal(result, expected) - - result = pd.unique(s) - tm.assert_categorical_equal(result, expected) - - # CI -> return CI - ci = CategoricalIndex(Categorical(list('baabc'), - categories=list('bac'))) - expected = CategoricalIndex(expected) - result = ci.unique() - tm.assert_index_equal(result, expected) - - result = pd.unique(ci) - tm.assert_index_equal(result, expected) + assert_series_or_index_or_array_or_categorical_equal(result, expected) def test_datetime64tz_aware(self): # GH 15939 From bfc310fbbe67818ee7a546499e87912a7700b211 Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Sat, 2 Feb 2019 01:21:56 +0100 Subject: [PATCH 08/15] Add inverse to test_categorical --- pandas/tests/test_algos.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 129616c81b73e..c0e08b52f0de3 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -458,6 +458,20 @@ def test_categorical(self, method, box, ordered): assert_series_or_index_or_array_or_categorical_equal(result, expected) + if method == pd.unique: + # [Series/Index].unique do not yet support return_inverse=True + + # reuse result as expected outcome of return_inverse case + expected_uniques = result.copy() + result_uniques, result_inverse = method(c, return_inverse=True) + + assert_series_or_index_or_array_or_categorical_equal( + result_uniques, expected_uniques) + + # reconstruction can only work if inverse is correct + reconstr = box(result_uniques[result_inverse]) + assert_series_or_index_or_array_or_categorical_equal(reconstr, c) + def test_datetime64tz_aware(self): # GH 15939 From af7d8f3b33d87f0d8b96060278fa7a9b48df46a3 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sat, 2 Feb 2019 01:23:22 +0100 Subject: [PATCH 09/15] Parametrize test_datetime64tz_aware --- pandas/tests/test_algos.py | 60 ++++++++++++-------------------------- 1 file changed, 18 insertions(+), 42 deletions(-) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index c0e08b52f0de3..ae3cc055ddbc9 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -472,36 +472,26 @@ def test_categorical(self, method, box, ordered): reconstr = box(result_uniques[result_inverse]) assert_series_or_index_or_array_or_categorical_equal(reconstr, c) - def test_datetime64tz_aware(self): + @pytest.mark.parametrize('box', [Series, Index]) + @pytest.mark.parametrize('method', [lambda x, **kwargs: x.unique(**kwargs), + pd.unique], + ids=['classmethod', 'toplevel']) + def test_datetime64tz_aware(self, method, box): # GH 15939 - result = Series( - Index([Timestamp('20160101', tz='US/Eastern'), - Timestamp('20160101', tz='US/Eastern')])).unique() - expected = DatetimeArray._from_sequence(np.array([ - Timestamp('2016-01-01 00:00:00-0500', tz="US/Eastern") - ])) - tm.assert_extension_array_equal(result, expected) - - result = Index([Timestamp('20160101', tz='US/Eastern'), - Timestamp('20160101', tz='US/Eastern')]).unique() - expected = DatetimeIndex(['2016-01-01 00:00:00'], - dtype='datetime64[ns, US/Eastern]', freq=None) - tm.assert_index_equal(result, expected) - - result = pd.unique( - Series(Index([Timestamp('20160101', tz='US/Eastern'), - Timestamp('20160101', tz='US/Eastern')]))) - expected = DatetimeArray._from_sequence(np.array([ - Timestamp('2016-01-01', tz="US/Eastern"), - ])) - tm.assert_extension_array_equal(result, expected) - - result = pd.unique(Index([Timestamp('20160101', tz='US/Eastern'), - Timestamp('20160101', tz='US/Eastern')])) - expected = DatetimeIndex(['2016-01-01 00:00:00'], - dtype='datetime64[ns, US/Eastern]', freq=None) - tm.assert_index_equal(result, expected) + ts = Timestamp('20160101', tz='US/Eastern') + obj = box([ts, ts]) + + if box == Series: + expected = DatetimeArray._from_sequence(np.array([ + Timestamp('2016-01-01 00:00:00-0500', tz="US/Eastern") + ])) + else: # Index + expected = DatetimeIndex(['2016-01-01 00:00:00'], + dtype='datetime64[ns, US/Eastern]', + freq=None) + result = method(obj) + assert_series_or_index_or_array_or_categorical_equal(result, expected) def 
test_order_of_appearance(self): # 9346 @@ -515,20 +505,6 @@ def test_order_of_appearance(self): tm.assert_numpy_array_equal(result, np.array([2, 1], dtype='int64')) - result = pd.unique(Series([Timestamp('20160101'), - Timestamp('20160101')])) - expected = np.array(['2016-01-01T00:00:00.000000000'], - dtype='datetime64[ns]') - tm.assert_numpy_array_equal(result, expected) - - result = pd.unique(Index( - [Timestamp('20160101', tz='US/Eastern'), - Timestamp('20160101', tz='US/Eastern')])) - expected = DatetimeIndex(['2016-01-01 00:00:00'], - dtype='datetime64[ns, US/Eastern]', - freq=None) - tm.assert_index_equal(result, expected) - result = pd.unique(list('aabc')) expected = np.array(['a', 'b', 'c'], dtype=object) tm.assert_numpy_array_equal(result, expected) From c089f1fb6fb5036cb2e93c375fff6a1f66ebaf88 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sat, 2 Feb 2019 01:23:55 +0100 Subject: [PATCH 10/15] Add inverse to test_datetime64tz_aware --- pandas/tests/test_algos.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index ae3cc055ddbc9..559c40f709e86 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -493,6 +493,20 @@ def test_datetime64tz_aware(self, method, box): result = method(obj) assert_series_or_index_or_array_or_categorical_equal(result, expected) + if method == pd.unique: + # [Series/Index].unique do not yet support return_inverse=True + + # reuse result as expected outcome of return_inverse case + expected_uniques = result.copy() + result_uniques, result_inverse = method(obj, return_inverse=True) + + assert_series_or_index_or_array_or_categorical_equal( + result_uniques, expected_uniques) + + # reconstruction can only work if inverse is correct + reconstr = box(result_uniques[result_inverse]) + assert_series_or_index_or_array_or_categorical_equal(reconstr, obj) + def test_order_of_appearance(self): # 9346 # light testing of guarantee of order of appearance From 78d4758f20618632ee693937735b1fe7abbe8937 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sat, 2 Feb 2019 01:24:18 +0100 Subject: [PATCH 11/15] Remove test case that is covered elsewhere --- pandas/tests/test_algos.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 559c40f709e86..3fbba9236992d 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -523,10 +523,6 @@ def test_order_of_appearance(self): expected = np.array(['a', 'b', 'c'], dtype=object) tm.assert_numpy_array_equal(result, expected) - result = pd.unique(Series(Categorical(list('aabc')))) - expected = Categorical(list('abc')) - tm.assert_categorical_equal(result, expected) - @pytest.mark.parametrize("arg ,expected", [ (('1', '1', '2'), np.array(['1', '2'], dtype=object)), (('foo',), np.array(['foo'], dtype=object)) From 006d7ad8403e7e856846280e5de05b6380d2397b Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Sat, 2 Feb 2019 01:50:55 +0100 Subject: [PATCH 12/15] Fix DatetimeArray-case in test_datetime64tz_aware; create TODO --- pandas/tests/test_algos.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 3fbba9236992d..971ea26cbe03d 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -37,9 +37,12 @@ def assert_series_or_index_or_array_or_categorical_equal(left, right): tm.assert_numpy_array_equal(left, right) elif isinstance(left, Categorical): tm.assert_categorical_equal(left, right) + elif isinstance(left, DatetimeArray): + tm.assert_extension_array_equal(left, right) else: # will fail - assert isinstance(left, (Series, Index, np.ndarray, Categorical)) + assert isinstance(left, (Series, Index, np.ndarray, + Categorical, DatetimeArray)) class TestMatch(object): @@ -493,19 +496,19 @@ def test_datetime64tz_aware(self, method, box): result = method(obj) assert_series_or_index_or_array_or_categorical_equal(result, expected) - if method == pd.unique: - # [Series/Index].unique do not yet support return_inverse=True - - # reuse result as expected outcome of return_inverse case - expected_uniques = result.copy() - result_uniques, result_inverse = method(obj, return_inverse=True) - - assert_series_or_index_or_array_or_categorical_equal( - result_uniques, expected_uniques) - - # reconstruction can only work if inverse is correct - reconstr = box(result_uniques[result_inverse]) - assert_series_or_index_or_array_or_categorical_equal(reconstr, obj) + # TODO: add support for return_inverse to DatetimeArray/DatetimeIndex, + # as well as [[Series/Index].unique + + # # reuse result as expected outcome of return_inverse case + # expected_uniques = result.copy() + # result_uniques, result_inverse = method(obj, return_inverse=True) + # + # assert_series_or_index_or_array_or_categorical_equal( + # result_uniques, expected_uniques) + # + # # reconstruction can only work if inverse is correct + # reconstr = box(result_uniques[result_inverse]) + # assert_series_or_index_or_array_or_categorical_equal(reconstr, obj) def test_order_of_appearance(self): # 9346 From b310b487ceb54413ad291ab5ef6d95a469d81d72 Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Fri, 11 Oct 2019 13:51:27 +0200 Subject: [PATCH 13/15] blackify conflict files --- pandas/core/algorithms.py | 595 +++++++++-------- pandas/tests/test_algos.py | 1280 +++++++++++++++++++++++------------- 2 files changed, 1146 insertions(+), 729 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 4e2a3e512a929..a1bd66b58d642 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -14,16 +14,37 @@ from pandas.util._decorators import Appender, Substitution, deprecate_kwarg from pandas.core.dtypes.cast import ( - construct_1d_object_array_from_listlike, maybe_promote) + construct_1d_object_array_from_listlike, + maybe_promote, +) from pandas.core.dtypes.common import ( - ensure_float64, ensure_int64, ensure_object, ensure_platform_int, - ensure_uint64, is_array_like, is_bool_dtype, is_categorical_dtype, - is_complex_dtype, is_datetime64_any_dtype, is_datetime64tz_dtype, - is_datetimelike, is_extension_array_dtype, is_float_dtype, - is_integer_dtype, is_interval_dtype, is_list_like, is_numeric_dtype, - is_object_dtype, is_period_dtype, is_scalar, is_signed_integer_dtype, - is_sparse, is_timedelta64_dtype, is_unsigned_integer_dtype, - needs_i8_conversion) + ensure_float64, + ensure_int64, + ensure_object, + ensure_platform_int, + ensure_uint64, + is_array_like, + is_bool_dtype, + is_categorical_dtype, + is_complex_dtype, + is_datetime64_any_dtype, + is_datetime64tz_dtype, + is_datetimelike, + is_extension_array_dtype, + is_float_dtype, + is_integer_dtype, + is_interval_dtype, + is_list_like, + is_numeric_dtype, + is_object_dtype, + is_period_dtype, + is_scalar, + is_signed_integer_dtype, + is_sparse, + is_timedelta64_dtype, + is_unsigned_integer_dtype, + needs_i8_conversion, +) from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna, na_value_for_dtype @@ -63,20 +84,19 @@ def _ensure_data(values, dtype=None): # we check some simple dtypes first try: if is_object_dtype(dtype): - return ensure_object(np.asarray(values)), 'object', 'object' + return ensure_object(np.asarray(values)), "object", "object" if is_bool_dtype(values) or is_bool_dtype(dtype): # we are actually coercing to uint64 # until our algos support uint8 directly (see TODO) - return np.asarray(values).astype('uint64'), 'bool', 'uint64' + return np.asarray(values).astype("uint64"), "bool", "uint64" elif is_signed_integer_dtype(values) or is_signed_integer_dtype(dtype): - return ensure_int64(values), 'int64', 'int64' - elif (is_unsigned_integer_dtype(values) or - is_unsigned_integer_dtype(dtype)): - return ensure_uint64(values), 'uint64', 'uint64' + return ensure_int64(values), "int64", "int64" + elif is_unsigned_integer_dtype(values) or is_unsigned_integer_dtype(dtype): + return ensure_uint64(values), "uint64", "uint64" elif is_float_dtype(values) or is_float_dtype(dtype): - return ensure_float64(values), 'float64', 'float64' + return ensure_float64(values), "float64", "float64" elif is_object_dtype(values) and dtype is None: - return ensure_object(np.asarray(values)), 'object', 'object' + return ensure_object(np.asarray(values)), "object", "object" elif is_complex_dtype(values) or is_complex_dtype(dtype): # ignore the fact that we are casting to float @@ -84,49 +104,55 @@ def _ensure_data(values, dtype=None): with catch_warnings(): simplefilter("ignore", np.ComplexWarning) values = ensure_float64(values) - return values, 'float64', 'float64' + return values, "float64", "float64" except (TypeError, 
ValueError, OverflowError): # if we are trying to coerce to a dtype # and it is incompat this will fall thru to here - return ensure_object(values), 'object', 'object' + return ensure_object(values), "object", "object" # datetimelike - if (needs_i8_conversion(values) or - is_period_dtype(dtype) or - is_datetime64_any_dtype(dtype) or - is_timedelta64_dtype(dtype)): + if ( + needs_i8_conversion(values) + or is_period_dtype(dtype) + or is_datetime64_any_dtype(dtype) + or is_timedelta64_dtype(dtype) + ): if is_period_dtype(values) or is_period_dtype(dtype): from pandas import PeriodIndex + values = PeriodIndex(values) dtype = values.dtype elif is_timedelta64_dtype(values) or is_timedelta64_dtype(dtype): from pandas import TimedeltaIndex + values = TimedeltaIndex(values) dtype = values.dtype else: # Datetime from pandas import DatetimeIndex + values = DatetimeIndex(values) dtype = values.dtype - return values.asi8, dtype, 'int64' + return values.asi8, dtype, "int64" - elif (is_categorical_dtype(values) and - (is_categorical_dtype(dtype) or dtype is None)): - values = getattr(values, 'values', values) + elif is_categorical_dtype(values) and ( + is_categorical_dtype(dtype) or dtype is None + ): + values = getattr(values, "values", values) values = values.codes - dtype = 'category' + dtype = "category" # we are actually coercing to int64 # until our algos support int* directly (not all do) values = ensure_int64(values) - return values, dtype, 'int64' + return values, dtype, "int64" # we have failed, return object values = np.asarray(values, dtype=np.object) - return ensure_object(values), 'object', 'object' + return ensure_object(values), "object", "object" def _reconstruct_data(values, dtype, original): @@ -144,6 +170,7 @@ def _reconstruct_data(values, dtype, original): Index for extension types, otherwise ndarray casted to dtype """ from pandas import Index + if is_extension_array_dtype(dtype): values = dtype.construct_array_type()._from_sequence(values) elif is_datetime64tz_dtype(dtype) or is_period_dtype(dtype): @@ -166,7 +193,7 @@ def _ensure_arraylike(values): """ if not is_array_like(values): inferred = lib.infer_dtype(values, skipna=False) - if inferred in ['mixed', 'string', 'unicode']: + if inferred in ["mixed", "string", "unicode"]: if isinstance(values, tuple): values = list(values) values = construct_1d_object_array_from_listlike(values) @@ -176,11 +203,11 @@ def _ensure_arraylike(values): _hashtables = { - 'float64': (htable.Float64HashTable, htable.Float64Vector), - 'uint64': (htable.UInt64HashTable, htable.UInt64Vector), - 'int64': (htable.Int64HashTable, htable.Int64Vector), - 'string': (htable.StringHashTable, htable.ObjectVector), - 'object': (htable.PyObjectHashTable, htable.ObjectVector) + "float64": (htable.Float64HashTable, htable.Float64Vector), + "uint64": (htable.UInt64HashTable, htable.UInt64Vector), + "int64": (htable.Int64HashTable, htable.Int64Vector), + "string": (htable.StringHashTable, htable.ObjectVector), + "object": (htable.PyObjectHashTable, htable.ObjectVector), } @@ -200,15 +227,15 @@ def _get_hashtable_algo(values): """ values, dtype, ndtype = _ensure_data(values) - if ndtype == 'object': + if ndtype == "object": # it's cheaper to use a String Hash Table than Object; we infer # including nulls because that is the only difference between # StringHashTable and ObjectHashtable - if lib.infer_dtype(values, skipna=False) in ['string']: - ndtype = 'string' + if lib.infer_dtype(values, skipna=False) in ["string"]: + ndtype = "string" else: - ndtype = 'object' + 
ndtype = "object" htable, table = _hashtables[ndtype] return (htable, table, values, dtype, ndtype) @@ -220,15 +247,15 @@ def _get_data_algo(values, func_map): values = values._values_for_rank() values, dtype, ndtype = _ensure_data(values) - if ndtype == 'object': + if ndtype == "object": # it's cheaper to use a String Hash Table than Object; we infer # including nulls because that is the only difference between # StringHashTable and ObjectHashtable - if lib.infer_dtype(values, skipna=False) in ['string']: - ndtype = 'string' + if lib.infer_dtype(values, skipna=False) in ["string"]: + ndtype = "string" - f = func_map.get(ndtype, func_map['object']) + f = func_map.get(ndtype, func_map["object"]) return f, values @@ -237,6 +264,7 @@ def _get_data_algo(values, func_map): # top-level algos # # --------------- # + def match(to_match, values, na_sentinel=-1): """ Compute locations of to_match into values @@ -269,6 +297,7 @@ def match(to_match, values, na_sentinel=-1): # replace but return a numpy array # use a Series because it handles dtype conversions properly from pandas import Series + result = Series(result.ravel()).replace(-1, na_sentinel) result = result.values.reshape(result.shape) @@ -367,7 +396,7 @@ def unique(values, return_inverse=False): try: # make sure that we're not calling from an Index/Series # container, as these do not support return_inverse yet - ea_val = getattr(values, 'array', values) + ea_val = getattr(values, "array", values) result, inverse = ea_val.unique(return_inverse=return_inverse) if is_categorical_dtype(values) and isinstance(values, Index): @@ -375,9 +404,11 @@ def unique(values, return_inverse=False): result = Index(result) return result, inverse except TypeError: - msg = ('The Extension Array class for type {dtype} does not ' - 'yet support the unique-method with ' - '"return_inverse=True".'.format(dtype=type(values))) + msg = ( + "The Extension Array class for type {dtype} does not " + "yet support the unique-method with " + '"return_inverse=True".'.format(dtype=type(values)) + ) raise NotImplementedError(msg) return values.unique() @@ -415,13 +446,19 @@ def isin(comps, values): """ if not is_list_like(comps): - raise TypeError("only list-like objects are allowed to be passed" - " to isin(), you passed a [{comps_type}]" - .format(comps_type=type(comps).__name__)) + raise TypeError( + "only list-like objects are allowed to be passed" + " to isin(), you passed a [{comps_type}]".format( + comps_type=type(comps).__name__ + ) + ) if not is_list_like(values): - raise TypeError("only list-like objects are allowed to be passed" - " to isin(), you passed a [{values_type}]" - .format(values_type=type(values).__name__)) + raise TypeError( + "only list-like objects are allowed to be passed" + " to isin(), you passed a [{values_type}]".format( + values_type=type(values).__name__ + ) + ) if not isinstance(values, (ABCIndex, ABCSeries, np.ndarray)): values = construct_1d_object_array_from_listlike(list(values)) @@ -445,8 +482,8 @@ def isin(comps, values): f = lambda x, y: np.in1d(x, y) elif is_integer_dtype(comps): try: - values = values.astype('int64', copy=False) - comps = comps.astype('int64', copy=False) + values = values.astype("int64", copy=False) + comps = comps.astype("int64", copy=False) f = lambda x, y: htable.ismember_int64(x, y) except (TypeError, ValueError, OverflowError): values = values.astype(object) @@ -454,8 +491,8 @@ def isin(comps, values): elif is_float_dtype(comps): try: - values = values.astype('float64', copy=False) - comps = 
comps.astype('float64', copy=False) + values = values.astype("float64", copy=False) + comps = comps.astype("float64", copy=False) f = lambda x, y: htable.ismember_float64(x, y) except (TypeError, ValueError): values = values.astype(object) @@ -464,8 +501,7 @@ def isin(comps, values): return f(comps, values) -def _factorize_array(values, na_sentinel=-1, size_hint=None, - na_value=None): +def _factorize_array(values, na_sentinel=-1, size_hint=None, na_value=None): """Factorize an array-like to labels and uniques. This doesn't do any coercion of types or unboxing before factorization. @@ -489,14 +525,17 @@ def _factorize_array(values, na_sentinel=-1, size_hint=None, (hash_klass, _), values = _get_data_algo(values, _hashtables) table = hash_klass(size_hint or len(values)) - uniques, labels = table.factorize(values, na_sentinel=na_sentinel, - na_value=na_value) + uniques, labels = table.factorize( + values, na_sentinel=na_sentinel, na_value=na_value + ) labels = ensure_platform_int(labels) return labels, uniques -_shared_docs['factorize'] = """ +_shared_docs[ + "factorize" +] = """ Encode the object as an enumerated type or categorical variable. This method is useful for obtaining a numeric representation of an @@ -590,29 +629,37 @@ def _factorize_array(values, na_sentinel=-1, size_hint=None, @Substitution( - values=dedent("""\ + values=dedent( + """\ values : sequence A 1-D sequence. Sequences that aren't pandas objects are coerced to ndarrays before factorization. - """), - order=dedent("""\ + """ + ), + order=dedent( + """\ order .. deprecated:: 0.23.0 This parameter has no effect and is deprecated. - """), - sort=dedent("""\ + """ + ), + sort=dedent( + """\ sort : bool, default False Sort `uniques` and shuffle `labels` to maintain the relationship. - """), - size_hint=dedent("""\ + """ + ), + size_hint=dedent( + """\ size_hint : int, optional Hint to the hashtable sizer. - """), + """ + ), ) -@Appender(_shared_docs['factorize']) -@deprecate_kwarg(old_arg_name='order', new_arg_name=None) +@Appender(_shared_docs["factorize"]) +@deprecate_kwarg(old_arg_name="order", new_arg_name=None) def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): # Implementation notes: This method is responsible for 3 things # 1.) coercing data to array-like (ndarray, Index, extension array) @@ -627,26 +674,28 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): original = values if is_extension_array_dtype(values): - values = getattr(values, '_values', values) + values = getattr(values, "_values", values) labels, uniques = values.factorize(na_sentinel=na_sentinel) dtype = original.dtype else: values, dtype, _ = _ensure_data(values) - if (is_datetime64_any_dtype(original) or - is_timedelta64_dtype(original) or - is_period_dtype(original)): + if ( + is_datetime64_any_dtype(original) + or is_timedelta64_dtype(original) + or is_period_dtype(original) + ): na_value = na_value_for_dtype(original.dtype) else: na_value = None - labels, uniques = _factorize_array(values, - na_sentinel=na_sentinel, - size_hint=size_hint, - na_value=na_value) + labels, uniques = _factorize_array( + values, na_sentinel=na_sentinel, size_hint=size_hint, na_value=na_value + ) if sort and len(uniques) > 0: from pandas.core.sorting import safe_sort + try: order = uniques.argsort() order2 = order.argsort() @@ -654,9 +703,9 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): uniques = uniques.take(order) except TypeError: # Mixed types, where uniques.argsort fails. 
- uniques, labels = safe_sort(uniques, labels, - na_sentinel=na_sentinel, - assume_unique=True) + uniques, labels = safe_sort( + uniques, labels, na_sentinel=na_sentinel, assume_unique=True + ) uniques = _reconstruct_data(uniques, dtype, original) @@ -665,13 +714,15 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): uniques = original._shallow_copy(uniques, name=None) elif isinstance(original, ABCSeries): from pandas import Index + uniques = Index(uniques) return labels, uniques -def value_counts(values, sort=True, ascending=False, normalize=False, - bins=None, dropna=True): +def value_counts( + values, sort=True, ascending=False, normalize=False, bins=None, dropna=True +): """ Compute a histogram of the counts of non-null values. @@ -696,11 +747,13 @@ def value_counts(values, sort=True, ascending=False, normalize=False, """ from pandas.core.series import Series, Index - name = getattr(values, 'name', None) + + name = getattr(values, "name", None) if bins is not None: try: from pandas.core.reshape.tile import cut + values = Series(values) ii = cut(values, bins, include_lowest=True) except TypeError: @@ -709,7 +762,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False, # count, remove nulls (from the index), and but the bins result = ii.value_counts(dropna=dropna) result = result[result.index.notna()] - result.index = result.index.astype('interval') + result.index = result.index.astype("interval") result = result.sort_index() # if we are dropna and we have NO values @@ -787,7 +840,7 @@ def _value_counts_arraylike(values, dropna): return keys, counts -def duplicated(values, keep='first'): +def duplicated(values, keep="first"): """ Return boolean ndarray denoting duplicate values. @@ -859,8 +912,7 @@ def mode(values, dropna=True): return Series(result) -def rank(values, axis=0, method='average', na_option='keep', - ascending=True, pct=False): +def rank(values, axis=0, method="average", na_option="keep", ascending=True, pct=False): """ Rank the values along a given axis. 
@@ -886,12 +938,23 @@ def rank(values, axis=0, method='average', na_option='keep', """ if values.ndim == 1: f, values = _get_data_algo(values, _rank1d_functions) - ranks = f(values, ties_method=method, ascending=ascending, - na_option=na_option, pct=pct) + ranks = f( + values, + ties_method=method, + ascending=ascending, + na_option=na_option, + pct=pct, + ) elif values.ndim == 2: f, values = _get_data_algo(values, _rank2d_functions) - ranks = f(values, axis=axis, ties_method=method, - ascending=ascending, na_option=na_option, pct=pct) + ranks = f( + values, + axis=axis, + ties_method=method, + ascending=ascending, + na_option=na_option, + pct=pct, + ) else: raise TypeError("Array with ndim > 2 are not supported.") @@ -962,10 +1025,12 @@ def checked_add_with_arr(arr, b, arr_mask=None, b_mask=None): elif not mask2.any(): to_raise = ((np.iinfo(np.int64).max - b2 < arr) & not_nan).any() else: - to_raise = (((np.iinfo(np.int64).max - - b2[mask1] < arr[mask1]) & not_nan[mask1]).any() or - ((np.iinfo(np.int64).min - - b2[mask2] > arr[mask2]) & not_nan[mask2]).any()) + to_raise = ( + ((np.iinfo(np.int64).max - b2[mask1] < arr[mask1]) & not_nan[mask1]).any() + or ( + (np.iinfo(np.int64).min - b2[mask2] > arr[mask2]) & not_nan[mask2] + ).any() + ) if to_raise: raise OverflowError("Overflow in int64 addition") @@ -973,21 +1038,21 @@ def checked_add_with_arr(arr, b, arr_mask=None, b_mask=None): _rank1d_functions = { - 'float64': algos.rank_1d_float64, - 'int64': algos.rank_1d_int64, - 'uint64': algos.rank_1d_uint64, - 'object': algos.rank_1d_object + "float64": algos.rank_1d_float64, + "int64": algos.rank_1d_int64, + "uint64": algos.rank_1d_uint64, + "object": algos.rank_1d_object, } _rank2d_functions = { - 'float64': algos.rank_2d_float64, - 'int64': algos.rank_2d_int64, - 'uint64': algos.rank_2d_uint64, - 'object': algos.rank_2d_object + "float64": algos.rank_2d_float64, + "int64": algos.rank_2d_int64, + "uint64": algos.rank_2d_uint64, + "object": algos.rank_2d_object, } -def quantile(x, q, interpolation_method='fraction'): +def quantile(x, q, interpolation_method="fraction"): """ Compute sample quantile or quantiles of the input array. For example, q=0.5 computes the median. 
@@ -1047,16 +1112,17 @@ def _get_score(at): if idx % 1 == 0: score = values[int(idx)] else: - if interpolation_method == 'fraction': - score = _interpolate(values[int(idx)], values[int(idx) + 1], - idx % 1) - elif interpolation_method == 'lower': + if interpolation_method == "fraction": + score = _interpolate(values[int(idx)], values[int(idx) + 1], idx % 1) + elif interpolation_method == "lower": score = values[np.floor(idx)] - elif interpolation_method == 'higher': + elif interpolation_method == "higher": score = values[np.ceil(idx)] else: - raise ValueError("interpolation_method can only be 'fraction' " - ", 'lower' or 'higher'") + raise ValueError( + "interpolation_method can only be 'fraction' " + ", 'lower' or 'higher'" + ) return score @@ -1071,21 +1137,21 @@ def _get_score(at): # select n # # --------------- # -class SelectN(object): +class SelectN(object): def __init__(self, obj, n, keep): self.obj = obj self.n = n self.keep = keep - if self.keep not in ('first', 'last', 'all'): + if self.keep not in ("first", "last", "all"): raise ValueError('keep must be either "first", "last" or "all"') def nlargest(self): - return self.compute('nlargest') + return self.compute("nlargest") def nsmallest(self): - return self.compute('nsmallest') + return self.compute("nsmallest") @staticmethod def is_valid_dtype_n_method(dtype): @@ -1093,8 +1159,9 @@ def is_valid_dtype_n_method(dtype): Helper function to determine if dtype is valid for nsmallest/nlargest methods """ - return ((is_numeric_dtype(dtype) and not is_complex_dtype(dtype)) or - needs_i8_conversion(dtype)) + return ( + is_numeric_dtype(dtype) and not is_complex_dtype(dtype) + ) or needs_i8_conversion(dtype) class SelectNSeries(SelectN): @@ -1117,9 +1184,10 @@ def compute(self, method): n = self.n dtype = self.obj.dtype if not self.is_valid_dtype_n_method(dtype): - raise TypeError("Cannot use method '{method}' with " - "dtype {dtype}".format(method=method, - dtype=dtype)) + raise TypeError( + "Cannot use method '{method}' with " + "dtype {dtype}".format(method=method, dtype=dtype) + ) if n <= 0: return self.obj[[]] @@ -1129,20 +1197,20 @@ def compute(self, method): # slow method if n >= len(self.obj): - reverse_it = (self.keep == 'last' or method == 'nlargest') - ascending = method == 'nsmallest' + reverse_it = self.keep == "last" or method == "nlargest" + ascending = method == "nsmallest" slc = np.s_[::-1] if reverse_it else np.s_[:] return dropped[slc].sort_values(ascending=ascending).head(n) # fast method arr, pandas_dtype, _ = _ensure_data(dropped.values) - if method == 'nlargest': + if method == "nlargest": arr = -arr if is_integer_dtype(pandas_dtype): # GH 21426: ensure reverse ordering at boundaries arr -= 1 - if self.keep == 'last': + if self.keep == "last": arr = arr[::-1] narr = len(arr) @@ -1150,12 +1218,12 @@ def compute(self, method): kth_val = algos.kth_smallest(arr.copy(), n - 1) ns, = np.nonzero(arr <= kth_val) - inds = ns[arr[ns].argsort(kind='mergesort')] + inds = ns[arr[ns].argsort(kind="mergesort")] - if self.keep != 'all': + if self.keep != "all": inds = inds[:n] - if self.keep == 'last': + if self.keep == "last": # reverse indices inds = narr - 1 - inds @@ -1188,6 +1256,7 @@ def __init__(self, obj, n, keep, columns): def compute(self, method): from pandas import Int64Index + n = self.n frame = self.obj columns = self.columns @@ -1195,16 +1264,18 @@ def compute(self, method): for column in columns: dtype = frame[column].dtype if not self.is_valid_dtype_n_method(dtype): - raise TypeError(( - "Column {column!r} has 
dtype {dtype}, cannot use method " - "{method!r} with this dtype" - ).format(column=column, dtype=dtype, method=method)) + raise TypeError( + ( + "Column {column!r} has dtype {dtype}, cannot use method " + "{method!r} with this dtype" + ).format(column=column, dtype=dtype, method=method) + ) def get_indexer(current_indexer, other_indexer): """Helper function to concat `current_indexer` and `other_indexer` depending on `method` """ - if method == 'nsmallest': + if method == "nsmallest": return current_indexer.append(other_indexer) else: return other_indexer.append(current_indexer) @@ -1226,8 +1297,8 @@ def get_indexer(current_indexer, other_indexer): series = cur_frame[column] is_last_column = len(columns) - 1 == i values = getattr(series, method)( - cur_n, - keep=self.keep if is_last_column else 'all') + cur_n, keep=self.keep if is_last_column else "all" + ) if is_last_column or len(values) <= cur_n: indexer = get_indexer(indexer, values.index) @@ -1260,12 +1331,9 @@ def get_indexer(current_indexer, other_indexer): if len(columns) == 1: return frame - ascending = method == 'nsmallest' + ascending = method == "nsmallest" - return frame.sort_values( - columns, - ascending=ascending, - kind='mergesort') + return frame.sort_values(columns, ascending=ascending, kind="mergesort") # ------- ## ---- # @@ -1334,110 +1402,103 @@ def _take_nd_object(arr, indexer, out, axis, fill_value, mask_info): _take_1d_dict = { - ('int8', 'int8'): algos.take_1d_int8_int8, - ('int8', 'int32'): algos.take_1d_int8_int32, - ('int8', 'int64'): algos.take_1d_int8_int64, - ('int8', 'float64'): algos.take_1d_int8_float64, - ('int16', 'int16'): algos.take_1d_int16_int16, - ('int16', 'int32'): algos.take_1d_int16_int32, - ('int16', 'int64'): algos.take_1d_int16_int64, - ('int16', 'float64'): algos.take_1d_int16_float64, - ('int32', 'int32'): algos.take_1d_int32_int32, - ('int32', 'int64'): algos.take_1d_int32_int64, - ('int32', 'float64'): algos.take_1d_int32_float64, - ('int64', 'int64'): algos.take_1d_int64_int64, - ('int64', 'float64'): algos.take_1d_int64_float64, - ('float32', 'float32'): algos.take_1d_float32_float32, - ('float32', 'float64'): algos.take_1d_float32_float64, - ('float64', 'float64'): algos.take_1d_float64_float64, - ('object', 'object'): algos.take_1d_object_object, - ('bool', 'bool'): _view_wrapper(algos.take_1d_bool_bool, np.uint8, - np.uint8), - ('bool', 'object'): _view_wrapper(algos.take_1d_bool_object, np.uint8, - None), - ('datetime64[ns]', 'datetime64[ns]'): _view_wrapper( - algos.take_1d_int64_int64, np.int64, np.int64, np.int64) + ("int8", "int8"): algos.take_1d_int8_int8, + ("int8", "int32"): algos.take_1d_int8_int32, + ("int8", "int64"): algos.take_1d_int8_int64, + ("int8", "float64"): algos.take_1d_int8_float64, + ("int16", "int16"): algos.take_1d_int16_int16, + ("int16", "int32"): algos.take_1d_int16_int32, + ("int16", "int64"): algos.take_1d_int16_int64, + ("int16", "float64"): algos.take_1d_int16_float64, + ("int32", "int32"): algos.take_1d_int32_int32, + ("int32", "int64"): algos.take_1d_int32_int64, + ("int32", "float64"): algos.take_1d_int32_float64, + ("int64", "int64"): algos.take_1d_int64_int64, + ("int64", "float64"): algos.take_1d_int64_float64, + ("float32", "float32"): algos.take_1d_float32_float32, + ("float32", "float64"): algos.take_1d_float32_float64, + ("float64", "float64"): algos.take_1d_float64_float64, + ("object", "object"): algos.take_1d_object_object, + ("bool", "bool"): _view_wrapper(algos.take_1d_bool_bool, np.uint8, np.uint8), + ("bool", "object"): 
_view_wrapper(algos.take_1d_bool_object, np.uint8, None), + ("datetime64[ns]", "datetime64[ns]"): _view_wrapper( + algos.take_1d_int64_int64, np.int64, np.int64, np.int64 + ), } _take_2d_axis0_dict = { - ('int8', 'int8'): algos.take_2d_axis0_int8_int8, - ('int8', 'int32'): algos.take_2d_axis0_int8_int32, - ('int8', 'int64'): algos.take_2d_axis0_int8_int64, - ('int8', 'float64'): algos.take_2d_axis0_int8_float64, - ('int16', 'int16'): algos.take_2d_axis0_int16_int16, - ('int16', 'int32'): algos.take_2d_axis0_int16_int32, - ('int16', 'int64'): algos.take_2d_axis0_int16_int64, - ('int16', 'float64'): algos.take_2d_axis0_int16_float64, - ('int32', 'int32'): algos.take_2d_axis0_int32_int32, - ('int32', 'int64'): algos.take_2d_axis0_int32_int64, - ('int32', 'float64'): algos.take_2d_axis0_int32_float64, - ('int64', 'int64'): algos.take_2d_axis0_int64_int64, - ('int64', 'float64'): algos.take_2d_axis0_int64_float64, - ('float32', 'float32'): algos.take_2d_axis0_float32_float32, - ('float32', 'float64'): algos.take_2d_axis0_float32_float64, - ('float64', 'float64'): algos.take_2d_axis0_float64_float64, - ('object', 'object'): algos.take_2d_axis0_object_object, - ('bool', 'bool'): _view_wrapper(algos.take_2d_axis0_bool_bool, np.uint8, - np.uint8), - ('bool', 'object'): _view_wrapper(algos.take_2d_axis0_bool_object, - np.uint8, None), - ('datetime64[ns]', 'datetime64[ns]'): - _view_wrapper(algos.take_2d_axis0_int64_int64, np.int64, np.int64, - fill_wrap=np.int64) + ("int8", "int8"): algos.take_2d_axis0_int8_int8, + ("int8", "int32"): algos.take_2d_axis0_int8_int32, + ("int8", "int64"): algos.take_2d_axis0_int8_int64, + ("int8", "float64"): algos.take_2d_axis0_int8_float64, + ("int16", "int16"): algos.take_2d_axis0_int16_int16, + ("int16", "int32"): algos.take_2d_axis0_int16_int32, + ("int16", "int64"): algos.take_2d_axis0_int16_int64, + ("int16", "float64"): algos.take_2d_axis0_int16_float64, + ("int32", "int32"): algos.take_2d_axis0_int32_int32, + ("int32", "int64"): algos.take_2d_axis0_int32_int64, + ("int32", "float64"): algos.take_2d_axis0_int32_float64, + ("int64", "int64"): algos.take_2d_axis0_int64_int64, + ("int64", "float64"): algos.take_2d_axis0_int64_float64, + ("float32", "float32"): algos.take_2d_axis0_float32_float32, + ("float32", "float64"): algos.take_2d_axis0_float32_float64, + ("float64", "float64"): algos.take_2d_axis0_float64_float64, + ("object", "object"): algos.take_2d_axis0_object_object, + ("bool", "bool"): _view_wrapper(algos.take_2d_axis0_bool_bool, np.uint8, np.uint8), + ("bool", "object"): _view_wrapper(algos.take_2d_axis0_bool_object, np.uint8, None), + ("datetime64[ns]", "datetime64[ns]"): _view_wrapper( + algos.take_2d_axis0_int64_int64, np.int64, np.int64, fill_wrap=np.int64 + ), } _take_2d_axis1_dict = { - ('int8', 'int8'): algos.take_2d_axis1_int8_int8, - ('int8', 'int32'): algos.take_2d_axis1_int8_int32, - ('int8', 'int64'): algos.take_2d_axis1_int8_int64, - ('int8', 'float64'): algos.take_2d_axis1_int8_float64, - ('int16', 'int16'): algos.take_2d_axis1_int16_int16, - ('int16', 'int32'): algos.take_2d_axis1_int16_int32, - ('int16', 'int64'): algos.take_2d_axis1_int16_int64, - ('int16', 'float64'): algos.take_2d_axis1_int16_float64, - ('int32', 'int32'): algos.take_2d_axis1_int32_int32, - ('int32', 'int64'): algos.take_2d_axis1_int32_int64, - ('int32', 'float64'): algos.take_2d_axis1_int32_float64, - ('int64', 'int64'): algos.take_2d_axis1_int64_int64, - ('int64', 'float64'): algos.take_2d_axis1_int64_float64, - ('float32', 'float32'): 
algos.take_2d_axis1_float32_float32, - ('float32', 'float64'): algos.take_2d_axis1_float32_float64, - ('float64', 'float64'): algos.take_2d_axis1_float64_float64, - ('object', 'object'): algos.take_2d_axis1_object_object, - ('bool', 'bool'): _view_wrapper(algos.take_2d_axis1_bool_bool, np.uint8, - np.uint8), - ('bool', 'object'): _view_wrapper(algos.take_2d_axis1_bool_object, - np.uint8, None), - ('datetime64[ns]', 'datetime64[ns]'): - _view_wrapper(algos.take_2d_axis1_int64_int64, np.int64, np.int64, - fill_wrap=np.int64) + ("int8", "int8"): algos.take_2d_axis1_int8_int8, + ("int8", "int32"): algos.take_2d_axis1_int8_int32, + ("int8", "int64"): algos.take_2d_axis1_int8_int64, + ("int8", "float64"): algos.take_2d_axis1_int8_float64, + ("int16", "int16"): algos.take_2d_axis1_int16_int16, + ("int16", "int32"): algos.take_2d_axis1_int16_int32, + ("int16", "int64"): algos.take_2d_axis1_int16_int64, + ("int16", "float64"): algos.take_2d_axis1_int16_float64, + ("int32", "int32"): algos.take_2d_axis1_int32_int32, + ("int32", "int64"): algos.take_2d_axis1_int32_int64, + ("int32", "float64"): algos.take_2d_axis1_int32_float64, + ("int64", "int64"): algos.take_2d_axis1_int64_int64, + ("int64", "float64"): algos.take_2d_axis1_int64_float64, + ("float32", "float32"): algos.take_2d_axis1_float32_float32, + ("float32", "float64"): algos.take_2d_axis1_float32_float64, + ("float64", "float64"): algos.take_2d_axis1_float64_float64, + ("object", "object"): algos.take_2d_axis1_object_object, + ("bool", "bool"): _view_wrapper(algos.take_2d_axis1_bool_bool, np.uint8, np.uint8), + ("bool", "object"): _view_wrapper(algos.take_2d_axis1_bool_object, np.uint8, None), + ("datetime64[ns]", "datetime64[ns]"): _view_wrapper( + algos.take_2d_axis1_int64_int64, np.int64, np.int64, fill_wrap=np.int64 + ), } _take_2d_multi_dict = { - ('int8', 'int8'): algos.take_2d_multi_int8_int8, - ('int8', 'int32'): algos.take_2d_multi_int8_int32, - ('int8', 'int64'): algos.take_2d_multi_int8_int64, - ('int8', 'float64'): algos.take_2d_multi_int8_float64, - ('int16', 'int16'): algos.take_2d_multi_int16_int16, - ('int16', 'int32'): algos.take_2d_multi_int16_int32, - ('int16', 'int64'): algos.take_2d_multi_int16_int64, - ('int16', 'float64'): algos.take_2d_multi_int16_float64, - ('int32', 'int32'): algos.take_2d_multi_int32_int32, - ('int32', 'int64'): algos.take_2d_multi_int32_int64, - ('int32', 'float64'): algos.take_2d_multi_int32_float64, - ('int64', 'int64'): algos.take_2d_multi_int64_int64, - ('int64', 'float64'): algos.take_2d_multi_int64_float64, - ('float32', 'float32'): algos.take_2d_multi_float32_float32, - ('float32', 'float64'): algos.take_2d_multi_float32_float64, - ('float64', 'float64'): algos.take_2d_multi_float64_float64, - ('object', 'object'): algos.take_2d_multi_object_object, - ('bool', 'bool'): _view_wrapper(algos.take_2d_multi_bool_bool, np.uint8, - np.uint8), - ('bool', 'object'): _view_wrapper(algos.take_2d_multi_bool_object, - np.uint8, None), - ('datetime64[ns]', 'datetime64[ns]'): - _view_wrapper(algos.take_2d_multi_int64_int64, np.int64, np.int64, - fill_wrap=np.int64) + ("int8", "int8"): algos.take_2d_multi_int8_int8, + ("int8", "int32"): algos.take_2d_multi_int8_int32, + ("int8", "int64"): algos.take_2d_multi_int8_int64, + ("int8", "float64"): algos.take_2d_multi_int8_float64, + ("int16", "int16"): algos.take_2d_multi_int16_int16, + ("int16", "int32"): algos.take_2d_multi_int16_int32, + ("int16", "int64"): algos.take_2d_multi_int16_int64, + ("int16", "float64"): algos.take_2d_multi_int16_float64, + 
("int32", "int32"): algos.take_2d_multi_int32_int32, + ("int32", "int64"): algos.take_2d_multi_int32_int64, + ("int32", "float64"): algos.take_2d_multi_int32_float64, + ("int64", "int64"): algos.take_2d_multi_int64_int64, + ("int64", "float64"): algos.take_2d_multi_int64_float64, + ("float32", "float32"): algos.take_2d_multi_float32_float32, + ("float32", "float64"): algos.take_2d_multi_float32_float64, + ("float64", "float64"): algos.take_2d_multi_float64_float64, + ("object", "object"): algos.take_2d_multi_object_object, + ("bool", "bool"): _view_wrapper(algos.take_2d_multi_bool_bool, np.uint8, np.uint8), + ("bool", "object"): _view_wrapper(algos.take_2d_multi_bool_object, np.uint8, None), + ("datetime64[ns]", "datetime64[ns]"): _view_wrapper( + algos.take_2d_multi_int64_int64, np.int64, np.int64, fill_wrap=np.int64 + ), } @@ -1468,8 +1529,9 @@ def _get_take_nd_function(ndim, arr_dtype, out_dtype, axis=0, mask_info=None): def func(arr, indexer, out, fill_value=np.nan): indexer = ensure_int64(indexer) - _take_nd_object(arr, indexer, out, axis=axis, fill_value=fill_value, - mask_info=mask_info) + _take_nd_object( + arr, indexer, out, axis=axis, fill_value=fill_value, mask_info=mask_info + ) return func @@ -1560,16 +1622,18 @@ def take(arr, indices, axis=0, allow_fill=False, fill_value=None): if allow_fill: # Pandas style, -1 means NA validate_indices(indices, len(arr)) - result = take_1d(arr, indices, axis=axis, allow_fill=True, - fill_value=fill_value) + result = take_1d( + arr, indices, axis=axis, allow_fill=True, fill_value=fill_value + ) else: # NumPy style result = arr.take(indices, axis=axis) return result -def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, - allow_fill=True): +def take_nd( + arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, allow_fill=True +): """ Specialized Cython take which sets NaN values in one pass @@ -1644,7 +1708,7 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, mask_info = mask, needs_masking if needs_masking: if out is not None and out.dtype != dtype: - raise TypeError('Incompatible type for fill_value') + raise TypeError("Incompatible type for fill_value") else: # if not, then depromote, set fill_value to dummy # (it won't be used but we don't want the cython code @@ -1673,12 +1737,13 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, # for dataframes initialized directly from 2-d ndarrays # (s.t. 
df.values is c-contiguous and df._data.blocks[0] is its # f-contiguous transpose) - out = np.empty(out_shape, dtype=dtype, order='F') + out = np.empty(out_shape, dtype=dtype, order="F") else: out = np.empty(out_shape, dtype=dtype) - func = _get_take_nd_function(arr.ndim, arr.dtype, out.dtype, axis=axis, - mask_info=mask_info) + func = _get_take_nd_function( + arr.ndim, arr.dtype, out.dtype, axis=axis, mask_info=mask_info + ) func(arr, indexer, out, fill_value) if flip_order: @@ -1689,8 +1754,9 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, take_1d = take_nd -def take_2d_multi(arr, indexer, out=None, fill_value=np.nan, mask_info=None, - allow_fill=True): +def take_2d_multi( + arr, indexer, out=None, fill_value=np.nan, mask_info=None, allow_fill=True +): """ Specialized Cython take which sets NaN values in one pass """ @@ -1729,7 +1795,7 @@ def take_2d_multi(arr, indexer, out=None, fill_value=np.nan, mask_info=None, mask_info = (row_mask, col_mask), (row_needs, col_needs) if row_needs or col_needs: if out is not None and out.dtype != dtype: - raise TypeError('Incompatible type for fill_value') + raise TypeError("Incompatible type for fill_value") else: # if not, then depromote, set fill_value to dummy # (it won't be used but we don't want the cython code @@ -1750,8 +1816,9 @@ def take_2d_multi(arr, indexer, out=None, fill_value=np.nan, mask_info=None, if func is None: def func(arr, indexer, out, fill_value=np.nan): - _take_2d_multi_object(arr, indexer, out, fill_value=fill_value, - mask_info=mask_info) + _take_2d_multi_object( + arr, indexer, out, fill_value=fill_value, mask_info=mask_info + ) func(arr, indexer, out=out, fill_value=fill_value) return out @@ -1762,12 +1829,12 @@ def func(arr, indexer, out, fill_value=np.nan): # ---- # _diff_special = { - 'float64': algos.diff_2d_float64, - 'float32': algos.diff_2d_float32, - 'int64': algos.diff_2d_int64, - 'int32': algos.diff_2d_int32, - 'int16': algos.diff_2d_int16, - 'int8': algos.diff_2d_int8, + "float64": algos.diff_2d_float64, + "float32": algos.diff_2d_float32, + "int64": algos.diff_2d_int64, + "int32": algos.diff_2d_int32, + "int16": algos.diff_2d_int16, + "int8": algos.diff_2d_int8, } @@ -1797,7 +1864,7 @@ def diff(arr, n, axis=0): is_timedelta = False if needs_i8_conversion(arr): dtype = np.float64 - arr = arr.view('i8') + arr = arr.view("i8") na = iNaT is_timedelta = True @@ -1847,7 +1914,11 @@ def diff(arr, n, axis=0): if is_timedelta: from pandas import TimedeltaIndex - out_arr = TimedeltaIndex(out_arr.ravel().astype('int64')).asi8.reshape( - out_arr.shape).astype('timedelta64[ns]') + + out_arr = ( + TimedeltaIndex(out_arr.ravel().astype("int64")) + .asi8.reshape(out_arr.shape) + .astype("timedelta64[ns]") + ) return out_arr diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 971ea26cbe03d..6fe35cb87b020 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -9,8 +9,7 @@ from numpy.random import RandomState import pytest -from pandas._libs import ( - algos as libalgos, groupby as libgroupby, hashtable as ht) +from pandas._libs import algos as libalgos, groupby as libgroupby, hashtable as ht from pandas.compat import lrange, range from pandas.compat.numpy import np_array_datetime64_compat import pandas.util._test_decorators as td @@ -19,8 +18,15 @@ import pandas as pd from pandas import ( - Categorical, CategoricalIndex, DatetimeIndex, Index, IntervalIndex, Series, - Timestamp, compat) + Categorical, + CategoricalIndex, + DatetimeIndex, + Index, + 
IntervalIndex, + Series, + Timestamp, + compat, +) import pandas.core.algorithms as algos from pandas.core.arrays import DatetimeArray import pandas.core.common as com @@ -41,12 +47,10 @@ def assert_series_or_index_or_array_or_categorical_equal(left, right): tm.assert_extension_array_equal(left, right) else: # will fail - assert isinstance(left, (Series, Index, np.ndarray, - Categorical, DatetimeArray)) + assert isinstance(left, (Series, Index, np.ndarray, Categorical, DatetimeArray)) class TestMatch(object): - def test_ints(self): values = np.array([0, 2, 1]) to_match = np.array([0, 1, 2, 2, 0, 1, 3, 0]) @@ -69,8 +73,8 @@ def test_ints(self): tm.assert_series_equal(result, expected) def test_strings(self): - values = ['foo', 'bar', 'baz'] - to_match = ['bar', 'foo', 'qux', 'foo', 'bar', 'baz', 'qux'] + values = ["foo", "bar", "baz"] + to_match = ["bar", "foo", "qux", "foo", "bar", "baz", "qux"] result = algos.match(to_match, values) expected = np.array([1, 0, -1, 0, 1, 2, -1], dtype=np.int64) @@ -82,19 +86,17 @@ def test_strings(self): class TestFactorize(object): - def test_basic(self): - labels, uniques = algos.factorize(['a', 'b', 'b', 'a', 'a', 'c', 'c', - 'c']) - tm.assert_numpy_array_equal( - uniques, np.array(['a', 'b', 'c'], dtype=object)) + labels, uniques = algos.factorize(["a", "b", "b", "a", "a", "c", "c", "c"]) + tm.assert_numpy_array_equal(uniques, np.array(["a", "b", "c"], dtype=object)) - labels, uniques = algos.factorize(['a', 'b', 'b', 'a', - 'a', 'c', 'c', 'c'], sort=True) + labels, uniques = algos.factorize( + ["a", "b", "b", "a", "a", "c", "c", "c"], sort=True + ) exp = np.array([0, 1, 1, 0, 0, 2, 2, 2], dtype=np.intp) tm.assert_numpy_array_equal(labels, exp) - exp = np.array(['a', 'b', 'c'], dtype=object) + exp = np.array(["a", "b", "c"], dtype=object) tm.assert_numpy_array_equal(uniques, exp) labels, uniques = algos.factorize(list(reversed(range(5)))) @@ -110,41 +112,40 @@ def test_basic(self): exp = np.array([0, 1, 2, 3, 4], dtype=np.int64) tm.assert_numpy_array_equal(uniques, exp) - labels, uniques = algos.factorize(list(reversed(np.arange(5.)))) + labels, uniques = algos.factorize(list(reversed(np.arange(5.0)))) exp = np.array([0, 1, 2, 3, 4], dtype=np.intp) tm.assert_numpy_array_equal(labels, exp) - exp = np.array([4., 3., 2., 1., 0.], dtype=np.float64) + exp = np.array([4.0, 3.0, 2.0, 1.0, 0.0], dtype=np.float64) tm.assert_numpy_array_equal(uniques, exp) - labels, uniques = algos.factorize(list(reversed(np.arange(5.))), - sort=True) + labels, uniques = algos.factorize(list(reversed(np.arange(5.0))), sort=True) exp = np.array([4, 3, 2, 1, 0], dtype=np.intp) tm.assert_numpy_array_equal(labels, exp) - exp = np.array([0., 1., 2., 3., 4.], dtype=np.float64) + exp = np.array([0.0, 1.0, 2.0, 3.0, 4.0], dtype=np.float64) tm.assert_numpy_array_equal(uniques, exp) def test_mixed(self): # doc example reshaping.rst - x = Series(['A', 'A', np.nan, 'B', 3.14, np.inf]) + x = Series(["A", "A", np.nan, "B", 3.14, np.inf]) labels, uniques = algos.factorize(x) exp = np.array([0, 0, -1, 1, 2, 3], dtype=np.intp) tm.assert_numpy_array_equal(labels, exp) - exp = Index(['A', 'B', 3.14, np.inf]) + exp = Index(["A", "B", 3.14, np.inf]) tm.assert_index_equal(uniques, exp) labels, uniques = algos.factorize(x, sort=True) exp = np.array([2, 2, -1, 3, 0, 1], dtype=np.intp) tm.assert_numpy_array_equal(labels, exp) - exp = Index([3.14, np.inf, 'A', 'B']) + exp = Index([3.14, np.inf, "A", "B"]) tm.assert_index_equal(uniques, exp) def test_datelike(self): # M8 - v1 = Timestamp('20130101 
09:00:00.00004') - v2 = Timestamp('20130101') + v1 = Timestamp("20130101 09:00:00.00004") + v2 = Timestamp("20130101") x = Series([v1, v1, v1, v2, v2, v1]) labels, uniques = algos.factorize(x) @@ -160,8 +161,8 @@ def test_datelike(self): tm.assert_index_equal(uniques, exp) # period - v1 = pd.Period('201302', freq='M') - v2 = pd.Period('201303', freq='M') + v1 = pd.Period("201302", freq="M") + v2 = pd.Period("201303", freq="M") x = Series([v1, v1, v1, v2, v2, v1]) # periods are not 'sorted' as they are converted back into an index @@ -176,8 +177,8 @@ def test_datelike(self): tm.assert_index_equal(uniques, pd.PeriodIndex([v1, v2])) # GH 5986 - v1 = pd.to_timedelta('1 day 1 min') - v2 = pd.to_timedelta('1 day') + v1 = pd.to_timedelta("1 day 1 min") + v2 = pd.to_timedelta("1 day") x = Series([v1, v2, v1, v1, v2, v2, v1]) labels, uniques = algos.factorize(x) exp = np.array([0, 1, 0, 0, 1, 1, 0], dtype=np.intp) @@ -193,52 +194,48 @@ def test_factorize_nan(self): # nan should map to na_sentinel, not reverse_indexer[na_sentinel] # rizer.factorize should not raise an exception if na_sentinel indexes # outside of reverse_indexer - key = np.array([1, 2, 1, np.nan], dtype='O') + key = np.array([1, 2, 1, np.nan], dtype="O") rizer = ht.Factorizer(len(key)) for na_sentinel in (-1, 20): ids = rizer.factorize(key, sort=True, na_sentinel=na_sentinel) - expected = np.array([0, 1, 0, na_sentinel], dtype='int32') + expected = np.array([0, 1, 0, na_sentinel], dtype="int32") assert len(set(key)) == len(set(expected)) - tm.assert_numpy_array_equal(pd.isna(key), - expected == na_sentinel) + tm.assert_numpy_array_equal(pd.isna(key), expected == na_sentinel) # nan still maps to na_sentinel when sort=False - key = np.array([0, np.nan, 1], dtype='O') + key = np.array([0, np.nan, 1], dtype="O") na_sentinel = -1 # TODO(wesm): unused? 
ids = rizer.factorize(key, sort=False, na_sentinel=na_sentinel) # noqa - expected = np.array([2, -1, 0], dtype='int32') + expected = np.array([2, -1, 0], dtype="int32") assert len(set(key)) == len(set(expected)) tm.assert_numpy_array_equal(pd.isna(key), expected == na_sentinel) - @pytest.mark.parametrize("data,expected_label,expected_level", [ - ( - [(1, 1), (1, 2), (0, 0), (1, 2), 'nonsense'], - [0, 1, 2, 1, 3], - [(1, 1), (1, 2), (0, 0), 'nonsense'] - ), - ( - [(1, 1), (1, 2), (0, 0), (1, 2), (1, 2, 3)], - [0, 1, 2, 1, 3], - [(1, 1), (1, 2), (0, 0), (1, 2, 3)] - ), - ( - [(1, 1), (1, 2), (0, 0), (1, 2)], - [0, 1, 2, 1], - [(1, 1), (1, 2), (0, 0)] - ) - ]) + @pytest.mark.parametrize( + "data,expected_label,expected_level", + [ + ( + [(1, 1), (1, 2), (0, 0), (1, 2), "nonsense"], + [0, 1, 2, 1, 3], + [(1, 1), (1, 2), (0, 0), "nonsense"], + ), + ( + [(1, 1), (1, 2), (0, 0), (1, 2), (1, 2, 3)], + [0, 1, 2, 1, 3], + [(1, 1), (1, 2), (0, 0), (1, 2, 3)], + ), + ([(1, 1), (1, 2), (0, 0), (1, 2)], [0, 1, 2, 1], [(1, 1), (1, 2), (0, 0)]), + ], + ) def test_factorize_tuple_list(self, data, expected_label, expected_level): # GH9454 result = pd.factorize(data) - tm.assert_numpy_array_equal(result[0], - np.array(expected_label, dtype=np.intp)) + tm.assert_numpy_array_equal(result[0], np.array(expected_label, dtype=np.intp)) - expected_level_array = com.asarray_tuplesafe(expected_level, - dtype=object) + expected_level_array = com.asarray_tuplesafe(expected_level, dtype=object) tm.assert_numpy_array_equal(result[1], expected_level_array) def test_complex_sorting(self): @@ -258,42 +255,40 @@ def test_float64_factorize(self, writable): tm.assert_numpy_array_equal(uniques, exp_uniques) def test_uint64_factorize(self, writable): - data = np.array([2**64 - 1, 1, 2**64 - 1], dtype=np.uint64) + data = np.array([2 ** 64 - 1, 1, 2 ** 64 - 1], dtype=np.uint64) data.setflags(write=writable) exp_labels = np.array([0, 1, 0], dtype=np.intp) - exp_uniques = np.array([2**64 - 1, 1], dtype=np.uint64) + exp_uniques = np.array([2 ** 64 - 1, 1], dtype=np.uint64) labels, uniques = algos.factorize(data) tm.assert_numpy_array_equal(labels, exp_labels) tm.assert_numpy_array_equal(uniques, exp_uniques) def test_int64_factorize(self, writable): - data = np.array([2**63 - 1, -2**63, 2**63 - 1], dtype=np.int64) + data = np.array([2 ** 63 - 1, -2 ** 63, 2 ** 63 - 1], dtype=np.int64) data.setflags(write=writable) exp_labels = np.array([0, 1, 0], dtype=np.intp) - exp_uniques = np.array([2**63 - 1, -2**63], dtype=np.int64) + exp_uniques = np.array([2 ** 63 - 1, -2 ** 63], dtype=np.int64) labels, uniques = algos.factorize(data) tm.assert_numpy_array_equal(labels, exp_labels) tm.assert_numpy_array_equal(uniques, exp_uniques) def test_string_factorize(self, writable): - data = np.array(['a', 'c', 'a', 'b', 'c'], - dtype=object) + data = np.array(["a", "c", "a", "b", "c"], dtype=object) data.setflags(write=writable) exp_labels = np.array([0, 1, 0, 2, 1], dtype=np.intp) - exp_uniques = np.array(['a', 'c', 'b'], dtype=object) + exp_uniques = np.array(["a", "c", "b"], dtype=object) labels, uniques = algos.factorize(data) tm.assert_numpy_array_equal(labels, exp_labels) tm.assert_numpy_array_equal(uniques, exp_uniques) def test_object_factorize(self, writable): - data = np.array(['a', 'c', None, np.nan, 'a', 'b', pd.NaT, 'c'], - dtype=object) + data = np.array(["a", "c", None, np.nan, "a", "b", pd.NaT, "c"], dtype=object) data.setflags(write=writable) exp_labels = np.array([0, 1, -1, -1, 0, 2, -1, 1], dtype=np.intp) - exp_uniques = 
np.array(['a', 'c', 'b'], dtype=object) + exp_uniques = np.array(["a", "c", "b"], dtype=object) labels, uniques = algos.factorize(data) tm.assert_numpy_array_equal(labels, exp_labels) @@ -302,17 +297,20 @@ def test_object_factorize(self, writable): def test_deprecate_order(self): # gh 19727 - check warning is raised for deprecated keyword, order. # Test not valid once order keyword is removed. - data = np.array([2**63, 1, 2**63], dtype=np.uint64) + data = np.array([2 ** 63, 1, 2 ** 63], dtype=np.uint64) with tm.assert_produces_warning(expected_warning=FutureWarning): algos.factorize(data, order=True) with tm.assert_produces_warning(False): algos.factorize(data) - @pytest.mark.parametrize('data', [ - np.array([0, 1, 0], dtype='u8'), - np.array([-2**63, 1, -2**63], dtype='i8'), - np.array(['__nan__', 'foo', '__nan__'], dtype='object'), - ]) + @pytest.mark.parametrize( + "data", + [ + np.array([0, 1, 0], dtype="u8"), + np.array([-2 ** 63, 1, -2 ** 63], dtype="i8"), + np.array(["__nan__", "foo", "__nan__"], dtype="object"), + ], + ) def test_parametrized_factorize_na_value_default(self, data): # arrays that include the NA default for that type, but isn't used. l, u = algos.factorize(data) @@ -321,16 +319,18 @@ def test_parametrized_factorize_na_value_default(self, data): tm.assert_numpy_array_equal(l, expected_labels) tm.assert_numpy_array_equal(u, expected_uniques) - @pytest.mark.parametrize('data, na_value', [ - (np.array([0, 1, 0, 2], dtype='u8'), 0), - (np.array([1, 0, 1, 2], dtype='u8'), 1), - (np.array([-2**63, 1, -2**63, 0], dtype='i8'), -2**63), - (np.array([1, -2**63, 1, 0], dtype='i8'), 1), - (np.array(['a', '', 'a', 'b'], dtype=object), 'a'), - (np.array([(), ('a', 1), (), ('a', 2)], dtype=object), ()), - (np.array([('a', 1), (), ('a', 1), ('a', 2)], dtype=object), - ('a', 1)), - ]) + @pytest.mark.parametrize( + "data, na_value", + [ + (np.array([0, 1, 0, 2], dtype="u8"), 0), + (np.array([1, 0, 1, 2], dtype="u8"), 1), + (np.array([-2 ** 63, 1, -2 ** 63, 0], dtype="i8"), -2 ** 63), + (np.array([1, -2 ** 63, 1, 0], dtype="i8"), 1), + (np.array(["a", "", "a", "b"], dtype=object), "a"), + (np.array([(), ("a", 1), (), ("a", 2)], dtype=object), ()), + (np.array([("a", 1), (), ("a", 1), ("a", 2)], dtype=object), ("a", 1)), + ], + ) def test_parametrized_factorize_na_value(self, data, na_value): l, u = algos._factorize_array(data, na_value=na_value) expected_uniques = data[[1, 3]] @@ -340,7 +340,6 @@ def test_parametrized_factorize_na_value(self, data, na_value): class TestUnique(object): - def test_unique_all_dtypes(self, any_numpy_dtype): dtype = any_numpy_dtype arr = np.random.randint(0, 100, size=50).astype(dtype) @@ -359,14 +358,15 @@ def test_unique_all_dtypes(self, any_numpy_dtype): tm.assert_numpy_array_equal(reconstr, arr, check_dtype=False) def test_object_refcount_bug(self): - lst = ['A', 'B', 'C', 'D', 'E'] + lst = ["A", "B", "C", "D", "E"] for i in range(1000): len(algos.unique(lst)) def test_on_index_object(self): - mindex = pd.MultiIndex.from_arrays([np.arange(5).repeat(5), np.tile( - np.arange(5), 5)]) + mindex = pd.MultiIndex.from_arrays( + [np.arange(5).repeat(5), np.tile(np.arange(5), 5)] + ) expected = mindex.values expected.sort() @@ -380,13 +380,20 @@ def test_on_index_object(self): def test_datetime64_dtype_array_returned(self): # GH 9431 expected = np_array_datetime64_compat( - ['2015-01-03T00:00:00.000000000+0000', - '2015-01-01T00:00:00.000000000+0000'], - dtype='M8[ns]') + [ + "2015-01-03T00:00:00.000000000+0000", + "2015-01-01T00:00:00.000000000+0000", + ], + 
dtype="M8[ns]", + ) - dt_index = pd.to_datetime(['2015-01-03T00:00:00.000000000', - '2015-01-01T00:00:00.000000000', - '2015-01-01T00:00:00.000000000']) + dt_index = pd.to_datetime( + [ + "2015-01-03T00:00:00.000000000", + "2015-01-01T00:00:00.000000000", + "2015-01-01T00:00:00.000000000", + ] + ) result = algos.unique(dt_index) tm.assert_numpy_array_equal(result, expected) assert result.dtype == expected.dtype @@ -401,10 +408,10 @@ def test_datetime64_dtype_array_returned(self): tm.assert_numpy_array_equal(result, expected) assert result.dtype == expected.dtype - @pytest.mark.parametrize('box', [Index, Series, np.array]) + @pytest.mark.parametrize("box", [Index, Series, np.array]) def test_timedelta64_dtype_array_returned(self, box): # GH 9431 - expected = np.array([31200, 45678, 10000], dtype='m8[ns]') + expected = np.array([31200, 45678, 10000], dtype="m8[ns]") td_index = pd.to_timedelta([31200, 45678, 31200, 10000, 45678]) obj = box(td_index) @@ -423,40 +430,42 @@ def test_timedelta64_dtype_array_returned(self, box): assert_series_or_index_or_array_or_categorical_equal(reconstr, obj) def test_uint64_overflow(self): - s = Series([1, 2, 2**63, 2**63], dtype=np.uint64) - exp = np.array([1, 2, 2**63], dtype=np.uint64) + s = Series([1, 2, 2 ** 63, 2 ** 63], dtype=np.uint64) + exp = np.array([1, 2, 2 ** 63], dtype=np.uint64) tm.assert_numpy_array_equal(algos.unique(s), exp) def test_nan_in_object_array(self): - duplicated_items = ['a', np.nan, 'c', 'c'] + duplicated_items = ["a", np.nan, "c", "c"] result = pd.unique(duplicated_items) - expected = np.array(['a', np.nan, 'c'], dtype=object) + expected = np.array(["a", np.nan, "c"], dtype=object) tm.assert_numpy_array_equal(result, expected) - result_uniques, result_inverse = pd.unique(duplicated_items, - return_inverse=True) - expected_inverse = np.array([0, 1, 2, 2], dtype='int64') + result_uniques, result_inverse = pd.unique( + duplicated_items, return_inverse=True + ) + expected_inverse = np.array([0, 1, 2, 2], dtype="int64") tm.assert_numpy_array_equal(result_inverse, expected_inverse) - @pytest.mark.parametrize('ordered', [True, False]) - @pytest.mark.parametrize('box', [lambda x: x, Series, Index], - ids=['Categorical', 'Series', 'Index']) - @pytest.mark.parametrize('method', [lambda x, **kwargs: x.unique(**kwargs), - pd.unique], - ids=['classmethod', 'toplevel']) + @pytest.mark.parametrize("ordered", [True, False]) + @pytest.mark.parametrize( + "box", [lambda x: x, Series, Index], ids=["Categorical", "Series", "Index"] + ) + @pytest.mark.parametrize( + "method", + [lambda x, **kwargs: x.unique(**kwargs), pd.unique], + ids=["classmethod", "toplevel"], + ) def test_categorical(self, method, box, ordered): - categories = list('abc') if ordered else list('bac') - expected = Categorical(list('bac'), categories=categories, - ordered=ordered) + categories = list("abc") if ordered else list("bac") + expected = Categorical(list("bac"), categories=categories, ordered=ordered) # Index.unique always returns Index # pd.unique(Index) stays Index (only) for Categorical expected = box(expected) if box == Index else expected # GH 15939 - c = box(Categorical(list('baabc'), categories=categories, - ordered=ordered)) + c = box(Categorical(list("baabc"), categories=categories, ordered=ordered)) result = method(c) assert_series_or_index_or_array_or_categorical_equal(result, expected) @@ -469,30 +478,33 @@ def test_categorical(self, method, box, ordered): result_uniques, result_inverse = method(c, return_inverse=True) 
assert_series_or_index_or_array_or_categorical_equal( - result_uniques, expected_uniques) + result_uniques, expected_uniques + ) # reconstruction can only work if inverse is correct reconstr = box(result_uniques[result_inverse]) assert_series_or_index_or_array_or_categorical_equal(reconstr, c) - @pytest.mark.parametrize('box', [Series, Index]) - @pytest.mark.parametrize('method', [lambda x, **kwargs: x.unique(**kwargs), - pd.unique], - ids=['classmethod', 'toplevel']) + @pytest.mark.parametrize("box", [Series, Index]) + @pytest.mark.parametrize( + "method", + [lambda x, **kwargs: x.unique(**kwargs), pd.unique], + ids=["classmethod", "toplevel"], + ) def test_datetime64tz_aware(self, method, box): # GH 15939 - ts = Timestamp('20160101', tz='US/Eastern') + ts = Timestamp("20160101", tz="US/Eastern") obj = box([ts, ts]) if box == Series: - expected = DatetimeArray._from_sequence(np.array([ - Timestamp('2016-01-01 00:00:00-0500', tz="US/Eastern") - ])) + expected = DatetimeArray._from_sequence( + np.array([Timestamp("2016-01-01 00:00:00-0500", tz="US/Eastern")]) + ) else: # Index - expected = DatetimeIndex(['2016-01-01 00:00:00'], - dtype='datetime64[ns, US/Eastern]', - freq=None) + expected = DatetimeIndex( + ["2016-01-01 00:00:00"], dtype="datetime64[ns, US/Eastern]", freq=None + ) result = method(obj) assert_series_or_index_or_array_or_categorical_equal(result, expected) @@ -515,21 +527,22 @@ def test_order_of_appearance(self): # light testing of guarantee of order of appearance # these also are the doc-examples result = pd.unique(Series([2, 1, 3, 3])) - tm.assert_numpy_array_equal(result, - np.array([2, 1, 3], dtype='int64')) + tm.assert_numpy_array_equal(result, np.array([2, 1, 3], dtype="int64")) result = pd.unique(Series([2] + [1] * 5)) - tm.assert_numpy_array_equal(result, - np.array([2, 1], dtype='int64')) + tm.assert_numpy_array_equal(result, np.array([2, 1], dtype="int64")) - result = pd.unique(list('aabc')) - expected = np.array(['a', 'b', 'c'], dtype=object) + result = pd.unique(list("aabc")) + expected = np.array(["a", "b", "c"], dtype=object) tm.assert_numpy_array_equal(result, expected) - @pytest.mark.parametrize("arg ,expected", [ - (('1', '1', '2'), np.array(['1', '2'], dtype=object)), - (('foo',), np.array(['foo'], dtype=object)) - ]) + @pytest.mark.parametrize( + "arg ,expected", + [ + (("1", "1", "2"), np.array(["1", "2"], dtype=object)), + (("foo",), np.array(["foo"], dtype=object)), + ], + ) def test_tuple_with_strings(self, arg, expected): # see GH 17108 result = pd.unique(arg) @@ -537,9 +550,9 @@ def test_tuple_with_strings(self, arg, expected): def test_obj_none_preservation(self): # GH 20866 - arr = np.array(['foo', None], dtype=object) + arr = np.array(["foo", None], dtype=object) result = pd.unique(arr) - expected = np.array(['foo', None], dtype=object) + expected = np.array(["foo", None], dtype=object) tm.assert_numpy_array_equal(result, expected, strict_nan=True) @@ -553,8 +566,8 @@ def test_signed_zero(self): def test_different_nans(self): # GH 21866 # create different nans from bit-patterns: - NAN1 = struct.unpack("d", struct.pack("=Q", 0x7ff8000000000000))[0] - NAN2 = struct.unpack("d", struct.pack("=Q", 0x7ff8000000000001))[0] + NAN1 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000000))[0] + NAN2 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000001))[0] assert NAN1 != NAN1 assert NAN2 != NAN2 a = np.array([NAN1, NAN2]) # NAN1 and NAN2 are equivalent @@ -565,8 +578,8 @@ def test_different_nans(self): def test_first_nan_kept(self): # GH 22295 # 
create different nans from bit-patterns: - bits_for_nan1 = 0xfff8000000000001 - bits_for_nan2 = 0x7ff8000000000001 + bits_for_nan1 = 0xFFF8000000000001 + bits_for_nan2 = 0x7FF8000000000001 NAN1 = struct.unpack("d", struct.pack("=Q", bits_for_nan1))[0] NAN2 = struct.unpack("d", struct.pack("=Q", bits_for_nan2))[0] assert NAN1 != NAN1 @@ -576,17 +589,14 @@ def test_first_nan_kept(self): result = pd.unique(a) assert result.size == 1 # use bit patterns to identify which nan was kept: - result_nan_bits = struct.unpack("=Q", - struct.pack("d", result[0]))[0] + result_nan_bits = struct.unpack("=Q", struct.pack("d", result[0]))[0] assert result_nan_bits == bits_for_nan1 - def test_do_not_mangle_na_values(self, unique_nulls_fixture, - unique_nulls_fixture2): + def test_do_not_mangle_na_values(self, unique_nulls_fixture, unique_nulls_fixture2): # GH 22295 if unique_nulls_fixture is unique_nulls_fixture2: return # skip it, values not unique - a = np.array([unique_nulls_fixture, - unique_nulls_fixture2], dtype=np.object) + a = np.array([unique_nulls_fixture, unique_nulls_fixture2], dtype=np.object) result = pd.unique(a) assert result.size == 2 assert a[0] is unique_nulls_fixture @@ -594,7 +604,6 @@ def test_do_not_mangle_na_values(self, unique_nulls_fixture, class TestIsin(object): - def test_invalid(self): pytest.raises(TypeError, lambda: algos.isin(1, 1)) @@ -623,25 +632,25 @@ def test_basic(self): expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - result = algos.isin(['a', 'b'], ['a']) + result = algos.isin(["a", "b"], ["a"]) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - result = algos.isin(Series(['a', 'b']), Series(['a'])) + result = algos.isin(Series(["a", "b"]), Series(["a"])) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - result = algos.isin(Series(['a', 'b']), {'a'}) + result = algos.isin(Series(["a", "b"]), {"a"}) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - result = algos.isin(['a', 'b'], [1]) + result = algos.isin(["a", "b"], [1]) expected = np.array([False, False]) tm.assert_numpy_array_equal(result, expected) def test_i8(self): - arr = pd.date_range('20130101', periods=3).values + arr = pd.date_range("20130101", periods=3).values result = algos.isin(arr, [arr[0]]) expected = np.array([True, False, False]) tm.assert_numpy_array_equal(result, expected) @@ -654,7 +663,7 @@ def test_i8(self): expected = np.array([True, True, False]) tm.assert_numpy_array_equal(result, expected) - arr = pd.timedelta_range('1 day', periods=3).values + arr = pd.timedelta_range("1 day", periods=3).values result = algos.isin(arr, [arr[0]]) expected = np.array([True, False, False]) tm.assert_numpy_array_equal(result, expected) @@ -669,7 +678,7 @@ def test_i8(self): def test_large(self): - s = pd.date_range('20000101', periods=2000000, freq='s').values + s = pd.date_range("20000101", periods=2000000, freq="s").values result = algos.isin(s, s[0:2]) expected = np.zeros(len(s), dtype=bool) expected[0] = True @@ -679,7 +688,7 @@ def test_large(self): def test_categorical_from_codes(self): # GH 16639 vals = np.array([0, 1, 2, 0]) - cats = ['a', 'b', 'c'] + cats = ["a", "b", "c"] Sd = Series(Categorical(1).from_codes(vals, cats)) St = Series(Categorical(1).from_codes(np.array([0, 1]), cats)) expected = np.array([True, True, False, True]) @@ -721,8 +730,8 @@ def test_different_nans(self): # GH 22160 # all nans are handled as equivalent - comps = [float('nan')] - values = 
[float('nan')] + comps = [float("nan")] + values = [float("nan")] assert comps[0] is not values[0] # different nan-objects # as list of python-objects: @@ -730,20 +739,22 @@ def test_different_nans(self): tm.assert_numpy_array_equal(np.array([True]), result) # as object-array: - result = algos.isin(np.asarray(comps, dtype=np.object), - np.asarray(values, dtype=np.object)) + result = algos.isin( + np.asarray(comps, dtype=np.object), np.asarray(values, dtype=np.object) + ) tm.assert_numpy_array_equal(np.array([True]), result) # as float64-array: - result = algos.isin(np.asarray(comps, dtype=np.float64), - np.asarray(values, dtype=np.float64)) + result = algos.isin( + np.asarray(comps, dtype=np.float64), np.asarray(values, dtype=np.float64) + ) tm.assert_numpy_array_equal(np.array([True]), result) def test_no_cast(self): # GH 22160 # ensure 42 is not casted to a string - comps = ['ss', 42] - values = ['42'] + comps = ["ss", 42] + values = ["42"] expected = np.array([False, False]) result = algos.isin(comps, values) tm.assert_numpy_array_equal(expected, result) @@ -759,8 +770,8 @@ def test_empty(self, empty): def test_different_nan_objects(self): # GH 22119 - comps = np.array(['nan', np.nan * 1j, float('nan')], dtype=np.object) - vals = np.array([float('nan')], dtype=np.object) + comps = np.array(["nan", np.nan * 1j, float("nan")], dtype=np.object) + vals = np.array([float("nan")], dtype=np.object) expected = np.array([False, False, True]) result = algos.isin(comps, vals) tm.assert_numpy_array_equal(expected, result) @@ -770,8 +781,8 @@ def test_different_nans_as_float64(self): # create different nans from bit-patterns, # these nans will land in different buckets in the hash-table # if no special care is taken - NAN1 = struct.unpack("d", struct.pack("=Q", 0x7ff8000000000000))[0] - NAN2 = struct.unpack("d", struct.pack("=Q", 0x7ff8000000000001))[0] + NAN1 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000000))[0] + NAN2 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000001))[0] assert NAN1 != NAN1 assert NAN2 != NAN2 @@ -789,7 +800,6 @@ def test_different_nans_as_float64(self): class TestValueCounts(object): - def test_value_counts(self): np.random.seed(1234) from pandas.core.reshape.tile import cut @@ -807,32 +817,30 @@ def test_value_counts(self): def test_value_counts_bins(self): s = [1, 2, 3, 4] result = algos.value_counts(s, bins=1) - expected = Series([4], - index=IntervalIndex.from_tuples([(0.996, 4.0)])) + expected = Series([4], index=IntervalIndex.from_tuples([(0.996, 4.0)])) tm.assert_series_equal(result, expected) result = algos.value_counts(s, bins=2, sort=False) - expected = Series([2, 2], - index=IntervalIndex.from_tuples([(0.996, 2.5), - (2.5, 4.0)])) + expected = Series( + [2, 2], index=IntervalIndex.from_tuples([(0.996, 2.5), (2.5, 4.0)]) + ) tm.assert_series_equal(result, expected) def test_value_counts_dtypes(self): - result = algos.value_counts([1, 1.]) + result = algos.value_counts([1, 1.0]) assert len(result) == 1 - result = algos.value_counts([1, 1.], bins=1) + result = algos.value_counts([1, 1.0], bins=1) assert len(result) == 1 - result = algos.value_counts(Series([1, 1., '1'])) # object + result = algos.value_counts(Series([1, 1.0, "1"])) # object assert len(result) == 2 - pytest.raises(TypeError, lambda s: algos.value_counts(s, bins=1), - ['1', 1]) + pytest.raises(TypeError, lambda s: algos.value_counts(s, bins=1), ["1", 1]) def test_value_counts_nat(self): - td = Series([np.timedelta64(10000), pd.NaT], dtype='timedelta64[ns]') - dt = pd.to_datetime(['NaT', 
'2014-01-01']) + td = Series([np.timedelta64(10000), pd.NaT], dtype="timedelta64[ns]") + dt = pd.to_datetime(["NaT", "2014-01-01"]) for s in [td, dt]: vc = algos.value_counts(s) @@ -840,32 +848,40 @@ def test_value_counts_nat(self): assert len(vc) == 1 assert len(vc_with_na) == 2 - exp_dt = Series({Timestamp('2014-01-01 00:00:00'): 1}) + exp_dt = Series({Timestamp("2014-01-01 00:00:00"): 1}) tm.assert_series_equal(algos.value_counts(dt), exp_dt) # TODO same for (timedelta) def test_value_counts_datetime_outofbounds(self): # GH 13663 - s = Series([datetime(3000, 1, 1), datetime(5000, 1, 1), - datetime(5000, 1, 1), datetime(6000, 1, 1), - datetime(3000, 1, 1), datetime(3000, 1, 1)]) + s = Series( + [ + datetime(3000, 1, 1), + datetime(5000, 1, 1), + datetime(5000, 1, 1), + datetime(6000, 1, 1), + datetime(3000, 1, 1), + datetime(3000, 1, 1), + ] + ) res = s.value_counts() - exp_index = Index([datetime(3000, 1, 1), datetime(5000, 1, 1), - datetime(6000, 1, 1)], dtype=object) + exp_index = Index( + [datetime(3000, 1, 1), datetime(5000, 1, 1), datetime(6000, 1, 1)], + dtype=object, + ) exp = Series([3, 2, 1], index=exp_index) tm.assert_series_equal(res, exp) # GH 12424 - res = pd.to_datetime(Series(['2362-01-01', np.nan]), - errors='ignore') - exp = Series(['2362-01-01', np.nan], dtype=object) + res = pd.to_datetime(Series(["2362-01-01", np.nan]), errors="ignore") + exp = Series(["2362-01-01", np.nan], dtype=object) tm.assert_series_equal(res, exp) def test_categorical(self): - s = Series(Categorical(list('aaabbc'))) + s = Series(Categorical(list("aaabbc"))) result = s.value_counts() - expected = Series([3, 2, 1], index=CategoricalIndex(['a', 'b', 'c'])) + expected = Series([3, 2, 1], index=CategoricalIndex(["a", "b", "c"])) tm.assert_series_equal(result, expected, check_index_type=True) @@ -876,39 +892,51 @@ def test_categorical(self): tm.assert_series_equal(result, expected, check_index_type=True) def test_categorical_nans(self): - s = Series(Categorical(list('aaaaabbbcc'))) # 4,3,2,1 (nan) + s = Series(Categorical(list("aaaaabbbcc"))) # 4,3,2,1 (nan) s.iloc[1] = np.nan result = s.value_counts() - expected = Series([4, 3, 2], index=CategoricalIndex( - ['a', 'b', 'c'], categories=['a', 'b', 'c'])) + expected = Series( + [4, 3, 2], + index=CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c"]), + ) tm.assert_series_equal(result, expected, check_index_type=True) result = s.value_counts(dropna=False) - expected = Series([ - 4, 3, 2, 1 - ], index=CategoricalIndex(['a', 'b', 'c', np.nan])) + expected = Series([4, 3, 2, 1], index=CategoricalIndex(["a", "b", "c", np.nan])) tm.assert_series_equal(result, expected, check_index_type=True) # out of order - s = Series(Categorical( - list('aaaaabbbcc'), ordered=True, categories=['b', 'a', 'c'])) + s = Series( + Categorical(list("aaaaabbbcc"), ordered=True, categories=["b", "a", "c"]) + ) s.iloc[1] = np.nan result = s.value_counts() - expected = Series([4, 3, 2], index=CategoricalIndex( - ['a', 'b', 'c'], categories=['b', 'a', 'c'], ordered=True)) + expected = Series( + [4, 3, 2], + index=CategoricalIndex( + ["a", "b", "c"], categories=["b", "a", "c"], ordered=True + ), + ) tm.assert_series_equal(result, expected, check_index_type=True) result = s.value_counts(dropna=False) - expected = Series([4, 3, 2, 1], index=CategoricalIndex( - ['a', 'b', 'c', np.nan], categories=['b', 'a', 'c'], ordered=True)) + expected = Series( + [4, 3, 2, 1], + index=CategoricalIndex( + ["a", "b", "c", np.nan], categories=["b", "a", "c"], ordered=True + ), + ) 
tm.assert_series_equal(result, expected, check_index_type=True) def test_categorical_zeroes(self): # keep the `d` category with 0 - s = Series(Categorical( - list('bbbaac'), categories=list('abcd'), ordered=True)) + s = Series(Categorical(list("bbbaac"), categories=list("abcd"), ordered=True)) result = s.value_counts() - expected = Series([3, 2, 1, 0], index=Categorical( - ['b', 'a', 'c', 'd'], categories=list('abcd'), ordered=True)) + expected = Series( + [3, 2, 1, 0], + index=Categorical( + ["b", "a", "c", "d"], categories=list("abcd"), ordered=True + ), + ) tm.assert_series_equal(result, expected, check_index_type=True) def test_dropna(self): @@ -916,59 +944,66 @@ def test_dropna(self): tm.assert_series_equal( Series([True, True, False]).value_counts(dropna=True), - Series([2, 1], index=[True, False])) + Series([2, 1], index=[True, False]), + ) tm.assert_series_equal( Series([True, True, False]).value_counts(dropna=False), - Series([2, 1], index=[True, False])) + Series([2, 1], index=[True, False]), + ) tm.assert_series_equal( Series([True, True, False, None]).value_counts(dropna=True), - Series([2, 1], index=[True, False])) + Series([2, 1], index=[True, False]), + ) tm.assert_series_equal( Series([True, True, False, None]).value_counts(dropna=False), - Series([2, 1, 1], index=[True, False, np.nan])) + Series([2, 1, 1], index=[True, False, np.nan]), + ) tm.assert_series_equal( - Series([10.3, 5., 5.]).value_counts(dropna=True), - Series([2, 1], index=[5., 10.3])) + Series([10.3, 5.0, 5.0]).value_counts(dropna=True), + Series([2, 1], index=[5.0, 10.3]), + ) tm.assert_series_equal( - Series([10.3, 5., 5.]).value_counts(dropna=False), - Series([2, 1], index=[5., 10.3])) + Series([10.3, 5.0, 5.0]).value_counts(dropna=False), + Series([2, 1], index=[5.0, 10.3]), + ) tm.assert_series_equal( - Series([10.3, 5., 5., None]).value_counts(dropna=True), - Series([2, 1], index=[5., 10.3])) + Series([10.3, 5.0, 5.0, None]).value_counts(dropna=True), + Series([2, 1], index=[5.0, 10.3]), + ) # 32-bit linux has a different ordering if not compat.is_platform_32bit(): - result = Series([10.3, 5., 5., None]).value_counts(dropna=False) - expected = Series([2, 1, 1], index=[5., 10.3, np.nan]) + result = Series([10.3, 5.0, 5.0, None]).value_counts(dropna=False) + expected = Series([2, 1, 1], index=[5.0, 10.3, np.nan]) tm.assert_series_equal(result, expected) def test_value_counts_normalized(self): # GH12558 s = Series([1, 2, np.nan, np.nan, np.nan]) - dtypes = (np.float64, np.object, 'M8[ns]') + dtypes = (np.float64, np.object, "M8[ns]") for t in dtypes: s_typed = s.astype(t) result = s_typed.value_counts(normalize=True, dropna=False) - expected = Series([0.6, 0.2, 0.2], - index=Series([np.nan, 2.0, 1.0], dtype=t)) + expected = Series( + [0.6, 0.2, 0.2], index=Series([np.nan, 2.0, 1.0], dtype=t) + ) tm.assert_series_equal(result, expected) result = s_typed.value_counts(normalize=True, dropna=True) - expected = Series([0.5, 0.5], - index=Series([2.0, 1.0], dtype=t)) + expected = Series([0.5, 0.5], index=Series([2.0, 1.0], dtype=t)) tm.assert_series_equal(result, expected) def test_value_counts_uint64(self): - arr = np.array([2**63], dtype=np.uint64) - expected = Series([1], index=[2**63]) + arr = np.array([2 ** 63], dtype=np.uint64) + expected = Series([1], index=[2 ** 63]) result = algos.value_counts(arr) tm.assert_series_equal(result, expected) - arr = np.array([-1, 2**63], dtype=object) - expected = Series([1, 1], index=[-1, 2**63]) + arr = np.array([-1, 2 ** 63], dtype=object) + expected = Series([1, 1], 
index=[-1, 2 ** 63]) result = algos.value_counts(arr) # 32-bit linux has a different ordering @@ -977,7 +1012,6 @@ def test_value_counts_uint64(self): class TestDuplicated(object): - def test_duplicated_with_nas(self): keys = np.array([0, 1, np.nan, 0, 2, np.nan], dtype=object) @@ -985,11 +1019,11 @@ def test_duplicated_with_nas(self): expected = np.array([False, False, False, True, False, True]) tm.assert_numpy_array_equal(result, expected) - result = algos.duplicated(keys, keep='first') + result = algos.duplicated(keys, keep="first") expected = np.array([False, False, False, True, False, True]) tm.assert_numpy_array_equal(result, expected) - result = algos.duplicated(keys, keep='last') + result = algos.duplicated(keys, keep="last") expected = np.array([True, False, True, False, False, False]) tm.assert_numpy_array_equal(result, expected) @@ -998,8 +1032,9 @@ def test_duplicated_with_nas(self): tm.assert_numpy_array_equal(result, expected) keys = np.empty(8, dtype=object) - for i, t in enumerate(zip([0, 0, np.nan, np.nan] * 2, - [0, np.nan, 0, np.nan] * 2)): + for i, t in enumerate( + zip([0, 0, np.nan, np.nan] * 2, [0, np.nan, 0, np.nan] * 2) + ): keys[i] = t result = algos.duplicated(keys) @@ -1008,7 +1043,7 @@ def test_duplicated_with_nas(self): expected = np.array(falses + trues) tm.assert_numpy_array_equal(result, expected) - result = algos.duplicated(keys, keep='last') + result = algos.duplicated(keys, keep="last") expected = np.array(trues + falses) tm.assert_numpy_array_equal(result, expected) @@ -1016,51 +1051,66 @@ def test_duplicated_with_nas(self): expected = np.array(trues + trues) tm.assert_numpy_array_equal(result, expected) - @pytest.mark.parametrize('case', [ - np.array([1, 2, 1, 5, 3, - 2, 4, 1, 5, 6]), - np.array([1.1, 2.2, 1.1, np.nan, 3.3, - 2.2, 4.4, 1.1, np.nan, 6.6]), - np.array([1 + 1j, 2 + 2j, 1 + 1j, 5 + 5j, 3 + 3j, - 2 + 2j, 4 + 4j, 1 + 1j, 5 + 5j, 6 + 6j]), - np.array(['a', 'b', 'a', 'e', 'c', - 'b', 'd', 'a', 'e', 'f'], dtype=object), - np.array([1, 2**63, 1, 3**5, 10, 2**63, 39, 1, 3**5, 7], - dtype=np.uint64), - ]) + @pytest.mark.parametrize( + "case", + [ + np.array([1, 2, 1, 5, 3, 2, 4, 1, 5, 6]), + np.array([1.1, 2.2, 1.1, np.nan, 3.3, 2.2, 4.4, 1.1, np.nan, 6.6]), + np.array( + [ + 1 + 1j, + 2 + 2j, + 1 + 1j, + 5 + 5j, + 3 + 3j, + 2 + 2j, + 4 + 4j, + 1 + 1j, + 5 + 5j, + 6 + 6j, + ] + ), + np.array(["a", "b", "a", "e", "c", "b", "d", "a", "e", "f"], dtype=object), + np.array( + [1, 2 ** 63, 1, 3 ** 5, 10, 2 ** 63, 39, 1, 3 ** 5, 7], dtype=np.uint64 + ), + ], + ) def test_numeric_object_likes(self, case): - exp_first = np.array([False, False, True, False, False, - True, False, True, True, False]) - exp_last = np.array([True, True, True, True, False, - False, False, False, False, False]) + exp_first = np.array( + [False, False, True, False, False, True, False, True, True, False] + ) + exp_last = np.array( + [True, True, True, True, False, False, False, False, False, False] + ) exp_false = exp_first | exp_last - res_first = algos.duplicated(case, keep='first') + res_first = algos.duplicated(case, keep="first") tm.assert_numpy_array_equal(res_first, exp_first) - res_last = algos.duplicated(case, keep='last') + res_last = algos.duplicated(case, keep="last") tm.assert_numpy_array_equal(res_last, exp_last) res_false = algos.duplicated(case, keep=False) tm.assert_numpy_array_equal(res_false, exp_false) # index - for idx in [Index(case), Index(case, dtype='category')]: - res_first = idx.duplicated(keep='first') + for idx in [Index(case), Index(case, 
dtype="category")]: + res_first = idx.duplicated(keep="first") tm.assert_numpy_array_equal(res_first, exp_first) - res_last = idx.duplicated(keep='last') + res_last = idx.duplicated(keep="last") tm.assert_numpy_array_equal(res_last, exp_last) res_false = idx.duplicated(keep=False) tm.assert_numpy_array_equal(res_false, exp_false) # series - for s in [Series(case), Series(case, dtype='category')]: - res_first = s.duplicated(keep='first') + for s in [Series(case), Series(case, dtype="category")]: + res_first = s.duplicated(keep="first") tm.assert_series_equal(res_first, Series(exp_first)) - res_last = s.duplicated(keep='last') + res_last = s.duplicated(keep="last") tm.assert_series_equal(res_last, Series(exp_last)) res_false = s.duplicated(keep=False) @@ -1068,52 +1118,82 @@ def test_numeric_object_likes(self, case): def test_datetime_likes(self): - dt = ['2011-01-01', '2011-01-02', '2011-01-01', 'NaT', '2011-01-03', - '2011-01-02', '2011-01-04', '2011-01-01', 'NaT', '2011-01-06'] - td = ['1 days', '2 days', '1 days', 'NaT', '3 days', - '2 days', '4 days', '1 days', 'NaT', '6 days'] - - cases = [np.array([Timestamp(d) for d in dt]), - np.array([Timestamp(d, tz='US/Eastern') for d in dt]), - np.array([pd.Period(d, freq='D') for d in dt]), - np.array([np.datetime64(d) for d in dt]), - np.array([pd.Timedelta(d) for d in td])] - - exp_first = np.array([False, False, True, False, False, - True, False, True, True, False]) - exp_last = np.array([True, True, True, True, False, - False, False, False, False, False]) + dt = [ + "2011-01-01", + "2011-01-02", + "2011-01-01", + "NaT", + "2011-01-03", + "2011-01-02", + "2011-01-04", + "2011-01-01", + "NaT", + "2011-01-06", + ] + td = [ + "1 days", + "2 days", + "1 days", + "NaT", + "3 days", + "2 days", + "4 days", + "1 days", + "NaT", + "6 days", + ] + + cases = [ + np.array([Timestamp(d) for d in dt]), + np.array([Timestamp(d, tz="US/Eastern") for d in dt]), + np.array([pd.Period(d, freq="D") for d in dt]), + np.array([np.datetime64(d) for d in dt]), + np.array([pd.Timedelta(d) for d in td]), + ] + + exp_first = np.array( + [False, False, True, False, False, True, False, True, True, False] + ) + exp_last = np.array( + [True, True, True, True, False, False, False, False, False, False] + ) exp_false = exp_first | exp_last for case in cases: - res_first = algos.duplicated(case, keep='first') + res_first = algos.duplicated(case, keep="first") tm.assert_numpy_array_equal(res_first, exp_first) - res_last = algos.duplicated(case, keep='last') + res_last = algos.duplicated(case, keep="last") tm.assert_numpy_array_equal(res_last, exp_last) res_false = algos.duplicated(case, keep=False) tm.assert_numpy_array_equal(res_false, exp_false) # index - for idx in [Index(case), Index(case, dtype='category'), - Index(case, dtype=object)]: - res_first = idx.duplicated(keep='first') + for idx in [ + Index(case), + Index(case, dtype="category"), + Index(case, dtype=object), + ]: + res_first = idx.duplicated(keep="first") tm.assert_numpy_array_equal(res_first, exp_first) - res_last = idx.duplicated(keep='last') + res_last = idx.duplicated(keep="last") tm.assert_numpy_array_equal(res_last, exp_last) res_false = idx.duplicated(keep=False) tm.assert_numpy_array_equal(res_false, exp_false) # series - for s in [Series(case), Series(case, dtype='category'), - Series(case, dtype=object)]: - res_first = s.duplicated(keep='first') + for s in [ + Series(case), + Series(case, dtype="category"), + Series(case, dtype=object), + ]: + res_first = s.duplicated(keep="first") 
tm.assert_series_equal(res_first, Series(exp_first)) - res_last = s.duplicated(keep='last') + res_last = s.duplicated(keep="last") tm.assert_series_equal(res_last, Series(exp_last)) res_false = s.duplicated(keep=False) @@ -1123,17 +1203,24 @@ def test_unique_index(self): cases = [Index([1, 2, 3]), pd.RangeIndex(0, 3)] for case in cases: assert case.is_unique is True - tm.assert_numpy_array_equal(case.duplicated(), - np.array([False, False, False])) - - @pytest.mark.parametrize('arr, unique', [ - ([(0, 0), (0, 1), (1, 0), (1, 1), (0, 0), (0, 1), (1, 0), (1, 1)], - [(0, 0), (0, 1), (1, 0), (1, 1)]), - ([('b', 'c'), ('a', 'b'), ('a', 'b'), ('b', 'c')], - [('b', 'c'), ('a', 'b')]), - ([('a', 1), ('b', 2), ('a', 3), ('a', 1)], - [('a', 1), ('b', 2), ('a', 3)]), - ]) + tm.assert_numpy_array_equal( + case.duplicated(), np.array([False, False, False]) + ) + + @pytest.mark.parametrize( + "arr, unique", + [ + ( + [(0, 0), (0, 1), (1, 0), (1, 1), (0, 0), (0, 1), (1, 0), (1, 1)], + [(0, 0), (0, 1), (1, 0), (1, 1)], + ), + ( + [("b", "c"), ("a", "b"), ("a", "b"), ("b", "c")], + [("b", "c"), ("a", "b")], + ), + ([("a", 1), ("b", 2), ("a", 3), ("a", 1)], [("a", 1), ("b", 2), ("a", 3)]), + ], + ) def test_unique_tuples(self, arr, unique): # https://github.com/pandas-dev/pandas/issues/16519 expected = np.empty(len(unique), dtype=object) @@ -1144,18 +1231,17 @@ def test_unique_tuples(self, arr, unique): class GroupVarTestMixin(object): - def test_group_var_generic_1d(self): prng = RandomState(1234) out = (np.nan * np.ones((5, 1))).astype(self.dtype) - counts = np.zeros(5, dtype='int64') + counts = np.zeros(5, dtype="int64") values = 10 * prng.rand(15, 1).astype(self.dtype) - labels = np.tile(np.arange(5), (3, )).astype('int64') + labels = np.tile(np.arange(5), (3,)).astype("int64") - expected_out = (np.squeeze(values) - .reshape((5, 3), order='F') - .std(axis=1, ddof=1) ** 2)[:, np.newaxis] + expected_out = ( + np.squeeze(values).reshape((5, 3), order="F").std(axis=1, ddof=1) ** 2 + )[:, np.newaxis] expected_counts = counts + 3 self.algo(out, counts, values, labels) @@ -1166,9 +1252,9 @@ def test_group_var_generic_1d_flat_labels(self): prng = RandomState(1234) out = (np.nan * np.ones((1, 1))).astype(self.dtype) - counts = np.zeros(1, dtype='int64') + counts = np.zeros(1, dtype="int64") values = 10 * prng.rand(5, 1).astype(self.dtype) - labels = np.zeros(5, dtype='int64') + labels = np.zeros(5, dtype="int64") expected_out = np.array([[values.std(ddof=1) ** 2]]) expected_counts = counts + 5 @@ -1182,9 +1268,9 @@ def test_group_var_generic_2d_all_finite(self): prng = RandomState(1234) out = (np.nan * np.ones((5, 2))).astype(self.dtype) - counts = np.zeros(5, dtype='int64') + counts = np.zeros(5, dtype="int64") values = 10 * prng.rand(10, 2).astype(self.dtype) - labels = np.tile(np.arange(5), (2, )).astype('int64') + labels = np.tile(np.arange(5), (2,)).astype("int64") expected_out = np.std(values.reshape(2, 5, 2), ddof=1, axis=0) ** 2 expected_counts = counts + 2 @@ -1197,15 +1283,17 @@ def test_group_var_generic_2d_some_nan(self): prng = RandomState(1234) out = (np.nan * np.ones((5, 2))).astype(self.dtype) - counts = np.zeros(5, dtype='int64') + counts = np.zeros(5, dtype="int64") values = 10 * prng.rand(10, 2).astype(self.dtype) values[:, 1] = np.nan - labels = np.tile(np.arange(5), (2, )).astype('int64') - - expected_out = np.vstack([values[:, 0] - .reshape(5, 2, order='F') - .std(ddof=1, axis=1) ** 2, - np.nan * np.ones(5)]).T.astype(self.dtype) + labels = np.tile(np.arange(5), 
(2,)).astype("int64") + + expected_out = np.vstack( + [ + values[:, 0].reshape(5, 2, order="F").std(ddof=1, axis=1) ** 2, + np.nan * np.ones(5), + ] + ).T.astype(self.dtype) expected_counts = counts + 2 self.algo(out, counts, values, labels) @@ -1216,9 +1304,9 @@ def test_group_var_constant(self): # Regression test from GH 10448. out = np.array([[np.nan]], dtype=self.dtype) - counts = np.array([0], dtype='int64') + counts = np.array([0], dtype="int64") values = 0.832845131556193 * np.ones((3, 1), dtype=self.dtype) - labels = np.zeros(3, dtype='int64') + labels = np.zeros(3, dtype="int64") self.algo(out, counts, values, labels) @@ -1239,10 +1327,10 @@ def test_group_var_large_inputs(self): prng = RandomState(1234) out = np.array([[np.nan]], dtype=self.dtype) - counts = np.array([0], dtype='int64') + counts = np.array([0], dtype="int64") values = (prng.rand(10 ** 6) + 10 ** 12).astype(self.dtype) values.shape = (10 ** 6, 1) - labels = np.zeros(10 ** 6, dtype='int64') + labels = np.zeros(10 ** 6, dtype="int64") self.algo(out, counts, values, labels) @@ -1259,15 +1347,13 @@ class TestGroupVarFloat32(GroupVarTestMixin): class TestHashTable(object): - def test_lookup_nan(self, writable): xs = np.array([2.718, 3.14, np.nan, -7, 5, 2, 3]) # GH 21688 ensure we can deal with readonly memory views xs.setflags(write=writable) m = ht.Float64HashTable() m.map_locations(xs) - tm.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs), - dtype=np.int64)) + tm.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs), dtype=np.int64)) def test_add_signed_zeros(self): # GH 21866 inconsistent hash-function for float64 @@ -1283,8 +1369,8 @@ def test_add_signed_zeros(self): def test_add_different_nans(self): # GH 21866 inconsistent hash-function for float64 # create different nans from bit-patterns: - NAN1 = struct.unpack("d", struct.pack("=Q", 0x7ff8000000000000))[0] - NAN2 = struct.unpack("d", struct.pack("=Q", 0x7ff8000000000001))[0] + NAN1 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000000))[0] + NAN2 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000001))[0] assert NAN1 != NAN1 assert NAN2 != NAN2 # default hash function would lead to different hash-buckets @@ -1295,28 +1381,32 @@ def test_add_different_nans(self): assert len(m) == 1 # NAN1 and NAN2 are equivalent def test_lookup_overflow(self, writable): - xs = np.array([1, 2, 2**63], dtype=np.uint64) + xs = np.array([1, 2, 2 ** 63], dtype=np.uint64) # GH 21688 ensure we can deal with readonly memory views xs.setflags(write=writable) m = ht.UInt64HashTable() m.map_locations(xs) - tm.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs), - dtype=np.int64)) + tm.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs), dtype=np.int64)) def test_get_unique(self): - s = Series([1, 2, 2**63, 2**63], dtype=np.uint64) - exp = np.array([1, 2, 2**63], dtype=np.uint64) + s = Series([1, 2, 2 ** 63, 2 ** 63], dtype=np.uint64) + exp = np.array([1, 2, 2 ** 63], dtype=np.uint64) tm.assert_numpy_array_equal(s.unique(), exp) - @pytest.mark.parametrize('nvals', [0, 10]) # resizing to 0 is special case - @pytest.mark.parametrize('htable, uniques, dtype, safely_resizes', [ - (ht.PyObjectHashTable, ht.ObjectVector, 'object', False), - (ht.StringHashTable, ht.ObjectVector, 'object', True), - (ht.Float64HashTable, ht.Float64Vector, 'float64', False), - (ht.Int64HashTable, ht.Int64Vector, 'int64', False), - (ht.UInt64HashTable, ht.UInt64Vector, 'uint64', False)]) - def test_vector_resize(self, writable, htable, uniques, dtype, - safely_resizes, nvals): + 
@pytest.mark.parametrize("nvals", [0, 10]) # resizing to 0 is special case + @pytest.mark.parametrize( + "htable, uniques, dtype, safely_resizes", + [ + (ht.PyObjectHashTable, ht.ObjectVector, "object", False), + (ht.StringHashTable, ht.ObjectVector, "object", True), + (ht.Float64HashTable, ht.Float64Vector, "float64", False), + (ht.Int64HashTable, ht.Int64Vector, "int64", False), + (ht.UInt64HashTable, ht.UInt64Vector, "uint64", False), + ], + ) + def test_vector_resize( + self, writable, htable, uniques, dtype, safely_resizes, nvals + ): # Test for memory errors after internal vector # reallocations (GH 7157) vals = np.array(np.random.randn(1000), dtype=dtype) @@ -1341,21 +1431,25 @@ def test_vector_resize(self, writable, htable, uniques, dtype, if safely_resizes: htable.get_labels(vals, uniques, 0, -1) else: - with pytest.raises(ValueError, match='external reference.*'): + with pytest.raises(ValueError, match="external reference.*"): htable.get_labels(vals, uniques, 0, -1) - uniques.to_array() # should not raise here + uniques.to_array() # should not raise here assert tmp.shape == oldshape - @pytest.mark.parametrize('htable, tm_dtype', [ - (ht.PyObjectHashTable, 'String'), - (ht.StringHashTable, 'String'), - (ht.Float64HashTable, 'Float'), - (ht.Int64HashTable, 'Int'), - (ht.UInt64HashTable, 'UInt')]) + @pytest.mark.parametrize( + "htable, tm_dtype", + [ + (ht.PyObjectHashTable, "String"), + (ht.StringHashTable, "String"), + (ht.Float64HashTable, "Float"), + (ht.Int64HashTable, "Int"), + (ht.UInt64HashTable, "UInt"), + ], + ) def test_hashtable_unique(self, htable, tm_dtype, writable): # output of maker has guaranteed unique elements - maker = getattr(tm, 'make' + tm_dtype + 'Index') + maker = getattr(tm, "make" + tm_dtype + "Index") s = Series(maker(1000)) if htable == ht.Float64HashTable: # add NaN for float column @@ -1370,27 +1464,32 @@ def test_hashtable_unique(self, htable, tm_dtype, writable): # drop_duplicates has own cython code (hash_table_func_helper.pxi) # and is tested separately; keeps first occurrence like ht.unique() - expected_unique = s_duplicated.drop_duplicates(keep='first').values + expected_unique = s_duplicated.drop_duplicates(keep="first").values result_unique = htable().unique(s_duplicated.values) tm.assert_numpy_array_equal(result_unique, expected_unique) # test return_inverse=True # reconstruction can only succeed if the inverse is correct - result_unique, result_inverse = htable().unique(s_duplicated.values, - return_inverse=True) + result_unique, result_inverse = htable().unique( + s_duplicated.values, return_inverse=True + ) tm.assert_numpy_array_equal(result_unique, expected_unique) reconstr = result_unique[result_inverse] tm.assert_numpy_array_equal(reconstr, s_duplicated.values) - @pytest.mark.parametrize('htable, tm_dtype', [ - (ht.PyObjectHashTable, 'String'), - (ht.StringHashTable, 'String'), - (ht.Float64HashTable, 'Float'), - (ht.Int64HashTable, 'Int'), - (ht.UInt64HashTable, 'UInt')]) + @pytest.mark.parametrize( + "htable, tm_dtype", + [ + (ht.PyObjectHashTable, "String"), + (ht.StringHashTable, "String"), + (ht.Float64HashTable, "Float"), + (ht.Int64HashTable, "Int"), + (ht.UInt64HashTable, "UInt"), + ], + ) def test_hashtable_factorize(self, htable, tm_dtype, writable): # output of maker has guaranteed unique elements - maker = getattr(tm, 'make' + tm_dtype + 'Index') + maker = getattr(tm, "make" + tm_dtype + "Index") s = Series(maker(1000)) if htable == ht.Float64HashTable: # add NaN for float column @@ -1418,42 +1517,46 @@ def 
test_hashtable_factorize(self, htable, tm_dtype, writable): expected_reconstruct = s_duplicated.dropna().values tm.assert_numpy_array_equal(result_reconstruct, expected_reconstruct) - @pytest.mark.parametrize('hashtable', [ - ht.PyObjectHashTable, ht.StringHashTable, - ht.Float64HashTable, ht.Int64HashTable, ht.UInt64HashTable]) + @pytest.mark.parametrize( + "hashtable", + [ + ht.PyObjectHashTable, + ht.StringHashTable, + ht.Float64HashTable, + ht.Int64HashTable, + ht.UInt64HashTable, + ], + ) def test_hashtable_large_sizehint(self, hashtable): # GH 22729 size_hint = np.iinfo(np.uint32).max + 1 - tbl = hashtable(size_hint=size_hint) # noqa + tbl = hashtable(size_hint=size_hint) # noqa def test_quantile(): s = Series(np.random.randn(100)) - result = algos.quantile(s, [0, .25, .5, .75, 1.]) - expected = algos.quantile(s.values, [0, .25, .5, .75, 1.]) + result = algos.quantile(s, [0, 0.25, 0.5, 0.75, 1.0]) + expected = algos.quantile(s.values, [0, 0.25, 0.5, 0.75, 1.0]) tm.assert_almost_equal(result, expected) def test_unique_label_indices(): - a = np.random.randint(1, 1 << 10, 1 << 15).astype('i8') + a = np.random.randint(1, 1 << 10, 1 << 15).astype("i8") left = ht.unique_label_indices(a) right = np.unique(a, return_index=True)[1] - tm.assert_numpy_array_equal(left, right, - check_dtype=False) + tm.assert_numpy_array_equal(left, right, check_dtype=False) a[np.random.choice(len(a), 10)] = -1 left = ht.unique_label_indices(a) right = np.unique(a, return_index=True)[1][1:] - tm.assert_numpy_array_equal(left, right, - check_dtype=False) + tm.assert_numpy_array_equal(left, right, check_dtype=False) class TestRank(object): - @td.skip_if_no_scipy def test_scipy_compat(self): from scipy.stats import rankdata @@ -1467,13 +1570,13 @@ def _check(arr): exp[mask] = nan assert_almost_equal(result, exp) - _check(np.array([nan, nan, 5., 5., 5., nan, 1, 2, 3, nan])) - _check(np.array([4., nan, 5., 5., 5., nan, 1, 2, 4., nan])) + _check(np.array([nan, nan, 5.0, 5.0, 5.0, nan, 1, 2, 3, nan])) + _check(np.array([4.0, nan, 5.0, 5.0, 5.0, nan, 1, 2, 4.0, nan])) def test_basic(self): exp = np.array([1, 2], dtype=np.float64) - for dtype in np.typecodes['AllInteger']: + for dtype in np.typecodes["AllInteger"]: s = Series([1, 100], dtype=dtype) tm.assert_numpy_array_equal(algos.rank(s), exp) @@ -1481,7 +1584,7 @@ def test_uint64_overflow(self): exp = np.array([1, 2], dtype=np.float64) for dtype in [np.float64, np.uint64]: - s = Series([1, 2**63], dtype=dtype) + s = Series([1, 2 ** 63], dtype=dtype) tm.assert_numpy_array_equal(algos.rank(s), exp) def test_too_many_ndims(self): @@ -1492,10 +1595,11 @@ def test_too_many_ndims(self): algos.rank(arr) @pytest.mark.single - @pytest.mark.parametrize('values', [ - np.arange(2**24 + 1), - np.arange(2**25 + 2).reshape(2**24 + 1, 2)], - ids=['1d', '2d']) + @pytest.mark.parametrize( + "values", + [np.arange(2 ** 24 + 1), np.arange(2 ** 25 + 2).reshape(2 ** 24 + 1, 2)], + ids=["1d", "2d"], + ) def test_pct_max_many_rows(self, values): # GH 18271 result = algos.rank(values, pct=True).max() @@ -1504,8 +1608,8 @@ def test_pct_max_many_rows(self, values): def test_pad_backfill_object_segfault(): - old = np.array([], dtype='O') - new = np.array([datetime(2010, 12, 31)], dtype='O') + old = np.array([], dtype="O") + new = np.array([datetime(2010, 12, 31)], dtype="O") result = libalgos.pad["object"](old, new) expected = np.array([-1], dtype=np.int64) @@ -1525,13 +1629,12 @@ def test_pad_backfill_object_segfault(): def test_arrmap(): - values = np.array(['foo', 'foo', 'bar', 'bar', 'baz', 
-                       'qux'], dtype='O')
-    result = libalgos.arrmap_object(values, lambda x: x in ['foo', 'bar'])
-    assert (result.dtype == np.bool_)
+    values = np.array(["foo", "foo", "bar", "bar", "baz", "qux"], dtype="O")
+    result = libalgos.arrmap_object(values, lambda x: x in ["foo", "bar"])
+    assert result.dtype == np.bool_
 
 
 class TestTseriesUtil(object):
-
     def test_combineFunc(self):
         pass
 
@@ -1553,8 +1656,7 @@ def test_backfill(self):
 
         filler = libalgos.backfill["int64_t"](old.values, new.values)
 
-        expect_filler = np.array([0, 0, 1, 1, 1, 1,
-                                  2, 2, 2, 2, 2, -1], dtype=np.int64)
+        expect_filler = np.array([0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, -1], dtype=np.int64)
         tm.assert_numpy_array_equal(filler, expect_filler)
 
         # corner case
@@ -1571,8 +1673,7 @@ def test_pad(self):
 
         filler = libalgos.pad["int64_t"](old.values, new.values)
 
-        expect_filler = np.array([-1, 0, 0, 0, 0, 1,
-                                  1, 1, 1, 1, 2, 2], dtype=np.int64)
+        expect_filler = np.array([-1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2], dtype=np.int64)
         tm.assert_numpy_array_equal(filler, expect_filler)
 
         # corner case
@@ -1585,32 +1686,267 @@ def test_pad(self):
 
 def test_is_lexsorted():
     failure = [
-        np.array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-                  3, 3,
-                  3, 3,
-                  3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2,
-                  2, 2, 2, 2, 2, 2, 2,
-                  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-                  1, 1, 1, 1, 1, 1, 1,
-                  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-                  1, 1, 1, 1, 1, 1, 1,
-                  1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                  0, 0, 0, 0, 0, 0, 0,
-                  0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='int64'),
-        np.array([30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16,
-                  15, 14,
-                  13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 30, 29, 28,
-                  27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13,
-                  12, 11,
-                  10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 30, 29, 28, 27, 26, 25,
-                  24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10,
-                  9, 8,
-                  7, 6, 5, 4, 3, 2, 1, 0, 30, 29, 28, 27, 26, 25, 24, 23, 22,
-                  21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7,
-                  6, 5,
-                  4, 3, 2, 1, 0], dtype='int64')]
-
-    assert (not libalgos.is_lexsorted(failure))
+        np.array(
+            [
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                2,
+                2,
+                2,
+                2,
+                2,
+                2,
+                2,
+                2,
+                2,
+                2,
+                2,
+                2,
+                2,
+                2,
+                2,
+                2,
+                2,
+                2,
+                2,
+                2,
+                2,
+                2,
+                2,
+                2,
+                2,
+                2,
+                2,
+                2,
+                2,
+                2,
+                2,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+            ],
+            dtype="int64",
+        ),
+        np.array(
+            [
+                30,
+                29,
+                28,
+                27,
+                26,
+                25,
+                24,
+                23,
+                22,
+                21,
+                20,
+                19,
+                18,
+                17,
+                16,
+                15,
+                14,
+                13,
+                12,
+                11,
+                10,
+                9,
+                8,
+                7,
+                6,
+                5,
+                4,
+                3,
+                2,
+                1,
+                0,
+                30,
+                29,
+                28,
+                27,
+                26,
+                25,
+                24,
+                23,
+                22,
+                21,
+                20,
+                19,
+                18,
+                17,
+                16,
+                15,
+                14,
+                13,
+                12,
+                11,
+                10,
+                9,
+                8,
+                7,
+                6,
+                5,
+                4,
+                3,
+                2,
+                1,
+                0,
+                30,
+                29,
+                28,
+                27,
+                26,
+                25,
+                24,
+                23,
+                22,
+                21,
+                20,
+                19,
+                18,
+                17,
+                16,
+                15,
+                14,
+                13,
+                12,
+                11,
+                10,
+                9,
+                8,
+                7,
+                6,
+                5,
+                4,
+                3,
+                2,
+                1,
+                0,
+                30,
+                29,
+                28,
+                27,
+                26,
+                25,
+                24,
+                23,
+                22,
+                21,
+                20,
+                19,
+                18,
+                17,
+                16,
+                15,
+                14,
+                13,
+                12,
+                11,
+                10,
+                9,
+                8,
+                7,
+                6,
+                5,
+                4,
+                3,
+                2,
+                1,
+                0,
+            ],
+            dtype="int64",
+        ),
+    ]
+
+    assert not libalgos.is_lexsorted(failure)
 
 
 def test_groupsort_indexer():
@@ -1622,7 +1958,7 @@ def test_groupsort_indexer():
     # need to use a stable sort
     # np.argsort returns int, groupsort_indexer
     # always returns int64
-    expected = np.argsort(a, kind='mergesort')
+    expected = np.argsort(a, kind="mergesort")
     expected = expected.astype(np.int64)
 
     tm.assert_numpy_array_equal(result, expected)
@@ -1694,7 +2030,7 @@ def test_ensure_platform_int():
     arr = np.arange(100, dtype=np.intp)
 
     result = libalgos.ensure_platform_int(arr)
-    assert (result is arr)
+    assert result is arr
 
 
 def test_int64_add_overflow():
@@ -1714,34 +2050,42 @@ def test_int64_add_overflow():
     with pytest.raises(OverflowError, match=msg):
         algos.checked_add_with_arr(np.array([m, n]), np.array([n, n]))
     with pytest.raises(OverflowError, match=msg):
-        algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]),
-                                   arr_mask=np.array([False, True]))
+        algos.checked_add_with_arr(
+            np.array([m, m]), np.array([m, m]), arr_mask=np.array([False, True])
+        )
     with pytest.raises(OverflowError, match=msg):
-        algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]),
-                                   b_mask=np.array([False, True]))
+        algos.checked_add_with_arr(
+            np.array([m, m]), np.array([m, m]), b_mask=np.array([False, True])
+        )
    with pytest.raises(OverflowError, match=msg):
-        algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]),
-                                   arr_mask=np.array([False, True]),
-                                   b_mask=np.array([False, True]))
+        algos.checked_add_with_arr(
+            np.array([m, m]),
+            np.array([m, m]),
+            arr_mask=np.array([False, True]),
+            b_mask=np.array([False, True]),
+        )
     with pytest.raises(OverflowError, match=msg):
         with tm.assert_produces_warning(RuntimeWarning):
-            algos.checked_add_with_arr(np.array([m, m]),
-                                       np.array([np.nan, m]))
+            algos.checked_add_with_arr(np.array([m, m]), np.array([np.nan, m]))
 
     # Check that the nan boolean arrays override whether or not
     # the addition overflows. We don't check the result but just
    # the fact that an OverflowError is not raised.
-    algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]),
-                               arr_mask=np.array([True, True]))
-    algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]),
-                               b_mask=np.array([True, True]))
-    algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]),
-                               arr_mask=np.array([True, False]),
-                               b_mask=np.array([False, True]))
+    algos.checked_add_with_arr(
+        np.array([m, m]), np.array([m, m]), arr_mask=np.array([True, True])
+    )
+    algos.checked_add_with_arr(
+        np.array([m, m]), np.array([m, m]), b_mask=np.array([True, True])
+    )
+    algos.checked_add_with_arr(
+        np.array([m, m]),
+        np.array([m, m]),
+        arr_mask=np.array([True, False]),
+        b_mask=np.array([False, True]),
+    )
 
 
 class TestMode(object):
-
     def test_no_mode(self):
         exp = Series([], dtype=np.float64)
         tm.assert_series_equal(algos.mode([]), exp)
@@ -1754,7 +2098,7 @@ def test_mode_single(self):
         exp_multi = [1]
         data_multi = [1, 1]
 
-        for dt in np.typecodes['AllInteger'] + np.typecodes['Float']:
+        for dt in np.typecodes["AllInteger"] + np.typecodes["Float"]:
             s = Series(data_single, dtype=dt)
             exp = Series(exp_single, dtype=dt)
             tm.assert_series_equal(algos.mode(s), exp)
@@ -1766,8 +2110,8 @@ def test_mode_single(self):
         exp = Series([1], dtype=np.int)
         tm.assert_series_equal(algos.mode([1]), exp)
 
-        exp = Series(['a', 'b', 'c'], dtype=np.object)
-        tm.assert_series_equal(algos.mode(['a', 'b', 'c']), exp)
+        exp = Series(["a", "b", "c"], dtype=np.object)
+        tm.assert_series_equal(algos.mode(["a", "b", "c"]), exp)
 
     def test_number_mode(self):
         exp_single = [1]
@@ -1776,7 +2120,7 @@ def test_number_mode(self):
         exp_multi = [1, 3]
         data_multi = [1] * 5 + [2] * 3 + [3] * 5
 
-        for dt in np.typecodes['AllInteger'] + np.typecodes['Float']:
+        for dt in np.typecodes["AllInteger"] + np.typecodes["Float"]:
             s = Series(data_single, dtype=dt)
             exp = Series(exp_single, dtype=dt)
             tm.assert_series_equal(algos.mode(s), exp)
@@ -1786,15 +2130,15 @@ def test_number_mode(self):
         tm.assert_series_equal(algos.mode(s), exp)
 
     def test_strobj_mode(self):
-        exp = ['b']
-        data = ['a'] * 2 + ['b'] * 3
+        exp = ["b"]
+        data = ["a"] * 2 + ["b"] * 3
 
-        s = Series(data, dtype='c')
-        exp = Series(exp, dtype='c')
+        s = Series(data, dtype="c")
+        exp = Series(exp, dtype="c")
         tm.assert_series_equal(algos.mode(s), exp)
 
-        exp = ['bar']
-        data = ['foo'] * 2 + ['bar'] * 3
+        exp = ["bar"]
+        data = ["foo"] * 2 + ["bar"] * 3
 
         for dt in [str, object]:
             s = Series(data, dtype=dt)
@@ -1802,41 +2146,41 @@ def test_strobj_mode(self):
             tm.assert_series_equal(algos.mode(s), exp)
 
     def test_datelike_mode(self):
-        exp = Series(['1900-05-03', '2011-01-03',
-                      '2013-01-02'], dtype="M8[ns]")
-        s = Series(['2011-01-03', '2013-01-02',
-                    '1900-05-03'], dtype='M8[ns]')
+        exp = Series(["1900-05-03", "2011-01-03", "2013-01-02"], dtype="M8[ns]")
+        s = Series(["2011-01-03", "2013-01-02", "1900-05-03"], dtype="M8[ns]")
         tm.assert_series_equal(algos.mode(s), exp)
 
-        exp = Series(['2011-01-03', '2013-01-02'], dtype='M8[ns]')
-        s = Series(['2011-01-03', '2013-01-02', '1900-05-03',
-                    '2011-01-03', '2013-01-02'], dtype='M8[ns]')
+        exp = Series(["2011-01-03", "2013-01-02"], dtype="M8[ns]")
+        s = Series(
+            ["2011-01-03", "2013-01-02", "1900-05-03", "2011-01-03", "2013-01-02"],
+            dtype="M8[ns]",
+        )
         tm.assert_series_equal(algos.mode(s), exp)
 
     def test_timedelta_mode(self):
-        exp = Series(['-1 days', '0 days', '1 days'],
-                     dtype='timedelta64[ns]')
-        s = Series(['1 days', '-1 days', '0 days'],
-                   dtype='timedelta64[ns]')
+        exp = Series(["-1 days", "0 days", "1 days"], dtype="timedelta64[ns]")
+        s = Series(["1 days", "-1 days", "0 days"], dtype="timedelta64[ns]")
dtype="timedelta64[ns]") tm.assert_series_equal(algos.mode(s), exp) - exp = Series(['2 min', '1 day'], dtype='timedelta64[ns]') - s = Series(['1 day', '1 day', '-1 day', '-1 day 2 min', - '2 min', '2 min'], dtype='timedelta64[ns]') + exp = Series(["2 min", "1 day"], dtype="timedelta64[ns]") + s = Series( + ["1 day", "1 day", "-1 day", "-1 day 2 min", "2 min", "2 min"], + dtype="timedelta64[ns]", + ) tm.assert_series_equal(algos.mode(s), exp) def test_mixed_dtype(self): - exp = Series(['foo']) - s = Series([1, 'foo', 'foo']) + exp = Series(["foo"]) + s = Series([1, "foo", "foo"]) tm.assert_series_equal(algos.mode(s), exp) def test_uint64_overflow(self): - exp = Series([2**63], dtype=np.uint64) - s = Series([1, 2**63, 2**63], dtype=np.uint64) + exp = Series([2 ** 63], dtype=np.uint64) + s = Series([1, 2 ** 63, 2 ** 63], dtype=np.uint64) tm.assert_series_equal(algos.mode(s), exp) - exp = Series([1, 2**63], dtype=np.uint64) - s = Series([1, 2**63], dtype=np.uint64) + exp = Series([1, 2 ** 63], dtype=np.uint64) + s = Series([1, 2 ** 63], dtype=np.uint64) tm.assert_series_equal(algos.mode(s), exp) def test_categorical(self): @@ -1845,8 +2189,8 @@ def test_categorical(self): tm.assert_categorical_equal(algos.mode(c), exp) tm.assert_categorical_equal(c.mode(), exp) - c = Categorical([1, 'a', 'a']) - exp = Categorical(['a'], categories=[1, 'a']) + c = Categorical([1, "a", "a"]) + exp = Categorical(["a"], categories=[1, "a"]) tm.assert_categorical_equal(algos.mode(c), exp) tm.assert_categorical_equal(c.mode(), exp) @@ -1860,15 +2204,17 @@ def test_index(self): exp = Series([1, 2, 3], dtype=np.int64) tm.assert_series_equal(algos.mode(idx), exp) - idx = Index([1, 'a', 'a']) - exp = Series(['a'], dtype=object) + idx = Index([1, "a", "a"]) + exp = Series(["a"], dtype=object) tm.assert_series_equal(algos.mode(idx), exp) idx = Index([1, 1, 2, 3, 3]) exp = Series([1, 3], dtype=np.int64) tm.assert_series_equal(algos.mode(idx), exp) - exp = Series(['2 min', '1 day'], dtype='timedelta64[ns]') - idx = Index(['1 day', '1 day', '-1 day', '-1 day 2 min', - '2 min', '2 min'], dtype='timedelta64[ns]') + exp = Series(["2 min", "1 day"], dtype="timedelta64[ns]") + idx = Index( + ["1 day", "1 day", "-1 day", "-1 day 2 min", "2 min", "2 min"], + dtype="timedelta64[ns]", + ) tm.assert_series_equal(algos.mode(idx), exp) From 786159fd56759428c939fbb7891ce4d6a25da8c0 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 11 Oct 2019 14:10:46 +0200 Subject: [PATCH 14/15] lint --- pandas/tests/test_algos.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index d6c29bbf8454f..1d76b99a7170d 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -371,6 +371,7 @@ def test_factorize_na_sentinel(self, sort, na_sentinel, data, uniques): else: tm.assert_extension_array_equal(uniques, expected_uniques) + class TestUnique: def test_unique_all_dtypes(self, any_numpy_dtype): dtype = any_numpy_dtype From dfee500d4ded093ee08fe609cac768faae563633 Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Fri, 11 Oct 2019 16:18:34 +0200 Subject: [PATCH 15/15] fix oversight from merge --- pandas/core/algorithms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 8053b3e6c3219..8392f9c4cc77d 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -428,7 +428,7 @@ def unique(values, return_inverse=False): else: uniques = table.unique(values) - uniques = _reconstruct_data(uniques, dtype, original) + uniques = _reconstruct_data(uniques, original.dtype, original) if return_inverse: return uniques, inverse