correct ncategories size + add tests

topper-123 · topper-123 · commit 9e4738f5a9c2 · 2018-08-17T09:24:09.000+01:00
diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py
@@ -2,7 +2,8 @@
 
 import numpy as np
 import pandas.util.testing as tm
-from pandas import (Series, DataFrame, MultiIndex, Int64Index, Float64Index,
+from pandas import (Series, DataFrame, MultiIndex,
+                    Int64Index, UInt64Index, Float64Index,
                     IntervalIndex, CategoricalIndex,
                     IndexSlice, concat, date_range)
 from .pandas_vb_common import setup, Panel  # noqa
@@ -11,7 +12,7 @@
 class NumericSeriesIndexing(object):
 
     goal_time = 0.2
-    params = [Int64Index, Float64Index]
+    params = [Int64Index, UInt64Index, Float64Index]
     param = ['index']
 
     def setup(self, index):
diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py
@@ -14,7 +14,7 @@
 from pandas import Categorical, IntervalIndex, compat
 from pandas.util.testing import assert_almost_equal
 import pandas.core.config as cf
-from pandas._libs import index as li
+from pandas._libs import index as libindex
 import pandas as pd
 
 if PY3:
@@ -1121,18 +1121,18 @@ def test_engine_type(self, nbits):
         """Check that a CategoricalIndex has the correct engine type.
         """
         if nbits < 64:
-            ncategories = int(2 ** (nbits / 2) / 2)  # 128 for nbits==16 etc
+            ncategories = int(2 ** (nbits / 2) / 2 + 1)  # 129 if nbits==16 etc
             index = CategoricalIndex(range(ncategories))
         else:
             index = CategoricalIndex(['a', 'b', 'c'])
-            # having actual 2 ** (64 / 2) / 2 categories is too
+            # having actual 2 ** (64 / 2) / 2 + 1 categories is too
             # memory-intensive, so we set codes.dtype manually
             index._values._codes = index._values._codes.astype('int64')
 
         dtype = {8: np.int8, 16: np.int16,
                  32: np.int32, 64: np.int64}[nbits]
-        engine = {8: li.Int8Engine, 16: li.Int16Engine,
-                  32: li.Int32Engine, 64: li.Int64Engine}[nbits]
+        engine = {8: libindex.Int8Engine, 16: libindex.Int16Engine,
+                  32: libindex.Int32Engine, 64: libindex.Int64Engine}[nbits]
 
         assert isinstance(index._engine, engine)
         assert issubclass(index.codes.dtype.type, dtype)
diff --git a/pandas/tests/indexes/test_engine.py b/pandas/tests/indexes/test_engine.py
@@ -6,6 +6,8 @@
 import pandas as pd
 from pandas._libs.index import (Int64Engine, UInt64Engine,
                                 Float64Engine, ObjectEngine)
+from pandas._libs.lib import is_scalar
+import pandas.util.testing as tm
 
 
 class TestNumericEngine(object):
@@ -52,12 +54,39 @@ def test_is_unique(self, values, expected, num_engine):
         e = num_engine(lambda: codes, len(codes))
         assert e.is_unique is expected
 
+    @pytest.mark.parametrize('values, value, expected', [  # get_loc cases
+        ([1, 2, 3], 2, 1),  # sorted, unique -> integer position
+        ([1, 2, 2, 3], 2, slice(1, 3)),  # sorted, duplicated -> slice
+        ([3, 2, 2, 1], 2, np.array([False,  True,  True, False])),  # mask
+        ([1, 2, 2, 1], 2, np.array([False,  True,  True, False])),  # mask
+        ([1, 3, 2], 2, 2),  # unsorted, unique -> integer position
+    ])
+    def test_get_loc(self, values, value, expected, num_engine):
+        codes = np.array(values, dtype=num_engine._dtype)  # fixture's dtype
+        e = num_engine(lambda: codes, len(codes))  # values getter + length
+        result = e.get_loc(value)  # expected: int, slice or boolean ndarray
+
+        if isinstance(expected, np.ndarray):
+            assert (result == expected).all()  # compare masks element-wise
+        else:
+            assert result == expected
+
+    @pytest.mark.parametrize('values, value, error', [  # failing lookups
+        ([1, 2, 3], 4, KeyError),  # number absent from the data
+        ([1, 2, 3], '4', KeyError),  # str key against numeric data
+    ])
+    def test_get_loc_raises(self, values, value, error, num_engine):
+        codes = np.array(values, dtype=num_engine._dtype)  # fixture's dtype
+        e = num_engine(lambda: codes, len(codes))  # values getter + length
+        with pytest.raises(error):  # the lookup itself must raise
+            e.get_loc(value)
+
 
 class TestObjectEngine(object):
 
     def setup_class(cls):
-        cls.Engine = ObjectEngine
         cls.dtype = object
+        cls.Engine = ObjectEngine
 
     @pytest.mark.parametrize('data', [['a', 'b', 'c']])
     def test_engine_type(self, data):
@@ -92,3 +121,30 @@ def test_is_unique(self, values, expected):
         codes = np.array(values, dtype=self.dtype)
         e = self.Engine(lambda: codes, len(codes))
         assert e.is_unique is expected
+
+    @pytest.mark.parametrize('values, value, expected', [  # get_loc cases
+        (list('abc'), 'b', 1),  # sorted, unique -> integer position
+        (list('abbc'), 'b', slice(1, 3)),  # sorted, duplicated -> slice
+        (list('cbba'), 'b', np.array([False,  True,  True, False])),  # mask
+        (list('abba'), 'b', np.array([False,  True,  True, False])),  # mask
+        (list('acb'), 'b', 2),  # unsorted, unique -> integer position
+    ])
+    def test_get_loc(self, values, value, expected):
+        codes = np.array(values, dtype=self.dtype)  # dtype from setup_class
+        e = self.Engine(lambda: codes, len(codes))  # values getter + length
+        result = e.get_loc(value)  # expected: int, slice or boolean ndarray
+
+        if isinstance(expected, np.ndarray):
+            assert (result == expected).all()  # compare masks element-wise
+        else:
+            assert result == expected
+
+    @pytest.mark.parametrize('values, value, error', [  # failing lookups
+        (list('abc'), 'd', KeyError),  # string absent from the data
+        (list('abc'), 4, KeyError),  # int key against string data
+    ])
+    def test_get_loc_raises(self, values, value, error):
+        codes = np.array(values, dtype=self.dtype)  # dtype from setup_class
+        e = self.Engine(lambda: codes, len(codes))  # values getter + length
+        with pytest.raises(error):  # the lookup itself must raise
+            e.get_loc(value)