Skip to content

Commit b8bfcff

Browse files
committed
Final refactor of categorical tests
1 parent 44be314 commit b8bfcff

12 files changed

+342
-330
lines changed
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# -*- coding: utf-8 -*-
2+
3+
import pytest
4+
5+
import numpy as np
6+
7+
import pandas.util.testing as tm
8+
from pandas import Categorical
9+
10+
11+
class TestCategoricalAnalytics(object):
    """Tests for ``Categorical.min``, ``Categorical.max`` and
    ``Categorical.mode``."""

    def test_min_max(self):
        """Exercise ``min``/``max`` on unordered, ordered and NaN data."""

        # unordered cats have no min/max
        cat = Categorical(["a", "b", "c", "d"], ordered=False)
        with pytest.raises(TypeError):
            cat.min()
        with pytest.raises(TypeError):
            cat.max()

        # ordered, with the categories inferred from the values
        cat = Categorical(["a", "b", "c", "d"], ordered=True)
        _min = cat.min()
        _max = cat.max()
        assert _min == "a"
        assert _max == "d"

        # explicit reversed category order: the expected extremes flip,
        # i.e. the comparison follows the categories, not the raw strings
        cat = Categorical(["a", "b", "c", "d"],
                          categories=['d', 'c', 'b', 'a'], ordered=True)
        _min = cat.min()
        _max = cat.max()
        assert _min == "d"
        assert _max == "a"

        # missing values present: min is expected to come back as NaN,
        # max is still expected to be a real category
        cat = Categorical([np.nan, "b", "c", np.nan],
                          categories=['d', 'c', 'b', 'a'], ordered=True)
        _min = cat.min()
        _max = cat.max()
        assert np.isnan(_min)
        assert _max == "b"

        # with numeric_only=True both results are expected to be categories
        _min = cat.min(numeric_only=True)
        assert _min == "c"
        _max = cat.max(numeric_only=True)
        assert _max == "b"

        # same checks again with integer categories
        cat = Categorical([np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1],
                          ordered=True)
        _min = cat.min()
        _max = cat.max()
        assert np.isnan(_min)
        assert _max == 1

        _min = cat.min(numeric_only=True)
        assert _min == 2
        _max = cat.max(numeric_only=True)
        assert _max == 1

    def test_mode(self):
        """Exercise ``mode`` for unique, tied and NaN-heavy inputs."""
        # Every case below uses the same ordered, descending categories.
        categories = [5, 4, 3, 2, 1]

        # single most frequent value
        s = Categorical([1, 1, 2, 4, 5, 5, 5], categories=categories,
                        ordered=True)
        res = s.mode()
        exp = Categorical([5], categories=categories, ordered=True)
        tm.assert_categorical_equal(res, exp)

        # two values tied for most frequent
        s = Categorical([1, 1, 1, 4, 5, 5, 5], categories=categories,
                        ordered=True)
        res = s.mode()
        exp = Categorical([5, 1], categories=categories, ordered=True)
        tm.assert_categorical_equal(res, exp)

        # every value occurs once, so all of them are expected back
        s = Categorical([1, 2, 3, 4, 5], categories=categories,
                        ordered=True)
        res = s.mode()
        exp = Categorical([5, 4, 3, 2, 1],
                          categories=categories, ordered=True)
        tm.assert_categorical_equal(res, exp)

        # NaN should not become the mode!
        s = Categorical([np.nan, np.nan, np.nan, 4, 5],
                        categories=categories, ordered=True)
        res = s.mode()
        exp = Categorical([5, 4], categories=categories, ordered=True)
        tm.assert_categorical_equal(res, exp)

        s = Categorical([np.nan, np.nan, np.nan, 4, 5, 4],
                        categories=categories, ordered=True)
        res = s.mode()
        exp = Categorical([4], categories=categories, ordered=True)
        tm.assert_categorical_equal(res, exp)

        s = Categorical([np.nan, np.nan, 4, 5, 4], categories=categories,
                        ordered=True)
        res = s.mode()
        exp = Categorical([4], categories=categories, ordered=True)
        tm.assert_categorical_equal(res, exp)

pandas/tests/categorical/test_api.py

Lines changed: 58 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,11 @@
66
import numpy as np
77

88
import pandas.util.testing as tm
9-
from pandas import Categorical, Index, Series
9+
from pandas import Categorical, CategoricalIndex, Index, Series, DataFrame
1010

1111
from pandas.compat import PYPY
1212
from pandas.core.categorical import _recode_for_categories
13+
from pandas.tests.categorical.common import TestCategorical
1314

1415

1516
class TestCategoricalAPI(object):
@@ -511,47 +512,6 @@ def f():
511512
exp = np.array([0, 1, 2, 0, 2], dtype='int8')
512513
tm.assert_numpy_array_equal(c.codes, exp)
513514

514-
def test_min_max(self):
515-
516-
# unordered cats have no min/max
517-
cat = Categorical(["a", "b", "c", "d"], ordered=False)
518-
pytest.raises(TypeError, lambda: cat.min())
519-
pytest.raises(TypeError, lambda: cat.max())
520-
cat = Categorical(["a", "b", "c", "d"], ordered=True)
521-
_min = cat.min()
522-
_max = cat.max()
523-
assert _min == "a"
524-
assert _max == "d"
525-
cat = Categorical(["a", "b", "c", "d"],
526-
categories=['d', 'c', 'b', 'a'], ordered=True)
527-
_min = cat.min()
528-
_max = cat.max()
529-
assert _min == "d"
530-
assert _max == "a"
531-
cat = Categorical([np.nan, "b", "c", np.nan],
532-
categories=['d', 'c', 'b', 'a'], ordered=True)
533-
_min = cat.min()
534-
_max = cat.max()
535-
assert np.isnan(_min)
536-
assert _max == "b"
537-
538-
_min = cat.min(numeric_only=True)
539-
assert _min == "c"
540-
_max = cat.max(numeric_only=True)
541-
assert _max == "b"
542-
543-
cat = Categorical([np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1],
544-
ordered=True)
545-
_min = cat.min()
546-
_max = cat.max()
547-
assert np.isnan(_min)
548-
assert _max == 1
549-
550-
_min = cat.min(numeric_only=True)
551-
assert _min == 2
552-
_max = cat.max(numeric_only=True)
553-
assert _max == 1
554-
555515
def test_unique(self):
556516
# categories are reordered based on value when ordered=False
557517
cat = Categorical(["a", "b"])
@@ -633,40 +593,6 @@ def test_unique_index_series(self):
633593
tm.assert_index_equal(Index(c).unique(), Index(exp))
634594
tm.assert_categorical_equal(Series(c).unique(), exp)
635595

636-
def test_mode(self):
637-
s = Categorical([1, 1, 2, 4, 5, 5, 5], categories=[5, 4, 3, 2, 1],
638-
ordered=True)
639-
res = s.mode()
640-
exp = Categorical([5], categories=[5, 4, 3, 2, 1], ordered=True)
641-
tm.assert_categorical_equal(res, exp)
642-
s = Categorical([1, 1, 1, 4, 5, 5, 5], categories=[5, 4, 3, 2, 1],
643-
ordered=True)
644-
res = s.mode()
645-
exp = Categorical([5, 1], categories=[5, 4, 3, 2, 1], ordered=True)
646-
tm.assert_categorical_equal(res, exp)
647-
s = Categorical([1, 2, 3, 4, 5], categories=[5, 4, 3, 2, 1],
648-
ordered=True)
649-
res = s.mode()
650-
exp = Categorical([5, 4, 3, 2, 1],
651-
categories=[5, 4, 3, 2, 1], ordered=True)
652-
tm.assert_categorical_equal(res, exp)
653-
# NaN should not become the mode!
654-
s = Categorical([np.nan, np.nan, np.nan, 4, 5],
655-
categories=[5, 4, 3, 2, 1], ordered=True)
656-
res = s.mode()
657-
exp = Categorical([5, 4], categories=[5, 4, 3, 2, 1], ordered=True)
658-
tm.assert_categorical_equal(res, exp)
659-
s = Categorical([np.nan, np.nan, np.nan, 4, 5, 4],
660-
categories=[5, 4, 3, 2, 1], ordered=True)
661-
res = s.mode()
662-
exp = Categorical([4], categories=[5, 4, 3, 2, 1], ordered=True)
663-
tm.assert_categorical_equal(res, exp)
664-
s = Categorical([np.nan, np.nan, 4, 5, 4], categories=[5, 4, 3, 2, 1],
665-
ordered=True)
666-
res = s.mode()
667-
exp = Categorical([4], categories=[5, 4, 3, 2, 1], ordered=True)
668-
tm.assert_categorical_equal(res, exp)
669-
670596
def test_shift(self):
671597
# GH 9416
672598
cat = Categorical(['a', 'b', 'c', 'd', 'a'])
@@ -774,13 +700,6 @@ def test_validate_inplace(self):
774700
with pytest.raises(ValueError):
775701
cat.sort_values(inplace=value)
776702

777-
@pytest.mark.xfail(reason="Imaginary values not supported in Categorical")
778-
def test_imaginary(self):
779-
values = [1, 2, 3 + 1j]
780-
c1 = Categorical(values)
781-
tm.assert_index_equal(c1.categories, Index(values))
782-
tm.assert_numpy_array_equal(np.array(c1), np.array(values))
783-
784703
def test_repeat(self):
785704
# GH10183
786705
cat = Categorical(["a", "b"], categories=["a", "b"])
@@ -804,17 +723,61 @@ def test_astype_categorical(self):
804723

805724
pytest.raises(ValueError, lambda: cat.astype(float))
806725

807-
def test_cat_tab_completition(self):
808-
# test the tab completion display
809-
ok_for_cat = ['categories', 'codes', 'ordered', 'set_categories',
810-
'add_categories', 'remove_categories',
811-
'rename_categories', 'reorder_categories',
812-
'remove_unused_categories', 'as_ordered', 'as_unordered']
726+
def test_isna(self):
    # Only the trailing NaN entry is expected to be flagged as missing.
    values = Categorical(["a", "b", np.nan])
    expected = np.array([False, False, True])
    tm.assert_numpy_array_equal(values.isna(), expected)
813732

814-
def get_dir(s):
815-
results = [r for r in s.cat.__dir__() if not r.startswith('_')]
816-
return list(sorted(set(results)))
817733

818-
s = Series(list('aabbcde')).astype('category')
819-
results = get_dir(s)
820-
tm.assert_almost_equal(results, list(sorted(set(ok_for_cat))))
734+
class TestCategoricalAPIWithFactor(TestCategorical):
    # API tests that use the ``self.factor`` fixture from TestCategorical.
    # NOTE(review): the expected counts below imply the fixture holds eight
    # values over the categories 'a', 'b', 'c' -- confirm against the base
    # class, which is not visible here.

    def test_describe(self):
        factor = self.factor

        # string type
        result = factor.describe()
        assert factor.ordered
        index = CategoricalIndex(list('abc'), ordered=factor.ordered,
                                 name='categories')
        frame = DataFrame({'counts': [3, 2, 3],
                           'freqs': [3 / 8.0, 2 / 8.0, 3 / 8.0]},
                          index=index)
        tm.assert_frame_equal(result, frame)

        # check unused categories: 'd' is added but never observed
        widened = factor.copy()
        widened.set_categories(list('abcd'), inplace=True)
        result = widened.describe()

        index = CategoricalIndex(['a', 'b', 'c', 'd'],
                                 ordered=factor.ordered, name='categories')
        frame = DataFrame({'counts': [3, 2, 3, 0],
                           'freqs': [3 / 8.0, 2 / 8.0, 3 / 8.0, 0]},
                          index=index)
        tm.assert_frame_equal(result, frame)

        # check an integer one
        ints = Categorical([1, 2, 3, 1, 2, 3, 3, 2, 1, 1, 1])
        result = ints.describe()
        index = CategoricalIndex([1, 2, 3], ordered=ints.ordered,
                                 name='categories')
        frame = DataFrame({'counts': [5, 3, 3],
                           'freqs': [5 / 11.0, 3 / 11.0, 3 / 11.0]},
                          index=index)
        tm.assert_frame_equal(result, frame)

        # https://github.com/pandas-dev/pandas/issues/3678
        # describe should work with NaN
        with_nan = Categorical([np.nan, 1, 2, 2])
        result = with_nan.describe()
        index = CategoricalIndex([1, 2, np.nan], categories=[1, 2],
                                 name='categories')
        frame = DataFrame({'counts': [1, 2, 1],
                           'freqs': [1 / 4.0, 2 / 4.0, 1 / 4.0]},
                          index=index)
        tm.assert_frame_equal(result, frame)

    def test_set_categories_inplace(self):
        # Work on a copy so the fixture itself is not modified.
        working = self.factor.copy()
        working.set_categories(list('abcd'), inplace=True)
        wanted = Index(['a', 'b', 'c', 'd'])
        tm.assert_index_equal(working.categories, wanted)

pandas/tests/categorical/test_constructors.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# -*- coding: utf-8 -*-
2+
23
import pytest
34
from datetime import datetime
45

@@ -507,3 +508,10 @@ def test_construction_with_ordered(self):
507508
assert not cat.ordered
508509
cat = Categorical([0, 1, 2], ordered=True)
509510
assert cat.ordered
511+
512+
@pytest.mark.xfail(reason="Imaginary values not supported in Categorical")
def test_constructor_imaginary(self):
    # Expected to fail for now (see the xfail reason): a complex value mixed
    # in with integers should survive both as a category and as a value.
    data = [1, 2, 3 + 1j]
    cat = Categorical(data)
    tm.assert_index_equal(cat.categories, Index(data))
    as_array = np.array(cat)
    tm.assert_numpy_array_equal(as_array, np.array(data))

0 commit comments

Comments
 (0)