6
6
import numpy as np
7
7
8
8
import pandas .util .testing as tm
9
- from pandas import Categorical , Index , Series
9
+ from pandas import Categorical , CategoricalIndex , Index , Series , DataFrame
10
10
11
11
from pandas .compat import PYPY
12
12
from pandas .core .categorical import _recode_for_categories
13
+ from pandas .tests .categorical .common import TestCategorical
13
14
14
15
15
16
class TestCategoricalAPI (object ):
@@ -511,47 +512,6 @@ def f():
511
512
exp = np .array ([0 , 1 , 2 , 0 , 2 ], dtype = 'int8' )
512
513
tm .assert_numpy_array_equal (c .codes , exp )
513
514
514
def test_min_max(self):
    """Check ``Categorical.min`` and ``Categorical.max``.

    Unordered categoricals are expected to raise ``TypeError``.  Ordered
    ones are expected to follow the declared category order, to report NaN
    as the minimum when NaN is present, and to skip NaN when
    ``numeric_only=True`` is passed.
    """
    letters = ["a", "b", "c", "d"]
    reversed_letters = ['d', 'c', 'b', 'a']

    # unordered cats have no min/max
    unordered = Categorical(letters, ordered=False)
    with pytest.raises(TypeError):
        unordered.min()
    with pytest.raises(TypeError):
        unordered.max()

    # ordered, categories inferred from the values
    inferred = Categorical(letters, ordered=True)
    lowest, highest = inferred.min(), inferred.max()
    assert lowest == "a"
    assert highest == "d"

    # ordered, categories declared in reverse: the extremes swap
    flipped = Categorical(letters, categories=reversed_letters,
                          ordered=True)
    lowest, highest = flipped.min(), flipped.max()
    assert lowest == "d"
    assert highest == "a"

    # string values with NaN at both ends
    str_with_nan = Categorical([np.nan, "b", "c", np.nan],
                               categories=reversed_letters, ordered=True)
    lowest, highest = str_with_nan.min(), str_with_nan.max()
    assert np.isnan(lowest)
    assert highest == "b"

    assert str_with_nan.min(numeric_only=True) == "c"
    assert str_with_nan.max(numeric_only=True) == "b"

    # integer values with NaN at both ends
    int_with_nan = Categorical([np.nan, 1, 2, np.nan],
                               categories=[5, 4, 3, 2, 1], ordered=True)
    lowest, highest = int_with_nan.min(), int_with_nan.max()
    assert np.isnan(lowest)
    assert highest == 1

    assert int_with_nan.min(numeric_only=True) == 2
    assert int_with_nan.max(numeric_only=True) == 1
554
-
555
515
def test_unique (self ):
556
516
# categories are reordered based on value when ordered=False
557
517
cat = Categorical (["a" , "b" ])
@@ -633,40 +593,6 @@ def test_unique_index_series(self):
633
593
tm .assert_index_equal (Index (c ).unique (), Index (exp ))
634
594
tm .assert_categorical_equal (Series (c ).unique (), exp )
635
595
636
def test_mode(self):
    """Check ``Categorical.mode`` on ordered integer categoricals.

    Every case uses categories ``[5, 4, 3, 2, 1]`` with ``ordered=True``;
    the cases containing NaN check that NaN is never reported as the mode.
    """
    categories = [5, 4, 3, 2, 1]

    # Each entry pairs the input values with the values of the expected mode.
    cases = [
        ([1, 1, 2, 4, 5, 5, 5], [5]),
        ([1, 1, 1, 4, 5, 5, 5], [5, 1]),
        ([1, 2, 3, 4, 5], [5, 4, 3, 2, 1]),
        # NaN should not become the mode!
        ([np.nan, np.nan, np.nan, 4, 5], [5, 4]),
        ([np.nan, np.nan, np.nan, 4, 5, 4], [4]),
        ([np.nan, np.nan, 4, 5, 4], [4]),
    ]

    for values, mode_values in cases:
        cat = Categorical(values, categories=categories, ordered=True)
        result = cat.mode()
        expected = Categorical(mode_values, categories=categories,
                               ordered=True)
        tm.assert_categorical_equal(result, expected)
669
-
670
596
def test_shift (self ):
671
597
# GH 9416
672
598
cat = Categorical (['a' , 'b' , 'c' , 'd' , 'a' ])
@@ -774,13 +700,6 @@ def test_validate_inplace(self):
774
700
with pytest .raises (ValueError ):
775
701
cat .sort_values (inplace = value )
776
702
777
@pytest.mark.xfail(reason="Imaginary values not supported in Categorical")
def test_imaginary(self):
    """Build a Categorical from values that include a complex number.

    Marked ``xfail``: the stated reason is that imaginary values are not
    supported in Categorical.
    """
    raw = [1, 2, 3 + 1j]
    cat = Categorical(raw)

    # Both the categories and the materialised array should mirror the input.
    tm.assert_index_equal(cat.categories, Index(raw))
    tm.assert_numpy_array_equal(np.array(cat), np.array(raw))
783
-
784
703
def test_repeat (self ):
785
704
# GH10183
786
705
cat = Categorical (["a" , "b" ], categories = ["a" , "b" ])
@@ -804,17 +723,61 @@ def test_astype_categorical(self):
804
723
805
724
pytest .raises (ValueError , lambda : cat .astype (float ))
806
725
807
- def test_cat_tab_completition (self ):
808
- # test the tab completion display
809
- ok_for_cat = [ 'categories' , 'codes' , 'ordered' , 'set_categories' ,
810
- 'add_categories' , 'remove_categories' ,
811
- 'rename_categories' , 'reorder_categories' ,
812
- 'remove_unused_categories' , 'as_ordered' , 'as_unordered' ]
726
+ def test_isna (self ):
727
+ exp = np . array ([ False , False , True ])
728
+ c = Categorical ([ "a" , "b" , np . nan ])
729
+ res = c . isna ()
730
+
731
+ tm . assert_numpy_array_equal ( res , exp )
813
732
814
- def get_dir (s ):
815
- results = [r for r in s .cat .__dir__ () if not r .startswith ('_' )]
816
- return list (sorted (set (results )))
817
733
818
- s = Series (list ('aabbcde' )).astype ('category' )
819
- results = get_dir (s )
820
- tm .assert_almost_equal (results , list (sorted (set (ok_for_cat ))))
734
class TestCategoricalAPIWithFactor(TestCategorical):
    """Categorical API tests that operate on ``self.factor``.

    ``self.factor`` is not defined in this class; presumably the
    ``TestCategorical`` base provides it -- confirm against that class.
    """

    def test_describe(self):
        """Compare ``Categorical.describe()`` output with expected frames."""
        # string type
        result = self.factor.describe()
        assert self.factor.ordered
        index = CategoricalIndex(['a', 'b', 'c'], name='categories',
                                 ordered=self.factor.ordered)
        expected = DataFrame(
            {'counts': [3, 2, 3], 'freqs': [3 / 8., 2 / 8., 3 / 8.]},
            index=index)
        tm.assert_frame_equal(result, expected)

        # check unused categories: 'd' is declared but never observed
        widened = self.factor.copy()
        widened.set_categories(["a", "b", "c", "d"], inplace=True)
        result = widened.describe()

        index = CategoricalIndex(['a', 'b', 'c', 'd'],
                                 ordered=self.factor.ordered,
                                 name='categories')
        expected = DataFrame(
            {'counts': [3, 2, 3, 0], 'freqs': [3 / 8., 2 / 8., 3 / 8., 0]},
            index=index)
        tm.assert_frame_equal(result, expected)

        # check an integer one
        int_cat = Categorical([1, 2, 3, 1, 2, 3, 3, 2, 1, 1, 1])
        result = int_cat.describe()
        index = CategoricalIndex([1, 2, 3], ordered=int_cat.ordered,
                                 name='categories')
        expected = DataFrame(
            {'counts': [5, 3, 3], 'freqs': [5 / 11., 3 / 11., 3 / 11.]},
            index=index)
        tm.assert_frame_equal(result, expected)

        # https://github.com/pandas-dev/pandas/issues/3678
        # describe should work with NaN
        nan_cat = Categorical([np.nan, 1, 2, 2])
        result = nan_cat.describe()
        index = CategoricalIndex([1, 2, np.nan], categories=[1, 2],
                                 name='categories')
        expected = DataFrame(
            {'counts': [1, 2, 1], 'freqs': [1 / 4., 2 / 4., 1 / 4.]},
            index=index)
        tm.assert_frame_equal(result, expected)

    def test_set_categories_inplace(self):
        """``set_categories(..., inplace=True)`` updates the copy's categories."""
        new_categories = ['a', 'b', 'c', 'd']
        cat = self.factor.copy()
        cat.set_categories(new_categories, inplace=True)
        tm.assert_index_equal(cat.categories, Index(new_categories))
0 commit comments