1
1
# -*- coding: utf-8 -*-
2
2
3
3
import pytest
4
+ import sys
4
5
5
6
import numpy as np
6
7
7
8
import pandas .util .testing as tm
8
- from pandas import Categorical
9
+ from pandas import Categorical , Index , Series
10
+
11
+ from pandas .compat import PYPY
9
12
10
13
11
14
class TestCategoricalAnalytics (object ):
@@ -16,17 +19,20 @@ def test_min_max(self):
16
19
cat = Categorical (["a" , "b" , "c" , "d" ], ordered = False )
17
20
pytest .raises (TypeError , lambda : cat .min ())
18
21
pytest .raises (TypeError , lambda : cat .max ())
22
+
19
23
cat = Categorical (["a" , "b" , "c" , "d" ], ordered = True )
20
24
_min = cat .min ()
21
25
_max = cat .max ()
22
26
assert _min == "a"
23
27
assert _max == "d"
28
+
24
29
cat = Categorical (["a" , "b" , "c" , "d" ],
25
30
categories = ['d' , 'c' , 'b' , 'a' ], ordered = True )
26
31
_min = cat .min ()
27
32
_max = cat .max ()
28
33
assert _min == "d"
29
34
assert _max == "a"
35
+
30
36
cat = Categorical ([np .nan , "b" , "c" , np .nan ],
31
37
categories = ['d' , 'c' , 'b' , 'a' ], ordered = True )
32
38
_min = cat .min ()
@@ -51,36 +57,264 @@ def test_min_max(self):
51
57
_max = cat .max (numeric_only = True )
52
58
assert _max == 1
53
59
54
- def test_mode (self ):
55
- s = Categorical ([1 , 1 , 2 , 4 , 5 , 5 , 5 ], categories = [5 , 4 , 3 , 2 , 1 ],
56
- ordered = True )
60
@pytest.mark.parametrize("values,categories,exp_mode", [
    ([1, 1, 2, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5]),
    ([1, 1, 1, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5, 1]),
    ([1, 2, 3, 4, 5], [5, 4, 3, 2, 1], [5, 4, 3, 2, 1]),
    # NaN should not become the mode, even when it is the most
    # frequent entry.
    ([np.nan, np.nan, np.nan, 4, 5], [5, 4, 3, 2, 1], [5, 4]),
    ([np.nan, np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4]),
    ([np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4])])
def test_mode(self, values, categories, exp_mode):
    """Check that ``Categorical.mode`` returns the expected categorical.

    Parameters
    ----------
    values : list
        Raw values used to build the ordered Categorical under test.
    categories : list
        Categories shared by the input and the expected result.
    exp_mode : list
        Values the mode is expected to contain.
    """
    s = Categorical(values, categories=categories, ordered=True)
    res = s.mode()
    exp = Categorical(exp_mode, categories=categories, ordered=True)
    tm.assert_categorical_equal(res, exp)
60
- s = Categorical ([1 , 1 , 1 , 4 , 5 , 5 , 5 ], categories = [5 , 4 , 3 , 2 , 1 ],
61
- ordered = True )
62
- res = s .mode ()
63
- exp = Categorical ([5 , 1 ], categories = [5 , 4 , 3 , 2 , 1 ], ordered = True )
64
- tm .assert_categorical_equal (res , exp )
65
- s = Categorical ([1 , 2 , 3 , 4 , 5 ], categories = [5 , 4 , 3 , 2 , 1 ],
66
- ordered = True )
67
- res = s .mode ()
68
- exp = Categorical ([5 , 4 , 3 , 2 , 1 ],
69
- categories = [5 , 4 , 3 , 2 , 1 ], ordered = True )
70
- tm .assert_categorical_equal (res , exp )
71
- # NaN should not become the mode!
72
- s = Categorical ([np .nan , np .nan , np .nan , 4 , 5 ],
73
- categories = [5 , 4 , 3 , 2 , 1 ], ordered = True )
74
- res = s .mode ()
75
- exp = Categorical ([5 , 4 ], categories = [5 , 4 , 3 , 2 , 1 ], ordered = True )
76
- tm .assert_categorical_equal (res , exp )
77
- s = Categorical ([np .nan , np .nan , np .nan , 4 , 5 , 4 ],
78
- categories = [5 , 4 , 3 , 2 , 1 ], ordered = True )
79
- res = s .mode ()
80
- exp = Categorical ([4 ], categories = [5 , 4 , 3 , 2 , 1 ], ordered = True )
81
- tm .assert_categorical_equal (res , exp )
82
- s = Categorical ([np .nan , np .nan , 4 , 5 , 4 ], categories = [5 , 4 , 3 , 2 , 1 ],
83
- ordered = True )
84
- res = s .mode ()
85
- exp = Categorical ([4 ], categories = [5 , 4 , 3 , 2 , 1 ], ordered = True )
72
+
73
def test_searchsorted(self):
    """Check ``searchsorted`` on a Categorical and on a Series wrapping it.

    Covers a scalar needle, list-like needles, ``side='right'``, needles
    that are not among the categories, an unordered Categorical, and the
    ``v`` keyword spelling, which is expected to emit a FutureWarning.
    """
    # https://github.com/pandas-dev/pandas/issues/8420
    # https://github.com/pandas-dev/pandas/issues/14522
    values = ['cheese', 'milk', 'apple', 'bread', 'bread']
    categories = ['cheese', 'milk', 'apple', 'bread']

    c1 = Categorical(values, categories=categories, ordered=True)
    s1 = Series(c1)
    c2 = Categorical(values, categories=categories, ordered=False)
    s2 = Series(c2)

    # Searching for single item argument, side='left' (default)
    res_cat = c1.searchsorted('apple')
    res_ser = s1.searchsorted('apple')
    exp = np.array([2], dtype=np.intp)
    tm.assert_numpy_array_equal(res_cat, exp)
    tm.assert_numpy_array_equal(res_ser, exp)

    # Searching for single item array, side='left' (default)
    res_cat = c1.searchsorted(['bread'])
    res_ser = s1.searchsorted(['bread'])
    exp = np.array([3], dtype=np.intp)
    tm.assert_numpy_array_equal(res_cat, exp)
    tm.assert_numpy_array_equal(res_ser, exp)

    # Searching for several items array, side='right'
    res_cat = c1.searchsorted(['apple', 'bread'], side='right')
    res_ser = s1.searchsorted(['apple', 'bread'], side='right')
    exp = np.array([3, 5], dtype=np.intp)
    tm.assert_numpy_array_equal(res_cat, exp)
    tm.assert_numpy_array_equal(res_ser, exp)

    for ordered_obj in (c1, s1):
        # Searching for a single value that is not from the Categorical
        with pytest.raises(ValueError):
            ordered_obj.searchsorted('cucumber')

        # Searching for multiple values, one of which is not from the
        # Categorical
        with pytest.raises(ValueError):
            ordered_obj.searchsorted(['bread', 'cucumber'])

    # searchsorted call for unordered Categorical
    for unordered_obj in (c2, s2):
        with pytest.raises(ValueError):
            unordered_obj.searchsorted('apple')

    # Only the call itself sits inside the warning check, so the
    # FutureWarning can only come from passing the needle as ``v``.
    with tm.assert_produces_warning(FutureWarning):
        res = c1.searchsorted(v=['bread'])
    exp = np.array([3], dtype=np.intp)
    tm.assert_numpy_array_equal(res, exp)
125
+
126
def test_unique(self):
    """Check ``Categorical.unique`` for unordered categoricals.

    The assertions expect the result's categories to contain only the
    values actually present, listed in order of first appearance.
    """
    # categories follow order of appearance when ordered=False
    cat = Categorical(["a", "b"])
    exp = Index(["a", "b"])
    res = cat.unique()
    tm.assert_index_equal(res.categories, exp)
    tm.assert_categorical_equal(res, cat)

    # the unused category "c" is expected to be dropped
    cat = Categorical(["a", "b", "a", "a"], categories=["a", "b", "c"])
    res = cat.unique()
    tm.assert_index_equal(res.categories, exp)
    tm.assert_categorical_equal(res, Categorical(exp))

    cat = Categorical(["c", "a", "b", "a", "a"],
                      categories=["a", "b", "c"])
    exp = Index(["c", "a", "b"])
    res = cat.unique()
    tm.assert_index_equal(res.categories, exp)
    exp_cat = Categorical(exp, categories=['c', 'a', 'b'])
    tm.assert_categorical_equal(res, exp_cat)

    # nan must be removed from the categories, but stays as a value
    cat = Categorical(["b", np.nan, "b", np.nan, "a"],
                      categories=["a", "b", "c"])
    res = cat.unique()
    exp = Index(["b", "a"])
    tm.assert_index_equal(res.categories, exp)
    exp_cat = Categorical(["b", np.nan, "a"], categories=["b", "a"])
    tm.assert_categorical_equal(res, exp_cat)
155
+
156
def test_unique_ordered(self):
    """Check ``Categorical.unique`` for ordered categoricals.

    The expected results keep the relative order of the original
    categories while the values appear in order of first occurrence.
    """
    # keep categories order when ordered=True
    cat = Categorical(['b', 'a', 'b'], categories=['a', 'b'], ordered=True)
    res = cat.unique()
    exp_cat = Categorical(['b', 'a'], categories=['a', 'b'], ordered=True)
    tm.assert_categorical_equal(res, exp_cat)

    cat = Categorical(['c', 'b', 'a', 'a'], categories=['a', 'b', 'c'],
                      ordered=True)
    res = cat.unique()
    exp_cat = Categorical(['c', 'b', 'a'], categories=['a', 'b', 'c'],
                          ordered=True)
    tm.assert_categorical_equal(res, exp_cat)

    # the unused category 'c' is expected to be dropped
    cat = Categorical(['b', 'a', 'a'], categories=['a', 'b', 'c'],
                      ordered=True)
    res = cat.unique()
    exp_cat = Categorical(['b', 'a'], categories=['a', 'b'], ordered=True)
    tm.assert_categorical_equal(res, exp_cat)

    # a single nan is expected to remain among the values
    cat = Categorical(['b', 'b', np.nan, 'a'], categories=['a', 'b', 'c'],
                      ordered=True)
    res = cat.unique()
    exp_cat = Categorical(['b', np.nan, 'a'], categories=['a', 'b'],
                          ordered=True)
    tm.assert_categorical_equal(res, exp_cat)
182
+
183
def test_unique_index_series(self):
    """Check that ``unique`` agrees across Categorical, Index and Series.

    Each case compares ``Categorical.unique`` with the result of calling
    ``unique`` on an Index and on a Series built from the same data.
    """
    def check(c, exp):
        # The same expectation must hold for all three containers.
        tm.assert_categorical_equal(c.unique(), exp)
        tm.assert_index_equal(Index(c).unique(), Index(exp))
        tm.assert_categorical_equal(Series(c).unique(), exp)

    # Categorical.unique sorts categories by appearance order
    # if ordered=False
    check(Categorical([3, 1, 2, 2, 1], categories=[3, 2, 1]),
          Categorical([3, 1, 2], categories=[3, 1, 2]))

    check(Categorical([1, 1, 2, 2], categories=[3, 2, 1]),
          Categorical([1, 2], categories=[1, 2]))

    # Categorical.unique keeps categories order if ordered=True
    check(Categorical([3, 1, 2, 2, 1], categories=[3, 2, 1], ordered=True),
          Categorical([3, 1, 2], categories=[3, 2, 1], ordered=True))
206
+
207
+ def test_shift (self ):
208
+ # GH 9416
209
+ cat = Categorical (['a' , 'b' , 'c' , 'd' , 'a' ])
210
+
211
+ # shift forward
212
+ sp1 = cat .shift (1 )
213
+ xp1 = Categorical ([np .nan , 'a' , 'b' , 'c' , 'd' ])
214
+ tm .assert_categorical_equal (sp1 , xp1 )
215
+ tm .assert_categorical_equal (cat [:- 1 ], sp1 [1 :])
216
+
217
+ # shift back
218
+ sn2 = cat .shift (- 2 )
219
+ xp2 = Categorical (['c' , 'd' , 'a' , np .nan , np .nan ],
220
+ categories = ['a' , 'b' , 'c' , 'd' ])
221
+ tm .assert_categorical_equal (sn2 , xp2 )
222
+ tm .assert_categorical_equal (cat [2 :], sn2 [:- 2 ])
223
+
224
+ # shift by zero
225
+ tm .assert_categorical_equal (cat , cat .shift (0 ))
226
+
227
+ def test_nbytes (self ):
228
+ cat = Categorical ([1 , 2 , 3 ])
229
+ exp = 3 + 3 * 8 # 3 int8s for values + 3 int64s for categories
230
+ assert cat .nbytes == exp
231
+
232
+ def test_memory_usage (self ):
233
+ cat = Categorical ([1 , 2 , 3 ])
234
+
235
+ # .categories is an index, so we include the hashtable
236
+ assert 0 < cat .nbytes <= cat .memory_usage ()
237
+ assert 0 < cat .nbytes <= cat .memory_usage (deep = True )
238
+
239
+ cat = Categorical (['foo' , 'foo' , 'bar' ])
240
+ assert cat .memory_usage (deep = True ) > cat .nbytes
241
+
242
+ if not PYPY :
243
+ # sys.getsizeof will call the .memory_usage with
244
+ # deep=True, and add on some GC overhead
245
+ diff = cat .memory_usage (deep = True ) - sys .getsizeof (cat )
246
+ assert abs (diff ) < 100
247
+
248
+ def test_map (self ):
249
+ c = Categorical (list ('ABABC' ), categories = list ('CBA' ), ordered = True )
250
+ result = c .map (lambda x : x .lower ())
251
+ exp = Categorical (list ('ababc' ), categories = list ('cba' ), ordered = True )
252
+ tm .assert_categorical_equal (result , exp )
253
+
254
+ c = Categorical (list ('ABABC' ), categories = list ('ABC' ), ordered = False )
255
+ result = c .map (lambda x : x .lower ())
256
+ exp = Categorical (list ('ababc' ), categories = list ('abc' ), ordered = False )
257
+ tm .assert_categorical_equal (result , exp )
258
+
259
+ result = c .map (lambda x : 1 )
260
+ # GH 12766: Return an index not an array
261
+ tm .assert_index_equal (result , Index (np .array ([1 ] * 5 , dtype = np .int64 )))
262
+
263
def test_validate_inplace(self):
    """Check that a non-boolean ``inplace`` argument raises ValueError.

    Every Categorical method below that takes ``inplace`` is called with
    each of the invalid values in turn.
    """
    cat = Categorical(['A', 'B', 'B', 'C', 'A'])
    invalid_values = [1, "True", [1, 2, 3], 5.0]

    for value in invalid_values:
        with pytest.raises(ValueError):
            cat.set_ordered(value=True, inplace=value)

        with pytest.raises(ValueError):
            cat.as_ordered(inplace=value)

        with pytest.raises(ValueError):
            cat.as_unordered(inplace=value)

        with pytest.raises(ValueError):
            cat.set_categories(['X', 'Y', 'Z'], rename=True, inplace=value)

        with pytest.raises(ValueError):
            cat.rename_categories(['X', 'Y', 'Z'], inplace=value)

        with pytest.raises(ValueError):
            cat.reorder_categories(
                ['X', 'Y', 'Z'], ordered=True, inplace=value)

        with pytest.raises(ValueError):
            cat.add_categories(
                new_categories=['D', 'E', 'F'], inplace=value)

        with pytest.raises(ValueError):
            cat.remove_categories(removals=['D', 'E', 'F'], inplace=value)

        with pytest.raises(ValueError):
            cat.remove_unused_categories(inplace=value)

        with pytest.raises(ValueError):
            cat.sort_values(inplace=value)
299
+
300
+ def test_repeat (self ):
301
+ # GH10183
302
+ cat = Categorical (["a" , "b" ], categories = ["a" , "b" ])
303
+ exp = Categorical (["a" , "a" , "b" , "b" ], categories = ["a" , "b" ])
304
+ res = cat .repeat (2 )
86
305
tm .assert_categorical_equal (res , exp )
306
+
307
def test_numpy_repeat(self):
    """Check ``np.repeat`` on a Categorical, including the ``axis`` error.

    ``np.repeat`` is expected to give the same result as repeating each
    element, and passing ``axis`` is expected to raise ValueError with
    the message checked below.
    """
    cat = Categorical(["a", "b"], categories=["a", "b"])
    exp = Categorical(["a", "a", "b", "b"], categories=["a", "b"])
    tm.assert_categorical_equal(np.repeat(cat, 2), exp)

    msg = "the 'axis' parameter is not supported"
    tm.assert_raises_regex(ValueError, msg, np.repeat, cat, 2, axis=1)
314
+
315
+ def test_isna (self ):
316
+ exp = np .array ([False , False , True ])
317
+ c = Categorical (["a" , "b" , np .nan ])
318
+ res = c .isna ()
319
+
320
+ tm .assert_numpy_array_equal (res , exp )
0 commit comments