1
+ import re
2
+
1
3
import numpy as np
2
4
import pytest
3
5
@@ -254,41 +256,45 @@ def test_slicing_doc_examples(self):
254
256
)
255
257
tm .assert_frame_equal (result , expected )
256
258
257
- def test_loc_listlike (self ):
258
-
259
+ def test_loc_getitem_listlike_labels (self ):
259
260
# list of labels
260
261
result = self .df .loc [["c" , "a" ]]
261
262
expected = self .df .iloc [[4 , 0 , 1 , 5 ]]
262
263
tm .assert_frame_equal (result , expected , check_index_type = True )
263
264
264
- result = self .df2 .loc [["a" , "b" , "e" ]]
265
- exp_index = CategoricalIndex (list ("aaabbe" ), categories = list ("cabe" ), name = "B" )
266
- expected = DataFrame ({"A" : [0 , 1 , 5 , 2 , 3 , np .nan ]}, index = exp_index )
267
- tm .assert_frame_equal (result , expected , check_index_type = True )
265
+ def test_loc_getitem_listlike_unused_category (self ):
266
+ # GH#37901 a label that is in index.categories but not in index
267
+ # listlike containing an element in the categories but not in the values
268
+ msg = (
269
+ "The following labels were missing: CategoricalIndex(['e'], "
270
+ "categories=['c', 'a', 'b', 'e'], ordered=False, name='B', "
271
+ "dtype='category')"
272
+ )
273
+ with pytest .raises (KeyError , match = re .escape (msg )):
274
+ self .df2 .loc [["a" , "b" , "e" ]]
268
275
276
+ def test_loc_getitem_label_unused_category (self ):
269
277
# element in the categories but not in the values
270
278
with pytest .raises (KeyError , match = r"^'e'$" ):
271
279
self .df2 .loc ["e" ]
272
280
273
- # assign is ok
281
+ def test_loc_getitem_non_category (self ):
282
+ # not all labels in the categories
283
+ msg = (
284
+ "The following labels were missing: Index(['d'], dtype='object', name='B')"
285
+ )
286
+ with pytest .raises (KeyError , match = re .escape (msg )):
287
+ self .df2 .loc [["a" , "d" ]]
288
+
289
+ def test_loc_setitem_expansion_label_unused_category (self ):
290
+ # assigning with a label that is in the categories but not in the index
274
291
df = self .df2 .copy ()
275
292
df .loc ["e" ] = 20
276
293
result = df .loc [["a" , "b" , "e" ]]
277
294
exp_index = CategoricalIndex (list ("aaabbe" ), categories = list ("cabe" ), name = "B" )
278
295
expected = DataFrame ({"A" : [0 , 1 , 5 , 2 , 3 , 20 ]}, index = exp_index )
279
296
tm .assert_frame_equal (result , expected )
280
297
281
- df = self .df2 .copy ()
282
- result = df .loc [["a" , "b" , "e" ]]
283
- exp_index = CategoricalIndex (list ("aaabbe" ), categories = list ("cabe" ), name = "B" )
284
- expected = DataFrame ({"A" : [0 , 1 , 5 , 2 , 3 , np .nan ]}, index = exp_index )
285
- tm .assert_frame_equal (result , expected , check_index_type = True )
286
-
287
- # not all labels in the categories
288
- msg = "a list-indexer must only include values that are in the categories"
289
- with pytest .raises (KeyError , match = msg ):
290
- self .df2 .loc [["a" , "d" ]]
291
-
292
298
def test_loc_listlike_dtypes (self ):
293
299
# GH 11586
294
300
@@ -309,8 +315,8 @@ def test_loc_listlike_dtypes(self):
309
315
exp = DataFrame ({"A" : [1 , 1 , 2 ], "B" : [4 , 4 , 5 ]}, index = exp_index )
310
316
tm .assert_frame_equal (res , exp , check_index_type = True )
311
317
312
- msg = "a list-indexer must only include values that are in the categories "
313
- with pytest .raises (KeyError , match = msg ):
318
+ msg = "The following labels were missing: Index(['x'], dtype='object')"
319
+ with pytest .raises (KeyError , match = re .escape (msg )):
314
320
df .loc [["a" , "x" ]]
315
321
316
322
# duplicated categories and codes
@@ -332,8 +338,7 @@ def test_loc_listlike_dtypes(self):
332
338
)
333
339
tm .assert_frame_equal (res , exp , check_index_type = True )
334
340
335
- msg = "a list-indexer must only include values that are in the categories"
336
- with pytest .raises (KeyError , match = msg ):
341
+ with pytest .raises (KeyError , match = re .escape (msg )):
337
342
df .loc [["a" , "x" ]]
338
343
339
344
# contains unused category
@@ -347,13 +352,6 @@ def test_loc_listlike_dtypes(self):
347
352
)
348
353
tm .assert_frame_equal (res , exp , check_index_type = True )
349
354
350
- res = df .loc [["a" , "e" ]]
351
- exp = DataFrame (
352
- {"A" : [1 , 3 , np .nan ], "B" : [5 , 7 , np .nan ]},
353
- index = CategoricalIndex (["a" , "a" , "e" ], categories = list ("abcde" )),
354
- )
355
- tm .assert_frame_equal (res , exp , check_index_type = True )
356
-
357
355
# duplicated slice
358
356
res = df .loc [["a" , "a" , "b" ]]
359
357
exp = DataFrame (
@@ -362,10 +360,27 @@ def test_loc_listlike_dtypes(self):
362
360
)
363
361
tm .assert_frame_equal (res , exp , check_index_type = True )
364
362
365
- msg = "a list-indexer must only include values that are in the categories"
366
- with pytest .raises (KeyError , match = msg ):
363
+ with pytest .raises (KeyError , match = re .escape (msg )):
367
364
df .loc [["a" , "x" ]]
368
365
366
+ def test_loc_getitem_listlike_unused_category_raises_keyerror (self ):
367
+ # key that is an *unused* category raises
368
+ index = CategoricalIndex (["a" , "b" , "a" , "c" ], categories = list ("abcde" ))
369
+ df = DataFrame ({"A" : [1 , 2 , 3 , 4 ], "B" : [5 , 6 , 7 , 8 ]}, index = index )
370
+
371
+ with pytest .raises (KeyError , match = "e" ):
372
+ # For comparison, check the scalar behavior
373
+ df .loc ["e" ]
374
+
375
+ msg = (
376
+ "Passing list-likes to .loc or [] with any missing labels is no "
377
+ "longer supported. The following labels were missing: "
378
+ "CategoricalIndex(['e'], categories=['a', 'b', 'c', 'd', 'e'], "
379
+ "ordered=False, dtype='category'). See https"
380
+ )
381
+ with pytest .raises (KeyError , match = re .escape (msg )):
382
+ df .loc [["a" , "e" ]]
383
+
369
384
def test_ix_categorical_index (self ):
370
385
# GH 12531
371
386
df = DataFrame (np .random .randn (3 , 3 ), index = list ("ABC" ), columns = list ("XYZ" ))
0 commit comments