Skip to content

Commit fd2e002

Browse files
authored
ENH: Categorical.fillna allow Categorical/ndarray (#32420)
1 parent 98a6b3c commit fd2e002

File tree

6 files changed: 46 additions (+46) and 14 deletions (-14)

6 files changed: 46 additions (+46) and 14 deletions (-14)

pandas/core/algorithms.py

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111

1212
from pandas._libs import Timestamp, algos, hashtable as htable, lib
1313
from pandas._libs.tslib import iNaT
14+
from pandas._typing import AnyArrayLike
1415
from pandas.util._decorators import doc
1516

1617
from pandas.core.dtypes.cast import (
@@ -45,10 +46,14 @@
4546
is_unsigned_integer_dtype,
4647
needs_i8_conversion,
4748
)
48-
from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries
49+
from pandas.core.dtypes.generic import (
50+
ABCExtensionArray,
51+
ABCIndex,
52+
ABCIndexClass,
53+
ABCSeries,
54+
)
4955
from pandas.core.dtypes.missing import isna, na_value_for_dtype
5056

51-
import pandas.core.common as com
5257
from pandas.core.construction import array, extract_array
5358
from pandas.core.indexers import validate_indices
5459

@@ -384,7 +389,7 @@ def unique(values):
384389
unique1d = unique
385390

386391

387-
def isin(comps, values) -> np.ndarray:
392+
def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
388393
"""
389394
Compute the isin boolean array.
390395
@@ -409,15 +414,14 @@ def isin(comps, values) -> np.ndarray:
409414
f"to isin(), you passed a [{type(values).__name__}]"
410415
)
411416

412-
if not isinstance(values, (ABCIndex, ABCSeries, np.ndarray)):
417+
if not isinstance(values, (ABCIndex, ABCSeries, ABCExtensionArray, np.ndarray)):
413418
values = construct_1d_object_array_from_listlike(list(values))
414419

420+
comps = extract_array(comps, extract_numpy=True)
415421
if is_categorical_dtype(comps):
416422
# TODO(extension)
417423
# handle categoricals
418-
return comps._values.isin(values)
419-
420-
comps = com.values_from_object(comps)
424+
return comps.isin(values) # type: ignore
421425

422426
comps, dtype = _ensure_data(comps)
423427
values, _ = _ensure_data(values, dtype=dtype)

pandas/core/arrays/categorical.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1733,12 +1733,17 @@ def fillna(self, value=None, method=None, limit=None):
17331733

17341734
# If value is a dict or a Series (a dict value has already
17351735
# been converted to a Series)
1736-
if isinstance(value, ABCSeries):
1737-
if not value[~value.isin(self.categories)].isna().all():
1736+
if isinstance(value, (np.ndarray, Categorical, ABCSeries)):
1737+
# We get ndarray or Categorical if called via Series.fillna,
1738+
# where it will unwrap another aligned Series before getting here
1739+
1740+
mask = ~algorithms.isin(value, self.categories)
1741+
if not isna(value[mask]).all():
17381742
raise ValueError("fill value must be in categories")
17391743

17401744
values_codes = _get_codes_for_values(value, self.categories)
17411745
indexer = np.where(codes == -1)
1746+
codes = codes.copy()
17421747
codes[indexer] = values_codes[indexer]
17431748

17441749
# If value is not a dict or Series it should be a scalar

pandas/core/generic.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6089,6 +6089,8 @@ def fillna(
60896089
value = create_series_with_explicit_dtype(
60906090
value, dtype_if_empty=object
60916091
)
6092+
value = value.reindex(self.index, copy=False)
6093+
value = value._values
60926094
elif not is_list_like(value):
60936095
pass
60946096
else:

pandas/core/internals/managers.py

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -391,6 +391,7 @@ def apply(self: T, f, filter=None, **kwargs) -> T:
391391
BlockManager
392392
"""
393393
result_blocks = []
394+
# fillna: Series/DataFrame is responsible for making sure value is aligned
394395

395396
# filter kwarg is used in replace-* family of methods
396397
if filter is not None:
@@ -415,11 +416,6 @@ def apply(self: T, f, filter=None, **kwargs) -> T:
415416
align_keys = ["new", "mask"]
416417
else:
417418
align_keys = ["mask"]
418-
elif f == "fillna":
419-
# fillna internally does putmask, maybe it's better to do this
420-
# at mgr, not block level?
421-
align_copy = False
422-
align_keys = ["value"]
423419
else:
424420
align_keys = []
425421

pandas/tests/arrays/categorical/test_missing.py

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -82,3 +82,18 @@ def test_fillna_iterable_category(self, named):
8282
expected = Categorical([Point(0, 0), Point(0, 1), Point(0, 0)])
8383

8484
tm.assert_categorical_equal(result, expected)
85+
86+
def test_fillna_array(self):
87+
# accept Categorical or ndarray value if it holds appropriate values
88+
cat = Categorical(["A", "B", "C", None, None])
89+
90+
other = cat.fillna("C")
91+
result = cat.fillna(other)
92+
tm.assert_categorical_equal(result, other)
93+
assert isna(cat[-1]) # didn't modify original inplace
94+
95+
other = np.array(["A", "B", "C", "B", "A"])
96+
result = cat.fillna(other)
97+
expected = Categorical(["A", "B", "C", "B", "A"], dtype=cat.dtype)
98+
tm.assert_categorical_equal(result, expected)
99+
assert isna(cat[-1]) # didn't modify original inplace

pandas/tests/test_algos.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -760,6 +760,16 @@ def test_categorical_from_codes(self):
760760
result = algos.isin(Sd, St)
761761
tm.assert_numpy_array_equal(expected, result)
762762

763+
def test_categorical_isin(self):
764+
vals = np.array([0, 1, 2, 0])
765+
cats = ["a", "b", "c"]
766+
cat = Categorical(1).from_codes(vals, cats)
767+
other = Categorical(1).from_codes(np.array([0, 1]), cats)
768+
769+
expected = np.array([True, True, False, True])
770+
result = algos.isin(cat, other)
771+
tm.assert_numpy_array_equal(expected, result)
772+
763773
def test_same_nan_is_in(self):
764774
# GH 22160
765775
# nan is special, because from " a is b" doesn't follow "a == b"

0 commit comments

Comments
 (0)