Skip to content

Commit 0b671be

Browse files
authored
REF: unify casting logic in Categorical.__init__ (#40097)
1 parent 4060498 commit 0b671be

File tree

3 files changed

+25
-25
lines changed

3 files changed

+25
-25
lines changed

pandas/core/arrays/categorical.py

+19-18
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,6 @@
4848
from pandas.core.dtypes.cast import (
4949
coerce_indexer_dtype,
5050
maybe_cast_to_extension_array,
51-
maybe_infer_to_datetimelike,
52-
sanitize_to_nanoseconds,
5351
)
5452
from pandas.core.dtypes.common import (
5553
ensure_int64,
@@ -396,24 +394,27 @@ def __init__(
396394
if dtype.categories is None:
397395
dtype = CategoricalDtype(values.categories, dtype.ordered)
398396
elif not isinstance(values, (ABCIndex, ABCSeries, ExtensionArray)):
399-
# sanitize_array coerces np.nan to a string under certain versions
400-
# of numpy
401-
if not isinstance(values, (np.ndarray, list)):
402-
# convert e.g. range, tuple to allow for stronger typing
403-
# of maybe_infer_to_datetimelike
404-
values = list(values)
405-
values = maybe_infer_to_datetimelike(values)
406-
if isinstance(values, np.ndarray):
407-
values = sanitize_to_nanoseconds(values)
408-
elif not isinstance(values, ExtensionArray):
409-
values = com.convert_to_list_like(values)
410-
397+
values = com.convert_to_list_like(values)
398+
if isinstance(values, list) and len(values) == 0:
411399
# By convention, empty lists result in object dtype:
412-
sanitize_dtype = np.dtype("O") if len(values) == 0 else None
413-
null_mask = isna(values)
400+
values = np.array([], dtype=object)
401+
elif isinstance(values, np.ndarray):
402+
if values.ndim > 1:
403+
# preempt sanitize_array from raising ValueError
404+
raise NotImplementedError(
405+
"> 1 ndim Categorical are not supported at this time"
406+
)
407+
values = sanitize_array(values, None)
408+
else:
409+
# i.e. must be a list
410+
arr = sanitize_array(values, None)
411+
null_mask = isna(arr)
414412
if null_mask.any():
415-
values = [values[idx] for idx in np.where(~null_mask)[0]]
416-
values = sanitize_array(values, None, dtype=sanitize_dtype)
413+
# We remove null values here, then below will re-insert
414+
# them, grep "full_codes"
415+
arr = [values[idx] for idx in np.where(~null_mask)[0]]
416+
arr = sanitize_array(arr, None)
417+
values = arr
417418

418419
if dtype.categories is None:
419420
try:

pandas/tests/arrays/categorical/test_constructors.py

+6
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,12 @@ def test_categorical_scalar_deprecated(self):
4242
with tm.assert_produces_warning(FutureWarning):
4343
Categorical("A", categories=["A", "B"])
4444

45+
def test_categorical_1d_only(self):
46+
# ndim > 1
47+
msg = "> 1 ndim Categorical are not supported at this time"
48+
with pytest.raises(NotImplementedError, match=msg):
49+
Categorical(np.array([list("abcd")]))
50+
4551
def test_validate_ordered(self):
4652
# see gh-14058
4753
exp_msg = "'ordered' must either be 'True' or 'False'"

pandas/tests/frame/test_constructors.py

-7
Original file line numberDiff line numberDiff line change
@@ -2062,13 +2062,6 @@ def test_construct_from_listlikes_mismatched_lengths(self):
20622062
with pytest.raises(ValueError, match=msg):
20632063
DataFrame([Categorical(list("abc")), Categorical(list("abdefg"))])
20642064

2065-
def test_categorical_1d_only(self):
2066-
# TODO: belongs in Categorical tests
2067-
# ndim > 1
2068-
msg = "> 1 ndim Categorical are not supported at this time"
2069-
with pytest.raises(NotImplementedError, match=msg):
2070-
Categorical(np.array([list("abcd")]))
2071-
20722065
def test_constructor_categorical_series(self):
20732066

20742067
items = [1, 2, 3, 1]

0 commit comments

Comments
 (0)