diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index bdc64d5f14a27..9db21800d2499 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -48,8 +48,6 @@ from pandas.core.dtypes.cast import ( coerce_indexer_dtype, maybe_cast_to_extension_array, - maybe_infer_to_datetimelike, - sanitize_to_nanoseconds, ) from pandas.core.dtypes.common import ( ensure_int64, @@ -396,24 +394,27 @@ def __init__( if dtype.categories is None: dtype = CategoricalDtype(values.categories, dtype.ordered) elif not isinstance(values, (ABCIndex, ABCSeries, ExtensionArray)): - # sanitize_array coerces np.nan to a string under certain versions - # of numpy - if not isinstance(values, (np.ndarray, list)): - # convert e.g. range, tuple to allow for stronger typing - # of maybe_infer_to_datetimelike - values = list(values) - values = maybe_infer_to_datetimelike(values) - if isinstance(values, np.ndarray): - values = sanitize_to_nanoseconds(values) - elif not isinstance(values, ExtensionArray): - values = com.convert_to_list_like(values) - + values = com.convert_to_list_like(values) + if isinstance(values, list) and len(values) == 0: # By convention, empty lists result in object dtype: - sanitize_dtype = np.dtype("O") if len(values) == 0 else None - null_mask = isna(values) + values = np.array([], dtype=object) + elif isinstance(values, np.ndarray): + if values.ndim > 1: + # preempt sanitize_array from raising ValueError + raise NotImplementedError( + "> 1 ndim Categorical are not supported at this time" + ) + values = sanitize_array(values, None) + else: + # i.e. must be a list + arr = sanitize_array(values, None) + null_mask = isna(arr) if null_mask.any(): - values = [values[idx] for idx in np.where(~null_mask)[0]] - values = sanitize_array(values, None, dtype=sanitize_dtype) + # We remove null values here, then below will re-insert + # them, grep "full_codes" + arr = [values[idx] for idx in np.where(~null_mask)[0]] + arr = sanitize_array(arr, None) + values = arr if dtype.categories is None: try: diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 7c144c390a128..93ba16c5fda22 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -42,6 +42,12 @@ def test_categorical_scalar_deprecated(self): with tm.assert_produces_warning(FutureWarning): Categorical("A", categories=["A", "B"]) + def test_categorical_1d_only(self): + # ndim > 1 + msg = "> 1 ndim Categorical are not supported at this time" + with pytest.raises(NotImplementedError, match=msg): + Categorical(np.array([list("abcd")])) + def test_validate_ordered(self): # see gh-14058 exp_msg = "'ordered' must either be 'True' or 'False'" diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index afc7ccb516c7f..280d1da4070d9 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2062,13 +2062,6 @@ def test_construct_from_listlikes_mismatched_lengths(self): with pytest.raises(ValueError, match=msg): DataFrame([Categorical(list("abc")), Categorical(list("abdefg"))]) - def test_categorical_1d_only(self): - # TODO: belongs in Categorical tests - # ndim > 1 - msg = "> 1 ndim Categorical are not supported at this time" - with pytest.raises(NotImplementedError, match=msg): - Categorical(np.array([list("abcd")])) - def test_constructor_categorical_series(self): items = [1, 2, 3, 1]