Skip to content

Commit 5632219

Browse files
Backport PR #39615: BUG: fix Categorical.astype for dtype=np.int32 argument (#39676)
Co-authored-by: Andrew Wieteska <[email protected]>
1 parent 3ef1923 commit 5632219

File tree

5 files changed

+43
-5
lines changed

5 files changed

+43
-5
lines changed

doc/source/whatsnew/v1.2.2.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ Fixed regressions
2121
- Fixed regression in :meth:`~DataFrame.to_pickle` failing to create bz2/xz compressed pickle files with ``protocol=5`` (:issue:`39002`)
2222
- Fixed regression in :func:`pandas.testing.assert_series_equal` and :func:`pandas.testing.assert_frame_equal` always raising ``AssertionError`` when comparing extension dtypes (:issue:`39410`)
2323
- Fixed regression in :meth:`~DataFrame.to_csv` opening ``codecs.StreamWriter`` in binary mode instead of in text mode and ignoring user-provided ``mode`` (:issue:`39247`)
24+
- Fixed regression in :meth:`Categorical.astype` casting to an incorrect dtype when ``np.int32`` is passed as the ``dtype`` argument (:issue:`39402`)
2425
- Fixed regression in :meth:`~DataFrame.to_excel` creating corrupt files when appending (``mode="a"``) to an existing file (:issue:`39576`)
2526
- Fixed regression in :meth:`DataFrame.transform` failing in case of an empty DataFrame or Series (:issue:`39636`)
2627
- Fixed regression in :meth:`core.window.rolling.Rolling.count` where the ``min_periods`` argument would be set to ``0`` after the operation (:issue:`39554`)

pandas/conftest.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1188,6 +1188,32 @@ def any_nullable_int_dtype(request):
11881188
return request.param
11891189

11901190

1191+
@pytest.fixture(params=tm.ALL_INT_DTYPES + tm.ALL_EA_INT_DTYPES)
1192+
def any_int_or_nullable_int_dtype(request):
1193+
"""
1194+
Parameterized fixture for any integer dtype, NumPy or nullable.
1195+
1196+
* int
1197+
* 'int8'
1198+
* 'uint8'
1199+
* 'int16'
1200+
* 'uint16'
1201+
* 'int32'
1202+
* 'uint32'
1203+
* 'int64'
1204+
* 'uint64'
1205+
* 'UInt8'
1206+
* 'Int8'
1207+
* 'UInt16'
1208+
* 'Int16'
1209+
* 'UInt32'
1210+
* 'Int32'
1211+
* 'UInt64'
1212+
* 'Int64'
1213+
"""
1214+
return request.param
1215+
1216+
11911217
@pytest.fixture(params=tm.ALL_EA_INT_DTYPES + tm.FLOAT_EA_DTYPES)
11921218
def any_numeric_dtype(request):
11931219
"""

pandas/core/arrays/categorical.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -427,16 +427,16 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
427427
else:
428428
# GH8628 (PERF): astype category codes instead of astyping array
429429
try:
430-
astyped_cats = self.categories.astype(dtype=dtype, copy=copy)
430+
new_cats = np.asarray(self.categories)
431+
new_cats = new_cats.astype(dtype=dtype, copy=copy)
431432
except (
432433
TypeError, # downstream error msg for CategoricalIndex is misleading
433434
ValueError,
434435
):
435436
msg = f"Cannot cast {self.categories.dtype} dtype to {dtype}"
436437
raise ValueError(msg)
437438

438-
astyped_cats = extract_array(astyped_cats, extract_numpy=True)
439-
result = take_1d(astyped_cats, libalgos.ensure_platform_int(self._codes))
439+
result = take_1d(new_cats, libalgos.ensure_platform_int(self._codes))
440440

441441
return result
442442

pandas/tests/arrays/categorical/test_dtypes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ def test_astype(self, ordered):
138138
tm.assert_numpy_array_equal(result, expected)
139139

140140
result = cat.astype(int)
141-
expected = np.array(cat, dtype="int64")
141+
expected = np.array(cat, dtype="int")
142142
tm.assert_numpy_array_equal(result, expected)
143143

144144
result = cat.astype(float)

pandas/tests/series/test_dtypes.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def test_astype_categorical_to_other(self):
6868
exp = Series(["a", "b", "b", "a", "a", "c", "c", "c"])
6969
tm.assert_series_equal(cat.astype("str"), exp)
7070
s2 = Series(Categorical(["1", "2", "3", "4"]))
71-
exp2 = Series([1, 2, 3, 4]).astype("int64")
71+
exp2 = Series([1, 2, 3, 4]).astype("int")
7272
tm.assert_series_equal(s2.astype("int"), exp2)
7373

7474
# object don't sort correctly, so just compare that we have the same
@@ -109,6 +109,17 @@ def test_astype_categorical_invalid_conversions(self):
109109
with pytest.raises(TypeError, match=msg):
110110
ser.astype("object").astype(Categorical)
111111

112+
def test_categorical_astype_to_int(self, any_int_or_nullable_int_dtype):
113+
# GH 39402
114+
115+
df = DataFrame(data={"col1": pd.array([2.0, 1.0, 3.0])})
116+
df.col1 = df.col1.astype("category")
117+
df.col1 = df.col1.astype(any_int_or_nullable_int_dtype)
118+
expected = DataFrame(
119+
{"col1": pd.array([2, 1, 3], dtype=any_int_or_nullable_int_dtype)}
120+
)
121+
tm.assert_frame_equal(df, expected)
122+
112123
def test_series_to_categorical(self):
113124
# see gh-16524: test conversion of Series to Categorical
114125
series = Series(["a", "b", "c"])

0 commit comments

Comments
 (0)