From 3875dc00bf1ba472590d0bd85d923f81a0a90b7b Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 12 Sep 2020 13:03:52 +0100 Subject: [PATCH 1/2] PERF: converting arrays with many strings to StringArray --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/_libs/lib.pyx | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index e65daa439a225..d8a7a86a2600c 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -205,6 +205,7 @@ Deprecations Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ +- Performance improvements when converting :class:`Series` with object dtype and many string elements to dtype :class:`StringDtype` (:issue:`xxxxx`) - Performance improvement in :meth:`GroupBy.agg` with the ``numba`` engine (:issue:`35759`) - diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index eadfcefaac73d..af94178af7d7d 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -655,6 +655,10 @@ cpdef ndarray[object] ensure_string_array( for i in range(n): val = result[i] + + if isinstance(val, str): + continue + if not checknull(val): result[i] = str(val) else: From 3c34edc8144c96d821077076bbcdb5d5c8aa4e8b Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 12 Sep 2020 13:56:32 +0100 Subject: [PATCH 2/2] update whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index d8a7a86a2600c..9b7a96e367af3 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -205,7 +205,7 @@ Deprecations Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ -- Performance improvements when converting :class:`Series` with object dtype and many string elements to dtype :class:`StringDtype` (:issue:`xxxxx`) +- Performance improvements when creating :class:`Series` with dtype ``str`` or :class:`StringDtype` from an array with many string elements (:issue:`36304`) - Performance
improvement in :meth:`GroupBy.agg` with the ``numba`` engine (:issue:`35759`) -