Skip to content

String dtype: remove fallback Performance warnings for string methods #59760

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 0 additions & 19 deletions pandas/core/arrays/arrow/_arrow_utils.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,8 @@
from __future__ import annotations

import warnings

import numpy as np
import pyarrow

from pandas._config.config import get_option

from pandas.errors import PerformanceWarning
from pandas.util._exceptions import find_stack_level


def fallback_performancewarning(version: str | None = None) -> None:
"""
Raise a PerformanceWarning for falling back to ExtensionArray's
non-pyarrow method
"""
if get_option("performance_warnings"):
msg = "Falling back on a non-pyarrow code path which may decrease performance."
if version is not None:
msg += f" Upgrade to pyarrow >={version} to possibly suppress this warning."
warnings.warn(msg, PerformanceWarning, stacklevel=find_stack_level())


def pyarrow_array_to_numpy_and_mask(
arr, dtype: np.dtype
Expand Down
8 changes: 0 additions & 8 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@

import numpy as np

from pandas._config.config import get_option

from pandas._libs import (
lib,
missing as libmissing,
Expand Down Expand Up @@ -43,8 +41,6 @@
import pyarrow as pa
import pyarrow.compute as pc

from pandas.core.arrays.arrow._arrow_utils import fallback_performancewarning


if TYPE_CHECKING:
from collections.abc import (
Expand Down Expand Up @@ -299,8 +295,6 @@ def _str_contains(
self, pat, case: bool = True, flags: int = 0, na=np.nan, regex: bool = True
):
if flags:
if get_option("mode.performance_warnings"):
fallback_performancewarning()
return super()._str_contains(pat, case, flags, na, regex)

if not isna(na):
Expand All @@ -326,8 +320,6 @@ def _str_replace(
regex: bool = True,
):
if isinstance(pat, re.Pattern) or callable(repl) or not case or flags:
if get_option("mode.performance_warnings"):
fallback_performancewarning()
return super()._str_replace(pat, repl, n, case, flags, regex)

return ArrowExtensionArray._str_replace(self, pat, repl, n, case, flags, regex)
Expand Down
1 change: 0 additions & 1 deletion pandas/tests/extension/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,6 @@ def test_compare_scalar(self, data, comparison_op):
ser = pd.Series(data)
self._compare_other(ser, data, comparison_op, "abc")

@pytest.mark.filterwarnings("ignore:Falling back:pandas.errors.PerformanceWarning")
def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op):
super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op)

Expand Down
12 changes: 0 additions & 12 deletions pandas/tests/indexes/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,9 +246,6 @@ def test_intersection_base(self, index):
with pytest.raises(TypeError, match=msg):
first.intersection([1, 2, 3])

@pytest.mark.filterwarnings(
"ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
)
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
def test_union_base(self, index):
index = index.unique()
Expand Down Expand Up @@ -276,9 +273,6 @@ def test_union_base(self, index):
first.union([1, 2, 3])

@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
@pytest.mark.filterwarnings(
"ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
)
def test_difference_base(self, sort, index):
first = index[2:]
second = index[:4]
Expand All @@ -305,9 +299,6 @@ def test_difference_base(self, sort, index):
first.difference([1, 2, 3], sort)

@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
@pytest.mark.filterwarnings(
"ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
)
def test_symmetric_difference(self, index):
if isinstance(index, CategoricalIndex):
pytest.skip(f"Not relevant for {type(index).__name__}")
Expand Down Expand Up @@ -529,9 +520,6 @@ def test_intersection_difference_match_empty(self, index, sort):


@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
@pytest.mark.filterwarnings(
"ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
)
@pytest.mark.parametrize(
"method", ["intersection", "union", "difference", "symmetric_difference"]
)
Expand Down
103 changes: 27 additions & 76 deletions pandas/tests/strings/test_find_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,6 @@
# --------------------------------------------------------------------------------------


def using_pyarrow(dtype):
return dtype == "string" and dtype.storage == "pyarrow"


def test_contains(any_string_dtype):
values = np.array(
["foo", np.nan, "fooommm__foo", "mmm_", "foommm[_]+bar"], dtype=np.object_
Expand Down Expand Up @@ -458,13 +454,10 @@ def test_replace_mixed_object():
tm.assert_series_equal(result, expected)


def test_replace_unicode(any_string_dtype, performance_warning):
def test_replace_unicode(any_string_dtype):
ser = Series([b"abcd,\xc3\xa0".decode("utf-8")], dtype=any_string_dtype)
expected = Series([b"abcd, \xc3\xa0".decode("utf-8")], dtype=any_string_dtype)
with tm.maybe_produces_warning(
performance_warning, using_pyarrow(any_string_dtype)
):
result = ser.str.replace(r"(?<=\w),(?=\w)", ", ", flags=re.UNICODE, regex=True)
result = ser.str.replace(r"(?<=\w),(?=\w)", ", ", flags=re.UNICODE, regex=True)
tm.assert_series_equal(result, expected)


Expand All @@ -478,24 +471,21 @@ def test_replace_wrong_repl_type_raises(any_string_dtype, index_or_series, repl,
obj.str.replace("a", repl)


def test_replace_callable(any_string_dtype, performance_warning):
def test_replace_callable(any_string_dtype):
# GH 15055
ser = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype)

# test with callable
repl = lambda m: m.group(0).swapcase()
with tm.maybe_produces_warning(
performance_warning, using_pyarrow(any_string_dtype)
):
result = ser.str.replace("[a-z][A-Z]{2}", repl, n=2, regex=True)
result = ser.str.replace("[a-z][A-Z]{2}", repl, n=2, regex=True)
expected = Series(["foObaD__baRbaD", np.nan], dtype=any_string_dtype)
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize(
"repl", [lambda: None, lambda m, x: None, lambda m, x, y=None: None]
)
def test_replace_callable_raises(any_string_dtype, performance_warning, repl):
def test_replace_callable_raises(any_string_dtype, repl):
# GH 15055
values = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype)

Expand All @@ -504,43 +494,31 @@ def test_replace_callable_raises(any_string_dtype, performance_warning, repl):
r"((takes)|(missing)) (?(2)from \d+ to )?\d+ "
r"(?(3)required )positional arguments?"
)
if not using_pyarrow(any_string_dtype):
performance_warning = False
with pytest.raises(TypeError, match=msg):
with tm.assert_produces_warning(performance_warning):
values.str.replace("a", repl, regex=True)
values.str.replace("a", repl, regex=True)


def test_replace_callable_named_groups(any_string_dtype, performance_warning):
def test_replace_callable_named_groups(any_string_dtype):
# test regex named groups
ser = Series(["Foo Bar Baz", np.nan], dtype=any_string_dtype)
pat = r"(?P<first>\w+) (?P<middle>\w+) (?P<last>\w+)"
repl = lambda m: m.group("middle").swapcase()
with tm.maybe_produces_warning(
performance_warning, using_pyarrow(any_string_dtype)
):
result = ser.str.replace(pat, repl, regex=True)
result = ser.str.replace(pat, repl, regex=True)
expected = Series(["bAR", np.nan], dtype=any_string_dtype)
tm.assert_series_equal(result, expected)


def test_replace_compiled_regex(any_string_dtype, performance_warning):
def test_replace_compiled_regex(any_string_dtype):
# GH 15446
ser = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype)

# test with compiled regex
pat = re.compile(r"BAD_*")
with tm.maybe_produces_warning(
performance_warning, using_pyarrow(any_string_dtype)
):
result = ser.str.replace(pat, "", regex=True)
result = ser.str.replace(pat, "", regex=True)
expected = Series(["foobar", np.nan], dtype=any_string_dtype)
tm.assert_series_equal(result, expected)

with tm.maybe_produces_warning(
performance_warning, using_pyarrow(any_string_dtype)
):
result = ser.str.replace(pat, "", n=1, regex=True)
result = ser.str.replace(pat, "", n=1, regex=True)
expected = Series(["foobarBAD", np.nan], dtype=any_string_dtype)
tm.assert_series_equal(result, expected)

Expand All @@ -557,14 +535,11 @@ def test_replace_compiled_regex_mixed_object():
tm.assert_series_equal(result, expected)


def test_replace_compiled_regex_unicode(any_string_dtype, performance_warning):
def test_replace_compiled_regex_unicode(any_string_dtype):
ser = Series([b"abcd,\xc3\xa0".decode("utf-8")], dtype=any_string_dtype)
expected = Series([b"abcd, \xc3\xa0".decode("utf-8")], dtype=any_string_dtype)
pat = re.compile(r"(?<=\w),(?=\w)", flags=re.UNICODE)
with tm.maybe_produces_warning(
performance_warning, using_pyarrow(any_string_dtype)
):
result = ser.str.replace(pat, ", ", regex=True)
result = ser.str.replace(pat, ", ", regex=True)
tm.assert_series_equal(result, expected)


Expand All @@ -586,15 +561,12 @@ def test_replace_compiled_regex_raises(any_string_dtype):
ser.str.replace(pat, "", case=True, regex=True)


def test_replace_compiled_regex_callable(any_string_dtype, performance_warning):
def test_replace_compiled_regex_callable(any_string_dtype):
# test with callable
ser = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype)
repl = lambda m: m.group(0).swapcase()
pat = re.compile("[a-z][A-Z]{2}")
with tm.maybe_produces_warning(
performance_warning, using_pyarrow(any_string_dtype)
):
result = ser.str.replace(pat, repl, n=2, regex=True)
result = ser.str.replace(pat, repl, n=2, regex=True)
expected = Series(["foObaD__baRbaD", np.nan], dtype=any_string_dtype)
tm.assert_series_equal(result, expected)

Expand Down Expand Up @@ -626,7 +598,7 @@ def test_replace_literal_compiled_raises(any_string_dtype):
ser.str.replace(pat, "", regex=False)


def test_replace_moar(any_string_dtype, performance_warning):
def test_replace_moar(any_string_dtype):
# PR #1179
ser = Series(
["A", "B", "C", "Aaba", "Baca", "", np.nan, "CABA", "dog", "cat"],
Expand All @@ -640,10 +612,7 @@ def test_replace_moar(any_string_dtype, performance_warning):
)
tm.assert_series_equal(result, expected)

with tm.maybe_produces_warning(
performance_warning, using_pyarrow(any_string_dtype)
):
result = ser.str.replace("A", "YYY", case=False)
result = ser.str.replace("A", "YYY", case=False)
expected = Series(
[
"YYY",
Expand All @@ -661,10 +630,7 @@ def test_replace_moar(any_string_dtype, performance_warning):
)
tm.assert_series_equal(result, expected)

with tm.maybe_produces_warning(
performance_warning, using_pyarrow(any_string_dtype)
):
result = ser.str.replace("^.a|dog", "XX-XX ", case=False, regex=True)
result = ser.str.replace("^.a|dog", "XX-XX ", case=False, regex=True)
expected = Series(
[
"A",
Expand All @@ -683,21 +649,15 @@ def test_replace_moar(any_string_dtype, performance_warning):
tm.assert_series_equal(result, expected)


def test_replace_not_case_sensitive_not_regex(any_string_dtype, performance_warning):
def test_replace_not_case_sensitive_not_regex(any_string_dtype):
# https://github.com/pandas-dev/pandas/issues/41602
ser = Series(["A.", "a.", "Ab", "ab", np.nan], dtype=any_string_dtype)

with tm.maybe_produces_warning(
performance_warning, using_pyarrow(any_string_dtype)
):
result = ser.str.replace("a", "c", case=False, regex=False)
result = ser.str.replace("a", "c", case=False, regex=False)
expected = Series(["c.", "c.", "cb", "cb", np.nan], dtype=any_string_dtype)
tm.assert_series_equal(result, expected)

with tm.maybe_produces_warning(
performance_warning, using_pyarrow(any_string_dtype)
):
result = ser.str.replace("a.", "c.", case=False, regex=False)
result = ser.str.replace("a.", "c.", case=False, regex=False)
expected = Series(["c.", "c.", "Ab", "ab", np.nan], dtype=any_string_dtype)
tm.assert_series_equal(result, expected)

Expand Down Expand Up @@ -853,7 +813,7 @@ def test_fullmatch_na_kwarg(any_string_dtype):
tm.assert_series_equal(result, expected)


def test_fullmatch_case_kwarg(any_string_dtype, performance_warning):
def test_fullmatch_case_kwarg(any_string_dtype):
ser = Series(["ab", "AB", "abc", "ABC"], dtype=any_string_dtype)
expected_dtype = (
np.bool_ if is_object_or_nan_string_dtype(any_string_dtype) else "boolean"
Expand All @@ -869,10 +829,7 @@ def test_fullmatch_case_kwarg(any_string_dtype, performance_warning):
result = ser.str.fullmatch("ab", case=False)
tm.assert_series_equal(result, expected)

with tm.maybe_produces_warning(
performance_warning, using_pyarrow(any_string_dtype)
):
result = ser.str.fullmatch("ab", flags=re.IGNORECASE)
result = ser.str.fullmatch("ab", flags=re.IGNORECASE)
tm.assert_series_equal(result, expected)


Expand Down Expand Up @@ -1046,7 +1003,7 @@ def test_translate_mixed_object():
# --------------------------------------------------------------------------------------


def test_flags_kwarg(any_string_dtype, performance_warning):
def test_flags_kwarg(any_string_dtype):
data = {
"Dave": "dave@google.com",
"Steve": "steve@gmail.com",
Expand All @@ -1057,17 +1014,13 @@ def test_flags_kwarg(any_string_dtype, performance_warning):

pat = r"([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\.([A-Z]{2,4})"

use_pyarrow = using_pyarrow(any_string_dtype)

result = data.str.extract(pat, flags=re.IGNORECASE, expand=True)
assert result.iloc[0].tolist() == ["dave", "google", "com"]

with tm.maybe_produces_warning(performance_warning, use_pyarrow):
result = data.str.match(pat, flags=re.IGNORECASE)
result = data.str.match(pat, flags=re.IGNORECASE)
assert result.iloc[0]

with tm.maybe_produces_warning(performance_warning, use_pyarrow):
result = data.str.fullmatch(pat, flags=re.IGNORECASE)
result = data.str.fullmatch(pat, flags=re.IGNORECASE)
assert result.iloc[0]

result = data.str.findall(pat, flags=re.IGNORECASE)
Expand All @@ -1077,8 +1030,6 @@ def test_flags_kwarg(any_string_dtype, performance_warning):
assert result.iloc[0] == 1

msg = "has match groups"
with tm.assert_produces_warning(
UserWarning, match=msg, raise_on_extra_warnings=not use_pyarrow
):
with tm.assert_produces_warning(UserWarning, match=msg):
result = data.str.contains(pat, flags=re.IGNORECASE)
assert result.iloc[0]
1 change: 0 additions & 1 deletion pandas/tests/strings/test_string_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
)


@pytest.mark.filterwarnings("ignore:Falling back")
def test_string_array(nullable_string_dtype, any_string_method):
method_name, args, kwargs = any_string_method

Expand Down
Loading