Skip to content

Commit a43c42c

Browse files
[ArrowStringArray] CLN: remove hasattr checks (#41327)
1 parent b8f8955 commit a43c42c

File tree

3 files changed

+78
-55
lines changed

3 files changed

+78
-55
lines changed

pandas/compat/__init__.py

+10
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,12 @@
2020
np_version_under1p19,
2121
np_version_under1p20,
2222
)
23+
from pandas.compat.pyarrow import (
24+
pa_version_under1p0,
25+
pa_version_under2p0,
26+
pa_version_under3p0,
27+
pa_version_under4p0,
28+
)
2329

2430
PY38 = sys.version_info >= (3, 8)
2531
PY39 = sys.version_info >= (3, 9)
@@ -136,4 +142,8 @@ def get_lzma_file(lzma):
136142
"np_version_under1p18",
137143
"np_version_under1p19",
138144
"np_version_under1p20",
145+
"pa_version_under1p0",
146+
"pa_version_under2p0",
147+
"pa_version_under3p0",
148+
"pa_version_under4p0",
139149
]

pandas/compat/pyarrow.py

+18
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,18 @@
1+
""" support pyarrow compatibility across versions """
2+
3+
from distutils.version import LooseVersion
4+
5+
try:
6+
import pyarrow as pa
7+
8+
_pa_version = pa.__version__
9+
_palv = LooseVersion(_pa_version)
10+
pa_version_under1p0 = _palv < LooseVersion("1.0.0")
11+
pa_version_under2p0 = _palv < LooseVersion("2.0.0")
12+
pa_version_under3p0 = _palv < LooseVersion("3.0.0")
13+
pa_version_under4p0 = _palv < LooseVersion("4.0.0")
14+
except ImportError:
15+
pa_version_under1p0 = True
16+
pa_version_under2p0 = True
17+
pa_version_under3p0 = True
18+
pa_version_under4p0 = True

pandas/core/arrays/string_arrow.py

+50-55
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,11 @@
2222
Scalar,
2323
type_t,
2424
)
25+
from pandas.compat import (
26+
pa_version_under2p0,
27+
pa_version_under3p0,
28+
pa_version_under4p0,
29+
)
2530
from pandas.util._decorators import doc
2631
from pandas.util._validators import validate_fillna_kwargs
2732

@@ -667,9 +672,7 @@ def take(
667672
return type(self)(self._data.take(indices_array))
668673

669674
def isin(self, values):
670-
671-
# pyarrow.compute.is_in added in pyarrow 2.0.0
672-
if not hasattr(pc, "is_in"):
675+
if pa_version_under2p0:
673676
return super().isin(values)
674677

675678
value_set = [
@@ -684,7 +687,7 @@ def isin(self, values):
684687
return np.zeros(len(self), dtype=bool)
685688

686689
kwargs = {}
687-
if LooseVersion(pa.__version__) < "3.0.0":
690+
if pa_version_under3p0:
688691
# in pyarrow 2.0.0 skip_null is ignored but is a required keyword and raises
689692
# with unexpected keyword argument in pyarrow 3.0.0+
690693
kwargs["skip_null"] = True
@@ -802,11 +805,10 @@ def _str_contains(self, pat, case=True, flags=0, na=np.nan, regex: bool = True):
802805
return super()._str_contains(pat, case, flags, na, regex)
803806

804807
if regex:
805-
# match_substring_regex added in pyarrow 4.0.0
806-
if hasattr(pc, "match_substring_regex") and case:
807-
result = pc.match_substring_regex(self._data, pat)
808-
else:
808+
if pa_version_under4p0 or case is False:
809809
return super()._str_contains(pat, case, flags, na, regex)
810+
else:
811+
result = pc.match_substring_regex(self._data, pat)
810812
else:
811813
if case:
812814
result = pc.match_substring(self._data, pat)
@@ -818,27 +820,25 @@ def _str_contains(self, pat, case=True, flags=0, na=np.nan, regex: bool = True):
818820
return result
819821

820822
def _str_startswith(self, pat, na=None):
821-
# match_substring_regex added in pyarrow 4.0.0
822-
if hasattr(pc, "match_substring_regex"):
823-
result = pc.match_substring_regex(self._data, "^" + re.escape(pat))
824-
result = BooleanDtype().__from_arrow__(result)
825-
if not isna(na):
826-
result[isna(result)] = bool(na)
827-
return result
828-
else:
823+
if pa_version_under4p0:
829824
return super()._str_startswith(pat, na)
830825

826+
result = pc.match_substring_regex(self._data, "^" + re.escape(pat))
827+
result = BooleanDtype().__from_arrow__(result)
828+
if not isna(na):
829+
result[isna(result)] = bool(na)
830+
return result
831+
831832
def _str_endswith(self, pat, na=None):
832-
# match_substring_regex added in pyarrow 4.0.0
833-
if hasattr(pc, "match_substring_regex"):
834-
result = pc.match_substring_regex(self._data, re.escape(pat) + "$")
835-
result = BooleanDtype().__from_arrow__(result)
836-
if not isna(na):
837-
result[isna(result)] = bool(na)
838-
return result
839-
else:
833+
if pa_version_under4p0:
840834
return super()._str_endswith(pat, na)
841835

836+
result = pc.match_substring_regex(self._data, re.escape(pat) + "$")
837+
result = BooleanDtype().__from_arrow__(result)
838+
if not isna(na):
839+
result[isna(result)] = bool(na)
840+
return result
841+
842842
def _str_match(
843843
self, pat: str, case: bool = True, flags: int = 0, na: Scalar = None
844844
):
@@ -871,13 +871,12 @@ def _str_isnumeric(self):
871871
return BooleanDtype().__from_arrow__(result)
872872

873873
def _str_isspace(self):
874-
# utf8_is_space added in pyarrow 2.0.0
875-
if hasattr(pc, "utf8_is_space"):
876-
result = pc.utf8_is_space(self._data)
877-
return BooleanDtype().__from_arrow__(result)
878-
else:
874+
if pa_version_under2p0:
879875
return super()._str_isspace()
880876

877+
result = pc.utf8_is_space(self._data)
878+
return BooleanDtype().__from_arrow__(result)
879+
881880
def _str_istitle(self):
882881
result = pc.utf8_is_title(self._data)
883882
return BooleanDtype().__from_arrow__(result)
@@ -887,48 +886,44 @@ def _str_isupper(self):
887886
return BooleanDtype().__from_arrow__(result)
888887

889888
def _str_len(self):
890-
# utf8_length added in pyarrow 4.0.0
891-
if hasattr(pc, "utf8_length"):
892-
result = pc.utf8_length(self._data)
893-
return Int64Dtype().__from_arrow__(result)
894-
else:
889+
if pa_version_under4p0:
895890
return super()._str_len()
896891

892+
result = pc.utf8_length(self._data)
893+
return Int64Dtype().__from_arrow__(result)
894+
897895
def _str_lower(self):
898896
return type(self)(pc.utf8_lower(self._data))
899897

900898
def _str_upper(self):
901899
return type(self)(pc.utf8_upper(self._data))
902900

903901
def _str_strip(self, to_strip=None):
902+
if pa_version_under4p0:
903+
return super()._str_strip(to_strip)
904+
904905
if to_strip is None:
905-
# utf8_trim_whitespace added in pyarrow 4.0.0
906-
if hasattr(pc, "utf8_trim_whitespace"):
907-
return type(self)(pc.utf8_trim_whitespace(self._data))
906+
result = pc.utf8_trim_whitespace(self._data)
908907
else:
909-
# utf8_trim added in pyarrow 4.0.0
910-
if hasattr(pc, "utf8_trim"):
911-
return type(self)(pc.utf8_trim(self._data, characters=to_strip))
912-
return super()._str_strip(to_strip)
908+
result = pc.utf8_trim(self._data, characters=to_strip)
909+
return type(self)(result)
913910

914911
def _str_lstrip(self, to_strip=None):
912+
if pa_version_under4p0:
913+
return super()._str_lstrip(to_strip)
914+
915915
if to_strip is None:
916-
# utf8_ltrim_whitespace added in pyarrow 4.0.0
917-
if hasattr(pc, "utf8_ltrim_whitespace"):
918-
return type(self)(pc.utf8_ltrim_whitespace(self._data))
916+
result = pc.utf8_ltrim_whitespace(self._data)
919917
else:
920-
# utf8_ltrim added in pyarrow 4.0.0
921-
if hasattr(pc, "utf8_ltrim"):
922-
return type(self)(pc.utf8_ltrim(self._data, characters=to_strip))
923-
return super()._str_lstrip(to_strip)
918+
result = pc.utf8_ltrim(self._data, characters=to_strip)
919+
return type(self)(result)
924920

925921
def _str_rstrip(self, to_strip=None):
922+
if pa_version_under4p0:
923+
return super()._str_rstrip(to_strip)
924+
926925
if to_strip is None:
927-
# utf8_rtrim_whitespace added in pyarrow 4.0.0
928-
if hasattr(pc, "utf8_rtrim_whitespace"):
929-
return type(self)(pc.utf8_rtrim_whitespace(self._data))
926+
result = pc.utf8_rtrim_whitespace(self._data)
930927
else:
931-
# utf8_rtrim added in pyarrow 4.0.0
932-
if hasattr(pc, "utf8_rtrim"):
933-
return type(self)(pc.utf8_rtrim(self._data, characters=to_strip))
934-
return super()._str_rstrip(to_strip)
928+
result = pc.utf8_rtrim(self._data, characters=to_strip)
929+
return type(self)(result)

0 commit comments

Comments
 (0)