22
22
Scalar ,
23
23
type_t ,
24
24
)
25
+ from pandas .compat import (
26
+ pa_version_under2p0 ,
27
+ pa_version_under3p0 ,
28
+ pa_version_under4p0 ,
29
+ )
25
30
from pandas .util ._decorators import doc
26
31
from pandas .util ._validators import validate_fillna_kwargs
27
32
@@ -667,9 +672,7 @@ def take(
667
672
return type (self )(self ._data .take (indices_array ))
668
673
669
674
def isin (self , values ):
670
-
671
- # pyarrow.compute.is_in added in pyarrow 2.0.0
672
- if not hasattr (pc , "is_in" ):
675
+ if pa_version_under2p0 :
673
676
return super ().isin (values )
674
677
675
678
value_set = [
@@ -684,7 +687,7 @@ def isin(self, values):
684
687
return np .zeros (len (self ), dtype = bool )
685
688
686
689
kwargs = {}
687
- if LooseVersion ( pa . __version__ ) < "3.0.0" :
690
+ if pa_version_under3p0 :
688
691
# in pyarrow 2.0.0 skip_null is ignored but is a required keyword and raises
689
692
# with unexpected keyword argument in pyarrow 3.0.0+
690
693
kwargs ["skip_null" ] = True
@@ -802,11 +805,10 @@ def _str_contains(self, pat, case=True, flags=0, na=np.nan, regex: bool = True):
802
805
return super ()._str_contains (pat , case , flags , na , regex )
803
806
804
807
if regex :
805
- # match_substring_regex added in pyarrow 4.0.0
806
- if hasattr (pc , "match_substring_regex" ) and case :
807
- result = pc .match_substring_regex (self ._data , pat )
808
- else :
808
+ if pa_version_under4p0 or case is False :
809
809
return super ()._str_contains (pat , case , flags , na , regex )
810
+ else :
811
+ result = pc .match_substring_regex (self ._data , pat )
810
812
else :
811
813
if case :
812
814
result = pc .match_substring (self ._data , pat )
@@ -818,27 +820,25 @@ def _str_contains(self, pat, case=True, flags=0, na=np.nan, regex: bool = True):
818
820
return result
819
821
820
822
def _str_startswith(self, pat, na=None):
    """
    Test whether each element starts with the literal string ``pat``.

    Parameters
    ----------
    pat : str
        Prefix to look for; it is escaped, so regex metacharacters are
        matched literally.
    na : object, default None
        When not missing, null entries of the result are replaced by
        ``bool(na)``.

    Returns
    -------
    The result of ``BooleanDtype().__from_arrow__`` on the pyarrow match
    result, or whatever the parent implementation returns on the
    fallback path.
    """
    if pa_version_under4p0:
        # match_substring_regex was added in pyarrow 4.0.0; defer to the
        # parent class implementation on older versions.
        return super()._str_startswith(pat, na)

    anchored_pattern = "^" + re.escape(pat)
    matched = pc.match_substring_regex(self._data, anchored_pattern)
    mask = BooleanDtype().__from_arrow__(matched)
    if not isna(na):
        # Fill the null slots with the caller-supplied boolean.
        mask[isna(mask)] = bool(na)
    return mask
831
+
831
832
def _str_endswith(self, pat, na=None):
    """
    Test whether each element ends with the literal string ``pat``.

    Parameters
    ----------
    pat : str
        Suffix to look for; it is escaped, so regex metacharacters are
        matched literally.
    na : object, default None
        When not missing, null entries of the result are replaced by
        ``bool(na)``.

    Returns
    -------
    The result of ``BooleanDtype().__from_arrow__`` on the pyarrow match
    result, or whatever the parent implementation returns on the
    fallback path.
    """
    if pa_version_under4p0:
        # match_substring_regex was added in pyarrow 4.0.0; defer to the
        # parent class implementation on older versions.
        return super()._str_endswith(pat, na)

    anchored_pattern = re.escape(pat) + "$"
    matched = pc.match_substring_regex(self._data, anchored_pattern)
    mask = BooleanDtype().__from_arrow__(matched)
    if not isna(na):
        # Fill the null slots with the caller-supplied boolean.
        mask[isna(mask)] = bool(na)
    return mask
841
+
842
842
def _str_match (
843
843
self , pat : str , case : bool = True , flags : int = 0 , na : Scalar = None
844
844
):
@@ -871,13 +871,12 @@ def _str_isnumeric(self):
871
871
return BooleanDtype ().__from_arrow__ (result )
872
872
873
873
def _str_isspace(self):
    """
    Apply ``pc.utf8_is_space`` to the underlying data and convert the
    result through ``BooleanDtype().__from_arrow__``.

    Falls back to the parent class implementation when
    ``pa_version_under2p0`` is set (utf8_is_space was added in
    pyarrow 2.0.0).
    """
    if not pa_version_under2p0:
        space_mask = pc.utf8_is_space(self._data)
        return BooleanDtype().__from_arrow__(space_mask)

    return super()._str_isspace()
879
+
881
880
def _str_istitle(self):
    """
    Apply ``pc.utf8_is_title`` to the underlying data and convert the
    result through ``BooleanDtype().__from_arrow__``.
    """
    title_mask = pc.utf8_is_title(self._data)
    boolean_dtype = BooleanDtype()
    return boolean_dtype.__from_arrow__(title_mask)
@@ -887,48 +886,44 @@ def _str_isupper(self):
887
886
return BooleanDtype ().__from_arrow__ (result )
888
887
889
888
def _str_len(self):
    """
    Apply ``pc.utf8_length`` to the underlying data and convert the
    result through ``Int64Dtype().__from_arrow__``.

    Falls back to the parent class implementation when
    ``pa_version_under4p0`` is set (utf8_length was added in
    pyarrow 4.0.0).
    """
    if not pa_version_under4p0:
        lengths = pc.utf8_length(self._data)
        return Int64Dtype().__from_arrow__(lengths)

    return super()._str_len()
894
+
897
895
def _str_lower(self):
    """Return a new array of the same type built from ``pc.utf8_lower``."""
    lowered = pc.utf8_lower(self._data)
    return type(self)(lowered)
899
897
900
898
def _str_upper(self):
    """Return a new array of the same type built from ``pc.utf8_upper``."""
    uppered = pc.utf8_upper(self._data)
    return type(self)(uppered)
902
900
903
901
def _str_strip(self, to_strip=None):
    """
    Trim characters from both ends of each element.

    Parameters
    ----------
    to_strip : str, optional
        Characters to remove, passed as ``characters`` to
        ``pc.utf8_trim``. When None, ``pc.utf8_trim_whitespace`` is used
        instead.

    Returns
    -------
    A new array of the same type as ``self``, or the parent class result
    when ``pa_version_under4p0`` is set.
    """
    if pa_version_under4p0:
        # The utf8_trim* kernels were added in pyarrow 4.0.0.
        return super()._str_strip(to_strip)

    if to_strip is not None:
        return type(self)(pc.utf8_trim(self._data, characters=to_strip))
    return type(self)(pc.utf8_trim_whitespace(self._data))
913
910
914
911
def _str_lstrip(self, to_strip=None):
    """
    Trim characters from the left end of each element.

    Parameters
    ----------
    to_strip : str, optional
        Characters to remove, passed as ``characters`` to
        ``pc.utf8_ltrim``. When None, ``pc.utf8_ltrim_whitespace`` is
        used instead.

    Returns
    -------
    A new array of the same type as ``self``, or the parent class result
    when ``pa_version_under4p0`` is set.
    """
    if pa_version_under4p0:
        # The utf8_ltrim* kernels were added in pyarrow 4.0.0.
        return super()._str_lstrip(to_strip)

    if to_strip is not None:
        return type(self)(pc.utf8_ltrim(self._data, characters=to_strip))
    return type(self)(pc.utf8_ltrim_whitespace(self._data))
924
920
925
921
def _str_rstrip(self, to_strip=None):
    """
    Trim characters from the right end of each element.

    Parameters
    ----------
    to_strip : str, optional
        Characters to remove, passed as ``characters`` to
        ``pc.utf8_rtrim``. When None, ``pc.utf8_rtrim_whitespace`` is
        used instead.

    Returns
    -------
    A new array of the same type as ``self``, or the parent class result
    when ``pa_version_under4p0`` is set.
    """
    if pa_version_under4p0:
        # The utf8_rtrim* kernels were added in pyarrow 4.0.0.
        return super()._str_rstrip(to_strip)

    if to_strip is not None:
        return type(self)(pc.utf8_rtrim(self._data, characters=to_strip))
    return type(self)(pc.utf8_rtrim_whitespace(self._data))
0 commit comments