@@ -14,7 +14,7 @@ def define_common_targets():
14
14
name = "headers" ,
15
15
exported_headers = subdir_glob ([
16
16
("include" , "pytorch/tokenizers/*.h" ),
17
- ], exclude = [ "pcre2_regex.h" , "std_regex.h" ] ),
17
+ ]),
18
18
visibility = [
19
19
"@EXECUTORCH_CLIENTS" ,
20
20
"//pytorch/tokenizers/..." ,
@@ -23,9 +23,6 @@ def define_common_targets():
23
23
platforms = PLATFORMS ,
24
24
)
25
25
26
- # TODO: add target for regex which does lookahed with pcre2
27
- # by adding "-DSUPPORT_REGEX_LOOKAHEAD" as a compiler flag
28
- # and including pcre2 dependencies.
29
26
runtime .cxx_library (
30
27
name = "regex" ,
31
28
srcs = [
@@ -43,6 +40,27 @@ def define_common_targets():
43
40
platforms = PLATFORMS ,
44
41
)
45
42
43
# Regex library variant built with lookahead support: compiles the PCRE2
# backend (src/pcre2_regex.cpp) alongside the other regex sources and defines
# SUPPORT_REGEX_LOOKAHEAD for every translation unit in this target.
runtime.cxx_library(
    name = "regex_lookahead",
    srcs = [
        "src/pcre2_regex.cpp",
        "src/regex.cpp",
        "src/re2_regex.cpp",
        "src/std_regex.cpp",
    ],
    exported_deps = [
        ":headers",
    ],
    exported_external_deps = [
        "pcre2",
        "re2",
    ],
    # Define the macro without a value. `-DNAME` defines NAME as 1, so both
    # `#ifdef NAME` and `#if NAME` see the feature as enabled. A CMake-style
    # `-DNAME=ON` expands to the undefined identifier `ON`, which `#if`
    # evaluates as 0 and would silently disable the feature.
    # NOTE(review): this flag is not exported; if any header under ":headers"
    # branches on SUPPORT_REGEX_LOOKAHEAD, dependents will not see it - confirm.
    preprocessor_flags = ["-DSUPPORT_REGEX_LOOKAHEAD"],
    # Only targets under //pytorch/tokenizers may depend on this library.
    visibility = ["//pytorch/tokenizers/..."],
    header_namespace = "",
    platforms = PLATFORMS,
)
63
+
46
64
runtime .cxx_library (
47
65
name = "bpe_tokenizer_base" ,
48
66
srcs = [
@@ -101,6 +119,29 @@ def define_common_targets():
101
119
platforms = PLATFORMS ,
102
120
)
103
121
122
# Tiktoken tokenizer compiled from src/tiktoken.cpp and linked against the
# lookahead-enabled regex library (":regex_lookahead").
runtime.cxx_library(
    name = "tiktoken_lookahead",
    srcs = ["src/tiktoken.cpp"],
    # Re-exported to anything that depends on this target.
    exported_deps = [
        ":bpe_tokenizer_base",
        ":headers",
    ],
    # Private dependency: the regex implementation is not re-exported.
    deps = [":regex_lookahead"],
    exported_external_deps = [
        "pcre2",
        "re2",
    ],
    platforms = PLATFORMS,
    visibility = [
        "@EXECUTORCH_CLIENTS",
        "//pytorch/tokenizers/...",
    ],
)
144
+
104
145
runtime .cxx_library (
105
146
name = "hf_tokenizer" ,
106
147
srcs = [
0 commit comments