Skip to content

Commit def3a96

Browse files
authored
Add look ahead tiktoken target
Differential Revision: D74864197
Pull Request resolved: #75
1 parent d539a61 commit def3a96

File tree

1 file changed

+45
-4
lines changed

1 file changed

+45
-4
lines changed

targets.bzl

Lines changed: 45 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ def define_common_targets():
1414
name = "headers",
1515
exported_headers = subdir_glob([
1616
("include", "pytorch/tokenizers/*.h"),
17-
], exclude = ["pcre2_regex.h", "std_regex.h"]),
17+
]),
1818
visibility = [
1919
"@EXECUTORCH_CLIENTS",
2020
"//pytorch/tokenizers/...",
@@ -23,9 +23,6 @@ def define_common_targets():
2323
platforms = PLATFORMS,
2424
)
2525

26-
# TODO: add target for regex which does lookahead with pcre2
27-
# by adding "-DSUPPORT_REGEX_LOOKAHEAD" as a compiler flag
28-
# and including pcre2 dependencies.
2926
runtime.cxx_library(
3027
name = "regex",
3128
srcs = [
@@ -43,6 +40,27 @@ def define_common_targets():
4340
platforms = PLATFORMS,
4441
)
4542

43+
runtime.cxx_library(
44+
name = "regex_lookahead",
45+
srcs = [
46+
"src/pcre2_regex.cpp",
47+
"src/regex.cpp",
48+
"src/re2_regex.cpp",
49+
"src/std_regex.cpp",
50+
],
51+
exported_deps = [
52+
":headers",
53+
],
54+
exported_external_deps = [
55+
"pcre2",
56+
"re2",
57+
],
58+
preprocessor_flags = ["-DSUPPORT_REGEX_LOOKAHEAD=ON"],
59+
visibility = ["//pytorch/tokenizers/..."],
60+
header_namespace = "",
61+
platforms = PLATFORMS,
62+
)
63+
4664
runtime.cxx_library(
4765
name = "bpe_tokenizer_base",
4866
srcs = [
@@ -101,6 +119,29 @@ def define_common_targets():
101119
platforms = PLATFORMS,
102120
)
103121

122+
runtime.cxx_library(
123+
name = "tiktoken_lookahead",
124+
srcs = [
125+
"src/tiktoken.cpp",
126+
],
127+
deps = [
128+
":regex_lookahead",
129+
],
130+
exported_deps = [
131+
":bpe_tokenizer_base",
132+
":headers",
133+
],
134+
exported_external_deps = [
135+
"pcre2",
136+
"re2",
137+
],
138+
visibility = [
139+
"@EXECUTORCH_CLIENTS",
140+
"//pytorch/tokenizers/...",
141+
],
142+
platforms = PLATFORMS,
143+
)
144+
104145
runtime.cxx_library(
105146
name = "hf_tokenizer",
106147
srcs = [

0 commit comments

Comments
 (0)