Skip to content

Commit 22fddbb

Browse files
committed
add_special option for server tokenize endpoint
1 parent 92139b9 commit 22fddbb

File tree

4 files changed

+48
-7
lines changed

4 files changed

+48
-7
lines changed

examples/server/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ Notice that each `probs` is an array of length `n_probs`.
319319

320320
`content`: Set the text to tokenize.
321321

322-
Note that a special `BOS` token is never inserted.
322+
`add_special`: Boolean indicating whether special tokens, e.g. `BOS`, should be inserted. Default: `false`
323323

324324
- **POST** `/detokenize`: Convert tokens to text.
325325

examples/server/server.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3632,7 +3632,8 @@ int main(int argc, char ** argv) {
36323632

36333633
std::vector<llama_token> tokens;
36343634
if (body.count("content") != 0) {
3635-
tokens = ctx_server.tokenize(body["content"], false);
3635+
const bool add_special = json_value(body, "add_special", false);
3636+
tokens = ctx_server.tokenize(body["content"], add_special);
36363637
}
36373638
const json data = format_tokenizer_response(tokens);
36383639
return res.set_content(data.dump(), "application/json; charset=utf-8");

examples/server/tests/features/server.feature

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ Feature: llama.cpp server
77
And a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
88
And a model file test-model.gguf
99
And a model alias tinyllama-2
10+
And BOS token is 1
1011
And 42 as server seed
1112
# KV Cache corresponds to the total amount of tokens
1213
# that can be stored across all independent sequences: #4130
@@ -91,7 +92,18 @@ Feature: llama.cpp server
9192
"""
9293
What is the capital of France ?
9394
"""
94-
Then tokens can be detokenize
95+
Then tokens can be detokenized
96+
And tokens do not begin with BOS
97+
98+
Scenario: Tokenize w/ BOS
99+
Given adding special tokens
100+
When tokenizing:
101+
"""
102+
What is the capital of Germany?
103+
"""
104+
Then tokens begin with BOS
105+
Given first token is removed
106+
Then tokens can be detokenized
95107

96108
Scenario: Models available
97109
Given available models

examples/server/tests/features/steps/steps.py

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,11 @@ def step_seed(context, seed):
376376
context.seed.append(seed)
377377

378378

379+
@step('BOS token is {bos:d}')
380+
def step_bos_token(context, bos):
381+
context.bos = bos
382+
383+
379384
@step('a prefix prompt')
380385
def step_prompt_prefix(context):
381386
context.prompt_prefix = context_text(context)
@@ -656,21 +661,29 @@ async def all_embeddings_are_generated(context):
656661
assert_embeddings(context.tasks_result.pop().pop())
657662

658663

664+
@step('adding special tokens')
665+
def step_tokenize_set_add_special(context):
666+
context.tokenize_add_special = True
667+
668+
659669
@step('tokenizing')
660670
@async_run_until_complete
661671
async def step_tokenize(context):
662672
context.tokenized_text = context_text(context)
663673
async with aiohttp.ClientSession() as session:
674+
tokenize_args = {
675+
"content": context.tokenized_text,
676+
}
677+
if getattr(context, 'tokenize_add_special', None) is not None:
678+
tokenize_args['add_special'] = context.tokenize_add_special
664679
async with session.post(f'{context.base_url}/tokenize',
665-
json={
666-
"content": context.tokenized_text,
667-
}) as response:
680+
json=tokenize_args) as response:
668681
assert response.status == 200
669682
tokenize_json = await response.json()
670683
context.tokens = tokenize_json['tokens']
671684

672685

673-
@step('tokens can be detokenize')
686+
@step('tokens can be detokenized')
674687
@async_run_until_complete
675688
async def step_detokenize(context):
676689
assert len(context.tokens) > 0
@@ -685,6 +698,21 @@ async def step_detokenize(context):
685698
assert context.tokenized_text == detokenize_json['content'].strip()
686699

687700

701+
@step('tokens begin with BOS')
702+
def step_strings_for_tokenization(context):
703+
assert context.tokens[0] == context.bos
704+
705+
706+
@step('tokens do not begin with BOS')
707+
def step_strings_for_tokenization(context):
708+
assert context.tokens[0] != context.bos
709+
710+
711+
@step('first token is removed')
712+
def step_strings_for_tokenization(context):
713+
context.tokens = context.tokens[1:]
714+
715+
688716
@step('an OPTIONS request is sent from {origin}')
689717
@async_run_until_complete
690718
async def step_options_request(context, origin):

0 commit comments

Comments
 (0)