Skip to content

Commit 289a4e7

Browse files
ggerganov authored and hodlen committed
server : enable continuous batching by default (ggml-org#6231)
1 parent 77a4903 commit 289a4e7

File tree

2 files changed

+3
-3
lines changed

2 files changed

+3
-3
lines changed

common/common.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -139,7 +139,7 @@ struct gpt_params {
139139
bool interactive_first = false; // wait for user input immediately
140140
bool multiline_input = false; // reverse the usage of `\`
141141
bool simple_io = false; // improves compatibility with subprocesses and limited consoles
142-
bool cont_batching = false; // insert new sequences for decoding on-the-fly
142+
bool cont_batching = true; // insert new sequences for decoding on-the-fly
143143

144144
bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
145145
bool ignore_eos = false; // ignore generated EOS tokens

examples/server/server.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1758,7 +1758,7 @@ struct server_context {
17581758
}
17591759

17601760
// process in chunks of params.n_batch
1761-
int32_t n_batch = llama_n_batch(ctx);
1761+
int32_t n_batch = llama_n_batch(ctx);
17621762
int32_t n_ubatch = llama_n_ubatch(ctx);
17631763

17641764
// next, batch any pending prompts without exceeding n_batch
@@ -2225,7 +2225,7 @@ static void server_print_usage(const char * argv0, const gpt_params & params, co
22252225
printf(" -to N, --timeout N server read/write timeout in seconds (default: %d)\n", sparams.read_timeout);
22262226
printf(" --embeddings enable embedding vector output (default: %s)\n", params.embedding ? "enabled" : "disabled");
22272227
printf(" -np N, --parallel N number of slots for process requests (default: %d)\n", params.n_parallel);
2228-
printf(" -cb, --cont-batching enable continuous batching (a.k.a dynamic batching) (default: disabled)\n");
2228+
printf(" -cb, --cont-batching enable continuous batching (a.k.a dynamic batching) (default: enabled)\n");
22292229
printf(" -spf FNAME, --system-prompt-file FNAME\n");
22302230
printf(" set a file to load a system prompt (initial prompt of all slots), this is useful for chat applications.\n");
22312231
printf(" -ctk TYPE, --cache-type-k TYPE\n");

0 commit comments

Comments (0)