Skip to content

Commit 322686e

Browse files
committed
mamba : in comments, properly refer to KV cells instead of slots
1 parent 06ead3d commit 322686e

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

llama.cpp

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1734,7 +1734,7 @@ struct llama_kv_cell {
1734 1734
// ring-buffer of cached KV data
1735 1735
struct llama_kv_cache {
1736 1736
bool has_shift = false;
1737-
// with Mamba, a slot can hold the state for more than one past token
1737+
// with Mamba, a cell can hold the state for more than one past token
1738 1738
bool unlimited = false;
1739 1739

1740 1740
// Note: The value of head isn't only used to optimize searching
@@ -1993,7 +1993,7 @@ static bool llama_kv_cache_init(
1993 1993

1994 1994
cache.has_shift = false;
1995 1995

1996-
// for now, only Mamba can hold state for more than one past token per slot
1996+
// for now, only Mamba can hold state for more than one past token per cell
1997 1997
cache.unlimited = model.arch == LLM_ARCH_MAMBA;
1998 1998

1999 1999
cache.head = 0;
@@ -2249,7 +2249,7 @@ static void llama_kv_cache_seq_cp(
2249 2249
cache.cells[seq_id_dst].delta = seq_id_src;
2250 2250
// NOTE: a sequence can't have multiple sources, but can have multiple destinations.
2251 2251
// For compatibility with the other KV cache API functions,
2252-
// the seq_id(s) of a slot suggests an intent to "copy to" those id(s),
2252+
// the seq_id(s) of a cell suggests an intent to "copy to" those id(s),
2253 2253
// so that when a sequence is copied, it can initially be found from the source cell.
2254 2254
cache.cells[seq_id_src].seq_id.insert(seq_id_dst);
2255 2255
// prevent the destination from getting cleared
@@ -11726,10 +11726,10 @@ struct llama_context * llama_new_context_with_model(
11726 11726
ggml_type type_k = params.type_k;
11727 11727
ggml_type type_v = params.type_v;
11728 11728

11729-
// Mamba only needs a constant number of KV cache slots per sequence
11729+
// Mamba only needs a constant number of KV cache cells per sequence
11730 11730
if (model->arch == LLM_ARCH_MAMBA) {
11731-
// Mamba needs as many slots as there are distinct sequences processed at the same time
11732-
// The extra slot allows dedicating a sequence id to the system prompt
11731+
// Mamba needs as many KV cells as there are sequences kept at any time
11732+
// The extra cell allows dedicating a sequence id to the system prompt
11733 11733
// TODO: find a better way to get the max number of parallel sequences
11734 11734
kv_size = params.n_parallel + 1;
11735 11735
// it's probably best to keep as much precision as possible for the states

0 commit comments

Comments
 (0)