
Commit b182f8f

Returning 0 for some cases, instead of asserting.
1 parent b8e8fac commit b182f8f

2 files changed: +19 −5 lines

llama.cpp

Lines changed: 15 additions & 5 deletions
@@ -15227,7 +15227,9 @@ size_t llama_set_seq_data(struct llama_context * ctx, const uint8_t * src, llama
     uint32_t size_t_size;
     memcpy(&size_t_size, inp, sizeof(size_t_size));
     inp += sizeof(size_t_size);
-    GGML_ASSERT(size_t_size == sizeof(size_t));
+    if (size_t_size != sizeof(size_t)) {
+        return 0;
+    }
 
     // Read the cell count
     uint32_t cell_count;
@@ -15244,6 +15246,18 @@ size_t llama_set_seq_data(struct llama_context * ctx, const uint8_t * src, llama
     memcpy(&n_embd_v_gqa_ref, inp, sizeof(n_embd_v_gqa_ref));
     inp += sizeof(n_embd_v_gqa_ref);
 
+    // Sanity check model compatibility
+    const auto& hparams = ctx->model.hparams;
+    const uint32_t n_layer = hparams.n_layer;
+    const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa() + hparams.n_embd_k_s();
+    const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa() + hparams.n_embd_v_s();
+    if (n_layer != n_layer_ref) {
+        return 0;
+    }
+    if (n_embd_v_gqa != n_embd_v_gqa_ref) {
+        return 0;
+    }
+
     // Allocate the new cells for the slot
     {
         llama_batch batch = llama_batch_init(cell_count, 0, 1);
@@ -15274,10 +15288,6 @@ size_t llama_set_seq_data(struct llama_context * ctx, const uint8_t * src, llama
         llama_batch_free(batch);
     }
 
-    const auto& hparams = ctx->model.hparams;
-    const uint32_t n_layer = hparams.n_layer;
-    const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa() + hparams.n_embd_k_s();
-    const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa() + hparams.n_embd_v_s();
     const uint32_t kv_size = kv_self.size;
     const uint32_t kv_head = kv_self.head;
     GGML_ASSERT(n_layer == n_layer_ref);

llama.h

Lines changed: 4 additions & 0 deletions
@@ -632,6 +632,10 @@ extern "C" {
             uint8_t * dst,
             llama_seq_id seq_id);
 
+    // Copy the sequence data (originally copied with `llama_copy_seq_data`) into a sequence.
+    // Returns:
+    //  - Positive: Ok
+    //  - Zero: Failed to load
     LLAMA_API size_t llama_set_seq_data(
             struct llama_context * ctx,
             const uint8_t * src,
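
For callers, the practical effect of this change is that an incompatible or corrupt buffer now shows up as a zero return from llama_set_seq_data instead of an aborted process, so the return value needs to be checked. A minimal caller-side sketch follows, assuming the buffer was produced earlier by llama_copy_seq_data; the helper name, buffer variable, and error message are illustrative, not part of the llama.cpp API.

// Hypothetical caller-side handling (not part of this commit):
// restore a sequence serialized with llama_copy_seq_data and treat a
// zero return as "failed to load" instead of relying on an assert.
#include <cstdio>
#include <vector>
#include "llama.h"

static bool restore_sequence(struct llama_context * ctx,
                             const std::vector<uint8_t> & saved, // filled by llama_copy_seq_data
                             llama_seq_id seq_id) {
    const size_t n_read = llama_set_seq_data(ctx, saved.data(), seq_id);
    if (n_read == 0) {
        fprintf(stderr, "llama_set_seq_data failed: model mismatch or corrupt buffer\n");
        return false;
    }
    return true; // n_read bytes of the buffer were consumed
}

Returning 0 rather than asserting lets an application reject stale or mismatched sequence data (for example, a buffer saved with a different model) gracefully instead of crashing.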
