
Commit b182f8f

Returning 0 for some cases, instead of asserting.
1 parent b8e8fac commit b182f8f

2 files changed: +19 −5 lines

llama.cpp

Lines changed: 15 additions & 5 deletions
@@ -15227,7 +15227,9 @@ size_t llama_set_seq_data(struct llama_context * ctx, const uint8_t * src, llama
     uint32_t size_t_size;
     memcpy(&size_t_size, inp, sizeof(size_t_size));
     inp += sizeof(size_t_size);
-    GGML_ASSERT(size_t_size == sizeof(size_t));
+    if (size_t_size != sizeof(size_t)) {
+        return 0;
+    }
 
     // Read the cell count
     uint32_t cell_count;
@@ -15244,6 +15246,18 @@ size_t llama_set_seq_data(struct llama_context * ctx, const uint8_t * src, llama
     memcpy(&n_embd_v_gqa_ref, inp, sizeof(n_embd_v_gqa_ref));
     inp += sizeof(n_embd_v_gqa_ref);
 
+    // Sanity check model compatibility
+    const auto& hparams = ctx->model.hparams;
+    const uint32_t n_layer = hparams.n_layer;
+    const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa() + hparams.n_embd_k_s();
+    const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa() + hparams.n_embd_v_s();
+    if (n_layer != n_layer_ref) {
+        return 0;
+    }
+    if (n_embd_v_gqa != n_embd_v_gqa_ref) {
+        return 0;
+    }
+
     // Allocate the new cells for the slot
     {
         llama_batch batch = llama_batch_init(cell_count, 0, 1);
@@ -15274,10 +15288,6 @@ size_t llama_set_seq_data(struct llama_context * ctx, const uint8_t * src, llama
         llama_batch_free(batch);
     }
 
-    const auto& hparams = ctx->model.hparams;
-    const uint32_t n_layer = hparams.n_layer;
-    const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa() + hparams.n_embd_k_s();
-    const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa() + hparams.n_embd_v_s();
     const uint32_t kv_size = kv_self.size;
     const uint32_t kv_head = kv_self.head;
     GGML_ASSERT(n_layer == n_layer_ref);

llama.h

Lines changed: 4 additions & 0 deletions
@@ -632,6 +632,10 @@ extern "C" {
             uint8_t * dst,
             llama_seq_id seq_id);
 
+    // Copy the sequence data (originally copied with `llama_copy_seq_data`) into a sequence.
+    // Returns:
+    //  - Positive: Ok
+    //  - Zero: Failed to load
     LLAMA_API size_t llama_set_seq_data(
             struct llama_context * ctx,
             const uint8_t * src,
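
For callers, the practical effect of this change is that an incompatible or corrupt buffer now shows up as a zero return from llama_set_seq_data instead of an aborted process, so the return value needs to be checked. A minimal caller-side sketch follows, assuming the buffer was produced earlier by llama_copy_seq_data; the helper name, buffer variable, and error message are illustrative, not part of the llama.cpp API.

// Hypothetical caller-side handling (not part of this commit):
// restore a sequence serialized with llama_copy_seq_data and treat a
// zero return as "failed to load" instead of relying on an assert.
#include <cstdio>
#include <vector>
#include "llama.h"

static bool restore_sequence(struct llama_context * ctx,
                             const std::vector<uint8_t> & saved, // filled by llama_copy_seq_data
                             llama_seq_id seq_id) {
    const size_t n_read = llama_set_seq_data(ctx, saved.data(), seq_id);
    if (n_read == 0) {
        fprintf(stderr, "llama_set_seq_data failed: model mismatch or corrupt buffer\n");
        return false;
    }
    return true; // n_read bytes of the buffer were consumed
}

Returning 0 rather than asserting lets an application reject stale or mismatched sequence data (for example, a buffer saved with a different model) gracefully instead of crashing.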
