 #include <climits>
 #include <cstdarg>
+#include <cinttypes>
 #include <string>
 #include <map>
 #include <sstream>
@@ -44,7 +45,7 @@
 // tensor name constants
 //

-#define TN_POS_EMBD     "%s.position_embd.weight"
+#define TN_POS_EMBD     "v.position_embd.weight"
 #define TN_CLASS_EMBD   "v.class_embd"
 #define TN_PATCH_EMBD   "v.patch_embd.weight" // do not rename this tensor with a ".0" postfix, for backward compat
 #define TN_PATCH_EMBD_1 "v.patch_embd.weight.1"
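One semantic note on the TN_POS_EMBD change above (its call sites are outside this diff): the macro loses its "%s" placeholder and hardcodes the "v." prefix, so code that previously formatted a prefix into the template presumably now uses the macro as a literal name. A hedged before/after illustration, with string_format standing in for whatever formatting helper the call sites actually use:

// before: prefix was formatted into the "%s" template (assumed call site)
//   std::string name = string_format(TN_POS_EMBD, "v");
// after: the macro is already the full tensor name
//   std::string name = TN_POS_EMBD;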
@@ -110,6 +111,7 @@ enum projector_type {
     PROJECTOR_TYPE_PIXTRAL,
     PROJECTOR_TYPE_QWEN25VL,
     PROJECTOR_TYPE_INTERNVL,
+    PROJECTOR_TYPE_LLAMA4,
     PROJECTOR_TYPE_UNKNOWN,
 };
@@ -125,6 +127,7 @@ static std::map<projector_type, std::string> PROJECTOR_TYPE_NAMES = {
     { PROJECTOR_TYPE_IDEFICS3, "idefics3" },
     { PROJECTOR_TYPE_PIXTRAL,  "pixtral" },
     { PROJECTOR_TYPE_INTERNVL, "internvl" },
+    { PROJECTOR_TYPE_LLAMA4,   "llama4" },
 };

 static projector_type clip_projector_type_from_string(const std::string & str) {
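The hunk ends at the opening line of clip_projector_type_from_string; its body is unchanged and not shown here. For context, a minimal sketch of such a reverse lookup over PROJECTOR_TYPE_NAMES, assuming PROJECTOR_TYPE_UNKNOWN is the fallback for unrecognized names:

static projector_type clip_projector_type_from_string(const std::string & str) {
    // scan the enum -> name map in reverse: return the first enum value
    // whose registered name matches the input string
    for (const auto & pair : PROJECTOR_TYPE_NAMES) {
        if (pair.second == str) {
            return pair.first;
        }
    }
    return PROJECTOR_TYPE_UNKNOWN; // assumed fallback
}

Under this shape, registering "llama4" in PROJECTOR_TYPE_NAMES is all that is needed for the new enum value to round-trip through the string lookup.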
@@ -240,6 +243,11 @@ struct clip_image_u8_batch {
 struct clip_image_f32_batch {
     std::vector<clip_image_f32_ptr> entries;

+    // for llava-uhd style models, we need to know the grid size
+    // note: entries.size() == grid_x * grid_y + 1 (one overview image)
+    int grid_x = 0;
+    int grid_y = 0;
+
     clip_image_f32_batch clone() const {
         clip_image_f32_batch new_batch;
         new_batch.entries.reserve(entries.size());
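To make the new invariant concrete: an image sliced into a 3x2 grid yields 3*2 = 6 tiles plus 1 overview image, so entries.size() == 7. A hypothetical checker (not part of the patch) expressing this:

// hypothetical helper: sanity-check the llava-uhd slicing invariant
// entries.size() == grid_x * grid_y + 1 (the +1 is the overview image)
static bool batch_grid_is_consistent(const clip_image_f32_batch & batch) {
    if (batch.grid_x == 0 && batch.grid_y == 0) {
        return true; // grid unused; non-uhd models keep the 0 defaults
    }
    return batch.entries.size() == (size_t) batch.grid_x * batch.grid_y + 1;
}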
@@ -358,6 +366,70 @@ static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
     }
 }

+//
+// debugging
+//
+
+static void print_tensor_shape(ggml_tensor * t) {
+    printf("%s.shape = [", t->name);
+    for (int i = 0; i < ggml_n_dims(t); ++i) {
+        printf("%" PRId64, t->ne[i]);
+        if (i < ggml_n_dims(t) - 1) {
+            printf(", ");
+        }
+    }
+    printf("]\n");
+}
+
+static void print_tensor_data(ggml_tensor * t, uint8_t * data, int64_t n) {
+    ggml_type type = t->type;
+    int64_t * ne = t->ne;
+    size_t * nb = t->nb;
+    for (int64_t i3 = 0; i3 < ne[3]; i3++) {
+        printf("%s.data: [\n", t->name);
+        for (int64_t i2 = 0; i2 < ne[2]; i2++) {
+            if (i2 == n && ne[2] > 2*n) {
+                printf("..., \n");
+                i2 = ne[2] - n;
+            }
+            printf("[\n");
+            for (int64_t i1 = 0; i1 < ne[1]; i1++) {
+                if (i1 == n && ne[1] > 2*n) {
+                    printf("..., \n");
+                    i1 = ne[1] - n;
+                }
+                printf("[");
+                for (int64_t i0 = 0; i0 < ne[0]; i0++) {
+                    if (i0 == n && ne[0] > 2*n) {
+                        printf("..., ");
+                        i0 = ne[0] - n;
+                    }
+                    size_t i = i3 * nb[3] + i2 * nb[2] + i1 * nb[1] + i0 * nb[0];
+                    float v;
+                    if (type == GGML_TYPE_F16) {
+                        v = ggml_fp16_to_fp32(*(ggml_fp16_t *) &data[i]);
+                    } else if (type == GGML_TYPE_F32) {
+                        v = *(float *) &data[i];
+                    } else if (type == GGML_TYPE_I32) {
+                        v = (float) *(int32_t *) &data[i];
+                    } else if (type == GGML_TYPE_I16) {
+                        v = (float) *(int16_t *) &data[i];
+                    } else if (type == GGML_TYPE_I8) {
+                        v = (float) *(int8_t *) &data[i];
+                    } else {
+                        GGML_ABORT("fatal error");
+                    }
+                    printf("%8.4f", v);
+                    if (i0 < ne[0] - 1) printf(", ");
+                }
+                printf("],\n");
+            }
+            printf("],\n");
+        }
+        printf("]\n");
+    }
+}
+
 //
 // API used internally with mtmd
 //
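A usage note on the new debugging helpers (not part of the diff): print_tensor_shape relies on the <cinttypes> include added at the top of this patch for PRId64, and print_tensor_data reads raw host memory through its data pointer, so a backend-resident tensor has to be copied out first. A minimal sketch, assuming a valid ggml_tensor * t and <vector>:

// copy the tensor bytes to host memory, then dump shape and data;
// n = 3 prints 3 leading and 3 trailing elements per dimension
std::vector<uint8_t> buf(ggml_nbytes(t));
ggml_backend_tensor_get(t, buf.data(), 0, buf.size());
print_tensor_shape(t);
print_tensor_data(t, buf.data(), 3);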