Skip to content

Commit 5aa365d

Browse files
KerfuffleV2, cebtenzzre, ggerganov
authored
llama : allow overriding GGUF metadata when loading model (#4092)
* feat: Allow overriding GGUF metadata when loading model
* Fix the one time GCC is stricter than clang about something
* Step1
* Refactor... basically everything!
* Nuke obsolete GetArrayLen struct
* simplify std::string specialization
* Various cleanups
  Add informational output when overrides are applied
  Warn user when an override with the wrong type is specified
* Fix broken logic for parsing bool KV overrides
  Fix issue where overrides didn't apply when key missing in GGUF metadata
  Resolve merge changes
* llama : rearrange model params
* Update new GET_KEY call
  Add note that metadata KV overrides aren't reflected in initial metadata KV info dump
---------
Co-authored-by: cebtenzzre <[email protected]>
Co-authored-by: Georgi Gerganov <[email protected]>
1 parent 52c8bc3 commit 5aa365d

File tree

4 files changed

+360
-85
lines changed

4 files changed

+360
-85
lines changed

common/common.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -690,6 +690,47 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
690690
std::istreambuf_iterator<char>(),
691691
std::back_inserter(sparams.grammar)
692692
);
693+
} else if (arg == "--override-kv") {
694+
if (++i >= argc) {
695+
invalid_param = true;
696+
break;
697+
}
698+
char * sep = strchr(argv[i], '=');
699+
if (sep == nullptr || sep - argv[i] >= 128) {
700+
fprintf(stderr, "error: Malformed KV override: %s\n", argv[i]);
701+
invalid_param = true;
702+
break;
703+
}
704+
struct llama_model_kv_override kvo;
705+
std::strncpy(kvo.key, argv[i], sep - argv[i]);
706+
kvo.key[sep - argv[i]] = 0;
707+
sep++;
708+
if (strncmp(sep, "int:", 4) == 0) {
709+
sep += 4;
710+
kvo.tag = LLAMA_KV_OVERRIDE_INT;
711+
kvo.int_value = std::atol(sep);
712+
} else if (strncmp(sep, "float:", 6) == 0) {
713+
sep += 6;
714+
kvo.tag = LLAMA_KV_OVERRIDE_FLOAT;
715+
kvo.float_value = std::atof(sep);
716+
} else if (strncmp(sep, "bool:", 5) == 0) {
717+
sep += 5;
718+
kvo.tag = LLAMA_KV_OVERRIDE_BOOL;
719+
if (std::strcmp(sep, "true") == 0) {
720+
kvo.bool_value = true;
721+
} else if (std::strcmp(sep, "false") == 0) {
722+
kvo.bool_value = false;
723+
} else {
724+
fprintf(stderr, "error: Invalid boolean value for KV override: %s\n", argv[i]);
725+
invalid_param = true;
726+
break;
727+
}
728+
} else {
729+
fprintf(stderr, "error: Invalid type for KV override: %s\n", argv[i]);
730+
invalid_param = true;
731+
break;
732+
}
733+
params.kv_overrides.push_back(kvo);
693734
#ifndef LOG_DISABLE_LOGS
694735
// Parse args for logging parameters
695736
} else if ( log_param_single_parse( argv[i] ) ) {
@@ -733,6 +774,11 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
733774
}
734775
}
735776

777+
if (!params.kv_overrides.empty()) {
778+
params.kv_overrides.emplace_back(llama_model_kv_override());
779+
params.kv_overrides.back().key[0] = 0;
780+
}
781+
736782
return true;
737783
}
738784

@@ -864,6 +910,9 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
864910
printf(" draft model for speculative decoding (default: %s)\n", params.model.c_str());
865911
printf(" -ld LOGDIR, --logdir LOGDIR\n");
866912
printf(" path under which to save YAML logs (no logging if unset)\n");
913+
printf(" --override-kv KEY=TYPE:VALUE\n");
914+
printf(" advanced option to override model metadata by key. may be specified multiple times.\n");
915+
printf(" types: int, float, bool. example: --override-kv tokenizer.ggml.add_bos_token=bool:false\n");
867916
printf("\n");
868917
#ifndef LOG_DISABLE_LOGS
869918
log_print_usage();
@@ -956,6 +1005,12 @@ struct llama_model_params llama_model_params_from_gpt_params(const gpt_params &
9561005
mparams.tensor_split = params.tensor_split;
9571006
mparams.use_mmap = params.use_mmap;
9581007
mparams.use_mlock = params.use_mlock;
1008+
if (params.kv_overrides.empty()) {
1009+
mparams.kv_overrides = NULL;
1010+
} else {
1011+
GGML_ASSERT(params.kv_overrides.back().key[0] == 0 && "KV overrides not terminated with empty key");
1012+
mparams.kv_overrides = params.kv_overrides.data();
1013+
}
9591014

9601015
return mparams;
9611016
}

common/common.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ struct gpt_params {
8686
std::vector<std::string> antiprompt; // string upon seeing which more user input is prompted
8787
std::string logdir = ""; // directory in which to save YAML log files
8888

89+
std::vector<llama_model_kv_override> kv_overrides;
90+
8991
// TODO: avoid tuple, use struct
9092
std::vector<std::tuple<std::string, float>> lora_adapter; // lora adapter path with user defined scale
9193
std::string lora_base = ""; // base model path for the lora adapter

0 commit comments

Comments
 (0)