Commit f487d5e

style : minor fixes, mostly indentations
1 parent da730c5 commit f487d5e

4 files changed: +65 additions, −53 deletions


ggml-metal.m

Lines changed: 22 additions & 21 deletions
@@ -885,32 +885,33 @@ void ggml_metal_graph_compute(

                     const int n_past = ((int32_t *)(src1->data))[0];

-                    float freq_base, freq_scale;
-                    memcpy(&freq_base, (int32_t *) src1->data + 4, sizeof(float));
+                    float freq_base;
+                    float freq_scale;
+                    memcpy(&freq_base,  (int32_t *) src1->data + 4, sizeof(float));
                     memcpy(&freq_scale, (int32_t *) src1->data + 5, sizeof(float));

                     [encoder setComputePipelineState:ctx->pipeline_rope];
                     [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
                     [encoder setBuffer:id_dst  offset:offs_dst  atIndex:1];
-                    [encoder setBytes:&ne00 length:sizeof( int64_t) atIndex:2];
-                    [encoder setBytes:&ne01 length:sizeof( int64_t) atIndex:3];
-                    [encoder setBytes:&ne02 length:sizeof( int64_t) atIndex:4];
-                    [encoder setBytes:&ne03 length:sizeof( int64_t) atIndex:5];
-                    [encoder setBytes:&nb00 length:sizeof(uint64_t) atIndex:6];
-                    [encoder setBytes:&nb01 length:sizeof(uint64_t) atIndex:7];
-                    [encoder setBytes:&nb02 length:sizeof(uint64_t) atIndex:8];
-                    [encoder setBytes:&nb03 length:sizeof(uint64_t) atIndex:9];
-                    [encoder setBytes:&ne0  length:sizeof( int64_t) atIndex:10];
-                    [encoder setBytes:&ne1  length:sizeof( int64_t) atIndex:11];
-                    [encoder setBytes:&ne2  length:sizeof( int64_t) atIndex:12];
-                    [encoder setBytes:&ne3  length:sizeof( int64_t) atIndex:13];
-                    [encoder setBytes:&nb0  length:sizeof(uint64_t) atIndex:14];
-                    [encoder setBytes:&nb1  length:sizeof(uint64_t) atIndex:15];
-                    [encoder setBytes:&nb2  length:sizeof(uint64_t) atIndex:16];
-                    [encoder setBytes:&nb3  length:sizeof(uint64_t) atIndex:17];
-                    [encoder setBytes:&n_past length:sizeof( int) atIndex:18];
-                    [encoder setBytes:&n_dims length:sizeof( int) atIndex:19];
-                    [encoder setBytes:&mode   length:sizeof( int) atIndex:20];
+                    [encoder setBytes:&ne00 length:sizeof( int64_t) atIndex:2];
+                    [encoder setBytes:&ne01 length:sizeof( int64_t) atIndex:3];
+                    [encoder setBytes:&ne02 length:sizeof( int64_t) atIndex:4];
+                    [encoder setBytes:&ne03 length:sizeof( int64_t) atIndex:5];
+                    [encoder setBytes:&nb00 length:sizeof(uint64_t) atIndex:6];
+                    [encoder setBytes:&nb01 length:sizeof(uint64_t) atIndex:7];
+                    [encoder setBytes:&nb02 length:sizeof(uint64_t) atIndex:8];
+                    [encoder setBytes:&nb03 length:sizeof(uint64_t) atIndex:9];
+                    [encoder setBytes:&ne0  length:sizeof( int64_t) atIndex:10];
+                    [encoder setBytes:&ne1  length:sizeof( int64_t) atIndex:11];
+                    [encoder setBytes:&ne2  length:sizeof( int64_t) atIndex:12];
+                    [encoder setBytes:&ne3  length:sizeof( int64_t) atIndex:13];
+                    [encoder setBytes:&nb0  length:sizeof(uint64_t) atIndex:14];
+                    [encoder setBytes:&nb1  length:sizeof(uint64_t) atIndex:15];
+                    [encoder setBytes:&nb2  length:sizeof(uint64_t) atIndex:16];
+                    [encoder setBytes:&nb3  length:sizeof(uint64_t) atIndex:17];
+                    [encoder setBytes:&n_past length:sizeof( int) atIndex:18];
+                    [encoder setBytes:&n_dims length:sizeof( int) atIndex:19];
+                    [encoder setBytes:&mode   length:sizeof( int) atIndex:20];
                     [encoder setBytes:&freq_base  length:sizeof(float) atIndex:21];
                     [encoder setBytes:&freq_scale length:sizeof(float) atIndex:22];

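For context on the hunk above: ggml_rope passes its parameters through src1 as six 32-bit slots. Slots 0–3 hold n_past, n_dims, mode and n_ctx as int32 values, while slots 4 and 5 hold freq_base and freq_scale, bit-copied in with memcpy because writing a float through a casted int32_t pointer would violate strict aliasing. A minimal standalone sketch of that packing convention, with illustrative values:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
    // six 32-bit slots, mirroring the src1 tensor used by ggml_rope
    int32_t slots[6];
    slots[0] = 128;  // n_past
    slots[1] = 64;   // n_dims
    slots[2] = 0;    // mode
    slots[3] = 2048; // n_ctx

    // floats reuse the 32-bit slots; memcpy is a well-defined bit copy,
    // unlike dereferencing a type-punned float pointer
    const float freq_base  = 10000.0f;
    const float freq_scale = 0.5f;
    std::memcpy(slots + 4, &freq_base,  sizeof(float));
    std::memcpy(slots + 5, &freq_scale, sizeof(float));

    // unpack exactly the way ggml-metal.m and ggml.c do
    float fb, fs;
    std::memcpy(&fb, slots + 4, sizeof(float));
    std::memcpy(&fs, slots + 5, sizeof(float));
    std::printf("freq_base = %.1f, freq_scale = %g\n", fb, fs);
    return 0;
}
```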
ggml.c

Lines changed: 10 additions & 6 deletions
@@ -6975,7 +6975,7 @@ struct ggml_tensor * ggml_rope_impl(
         ((int32_t *) b->data)[1] = n_dims;
         ((int32_t *) b->data)[2] = mode;
         ((int32_t *) b->data)[3] = n_ctx;
-        memcpy((int32_t *) b->data + 4, &freq_base, sizeof(float));
+        memcpy((int32_t *) b->data + 4, &freq_base,  sizeof(float));
         memcpy((int32_t *) b->data + 5, &freq_scale, sizeof(float));

         ggml_scratch_load(ctx);
@@ -12084,12 +12084,14 @@ static void ggml_compute_forward_rope_f32(
         return;
     }

+    float freq_base;
+    float freq_scale;
+
     const int n_past = ((int32_t *) src1->data)[0];
     const int n_dims = ((int32_t *) src1->data)[1];
     const int mode   = ((int32_t *) src1->data)[2];
     const int n_ctx  = ((int32_t *) src1->data)[3];
-    float freq_base, freq_scale;
-    memcpy(&freq_base, (int32_t *) src1->data + 4, sizeof(float));
+    memcpy(&freq_base,  (int32_t *) src1->data + 4, sizeof(float));
     memcpy(&freq_scale, (int32_t *) src1->data + 5, sizeof(float));

     assert(n_past >= 0);
@@ -12214,12 +12216,14 @@ static void ggml_compute_forward_rope_f16(
         return;
     }

+    float freq_base;
+    float freq_scale;
+
     const int n_past = ((int32_t *) src1->data)[0];
     const int n_dims = ((int32_t *) src1->data)[1];
     const int mode   = ((int32_t *) src1->data)[2];
     const int n_ctx  = ((int32_t *) src1->data)[3];
-    float freq_base, freq_scale;
-    memcpy(&freq_base, (int32_t *) src1->data + 4, sizeof(float));
+    memcpy(&freq_base,  (int32_t *) src1->data + 4, sizeof(float));
     memcpy(&freq_scale, (int32_t *) src1->data + 5, sizeof(float));

     assert(n_past >= 0);
@@ -12322,7 +12326,7 @@ static void ggml_compute_forward_rope_f16(
                         const float x0 = GGML_FP16_TO_FP32(src[0]);
                         const float x1 = GGML_FP16_TO_FP32(src[n_dims/2]);

-                        dst_data[0] = GGML_FP32_TO_FP16(x0*cos_theta - x1*sin_theta);
+                        dst_data[0]        = GGML_FP32_TO_FP16(x0*cos_theta - x1*sin_theta);
                         dst_data[n_dims/2] = GGML_FP32_TO_FP16(x0*sin_theta + x1*cos_theta);
                     }
                 }

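The last hunk above touches the core RoPE update: each feature pair (x0, x1) is rotated by an angle theta that grows linearly with the token position and shrinks geometrically with the dimension index, theta = freq_scale * p * freq_base^(-2i/n_dims). A reduced float sketch of that rotation, with illustrative values:

```cpp
#include <cmath>
#include <cstdio>

int main() {
    const int   p          = 3;        // token position
    const int   n_dims     = 64;       // number of rotated dimensions
    const float freq_base  = 10000.0f;
    const float freq_scale = 1.0f;

    float x0 = 1.0f, x1 = 0.0f;        // one feature pair

    // angle for dimension pair i, matching ggml's geometric schedule
    const int   i     = 0;
    const float theta = freq_scale * p * std::pow(freq_base, -2.0f * i / n_dims);

    const float cos_theta = std::cos(theta);
    const float sin_theta = std::sin(theta);

    // the same update the f16 path performs around GGML_FP32_TO_FP16
    const float y0 = x0*cos_theta - x1*sin_theta;
    const float y1 = x0*sin_theta + x1*cos_theta;

    std::printf("rotated pair: (%f, %f)\n", y0, y1);
    return 0;
}
```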
llama.cpp

Lines changed: 28 additions & 24 deletions
@@ -190,8 +190,10 @@ struct llama_hparams {
     uint32_t n_head  = 32;
     uint32_t n_layer = 32;
     uint32_t n_rot   = 64;
+
     float rope_freq_base  = 10000.0f;
     float rope_freq_scale = 1.0f;
+
     enum llama_ftype ftype = LLAMA_FTYPE_MOSTLY_F16;

     bool operator!=(const llama_hparams & other) const {
@@ -843,12 +845,12 @@ struct llama_context_params llama_context_default_params() {
     struct llama_context_params result = {
         /*.seed                        =*/ LLAMA_DEFAULT_SEED,
         /*.n_ctx                       =*/ 512,
-        /*.rope_freq_base              =*/ 10000.0f,
-        /*.rope_freq_scale             =*/ 1.0f,
         /*.n_batch                     =*/ 512,
         /*.gpu_layers                  =*/ 0,
         /*.main_gpu                    =*/ 0,
         /*.tensor_split                =*/ {0},
+        /*.rope_freq_base              =*/ 10000.0f,
+        /*.rope_freq_scale             =*/ 1.0f,
         /*.progress_callback           =*/ nullptr,
         /*.progress_callback_user_data =*/ nullptr,
         /*.low_vram                    =*/ false,
@@ -968,12 +970,12 @@ static void llama_model_load_internal(
         llama_model & model,
         llama_vocab & vocab,
         int n_ctx,
-        float rope_freq_base,
-        float rope_freq_scale,
         int n_batch,
         int n_gpu_layers,
         int main_gpu,
         const float * tensor_split,
+        float rope_freq_base,
+        float rope_freq_scale,
         bool low_vram,
         ggml_type memory_type,
         bool use_mmap,
@@ -1008,26 +1010,27 @@ static void llama_model_load_internal(
         }

         hparams.n_ctx = n_ctx;
-        hparams.rope_freq_base = rope_freq_base;
+
+        hparams.rope_freq_base  = rope_freq_base;
         hparams.rope_freq_scale = rope_freq_scale;
     }

     const uint32_t n_ff = ((2*(4*hparams.n_embd)/3 + hparams.n_mult - 1)/hparams.n_mult)*hparams.n_mult;

     {
-        fprintf(stderr, "%s: format     = %s\n",  __func__, llama_file_version_name(file_version));
-        fprintf(stderr, "%s: n_vocab    = %u\n",  __func__, hparams.n_vocab);
-        fprintf(stderr, "%s: n_ctx      = %u\n",  __func__, hparams.n_ctx);
-        fprintf(stderr, "%s: n_embd     = %u\n",  __func__, hparams.n_embd);
-        fprintf(stderr, "%s: n_mult     = %u\n",  __func__, hparams.n_mult);
-        fprintf(stderr, "%s: n_head     = %u\n",  __func__, hparams.n_head);
-        fprintf(stderr, "%s: n_layer    = %u\n",  __func__, hparams.n_layer);
-        fprintf(stderr, "%s: n_rot      = %u\n",  __func__, hparams.n_rot);
-        fprintf(stderr, "%s: freq_base  = %.1f\n", __func__, hparams.rope_freq_base);
-        fprintf(stderr, "%s: freq_scale = %g\n",  __func__, hparams.rope_freq_scale);
+        fprintf(stderr, "%s: format     = %s\n",  __func__, llama_file_version_name(file_version));
+        fprintf(stderr, "%s: n_vocab    = %u\n",  __func__, hparams.n_vocab);
+        fprintf(stderr, "%s: n_ctx      = %u\n",  __func__, hparams.n_ctx);
+        fprintf(stderr, "%s: n_embd     = %u\n",  __func__, hparams.n_embd);
+        fprintf(stderr, "%s: n_mult     = %u\n",  __func__, hparams.n_mult);
+        fprintf(stderr, "%s: n_head     = %u\n",  __func__, hparams.n_head);
+        fprintf(stderr, "%s: n_layer    = %u\n",  __func__, hparams.n_layer);
+        fprintf(stderr, "%s: n_rot      = %u\n",  __func__, hparams.n_rot);
+        fprintf(stderr, "%s: freq_base  = %.1f\n", __func__, hparams.rope_freq_base);
+        fprintf(stderr, "%s: freq_scale = %g\n",  __func__, hparams.rope_freq_scale);
         fprintf(stderr, "%s: ftype      = %u (%s)\n", __func__, hparams.ftype, llama_ftype_name(hparams.ftype));
-        fprintf(stderr, "%s: n_ff       = %u\n", __func__, n_ff);
-        fprintf(stderr, "%s: model size = %s\n", __func__, llama_model_type_name(model.type));
+        fprintf(stderr, "%s: n_ff       = %u\n", __func__, n_ff);
+        fprintf(stderr, "%s: model size = %s\n", __func__, llama_model_type_name(model.type));
     }

     if (file_version < LLAMA_FILE_VERSION_GGJT_V2) {
@@ -1278,12 +1281,12 @@ static bool llama_model_load(
         llama_model & model,
         llama_vocab & vocab,
         int n_ctx,
-        float rope_freq_base,
-        float rope_freq_scale,
         int n_batch,
         int n_gpu_layers,
         int main_gpu,
         float * tensor_split,
+        float rope_freq_base,
+        float rope_freq_scale,
         bool low_vram,
         ggml_type memory_type,
         bool use_mmap,
@@ -1292,7 +1295,7 @@ static bool llama_model_load(
         llama_progress_callback progress_callback,
         void *progress_callback_user_data) {
     try {
-        llama_model_load_internal(fname, model, vocab, n_ctx, rope_freq_base, rope_freq_scale, n_batch, n_gpu_layers, main_gpu, tensor_split, low_vram, memory_type,
+        llama_model_load_internal(fname, model, vocab, n_ctx, n_batch, n_gpu_layers, main_gpu, tensor_split, rope_freq_base, rope_freq_scale, low_vram, memory_type,
                                   use_mmap, use_mlock, vocab_only, progress_callback, progress_callback_user_data);
         return true;
     } catch (const std::exception & err) {
@@ -1342,9 +1345,10 @@ static bool llama_eval_internal(
     const int n_head  = hparams.n_head;
     const int n_vocab = hparams.n_vocab;
     const int n_rot   = hparams.n_embd/hparams.n_head;
+    const int n_gpu_layers = model.n_gpu_layers;
+
     const float freq_base  = hparams.rope_freq_base;
     const float freq_scale = hparams.rope_freq_scale;
-    const int n_gpu_layers = model.n_gpu_layers;

     auto & mem_per_token = lctx.mem_per_token;
     auto & buf_compute   = lctx.buf_compute;
@@ -2689,9 +2693,9 @@ struct llama_model * llama_load_model_from_file(

     ggml_type memory_type = params.f16_kv ? GGML_TYPE_F16 : GGML_TYPE_F32;

-    if (!llama_model_load(path_model, *model, model->vocab, params.n_ctx, params.rope_freq_base, params.rope_freq_scale,
-            params.n_batch, params.n_gpu_layers, params.main_gpu, params.tensor_split, params.low_vram, memory_type,
-            params.use_mmap, params.use_mlock, params.vocab_only, params.progress_callback,
+    if (!llama_model_load(path_model, *model, model->vocab, params.n_ctx, params.n_batch, params.n_gpu_layers,
+            params.main_gpu, params.tensor_split, params.rope_freq_base, params.rope_freq_scale,params.low_vram,
+            memory_type, params.use_mmap, params.use_mlock, params.vocab_only, params.progress_callback,
             params.progress_callback_user_data)) {
         delete model;
         fprintf(stderr, "%s: failed to load model\n", __func__);

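One reason the initializer in llama_context_default_params moves in lockstep with the header change: the /*.field =*/ annotations are ordinary comments, so the braced initializer is purely positional and must match the declaration order in llama.h exactly. A reduced sketch of the hazard (the struct here is hypothetical, trimmed to four fields):

```cpp
#include <cstdint>
#include <cstdio>

// mirrors the llama_context_params layout after this commit:
// the rope fields now come after the batching/GPU fields
struct ctx_params {
    int32_t n_ctx;
    int32_t n_batch;
    float   rope_freq_base;
    float   rope_freq_scale;
};

static ctx_params default_params() {
    // positional aggregate initialization: the /*.name =*/ comments are
    // documentation only, so reordering fields without reordering these
    // lines would silently assign values to the wrong members
    ctx_params result = {
        /*.n_ctx           =*/ 512,
        /*.n_batch         =*/ 512,
        /*.rope_freq_base  =*/ 10000.0f,
        /*.rope_freq_scale =*/ 1.0f,
    };
    return result;
}

int main() {
    const ctx_params p = default_params();
    std::printf("n_ctx = %d, rope_freq_base = %.1f\n", p.n_ctx, p.rope_freq_base);
    return 0;
}
```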
llama.h

Lines changed: 5 additions & 2 deletions
@@ -85,12 +85,15 @@ extern "C" {
     struct llama_context_params {
         uint32_t seed;         // RNG seed, -1 for random
         int32_t  n_ctx;        // text context
-        float    rope_freq_base;  // RoPE base frequency
-        float    rope_freq_scale; // RoPE frequency scaling factor
         int32_t  n_batch;      // prompt processing batch size
         int32_t  n_gpu_layers; // number of layers to store in VRAM
         int32_t  main_gpu;     // the GPU that is used for scratch and small tensors
         float    tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
+
+        // ref: https://github.com/ggerganov/llama.cpp/pull/2054
+        float    rope_freq_base;  // RoPE base frequency
+        float    rope_freq_scale; // RoPE frequency scaling factor
+
         // called with a progress value between 0 and 1, pass NULL to disable
         llama_progress_callback progress_callback;
         // context pointer passed to the progress callback

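Downstream code sets the relocated fields the same way as before. A hedged usage sketch against the llama.h API at this commit, assuming a hypothetical model.bin and using the linear position scaling described in the referenced PR #2054 (halving freq_scale to roughly double the usable context):

```cpp
#include <cstdio>
#include "llama.h"

int main() {
    llama_context_params params = llama_context_default_params();

    params.n_ctx           = 4096;     // request a longer context
    params.rope_freq_base  = 10000.0f; // default RoPE base frequency
    params.rope_freq_scale = 0.5f;     // halve position growth -> ~2x context

    llama_model * model = llama_load_model_from_file("model.bin", params);
    if (model == nullptr) {
        std::fprintf(stderr, "failed to load model\n");
        return 1;
    }

    llama_context * ctx = llama_new_context_with_model(model, params);
    // ... tokenize and evaluate as usual ...

    llama_free(ctx);
    llama_free_model(model);
    return 0;
}
```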