Commit 0308f5e

llama : fix command-r inference when omitting outputs (#6367)
1 parent 28cb9a0 commit 0308f5e

File tree

1 file changed: +3 -2 lines changed


llama.cpp

Lines changed: 3 additions & 2 deletions
@@ -9152,8 +9152,9 @@ struct llm_build_context {
             if (il == n_layer - 1) {
                 // skip computing output for unused tokens
                 struct ggml_tensor * inp_out_ids = build_inp_out_ids();
-                cur  = ggml_get_rows(ctx0,  cur, inp_out_ids);
-                inpL = ggml_get_rows(ctx0, inpL, inp_out_ids);
+                cur     = ggml_get_rows(ctx0,     cur, inp_out_ids);
+                inpL    = ggml_get_rows(ctx0,    inpL, inp_out_ids);
+                ffn_inp = ggml_get_rows(ctx0, ffn_inp, inp_out_ids);
             }
 
             struct ggml_tensor * attn_out = cur;
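Why the added line matters: in the Command-R layer the attention output (cur), the residual stream (inpL), and the feed-forward input (ffn_inp) are all combined near the end of the block, so once the rows for unused tokens are dropped from cur and inpL, ffn_inp has to be narrowed with the same inp_out_ids, or the later combination operates on tensors with different numbers of rows.

Below is a minimal, self-contained sketch of the technique, not the llama.cpp graph itself; the tensor sizes, the 16 MB context, and the stand-in variable names are illustrative:

/*
 * Sketch: ggml_get_rows keeps only the rows named by an i32 index tensor.
 * Every tensor that is combined later in the graph must be filtered with
 * the SAME indices, which is exactly what this commit fixes for ffn_inp.
 */
#include <stdio.h>
#include <stdint.h>
#include "ggml.h"

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16u*1024u*1024u,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx0 = ggml_init(params);

    // hidden states for 4 tokens, embedding size 8 (stand-ins for cur/ffn_inp)
    struct ggml_tensor * cur     = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, 8, 4);
    struct ggml_tensor * ffn_inp = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, 8, 4);

    // we only need the output for the last token (index 3)
    struct ggml_tensor * inp_out_ids = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, 1);
    ((int32_t *) inp_out_ids->data)[0] = 3;

    // filter BOTH tensors; dropping one of these calls reproduces the bug:
    // a later combination of cur and ffn_inp would see [8, 1] vs [8, 4]
    cur     = ggml_get_rows(ctx0,     cur, inp_out_ids);
    ffn_inp = ggml_get_rows(ctx0, ffn_inp, inp_out_ids);

    printf("cur:     [%lld, %lld]\n", (long long) cur->ne[0],     (long long) cur->ne[1]);
    printf("ffn_inp: [%lld, %lld]\n", (long long) ffn_inp->ne[0], (long long) ffn_inp->ne[1]);

    ggml_free(ctx0);
    return 0;
}

Note that ggml fixes the result shape as soon as ggml_get_rows builds the node, so a missing filter surfaces as a shape mismatch when the graph is constructed, not deep inside inference.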
