@@ -37,34 +37,34 @@ llm_build_command_r::llm_build_command_r(const llama_model & model, const llm_gr
             if (model.layers[il].bq) {
                 Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq);
                 cb(Qcur, "Qcur", il);
-            };
+            }
             ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur);
             cb(Kcur, "Kcur", il);
             if (model.layers[il].bk) {
                 Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk);
                 cb(Kcur, "Kcur", il);
-            };
+            }
             ggml_tensor * Vcur = build_lora_mm(model.layers[il].wv, cur);
             cb(Vcur, "Vcur", il);
             if (model.layers[il].bv) {
                 Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv);
                 cb(Vcur, "Vcur", il);
-            };
+            }
             Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head,    n_tokens);
             Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
             Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens);

             if (model.layers[il].attn_q_norm) {
                 Qcur = build_norm(Qcur, model.layers[il].attn_q_norm, NULL, LLM_NORM, il);
                 cb(Qcur, "Qcur", il);
-            };
+            }
             Qcur = ggml_rope_ext(ctx0, Qcur, inp_pos, nullptr, n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
                     ext_factor, attn_factor, beta_fast, beta_slow);

             if (model.layers[il].attn_k_norm) {
                 Kcur = build_norm(Kcur, model.layers[il].attn_k_norm, NULL, LLM_NORM, il);
                 cb(Kcur, "Kcur", il);
-            };
+            }
             Kcur = ggml_rope_ext(ctx0, Kcur, inp_pos, nullptr, n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
                     ext_factor, attn_factor, beta_fast, beta_slow);

@@ -75,12 +75,12 @@ llm_build_command_r::llm_build_command_r(const llama_model & model, const llm_gr
             cur = build_attn(inp_attn,
                     model.layers[il].wo, model.layers[il].bo,
                     Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
-        };
+        }
         if (il == n_layer - 1 && inp_out_ids) {
             cur     = ggml_get_rows(ctx0, cur,     inp_out_ids);
             inpL    = ggml_get_rows(ctx0, inpL,    inp_out_ids);
             ffn_inp = ggml_get_rows(ctx0, ffn_inp, inp_out_ids);
-        };
+        }
         ggml_tensor * attn_out = cur;

         // feed-forward network
@@ -91,7 +91,7 @@ llm_build_command_r::llm_build_command_r(const llama_model & model, const llm_gr
                 model.layers[il].ffn_down, NULL, NULL,
                 NULL, LLM_FFN_SILU, LLM_FFN_PAR, il);
         cb(cur, "ffn_out", il);
-        };
+        }
         // add together residual + FFN + self-attention
         cur = ggml_add(ctx0, cur, inpL);
         cur = ggml_add(ctx0, cur, attn_out);
@@ -101,7 +101,7 @@ llm_build_command_r::llm_build_command_r(const llama_model & model, const llm_gr

         // input for next layer
         inpL = cur;
-    };
+    }
     cur = inpL;

     cur = build_norm(cur, model.output_norm, NULL, LLM_NORM, -1);
@@ -114,7 +114,7 @@ llm_build_command_r::llm_build_command_r(const llama_model & model, const llm_gr

     if (f_logit_scale) {
         cur = ggml_scale(ctx0, cur, f_logit_scale);
-    };
+    }
     cb(cur, "result_output", -1);
     res->t_logits = cur;
