@@ -37,34 +37,34 @@ llm_build_command_r::llm_build_command_r(const llama_model & model, const llm_gr
             if (model.layers[il].bq) {
                 Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq);
                 cb(Qcur, "Qcur", il);
-            };
+            }
             ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur);
             cb(Kcur, "Kcur", il);
             if (model.layers[il].bk) {
                 Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk);
                 cb(Kcur, "Kcur", il);
-            };
+            }
             ggml_tensor * Vcur = build_lora_mm(model.layers[il].wv, cur);
             cb(Vcur, "Vcur", il);
             if (model.layers[il].bv) {
                 Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv);
                 cb(Vcur, "Vcur", il);
-            };
+            }
             Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head,    n_tokens);
             Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
             Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens);

             if (model.layers[il].attn_q_norm) {
                 Qcur = build_norm(Qcur, model.layers[il].attn_q_norm, NULL, LLM_NORM, il);
                 cb(Qcur, "Qcur", il);
-            };
+            }
             Qcur = ggml_rope_ext(ctx0, Qcur, inp_pos, nullptr, n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
                     ext_factor, attn_factor, beta_fast, beta_slow);

             if (model.layers[il].attn_k_norm) {
                 Kcur = build_norm(Kcur, model.layers[il].attn_k_norm, NULL, LLM_NORM, il);
                 cb(Kcur, "Kcur", il);
-            };
+            }
             Kcur = ggml_rope_ext(ctx0, Kcur, inp_pos, nullptr, n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
                     ext_factor, attn_factor, beta_fast, beta_slow);

@@ -75,12 +75,12 @@ llm_build_command_r::llm_build_command_r(const llama_model & model, const llm_gr
             cur = build_attn(inp_attn,
                     model.layers[il].wo, model.layers[il].bo,
                     Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
-        };
+        }
         if (il == n_layer - 1 && inp_out_ids) {
             cur     = ggml_get_rows(ctx0, cur,     inp_out_ids);
             inpL    = ggml_get_rows(ctx0, inpL,    inp_out_ids);
             ffn_inp = ggml_get_rows(ctx0, ffn_inp, inp_out_ids);
-        };
+        }
         ggml_tensor * attn_out = cur;

         // feed-forward network
@@ -91,7 +91,7 @@ llm_build_command_r::llm_build_command_r(const llama_model & model, const llm_gr
                 model.layers[il].ffn_down, NULL, NULL,
                 NULL, LLM_FFN_SILU, LLM_FFN_PAR, il);
         cb(cur, "ffn_out", il);
-        };
+        }
         // add together residual + FFN + self-attention
         cur = ggml_add(ctx0, cur, inpL);
         cur = ggml_add(ctx0, cur, attn_out);
@@ -101,7 +101,7 @@ llm_build_command_r::llm_build_command_r(const llama_model & model, const llm_gr

         // input for next layer
         inpL = cur;
-    };
+    }
     cur = inpL;

     cur = build_norm(cur, model.output_norm, NULL, LLM_NORM, -1);
@@ -114,7 +114,7 @@ llm_build_command_r::llm_build_command_r(const llama_model & model, const llm_gr

     if (f_logit_scale) {
         cur = ggml_scale(ctx0, cur, f_logit_scale);
-    };
+    }
     cb(cur, "result_output", -1);
     res->t_logits = cur;
