@@ -3365,7 +3365,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
33653365 add_opt (common_arg (
33663366 {" --chat-template-kwargs" }, " STRING" ,
33673367 string_format (" sets additional params for the json template parser" ),
3368- [](common_params & params, const std::string & value) {
3368+ [](common_params & params, const std::string & value) {
33693369 auto parsed = json::parse (value);
33703370 for (const auto & item : parsed.items ()) {
33713371 params.default_template_kwargs [item.key ()] = item.value ().dump ();
@@ -3577,21 +3577,23 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
35773577 common_log_set_file (common_log_main (), value.c_str ());
35783578 }
35793579 ));
3580- add_opt (common_arg ({ " --log-colors" }, " [on|off|auto]" ,
3581- " Set colored logging ('on', 'off', or 'auto', default: 'auto')\n "
3582- " 'auto' enables colors when output is to a terminal" ,
3583- [](common_params &, const std::string & value) {
3584- if (is_truthy (value)) {
3585- common_log_set_colors (common_log_main (), LOG_COLORS_ENABLED);
3586- } else if (is_falsey (value)) {
3587- common_log_set_colors (common_log_main (), LOG_COLORS_DISABLED);
3588- } else if (is_autoy (value)) {
3589- common_log_set_colors (common_log_main (), LOG_COLORS_AUTO);
3590- } else {
3591- throw std::invalid_argument (
3592- string_format (" error: unkown value for --log-colors: '%s'\n " , value.c_str ()));
3593- }
3594- }).set_env (" LLAMA_LOG_COLORS" ));
3580+ add_opt (common_arg (
3581+ {" --log-colors" }, " [on|off|auto]" ,
3582+ " Set colored logging ('on', 'off', or 'auto', default: 'auto')\n "
3583+ " 'auto' enables colors when output is to a terminal" ,
3584+ [](common_params &, const std::string & value) {
3585+ if (is_truthy (value)) {
3586+ common_log_set_colors (common_log_main (), LOG_COLORS_ENABLED);
3587+ } else if (is_falsey (value)) {
3588+ common_log_set_colors (common_log_main (), LOG_COLORS_DISABLED);
3589+ } else if (is_autoy (value)) {
3590+ common_log_set_colors (common_log_main (), LOG_COLORS_AUTO);
3591+ } else {
3592+ throw std::invalid_argument (
3593+ string_format (" error: unknown value for --log-colors: '%s'\n " , value.c_str ()));
3594+ }
3595+ }
3596+ ).set_env (" LLAMA_LOG_COLORS" ));
35953597 add_opt (common_arg (
35963598 {" -v" , " --verbose" , " --log-verbose" },
35973599 " Set verbosity level to infinity (i.e. log all messages, useful for debugging)" ,
@@ -3857,7 +3859,87 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
38573859 }
38583860 ).set_examples ({LLAMA_EXAMPLE_TTS}));
38593861
3860- // model-specific
3862+ add_opt (common_arg (
3863+ {" --diffusion-steps" }, " N" ,
3864+ string_format (" number of diffusion steps (default: %d)" , params.diffusion .steps ),
3865+ [](common_params & params, int value) { params.diffusion .steps = value; }
3866+ ).set_examples ({ LLAMA_EXAMPLE_DIFFUSION }));
3867+ add_opt (common_arg (
3868+ {" --diffusion-visual" },
3869+ string_format (" enable visual diffusion mode (show progressive generation) (default: %s)" , params.diffusion .visual_mode ? " true" : " false" ),
3870+ [](common_params & params) { params.diffusion .visual_mode = true ; }
3871+ ).set_examples ({ LLAMA_EXAMPLE_DIFFUSION }));
3872+ add_opt (common_arg (
3873+ {" --diffusion-eps" }, " F" ,
3874+ string_format (" epsilon for timesteps (default: %.6f)" , (double ) params.diffusion .eps ),
3875+ [](common_params & params, const std::string & value) { params.diffusion .eps = std::stof (value); }
3876+ ).set_examples ({ LLAMA_EXAMPLE_DIFFUSION }));
3877+ add_opt (common_arg (
3878+ {" --diffusion-algorithm" }, " N" ,
3879+ string_format (" diffusion algorithm: 0=ORIGIN, 1=ENTROPY_BASED, 2=MARGIN_BASED, 3=RANDOM, 4=LOW_CONFIDENCE (default: %d)" , params.diffusion .algorithm ),
3880+ [](common_params & params, int value) { params.diffusion .algorithm = value; }
3881+ ).set_examples ({ LLAMA_EXAMPLE_DIFFUSION }));
3882+ add_opt (common_arg (
3883+ {" --diffusion-alg-temp" }, " F" ,
3884+ string_format (" dream algorithm temperature (default: %.3f)" , (double ) params.diffusion .alg_temp ),
3885+ [](common_params & params, const std::string & value) { params.diffusion .alg_temp = std::stof (value); }
3886+ ).set_examples ({ LLAMA_EXAMPLE_DIFFUSION }));
3887+ add_opt (common_arg (
3888+ {" --diffusion-block-length" }, " N" ,
3889+ string_format (" llada block length for generation (default: %d)" , params.diffusion .block_length ),
3890+ [](common_params & params, int value) { params.diffusion .block_length = value; }
3891+ ).set_examples ({ LLAMA_EXAMPLE_DIFFUSION }));
3892+ add_opt (common_arg (
3893+ {" --diffusion-cfg-scale" }, " F" ,
3894+ string_format (" llada classifier-free guidance scale (default: %.3f)" , (double ) params.diffusion .cfg_scale ),
3895+ [](common_params & params, const std::string & value) { params.diffusion .cfg_scale = std::stof (value); }
3896+ ).set_examples ({ LLAMA_EXAMPLE_DIFFUSION }));
3897+ add_opt (common_arg (
3898+ {" --diffusion-add-gumbel-noise" }, " F" ,
3899+ string_format (" add gumbel noise to the logits if temp > 0.0 (default: %s)" , params.diffusion .add_gumbel_noise ? " true" : " false" ),
3900+ [](common_params & params, const std::string & value) { params.diffusion .add_gumbel_noise = std::stof (value); }
3901+ ).set_examples ({ LLAMA_EXAMPLE_DIFFUSION }));
3902+ add_opt (common_arg (
3903+ { " -lr" , " --learning-rate" }, " ALPHA" ,
3904+ string_format (" adamw or sgd optimizer alpha (default: %.2g); note: sgd alpha recommended ~10x (no momentum)" , (double ) params.lr .lr0 ),
3905+ [](common_params & params, const std::string & value) { params.lr .lr0 = std::stof (value); }
3906+ ).set_examples ({ LLAMA_EXAMPLE_FINETUNE }));
3907+ add_opt (common_arg ({ " -lr-min" , " --learning-rate-min" }, " ALPHA" ,
3908+ string_format (" (if >0) final learning rate after decay (if -decay-epochs is set, default=%.2g)" ,
3909+ (double ) params.lr .lr_min ),
3910+ [](common_params & params, const std::string & value) { params.lr .lr_min = std::stof (value); }
3911+ ).set_examples ({ LLAMA_EXAMPLE_FINETUNE }));
3912+ add_opt (common_arg (
3913+ {" -decay-epochs" , " --learning-rate-decay-epochs" }, " ALPHA" ,
3914+ string_format (" (if >0) decay learning rate to -lr-min after this many epochs (exponential decay, default=%.2g)" , (double ) params.lr .decay_epochs ),
3915+ [](common_params & params, const std::string & value) { params.lr .decay_epochs = std::stof (value); }
3916+ ).set_examples ({ LLAMA_EXAMPLE_FINETUNE }));
3917+ add_opt (common_arg (
3918+ {" -wd" , " --weight-decay" }, " WD" ,
3919+ string_format (" adamw or sgd optimizer weight decay (0 is off; recommend very small e.g. 1e-9) (default: %.2g)." , (double ) params.lr .wd ),
3920+ [](common_params & params, const std::string & value) { params.lr .wd = std::stof (value); }
3921+ ).set_examples ({ LLAMA_EXAMPLE_FINETUNE }));
3922+ add_opt (common_arg (
3923+ {" -val-split" , " --val-split" }, " FRACTION" ,
3924+ string_format (" fraction of data to use as validation set for training (default: %.2g)." , (double ) params.val_split ),
3925+ [](common_params & params, const std::string & value) { params.val_split = std::stof (value); }
3926+ ).set_examples ({ LLAMA_EXAMPLE_FINETUNE }));
3927+ add_opt (common_arg (
3928+ {" -epochs" , " --epochs" }, " N" ,
3929+ string_format (" optimizer max # of epochs (default: %d)" , params.lr .epochs ),
3930+ [](common_params & params, int epochs) { params.lr .epochs = epochs; }
3931+ ).set_examples ({ LLAMA_EXAMPLE_FINETUNE }));
3932+ add_opt (common_arg (
3933+ {" -opt" , " --optimizer" }, " sgd|adamw" , " adamw or sgd" ,
3934+ [](common_params & params, const std::string & name) {
3935+ params.optimizer = common_opt_get_optimizer (name.c_str ());
3936+ if (params.optimizer == GGML_OPT_OPTIMIZER_TYPE_COUNT) {
3937+ throw std::invalid_argument (" invalid --optimizer, valid options: adamw, sgd" );
3938+ }
3939+ }
3940+ ).set_examples ({ LLAMA_EXAMPLE_FINETUNE }));
3941+
3942+ // presets
38613943 add_opt (common_arg (
38623944 {" --tts-oute-default" },
38633945 string_format (" use default OuteTTS models (note: can download weights from the internet)" ),
@@ -3870,39 +3952,16 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
38703952 ).set_examples ({LLAMA_EXAMPLE_TTS}));
38713953
38723954 add_opt (common_arg (
3873- {" --embd-bge-small-en-default" },
3874- string_format (" use default bge-small-en-v1.5 model (note: can download weights from the internet)" ),
3875- [](common_params & params) {
3876- params.model .hf_repo = " ggml-org/bge-small-en-v1.5-Q8_0-GGUF" ;
3877- params.model .hf_file = " bge-small-en-v1.5-q8_0.gguf" ;
3878- params.embd_normalize = 2 ;
3879- params.n_ctx = 512 ;
3880- params.verbose_prompt = true ;
3881- params.embedding = true ;
3882- }
3883- ).set_examples ({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER}));
3884-
3885- add_opt (common_arg (
3886- {" --embd-e5-small-en-default" },
3887- string_format (" use default e5-small-v2 model (note: can download weights from the internet)" ),
3888- [](common_params & params) {
3889- params.model .hf_repo = " ggml-org/e5-small-v2-Q8_0-GGUF" ;
3890- params.model .hf_file = " e5-small-v2-q8_0.gguf" ;
3891- params.embd_normalize = 2 ;
3892- params.n_ctx = 512 ;
3893- params.verbose_prompt = true ;
3894- params.embedding = true ;
3895- }
3896- ).set_examples ({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER}));
3897-
3898- add_opt (common_arg (
3899- {" --embd-gte-small-default" },
3900- string_format (" use default gte-small model (note: can download weights from the internet)" ),
3955+ {" --embd-gemma-default" },
3956+ string_format (" use default EmbeddingGemma model (note: can download weights from the internet)" ),
39013957 [](common_params & params) {
3902- params.model .hf_repo = " ggml-org/gte-small-Q8_0-GGUF" ;
3903- params.model .hf_file = " gte-small-q8_0.gguf" ;
3904- params.embd_normalize = 2 ;
3905- params.n_ctx = 512 ;
3958+ params.model .hf_repo = " ggml-org/embeddinggemma-300M-qat-q4_0-GGUF" ;
3959+ params.model .hf_file = " embeddinggemma-300M-qat-Q4_0.gguf" ;
3960+ params.port = 8011 ;
3961+ params.n_ubatch = 2048 ;
3962+ params.n_batch = 2048 ;
3963+ params.n_parallel = 32 ;
3964+ params.n_ctx = 2048 *params.n_parallel ;
39063965 params.verbose_prompt = true ;
39073966 params.embedding = true ;
39083967 }
@@ -3997,96 +4056,65 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
39974056 ).set_examples ({LLAMA_EXAMPLE_SERVER}));
39984057
39994058 add_opt (common_arg (
4000- { " --diffusion-steps" }, " N" ,
4001- string_format (" number of diffusion steps (default: %d)" , params.diffusion .steps ),
4002- [](common_params & params, int value) { params.diffusion .steps = value; }
4003- ).set_examples ({ LLAMA_EXAMPLE_DIFFUSION }));
4004- add_opt (common_arg (
4005- { " --diffusion-visual" },
4006- string_format (" enable visual diffusion mode (show progressive generation) (default: %s)" ,
4007- params.diffusion .visual_mode ? " true" : " false" ),
4008- [](common_params & params) { params.diffusion .visual_mode = true ; }
4009- ).set_examples ({ LLAMA_EXAMPLE_DIFFUSION }));
4059+ {" --gpt-oss-20b-default" },
4060+ string_format (" use gpt-oss-20b (note: can download weights from the internet)" ),
4061+ [](common_params & params) {
4062+ params.model .hf_repo = " ggml-org/gpt-oss-20b-GGUF" ;
4063+ params.model .hf_file = " gpt-oss-20b-mxfp4.gguf" ;
4064+ params.port = 8013 ;
4065+ params.n_ubatch = 2048 ;
4066+ params.n_batch = 32768 ;
4067+ params.n_parallel = 2 ;
4068+ params.n_ctx = 131072 *params.n_parallel ;
4069+ params.sampling .temp = 1 .0f ;
4070+ params.sampling .top_p = 1 .0f ;
4071+ params.sampling .top_k = 0 ;
4072+ params.sampling .min_p = 0 .01f ;
4073+ params.use_jinja = true ;
4074+ // params.default_template_kwargs["reasoning_effort"] = "\"high\"";
4075+ }
4076+ ).set_examples ({LLAMA_EXAMPLE_SERVER}));
40104077
40114078 add_opt (common_arg (
4012- { " --diffusion-eps" }, " F" ,
4013- string_format (" epsilon for timesteps (default: %.6f)" , (double ) params.diffusion .eps ),
4014- [](common_params & params, const std::string & value) { params.diffusion .eps = std::stof (value); }
4015- ).set_examples ({ LLAMA_EXAMPLE_DIFFUSION }));
4016- add_opt (common_arg (
4017- { " --diffusion-algorithm" }, " N" ,
4018- string_format (" diffusion algorithm: 0=ORIGIN, 1=ENTROPY_BASED, 2=MARGIN_BASED, 3=RANDOM, 4=LOW_CONFIDENCE (default: %d)" ,
4019- params.diffusion .algorithm ),
4020- [](common_params & params, int value) { params.diffusion .algorithm = value; }
4021- ).set_examples ({ LLAMA_EXAMPLE_DIFFUSION }));
4022- add_opt (common_arg (
4023- { " --diffusion-alg-temp" }, " F" ,
4024- string_format (" dream algorithm temperature (default: %.3f)" , (double ) params.diffusion .alg_temp ),
4025- [](common_params & params, const std::string & value) { params.diffusion .alg_temp = std::stof (value); }
4026- ).set_examples ({ LLAMA_EXAMPLE_DIFFUSION }));
4079+ {" --gpt-oss-120b-default" },
4080+ string_format (" use gpt-oss-120b (note: can download weights from the internet)" ),
4081+ [](common_params & params) {
4082+ params.model .hf_repo = " ggml-org/gpt-oss-120b-GGUF" ;
4083+ params.port = 8013 ;
4084+ params.n_ubatch = 2048 ;
4085+ params.n_batch = 32768 ;
4086+ params.n_parallel = 2 ;
4087+ params.n_ctx = 131072 *params.n_parallel ;
4088+ params.sampling .temp = 1 .0f ;
4089+ params.sampling .top_p = 1 .0f ;
4090+ params.sampling .top_k = 0 ;
4091+ params.sampling .min_p = 0 .01f ;
4092+ params.use_jinja = true ;
4093+ // params.default_template_kwargs["reasoning_effort"] = "\"high\"";
4094+ }
4095+ ).set_examples ({LLAMA_EXAMPLE_SERVER}));
40274096
40284097 add_opt (common_arg (
4029- { " --diffusion-block-length" }, " N" ,
4030- string_format (" llada block length for generation (default: %d)" , params.diffusion .block_length ),
4031- [](common_params & params, int value) { params.diffusion .block_length = value; }
4032- ).set_examples ({ LLAMA_EXAMPLE_DIFFUSION }));
4033- add_opt (common_arg (
4034- { " --diffusion-cfg-scale" }, " F" ,
4035- string_format (" llada classifier-free guidance scale (default: %.3f)" , (double ) params.diffusion .cfg_scale ),
4036- [](common_params & params, const std::string & value) { params.diffusion .cfg_scale = std::stof (value); }
4037- ).set_examples ({ LLAMA_EXAMPLE_DIFFUSION }));
4038- add_opt (common_arg (
4039- { " --diffusion-add-gumbel-noise" }, " F" ,
4040- string_format (" add gumbel noise to the logits if temp > 0.0 (default: %s)" , params.diffusion .add_gumbel_noise ? " true" : " false" ),
4041- [](common_params & params, const std::string & value) { params.diffusion .add_gumbel_noise = std::stof (value); }
4042- ).set_examples ({ LLAMA_EXAMPLE_DIFFUSION }));
4043-
4098+ {" --vision-gemma-4b-default" },
4099+ string_format (" use Gemma 3 4B QAT (note: can download weights from the internet)" ),
4100+ [](common_params & params) {
4101+ params.model .hf_repo = " ggml-org/gemma-3-4b-it-qat-GGUF" ;
4102+ params.port = 8014 ;
4103+ params.n_ctx = 0 ;
4104+ params.use_jinja = true ;
4105+ }
4106+ ).set_examples ({LLAMA_EXAMPLE_SERVER}));
40444107
4045- add_opt (
4046- common_arg ({ " -lr" , " --learning-rate" }, " ALPHA" ,
4047- string_format (
4048- " adamw or sgd optimizer alpha (default: %.2g); note: sgd alpha recommended ~10x (no momentum)" ,
4049- (double ) params.lr .lr0 ),
4050- [](common_params & params, const std::string & value) { params.lr .lr0 = std::stof (value); })
4051- .set_examples ({ LLAMA_EXAMPLE_FINETUNE }));
4052- add_opt (
4053- common_arg ({ " -lr-min" , " --learning-rate-min" }, " ALPHA" ,
4054- string_format (
4055- " (if >0) final learning rate after decay (if -decay-epochs is set, default=%.2g)" ,
4056- (double ) params.lr .lr_min ),
4057- [](common_params & params, const std::string & value) { params.lr .lr_min = std::stof (value); })
4058- .set_examples ({ LLAMA_EXAMPLE_FINETUNE }));
4059- add_opt (
4060- common_arg ({ " -decay-epochs" , " --learning-rate-decay-epochs" }, " ALPHA" ,
4061- string_format (
4062- " (if >0) decay learning rate to -lr-min after this many epochs (exponential decay, default=%.2g)" ,
4063- (double ) params.lr .decay_epochs ),
4064- [](common_params & params, const std::string & value) { params.lr .decay_epochs = std::stof (value); })
4065- .set_examples ({ LLAMA_EXAMPLE_FINETUNE }));
4066- add_opt (common_arg (
4067- { " -wd" , " --weight-decay" }, " WD" ,
4068- string_format (
4069- " adamw or sgd optimizer weight decay (0 is off; recommend very small e.g. 1e-9) (default: %.2g)." ,
4070- (double ) params.lr .wd ),
4071- [](common_params & params, const std::string & value) { params.lr .wd = std::stof (value); })
4072- .set_examples ({ LLAMA_EXAMPLE_FINETUNE }));
4073- add_opt (common_arg ({ " -val-split" , " --val-split" }, " FRACTION" ,
4074- string_format (" fraction of data to use as validation set for training (default: %.2g)." ,
4075- (double ) params.val_split ),
4076- [](common_params & params, const std::string & value) { params.val_split = std::stof (value); })
4077- .set_examples ({ LLAMA_EXAMPLE_FINETUNE }));
4078- add_opt (common_arg ({ " -epochs" , " --epochs" }, " N" ,
4079- string_format (" optimizer max # of epochs (default: %d)" , params.lr .epochs ),
4080- [](common_params & params, int epochs) { params.lr .epochs = epochs; })
4081- .set_examples ({ LLAMA_EXAMPLE_FINETUNE }));
4082- add_opt (common_arg ({ " -opt" , " --optimizer" }, " sgd|adamw" , " adamw or sgd" ,
4083- [](common_params & params, const std::string & name) {
4084- params.optimizer = common_opt_get_optimizer (name.c_str ());
4085- if (params.optimizer == GGML_OPT_OPTIMIZER_TYPE_COUNT) {
4086- throw std::invalid_argument (" invalid --optimizer, valid options: adamw, sgd" );
4087- }
4088- })
4089- .set_examples ({ LLAMA_EXAMPLE_FINETUNE }));
4108+ add_opt (common_arg (
4109+ {" --vision-gemma-12b-default" },
4110+ string_format (" use Gemma 3 12B QAT (note: can download weights from the internet)" ),
4111+ [](common_params & params) {
4112+ params.model .hf_repo = " ggml-org/gemma-3-12b-it-qat-GGUF" ;
4113+ params.port = 8014 ;
4114+ params.n_ctx = 0 ;
4115+ params.use_jinja = true ;
4116+ }
4117+ ).set_examples ({LLAMA_EXAMPLE_SERVER}));
40904118
40914119 return ctx_arg;
40924120}
0 commit comments