@@ -45,8 +45,8 @@ static void print_usage(int argc, char ** argv) {
     (void) argc;
 
     LOG("\nexample usage:\n");
-    LOG("\n  text generation:     %s -m your_model.gguf -p \"I believe the meaning of life is\" -n 128\n", argv[0]);
-    LOG("\n  chat (conversation): %s -m your_model.gguf -p \"You are a helpful assistant\" -cnv\n", argv[0]);
+    LOG("\n  text generation:     %s -m your_model.gguf -p \"I believe the meaning of life is\" -n 128 -no-cnv\n", argv[0]);
+    LOG("\n  chat (conversation): %s -m your_model.gguf -sys \"You are a helpful assistant\"\n", argv[0]);
     LOG("\n");
 }
 
@@ -217,8 +217,8 @@ int main(int argc, char ** argv) {
     // print chat template example in conversation mode
     if (params.conversation_mode) {
         if (params.enable_chat_template) {
-            if (!params.prompt.empty()) {
-                LOG_WRN("*** User-specified prompt in conversation mode will be ignored, did you mean to set --system-prompt (-sys) instead?\n");
+            if (!params.prompt.empty() && params.system_prompt.empty()) {
+                LOG_WRN("*** User-specified prompt will pre-start conversation, did you mean to set --system-prompt (-sys) instead?\n");
             }
 
             LOG_INF("%s: chat template example:\n%s\n", __func__, common_chat_format_example(chat_templates.get(), params.use_jinja).c_str());
@@ -265,7 +265,7 @@ int main(int argc, char ** argv) {
 
     std::vector<llama_token> embd_inp;
 
-    bool waiting_for_first_input = params.conversation_mode && params.enable_chat_template && params.system_prompt.empty();
+    bool waiting_for_first_input = false;
     auto chat_add_and_format = [&chat_msgs, &chat_templates](const std::string & role, const std::string & content) {
         common_chat_msg new_msg;
         new_msg.role = role;
@@ -276,22 +276,34 @@ int main(int argc, char ** argv) {
         return formatted;
     };
 
+    std::string prompt;
     {
-        std::string prompt;
-
         if (params.conversation_mode && params.enable_chat_template) {
-            // format the system prompt in conversation mode (will use template default if empty)
-            prompt = params.system_prompt;
+            if (!params.system_prompt.empty()) {
+                // format the system prompt (will use template default if empty)
+                chat_add_and_format("system", params.system_prompt);
+            }
+
+            if (!params.prompt.empty()) {
+                // format and append the user prompt
+                chat_add_and_format("user", params.prompt);
+            } else {
+                waiting_for_first_input = true;
+            }
 
-            if (!prompt.empty()) {
-                prompt = chat_add_and_format("system", prompt);
+            if (!params.system_prompt.empty() || !params.prompt.empty()) {
+                common_chat_templates_inputs inputs;
+                inputs.messages = chat_msgs;
+                inputs.add_generation_prompt = !params.prompt.empty();
+
+                prompt = common_chat_templates_apply(chat_templates.get(), inputs).prompt;
             }
         } else {
             // otherwise use the prompt as is
             prompt = params.prompt;
         }
 
-        if (params.interactive_first || !params.prompt.empty() || session_tokens.empty()) {
+        if (params.interactive_first || !prompt.empty() || session_tokens.empty()) {
             LOG_DBG("tokenize the prompt\n");
             embd_inp = common_tokenize(ctx, prompt, true, true);
         } else {
@@ -304,7 +316,7 @@ int main(int argc, char ** argv) {
     }
 
     // Should not run without any tokens
-    if (!params.conversation_mode && embd_inp.empty()) {
+    if (!waiting_for_first_input && embd_inp.empty()) {
         if (add_bos) {
             embd_inp.push_back(llama_vocab_bos(vocab));
             LOG_WRN("embd_inp was considered empty and bos was added: %s\n", string_from(ctx, embd_inp).c_str());
@@ -364,7 +376,12 @@ int main(int argc, char ** argv) {
     }
 
     if (params.conversation_mode) {
-        params.interactive_first = true;
+        if (params.single_turn && !params.prompt.empty()) {
+            params.interactive = false;
+            params.interactive_first = false;
+        } else {
+            params.interactive_first = true;
+        }
     }
 
     // enable interactive mode if interactive start is specified
@@ -808,6 +825,11 @@ int main(int argc, char ** argv) {
             if (params.conversation_mode && !waiting_for_first_input) {
                 const auto id = common_sampler_last(smpl);
                 assistant_ss << common_token_to_piece(ctx, id, false);
+
+                if (!prompt.empty()) {
+                    prompt.clear();
+                    is_interacting = false;
+                }
             }
 
             if ((n_past > 0 || waiting_for_first_input) && is_interacting) {
@@ -905,6 +927,11 @@ int main(int argc, char ** argv) {
                     common_sampler_reset(smpl);
                 }
                 is_interacting = false;
+
+                if (waiting_for_first_input && params.single_turn) {
+                    params.interactive = false;
+                    params.interactive_first = false;
+                }
                 waiting_for_first_input = false;
             }
         }
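
Note: below is a minimal standalone sketch of the startup behaviour this diff introduces, using hypothetical stand-in types (Params, plan_startup) rather than the real common_params / llama.cpp APIs. It only models the decision logic: -sys becomes the system message, -p pre-starts the chat as the first user turn, single-turn plus a prompt skips the interactive loop, and the CLI only waits for input when no -p is given.

// startup_logic_sketch.cpp -- standalone sketch, NOT the llama.cpp API.
// Models how the patched main.cpp decides whether -p / -sys pre-start the
// conversation and whether the interactive loop is entered at all.
#include <cstdio>
#include <string>

struct Params {                        // hypothetical stand-in for common_params
    bool conversation_mode    = true;
    bool enable_chat_template = true;
    bool single_turn          = false;
    std::string system_prompt;         // -sys
    std::string prompt;                // -p
};

static void plan_startup(const Params & p) {
    bool waiting_for_first_input = false;
    bool interactive_first       = false;

    if (p.conversation_mode && p.enable_chat_template) {
        // -sys is formatted as a "system" message; -p is formatted as the first
        // "user" turn. Only when no -p is given do we wait for user input.
        if (p.prompt.empty()) {
            waiting_for_first_input = true;
        }
    }

    if (p.conversation_mode) {
        // single-turn plus a prompt: answer that one turn and exit, no interactive loop
        interactive_first = !(p.single_turn && !p.prompt.empty());
    }

    std::printf("sys=%-3s prompt=%-3s single_turn=%d -> waiting_for_first_input=%d interactive_first=%d\n",
                p.system_prompt.empty() ? "no" : "yes",
                p.prompt.empty()        ? "no" : "yes",
                (int) p.single_turn, (int) waiting_for_first_input, (int) interactive_first);
}

int main() {
    Params a;                                                   // plain chat: wait for the user
    plan_startup(a);

    Params b; b.system_prompt = "You are a helpful assistant";  // -sys only: still wait
    plan_startup(b);

    Params c; c.prompt = "Hello!";                              // -p pre-starts the conversation
    plan_startup(c);

    Params d; d.prompt = "Hello!"; d.single_turn = true;        // one answer, then exit
    plan_startup(d);
    return 0;
}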