
Commit c824282

CISC and mglambda authored and committed
main: allow preloading conversation with -p and add -st / --single-turn (ggml-org#12145)
* Add chat template formatting to -no-cnv
* only enable prompt formatting if explicitly enabled
* add -st / --single-turn
* add --single-turn and -p in conversation mode
* fix -sys + -p
* reword warning
* small readability change and fix (long) outdated example usage
* only activate single turn in conversation mode
1 parent 29eda02 commit c824282
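
For orientation, a hedged sketch of the resulting CLI behavior (the binary name llama-cli and the model path are placeholders, not part of this commit; the flags are those the diffs below introduce or touch):

    # one-shot chat: -p preloads the first user turn, -st exits after the reply
    llama-cli -m your_model.gguf -sys "You are a helpful assistant" -p "Hello there" -st

    # -st without -p still reads a single user turn interactively, then exits
    llama-cli -m your_model.gguf -sys "You are a helpful assistant" -st

    # plain text generation now requires an explicit opt-out of conversation mode
    llama-cli -m your_model.gguf -p "I believe the meaning of life is" -n 128 -no-cnv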

File tree

common/arg.cpp
common/common.h
examples/main/main.cpp

3 files changed: +52 -14 lines changed

common/arg.cpp

Lines changed: 9 additions & 0 deletions
@@ -949,6 +949,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.conversation_mode = COMMON_CONVERSATION_MODE_DISABLED;
         }
     ).set_examples({LLAMA_EXAMPLE_MAIN}));
+    add_opt(common_arg(
+        {"-st", "--single-turn"},
+        "run conversation for a single turn only, then exit when done\n"
+        "will not be interactive if first turn is predefined with --prompt\n"
+        "(default: false)",
+        [](common_params & params) {
+            params.single_turn = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(common_arg(
         {"-i", "--interactive"},
         string_format("run in interactive mode (default: %s)", params.interactive ? "true" : "false"),

common/common.h

Lines changed: 2 additions & 0 deletions
@@ -328,6 +328,8 @@ struct common_params {
     bool warmup        = true;  // warmup run
     bool check_tensors = false; // validate tensor data

+    bool single_turn   = false; // single turn chat conversation
+
     ggml_type cache_type_k = GGML_TYPE_F16; // KV cache data type for the K
     ggml_type cache_type_v = GGML_TYPE_F16; // KV cache data type for the V

examples/main/main.cpp

Lines changed: 41 additions & 14 deletions
@@ -45,8 +45,8 @@ static void print_usage(int argc, char ** argv) {
     (void) argc;

     LOG("\nexample usage:\n");
-    LOG("\n  text generation:     %s -m your_model.gguf -p \"I believe the meaning of life is\" -n 128\n", argv[0]);
-    LOG("\n  chat (conversation): %s -m your_model.gguf -p \"You are a helpful assistant\" -cnv\n", argv[0]);
+    LOG("\n  text generation:     %s -m your_model.gguf -p \"I believe the meaning of life is\" -n 128 -no-cnv\n", argv[0]);
+    LOG("\n  chat (conversation): %s -m your_model.gguf -sys \"You are a helpful assistant\"\n", argv[0]);
     LOG("\n");
 }

@@ -217,8 +217,8 @@ int main(int argc, char ** argv) {
     // print chat template example in conversation mode
     if (params.conversation_mode) {
         if (params.enable_chat_template) {
-            if (!params.prompt.empty()) {
-                LOG_WRN("*** User-specified prompt in conversation mode will be ignored, did you mean to set --system-prompt (-sys) instead?\n");
+            if (!params.prompt.empty() && params.system_prompt.empty()) {
+                LOG_WRN("*** User-specified prompt will pre-start conversation, did you mean to set --system-prompt (-sys) instead?\n");
             }

             LOG_INF("%s: chat template example:\n%s\n", __func__, common_chat_format_example(chat_templates.get(), params.use_jinja).c_str());
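
Note: the warning now fires only when -p is given without -sys, since a bare -p in conversation mode is no longer ignored but pre-starts the conversation as the first user turn.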
@@ -265,7 +265,7 @@ int main(int argc, char ** argv) {

     std::vector<llama_token> embd_inp;

-    bool waiting_for_first_input = params.conversation_mode && params.enable_chat_template && params.system_prompt.empty();
+    bool waiting_for_first_input = false;
     auto chat_add_and_format = [&chat_msgs, &chat_templates](const std::string & role, const std::string & content) {
         common_chat_msg new_msg;
         new_msg.role = role;
@@ -276,22 +276,34 @@ int main(int argc, char ** argv) {
         return formatted;
     };

+    std::string prompt;
     {
-        std::string prompt;
-
         if (params.conversation_mode && params.enable_chat_template) {
-            // format the system prompt in conversation mode (will use template default if empty)
-            prompt = params.system_prompt;
+            if (!params.system_prompt.empty()) {
+                // format the system prompt (will use template default if empty)
+                chat_add_and_format("system", params.system_prompt);
+            }
+
+            if (!params.prompt.empty()) {
+                // format and append the user prompt
+                chat_add_and_format("user", params.prompt);
+            } else {
+                waiting_for_first_input = true;
+            }

-            if (!prompt.empty()) {
-                prompt = chat_add_and_format("system", prompt);
+            if (!params.system_prompt.empty() || !params.prompt.empty()) {
+                common_chat_templates_inputs inputs;
+                inputs.messages = chat_msgs;
+                inputs.add_generation_prompt = !params.prompt.empty();
+
+                prompt = common_chat_templates_apply(chat_templates.get(), inputs).prompt;
             }
         } else {
             // otherwise use the prompt as is
             prompt = params.prompt;
         }

-        if (params.interactive_first || !params.prompt.empty() || session_tokens.empty()) {
+        if (params.interactive_first || !prompt.empty() || session_tokens.empty()) {
             LOG_DBG("tokenize the prompt\n");
             embd_inp = common_tokenize(ctx, prompt, true, true);
         } else {
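
Taken together with the previous hunk, this moves the waiting_for_first_input decision into the prompt-assembly block. Read in isolation, the new preload path amounts to the following (a minimal sketch built only from the calls visible in this hunk; it assumes the llama.cpp common library headers, and the helper name build_initial_prompt is hypothetical, not part of the commit):

    // minimal sketch, assuming llama.cpp's common library (common/chat.h);
    // build_initial_prompt is a hypothetical helper for illustration only
    #include "chat.h"

    #include <string>
    #include <vector>

    static std::string build_initial_prompt(common_chat_templates * tmpls,
                                            const std::string & system_prompt,
                                            const std::string & user_prompt) {
        std::vector<common_chat_msg> chat_msgs;

        if (!system_prompt.empty()) {
            common_chat_msg msg;
            msg.role    = "system";
            msg.content = system_prompt;
            chat_msgs.push_back(msg);
        }

        if (!user_prompt.empty()) {
            common_chat_msg msg;
            msg.role    = "user";
            msg.content = user_prompt;
            chat_msgs.push_back(msg);
        }

        common_chat_templates_inputs inputs;
        inputs.messages = chat_msgs;
        // only request an assistant header when a user turn was preloaded
        inputs.add_generation_prompt = !user_prompt.empty();

        // render the system and user messages through the model's chat template
        return common_chat_templates_apply(tmpls, inputs).prompt;
    }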
@@ -304,7 +316,7 @@ int main(int argc, char ** argv) {
     }

     // Should not run without any tokens
-    if (!params.conversation_mode && embd_inp.empty()) {
+    if (!waiting_for_first_input && embd_inp.empty()) {
         if (add_bos) {
             embd_inp.push_back(llama_vocab_bos(vocab));
             LOG_WRN("embd_inp was considered empty and bos was added: %s\n", string_from(ctx, embd_inp).c_str());
@@ -364,7 +376,12 @@ int main(int argc, char ** argv) {
     }

     if (params.conversation_mode) {
-        params.interactive_first = true;
+        if (params.single_turn && !params.prompt.empty()) {
+            params.interactive       = false;
+            params.interactive_first = false;
+        } else {
+            params.interactive_first = true;
+        }
     }

     // enable interactive mode if interactive start is specified
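
In effect, -st plus a preloaded -p never enters interactive mode at all, while -st alone still opens exactly one interactive turn (shut off further down, once the first input has been consumed).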
@@ -808,6 +825,11 @@ int main(int argc, char ** argv) {
         if (params.conversation_mode && !waiting_for_first_input) {
             const auto id = common_sampler_last(smpl);
             assistant_ss << common_token_to_piece(ctx, id, false);
+
+            if (!prompt.empty()) {
+                prompt.clear();
+                is_interacting = false;
+            }
         }

         if ((n_past > 0 || waiting_for_first_input) && is_interacting) {
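
Clearing the preloaded prompt here, on the first generated assistant token, makes the preloaded turn behave like typed input: control is not handed back to the user mid-reply, and later iterations no longer treat the turn as preloaded.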
@@ -905,6 +927,11 @@ int main(int argc, char ** argv) {
                 common_sampler_reset(smpl);
             }
             is_interacting = false;
+
+            if (waiting_for_first_input && params.single_turn) {
+                params.interactive       = false;
+                params.interactive_first = false;
+            }
             waiting_for_first_input = false;
         }
     }
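
This is the complementary single-turn case: when the first turn is typed interactively rather than preloaded, interactivity is switched off as soon as that input is consumed, so the program exits after the model's reply.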
