
Commit 978ba3d

Server: Don't ignore llama.cpp params (#8754)
* Don't ignore llama.cpp params
* Add fallback for max_tokens
1 parent ecf6b7f · commit 978ba3d

2 files changed: +1 −19 lines


examples/server/server.cpp

Lines changed: 1 addition & 1 deletion
@@ -900,7 +900,7 @@ struct server_context {
 
     slot.params.stream       = json_value(data, "stream", false);
     slot.params.cache_prompt = json_value(data, "cache_prompt", false);
-    slot.params.n_predict    = json_value(data, "n_predict", default_params.n_predict);
+    slot.params.n_predict    = json_value(data, "n_predict", json_value(data, "max_tokens", default_params.n_predict));
     slot.sparams.top_k       = json_value(data, "top_k", default_sparams.top_k);
     slot.sparams.top_p       = json_value(data, "top_p", default_sparams.top_p);
     slot.sparams.min_p       = json_value(data, "min_p", default_sparams.min_p);
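The nested json_value call is what implements the fallback: n_predict is used when the request provides it, otherwise max_tokens, otherwise the server default. Below is a minimal, self-contained C++ sketch of that pattern; the json_value helper here is a simplified stand-in for the one defined in examples/server/utils.hpp, not the server's actual code, and default_n_predict stands in for default_params.n_predict.

    #include <cstdio>
    #include <string>

    #include <nlohmann/json.hpp>

    using json = nlohmann::ordered_json;

    // Simplified stand-in for the json_value helper in examples/server/utils.hpp:
    // return the value stored under `key` if present and non-null, else the default.
    template <typename T>
    static T json_value(const json & body, const std::string & key, const T & default_value) {
        return body.contains(key) && !body.at(key).is_null()
            ? body.at(key).get<T>()
            : default_value;
    }

    int main() {
        const int default_n_predict = -1; // stand-in for default_params.n_predict

        const json requests[] = {
            { {"max_tokens", 128} },                    // OpenAI-style request
            { {"n_predict", 64} },                      // native llama.cpp request
            { {"n_predict", 64}, {"max_tokens", 128} }, // both present: n_predict wins
            json::object(),                             // neither: server default
        };

        for (const json & data : requests) {
            const int n_predict =
                json_value(data, "n_predict", json_value(data, "max_tokens", default_n_predict));
            printf("%-40s -> n_predict = %d\n", data.dump().c_str(), n_predict);
        }
        return 0;
    }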

examples/server/utils.hpp

Lines changed: 0 additions & 18 deletions
@@ -355,24 +355,6 @@ static json oaicompat_completion_params_parse(
 
     llama_params["__oaicompat"] = true;
 
-    // Map OpenAI parameters to llama.cpp parameters
-    //
-    // For parameters that are defined by the OpenAI documentation (e.g.
-    // temperature), we explicitly specify OpenAI's intended default; we
-    // need to do that because sometimes OpenAI disagrees with llama.cpp
-    //
-    // https://platform.openai.com/docs/api-reference/chat/create
-    llama_sampling_params default_sparams;
-    llama_params["model"]             = json_value(body, "model", std::string("unknown"));
-    llama_params["frequency_penalty"] = json_value(body, "frequency_penalty", 0.0);
-    llama_params["logit_bias"]        = json_value(body, "logit_bias", json::object());
-    llama_params["n_predict"]         = json_value(body, "max_tokens", -1);
-    llama_params["presence_penalty"]  = json_value(body, "presence_penalty", 0.0);
-    llama_params["seed"]              = json_value(body, "seed", LLAMA_DEFAULT_SEED);
-    llama_params["stream"]            = json_value(body, "stream", false);
-    llama_params["temperature"]       = json_value(body, "temperature", 1.0);
-    llama_params["top_p"]             = json_value(body, "top_p", 1.0);
-
     // Apply chat template to the list of messages
     llama_params["prompt"] = format_chat(model, chat_template, body.at("messages"));
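Deleting this block is the "don't ignore llama.cpp params" half of the commit: the OpenAI-compatible endpoint used to write OpenAI's defaults (temperature 1.0, top_p 1.0, and so on) into llama_params even when the client never sent them, silently overriding whatever defaults the server itself was configured with. With the block gone, keys the client omits stay absent, and the per-slot parsing in server.cpp (shown above) falls back to the server's own defaults. A hedged sketch of the difference, using temperature and an assumed server-side default of 0.4:

    #include <cstdio>

    #include <nlohmann/json.hpp>

    using json = nlohmann::ordered_json;

    int main() {
        // Assumed server-side default, e.g. a server launched with --temp 0.4.
        const float server_default_temp = 0.4f;

        // Client request that does not specify a temperature.
        const json body = { {"messages", json::array()} };

        // Old behavior: the OpenAI default was injected unconditionally,
        // mirroring the removed line in oaicompat_completion_params_parse.
        json old_params;
        old_params["temperature"] = body.value("temperature", 1.0);

        // New behavior: the key is simply left absent.
        json new_params;

        // Downstream, the slot falls back to the server default only when absent.
        const float effective_old = old_params.value("temperature", server_default_temp);
        const float effective_new = new_params.value("temperature", server_default_temp);

        printf("old: temperature = %.1f (server default ignored)\n",   effective_old); // 1.0
        printf("new: temperature = %.1f (server default respected)\n", effective_new); // 0.4
        return 0;
    }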
