diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 10ae264f516f4..637f6d6c26ff8 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -196,6 +196,7 @@ struct llama_server_context llama_context *ctx = nullptr; gpt_params params; + grammar_parser::parse_state parsed_grammar; llama_grammar *grammar = nullptr; bool truncated = false; @@ -241,10 +242,13 @@ struct llama_server_context stopped_limit = false; stopping_word = ""; multibyte_pending = 0; - grammar = nullptr; - n_remain = 0; n_past = 0; + + if (grammar != nullptr) { + llama_grammar_free(grammar); + grammar = nullptr; + } } bool loadModel(const gpt_params &params_) @@ -265,8 +269,6 @@ struct llama_server_context bool loadGrammar() { if (!params.grammar.empty()) { - grammar_parser::parse_state parsed_grammar; - parsed_grammar = grammar_parser::parse(params.grammar.c_str()); // will be empty (default) if there are parse errors if (parsed_grammar.rules.empty()) {