// Patch against llama.cpp; both hunk headers name llama_sample_grammar as the enclosing function.
//
// Hunk 1: removes the candidates_decoded vector and its reserve(candidates->size) call.
// Hunk 2: in the final else-branch, replaces the push_back into candidates_decoded with a
//         local `decoded` holding the result of decode_utf8(piece, grammar->partial_utf8),
//         then pushes { i, decoded.first.data(), decoded.second } into candidates_grammar.
//
// NOTE(review): candidates_grammar is declared in hunk 1, ahead of the branch edited in
// hunk 2, whereas `decoded` is scoped to the else-block. The pointer taken from
// decoded.first.data() therefore appears to outlive the object backing it. Presumably
// decoded.first is a std::vector (its template arguments are missing in this copy) and
// candidates_grammar is read after the loop - confirm both. If so, this change leaves
// dangling pointers in candidates_grammar, and the removed candidates_decoded storage
// (or an equivalent owner with the same lifetime as candidates_grammar) is still needed.
//
// NOTE(review): this copy has its line breaks collapsed and template argument lists
// stripped (e.g. "std::vector, llama_partial_utf8>>"), so it is unlikely to apply as-is;
// regenerate the patch from the original commit before using it.
diff --git a/llama.cpp b/llama.cpp index ec4a7f94ccba8..53f5bee28ae88 100644 --- a/llama.cpp +++ b/llama.cpp @@ -7356,8 +7356,6 @@ void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * c const llama_token eos = llama_token_eos(&ctx->model); - std::vector, llama_partial_utf8>> candidates_decoded; - candidates_decoded.reserve(candidates->size); std::vector candidates_grammar; candidates_grammar.reserve(candidates->size); @@ -7371,8 +7369,8 @@ void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * c } else if (piece.empty() || piece[0] == 0) { candidates->data[i].logit = -INFINITY; } else { - candidates_decoded.push_back(decode_utf8(piece, grammar->partial_utf8)); - candidates_grammar.push_back({ i, candidates_decoded.back().first.data(), candidates_decoded.back().second }); + std::pair, llama_partial_utf8> decoded = decode_utf8(piece, grammar->partial_utf8); + candidates_grammar.push_back({ i, decoded.first.data(), decoded.second }); } }