Commit 5069b93

llama : print a log of the total cache size
1 parent 1494a18 commit 5069b93


llama.cpp

Lines changed: 15 additions & 8 deletions
@@ -4847,16 +4847,23 @@ static void llm_load_vocab(
 
     // build token to piece caches
     {
-        std::vector<llama_vocab::token> cache_token_to_piece (n_vocab);
-        std::vector<llama_vocab::token> cache_token_to_piece_special(n_vocab);
+        size_t size_cache = 0;
 
-        for (uint32_t id = 0; id < n_vocab; ++id) {
-            cache_token_to_piece[id] = llama_token_to_piece(&model, id, false);
-            cache_token_to_piece_special[id] = llama_token_to_piece(&model, id, true);
-        }
+        std::vector<llama_vocab::token> cache_token_to_piece (n_vocab);
+        std::vector<llama_vocab::token> cache_token_to_piece_special(n_vocab);
 
-        std::swap(vocab.cache_token_to_piece, cache_token_to_piece);
-        std::swap(vocab.cache_token_to_piece_special, cache_token_to_piece_special);
+        for (uint32_t id = 0; id < n_vocab; ++id) {
+            cache_token_to_piece[id] = llama_token_to_piece(&model, id, false);
+            cache_token_to_piece_special[id] = llama_token_to_piece(&model, id, true);
+
+            size_cache += cache_token_to_piece[id].size();
+            size_cache += cache_token_to_piece_special[id].size();
+        }
+
+        std::swap(vocab.cache_token_to_piece, cache_token_to_piece);
+        std::swap(vocab.cache_token_to_piece_special, cache_token_to_piece_special);
+
+        LLAMA_LOG_INFO("%s: token to piece cache size = %.4f MB\n", __func__, size_cache / 1024.0 / 1024.0);
     }
 }
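For reference, a minimal standalone sketch of the pattern this commit applies: fill the two token-to-piece caches, accumulate the byte size of every cached string while filling them, and report the running total in MB once at the end. dummy_token_to_piece and n_vocab are illustrative stand-ins, since the real llama_token_to_piece call requires a loaded model, and printf stands in for LLAMA_LOG_INFO.

#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

// Stand-in for llama_token_to_piece(&model, id, special), which needs a
// loaded model; it just fabricates a short piece per token id.
static std::string dummy_token_to_piece(uint32_t id, bool special) {
    return special ? "<tok_" + std::to_string(id) + ">"
                   :  "tok_" + std::to_string(id);
}

int main() {
    const uint32_t n_vocab = 32000; // illustrative vocab size

    size_t size_cache = 0;

    std::vector<std::string> cache_token_to_piece        (n_vocab);
    std::vector<std::string> cache_token_to_piece_special(n_vocab);

    for (uint32_t id = 0; id < n_vocab; ++id) {
        cache_token_to_piece[id]         = dummy_token_to_piece(id, false);
        cache_token_to_piece_special[id] = dummy_token_to_piece(id, true);

        // std::string::size() counts bytes, so the sum is the string
        // payload of both caches, not counting per-string overhead.
        size_cache += cache_token_to_piece[id].size();
        size_cache += cache_token_to_piece_special[id].size();
    }

    printf("%s: token to piece cache size = %.4f MB\n",
           __func__, size_cache / 1024.0 / 1024.0);
    return 0;
}

Accumulating the sizes inside the fill loop costs nothing extra and avoids a second pass over the caches; note the reported figure covers only the string payload, not std::string's own bookkeeping.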
