@@ -4847,16 +4847,23 @@ static void llm_load_vocab(
 
     // build token to piece caches
     {
-        std::vector<llama_vocab::token> cache_token_to_piece        (n_vocab);
-        std::vector<llama_vocab::token> cache_token_to_piece_special(n_vocab);
+        size_t size_cache = 0;
 
-        for (uint32_t id = 0; id < n_vocab; ++id) {
-            cache_token_to_piece[id]         = llama_token_to_piece(&model, id, false);
-            cache_token_to_piece_special[id] = llama_token_to_piece(&model, id, true);
-        }
+        std::vector<llama_vocab::token> cache_token_to_piece        (n_vocab);
+        std::vector<llama_vocab::token> cache_token_to_piece_special(n_vocab);
 
-        std::swap(vocab.cache_token_to_piece,         cache_token_to_piece);
-        std::swap(vocab.cache_token_to_piece_special, cache_token_to_piece_special);
+        for (uint32_t id = 0; id < n_vocab; ++id) {
+            cache_token_to_piece[id]         = llama_token_to_piece(&model, id, false);
+            cache_token_to_piece_special[id] = llama_token_to_piece(&model, id, true);
+
+            size_cache += cache_token_to_piece[id].size();
+            size_cache += cache_token_to_piece_special[id].size();
+        }
+
+        std::swap(vocab.cache_token_to_piece,         cache_token_to_piece);
+        std::swap(vocab.cache_token_to_piece_special, cache_token_to_piece_special);
+
+        LLAMA_LOG_INFO("%s: token to piece cache size = %.4f MB\n", __func__, size_cache / 1024.0 / 1024.0);
 
     }
 }
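For context outside the llama.cpp tree, here is a minimal, self-contained C++ sketch of the pattern this hunk introduces: fill both token-to-piece caches, accumulate the byte size of every cached piece in the same loop, and report the total in MB. `token_to_piece_stub` and the fixed `n_vocab` value are hypothetical stand-ins for `llama_token_to_piece()` and the model's real vocabulary size; they are not part of the API shown in the diff.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

// Hypothetical stand-in for llama_token_to_piece(&model, id, special).
// The real function renders a token id as its text piece from the model
// vocabulary; `special` controls whether special tokens are rendered.
static std::string token_to_piece_stub(uint32_t id, bool special) {
    return special ? "<s" + std::to_string(id) + ">"
                   :  "p" + std::to_string(id);
}

int main() {
    const uint32_t n_vocab = 32000; // assumed vocab size, for illustration only

    std::vector<std::string> cache_token_to_piece        (n_vocab);
    std::vector<std::string> cache_token_to_piece_special(n_vocab);

    // Accumulate the byte size of every cached piece while filling the
    // caches, mirroring the patched loop above.
    size_t size_cache = 0;

    for (uint32_t id = 0; id < n_vocab; ++id) {
        cache_token_to_piece[id]         = token_to_piece_stub(id, false);
        cache_token_to_piece_special[id] = token_to_piece_stub(id, true);

        size_cache += cache_token_to_piece[id].size();
        size_cache += cache_token_to_piece_special[id].size();
    }

    // Counterpart of the LLAMA_LOG_INFO call: report the footprint in MB.
    printf("token to piece cache size = %.4f MB\n", size_cache / 1024.0 / 1024.0);

    return 0;
}

Note that summing `std::string::size()` counts only the piece bytes, not the `std::string` object overhead, which is also what the patched loop reports.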