|
12 | 12 | #include <regex>
|
13 | 13 | #include <sstream>
|
14 | 14 | #include <string>
|
| 15 | +#include <unordered_map> |
15 | 16 | #include <unordered_set>
|
16 | 17 | #include <vector>
|
17 | 18 | #include <cinttypes>
|
@@ -1386,3 +1387,77 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
|
1386 | 1387 | fprintf(stream, "typical_p: %f # default: 1.0\n", sparams.typical_p);
|
1387 | 1388 | fprintf(stream, "verbose_prompt: %s # default: false\n", params.verbose_prompt ? "true" : "false");
|
1388 | 1389 | }
|
| 1390 | + |
| 1391 | +// |
| 1392 | +// KV cache utils |
| 1393 | +// |
| 1394 | + |
| 1395 | +void dump_kv_cache_view(const llama_kv_cache_view & view, int row_size) { |
| 1396 | + static const char slot_chars[] = ".123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+"; |
| 1397 | + |
| 1398 | + printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d", |
| 1399 | + view.n_cells, view.n_max_seq, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx); |
| 1400 | + |
| 1401 | + llama_kv_cache_view_cell * c_curr = view.cells; |
| 1402 | + llama_seq_id * cs_curr = view.cells_sequences; |
| 1403 | + |
| 1404 | + for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_max_seq) { |
| 1405 | + if (i % row_size == 0) { |
| 1406 | + printf("\n%5d: ", i); |
| 1407 | + } |
| 1408 | + int seq_count = 0; |
| 1409 | + for (int j = 0; j < view.n_max_seq; j++) { |
| 1410 | + if (cs_curr[j] >= 0) { seq_count++; } |
| 1411 | + } |
| 1412 | + putchar(slot_chars[std::min(sizeof(slot_chars) - 2, size_t(seq_count))]); |
| 1413 | + } |
| 1414 | + |
| 1415 | + printf("\n=== Done dumping\n"); |
| 1416 | +} |
| 1417 | + |
| 1418 | +void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size) { |
| 1419 | + static const char slot_chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; |
| 1420 | + |
| 1421 | + printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d\n", |
| 1422 | + view.n_cells, view.n_max_seq, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx); |
| 1423 | + |
| 1424 | + std::unordered_map<llama_seq_id, size_t> seqs; |
| 1425 | + llama_kv_cache_view_cell * c_curr = view.cells; |
| 1426 | + llama_seq_id * cs_curr = view.cells_sequences; |
| 1427 | + |
| 1428 | + for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_max_seq) { |
| 1429 | + for (int j = 0; j < view.n_max_seq; j++) { |
| 1430 | + if (cs_curr[j] < 0) { continue; } |
| 1431 | + if (seqs.find(cs_curr[j]) == seqs.end()) { |
| 1432 | + if (seqs.size() + 1 >= sizeof(slot_chars)) { break; } |
| 1433 | + seqs[cs_curr[j]] = seqs.size(); |
| 1434 | + } |
| 1435 | + } |
| 1436 | + if (seqs.size() + 1 >= sizeof(slot_chars)) { break; } |
| 1437 | + } |
| 1438 | + |
| 1439 | + printf("=== Sequence legend: "); |
| 1440 | + for (const auto & it : seqs) { |
| 1441 | + printf("%zu=%d, ", it.second, it.first); |
| 1442 | + } |
| 1443 | + printf("'+'=other sequence ids"); |
| 1444 | + |
| 1445 | + c_curr = view.cells; |
| 1446 | + cs_curr = view.cells_sequences; |
| 1447 | + for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_max_seq) { |
| 1448 | + if (i % row_size == 0) { |
| 1449 | + printf("\n%5d: ", i); |
| 1450 | + } |
| 1451 | + for (int j = 0; j < view.n_max_seq; j++) { |
| 1452 | + if (cs_curr[j] >= 0) { |
| 1453 | + const auto & it = seqs.find(cs_curr[j]); |
| 1454 | + putchar(it != seqs.end() ? int(slot_chars[it->second]) : '+'); |
| 1455 | + } else { |
| 1456 | + putchar('.'); |
| 1457 | + } |
| 1458 | + } |
| 1459 | + putchar(' '); |
| 1460 | + } |
| 1461 | + |
| 1462 | + printf("\n=== Done dumping\n"); |
| 1463 | +} |
0 commit comments