@@ -1742,115 +1742,6 @@ def llama_apply_adapter_cvec(
1742
1742
# //
1743
1743
1744
1744
1745
- # // Information associated with an individual cell in the KV cache view.
1746
- # struct llama_kv_cache_view_cell {
1747
- # // The position for this cell. Takes KV cache shifts into account.
1748
- # // May be negative if the cell is not populated.
1749
- # llama_pos pos;
1750
- # };
1751
class llama_kv_cache_view_cell(ctypes.Structure):
    """One cell of the KV cache as exposed through a KV cache view.

    Attributes:
        pos (llama_pos): Position held by this cell, with KV cache shifts
            taken into account. May be negative if the cell is not populated."""

    # Mirrors `struct llama_kv_cache_view_cell` from the C header: a single
    # `llama_pos pos` member.
    _fields_ = [
        ("pos", llama_pos),
    ]

    if TYPE_CHECKING:
        # Static-typing view of the field declared in `_fields_`.
        pos: llama_pos
1762
-
1763
-
1764
- # // An updateable view of the KV cache.
1765
- # struct llama_kv_cache_view {
1766
- # // Number of KV cache cells. This will be the same as the context size.
1767
- # int32_t n_cells;
1768
-
1769
- # // Maximum number of sequences that can exist in a cell. It's not an error
1770
- # // if there are more sequences in a cell than this value, however they will
1771
- # // not be visible in the view cells_sequences.
1772
- # int32_t n_seq_max;
1773
-
1774
- # // Number of tokens in the cache. For example, if there are two populated
1775
- # // cells, the first with 1 sequence id in it and the second with 2 sequence
1776
- # // ids then you'll have 3 tokens.
1777
- # int32_t token_count;
1778
-
1779
- # // Number of populated cache cells.
1780
- # int32_t used_cells;
1781
-
1782
- # // Maximum contiguous empty slots in the cache.
1783
- # int32_t max_contiguous;
1784
-
1785
- # // Index to the start of the max_contiguous slot range. Can be negative
1786
- # // when cache is full.
1787
- # int32_t max_contiguous_idx;
1788
-
1789
- # // Information for an individual cell.
1790
- # struct llama_kv_cache_view_cell * cells;
1791
-
1792
-
1793
- # // The sequences for each cell. There will be n_seq_max items per cell.
1794
- # llama_seq_id * cells_sequences;
1795
- # };
1796
class llama_kv_cache_view(ctypes.Structure):
    """An updateable view of the KV cache.

    Attributes:
        n_cells (int): Number of KV cache cells. This will be the same as the
            context size.
        n_max_seq (int): Maximum number of sequences that can exist in a cell.
            It is not an error if a cell holds more sequences than this value,
            but the extra ones are not visible in ``cells_sequences``. The C
            header names this member ``n_seq_max``; see the ``n_seq_max``
            alias below.
        token_count (int): Number of tokens in the cache. For example, two
            populated cells holding 1 and 2 sequence ids give 3 tokens.
        used_cells (int): Number of populated cache cells.
        max_contiguous (int): Maximum contiguous empty slots in the cache.
        max_contiguous_idx (int): Index of the start of the ``max_contiguous``
            slot range. Can be negative when the cache is full.
        cells: Information for an individual cell (pointer to
            ``llama_kv_cache_view_cell``).
        cells_sequences: The sequences for each cell; there are ``n_max_seq``
            items per cell."""

    if TYPE_CHECKING:
        n_cells: int
        n_max_seq: int
        token_count: int
        used_cells: int
        max_contiguous: int
        max_contiguous_idx: int
        cells: CtypesArray[llama_kv_cache_view_cell]
        cells_sequences: CtypesArray[llama_seq_id]

    # Field order and types must match `struct llama_kv_cache_view` exactly;
    # ctypes lays the structure out from this list, not from the names.
    _fields_ = [
        ("n_cells", ctypes.c_int32),
        ("n_max_seq", ctypes.c_int32),
        ("token_count", ctypes.c_int32),
        ("used_cells", ctypes.c_int32),
        ("max_contiguous", ctypes.c_int32),
        ("max_contiguous_idx", ctypes.c_int32),
        ("cells", ctypes.POINTER(llama_kv_cache_view_cell)),
        ("cells_sequences", ctypes.POINTER(llama_seq_id)),
    ]

    # The struct member is called `n_seq_max` in the C header, while the
    # binding historically exposes it as `n_max_seq`. Keep the old field name
    # for existing callers and offer the header's spelling as an alias.
    @property
    def n_seq_max(self) -> int:
        """Alias of ``n_max_seq`` using the C header's member name."""
        return self.n_max_seq

    @n_seq_max.setter
    def n_seq_max(self, value: int) -> None:
        self.n_max_seq = value
1817
-
1818
-
1819
# Pointer-to-struct type used as the argtype wherever the C API takes a
# `struct llama_kv_cache_view *`.
llama_kv_cache_view_p = ctypes.POINTER(llama_kv_cache_view)
1820
-
1821
-
1822
- # // Create an empty KV cache view. (use only for debugging purposes)
1823
- # LLAMA_API struct llama_kv_cache_view llama_kv_cache_view_init(const struct llama_context * ctx, int32_t n_seq_max);
1824
@ctypes_function(
    "llama_kv_cache_view_init",
    [llama_context_p_ctypes, ctypes.c_int32],
    llama_kv_cache_view,
)
def llama_kv_cache_view_init(
    ctx: llama_context_p,
    n_seq_max: Union[ctypes.c_int32, int],
    /,
) -> llama_kv_cache_view:
    """Create an empty KV cache view. (use only for debugging purposes)

    Args:
        ctx: The llama context to create the view for.
        n_seq_max: Value passed as the C function's ``n_seq_max`` argument.

    Returns:
        A ``llama_kv_cache_view`` structure, returned by value."""
    ...
1834
-
1835
-
1836
- # // Free a KV cache view. (use only for debugging purposes)
1837
- # LLAMA_API void llama_kv_cache_view_free(struct llama_kv_cache_view * view);
1838
@ctypes_function("llama_kv_cache_view_free", [llama_kv_cache_view_p], None)
def llama_kv_cache_view_free(view: CtypesPointerOrRef[llama_kv_cache_view], /):  # type: ignore
    """Free a KV cache view. (use only for debugging purposes)

    Args:
        view: The view to free, passed in a form accepted by the
            ``llama_kv_cache_view_p`` argtype (same annotation as the
            ``view`` argument of ``llama_kv_cache_view_update``)."""
    # `ctypes.pointer` is a function, not a subscriptable type, so the former
    # string annotation "ctypes.pointer[...]" was not a valid type expression.
    ...
1842
-
1843
-
1844
- # // Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)
1845
- # LLAMA_API void llama_kv_cache_view_update(const struct llama_context * ctx, struct llama_kv_cache_view * view);
1846
@ctypes_function(
    "llama_kv_cache_view_update",
    [llama_context_p_ctypes, llama_kv_cache_view_p],
    None,
)
def llama_kv_cache_view_update(  # type: ignore
    ctx: llama_context_p,
    view: CtypesPointerOrRef[llama_kv_cache_view],
    /,
):
    """Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)

    Args:
        ctx: The llama context whose KV cache is inspected.
        view: The view structure to update."""
    ...
1852
-
1853
-
1854
1745
# // Returns the number of tokens in the KV cache (slow, use only for debug)
1855
1746
# // If a KV cell has multiple sequences assigned to it, it will be counted multiple times
1856
1747
# LLAMA_API int32_t llama_kv_self_n_tokens(const struct llama_context * ctx);
0 commit comments