Skip to content

Commit ba39a6c

Browse files
committed
Remove llama_kv_cache_view because it was deleted on the llama.cpp side too
rel: ggml-org/llama.cpp#13653
1 parent b1d23df commit ba39a6c

File tree

2 files changed

+1
-110
lines changed

2 files changed

+1
-110
lines changed

llama_cpp/llama_cpp.py

Lines changed: 0 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -1742,115 +1742,6 @@ def llama_apply_adapter_cvec(
17421742
# //
17431743

17441744

1745-
# // Information associated with an individual cell in the KV cache view.
1746-
# struct llama_kv_cache_view_cell {
1747-
# // The position for this cell. Takes KV cache shifts into account.
1748-
# // May be negative if the cell is not populated.
1749-
# llama_pos pos;
1750-
# };
1751-
class llama_kv_cache_view_cell(ctypes.Structure):
1752-
"""Information associated with an individual cell in the KV cache view.
1753-
1754-
Attributes:
1755-
pos (llama_pos): The position for this cell. Takes KV cache shifts into account.
1756-
May be negative if the cell is not populated."""
1757-
1758-
if TYPE_CHECKING:
1759-
pos: llama_pos
1760-
1761-
_fields_ = [("pos", llama_pos)]
1762-
1763-
1764-
# // An updateable view of the KV cache.
1765-
# struct llama_kv_cache_view {
1766-
# // Number of KV cache cells. This will be the same as the context size.
1767-
# int32_t n_cells;
1768-
1769-
# // Maximum number of sequences that can exist in a cell. It's not an error
1770-
# // if there are more sequences in a cell than this value, however they will
1771-
# // not be visible in the view cells_sequences.
1772-
# int32_t n_seq_max;
1773-
1774-
# // Number of tokens in the cache. For example, if there are two populated
1775-
# // cells, the first with 1 sequence id in it and the second with 2 sequence
1776-
# // ids then you'll have 3 tokens.
1777-
# int32_t token_count;
1778-
1779-
# // Number of populated cache cells.
1780-
# int32_t used_cells;
1781-
1782-
# // Maximum contiguous empty slots in the cache.
1783-
# int32_t max_contiguous;
1784-
1785-
# // Index to the start of the max_contiguous slot range. Can be negative
1786-
# // when cache is full.
1787-
# int32_t max_contiguous_idx;
1788-
1789-
# // Information for an individual cell.
1790-
# struct llama_kv_cache_view_cell * cells;
1791-
1792-
1793-
# // The sequences for each cell. There will be n_seq_max items per cell.
1794-
# llama_seq_id * cells_sequences;
1795-
# };
1796-
class llama_kv_cache_view(ctypes.Structure):
1797-
if TYPE_CHECKING:
1798-
n_cells: int
1799-
n_max_seq: int
1800-
token_count: int
1801-
used_cells: int
1802-
max_contiguous: int
1803-
max_contiguous_idx: int
1804-
cells: CtypesArray[llama_kv_cache_view_cell]
1805-
cells_sequences: CtypesArray[llama_seq_id]
1806-
1807-
_fields_ = [
1808-
("n_cells", ctypes.c_int32),
1809-
("n_max_seq", ctypes.c_int32),
1810-
("token_count", ctypes.c_int32),
1811-
("used_cells", ctypes.c_int32),
1812-
("max_contiguous", ctypes.c_int32),
1813-
("max_contiguous_idx", ctypes.c_int32),
1814-
("cells", ctypes.POINTER(llama_kv_cache_view_cell)),
1815-
("cells_sequences", ctypes.POINTER(llama_seq_id)),
1816-
]
1817-
1818-
1819-
llama_kv_cache_view_p = ctypes.POINTER(llama_kv_cache_view)
1820-
1821-
1822-
# // Create an empty KV cache view. (use only for debugging purposes)
1823-
# LLAMA_API struct llama_kv_cache_view llama_kv_cache_view_init(const struct llama_context * ctx, int32_t n_seq_max);
1824-
@ctypes_function(
1825-
"llama_kv_cache_view_init",
1826-
[llama_context_p_ctypes, ctypes.c_int32],
1827-
llama_kv_cache_view,
1828-
)
1829-
def llama_kv_cache_view_init(
1830-
ctx: llama_context_p, n_seq_max: Union[ctypes.c_int32, int], /
1831-
) -> llama_kv_cache_view:
1832-
"""Create an empty KV cache view. (use only for debugging purposes)"""
1833-
...
1834-
1835-
1836-
# // Free a KV cache view. (use only for debugging purposes)
1837-
# LLAMA_API void llama_kv_cache_view_free(struct llama_kv_cache_view * view);
1838-
@ctypes_function("llama_kv_cache_view_free", [llama_kv_cache_view_p], None)
1839-
def llama_kv_cache_view_free(view: "ctypes.pointer[llama_kv_cache_view]", /): # type: ignore
1840-
"""Free a KV cache view. (use only for debugging purposes)"""
1841-
...
1842-
1843-
1844-
# // Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)
1845-
# LLAMA_API void llama_kv_cache_view_update(const struct llama_context * ctx, struct llama_kv_cache_view * view);
1846-
@ctypes_function(
1847-
"llama_kv_cache_view_update", [llama_context_p_ctypes, llama_kv_cache_view_p], None
1848-
)
1849-
def llama_kv_cache_view_update(ctx: llama_context_p, view: CtypesPointerOrRef[llama_kv_cache_view], /): # type: ignore
1850-
"""Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)"""
1851-
...
1852-
1853-
18541745
# // Returns the number of tokens in the KV cache (slow, use only for debug)
18551746
# // If a KV cell has multiple sequences assigned to it, it will be counted multiple times
18561747
# LLAMA_API int32_t llama_kv_self_n_tokens(const struct llama_context * ctx);

vendor/llama.cpp

0 commit comments

Comments
 (0)