Skip to content

Commit fe246ca

Browse files
committed
Remove deprecations that were deleted on the llama.cpp side too
rel: ggml-org/llama.cpp#13653
1 parent ba39a6c commit fe246ca

File tree

1 file changed

+0
-203
lines changed

1 file changed

+0
-203
lines changed

llama_cpp/llama_cpp.py

Lines changed: 0 additions & 203 deletions
Original file line numberDiff line numberDiff line change
@@ -1755,18 +1755,6 @@ def llama_kv_self_n_tokens(ctx: llama_context_p, /) -> int:
17551755
...
17561756

17571757

1758-
# DEPRECATED(LLAMA_API int32_t llama_get_kv_cache_token_count(const struct llama_context * ctx),
1759-
# "use llama_kv_self_n_tokens instead");
1760-
@ctypes_function(
1761-
"llama_get_kv_cache_token_count", [llama_context_p_ctypes], ctypes.c_int32
1762-
)
1763-
def llama_get_kv_cache_token_count(ctx: llama_context_p, /) -> int:
1764-
"""Returns the number of tokens in the KV cache (slow, use only for debug)
1765-
If a KV cell has multiple sequences assigned to it, it will be counted multiple times
1766-
"""
1767-
...
1768-
1769-
17701758
# // Returns the number of used KV cells (i.e. have at least one sequence assigned to them)
17711759
# LLAMA_API int32_t llama_kv_self_used_cells(const struct llama_context * ctx);
17721760
@ctypes_function(
@@ -1777,16 +1765,6 @@ def llama_kv_self_used_cells(ctx: llama_context_p, /) -> int:
17771765
...
17781766

17791767

1780-
# DEPRECATED(LLAMA_API int32_t llama_get_kv_cache_used_cells(const struct llama_context * ctx),
1781-
# "use llama_kv_self_used_cells instead");
1782-
@ctypes_function(
1783-
"llama_get_kv_cache_used_cells", [llama_context_p_ctypes], ctypes.c_int32
1784-
)
1785-
def llama_get_kv_cache_used_cells(ctx: llama_context_p, /) -> int:
1786-
"""Returns the number of used KV cells (i.e. have at least one sequence assigned to them)"""
1787-
...
1788-
1789-
17901768
# // Clear the KV cache - both cell info is erased and KV data is zeroed
17911769
# LLAMA_API void llama_kv_self_clear(
17921770
# struct llama_context * ctx);
@@ -1797,49 +1775,6 @@ def llama_kv_self_clear(ctx: llama_context_p, /):
17971775
"""Clear the KV cache - both cell info is erased and KV data is zeroed"""
17981776
...
17991777

1800-
# NOTE: Deprecated
1801-
@ctypes_function("llama_kv_self_clear", [llama_context_p_ctypes], None)
1802-
def llama_kv_cache_clear(ctx: llama_context_p, /):
1803-
"""Clear the KV cache"""
1804-
...
1805-
1806-
1807-
# // Removes all tokens that belong to the specified sequence and have positions in [p0, p1)
1808-
# // Returns false if a partial sequence cannot be removed. Removing a whole sequence never fails
1809-
# // seq_id < 0 : match any sequence
1810-
# // p0 < 0 : [0, p1]
1811-
# // p1 < 0 : [p0, inf)
1812-
# LLAMA_API bool llama_kv_cache_seq_rm(
1813-
# struct llama_context * ctx,
1814-
# llama_seq_id seq_id,
1815-
# llama_pos p0,
1816-
# llama_pos p1);
1817-
@ctypes_function(
1818-
"llama_kv_cache_seq_rm",
1819-
[
1820-
llama_context_p_ctypes,
1821-
llama_seq_id,
1822-
llama_pos,
1823-
llama_pos,
1824-
],
1825-
ctypes.c_bool,
1826-
)
1827-
def llama_kv_cache_seq_rm(
1828-
ctx: llama_context_p,
1829-
seq_id: Union[llama_seq_id, int],
1830-
p0: Union[llama_pos, int],
1831-
p1: Union[llama_pos, int],
1832-
/,
1833-
) -> bool:
1834-
"""Removes all tokens that belong to the specified sequence and have positions in [p0, p1)
1835-
1836-
Returns false if a partial sequence cannot be removed. Removing a whole sequence never fails
1837-
1838-
seq_id < 0 : match any sequence
1839-
p0 < 0 : [0, p1]
1840-
p1 < 0 : [p0, inf)"""
1841-
...
1842-
18431778

18441779
# // Copy all tokens that belong to the specified sequence to another sequence
18451780
# // Note that this does not allocate extra KV cache memory - it simply assigns the tokens to the new sequence
@@ -1877,33 +1812,6 @@ def llama_kv_self_seq_cp(
18771812
...
18781813

18791814

1880-
# NOTE: Deprecated
1881-
@ctypes_function(
1882-
"llama_kv_self_seq_cp",
1883-
[
1884-
llama_context_p_ctypes,
1885-
llama_seq_id,
1886-
llama_seq_id,
1887-
llama_pos,
1888-
llama_pos,
1889-
],
1890-
None,
1891-
)
1892-
def llama_kv_cache_seq_cp(
1893-
ctx: llama_context_p,
1894-
seq_id_src: Union[llama_seq_id, int],
1895-
seq_id_dst: Union[llama_seq_id, int],
1896-
p0: Union[llama_pos, int],
1897-
p1: Union[llama_pos, int],
1898-
/,
1899-
):
1900-
"""Copy all tokens that belong to the specified sequence to another sequence
1901-
Note that this does not allocate extra KV cache memory - it simply assigns the tokens to the new sequence
1902-
p0 < 0 : [0, p1]
1903-
p1 < 0 : [p0, inf)"""
1904-
...
1905-
1906-
19071815
# // Removes all tokens that do not belong to the specified sequence
19081816
# LLAMA_API void llama_kv_self_seq_keep(
19091817
# struct llama_context * ctx,
@@ -1916,13 +1824,6 @@ def llama_kv_self_seq_keep(ctx: llama_context_p, seq_id: Union[llama_seq_id, int
19161824
...
19171825

19181826

1919-
# NOTE: Deprecated
1920-
@ctypes_function(
1921-
"llama_kv_self_seq_keep", [llama_context_p_ctypes, llama_seq_id], None
1922-
)
1923-
def llama_kv_cache_seq_keep(ctx: llama_context_p, seq_id: Union[llama_seq_id, int], /):
1924-
"""Removes all tokens that do not belong to the specified sequence"""
1925-
...
19261827

19271828

19281829

@@ -1964,49 +1865,6 @@ def llama_kv_self_seq_add(
19641865
p0 < 0 : [0, p1]
19651866
p1 < 0 : [p0, inf)"""
19661867
...
1967-
1968-
1969-
# // NOTE: Deprecated
1970-
# // Adds relative position "delta" to all tokens that belong to the specified sequence and have positions in [p0, p1)
1971-
# // If the KV cache is RoPEd, the KV data is updated accordingly:
1972-
# // - lazily on next llama_decode()
1973-
# // - explicitly with llama_kv_cache_update()
1974-
# // p0 < 0 : [0, p1]
1975-
# // p1 < 0 : [p0, inf)
1976-
# LLAMA_API void llama_kv_cache_seq_add(
1977-
# struct llama_context * ctx,
1978-
# llama_seq_id seq_id,
1979-
# llama_pos p0,
1980-
# llama_pos p1,
1981-
# llama_pos delta);
1982-
@ctypes_function(
1983-
"llama_kv_self_seq_add",
1984-
[
1985-
llama_context_p_ctypes,
1986-
llama_seq_id,
1987-
llama_pos,
1988-
llama_pos,
1989-
llama_pos,
1990-
],
1991-
None,
1992-
)
1993-
def llama_kv_cache_seq_add(
1994-
ctx: llama_context_p,
1995-
seq_id: Union[llama_seq_id, int],
1996-
p0: Union[llama_pos, int],
1997-
p1: Union[llama_pos, int],
1998-
delta: Union[llama_pos, int],
1999-
/,
2000-
):
2001-
"""Adds relative position "delta" to all tokens that belong to the specified sequence and have positions in [p0, p1)
2002-
If the KV cache is RoPEd, the KV data is updated accordingly:
2003-
- lazily on next llama_decode()
2004-
- explicitly with llama_kv_cache_update()
2005-
p0 < 0 : [0, p1]
2006-
p1 < 0 : [p0, inf)"""
2007-
...
2008-
2009-
20101868
# // Integer division of the positions by factor of `d > 1`
20111869
# // If the KV cache is RoPEd, the KV data is updated accordingly
20121870
# // p0 < 0 : [0, p1]
@@ -2043,43 +1901,6 @@ def llama_kv_self_seq_div(
20431901
...
20441902

20451903

2046-
# // NOTE: Deprecated
2047-
# // Integer division of the positions by factor of `d > 1`
2048-
# // If the KV cache is RoPEd, the KV data is updated accordingly
2049-
# // p0 < 0 : [0, p1]
2050-
# // p1 < 0 : [p0, inf)
2051-
# LLAMA_API void llama_kv_cache_seq_div(
2052-
# struct llama_context * ctx,
2053-
# llama_seq_id seq_id,
2054-
# llama_pos p0,
2055-
# llama_pos p1,
2056-
# int d);
2057-
@ctypes_function(
2058-
"llama_kv_self_seq_div",
2059-
[
2060-
llama_context_p_ctypes,
2061-
llama_seq_id,
2062-
llama_pos,
2063-
llama_pos,
2064-
ctypes.c_int,
2065-
],
2066-
None,
2067-
)
2068-
def llama_kv_cache_seq_div(
2069-
ctx: llama_context_p,
2070-
seq_id: Union[llama_seq_id, int],
2071-
p0: Union[llama_pos, int],
2072-
p1: Union[llama_pos, int],
2073-
d: Union[ctypes.c_int, int],
2074-
/,
2075-
):
2076-
"""Integer division of the positions by factor of `d > 1`
2077-
If the KV cache is RoPEd, the KV data is updated accordingly
2078-
p0 < 0 : [0, p1]
2079-
p1 < 0 : [p0, inf)"""
2080-
...
2081-
2082-
20831904
# // Returns the largest position present in the KV cache for the specified sequence
20841905
# LLAMA_API llama_pos llama_kv_self_seq_pos_max(
20851906
# struct llama_context * ctx,
@@ -2108,21 +1929,6 @@ def llama_kv_self_defrag(ctx: llama_context_p, /):
21081929
...
21091930

21101931

2111-
# NOTE: Deprecated
2112-
# // Defragment the KV cache
2113-
# // This will be applied:
2114-
# // - lazily on next llama_decode()
2115-
# // - explicitly with llama_kv_self_update()
2116-
# LLAMA_API void llama_kv_cache_defrag(struct llama_context * ctx);
2117-
@ctypes_function("llama_kv_cache_defrag", [llama_context_p_ctypes], None)
2118-
def llama_kv_cache_defrag(ctx: llama_context_p, /):
2119-
"""Defragment the KV cache
2120-
This will be applied:
2121-
- lazily on next llama_decode()
2122-
- explicitly with llama_kv_cache_update()"""
2123-
...
2124-
2125-
21261932
# // Apply the KV cache updates (such as K-shifts, defragmentation, etc.)
21271933
# LLAMA_API void llama_kv_cache_update(struct llama_context * ctx);
21281934
@ctypes_function("llama_kv_self_update", [llama_context_p_ctypes], None)
@@ -2147,15 +1953,6 @@ def llama_kv_self_can_shift(ctx: llama_context_p, /) -> bool:
21471953
...
21481954

21491955

2150-
# // NOTE: Deprecated
2151-
# // Check if the context supports KV cache shifting
2152-
# LLAMA_API bool llama_kv_cache_can_shift(struct llama_context * ctx);
2153-
@ctypes_function("llama_kv_self_can_shift", [llama_context_p_ctypes], ctypes.c_bool)
2154-
def llama_kv_cache_can_shift(ctx: llama_context_p, /) -> bool:
2155-
"""Check if the context supports KV cache shifting"""
2156-
...
2157-
2158-
21591956
# //
21601957
# // State / sessions
21611958
# //

0 commit comments

Comments
 (0)