Commit 94d7385 (1 parent: f24c7fe)

Update to llama.cpp 0a7c980
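Summary of the change: the special-token bookkeeping (BOS, EOS, NL, and the infill prefix/middle/suffix/EOT tokens) moves from LlamaSession to LlamaModel, so the tokens are read once at model load time instead of per context; the private detokenize helper moves along with them, and the crate manifest gains an explicit infer binary target.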

File tree (2 files changed, +84 -81 lines):

  crates/llama_cpp/Cargo.toml
  crates/llama_cpp/src/lib.rs


crates/llama_cpp/Cargo.toml (4 additions, 0 deletions)

@@ -9,6 +9,10 @@ license = "MIT OR Apache-2.0"
 readme = "../../README.md"
 publish = true
 
+[[bin]]
+name = "infer"
+path = "src/bin/infer.rs"
+
 [dependencies]
 ctor = "0.2.5"
 derive_more = "0.99.17"
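The new [[bin]] table explicitly registers an infer binary at src/bin/infer.rs; that file is not among the two files touched by this commit, so only the manifest entry is new here. For reference, a Cargo [[bin]] target only needs a main function at the declared path; a minimal placeholder of that shape (hypothetical, since the real src/bin/infer.rs is not shown in this diff):

// src/bin/infer.rs (hypothetical placeholder; the actual file is not part of
// this diff): a [[bin]] target only requires a `main` entry point at the
// path declared in Cargo.toml.
fn main() {
    // Real inference logic using the llama_cpp crate would go here.
}

Assuming the package is named llama_cpp, the binary can then be run with `cargo run -p llama_cpp --bin infer`.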

crates/llama_cpp/src/lib.rs (80 additions, 81 deletions)
@@ -208,6 +208,27 @@ pub struct LlamaModel {
 
     /// The size of this model's vocabulary, in tokens.
     vocabulary_size: usize,
+
+    /// The beginning of sentence (BOS) token for this model.
+    bos_token: Token,
+
+    /// The end of sentence (EOS) token for this model.
+    eos_token: Token,
+
+    /// The newline (NL) token for this model.
+    nl_token: Token,
+
+    /// For infilling, the prefix token for this model.
+    infill_prefix_token: Token,
+
+    /// For infilling, the middle token for this model.
+    infill_middle_token: Token,
+
+    /// For infilling, the suffix token for this model.
+    infill_suffix_token: Token,
+
+    /// For infilling, the token for the end of the infill.
+    eot_token: Token,
 }
 
 unsafe impl Send for LlamaModel {}
@@ -256,6 +277,13 @@ impl LlamaModel {
         Ok(Self {
             model: Arc::new(RwLock::new(LlamaModelInner(model))),
             vocabulary_size: vocabulary_size as usize,
+            bos_token: Token(unsafe { llama_token_bos(model) }),
+            eos_token: Token(unsafe { llama_token_eos(model) }),
+            nl_token: Token(unsafe { llama_token_nl(model) }),
+            infill_prefix_token: Token(unsafe { llama_token_prefix(model) }),
+            infill_middle_token: Token(unsafe { llama_token_middle(model) }),
+            infill_suffix_token: Token(unsafe { llama_token_suffix(model) }),
+            eot_token: Token(unsafe { llama_token_eot(model) }),
         })
     }
 }
@@ -312,6 +340,21 @@ impl LlamaModel {
         }
     }
 
+    /// Gets the byte string representation of `token` in this model's vocabulary.
+    ///
+    /// The returned slice is valid for the lifetime of this session, and typically encodes
+    /// a UTF-8 string; consider using [`String::from_utf8_lossy`] if you need to display the
+    /// contents.
+    fn detokenize(&self, token: Token) -> &[u8] {
+        assert!(
+            (token.0 as usize) < self.vocabulary_size,
+            "{} is out of range for this model's vocabulary range",
+            token.0
+        );
+
+        unsafe { CStr::from_ptr(llama_token_get_text(**self.model.blocking_read(), token.0)) }.to_bytes()
+    }
+
     /// Creates a new evaluation context for this model.
     ///
     /// The model must live for at least as long as the context, but many contexts can be created
@@ -344,17 +387,44 @@ impl LlamaModel {
             model: self.clone(),
             inner: Arc::new(LlamaContextInner { ptr: ctx }),
             history_size: 0,
-
-            // SAFETY: Static constructors.
-            bos_token: Token(unsafe { llama_token_bos(ctx) }),
-            eos_token: Token(unsafe { llama_token_eos(ctx) }),
-            nl_token: Token(unsafe { llama_token_nl(ctx) }),
-            infill_prefix_token: Token(unsafe { llama_token_prefix(ctx) }),
-            infill_middle_token: Token(unsafe { llama_token_middle(ctx) }),
-            infill_suffix_token: Token(unsafe { llama_token_suffix(ctx) }),
-            eot_token: Token(unsafe { llama_token_eot(ctx) }),
         }
     }
+
+
+    /// Returns the beginning of sentence (BOS) token for this context.
+    pub fn bos(&self) -> Token {
+        self.bos_token
+    }
+
+    /// Returns the end of sentence (EOS) token for this context.
+    pub fn eos(&self) -> Token {
+        self.eos_token
+    }
+
+    /// Returns the newline (NL) token for this context.
+    pub fn nl(&self) -> Token {
+        self.nl_token
+    }
+
+    /// Returns the infill prefix token for this context.
+    pub fn infill_prefix(&self) -> Token {
+        self.infill_prefix_token
+    }
+
+    /// Returns the infill middle token for this context.
+    pub fn infill_middle(&self) -> Token {
+        self.infill_middle_token
+    }
+
+    /// Returns the infill suffix token for this context.
+    pub fn infill_suffix(&self) -> Token {
+        self.infill_suffix_token
+    }
+
+    /// Returns the infill end of middle token for this context.
+    pub fn eot(&self) -> Token {
+        self.eot_token
+    }
 }
 
 /// The inner part of a [`LlamaSession`].
@@ -391,27 +461,6 @@ pub struct LlamaSession {
 
     /// The number of tokens present in this model's context.
     history_size: usize,
-
-    /// The beginning of sentence (BOS) token for this model.
-    bos_token: Token,
-
-    /// The end of sentence (EOS) token for this model.
-    eos_token: Token,
-
-    /// The newline (NL) token for this model.
-    nl_token: Token,
-
-    /// For infilling, the prefix token for this model.
-    infill_prefix_token: Token,
-
-    /// For infilling, the middle token for this model.
-    infill_middle_token: Token,
-
-    /// For infilling, the suffix token for this model.
-    infill_suffix_token: Token,
-
-    /// For infilling, the token for the end of the infill.
-    eot_token: Token,
 }
 
 /// An error raised while advancing the context in a [`LlamaSession`].
/// An error raised while advancing the context in a [`LlamaSession`].
@@ -443,21 +492,6 @@ pub enum LlamaContextError {
443492
}
444493

445494
impl LlamaSession {
446-
/// Gets the byte string representation of `token` in this model's vocabulary.
447-
///
448-
/// The returned slice is valid for the lifetime of this session, and typically encodes
449-
/// a UTF-8 string; consider using [`String::from_utf8_lossy`] if you need to display the
450-
/// contents.
451-
fn detokenize(&self, token: Token) -> &[u8] {
452-
assert!(
453-
(token.0 as usize) < self.model.vocabulary_size,
454-
"{} is out of range for this model's vocabulary range",
455-
token.0
456-
);
457-
458-
unsafe { CStr::from_ptr(llama_token_get_text(self.inner.ptr, token.0)) }.to_bytes()
459-
}
460-
461495
/// Advances the inner context of this model with `tokens`.
462496
///
463497
/// The model will generate new tokens from the end of the context.
@@ -571,41 +605,6 @@ impl LlamaSession {
 
         CompletionHandle { ctx: self, rx }
     }
-
-    /// Returns the beginning of sentence (BOS) token for this context.
-    pub fn bos(&self) -> Token {
-        self.bos_token
-    }
-
-    /// Returns the end of sentence (EOS) token for this context.
-    pub fn eos(&self) -> Token {
-        self.eos_token
-    }
-
-    /// Returns the newline (NL) token for this context.
-    pub fn nl(&self) -> Token {
-        self.nl_token
-    }
-
-    /// Returns the infill prefix token for this context.
-    pub fn infill_prefix(&self) -> Token {
-        self.infill_prefix_token
-    }
-
-    /// Returns the infill middle token for this context.
-    pub fn infill_middle(&self) -> Token {
-        self.infill_middle_token
-    }
-
-    /// Returns the infill suffix token for this context.
-    pub fn infill_suffix(&self) -> Token {
-        self.infill_suffix_token
-    }
-
-    /// Returns the infill end of middle token for this context.
-    pub fn eot(&self) -> Token {
-        self.eot_token
-    }
 }
 
 /// An intermediate token generated during an LLM completion.
610609

611610
/// An intermediate token generated during an LLM completion.
@@ -620,7 +619,7 @@ pub struct CompletionToken<'a> {
620619
impl<'a> CompletionToken<'a> {
621620
/// Decodes this token, returning the bytes composing it.
622621
pub fn as_bytes(&self) -> &[u8] {
623-
self.ctx.detokenize(self.token)
622+
self.ctx.model.detokenize(self.token)
624623
}
625624

626625
/// Returns this token as an `i32`.
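Taken together, the lib.rs changes mean the special tokens can now be queried directly from a LlamaModel, without first creating a session, and CompletionToken::as_bytes reaches detokenize through ctx.model as the last hunk shows. A minimal sketch of calling the relocated accessors, assuming LlamaModel and Token are exported at the crate root (only the bos/eos/eot accessors themselves are confirmed by this diff):

// Sketch only: demonstrates the accessors this commit moved onto LlamaModel.
// The `use` path assumes both types are re-exported at the crate root.
use llama_cpp::{LlamaModel, Token};

fn special_tokens(model: &LlamaModel) -> (Token, Token, Token) {
    // After this commit these accessors live on LlamaModel, so no
    // LlamaSession (and hence no llama.cpp context) is needed to query them.
    (model.bos(), model.eos(), model.eot())
}

Note that detokenize itself stays private (fn, not pub fn), so external callers still go through CompletionToken::as_bytes, which now forwards through the session's model handle.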
