26
26
//!
27
27
//! // `ctx.start_completing` creates a worker thread that generates tokens. When the completion
28
28
//! // handle is dropped, tokens stop generating!
29
- //! while let Some(next_token) = ctx.get_completions().next_token() {
29
+ //!
30
+ //! let mut completions = ctx.start_completing();
31
+ //!
32
+ //! while let Some(next_token) = completions.next_token() {
30
33
//! println!("{}", String::from_utf8_lossy(next_token.as_bytes()));
31
34
//!
32
35
//! decoded_tokens += 1;
@@ -315,7 +318,7 @@ impl LlamaModel {
315
318
//
316
319
// `out_buf` is a `Vec<Token>`, and `Token` is `#[repr(transparent)]` over an `i32`.
317
320
llama_tokenize (
318
- * * self . model . blocking_read ( ) ,
321
+ * * self . model . try_read ( ) . unwrap ( ) ,
319
322
content. as_ptr ( ) as * const i8 ,
320
323
content. len ( ) as i32 ,
321
324
out_buf. as_mut_ptr ( ) as * mut i32 ,
@@ -352,7 +355,7 @@ impl LlamaModel {
352
355
token. 0
353
356
) ;
354
357
355
- unsafe { CStr :: from_ptr ( llama_token_get_text ( * * self . model . blocking_read ( ) , token. 0 ) ) } . to_bytes ( )
358
+ unsafe { CStr :: from_ptr ( llama_token_get_text ( * * self . model . try_read ( ) . unwrap ( ) , token. 0 ) ) } . to_bytes ( )
356
359
}
357
360
358
361
/// Creates a new evaluation context for this model.
@@ -581,7 +584,7 @@ impl LlamaSession {
581
584
582
585
/// Starts generating tokens at the end of the context using llama.cpp's built-in Beam search.
583
586
/// This is where you want to be if you just want some completions.
584
- pub fn get_completions ( & mut self ) -> CompletionHandle {
587
+ pub fn start_completing ( & mut self ) -> CompletionHandle {
585
588
let ( tx, rx) = flume:: unbounded ( ) ;
586
589
587
590
info ! (
@@ -599,7 +602,7 @@ impl LlamaSession {
599
602
Box :: leak ( Box :: new ( detail:: BeamSearchState { tx } ) ) as * mut _ as * mut c_void ,
600
603
1 ,
601
604
past_tokens as i32 ,
602
- 2048 ,
605
+ 32_768 ,
603
606
) ;
604
607
} ) ;
605
608
@@ -657,7 +660,7 @@ pub struct CompletionHandle<'a> {
657
660
impl < ' a > CompletionHandle < ' a > {
658
661
/// Blocks the current thread, resolving to the next completed token, or `None` if EOS is
659
662
/// reached.
660
- pub fn next_token ( & self ) -> Option < CompletionToken < ' _ > > {
663
+ pub fn next_token ( & mut self ) -> Option < CompletionToken < ' _ > > {
661
664
self . rx . recv ( ) . ok ( ) . map ( |token| CompletionToken {
662
665
ctx : self . ctx ,
663
666
token,
@@ -666,7 +669,7 @@ impl<'a> CompletionHandle<'a> {
666
669
667
670
/// Asynchronously yields the current thread, resolving to the next completed token, or `None`
668
671
/// if EOS is reached.
669
- pub async fn next_token_async ( & self ) -> Option < CompletionToken < ' _ > > {
672
+ pub async fn next_token_async ( & mut self ) -> Option < CompletionToken < ' _ > > {
670
673
self . rx
671
674
. recv_async ( )
672
675
. await
0 commit comments