@@ -208,6 +208,27 @@ pub struct LlamaModel {
208
208
209
209
/// The size of this model's vocabulary, in tokens.
210
210
vocabulary_size : usize ,
211
+
212
+ /// The beginning of sentence (BOS) token for this model.
213
+ bos_token : Token ,
214
+
215
+ /// The end of sentence (EOS) token for this model.
216
+ eos_token : Token ,
217
+
218
+ /// The newline (NL) token for this model.
219
+ nl_token : Token ,
220
+
221
+ /// For infilling, the prefix token for this model.
222
+ infill_prefix_token : Token ,
223
+
224
+ /// For infilling, the middle token for this model.
225
+ infill_middle_token : Token ,
226
+
227
+ /// For infilling, the suffix token for this model.
228
+ infill_suffix_token : Token ,
229
+
230
+ /// For infilling, the token for the end of the infill.
231
+ eot_token : Token ,
211
232
}
212
233
213
234
unsafe impl Send for LlamaModel { }
@@ -256,6 +277,13 @@ impl LlamaModel {
256
277
Ok ( Self {
257
278
model : Arc :: new ( RwLock :: new ( LlamaModelInner ( model) ) ) ,
258
279
vocabulary_size : vocabulary_size as usize ,
280
+ bos_token : Token ( unsafe { llama_token_bos ( model) } ) ,
281
+ eos_token : Token ( unsafe { llama_token_eos ( model) } ) ,
282
+ nl_token : Token ( unsafe { llama_token_nl ( model) } ) ,
283
+ infill_prefix_token : Token ( unsafe { llama_token_prefix ( model) } ) ,
284
+ infill_middle_token : Token ( unsafe { llama_token_middle ( model) } ) ,
285
+ infill_suffix_token : Token ( unsafe { llama_token_suffix ( model) } ) ,
286
+ eot_token : Token ( unsafe { llama_token_eot ( model) } ) ,
259
287
} )
260
288
}
261
289
}
@@ -312,6 +340,21 @@ impl LlamaModel {
312
340
}
313
341
}
314
342
343
+ /// Gets the byte string representation of `token` in this model's vocabulary.
344
+ ///
345
+ /// The returned slice borrows from this model, and typically encodes
346
+ /// a UTF-8 string; consider using [`String::from_utf8_lossy`] if you need to display the
347
+ /// contents. Panics if `token` is outside this model's vocabulary range.
348
+ fn detokenize ( & self , token : Token ) -> & [ u8 ] {
349
+ assert ! (
350
+ ( token. 0 as usize ) < self . vocabulary_size,
351
+ "{} is out of range for this model's vocabulary range" ,
352
+ token. 0
353
+ ) ;
354
+
355
+ unsafe { CStr :: from_ptr ( llama_token_get_text ( * * self . model . blocking_read ( ) , token. 0 ) ) } . to_bytes ( ) // NOTE(review): soundness presumably rests on the range assert above and on the FFI call returning a NUL-terminated string owned by the model — confirm
356
+ }
357
+
315
358
/// Creates a new evaluation context for this model.
316
359
///
317
360
/// The model must live for at least as long as the context, but many contexts can be created
@@ -344,17 +387,44 @@ impl LlamaModel {
344
387
model : self . clone ( ) ,
345
388
inner : Arc :: new ( LlamaContextInner { ptr : ctx } ) ,
346
389
history_size : 0 ,
347
-
348
- // SAFETY: Static constructors.
349
- bos_token : Token ( unsafe { llama_token_bos ( ctx) } ) ,
350
- eos_token : Token ( unsafe { llama_token_eos ( ctx) } ) ,
351
- nl_token : Token ( unsafe { llama_token_nl ( ctx) } ) ,
352
- infill_prefix_token : Token ( unsafe { llama_token_prefix ( ctx) } ) ,
353
- infill_middle_token : Token ( unsafe { llama_token_middle ( ctx) } ) ,
354
- infill_suffix_token : Token ( unsafe { llama_token_suffix ( ctx) } ) ,
355
- eot_token : Token ( unsafe { llama_token_eot ( ctx) } ) ,
356
390
}
357
391
}
392
+
393
+
394
+ /// Returns the beginning of sentence (BOS) token for this model.
395
+ pub fn bos ( & self ) -> Token {
396
+ self . bos_token
397
+ }
398
+
399
+ /// Returns the end of sentence (EOS) token for this model.
400
+ pub fn eos ( & self ) -> Token {
401
+ self . eos_token
402
+ }
403
+
404
+ /// Returns the newline (NL) token for this model.
405
+ pub fn nl ( & self ) -> Token {
406
+ self . nl_token
407
+ }
408
+
409
+ /// Returns the infill prefix token for this model.
410
+ pub fn infill_prefix ( & self ) -> Token {
411
+ self . infill_prefix_token
412
+ }
413
+
414
+ /// Returns the infill middle token for this model.
415
+ pub fn infill_middle ( & self ) -> Token {
416
+ self . infill_middle_token
417
+ }
418
+
419
+ /// Returns the infill suffix token for this model.
420
+ pub fn infill_suffix ( & self ) -> Token {
421
+ self . infill_suffix_token
422
+ }
423
+
424
+ /// Returns the token that marks the end of an infill (EOT) for this model.
425
+ pub fn eot ( & self ) -> Token {
426
+ self . eot_token
427
+ }
358
428
}
359
429
360
430
/// The inner part of a [`LlamaSession`].
@@ -391,27 +461,6 @@ pub struct LlamaSession {
391
461
392
462
/// The number of tokens present in this model's context.
393
463
history_size : usize ,
394
-
395
- /// The beginning of sentence (BOS) token for this model.
396
- bos_token : Token ,
397
-
398
- /// The end of sentence (EOS) token for this model.
399
- eos_token : Token ,
400
-
401
- /// The newline (NL) token for this model.
402
- nl_token : Token ,
403
-
404
- /// For infilling, the prefix token for this model.
405
- infill_prefix_token : Token ,
406
-
407
- /// For infilling, the middle token for this model.
408
- infill_middle_token : Token ,
409
-
410
- /// For infilling, the suffix token for this model.
411
- infill_suffix_token : Token ,
412
-
413
- /// For infilling, the token for the end of the infill.
414
- eot_token : Token ,
415
464
}
416
465
417
466
/// An error raised while advancing the context in a [`LlamaSession`].
@@ -443,21 +492,6 @@ pub enum LlamaContextError {
443
492
}
444
493
445
494
impl LlamaSession {
446
- /// Gets the byte string representation of `token` in this model's vocabulary.
447
- ///
448
- /// The returned slice is valid for the lifetime of this session, and typically encodes
449
- /// a UTF-8 string; consider using [`String::from_utf8_lossy`] if you need to display the
450
- /// contents.
451
- fn detokenize ( & self , token : Token ) -> & [ u8 ] {
452
- assert ! (
453
- ( token. 0 as usize ) < self . model. vocabulary_size,
454
- "{} is out of range for this model's vocabulary range" ,
455
- token. 0
456
- ) ;
457
-
458
- unsafe { CStr :: from_ptr ( llama_token_get_text ( self . inner . ptr , token. 0 ) ) } . to_bytes ( )
459
- }
460
-
461
495
/// Advances the inner context of this model with `tokens`.
462
496
///
463
497
/// The model will generate new tokens from the end of the context.
@@ -571,41 +605,6 @@ impl LlamaSession {
571
605
572
606
CompletionHandle { ctx : self , rx }
573
607
}
574
-
575
- /// Returns the beginning of sentence (BOS) token for this context.
576
- pub fn bos ( & self ) -> Token {
577
- self . bos_token
578
- }
579
-
580
- /// Returns the end of sentence (EOS) token for this context.
581
- pub fn eos ( & self ) -> Token {
582
- self . eos_token
583
- }
584
-
585
- /// Returns the newline (NL) token for this context.
586
- pub fn nl ( & self ) -> Token {
587
- self . nl_token
588
- }
589
-
590
- /// Returns the infill prefix token for this context.
591
- pub fn infill_prefix ( & self ) -> Token {
592
- self . infill_prefix_token
593
- }
594
-
595
- /// Returns the infill middle token for this context.
596
- pub fn infill_middle ( & self ) -> Token {
597
- self . infill_middle_token
598
- }
599
-
600
- /// Returns the infill suffix token for this context.
601
- pub fn infill_suffix ( & self ) -> Token {
602
- self . infill_suffix_token
603
- }
604
-
605
- /// Returns the infill end of middle token for this context.
606
- pub fn eot ( & self ) -> Token {
607
- self . eot_token
608
- }
609
608
}
610
609
611
610
/// An intermediate token generated during an LLM completion.
@@ -620,7 +619,7 @@ pub struct CompletionToken<'a> {
620
619
impl < ' a > CompletionToken < ' a > {
621
620
/// Decodes this token, returning the bytes composing it.
622
621
pub fn as_bytes ( & self ) -> & [ u8 ] {
623
- self . ctx . detokenize ( self . token )
622
+ self . ctx . model . detokenize ( self . token )
624
623
}
625
624
626
625
/// Returns this token as an `i32`.
0 commit comments