Skip to content

Commit

Permalink
Merge pull request rustformers#406 from clarkmcc/upgrade-tokenizers-crate
Browse files Browse the repository at this point in the history

Updated tokenizers crate to 0.13.4
  • Loading branch information
philpax authored Aug 17, 2023
2 parents bca64b0 + e982ff8 commit 129b84a
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 6 deletions.
6 changes: 3 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion crates/llm-base/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ partial_sort = "0.2.0"
serde_bytes = "0.11"
memmap2 = { workspace = true }
half = "2"
tokenizers = {version="0.13.3", default-features=false, features=["onig"]}
tokenizers = {version="0.13.4", default-features=false, features=["onig"]}
regex = "1.8"
tracing = { workspace = true }

Expand Down
4 changes: 2 additions & 2 deletions crates/llm-base/src/tokenizer/huggingface.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ impl HuggingFaceTokenizer {
/// Converts a token index to the token it represents in this tokenizer.
pub(crate) fn token(&self, idx: usize) -> Vec<u8> {
self.tokenizer
.decode(vec![idx as u32], true)
.decode(&[idx as u32], true)
.expect("Cannot decode token from tokenizer tokenizer.")
.as_bytes()
.to_vec()
Expand Down Expand Up @@ -67,7 +67,7 @@ impl HuggingFaceTokenizer {
/// Decode a list `tokens` with this tokenizer.
pub(crate) fn decode(&self, tokens: Vec<TokenId>, skip_special_tokens: bool) -> Vec<u8> {
self.tokenizer
.decode(tokens, skip_special_tokens)
.decode(&tokens, skip_special_tokens)
.expect("Cannot decode token from tokenizer.")
.as_bytes()
.to_vec()
Expand Down

0 comments on commit 129b84a

Please sign in to comment.