get_added_tokens_decoder returns BTreeMap
ArthurZucker committed Sep 6, 2023
1 parent a7ace44 commit efec086
Showing 1 changed file with 9 additions and 6 deletions.
bindings/python/src/tokenizer.rs: 15 changes (9 additions, 6 deletions)
@@ -25,6 +25,7 @@ use super::pre_tokenizers::PyPreTokenizer;
 use super::trainers::PyTrainer;
 use crate::processors::PyPostProcessor;
 use crate::utils::{MaybeSizedIterator, PyBufferedIterator};
+use std::collections::BTreeMap;
 
 /// Represents a token that can be added to a :class:`~tokenizers.Tokenizer`.
 /// It can have special options that define the way it should behave.
@@ -668,12 +669,14 @@ impl PyTokenizer {
     /// :obj:`Dict[int, AddedToken]`: The vocabulary
     #[pyo3(signature = ())]
     #[pyo3(text_signature = "(self)")]
-    fn get_added_tokens_decoder(&self) -> HashMap<u32, PyAddedToken> {
-        self.tokenizer
-            .get_added_tokens_decoder()
-            .into_iter()
-            .map(|(key, value)| (key, value.into()))
-            .collect()
+    fn get_added_tokens_decoder(&self) -> BTreeMap<u32, PyAddedToken> {
+        let mut sorted_map = BTreeMap::new();
+
+        for (key, value) in self.tokenizer.get_added_tokens_decoder() {
+            sorted_map.insert(key, value.into());
+        }
+
+        sorted_map
     }
 
     /// Get the size of the underlying vocabulary

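The practical difference is iteration order: a HashMap yields its entries in an arbitrary order, while a BTreeMap yields them in ascending key order, so the Dict[int, AddedToken] handed back to Python now lists the added tokens sorted by id. Below is a minimal standalone Rust sketch of that distinction; the token ids and string contents are made up for illustration and stand in for the real id -> PyAddedToken mapping.

use std::collections::{BTreeMap, HashMap};

fn main() {
    // Hypothetical added-token entries (id -> content), standing in for the
    // id -> AddedToken map returned by get_added_tokens_decoder.
    let entries = [(32001u32, "<pad>"), (32000u32, "<s>"), (32002u32, "</s>")];

    // HashMap iteration order is unspecified, so a dict built from it could
    // list the added tokens in any order.
    let unordered: HashMap<u32, &str> = entries.into_iter().collect();
    println!("HashMap keys:  {:?}", unordered.keys().collect::<Vec<_>>());

    // BTreeMap iterates in ascending key order, so the same entries come back
    // sorted by token id.
    let ordered: BTreeMap<u32, &str> = entries.into_iter().collect();
    assert_eq!(ordered.keys().copied().collect::<Vec<_>>(), vec![32000, 32001, 32002]);
    println!("BTreeMap keys: {:?}", ordered.keys().collect::<Vec<_>>());
}

Since Python dicts preserve insertion order, building the result from a sorted BTreeMap means callers of get_added_tokens_decoder() receive a dict whose keys iterate in ascending id order without any extra sorting on the Python side.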