@@ -73,9 +73,12 @@ def __init__(
         seed: int = 1234,
         max_length: Optional[int] = 2048,
         add_bos_token: bool = False,
-        custom_prefix_token_id=None,
+        custom_prefix_token_id: Optional[int] = None,
         # send the requests as tokens or strings
-        tokenized_requests=True,
+        tokenized_requests: bool = True,
+        trust_remote_code: bool = False,
+        revision: Optional[str] = "main",
+        use_fast_tokenizer: bool = True,
         **kwargs,
     ) -> None:
         super().__init__()
@@ -128,7 +131,10 @@ def __init__(
             import transformers

             self.tokenizer = transformers.AutoTokenizer.from_pretrained(
-                self.tokenizer if self.tokenizer else self.model
+                self.tokenizer if self.tokenizer else self.model,
+                trust_remote_code=trust_remote_code,
+                revision=revision,
+                use_fast=use_fast_tokenizer,
             )
             # Not used as the API will handle padding but to mirror the behavior of the HFLM
             self.tokenizer = configure_pad_token(self.tokenizer)
@@ -153,6 +159,9 @@ def __init__(
             assert isinstance(tokenizer, str), "tokenizer must be a string"
             self.tokenizer = transformers.AutoTokenizer.from_pretrained(
                 tokenizer,
+                trust_remote_code=trust_remote_code,
+                revision=revision,
+                use_fast=use_fast_tokenizer,
             )

     @abc.abstractmethod
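
For context, a minimal standalone sketch of how the three new keyword arguments flow through to the tokenizer. The helper name `load_tokenizer` and the usage values are illustrative assumptions, not part of this diff; the `trust_remote_code`, `revision`, and `use_fast` arguments are real `AutoTokenizer.from_pretrained` parameters, with `use_fast_tokenizer` mapping onto HF's `use_fast` flag:

from typing import Optional

import transformers


def load_tokenizer(
    pretrained: str,
    trust_remote_code: bool = False,
    revision: Optional[str] = "main",
    use_fast_tokenizer: bool = True,
):
    # Mirrors the diff above: the new kwargs are forwarded verbatim
    # to AutoTokenizer.from_pretrained.
    return transformers.AutoTokenizer.from_pretrained(
        pretrained,
        trust_remote_code=trust_remote_code,
        revision=revision,
        use_fast=use_fast_tokenizer,
    )


# Hypothetical usage: pin a specific revision and allow a model's
# custom tokenizer code to run.
tokenizer = load_tokenizer(
    "EleutherAI/pythia-70m",
    trust_remote_code=True,
    revision="main",
)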