Skip to content

Commit 4cec66e

Browse files
[API] tokenizer: add trust-remote-code (#2372)
* tokenizer: trust-remote-code
* pre-commit
---------
Co-authored-by: Hailey Schoelkopf <65563625+haileyschoelkopf@users.noreply.github.com>
1 parent aa457ed commit 4cec66e

File tree

1 file changed

+12
-3
lines changed

1 file changed

+12
-3
lines changed

lm_eval/models/api_models.py

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -73,9 +73,12 @@ def __init__(
7373
seed: int = 1234,
7474
max_length: Optional[int] = 2048,
7575
add_bos_token: bool = False,
76-
custom_prefix_token_id=None,
76+
custom_prefix_token_id: int = None,
7777
# send the requests as tokens or strings
78-
tokenized_requests=True,
78+
tokenized_requests: bool = True,
79+
trust_remote_code: bool = False,
80+
revision: Optional[str] = "main",
81+
use_fast_tokenizer: bool = True,
7982
**kwargs,
8083
) -> None:
8184
super().__init__()
@@ -128,7 +131,10 @@ def __init__(
128131
import transformers
129132

130133
self.tokenizer = transformers.AutoTokenizer.from_pretrained(
131-
self.tokenizer if self.tokenizer else self.model
134+
self.tokenizer if self.tokenizer else self.model,
135+
trust_remote_code=trust_remote_code,
136+
revision=revision,
137+
use_fast=use_fast_tokenizer,
132138
)
133139
# Not used as the API will handle padding but to mirror the behavior of the HFLM
134140
self.tokenizer = configure_pad_token(self.tokenizer)
@@ -153,6 +159,9 @@ def __init__(
153159
assert isinstance(tokenizer, str), "tokenizer must be a string"
154160
self.tokenizer = transformers.AutoTokenizer.from_pretrained(
155161
tokenizer,
162+
trust_remote_code=trust_remote_code,
163+
revision=revision,
164+
use_fast=use_fast_tokenizer,
156165
)
157166

158167
@abc.abstractmethod

0 commit comments

Comments (0)