File tree Expand file tree Collapse file tree 1 file changed +8
-7
lines changed Expand file tree Collapse file tree 1 file changed +8
-7
lines changed Original file line number Diff line number Diff line change @@ -132,15 +132,16 @@ def parse_args() -> argparse.Namespace:
132
132
reverse_vocab = {id : encoded_tok for encoded_tok , id in tokenizer .vocab .items ()}
133
133
134
134
for i in range (vocab_size ):
135
- if i in reverse_vocab :
136
- tokens .append (reverse_vocab [i ])
137
- if reverse_vocab [i ] not in added_vocab :
138
- toktypes .append (gguf .TokenType .NORMAL )
139
- else :
140
- toktypes .append (gguf .TokenType .USER_DEFINED )
141
- else :
135
+ if i not in reverse_vocab :
142
136
tokens .append (f"[PAD{ i } ]" )
143
137
toktypes .append (gguf .TokenType .USER_DEFINED )
138
+ elif reverse_vocab [i ] in added_vocab :
139
+ # NOTE: every added-vocab token is marked USER_DEFINED here; should CONTROL tokens be distinguished from it?
140
+ tokens .append (reverse_vocab [i ])
141
+ toktypes .append (gguf .TokenType .USER_DEFINED )
142
+ else :
143
+ tokens .append (reverse_vocab [i ])
144
+ toktypes .append (gguf .TokenType .NORMAL )
144
145
145
146
gguf_writer .add_token_list (tokens )
146
147
gguf_writer .add_token_types (toktypes )
You can’t perform that action at this time.
0 commit comments