Skip to content

Commit 789d0d6

Browse files
committed
update tokenizer
1 parent 6ab83d5 commit 789d0d6

File tree

3 files changed

+13
-3
lines changed

3 files changed

+13
-3
lines changed

demos/cli_demo.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ def main():
215215
response = response.replace('<|end|>', '')
216216
_clear_screen()
217217
print(f"\nUser: {query}")
218-
print(f"\nQwen-Chat: {response}")
218+
print(f"\nCodeShell-Chat: {response}")
219219
except KeyboardInterrupt:
220220
print('[WARNING] Generation interrupted')
221221
continue

tokenizer/tokenizer.json

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,15 @@
173173
"rstrip": false,
174174
"normalized": false,
175175
"special": true
176+
},
177+
{
178+
"id": 70019,
179+
"content": "<|end|>",
180+
"single_word": false,
181+
"lstrip": false,
182+
"rstrip": false,
183+
"normalized": false,
184+
"special": true
176185
}
177186
],
178187
"normalizer": null,

tokenizer/tokenizer_config.json

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,15 @@
1919
"<commit_before>",
2020
"<commit_msg>",
2121
"<commit_after>",
22-
"<reponame>"
22+
"<reponame>",
23+
"<|end|>"
2324
],
2425
"bos_token": "<|endoftext|>",
2526
"clean_up_tokenization_spaces": true,
2627
"eos_token": "<|endoftext|>",
2728
"model_max_length": 8192,
2829
"tokenizer_class": "GPT2Tokenizer",
2930
"unk_token": "<|endoftext|>",
30-
"vocab_size": 70019,
31+
"vocab_size": 70020,
3132
"pad_token": "<|endoftext|>"
3233
}

0 commit comments

Comments
 (0)