Skip to content

Commit

Permalink
fix bug in tokenizer
Browse files Browse the repository at this point in the history
  • Loading branch information
cogentapps committed Apr 29, 2023
1 parent 36e434f commit a9c3f38
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion app/src/core/tokenizer/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ const special_tokens: any = {

// Lookup table from each special-token string to its numeric value, filled
// from the keys and values of the `special_tokens` object.
const special_tokens_map = new Map<string, number>();
for (const text of Object.keys(special_tokens)) {
  // The value must be read from the source object `special_tokens`.
  // Bracket-indexing the Map itself does not consult its entries and would
  // store `undefined` for every key.
  special_tokens_map.set(text, special_tokens[text]);
}

// Regular expression whose alternatives, tried left to right, match:
//   1. one of the contraction suffixes 's 't 're 've 'm 'll 'd
//   2. a run of letters, optionally preceded by a single character that is
//      not a letter, a digit, CR or LF
//   3. a run of one to three digits
//   4. a run of characters that are neither whitespace, letters nor digits,
//      optionally preceded by one space and followed by any CR/LF characters
//   5. optional whitespace ending in one or more CR/LF characters
//   6. a whitespace run that is not immediately followed by a
//      non-whitespace character
//   7. any other whitespace run
// Flags: g (global matching), i (case-insensitive), u (Unicode mode, which
// the \p{L} / \p{N} property escapes require).
// NOTE(review): looks like the pre-tokenization split pattern used by
// GPT-style BPE tokenizers — confirm against the reference encoder.
const pattern = /('s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+/giu;
Expand Down

0 comments on commit a9c3f38

Please sign in to comment.