From a9c3f38d6012849c0adb71c7dfca8b992823e902 Mon Sep 17 00:00:00 2001
From: Cogent Apps
Date: Sat, 29 Apr 2023 18:52:34 +0000
Subject: [PATCH] fix bug in tokenizer

---
 app/src/core/tokenizer/index.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/src/core/tokenizer/index.ts b/app/src/core/tokenizer/index.ts
index e07998d7..639b12d3 100644
--- a/app/src/core/tokenizer/index.ts
+++ b/app/src/core/tokenizer/index.ts
@@ -12,7 +12,7 @@ const special_tokens: any = {
 
 const special_tokens_map = new Map();
 for (const text of Object.keys(special_tokens)) {
-    special_tokens_map.set(text, special_tokens_map[text]);
+    special_tokens_map.set(text, special_tokens[text]);
 }
 
 const pattern = /('s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+/giu;