From 7e098f160e32e67e299a8fa640d7ec9f7fbbbf50 Mon Sep 17 00:00:00 2001
From: smallv0221 <33639025+smallv0221@users.noreply.github.com>
Date: Tue, 17 Aug 2021 14:47:30 +0800
Subject: [PATCH] Fix tokenizer bug (#893)

* fix unified transformer dtype problem
* fix win dtype bug
* Fix plato-2 and plato-mini dtype bug
* Fix plato-2 tokenization
* Refine some doc
* Add general k support for topk sampling
* fix seed
* minor fix
* Fix unitransformer readme
* topk kernel optimization
* add unimo model and fix generate api
* add 3 datasets for unimo-text
* fix tokenizer bug

Co-authored-by: Jiaqi Liu
Co-authored-by: liu zhengxi <380185688@qq.com>
---
 paddlenlp/transformers/unimo/tokenizer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddlenlp/transformers/unimo/tokenizer.py b/paddlenlp/transformers/unimo/tokenizer.py
index 89d5a284932b..ed273d471608 100644
--- a/paddlenlp/transformers/unimo/tokenizer.py
+++ b/paddlenlp/transformers/unimo/tokenizer.py
@@ -75,7 +75,7 @@ class UNIMOTokenizer(PretrainedTokenizer):
             "unimo-text-1.0":
             "https://paddlenlp.bj.bcebos.com/models/transformers/unimo/unimo-text-1.0-vocab.txt",
             "unimo-text-1.0-large":
-            "https://paddlenlp.bj.bcebos.com/models/transformers/unimo/unimo-text-1.0-vocab-large.txt",
+            "https://paddlenlp.bj.bcebos.com/models/transformers/unimo/unimo-text-1.0-large-vocab.txt",
         }
     }
     pretrained_init_configuration = {