Skip to content

Commit 50e3823

Browse files
committed
revert last commit and fix tokenizers version
1 parent d9e6b62 commit 50e3823

File tree

2 files changed

+3 -2 lines changed

dataset/dataset.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@
26 26
[alb.ShiftScaleRotate(shift_limit=0, scale_limit=(-.15, 0), rotate_limit=1, border_mode=0, interpolation=3,
27 27
value=[255, 255, 255], p=1),
28 28
alb.GridDistortion(distort_limit=0.1, border_mode=0, interpolation=3, value=[255, 255, 255], p=.5)], p=.15),
29 -
#alb.InvertImg(p=.15),
29 +
# alb.InvertImg(p=.15),
30 30
alb.RGBShift(r_shift_limit=15, g_shift_limit=15,
31 31
b_shift_limit=15, p=0.3),
32 32
alb.GaussNoise(10, p=.2),
@@ -229,7 +229,7 @@ def generate_tokenizer(equations, output, vocab_size):
229 229
tokenizer = Tokenizer(BPE())
230 230
tokenizer.pre_tokenizer = pre_tokenizers.ByteLevel(add_prefix_space=False)
231 231
trainer = BpeTrainer(special_tokens=["[PAD]", "[BOS]", "[EOS]"], vocab_size=vocab_size, show_progress=True)
232 -
tokenizer.train([equations], trainer)
232 +
tokenizer.train(trainer, [equations])
233 233
tokenizer.save(path=output, pretty=False)
234 234

235 235

requirements.txt

+1
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ x_transformers>=0.8.1
9 9
torch>=1.7.0
10 10
imagesize>=1.2.0
11 11
transformers>=4.2.2
12 +
tokenizers==0.9.4
12 13
numpy>=1.19.5
13 14
Pillow>=8.1.0
14 15
PyYAML>=5.4.1

0 commit comments

Comments
 (0)