
Commit fcb2b09

+extra train tag, mv transformations to sep. file
1 parent 4df5b06 commit fcb2b09

6 files changed (+42, -37 lines)

README.md (+1)

@@ -40,6 +40,7 @@ Always double check the result carefully. You can try to redo the prediction wit
 
 ## Training the model [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lukas-blecher/LaTeX-OCR/blob/main/notebooks/LaTeX_OCR_training.ipynb)
 
+Install a couple of dependencies `pip install pix2tex[train]`.
 1. First we need to combine the images with their ground truth labels. I wrote a dataset class (which needs further improving) that saves the relative paths to the images with the LaTeX code they were rendered with. To generate the dataset pickle file run
 
 ```

notebooks/LaTeX_OCR_training.ipynb (+1, -1)

@@ -35,7 +35,7 @@
 "id": "r396ah-Q3EQc"
 },
 "source": [
-"!pip install pix2tex -qq"
+"!pip install pix2tex[train] -qq"
 ],
 "execution_count": null,
 "outputs": []

pix2tex/cli.py (+1, -1)

@@ -1,4 +1,4 @@
-from pix2tex.dataset.dataset import test_transform
+from pix2tex.dataset.transforms import test_transform
 import pandas.io.clipboard as clipboard
 from PIL import ImageGrab
 from PIL import Image

pix2tex/dataset/dataset.py (+1, -29)

@@ -1,6 +1,3 @@
-from tempfile import tempdir
-import albumentations as alb
-from albumentations.pytorch import ToTensorV2
 import torch
 import torch.nn.functional as F
 from torch.nn.utils.rnn import pad_sequence

@@ -17,33 +14,8 @@
 from tqdm.auto import tqdm
 
 from pix2tex.utils.utils import in_model_path
+from pix2tex.dataset.transforms import train_transform, test_transform
 
-train_transform = alb.Compose(
-    [
-        alb.Compose(
-            [alb.ShiftScaleRotate(shift_limit=0, scale_limit=(-.15, 0), rotate_limit=1, border_mode=0, interpolation=3,
-                                  value=[255, 255, 255], p=1),
-             alb.GridDistortion(distort_limit=0.1, border_mode=0, interpolation=3, value=[255, 255, 255], p=.5)], p=.15),
-        # alb.InvertImg(p=.15),
-        alb.RGBShift(r_shift_limit=15, g_shift_limit=15,
-                     b_shift_limit=15, p=0.3),
-        alb.GaussNoise(10, p=.2),
-        alb.RandomBrightnessContrast(.05, (-.2, 0), True, p=0.2),
-        alb.ImageCompression(95, p=.3),
-        alb.ToGray(always_apply=True),
-        alb.Normalize((0.7931, 0.7931, 0.7931), (0.1738, 0.1738, 0.1738)),
-        # alb.Sharpen()
-        ToTensorV2(),
-    ]
-)
-test_transform = alb.Compose(
-    [
-        alb.ToGray(always_apply=True),
-        alb.Normalize((0.7931, 0.7931, 0.7931), (0.1738, 0.1738, 0.1738)),
-        # alb.Sharpen()
-        ToTensorV2(),
-    ]
-)
 
 
 class Im2LatexDataset:

pix2tex/dataset/transforms.py (+29)

@@ -0,0 +1,29 @@
+import albumentations as alb
+from albumentations.pytorch import ToTensorV2
+
+train_transform = alb.Compose(
+    [
+        alb.Compose(
+            [alb.ShiftScaleRotate(shift_limit=0, scale_limit=(-.15, 0), rotate_limit=1, border_mode=0, interpolation=3,
+                                  value=[255, 255, 255], p=1),
+             alb.GridDistortion(distort_limit=0.1, border_mode=0, interpolation=3, value=[255, 255, 255], p=.5)], p=.15),
+        # alb.InvertImg(p=.15),
+        alb.RGBShift(r_shift_limit=15, g_shift_limit=15,
+                     b_shift_limit=15, p=0.3),
+        alb.GaussNoise(10, p=.2),
+        alb.RandomBrightnessContrast(.05, (-.2, 0), True, p=0.2),
+        alb.ImageCompression(95, p=.3),
+        alb.ToGray(always_apply=True),
+        alb.Normalize((0.7931, 0.7931, 0.7931), (0.1738, 0.1738, 0.1738)),
+        # alb.Sharpen()
+        ToTensorV2(),
+    ]
+)
+test_transform = alb.Compose(
+    [
+        alb.ToGray(always_apply=True),
+        alb.Normalize((0.7931, 0.7931, 0.7931), (0.1738, 0.1738, 0.1738)),
+        # alb.Sharpen()
+        ToTensorV2(),
+    ]
+)
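
For reference, a minimal usage sketch (not part of this commit) of the relocated transforms; the image path below is a placeholder assumption. albumentations pipelines are called with keyword arguments and return a dict, and `ToTensorV2` yields a `torch.Tensor`.

```python
# Minimal sketch, not from the repo: apply the moved transforms to an image.
# "equation.png" is a placeholder path for any RGB image of a rendered formula.
import numpy as np
from PIL import Image

from pix2tex.dataset.transforms import test_transform, train_transform

img = np.array(Image.open("equation.png").convert("RGB"))  # uint8 array, H x W x 3

# albumentations pipelines take named arrays and return a dict;
# ToTensorV2 converts the result to a torch.Tensor of shape (3, H, W).
augmented = train_transform(image=img)["image"]    # randomized, for training
normalized = test_transform(image=img)["image"]    # grayscale + normalize only
```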

setup.py (+9, -6)

@@ -19,10 +19,15 @@
     'uvicorn[standard]',
     'python-multipart'
 ]
+train = [
+    'python-Levenshtein>=0.12.2',
+    'torchtext>=0.6.0',
+    'imagesize>=1.2.0',
+]
 
 setuptools.setup(
     name='pix2tex',
-    version='0.0.22',
+    version='0.0.23',
     description='pix2tex: Using a ViT to convert images of equations into LaTeX code.',
     long_description=long_description,
     long_description_content_type='text/markdown',

@@ -58,15 +63,13 @@
         'PyYAML>=5.4.1',
         'pandas>=1.0.0',
         'timm',
-        'python-Levenshtein>=0.12.2',
-        'torchtext>=0.6.0',
         'albumentations>=0.5.2',
-        'imagesize>=1.2.0',
     ],
     extras_require={
-        'all': gui+api,
+        'all': gui+api+train,
         'gui': gui,
-        'api': api
+        'api': api,
+        'train': train
     },
     entry_points={
         'console_scripts': [

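The new `train` extra can be smoke-tested with a short check (an assumption, not part of the repo): after `pip install pix2tex[train]`, the three packages listed under `train` should be importable.

```python
# Minimal sketch, not from the repo: confirm the 'train' extra's dependencies
# resolved after `pip install pix2tex[train]`.
import importlib.util

for module in ("Levenshtein", "torchtext", "imagesize"):  # import names of the train deps
    found = importlib.util.find_spec(module) is not None
    print(f"{module}: {'installed' if found else 'missing'}")
```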