-
Notifications
You must be signed in to change notification settings - Fork 0
/
ttf.py
47 lines (41 loc) · 1.59 KB
/
ttf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# coding=utf-8
from PIL import Image, ImageFont, ImageDraw
from fontTools.ttLib import TTFont
import pytesseract
import os
# 中文ORC识别(使用 ttf文件提高中文识别成功率)
def numttf(fontpath):
numbers = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '0']
numbers_list = {'一':'1', '二':'2', '三':'3', '四':'4', '五':'5', '六':'6', '七':'7', '八':'8', '九':'9', '零':'0'}
chinese_dir = 'numfont'
if not os.path.exists(chinese_dir):
os.mkdir(chinese_dir)
# 获取ttf的编码范围
font = TTFont(fontpath)
cmap = font['cmap']
cmap_dict = cmap.getBestCmap()
cmap_list = list(cmap_dict.keys())
num = {}
for codepoint in cmap_list:
word = chr(codepoint)
if word in numbers:
im = Image.new("RGB", (150, 150), (255, 255, 255))
dr = ImageDraw.Draw(im)
font = ImageFont.truetype(fontpath, 72)
dr.text((50, 50), word, font=font, fill="#000000")
dr.getfont()
# 图片保存路径
im.save(os.path.join(chinese_dir, word + ".png"))
fontimage = Image.open(os.path.join(chinese_dir, word + ".png"))
# psm模式
chinesechar = pytesseract.image_to_string(fontimage, lang='chi_sim', config="-psm 6")
if chinesechar != "":
try:
if numbers_list[chinesechar]:
chinesechar = numbers_list[chinesechar]
except:
pass
num[word] = chinesechar
print(num)
if __name__=='__main__':
numttf('./tyc-num.ttf')