forked from xiaomaxiao/keras_ocr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtext_proposal_connector_oriented.py
105 lines (89 loc) · 4.38 KB
/
text_proposal_connector_oriented.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#coding:utf-8
import numpy as np
from text_proposal_graph_builder import TextProposalGraphBuilder
class TextProposalConnectorOriented:
"""
Connect text proposals into text lines
"""
def __init__(self):
self.graph_builder=TextProposalGraphBuilder()
def group_text_proposals(self, text_proposals, scores, im_size):
graph=self.graph_builder.build_graph(text_proposals, scores, im_size)
return graph.sub_graphs_connected()
def fit_y(self, X, Y, x1, x2):
len(X)!=0
# if X only include one point, the function will get line y=Y[0]
if np.sum(X==X[0])==len(X):
return Y[0], Y[0]
p=np.poly1d(np.polyfit(X, Y, 1))
return p(x1), p(x2)
def get_text_lines(self, text_proposals, scores, im_size):
"""
text_proposals:boxes
"""
# tp=text proposal
tp_groups=self.group_text_proposals(text_proposals, scores, im_size)#首先还是建图,获取到文本行由哪几个小框构成
text_lines=np.zeros((len(tp_groups), 8), np.float32)
for index, tp_indices in enumerate(tp_groups):
text_line_boxes=text_proposals[list(tp_indices)]#每个文本行的全部小框
X = (text_line_boxes[:,0] + text_line_boxes[:,2]) / 2# 求每一个小框的中心x,y坐标
Y = (text_line_boxes[:,1] + text_line_boxes[:,3]) / 2
z1 = np.polyfit(X,Y,1)#多项式拟合,根据之前求的中心店拟合一条直线(最小二乘)
x0=np.min(text_line_boxes[:, 0])#文本行x坐标最小值
x1=np.max(text_line_boxes[:, 2])#文本行x坐标最大值
offset=(text_line_boxes[0, 2]-text_line_boxes[0, 0])*0.5#小框宽度的一半
# 以全部小框的左上角这个点去拟合一条直线,然后计算一下文本行x坐标的极左极右对应的y坐标
lt_y, rt_y=self.fit_y(text_line_boxes[:, 0], text_line_boxes[:, 1], x0+offset, x1-offset)
# 以全部小框的左下角这个点去拟合一条直线,然后计算一下文本行x坐标的极左极右对应的y坐标
lb_y, rb_y=self.fit_y(text_line_boxes[:, 0], text_line_boxes[:, 3], x0+offset, x1-offset)
score=scores[list(tp_indices)].sum()/float(len(tp_indices))#求全部小框得分的均值作为文本行的均值
text_lines[index, 0]=x0
text_lines[index, 1]=min(lt_y, rt_y)#文本行上端 线段 的y坐标的小值
text_lines[index, 2]=x1
text_lines[index, 3]=max(lb_y, rb_y)#文本行下端 线段 的y坐标的大值
text_lines[index, 4]=score#文本行得分
text_lines[index, 5]=z1[0]#根据中心点拟合的直线的k,b
text_lines[index, 6]=z1[1]
height = np.mean( (text_line_boxes[:,3]-text_line_boxes[:,1]) )#小框平均高度
text_lines[index, 7]= height + 2.5
text_recs = np.zeros((len(text_lines), 9), np.float)
index = 0
for line in text_lines:
b1 = line[6] - line[7] / 2 # 根据高度和文本行中心线,求取文本行上下两条线的b值
b2 = line[6] + line[7] / 2
x1 = line[0]
y1 = line[5] * line[0] + b1 # 左上
x2 = line[2]
y2 = line[5] * line[2] + b1 # 右上
x3 = line[0]
y3 = line[5] * line[0] + b2 # 左下
x4 = line[2]
y4 = line[5] * line[2] + b2 # 右下
disX = x2 - x1
disY = y2 - y1
width = np.sqrt(disX * disX + disY * disY) # 文本行宽度
fTmp0 = y3 - y1 # 文本行高度
fTmp1 = fTmp0 * disY / width
x = np.fabs(fTmp1 * disX / width) # 做补偿
y = np.fabs(fTmp1 * disY / width)
if line[5] < 0:
x1 -= x
y1 += y
x4 += x
y4 -= y
else:
x2 += x
y2 += y
x3 -= x
y3 -= y
text_recs[index, 0] = x1
text_recs[index, 1] = y1
text_recs[index, 2] = x2
text_recs[index, 3] = y2
text_recs[index, 4] = x3
text_recs[index, 5] = y3
text_recs[index, 6] = x4
text_recs[index, 7] = y4
text_recs[index, 8] = line[4]
index = index + 1
return text_recs