forked from clovaai/deep-text-recognition-benchmark
-
Notifications
You must be signed in to change notification settings - Fork 12
/
text_recog.py
94 lines (76 loc) · 2.97 KB
/
text_recog.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# Import required modules
import numpy as np
import cv2 as cv
import math
import argparse
############ Add argument parser for command line arguments ############
# NOTE: parse_args() runs at import time, so importing this module requires
# a compatible sys.argv — this file is intended to be run as a script.
parser = argparse.ArgumentParser(
description = "The OCR model can be obtained from converting the pretrained CRNN model to .onnx format from the github repository https://github.com/meijieru/crnn.pytorch")
# Path to the input image; omitted --input means frames are captured from a camera.
parser.add_argument('--input',
help='Path to input image. Skip this argument to capture frames from a camera.')
# Default points at a local model file — override with --ocr on other machines.
parser.add_argument('--ocr', default="/home/moo/Desktop/ocr/clovaai/new_model_8_4/DenseNet.onnx",
help="Path to a binary .pb or .onnx file contains trained recognition network", )
# Network input size; 100x32 matches the CRNN-style recognizer's expected input.
parser.add_argument('--width', type=int, default=100,
help='Preprocess input image by resizing to a specific width.')
parser.add_argument('--height', type=int, default=32,
help='Preprocess input image by resizing to a specific height.')
args = parser.parse_args()
############ Utility functions ############
def fill_img(img, width = 100, height = 32):
    """Resize `img` to `height` rows preserving aspect ratio, then pad on the
    right (by repeating the last column) so the result is exactly
    `height` x `width`.

    Args:
        img: input image array of shape (h, w) or (h, w, c).
        width: target output width in pixels.
        height: target output height in pixels.

    Returns:
        Resized (and right-padded) image of width exactly `width`.
    """
    h, w = img.shape[0], img.shape[1]
    ratio = w / float(h)
    # Width after scaling to the target height, capped at the target width.
    resizedW = min(width, math.ceil(height * ratio))
    resizedImg = cv.resize(img, (resizedW, height))
    if resizedW != width:
        # Repeat the last column (width - resizedW + 1) times so the columns
        # sum to exactly `width`. The original appended (width - resizedW),
        # which summed to width - 1 — an off-by-one leaving the image one
        # column short of the requested width.
        repetition = [1] * (resizedW - 1) + [width - resizedW + 1]
        resizedImg = np.repeat(resizedImg, repetition, axis=1)
    return resizedImg
def decodeText(scores):
    """Greedy CTC decoding of recognizer output.

    Args:
        scores: array of shape (T, 1, num_classes) — per-timestep class
            scores. Class 0 is the CTC blank; class c (c >= 1) maps to
            alphabet[c - 1].

    Returns:
        Decoded string with adjacent duplicate characters collapsed and
        blank symbols removed.
    """
    alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
    # Raw per-timestep symbols; '-' stands for the CTC blank.
    # (Removed a leftover debug print that dumped every argmax to stdout.)
    text = ""
    for i in range(scores.shape[0]):
        c = np.argmax(scores[i][0])
        text += alphabet[c - 1] if c != 0 else '-'
    # Collapse adjacent repeats and drop blanks to get the final output.
    char_list = []
    for i in range(len(text)):
        if text[i] != '-' and (not (i > 0 and text[i] == text[i - 1])):
            char_list.append(text[i])
    return ''.join(char_list)
def main():
    """Load the ONNX recognition network, run it on the input image, and
    print the decoded text.

    Reads paths/sizes from the module-level `args`; shows the grayscale
    input for 100 ms as a visual sanity check.

    Raises:
        FileNotFoundError: if the input image cannot be read.
    """
    # Read and store arguments
    modelRecognition = args.ocr
    imagePath = args.input
    inpWidth = args.width
    inpHeight = args.height

    # Load network
    recognizer = cv.dnn.readNetFromONNX(modelRecognition)

    # Bug fix: original called cv.imread(image_path) — `image_path` was never
    # defined (the variable is `imagePath`), raising NameError at runtime.
    img = cv.imread(imagePath)
    if img is None:
        # cv.imread returns None on failure instead of raising; fail loudly.
        raise FileNotFoundError("Could not read input image: %s" % imagePath)
    img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    cv.imshow("name", img)
    cv.waitKey(100)

    # if use padding
    # img = fill_img(img, inpWidth, inpHeight)

    # blobFromImage computes (pixel - 127.5) / 255 -> range [-0.5, 0.5].
    blob = cv.dnn.blobFromImage(img, size=(inpWidth, inpHeight), mean=127.5, scalefactor=1 / 255.0)
    # NOTE(review): this extra shift maps the blob to [-2, 0] rather than the
    # usual [-1, 1]; looks like double normalization — confirm against the
    # normalization used when the model was trained before changing.
    blob -= 0.5
    blob /= 0.5
    recognizer.setInput(blob)

    # Run the recognition model
    result = recognizer.forward()

    # decode the result into text
    wordRecognized = decodeText(result)
    print("recog output is : ", wordRecognized)
if __name__ == "__main__":
main()