-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhr_gt_data.py
106 lines (80 loc) · 3.09 KB
/
hr_gt_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import numpy as np
import json
import os
from ssd_data import BaseGTUtility
import ph_utils
class GTUtility(BaseGTUtility):
    """Ground-truth utility built from JSON metadata files (printed Hangul text).

    The original author describes the result as "GT data in COCO-Text format".
    Every metadata file found under ``data_path`` is parsed into one sample:
    an ``(N, 9)`` array holding, per record, the four corners of a
    full-image quadrilateral (normalized) followed by the class index 1.

    Attributes set here:
        data_path, gt_path: the directory passed by the caller.
        image_path: ``<data_path>/images``.
        classes: ``['Background', 'Text']``.
        data_info: empty dict (not filled in by this class).
        id: one identifier per accepted sample (last record's ``'id'``).
        text: one list of label strings per accepted sample.

    ``self.image_names`` and ``self.data`` are appended to but not created
    here -- presumably ``BaseGTUtility.__init__`` creates them; verify in
    ``ssd_data``.
    """

    def __init__(self, data_path, only_with_label=True):
        """Scan ``data_path`` for metadata files and collect the ground truth.

        Args:
            data_path: Root directory searched recursively (through
                ``ph_utils.get_filenames``) for files with
                ``ph_utils.META_EXTENSION``.
            only_with_label: If True, records without a ``'text'`` key are
                skipped; otherwise they are kept with an empty label.

        Raises:
            KeyError: If a kept record lacks ``'id'``, ``'image_name'``,
                ``'width'`` or ``'height'``.
        """
        super().__init__()
        self.data_path = data_path
        self.gt_path = data_path
        self.image_path = os.path.join(data_path, 'images')
        self.classes = ['Background', 'Text']
        self.data_info = {}
        self.id = []
        self.text = []

        meta_fnames = sorted(
            ph_utils.get_filenames(
                data_path,
                extensions=ph_utils.META_EXTENSION,
                recursive_=True,
                exit_=True,
            )
        )
        print(" # Total file number to be processed: {:d}.".format(len(meta_fnames)))

        for idx, fname in enumerate(meta_fnames):
            print(" # Start processing ... <index: {} & file name: {}>".format(idx, fname))

            # Load json file. os.path.join leaves fname untouched when it is
            # already an absolute path.
            with open(os.path.join(data_path, fname), encoding='UTF8') as f:
                gt_data = json.load(f)

            img_width = 0
            img_height = 0
            boxes = []
            texts = []
            sample_id = ''
            image_name = ''

            for item in gt_data:
                if 'text' in item:
                    texts.append(item['text'])
                elif only_with_label:
                    continue
                else:
                    texts.append('')

                sample_id = item['id']
                image_name = item['image_name']
                w = item['width']
                h = item['height']
                img_width = int(w)
                img_height = int(h)

                # Quadrilateral covering the whole image, corners ordered
                # top-left, top-right, bottom-right, bottom-left.
                boxes.append(
                    np.array([0, 0, w, 0, w, h, 0, h], dtype=np.float32)
                )

            if not boxes:
                print(" # No bounding boxes!")
                continue

            # Dividing by a non-positive size would silently store NaN/inf
            # coordinates as ground truth, so such files are skipped instead.
            if img_width <= 0 or img_height <= 0:
                print(" # Invalid image size ({} x {}), skipping!".format(img_width, img_height))
                continue

            boxes = np.asarray(boxes)
            # NOTE(review): every box of this file is normalized by the size
            # of the LAST kept record (likewise sample_id / image_name come
            # from the last record). That is only correct if all records of
            # one file share the same size -- confirm against the JSON schema.
            boxes[:, 0::2] /= img_width
            boxes[:, 1::2] /= img_height
            # Append the class index column (1 == 'Text').
            boxes = np.concatenate([boxes, np.ones([boxes.shape[0], 1])], axis=1)

            self.id.append(sample_id)
            self.image_names.append(image_name)
            self.data.append(boxes)
            self.text.append(texts)
            print(" # Info : {} {} {} {}".format(sample_id, image_name, boxes, texts))
            print(" # ID {} is added!".format(sample_id))

        # Finalize the collected data through the base-class hook
        # (implemented in ssd_data; not visible from this file).
        self.init()
# Root directory that is scanned for ground-truth metadata files.
DATA_PATH = '/diarl_data/crnn/hospital_receipt/ori_4991_aug_60000/'
# DATA_PATH = 'C:/Users/admin/dev/data/'

# Destination directory and file name handed to ph_utils.create_pickle.
PICKLE_DIR = './pickles/'
FILE_NAME = 'hospital_receipt_60000.pkl'


if __name__ == '__main__':
    # Build the ground-truth object from DATA_PATH, keeping labelled records only.
    gt_util = GTUtility(DATA_PATH, only_with_label=True)

    # Persist the object via the project helper.
    ph_utils.create_pickle(gt_util, PICKLE_DIR, FILE_NAME)

    # Dump the collected box arrays for a quick visual check.
    print(gt_util.data)