12
12
from tqdm import tqdm
13
13
from dh_segment .io import PAGE
14
14
15
+ # Constant definitions
15
16
TARGET_HEIGHT = 1100
16
17
DRAWING_COLOR_BASELINES = (255 , 0 , 0 )
17
18
DRAWING_COLOR_LINES = (0 , 255 , 0 )
22
23
23
24
24
25
def get_page_filename (image_filename : str ) -> str :
26
+ """
27
+ Given a path to a .jpg or .png file, get the corresponding .xml file.
28
+
29
+ :param image_filename: filename of the image
30
+ :return: the filename of the corresponding .xml file, raises exception if .xml file does not exist
31
+ """
25
32
page_filename = os .path .join (os .path .dirname (image_filename ),
26
33
'page' ,
27
34
'{}.xml' .format (os .path .basename (image_filename )[:- 4 ]))
@@ -33,13 +40,31 @@ def get_page_filename(image_filename: str) -> str:
33
40
34
41
35
42
def get_image_label_basename(image_filename: str) -> str:
    """
    Builds the basename shared by the exported image, label and ground-truth files.

    The result is ``'<acronym>_<name>'`` where ``<acronym>`` is the part of the image's
    parent folder name located before its first underscore (ex. EPFL, ABP) and ``<name>``
    is the part of the file's basename located before its first dot.

    :param image_filename: path of the image filename
    :return: the new basename, without directory nor extension
    """
    parent_dir = os.path.dirname(image_filename)
    file_part = os.path.basename(image_filename)

    # Name of the innermost folder containing the image (last path component)
    collection_name = parent_dir.rsplit(os.path.sep, 1)[-1]
    # The acronym is whatever precedes the first underscore of that folder name
    acronym = collection_name.partition('_')[0]
    # Keep only the text before the first dot, so 'a.b.jpg' yields 'a'
    stem = file_part.partition('.')[0]

    return '{}_{}'.format(acronym, stem)
40
53
41
54
42
- def save_and_resize (img : np .array , filename : str , size = None , nearest : bool = False ) -> None :
55
+ def save_and_resize (img : np .array ,
56
+ filename : str ,
57
+ size = None ,
58
+ nearest : bool = False ) -> None :
59
+ """
60
+ Resizes the image if necessary and saves it. The resizing will keep the image ratio
61
+
62
+ :param img: the image to resize and save (numpy array)
63
+ :param filename: filename of the saved image
64
+ :param size: size of the image after resizing (in pixels). The ratio of the original image will be kept
65
+ :param nearest: whether to use nearest interpolation method (default to False)
66
+ :return:
67
+ """
43
68
if size is not None :
44
69
h , w = img .shape [:2 ]
45
70
ratio = float (np .sqrt (size / (h * w )))
@@ -59,30 +84,36 @@ def annotate_one_page(image_filename: str,
59
84
baseline_thickness : float = 0.2 ,
60
85
diameter_endpoint : int = 20 ) -> Tuple [str , str ]:
61
86
"""
87
+ Creates an annotated mask and corresponding original image and saves it in 'labels' and 'images' folders.
88
+ Also copies the corresponding .xml file into 'gt' folder.
62
89
63
- :param image_filename:
64
- :param output_dir:
90
+ :param image_filename: filename of the image to process
91
+ :param output_dir: directory to output the annotated label image
65
92
:param size: Size of the resized image (# pixels)
66
93
:param draw_baselines: Draws the baselines (boolean)
67
94
:param draw_lines: Draws the polygon's lines (boolean)
68
95
:param draw_endpoints: Predict beginning and end of baselines (True, False)
69
96
:param baseline_thickness: Thickness of annotated baseline (percentage of the line's height)
70
97
:param diameter_endpoint: Diameter of annotated start/end points
71
- :return:
98
+ :return: (output_image_path, output_label_path)
72
99
"""
73
100
74
101
page_filename = get_page_filename (image_filename )
102
+ # Parse xml file and get TextLines
75
103
page = PAGE .parse_file (page_filename )
76
104
text_lines = [tl for tr in page .text_regions for tl in tr .text_lines ]
77
105
img = imread (image_filename , pilmode = 'RGB' )
106
+ # Create empty mask
78
107
gt = np .zeros_like (img )
79
108
80
109
if text_lines :
81
110
if draw_baselines :
82
111
# Thickness : should be a percentage of the line height, for example 0.2
112
+ # First, get the mean line height.
83
113
mean_line_height , _ , _ = _compute_statistics_line_height (page )
84
114
absolute_baseline_thickness = int (max (gt .shape [0 ]* 0.002 , baseline_thickness * mean_line_height ))
85
115
116
+ # Draw the baselines
86
117
gt_baselines = np .zeros_like (img [:, :, 0 ])
87
118
gt_baselines = cv2 .polylines (gt_baselines ,
88
119
[PAGE .Point .list_to_cv2poly (tl .baseline ) for tl in
@@ -92,6 +123,7 @@ def annotate_one_page(image_filename: str,
92
123
gt [:, :, np .argmax (DRAWING_COLOR_BASELINES )] = gt_baselines
93
124
94
125
if draw_lines :
126
+ # Draw the lines
95
127
gt_lines = np .zeros_like (img [:, :, 0 ])
96
128
for tl in text_lines :
97
129
gt_lines = cv2 .fillPoly (gt_lines ,
@@ -100,6 +132,7 @@ def annotate_one_page(image_filename: str,
100
132
gt [:, :, np .argmax (DRAWING_COLOR_LINES )] = gt_lines
101
133
102
134
if draw_endpoints :
135
+ # Draw endpoints of baselines
103
136
gt_points = np .zeros_like (img [:, :, 0 ])
104
137
for tl in text_lines :
105
138
try :
@@ -113,11 +146,14 @@ def annotate_one_page(image_filename: str,
113
146
print ('Length of baseline is {}' .format (len (tl .baseline )))
114
147
gt [:, :, np .argmax (DRAWING_COLOR_POINTS )] = gt_points
115
148
149
+ # Make output filenames
116
150
image_label_basename = get_image_label_basename (image_filename )
117
151
output_image_path = os .path .join (output_dir , 'images' , '{}.jpg' .format (image_label_basename ))
118
152
output_label_path = os .path .join (output_dir , 'labels' , '{}.png' .format (image_label_basename ))
153
+ # Resize (if necessary) and save image and label
119
154
save_and_resize (img , output_image_path , size = size )
120
155
save_and_resize (gt , output_label_path , size = size , nearest = True )
156
+ # Copy XML file to 'gt' folder
121
157
shutil .copy (page_filename , os .path .join (output_dir , 'gt' , '{}.xml' .format (image_label_basename )))
122
158
123
159
return os .path .abspath (output_image_path ), os .path .abspath (output_label_path )
@@ -133,6 +169,7 @@ def cbad_set_generator(input_dir: str,
133
169
draw_endpoints : bool = False ,
134
170
circle_thickness : int = 20 ) -> None :
135
171
"""
172
+ Creates a set with 'images', 'labels', 'gt' folders, classes.txt file and .csv data
136
173
137
174
:param input_dir: Input directory containing images and PAGE files
138
175
:param output_dir: Output directory to save images and labels
@@ -198,6 +235,12 @@ def cbad_set_generator(input_dir: str,
198
235
199
236
200
237
def split_set_for_eval (csv_filename : str ) -> None :
238
+ """
239
+ Splits set into two sets (0.15 and 0.85).
240
+
241
+ :param csv_filename: path to csv file containing in each row image_filename,label_filename
242
+ :return:
243
+ """
201
244
202
245
df_data = pd .read_csv (csv_filename , header = None )
203
246
@@ -212,25 +255,26 @@ def split_set_for_eval(csv_filename: str) -> None:
212
255
df_train .to_csv (os .path .join (saving_dir , 'train_data.csv' ), header = False , index = False , encoding = 'utf8' )
213
256
214
257
215
- def draw_lines_fn (xml_filename : str , output_dir : str ):
216
- """
217
- GIven an XML PAGE file, draws the corresponding lines in the original image.
218
- :param xml_filename:
219
- :param output_dir:
220
- :return:
221
- """
222
- basename = os .path .basename (xml_filename ).split ('.' )[0 ]
223
- generated_page = PAGE .parse_file (xml_filename )
224
- drawing_img = generated_page .image_filename
225
- generated_page .draw_baselines (drawing_img , color = (0 , 0 , 255 ))
226
- imsave (os .path .join (output_dir , '{}.jpg' .format (basename )), drawing_img )
258
+ # def draw_lines_fn(xml_filename: str, output_dir: str):
259
+ # """
260
+ # Given an XML PAGE file, draws the corresponding lines in the original image.
261
+ #
262
+ # :param xml_filename:
263
+ # :param output_dir:
264
+ # :return:
265
+ # """
266
+ # basename = os.path.basename(xml_filename).split('.')[0]
267
+ # generated_page = PAGE.parse_file(xml_filename)
268
+ # drawing_img = generated_page.image_filename
269
+ # generated_page.draw_baselines(drawing_img, color=(0, 0, 255))
270
+ # imsave(os.path.join(output_dir, '{}.jpg'.format(basename)), drawing_img)
227
271
228
272
229
273
def _compute_statistics_line_height (page_class : PAGE .Page , verbose : bool = False ) -> Tuple [float , float , float ]:
230
274
"""
231
- Function to compute mean and std of line height among a page.
275
+ Function to compute mean and std of line height in a page.
232
276
233
- :param page_class: json Page
277
+ :param page_class: PAGE.Page object
234
278
:param verbose: either to print computational info or not
235
279
:return: tuple (mean, standard deviation, median)
236
280
"""
@@ -311,6 +355,12 @@ def update_to(b: int=1, bsize: int=1, tsize: int=None):
311
355
312
356
313
357
def cbad_download (output_dir : str ):
358
+ """
359
+ Download BAD-READ dataset.
360
+
361
+ :param output_dir: folder where to download the data
362
+ :return:
363
+ """
314
364
os .makedirs (output_dir , exist_ok = True )
315
365
zip_filename = os .path .join (output_dir , 'cbad-icdar17.zip' )
316
366
0 commit comments