-
Notifications
You must be signed in to change notification settings - Fork 9.7k
/
Copy pathlinefind.cpp
774 lines (744 loc) · 32.9 KB
/
linefind.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
///////////////////////////////////////////////////////////////////////
// File: linefind.cpp
// Description: Class to find vertical lines in an image and create
// a corresponding list of empty blobs.
// Author: Ray Smith
// Created: Thu Mar 20 09:49:01 PDT 2008
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifdef _MSC_VER
#pragma warning(disable:4244) // Conversion warnings
#endif
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif
#include "linefind.h"
#include "alignedblob.h"
#include "tabvector.h"
#include "blobbox.h"
#include "edgblob.h"
#include "openclwrapper.h"
#include "allheaders.h"
namespace tesseract {
/// Divisor applied to the resolution to get the max line width in pixels
/// (max_line_width = resolution / kThinLineFraction).
const int kThinLineFraction = 20;
/// Divisor applied to the resolution to get the min line length in pixels
/// (min_line_length = resolution / kMinLineLengthFraction).
const int kMinLineLengthFraction = 4;
/// Spacing in pixels of the single-pixel cracks cut into candidate lines.
/// Used along y for vertical lines and along x for horizontal lines.
const int kCrackSpacing = 100;
/// Grid size passed to the AlignedBlob grid used by the line finder.
/// Not very critical.
const int kLineFindGridSize = 50;
// Min width of a line in pixels to be considered thick.
const int kMinThickLineWidth = 12;
// Max size of line residue. (The pixels that fail the long thin opening, and
// therefore don't make it to the candidate line mask, but are nevertheless
// part of the line.) Used as the erosion brick size when building the
// non-line masks.
const int kMaxLineResidue = 6;
// Min length in inches of a line segment that exceeds kMinThickLineWidth in
// thickness. (Such lines shouldn't break by simple image degradation.)
// Multiplied by the resolution to get a length in pixels.
const double kThickLengthMultiple = 0.75;
// Max fraction of line box area that can be occupied by non-line pixels.
const double kMaxNonLineDensity = 0.25;
// Max height of a music stave in inches. Multiplied by the resolution to get
// a height in pixels.
const double kMaxStaveHeight = 1.0;
// Minimum fraction of pixels in a music rectangle connected to the staves.
const double kMinMusicPixelFraction = 0.75;
// Erases the unused blobs from the line_pix image, taking into account
// whether this was a horizontal or vertical line set.
static void RemoveUnusedLineSegments(bool horizontal_lines,
BLOBNBOX_LIST* line_bblobs,
Pix* line_pix) {
int height = pixGetHeight(line_pix);
BLOBNBOX_IT bbox_it(line_bblobs);
for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
BLOBNBOX* blob = bbox_it.data();
if (blob->left_tab_type() != TT_VLINE) {
const TBOX& box = blob->bounding_box();
Box* pixbox = NULL;
if (horizontal_lines) {
// Horizontal lines are in tess format and also have x and y flipped
// (to use FindVerticalAlignment) so we have to flip x and y and then
// convert to Leptonica by height - flipped x (ie the right edge).
// See GetLineBoxes for more explanation.
pixbox = boxCreate(box.bottom(), height - box.right(),
box.height(), box.width());
} else {
// For vertical lines, just flip upside-down to convert to Leptonica.
// The y position of the box in Leptonica terms is the distance from
// the top of the image to the top of the box.
pixbox = boxCreate(box.left(), height - box.top(),
box.width(), box.height());
}
pixClearInRect(line_pix, pixbox);
boxDestroy(&pixbox);
}
}
}
// Removes line_pix from src_pix in place, and then also removes the residue:
// connected components that touch the (slightly fattened) lines but are not
// part of the non_line_pix mask. non_line_pix is assumed to have been
// prepared to the required accuracy by the caller.
// The resolution argument is not used by this function.
static void SubtractLinesAndResidue(Pix* line_pix, Pix* non_line_pix,
                                    int resolution, Pix* src_pix) {
  // Take the line pixels themselves out of the source.
  pixSubtract(src_pix, src_pix, line_pix);
  // Fatten the lines with a 3x3 brick so that they reach the residue.
  Pix* grown_lines = pixDilateBrick(NULL, line_pix, 3, 3);
  // Whatever is left in the source that is not a known non-line is a
  // candidate for residue.
  Pix* residue_candidates = pixSubtract(NULL, src_pix, non_line_pix);
  // Grow the fat lines into the candidates to collect all touching residue.
  pixSeedfillBinary(grown_lines, grown_lines, residue_candidates, 8);
  pixDestroy(&residue_candidates);
  // Finally remove the collected residue from the source.
  pixSubtract(src_pix, src_pix, grown_lines);
  pixDestroy(&grown_lines);
}
// Estimates the maximum stroke width in the given binary image as twice the
// largest value found in its distance function.
static int MaxStrokeWidth(Pix* pix) {
  Pix* dist_pix = pixDistanceFunction(pix, 4, 8, L_BOUNDARY_BG);
  const int num_cols = pixGetWidth(dist_pix);
  const int num_rows = pixGetHeight(dist_pix);
  const int words_per_line = pixGetWpl(dist_pix);
  l_uint32* row = pixGetData(dist_pix);
  // Scan every pixel, reading one byte per pixel, and keep the largest.
  int peak = 0;
  for (int r = 0; r < num_rows; ++r, row += words_per_line) {
    for (int c = 0; c < num_cols; ++c) {
      const int dist = GET_DATA_BYTE(row, c);
      peak = dist > peak ? dist : peak;
    }
  }
  pixDestroy(&dist_pix);
  return 2 * peak;
}
// Returns the number of 8-connected components of intersection_pix that lie
// within line_box. Returns 0 if intersection_pix is NULL, or if no component
// list could be obtained for the clipped region.
static int NumTouchingIntersections(Box* line_box, Pix* intersection_pix) {
  if (intersection_pix == NULL) return 0;
  // Only the part of the intersection image under the line box matters.
  Pix* rect_pix = pixClipRectangle(intersection_pix, line_box, NULL);
  Boxa* boxa = pixConnComp(rect_pix, NULL, 8);
  pixDestroy(&rect_pix);
  // This function returns a count, so "nothing found" is the integer 0
  // (not the boolean false).
  if (boxa == NULL) return 0;
  int result = boxaGetCount(boxa);
  boxaDestroy(&boxa);
  return result;
}
// Returns the number of set pixels of nonline_pix found in the box made by
// adding line_width to both sides of the line bounding box, i.e. by growing
// the smaller dimension of line_box. The grown box is clipped to the bounds
// of nonline_pix.
//   line_width   Amount in pixels to grow the box by on each side.
//   line_box     Bounding box of the line in Leptonica coordinates.
//   nonline_pix  Binary image of the non-line pixels to be counted.
static int CountPixelsAdjacentToLine(int line_width, Box* line_box,
                                     Pix* nonline_pix) {
  l_int32 x, y, box_width, box_height;
  boxGetGeometry(line_box, &x, &y, &box_width, &box_height);
  if (box_width > box_height) {
    // Horizontal line: grow up and down.
    int bottom = MIN(pixGetHeight(nonline_pix), y + box_height + line_width);
    y = MAX(0, y - line_width);
    box_height = bottom - y;
  } else {
    // Vertical line: grow left and right.
    int right = MIN(pixGetWidth(nonline_pix), x + box_width + line_width);
    x = MAX(0, x - line_width);
    box_width = right - x;
  }
  Box* box = boxCreate(x, y, box_width, box_height);
  Pix* rect_pix = pixClipRectangle(nonline_pix, box, NULL);
  boxDestroy(&box);
  // Initialized so that a defined value (0) is returned even if the clip or
  // the count fails without writing to the output.
  l_int32 result = 0;
  pixCountPixels(rect_pix, &result, NULL);
  pixDestroy(&rect_pix);
  return result;
}
// Helper erases false-positive line segments from the input/output line_pix.
// 1. Since thick lines shouldn't really break up, we can eliminate some false
// positives by marking segments that are at least kMinThickLineWidth
// thickness, yet have a length less than min_thick_length.
// 2. Lines that don't have at least 2 intersections with other lines and have
// a lot of neighbouring non-lines are probably not lines (perhaps arabic
// or Hindi words, or underlines.)
// Bad line components are erased from line_pix.
// Returns the number of remaining connected components.
static int FilterFalsePositives(int resolution, Pix* nonline_pix,
Pix* intersection_pix, Pix* line_pix) {
int min_thick_length = static_cast<int>(resolution * kThickLengthMultiple);
Pixa* pixa = NULL;
Boxa* boxa = pixConnComp(line_pix, &pixa, 8);
// Iterate over the boxes to remove false positives.
int nboxes = boxaGetCount(boxa);
int remaining_boxes = nboxes;
for (int i = 0; i < nboxes; ++i) {
Box* box = boxaGetBox(boxa, i, L_CLONE);
l_int32 x, y, box_width, box_height;
boxGetGeometry(box, &x, &y, &box_width, &box_height);
Pix* comp_pix = pixaGetPix(pixa, i, L_CLONE);
int max_width = MaxStrokeWidth(comp_pix);
pixDestroy(&comp_pix);
bool bad_line = false;
// If the length is too short to stand-alone as a line, and the box width
// is thick enough, and the stroke width is thick enough it is bad.
if (box_width >= kMinThickLineWidth && box_height >= kMinThickLineWidth &&
box_width < min_thick_length && box_height < min_thick_length &&
max_width > kMinThickLineWidth) {
// Too thick for the length.
bad_line = true;
}
if (!bad_line &&
(intersection_pix == NULL ||
NumTouchingIntersections(box, intersection_pix) < 2)) {
// Test non-line density near the line.
int nonline_count = CountPixelsAdjacentToLine(max_width, box,
nonline_pix);
if (nonline_count > box_height * box_width * kMaxNonLineDensity)
bad_line = true;
}
if (bad_line) {
// Not a good line.
pixClearInRect(line_pix, box);
--remaining_boxes;
}
boxDestroy(&box);
}
pixaDestroy(&pixa);
boxaDestroy(&boxa);
return remaining_boxes;
}
// Finds vertical and horizontal line objects in the given pix, and removes
// the detected lines from the pix.
// Uses the given resolution to determine size thresholds instead of any
// that may be present in the pix.
// The output vertical_x and vertical_y contain a sum of the output vectors,
// thereby giving the mean vertical direction.
// If pix_music_mask != NULL, and music is detected, a mask of the staves
// and anything that is connected (bars, notes etc.) will be returned in
// pix_music_mask, the mask subtracted from pix, and the lines will not
// appear in v_lines or h_lines.
// The output vectors are owned by the list and Frozen (cannot refit) by
// having no boxes, as there is no need to refit or merge separator lines.
// If pix, vertical_x or vertical_y is NULL, an error is printed and the
// function returns without doing anything else.
// If debug is true, the intermediate images are collected and written to
// the file vhlinefinding.pdf.
void LineFinder::FindAndRemoveLines(int resolution, bool debug, Pix* pix,
int* vertical_x, int* vertical_y,
Pix** pix_music_mask,
TabVector_LIST* v_lines,
TabVector_LIST* h_lines) {
PERF_COUNT_START("FindAndRemoveLines")
if (pix == NULL || vertical_x == NULL || vertical_y == NULL) {
tprintf("Error in parameters for LineFinder::FindAndRemoveLines\n");
return;
}
// Work images. All are owned by this function and destroyed at the end.
Pix* pix_vline = NULL;
Pix* pix_non_vline = NULL;
Pix* pix_hline = NULL;
Pix* pix_non_hline = NULL;
Pix* pix_intersections = NULL;
// The debug image collection only exists in debug mode.
Pixa* pixa_display = debug ? pixaCreate(0) : NULL;
GetLineMasks(resolution, pix, &pix_vline, &pix_non_vline, &pix_hline,
&pix_non_hline, &pix_intersections, pix_music_mask,
pixa_display);
// Find lines, convert to TabVector_LIST and remove those that are used.
// Note that this may destroy pix_vline if no vertical lines are found.
FindAndRemoveVLines(resolution, pix_intersections, vertical_x, vertical_y,
&pix_vline, pix_non_vline, pix, v_lines);
if (pix_hline != NULL) {
// Recompute intersections and re-filter false positive h-lines.
// With no vertical lines left there can be no intersections at all.
if (pix_vline != NULL)
pixAnd(pix_intersections, pix_vline, pix_hline);
else
pixDestroy(&pix_intersections);
// A zero return means that no horizontal candidates remain.
if (!FilterFalsePositives(resolution, pix_non_hline, pix_intersections,
pix_hline)) {
pixDestroy(&pix_hline);
}
}
// vertical_x and vertical_y are passed by value here, so the horizontal
// pass does not change the caller's vertical direction.
FindAndRemoveHLines(resolution, pix_intersections, *vertical_x, *vertical_y,
&pix_hline, pix_non_hline, pix, h_lines);
if (pixa_display != NULL && pix_vline != NULL)
pixaAddPix(pixa_display, pix_vline, L_CLONE);
if (pixa_display != NULL && pix_hline != NULL)
pixaAddPix(pixa_display, pix_hline, L_CLONE);
if (pix_vline != NULL && pix_hline != NULL) {
// Remove joins (intersections) where lines cross, and the residue.
// Recalculate the intersections, since some lines have been deleted.
pixAnd(pix_intersections, pix_vline, pix_hline);
// Fatten up the intersections and seed-fill to get the intersection
// residue.
Pix* pix_join_residue = pixDilateBrick(NULL, pix_intersections, 5, 5);
pixSeedfillBinary(pix_join_residue, pix_join_residue, pix, 8);
// Now remove the intersection residue.
pixSubtract(pix, pix, pix_join_residue);
pixDestroy(&pix_join_residue);
}
// Remove any detected music.
if (pix_music_mask != NULL && *pix_music_mask != NULL) {
if (pixa_display != NULL)
pixaAddPix(pixa_display, *pix_music_mask, L_CLONE);
pixSubtract(pix, pix, *pix_music_mask);
}
// The final cleaned image is the last page of the debug output.
if (pixa_display != NULL)
pixaAddPix(pixa_display, pix, L_CLONE);
// Release the work images. The music mask, if any, stays with the caller.
pixDestroy(&pix_vline);
pixDestroy(&pix_non_vline);
pixDestroy(&pix_hline);
pixDestroy(&pix_non_hline);
pixDestroy(&pix_intersections);
if (pixa_display != NULL) {
pixaConvertToPdf(pixa_display, resolution, 1.0f, 0, 0, "LineFinding",
"vhlinefinding.pdf");
pixaDestroy(&pixa_display);
}
PERF_COUNT_END
}
// Converts the Boxa array to a list of C_BLOB, getting rid of severely
// overlapping outlines and those that are children of a bigger one.
// The output is a list of C_BLOBs that are owned by the list.
// The C_OUTLINEs in the C_BLOBs contain no outline data - just empty
// bounding boxes. The Boxa is consumed and destroyed.
void LineFinder::ConvertBoxaToBlobs(int image_width, int image_height,
Boxa** boxes, C_BLOB_LIST* blobs) {
C_OUTLINE_LIST outlines;
C_OUTLINE_IT ol_it = &outlines;
// Iterate the boxes to convert to outlines.
int nboxes = boxaGetCount(*boxes);
for (int i = 0; i < nboxes; ++i) {
l_int32 x, y, width, height;
boxaGetBoxGeometry(*boxes, i, &x, &y, &width, &height);
// Make a C_OUTLINE from the leptonica box. This is a bit of a hack,
// as there is no outline, just a bounding box, but with some very
// small changes to coutln.cpp, it works nicely.
ICOORD top_left(x, y);
ICOORD bot_right(x + width, y + height);
CRACKEDGE startpt;
startpt.pos = top_left;
C_OUTLINE* outline = new C_OUTLINE(&startpt, top_left, bot_right, 0);
ol_it.add_after_then_move(outline);
}
// Use outlines_to_blobs to convert the outlines to blobs and find
// overlapping and contained objects. The output list of blobs in the block
// has all the bad ones filtered out and deleted.
BLOCK block;
ICOORD page_tl(0, 0);
ICOORD page_br(image_width, image_height);
outlines_to_blobs(&block, page_tl, page_br, &outlines);
// Transfer the created blobs to the output list.
C_BLOB_IT blob_it(blobs);
blob_it.add_list_after(block.blob_list());
// The boxes aren't needed any more.
boxaDestroy(boxes);
}
// Finds vertical line objects in pix_vline and removes the from src_pix.
// Uses the given resolution to determine size thresholds instead of any
// that may be present in the pix.
// The output vertical_x and vertical_y contain a sum of the output vectors,
// thereby giving the mean vertical direction.
// The output vectors are owned by the list and Frozen (cannot refit) by
// having no boxes, as there is no need to refit or merge separator lines.
// If no good lines are found, pix_vline is destroyed.
// None of the input pointers may be NULL, and if *pix_vline is NULL then
// the function does nothing.
void LineFinder::FindAndRemoveVLines(int resolution,
Pix* pix_intersections,
int* vertical_x, int* vertical_y,
Pix** pix_vline, Pix* pix_non_vline,
Pix* src_pix, TabVector_LIST* vectors) {
if (pix_vline == NULL || *pix_vline == NULL) return;
C_BLOB_LIST line_cblobs;
BLOBNBOX_LIST line_bblobs;
GetLineBoxes(false, *pix_vline, pix_intersections,
&line_cblobs, &line_bblobs);
int width = pixGetWidth(src_pix);
int height = pixGetHeight(src_pix);
ICOORD bleft(0, 0);
ICOORD tright(width, height);
FindLineVectors(bleft, tright, &line_bblobs, vertical_x, vertical_y, vectors);
if (!vectors->empty()) {
RemoveUnusedLineSegments(false, &line_bblobs, *pix_vline);
SubtractLinesAndResidue(*pix_vline, pix_non_vline, resolution, src_pix);
ICOORD vertical;
vertical.set_with_shrink(*vertical_x, *vertical_y);
TabVector::MergeSimilarTabVectors(vertical, vectors, NULL);
} else {
pixDestroy(pix_vline);
}
}
// Finds horizontal line objects in pix_hline and removes them from src_pix.
// Uses the given resolution to determine size thresholds instead of any
// that may be present in the pix.
// The output vertical_x and vertical_y contain a sum of the output vectors,
// thereby giving the mean vertical direction.
// The output vectors are owned by the list and Frozen (cannot refit) by
// having no boxes, as there is no need to refit or merge separator lines.
// If no good lines are found, pix_hline is destroyed.
// None of the input pointers may be NULL, and if *pix_hline is NULL then
// the function does nothing.
void LineFinder::FindAndRemoveHLines(int resolution,
Pix* pix_intersections,
int vertical_x, int vertical_y,
Pix** pix_hline, Pix* pix_non_hline,
Pix* src_pix, TabVector_LIST* vectors) {
if (pix_hline == NULL || *pix_hline == NULL) return;
C_BLOB_LIST line_cblobs;
BLOBNBOX_LIST line_bblobs;
GetLineBoxes(true, *pix_hline, pix_intersections, &line_cblobs, &line_bblobs);
int width = pixGetWidth(src_pix);
int height = pixGetHeight(src_pix);
ICOORD bleft(0, 0);
ICOORD tright(height, width);
FindLineVectors(bleft, tright, &line_bblobs, &vertical_x, &vertical_y,
vectors);
if (!vectors->empty()) {
RemoveUnusedLineSegments(true, &line_bblobs, *pix_hline);
SubtractLinesAndResidue(*pix_hline, pix_non_hline, resolution, src_pix);
ICOORD vertical;
vertical.set_with_shrink(vertical_x, vertical_y);
TabVector::MergeSimilarTabVectors(vertical, vectors, NULL);
// Iterate the vectors to flip them. x and y were flipped for horizontal
// lines, so FindLineVectors can work just with the vertical case.
// See GetLineBoxes for more on the flip.
TabVector_IT h_it(vectors);
for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
h_it.data()->XYFlip();
}
} else {
pixDestroy(pix_hline);
}
}
// Finds vertical lines in the given list of BLOBNBOXes. bleft and tright
// are the bounds of the image on which the input line_bblobs were found.
// The input line_bblobs list is const really.
// The output vertical_x and vertical_y are the total of all the vectors.
// The output list of TabVector makes no reference to the input BLOBNBOXes.
void LineFinder::FindLineVectors(const ICOORD& bleft, const ICOORD& tright,
BLOBNBOX_LIST* line_bblobs,
int* vertical_x, int* vertical_y,
TabVector_LIST* vectors) {
BLOBNBOX_IT bbox_it(line_bblobs);
int b_count = 0;
// Put all the blobs into the grid to find the lines, and move the blobs
// to the output lists.
AlignedBlob blob_grid(kLineFindGridSize, bleft, tright);
for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
BLOBNBOX* bblob = bbox_it.data();
bblob->set_left_tab_type(TT_MAYBE_ALIGNED);
bblob->set_left_rule(bleft.x());
bblob->set_right_rule(tright.x());
bblob->set_left_crossing_rule(bleft.x());
bblob->set_right_crossing_rule(tright.x());
blob_grid.InsertBBox(false, true, bblob);
++b_count;
}
if (b_count == 0)
return;
// Search the entire grid, looking for vertical line vectors.
BlobGridSearch lsearch(&blob_grid);
BLOBNBOX* bbox;
TabVector_IT vector_it(vectors);
*vertical_x = 0;
*vertical_y = 1;
lsearch.StartFullSearch();
while ((bbox = lsearch.NextFullSearch()) != NULL) {
if (bbox->left_tab_type() == TT_MAYBE_ALIGNED) {
const TBOX& box = bbox->bounding_box();
if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom()))
tprintf("Finding line vector starting at bbox (%d,%d)\n",
box.left(), box.bottom());
AlignedBlobParams align_params(*vertical_x, *vertical_y, box.width());
TabVector* vector = blob_grid.FindVerticalAlignment(align_params, bbox,
vertical_x,
vertical_y);
if (vector != NULL) {
vector->Freeze();
vector_it.add_to_end(vector);
}
}
}
}
// Returns a Pix music mask if music is detected, or NULL if not.
// Any vertical line component that touches at least 5 intersections, at a
// density of at least 5 per max stave height (resolution * kMaxStaveHeight),
// is taken to be a bar. Bars are used as a seed and the entire touching
// component of pix_closed is added to the music mask. Mask components that
// hold less than kMinMusicPixelFraction of the pix_closed pixels in their
// bounding box are then cleared again as false positives.
// If any music remains, the mask is subtracted from pix_vline and pix_hline,
// and v_empty and h_empty are updated to say whether those images are now
// empty. Otherwise v_empty and h_empty are not written.
static Pix* FilterMusic(int resolution, Pix* pix_closed,
                        Pix* pix_vline, Pix* pix_hline,
                        l_int32* v_empty, l_int32* h_empty) {
  const int max_stave_height = static_cast<int>(resolution * kMaxStaveHeight);
  Pix* joins_pix = pixAnd(NULL, pix_vline, pix_hline);
  Boxa* vline_boxes = pixConnComp(pix_vline, NULL, 8);
  // Look at each vertical line component for music bars.
  const int num_vlines = boxaGetCount(vline_boxes);
  Pix* music_mask = NULL;
  for (int v = 0; v < num_vlines; ++v) {
    Box* vline_box = boxaGetBox(vline_boxes, v, L_CLONE);
    l_int32 x, y, box_width, box_height;
    boxGetGeometry(vline_box, &x, &y, &box_width, &box_height);
    const int joins = NumTouchingIntersections(vline_box, joins_pix);
    // The density test (joins-1)/box_height >= (5-1)/max_stave_height is
    // done in integers by cross-multiplying.
    const bool is_bar =
        joins >= 5 && (joins - 1) * max_stave_height >= 4 * box_height;
    if (is_bar) {
      // The mask is only created once the first bar turns up.
      if (music_mask == NULL) {
        music_mask = pixCreate(pixGetWidth(pix_vline), pixGetHeight(pix_vline),
                               1);
      }
      pixSetInRect(music_mask, vline_box);
    }
    boxDestroy(&vline_box);
  }
  boxaDestroy(&vline_boxes);
  pixDestroy(&joins_pix);
  if (music_mask == NULL)
    return NULL;  // No bars, so no music.

  // The mask currently contains just the bars. Seed-fill from it within
  // pix_closed to pick up all the intersecting staves.
  pixSeedfillBinary(music_mask, music_mask, pix_closed, 8);
  // Filter out false positives. CCs in the music_mask should be the vast
  // majority of the pixels in their bounding boxes, as we expect just a
  // tiny amount of text, a few phrase marks, and crescendo etc left.
  Boxa* music_boxes = pixConnComp(music_mask, NULL, 8);
  const int num_music_boxes = boxaGetCount(music_boxes);
  for (int m = 0; m < num_music_boxes; ++m) {
    Box* music_box = boxaGetBox(music_boxes, m, L_CLONE);
    // Count the mask pixels inside the box...
    Pix* clipped = pixClipRectangle(music_mask, music_box, NULL);
    l_int32 music_pixels;
    pixCountPixels(clipped, &music_pixels, NULL);
    pixDestroy(&clipped);
    // ...and all the pix_closed pixels inside the same box.
    clipped = pixClipRectangle(pix_closed, music_box, NULL);
    l_int32 all_pixels;
    pixCountPixels(clipped, &all_pixels, NULL);
    pixDestroy(&clipped);
    if (music_pixels < kMinMusicPixelFraction * all_pixels) {
      // False positive. Delete from the music mask.
      pixClearInRect(music_mask, music_box);
    }
    boxDestroy(&music_box);
  }
  l_int32 no_remaining_music;
  boxaDestroy(&music_boxes);
  pixZero(music_mask, &no_remaining_music);
  if (!no_remaining_music) {
    pixSubtract(pix_vline, pix_vline, music_mask);
    pixSubtract(pix_hline, pix_hline, music_mask);
    // We may have deleted all the lines.
    pixZero(pix_vline, v_empty);
    pixZero(pix_hline, h_empty);
  } else {
    pixDestroy(&music_mask);
  }
  return music_mask;
}
// Most of the heavy lifting of line finding. Given src_pix and its separate
// resolution, returns image masks:
// pix_vline candidate vertical lines.
// pix_non_vline pixels that didn't look like vertical lines.
// pix_hline candidate horizontal lines.
// pix_non_hline pixels that didn't look like horizontal lines.
// pix_intersections pixels where vertical and horizontal lines meet.
// pix_music_mask candidate music staves.
// This function promises to initialize all the output (2nd level) pointers,
// but any of the returns that are empty will be NULL on output.
// None of the input (1st level) pointers may be NULL except pix_music_mask,
// which will disable music detection, and pixa_display.
// If pixa_display is not NULL, the size thresholds are printed and clones of
// the intermediate and output images are added to it. Note that the images
// at the end of the function are not added when neither vertical nor
// horizontal candidates exist, as the function returns early in that case.
void LineFinder::GetLineMasks(int resolution, Pix* src_pix,
Pix** pix_vline, Pix** pix_non_vline,
Pix** pix_hline, Pix** pix_non_hline,
Pix** pix_intersections, Pix** pix_music_mask,
Pixa* pixa_display) {
Pix* pix_closed = NULL;
Pix* pix_hollow = NULL;
// Size thresholds in pixels, derived from the resolution.
int max_line_width = resolution / kThinLineFraction;
int min_line_length = resolution / kMinLineLengthFraction;
if (pixa_display != NULL) {
tprintf("Image resolution = %d, max line width = %d, min length=%d\n",
resolution, max_line_width, min_line_length);
}
int closing_brick = max_line_width / 3;
PERF_COUNT_START("GetLineMasksMorph")
// Only use OpenCL if compiled with OpenCL and the selected device is OpenCL.
// Otherwise the Leptonica morphology in the else-part below is used. The
// else-part is the only code compiled when USE_OPENCL is not defined.
#ifdef USE_OPENCL
if (OpenclDevice::selectedDeviceIsOpenCL()) {
// OpenCL pixGetLines Operation
int clStatus = OpenclDevice::initMorphCLAllocations(pixGetWpl(src_pix),
pixGetHeight(src_pix),
src_pix);
// pix_closed is only needed later on for music detection.
bool getpixclosed = pix_music_mask != NULL ? true : false;
OpenclDevice::pixGetLinesCL(NULL, src_pix, pix_vline, pix_hline,
&pix_closed, getpixclosed, closing_brick,
closing_brick, max_line_width, max_line_width,
min_line_length, min_line_length);
} else {
#endif
// Close up small holes, making it less likely that false alarms are found
// in thickened text (as it will become more solid) and also smoothing over
// some line breaks and nicks in the edges of the lines.
pix_closed = pixCloseBrick(NULL, src_pix, closing_brick, closing_brick);
if (pixa_display != NULL)
pixaAddPix(pixa_display, pix_closed, L_CLONE);
// Open up with a big box to detect solid areas, which can then be subtracted.
// This is very generous and will leave in even quite wide lines.
Pix* pix_solid = pixOpenBrick(NULL, pix_closed, max_line_width,
max_line_width);
if (pixa_display != NULL)
pixaAddPix(pixa_display, pix_solid, L_CLONE);
// What is left after removing the solid areas can only hold thin objects.
pix_hollow = pixSubtract(NULL, pix_closed, pix_solid);
pixDestroy(&pix_solid);
// Now open up in both directions independently to find lines of at least
// 1 inch/kMinLineLengthFraction in length.
if (pixa_display != NULL)
pixaAddPix(pixa_display, pix_hollow, L_CLONE);
*pix_vline = pixOpenBrick(NULL, pix_hollow, 1, min_line_length);
*pix_hline = pixOpenBrick(NULL, pix_hollow, min_line_length, 1);
pixDestroy(&pix_hollow);
#ifdef USE_OPENCL
}
#endif
PERF_COUNT_END
// Lines are sufficiently rare, that it is worth checking for a zero image.
l_int32 v_empty = 0;
l_int32 h_empty = 0;
pixZero(*pix_vline, &v_empty);
pixZero(*pix_hline, &h_empty);
// Music detection needs both line types. FilterMusic may remove lines from
// both candidate images, and updates v_empty and h_empty if it does.
if (pix_music_mask != NULL) {
if (!v_empty && !h_empty) {
*pix_music_mask = FilterMusic(resolution, pix_closed,
*pix_vline, *pix_hline,
&v_empty, &h_empty);
} else {
*pix_music_mask = NULL;
}
}
pixDestroy(&pix_closed);
Pix* pix_nonlines = NULL;
*pix_intersections = NULL;
Pix* extra_non_hlines = NULL;
if (!v_empty) {
// Subtract both line candidates from the source to get definite non-lines.
pix_nonlines = pixSubtract(NULL, src_pix, *pix_vline);
if (!h_empty) {
pixSubtract(pix_nonlines, pix_nonlines, *pix_hline);
// Intersections are a useful indicator for likelihood of being a line.
*pix_intersections = pixAnd(NULL, *pix_vline, *pix_hline);
// Candidate vlines are not hlines (apart from the intersections)
// and vice versa.
extra_non_hlines = pixSubtract(NULL, *pix_vline, *pix_intersections);
}
// Erode the non-lines with a kMaxLineResidue x 1 brick, and then seed-fill
// the result back within the non-lines.
*pix_non_vline = pixErodeBrick(NULL, pix_nonlines, kMaxLineResidue, 1);
pixSeedfillBinary(*pix_non_vline, *pix_non_vline, pix_nonlines, 8);
if (!h_empty) {
// Candidate hlines are not vlines.
pixOr(*pix_non_vline, *pix_non_vline, *pix_hline);
pixSubtract(*pix_non_vline, *pix_non_vline, *pix_intersections);
}
// A zero return means that every vertical candidate was rejected.
if (!FilterFalsePositives(resolution, *pix_non_vline, *pix_intersections,
*pix_vline))
pixDestroy(pix_vline); // No candidates left.
} else {
// No vertical lines.
pixDestroy(pix_vline);
*pix_non_vline = NULL;
if (!h_empty) {
// The horizontal pass below still needs the non-lines image.
pix_nonlines = pixSubtract(NULL, src_pix, *pix_hline);
}
}
if (h_empty) {
// No horizontal lines.
pixDestroy(pix_hline);
*pix_non_hline = NULL;
if (v_empty) {
// Nothing at all was found. All the outputs have been set by now and
// no local image is still allocated on this path.
return;
}
} else {
// Same as for the vertical case, but with a 1 x kMaxLineResidue brick.
*pix_non_hline = pixErodeBrick(NULL, pix_nonlines, 1, kMaxLineResidue);
pixSeedfillBinary(*pix_non_hline, *pix_non_hline, pix_nonlines, 8);
if (extra_non_hlines != NULL) {
// Add in the vertical candidates (less intersections) saved earlier.
pixOr(*pix_non_hline, *pix_non_hline, extra_non_hlines);
pixDestroy(&extra_non_hlines);
}
// A zero return means that every horizontal candidate was rejected.
if (!FilterFalsePositives(resolution, *pix_non_hline, *pix_intersections,
*pix_hline))
pixDestroy(pix_hline); // No candidates left.
}
// Add whichever of the results exist to the debug display.
if (pixa_display != NULL) {
if (*pix_vline != NULL) pixaAddPix(pixa_display, *pix_vline, L_CLONE);
if (*pix_hline != NULL) pixaAddPix(pixa_display, *pix_hline, L_CLONE);
if (pix_nonlines != NULL) pixaAddPix(pixa_display, pix_nonlines, L_CLONE);
if (*pix_non_vline != NULL)
pixaAddPix(pixa_display, *pix_non_vline, L_CLONE);
if (*pix_non_hline != NULL)
pixaAddPix(pixa_display, *pix_non_hline, L_CLONE);
if (*pix_intersections != NULL)
pixaAddPix(pixa_display, *pix_intersections, L_CLONE);
if (pix_music_mask != NULL && *pix_music_mask != NULL)
pixaAddPix(pixa_display, *pix_music_mask, L_CLONE);
}
pixDestroy(&pix_nonlines);
}
// Returns a list of boxes corresponding to the candidate line segments. Sets
// the line_crossings member of the boxes so we can later determin the number
// of intersections touched by a full line.
void LineFinder::GetLineBoxes(bool horizontal_lines,
Pix* pix_lines, Pix* pix_intersections,
C_BLOB_LIST* line_cblobs,
BLOBNBOX_LIST* line_bblobs) {
// Put a single pixel crack in every line at an arbitrary spacing,
// so they break up and the bounding boxes can be used to get the
// direction accurately enough without needing outlines.
int wpl = pixGetWpl(pix_lines);
int width = pixGetWidth(pix_lines);
int height = pixGetHeight(pix_lines);
l_uint32* data = pixGetData(pix_lines);
if (horizontal_lines) {
for (int y = 0; y < height; ++y, data += wpl) {
for (int x = kCrackSpacing; x < width; x += kCrackSpacing) {
CLEAR_DATA_BIT(data, x);
}
}
} else {
for (int y = kCrackSpacing; y < height; y += kCrackSpacing) {
memset(data + wpl * y, 0, wpl * sizeof(*data));
}
}
// Get the individual connected components
Boxa* boxa = pixConnComp(pix_lines, NULL, 8);
ConvertBoxaToBlobs(width, height, &boxa, line_cblobs);
// Make the BLOBNBOXes from the C_BLOBs.
C_BLOB_IT blob_it(line_cblobs);
BLOBNBOX_IT bbox_it(line_bblobs);
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
C_BLOB* cblob = blob_it.data();
BLOBNBOX* bblob = new BLOBNBOX(cblob);
bbox_it.add_to_end(bblob);
// Determine whether the line segment touches two intersections.
const TBOX& bbox = bblob->bounding_box();
Box* box = boxCreate(bbox.left(), bbox.bottom(),
bbox.width(), bbox.height());
bblob->set_line_crossings(NumTouchingIntersections(box, pix_intersections));
boxDestroy(&box);
// Transform the bounding box prior to finding lines. To save writing
// two line finders, flip x and y for horizontal lines and re-use the
// tab-stop detection code. For vertical lines we still have to flip the
// y-coordinates to switch from leptonica coords to tesseract coords.
if (horizontal_lines) {
// Note that we have Leptonica coords stored in a Tesseract box, so that
// bbox.bottom(), being the MIN y coord, is actually the top, so to get
// back to Leptonica coords in RemoveUnusedLineSegments, we have to
// use height - box.right() as the top, which looks very odd.
TBOX new_box(height - bbox.top(), bbox.left(),
height - bbox.bottom(), bbox.right());
bblob->set_bounding_box(new_box);
} else {
TBOX new_box(bbox.left(), height - bbox.top(),
bbox.right(), height - bbox.bottom());
bblob->set_bounding_box(new_box);
}
}
}
} // namespace tesseract.