// matrix.cpp -- forked from arthenica/tesseract.
/******************************************************************************
*
* File: matrix.cpp (Formerly matrix.c)
* Description: Ratings matrix code. (Used by associator)
* Author: Mark Seaman, OCR Technology
*
* (c) Copyright 1990, Hewlett-Packard Company.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
*****************************************************************************/
/*----------------------------------------------------------------------
I n c l u d e s
----------------------------------------------------------------------*/
#include "matrix.h"
#include "ratngs.h"
#include "tprintf.h"
#include "unicharset.h"
namespace tesseract {
// Destructor.
// Defaulted out of line: the definition lives in this translation unit
// rather than in the header so that the compiler can create a single vtable
// for MATRIX here, instead of weak vtables in every compilation unit.
MATRIX::~MATRIX() = default;
// Returns true if there are any real classification results.
bool MATRIX::Classified(int col, int row, int wildcard_id) const {
if (get(col, row) == NOT_CLASSIFIED) {
return false;
}
BLOB_CHOICE_IT b_it(get(col, row));
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
BLOB_CHOICE *choice = b_it.data();
if (choice->IsClassified()) {
return true;
}
}
return false;
}
// Widens the band of this matrix in place to the requested bandwidth.
// The dimension is left as it is and the existing contents are retained
// (the work is delegated to ResizeWithCopy).
void MATRIX::IncreaseBandSize(int bandwidth) {
  const int current_dim = dimension();
  ResizeWithCopy(current_dim, bandwidth);
}
// Builds and returns a MATRIX that is one row and one column larger than
// *this, so that the blob at diagonal position (ind, ind) can be split.
// Every non-null entry is moved to the location given by
// MATRIX_COORD::MapForSplit(ind), and each BLOB_CHOICE on it is told its new
// matrix cell.
// Ownership of the list pointers passes to the returned MATRIX; *this is
// deleted before returning, so the caller must not use it afterwards.
MATRIX *MATRIX::ConsumeAndMakeBigger(int ind) {
  const int old_dim = dimension();
  const int old_band = bandwidth();

  // Decide whether the band has to grow by one. The columns examined are
  // those from ind downwards, while col > ind - old_band and col >= 0; if any
  // of them has a non-empty slot at offset old_band - 1 (presumably the last
  // band slot of that column -- depends on the array_ layout in matrix.h),
  // the new matrix gets one extra unit of bandwidth.
  int new_band = old_band;
  for (int c = ind; c >= 0 && c > ind - old_band; --c) {
    if (array_[c * old_band + old_band - 1] != empty_) {
      new_band = old_band + 1;
      break;
    }
  }

  auto *bigger = new MATRIX(old_dim + 1, new_band);
  for (int c = 0; c < old_dim; ++c) {
    for (int r = c; r < old_dim && r < c + old_band; ++r) {
      MATRIX_COORD target(c, r);
      target.MapForSplit(ind);
      BLOB_CHOICE_LIST *cell = get(c, r);
      if (cell == nullptr) {
        continue;
      }
      // Correct the matrix location recorded on each choice.
      BLOB_CHOICE_IT choice_it(cell);
      choice_it.mark_cycle_pt();
      while (!choice_it.cycled_list()) {
        choice_it.data()->set_matrix_cell(target.col, target.row);
        choice_it.forward();
      }
      ASSERT_HOST(target.Valid(*bigger));
      bigger->put(target.col, target.row, cell);
    }
  }
  delete this;
  return bigger;
}
// Creates a new MATRIX with the same dimension and bandwidth as *this and
// fills it with deep copies of every non-null BLOB_CHOICE_LIST, duplicating
// the BLOB_CHOICEs via BLOB_CHOICE::deep_copy.
// Any LanguageModelState that may be attached to the BLOB_CHOICEs is not
// copied.
// The caller receives the newly allocated MATRIX.
MATRIX *MATRIX::DeepCopy() const {
  const int size = dimension();
  const int band = bandwidth();
  auto *clone = new MATRIX(size, band);
  for (int c = 0; c < size; ++c) {
    for (int r = c; r < size && r < c + band; ++r) {
      BLOB_CHOICE_LIST *source_list = get(c, r);
      if (source_list == nullptr) {
        continue;
      }
      auto *duplicate = new BLOB_CHOICE_LIST;
      duplicate->deep_copy(source_list, &BLOB_CHOICE::deep_copy);
      clone->put(c, r, duplicate);
    }
  }
  return clone;
}
// Print the best guesses out of the match rating matrix.
void MATRIX::print(const UNICHARSET &unicharset) const {
tprintf("Ratings Matrix (top 3 choices)\n");
int dim = dimension();
int band_width = bandwidth();
int row, col;
for (col = 0; col < dim; ++col) {
for (row = col; row < dim && row < col + band_width; ++row) {
BLOB_CHOICE_LIST *rating = this->get(col, row);
if (rating == NOT_CLASSIFIED) {
continue;
}
BLOB_CHOICE_IT b_it(rating);
tprintf("col=%d row=%d ", col, row);
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
tprintf("%s rat=%g cert=%g ", unicharset.id_to_unichar(b_it.data()->unichar_id()),
b_it.data()->rating(), b_it.data()->certainty());
}
tprintf("\n");
}
tprintf("\n");
}
tprintf("\n");
for (col = 0; col < dim; ++col) {
tprintf("\t%d", col);
}
tprintf("\n");
for (row = 0; row < dim; ++row) {
for (col = 0; col <= row; ++col) {
if (col == 0) {
tprintf("%d\t", row);
}
if (row >= col + band_width) {
tprintf(" \t");
continue;
}
BLOB_CHOICE_LIST *rating = this->get(col, row);
if (rating != NOT_CLASSIFIED) {
BLOB_CHOICE_IT b_it(rating);
int counter = 0;
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
tprintf("%s ", unicharset.id_to_unichar(b_it.data()->unichar_id()));
++counter;
if (counter == 3) {
break;
}
}
tprintf("\t");
} else {
tprintf(" \t");
}
}
tprintf("\n");
}
}
} // namespace tesseract