-
Notifications
You must be signed in to change notification settings - Fork 9.7k
/
Copy pathnormalis.h
317 lines (298 loc) · 14.2 KB
/
normalis.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
/**********************************************************************
* File: normalis.h (Formerly denorm.h)
* Description: Code for the DENORM class.
* Author: Ray Smith
* Created: Thu Apr 23 09:22:43 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef NORMALIS_H
#define NORMALIS_H
#include <stdio.h>
#include "genericvector.h"
#include "host.h"
// Dimensions of the baseline-normalized space, listed smallest value first.
const int kBlnBaselineOffset = 64;  // Offset for baseline normalization.
const int kBlnXHeight = 128;        // x-height for baseline normalization.
const int kBlnCellHeight = 256;     // Full-height for baseline normalization.
// Forward declarations. Within this header these types are only ever used
// through pointers or references, so their full definitions are not needed.
// Types declared with the class keyword:
class BLOCK;
class FCOORD;
class ROW;
class TBOX;
class UNICHARSET;
// Types declared with the struct keyword:
struct Pix;
struct TBLOB;
struct TPOINT;
namespace tesseract {

// The available normalization methods. Every value is deliberately NEGATIVE,
// because the same values double up as markers for the last sub-classifier.
enum NormalizationMode {
  NM_CHAR_ANISOTROPIC = -1,  // The original CN normalization mode.
  NM_CHAR_ISOTROPIC = -2,    // Character normalization but isotropic.
  NM_BASELINE = -3           // The original BL normalization mode.
};

}  // namespace tesseract.
// DENORM holds the parameters of a single normalization step and can execute
// it forwards (LocalNormTransform) or backwards (LocalDenormTransform).
// Steps are chained through predecessor_, so the full forward normalization
// runs all predecessors first and then the transform in "this".
class DENORM {
public:
// Default constructor. Defined out of line.
DENORM();
// Copying a DENORM is allowed.
// NOTE(review): x_map_ and y_map_ are documented below as owned by the
// DENORM, so the copy operations presumably deep copy them - confirm in the
// out-of-line definitions.
DENORM(const DENORM &);
DENORM& operator=(const DENORM&);
// Destructor. Defined out of line.
~DENORM();
// Setup the normalization transformation parameters.
// The normalizations applied to a blob are as follows:
// 1. An optional block layout rotation that was applied during layout
// analysis to make the textlines horizontal.
// 2. A normalization transformation (LocalNormTransform):
// Subtract the "origin"
// Apply an x,y scaling.
// Apply an optional rotation.
// Add back a final translation.
// The origin is in the block-rotated space, and is usually something like
// the x-middle of the word at the baseline.
// 3. Zero or more further normalization transformations that are applied
// in sequence, with a similar pattern to the first normalization transform.
//
// A DENORM holds the parameters of a single normalization, and can execute
// both the LocalNormTransform (a forwards normalization), and the
// LocalDenormTransform which is an inverse transform or de-normalization.
// A DENORM may point to a predecessor DENORM, which is actually the earlier
// normalization, so the full normalization sequence involves executing all
// predecessors first and then the transform in "this".
// Let x be image co-ordinates, and suppose we have normalization classes
// A, B, C where we first apply A then B then C to get normalized x':
// x' = CBAx
// Then the backwards (to original coordinates) would be:
// x = A^-1 B^-1 C^-1 x'
// and A = B->predecessor_ and B = C->predecessor_
// NormTransform executes all predecessors recursively, and then this.
// NormTransform would be used to transform an image-based feature to
// normalized space for use in a classifier.
// DenormTransform inverts this and then all predecessors. It can be
// used to get back to the original image coordinates from normalized space.
// The LocalNormTransform member executes just the transformation
// in "this" without the layout rotation or any predecessors. It would be
// used to run each successive normalization, eg the word normalization,
// and later the character normalization.
// Arguments:
// block: if not NULL, then this is the first transformation, and
// block->re_rotation() needs to be used after the Denorm
// transformation to get back to the image coords.
// rotation: if not NULL, apply this rotation after translation to the
// origin and scaling. (Usually a classify rotation.)
// predecessor: if not NULL, then predecessor has been applied to the
// input space and needs to be undone to complete the inverse.
// The above pointers are not owned by this DENORM and are assumed to live
// longer than this denorm, except rotation, which is deep copied on input.
//
// x_origin: The x origin which will be mapped to final_xshift in the result.
// y_origin: The y origin which will be mapped to final_yshift in the result.
// Added to result of row->baseline(x) if not NULL.
// NOTE(review): this function takes no row argument, so the sentence above
// looks stale - confirm and remove.
//
// x_scale: scale factor for the x-coordinate.
// y_scale: scale factor for the y-coordinate. Ignored if segs is given.
// NOTE(review): this function takes no segs argument, so "Ignored if segs is
// given" looks stale - confirm and remove.
// Note that these scale factors apply to the same x and y system as the
// x-origin and y-origin apply, ie after any block rotation, but before
// the rotation argument is applied.
//
// final_xshift: The x component of the final translation.
// final_yshift: The y component of the final translation.
//
// In theory, any of the commonly used normalizations can be setup here:
// * Traditional baseline normalization on a word:
// SetupNormalization(block, NULL, NULL,
// box.x_middle(), baseline,
// kBlnXHeight / x_height, kBlnXHeight / x_height,
// 0, kBlnBaselineOffset);
// * "Numeric mode" baseline normalization on a word, in which the blobs
// are positioned with the bottom as the baseline is achieved by making
// a separate DENORM for each blob.
// SetupNormalization(block, NULL, NULL,
// box.x_middle(), box.bottom(),
// kBlnXHeight / x_height, kBlnXHeight / x_height,
// 0, kBlnBaselineOffset);
// * Anisotropic character normalization used by IntFx.
// SetupNormalization(NULL, NULL, denorm,
// centroid_x, centroid_y,
// 51.2 / ry, 51.2 / rx, 128, 128);
// NOTE(review): the parameter order is x_scale then y_scale, so ry and rx
// look swapped in the example above - confirm against the real caller.
// * Normalize blob height to x-height (current OSD):
// SetupNormalization(NULL, &rotation, NULL,
// box.rotational_x_middle(rotation),
// box.rotational_y_middle(rotation),
// kBlnXHeight / box.rotational_height(rotation),
// kBlnXHeight / box.rotational_height(rotation),
// 0, kBlnBaselineOffset);
// * Secondary normalization for classification rotation (current):
// FCOORD rotation = block->classify_rotation();
// float target_height = kBlnXHeight / CCStruct::kXHeightCapRatio;
// SetupNormalization(NULL, &rotation, denorm,
// box.rotational_x_middle(rotation),
// box.rotational_y_middle(rotation),
// target_height / box.rotational_height(rotation),
// target_height / box.rotational_height(rotation),
// 0, kBlnBaselineOffset);
// * Proposed new normalizations for CJK: Between them there is then
// no need for further normalization at all, and the character fills the cell.
// ** Replacement for baseline normalization on a word:
// Scales height and width independently so that modal height and pitch
// fill the cell respectively.
// float cap_height = x_height / CCStruct::kXHeightCapRatio;
// SetupNormalization(block, NULL, NULL,
// box.x_middle(), cap_height / 2.0f,
// kBlnCellHeight / fixed_pitch,
// kBlnCellHeight / cap_height,
// 0, 0);
// ** Secondary normalization for classification (with rotation) (proposed):
// Requires a simple translation to the center of the appropriate character
// cell, no further scaling and a simple rotation (or nothing) about the
// cell center.
// FCOORD rotation = block->classify_rotation();
// SetupNormalization(NULL, &rotation, denorm,
// fixed_pitch_cell_center,
// 0.0f,
// 1.0f,
// 1.0f,
// 0, 0);
void SetupNormalization(const BLOCK* block,
const FCOORD* rotation,
const DENORM* predecessor,
float x_origin, float y_origin,
float x_scale, float y_scale,
float final_xshift, float final_yshift);
// Sets up the DENORM to execute a non-linear transformation based on
// preserving an even distribution of stroke edges. The transformation
// operates only within the given box, scaling input coords within the box
// non-linearly to a box of target_width by target_height, with all other
// coords being clipped to the box edge. As with SetupNormalization above,
// final_xshift and final_yshift are applied after scaling, and the bottom-
// left of box is used as a pre-scaling origin.
// x_coords is a collection of the x-coords of vertical edges for each
// y-coord starting at box.bottom().
// y_coords is a collection of the y-coords of horizontal edges for each
// x-coord starting at box.left().
// Eg x_coords[0] is a collection of the x-coords of edges at y=bottom.
// Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1.
// The second-level vectors must all be sorted in ascending order.
void SetupNonLinear(const DENORM* predecessor, const TBOX& box,
float target_width, float target_height,
float final_xshift, float final_yshift,
const GenericVector<GenericVector<int> >& x_coords,
const GenericVector<GenericVector<int> >& y_coords);
// Transforms the given coords one step forward to normalized space, without
// using any block rotation or predecessor.
// The result is written to *transformed.
void LocalNormTransform(const TPOINT& pt, TPOINT* transformed) const;
void LocalNormTransform(const FCOORD& pt, FCOORD* transformed) const;
// Transforms the given coords forward to normalized space using the
// full transformation sequence defined by the block rotation, the
// predecessors, deepest first, and finally this. If first_norm is not NULL,
// then the first and deepest transformation used is first_norm, ending
// with this, and the block rotation will not be applied.
// The result is written to *transformed.
void NormTransform(const DENORM* first_norm, const TPOINT& pt,
TPOINT* transformed) const;
void NormTransform(const DENORM* first_norm, const FCOORD& pt,
FCOORD* transformed) const;
// Transforms the given coords one step back to source space, without
// using any block rotation or predecessor.
// The result is written to *original.
void LocalDenormTransform(const TPOINT& pt, TPOINT* original) const;
void LocalDenormTransform(const FCOORD& pt, FCOORD* original) const;
// Transforms the given coords all the way back to source image space using
// the full transformation sequence defined by this and its predecessors
// recursively, shallowest first, and finally any block re_rotation.
// If last_denorm is not NULL, then the last transformation used will
// be last_denorm, and the block re_rotation will never be executed.
// The result is written to *original.
void DenormTransform(const DENORM* last_denorm, const TPOINT& pt,
TPOINT* original) const;
void DenormTransform(const DENORM* last_denorm, const FCOORD& pt,
FCOORD* original) const;
// Normalize a blob using blob transformations. Less accurate, but
// more accurately copies the old way.
void LocalNormBlob(TBLOB* blob) const;
// Fills in the x-height range accepted by the given unichar_id in blob
// coordinates, given its bounding box in the usual baseline-normalized
// coordinates, with some initial crude x-height estimate (such as word
// size) and this denoting the transformation that was used.
// Also returns the amount the character must have shifted up or down.
// The range is returned through min_xht and max_xht, and the shift through
// yshift.
void XHeightRange(int unichar_id, const UNICHARSET& unicharset,
const TBOX& bbox,
float* min_xht,
float* max_xht,
float* yshift) const;
// Prints the content of the DENORM for debug purposes.
void Print() const;
// Returns the stored image pointer (the "best available image", see pix_).
Pix* pix() const {
return pix_;
}
// Stores the given image pointer.
// NOTE(review): nothing in this header says whether the DENORM takes
// ownership of pix - confirm against the destructor/Clear().
void set_pix(Pix* pix) {
pix_ = pix;
}
// Returns true if the source image is white-on-black.
bool inverse() const {
return inverse_;
}
// Sets the white-on-black flag.
void set_inverse(bool value) {
inverse_ = value;
}
// Follows the predecessor_ chain recursively and returns the DENORM at its
// end, ie the one with a NULL predecessor_. Returns this if there is no
// predecessor.
const DENORM* RootDenorm() const {
if (predecessor_ != NULL)
return predecessor_->RootDenorm();
return this;
}
// Returns the previous transformation in the chain, which may be NULL.
const DENORM* predecessor() const {
return predecessor_;
}
// Accessors - perhaps should not be needed.
// Returns the scale factor for the x-coordinate.
float x_scale() const {
return x_scale_;
}
// Returns the scale factor for the y-coordinate.
float y_scale() const {
return y_scale_;
}
// Returns the block the word came from, which may be NULL.
const BLOCK* block() const {
return block_;
}
// Sets the block pointer. The pointer is stored as-is, not copied.
void set_block(const BLOCK* block) {
block_ = block;
}
private:
// Free allocated memory and clear pointers.
void Clear();
// Setup default values.
void Init();
// Best available image.
Pix* pix_;
// True if the source image is white-on-black.
bool inverse_;
// Block the word came from. If not null, block->re_rotation() takes the
// "untransformed" coordinates even further back to the original image.
// Used only on the first DENORM in a chain.
const BLOCK* block_;
// Optional rotation. The SetupNormalization comment describes it as applied
// after translation to the origin and scaling, and as deep copied on input.
// NOTE(review): where that copy is released is not visible in this header -
// confirm that Clear() frees it.
const FCOORD* rotation_;
// Previous transformation in a chain.
const DENORM* predecessor_;
// Non-linear transformation maps directly from each integer offset from the
// origin to the corresponding x-coord. Owned by the DENORM.
GenericVector<float>* x_map_;
// Non-linear transformation maps directly from each integer offset from the
// origin to the corresponding y-coord. Owned by the DENORM.
GenericVector<float>* y_map_;
// x-coordinate to be mapped to final_xshift_ in the result.
float x_origin_;
// y-coordinate to be mapped to final_yshift_ in the result.
float y_origin_;
// Scale factors for x and y coords. Applied to pre-rotation system.
float x_scale_;
float y_scale_;
// Destination coords of the x_origin_ and y_origin_.
float final_xshift_;
float final_yshift_;
};
#endif