-
Notifications
You must be signed in to change notification settings - Fork 9.7k
/
Copy pathparams_training_featdef.h
149 lines (134 loc) · 5.78 KB
/
params_training_featdef.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
///////////////////////////////////////////////////////////////////////
// File: params_training_featdef.h
// Description: Feature definitions for params training.
// Author: Rika Antonova
// Created: Mon Nov 28 11:26:42 PDT 2011
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_WORDREC_PARAMS_TRAINING_FEATDEF_H_
#define TESSERACT_WORDREC_PARAMS_TRAINING_FEATDEF_H_
#include <cstring>  // for memcpy, memset
#include "genericvector.h"
#include "strngs.h"
namespace tesseract {
// Maximum number of unichars in the small and medium sized words.
// NOTE(review): presumably these thresholds decide which of the
// *_SHORT / *_MED / *_LONG features a word contributes to; the code that
// applies them is not in this header -- confirm there (including whether
// the bounds are inclusive).
static const int kMaxSmallWordUnichars = 3;
static const int kMaxMediumWordUnichars = 6;
// Raw features extracted from a single OCR hypothesis.
// The features are normalized (by outline length or number of unichars as
// appropriate) real-valued quantities with unbounded range and
// unknown distribution.
// Normalization / binarization of these features is done at a later stage.
//
// The enumerators are consecutive, starting at 0, so they can be used
// directly as array indices; PTRAIN_NUM_FEATURE_TYPES is the number of
// features and must stay last.
//
// Note: when adding new fields to this enum make sure to add the matching
// entry, at the same position, to kParamsTrainingFeatureTypeName. A
// compile-time check after the table rejects a size mismatch.
enum kParamsTrainingFeatureType {
  // Digits
  PTRAIN_DIGITS_SHORT = 0,     // 0
  PTRAIN_DIGITS_MED,           // 1
  PTRAIN_DIGITS_LONG,          // 2
  // Number or pattern (NUMBER_PERM, USER_PATTERN_PERM)
  PTRAIN_NUM_SHORT,            // 3
  PTRAIN_NUM_MED,              // 4
  PTRAIN_NUM_LONG,             // 5
  // Document word (DOC_DAWG_PERM)
  PTRAIN_DOC_SHORT,            // 6
  PTRAIN_DOC_MED,              // 7
  PTRAIN_DOC_LONG,             // 8
  // Word (SYSTEM_DAWG_PERM, USER_DAWG_PERM, COMPOUND_PERM)
  PTRAIN_DICT_SHORT,           // 9
  PTRAIN_DICT_MED,             // 10
  PTRAIN_DICT_LONG,            // 11
  // Frequent word (FREQ_DAWG_PERM)
  PTRAIN_FREQ_SHORT,           // 12
  PTRAIN_FREQ_MED,             // 13
  PTRAIN_FREQ_LONG,            // 14
  PTRAIN_SHAPE_COST_PER_CHAR,  // 15
  PTRAIN_NGRAM_COST_PER_CHAR,  // 16
  PTRAIN_NUM_BAD_PUNC,         // 17
  PTRAIN_NUM_BAD_CASE,         // 18
  PTRAIN_XHEIGHT_CONSISTENCY,  // 19
  PTRAIN_NUM_BAD_CHAR_TYPE,    // 20
  PTRAIN_NUM_BAD_SPACING,      // 21
  PTRAIN_NUM_BAD_FONT,         // 22
  PTRAIN_RATING_PER_CHAR,      // 23
  PTRAIN_NUM_FEATURE_TYPES
};
// Printable name of every feature, indexed by kParamsTrainingFeatureType.
// Entry i is the spelling of the enumerator whose value is i.
static const char * const kParamsTrainingFeatureTypeName[] = {
  "PTRAIN_DIGITS_SHORT",         // 0
  "PTRAIN_DIGITS_MED",           // 1
  "PTRAIN_DIGITS_LONG",          // 2
  "PTRAIN_NUM_SHORT",            // 3
  "PTRAIN_NUM_MED",              // 4
  "PTRAIN_NUM_LONG",             // 5
  "PTRAIN_DOC_SHORT",            // 6
  "PTRAIN_DOC_MED",              // 7
  "PTRAIN_DOC_LONG",             // 8
  "PTRAIN_DICT_SHORT",           // 9
  "PTRAIN_DICT_MED",             // 10
  "PTRAIN_DICT_LONG",            // 11
  "PTRAIN_FREQ_SHORT",           // 12
  "PTRAIN_FREQ_MED",             // 13
  "PTRAIN_FREQ_LONG",            // 14
  "PTRAIN_SHAPE_COST_PER_CHAR",  // 15
  "PTRAIN_NGRAM_COST_PER_CHAR",  // 16
  "PTRAIN_NUM_BAD_PUNC",         // 17
  "PTRAIN_NUM_BAD_CASE",         // 18
  "PTRAIN_XHEIGHT_CONSISTENCY",  // 19
  "PTRAIN_NUM_BAD_CHAR_TYPE",    // 20
  "PTRAIN_NUM_BAD_SPACING",      // 21
  "PTRAIN_NUM_BAD_FONT",         // 22
  "PTRAIN_RATING_PER_CHAR",      // 23
};
// Compile-time guard: the array bound below becomes -1 (a compile error)
// if the name table does not have exactly one entry per feature type.
// It only checks the count; the order of the names is still up to the
// author.
typedef char ParamsTrainingFeatureTypeNameSizeCheck
    [sizeof(kParamsTrainingFeatureTypeName) ==
             PTRAIN_NUM_FEATURE_TYPES *
                 sizeof(kParamsTrainingFeatureTypeName[0])
         ? 1
         : -1];
// Looks up a feature by its name.
// name: the feature name to look up.
//       NOTE(review): presumably matched against the entries of
//       kParamsTrainingFeatureTypeName; the definition is not in this
//       header -- confirm there (including how a NULL name is treated).
// Returns the index of the given feature (by name),
// or -1 meaning the feature is unknown.
int ParamsTrainingFeatureByName(const char *name);
// Entry with features extracted from a single OCR hypothesis for a word.
struct ParamsTrainingHypothesis {
ParamsTrainingHypothesis() : cost(0.0) {
memset(features, 0, sizeof(float) * PTRAIN_NUM_FEATURE_TYPES);
}
ParamsTrainingHypothesis(const ParamsTrainingHypothesis &other) {
memcpy(features, other.features,
sizeof(float) * PTRAIN_NUM_FEATURE_TYPES);
str = other.str;
cost = other.cost;
}
float features[PTRAIN_NUM_FEATURE_TYPES];
STRING str; // string corresponding to word hypothesis (for debugging)
float cost; // path cost computed by segsearch
};
// A list of hypotheses explored during one run of segmentation search.
// ParamsTrainingBundle stores a vector of these lists (hyp_list_vec).
typedef GenericVector<ParamsTrainingHypothesis> ParamsTrainingHypothesisList;
// A bundle that accumulates all of the hypothesis lists explored during all
// of the runs of segmentation search on a word (e.g. a list of hypotheses
// explored on PASS1, PASS2, fix xheight pass, etc).
class ParamsTrainingBundle {
 public:
  ParamsTrainingBundle() {}
  // Starts a new hypothesis list.
  // Should be called at the beginning of a new run of the segmentation search.
  void StartHypothesisList() {
    hyp_list_vec.push_back(ParamsTrainingHypothesisList());
  }
  // Adds a copy of other to the current (most recently started) hypothesis
  // list and returns a reference to the newly added entry. If no list has
  // been started yet, one is started first.
  // NOTE(review): the returned reference points into the list's storage;
  // presumably it does not survive later additions -- confirm against
  // GenericVector.
  ParamsTrainingHypothesis &AddHypothesis(
      const ParamsTrainingHypothesis &other) {
    if (hyp_list_vec.empty()) {
      StartHypothesisList();
    }
    ParamsTrainingHypothesisList &current_list = hyp_list_vec.back();
    // push_back stores its own copy, so no explicit temporary is needed.
    current_list.push_back(other);
    return current_list.back();
  }
  // The hypothesis lists started so far, in the order they were started.
  GenericVector<ParamsTrainingHypothesisList> hyp_list_vec;
};
} // namespace tesseract
#endif // TESSERACT_WORDREC_PARAMS_TRAINING_FEATDEF_H_