-
Notifications
You must be signed in to change notification settings - Fork 9.7k
/
Copy pathindexmapbidi.cpp
250 lines (231 loc) · 8.88 KB
/
indexmapbidi.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
///////////////////////////////////////////////////////////////////////
// File: indexmapbidi.cpp
// Description: Bi-directional mapping between a sparse and compact space.
// Author: rays@google.com (Ray Smith)
// Created: Tue Apr 06 11:33:59 PDT 2010
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "indexmapbidi.h"
namespace tesseract {
// SparseToCompact takes a sparse index to an index in the compact space.
// Uses a binary search to find the result. For faster speed use
// IndexMapBiDi, but that takes more memory.
int IndexMap::SparseToCompact(int sparse_index) const {
int result = compact_map_.binary_search(sparse_index);
return compact_map_[result] == sparse_index ? result : -1;
}
// Copy from the input.
void IndexMap::CopyFrom(const IndexMap& src) {
sparse_size_ = src.sparse_size_;
compact_map_ = src.compact_map_;
}
void IndexMap::CopyFrom(const IndexMapBiDi& src) {
sparse_size_ = src.SparseSize();
compact_map_ = src.compact_map_;
}
// Writes to the given file. Returns false in case of error.
bool IndexMap::Serialize(FILE* fp) const {
inT32 sparse_size = sparse_size_;
if (fwrite(&sparse_size, sizeof(sparse_size), 1, fp) != 1) return false;
if (!compact_map_.Serialize(fp)) return false;
return true;
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool IndexMap::DeSerialize(bool swap, FILE* fp) {
inT32 sparse_size;
if (fread(&sparse_size, sizeof(sparse_size), 1, fp) != 1) return false;
if (swap)
ReverseN(&sparse_size, sizeof(sparse_size));
sparse_size_ = sparse_size;
if (!compact_map_.DeSerialize(swap, fp)) return false;
return true;
}
// Top-level init function in a single call to initialize a map to select
// a single contiguous subrange [start, end) of the sparse space to be mapped
// 1 to 1 to the compact space, with all other elements of the sparse space
// left unmapped.
// No need to call Setup after this.
void IndexMapBiDi::InitAndSetupRange(int sparse_size, int start, int end) {
Init(sparse_size, false);
for (int i = start; i < end; ++i)
SetMap(i, true);
Setup();
}
// Initializes just the sparse_map_ to the given size with either all
// forward indices mapped (all_mapped = true) or none (all_mapped = false).
// Call Setup immediately after, or make calls to SetMap first to adjust the
// mapping and then call Setup before using the map.
void IndexMapBiDi::Init(int size, bool all_mapped) {
sparse_map_.init_to_size(size, -1);
if (all_mapped) {
for (int i = 0; i < size; ++i)
sparse_map_[i] = i;
}
}
// Sets a given index in the sparse_map_ to be mapped or not.
void IndexMapBiDi::SetMap(int sparse_index, bool mapped) {
sparse_map_[sparse_index] = mapped ? 0 : -1;
}
// Sets up the sparse_map_ and compact_map_ properly after Init and
// some calls to SetMap. Assumes an ordered 1-1 map from set indices
// in the forward map to the compact space.
void IndexMapBiDi::Setup() {
int compact_size = 0;
for (int i = 0; i < sparse_map_.size(); ++i) {
if (sparse_map_[i] >= 0) {
sparse_map_[i] = compact_size++;
}
}
compact_map_.init_to_size(compact_size, -1);
for (int i = 0; i < sparse_map_.size(); ++i) {
if (sparse_map_[i] >= 0) {
compact_map_[sparse_map_[i]] = i;
}
}
sparse_size_ = sparse_map_.size();
}
// Copy from the input.
void IndexMapBiDi::CopyFrom(const IndexMapBiDi& src) {
sparse_map_ = src.sparse_map_;
compact_map_ = src.compact_map_;
sparse_size_ = sparse_map_.size();
}
// Merges the two compact space indices. May be called many times, but
// the merges must be concluded by a call to CompleteMerges.
// Returns true if a merge was actually performed.
bool IndexMapBiDi::Merge(int compact_index1, int compact_index2) {
// Find the current master index for index1 and index2.
compact_index1 = MasterCompactIndex(compact_index1);
compact_index2 = MasterCompactIndex(compact_index2);
// Be sure that index1 < index2.
if (compact_index1 > compact_index2) {
int tmp = compact_index1;
compact_index1 = compact_index2;
compact_index2 = tmp;
} else if (compact_index1 == compact_index2) {
return false;
}
// To save iterating over all sparse_map_ entries, simply make the master
// entry for index2 point to index1.
// This leaves behind a potential chain of parents that needs to be chased,
// as above.
sparse_map_[compact_map_[compact_index2]] = compact_index1;
if (compact_index1 >= 0)
compact_map_[compact_index2] = compact_map_[compact_index1];
return true;
}
// Completes one or more Merge operations by further compacting the
// compact space. Unused compact space indices are removed, and the used
// ones above shuffled down to fill the gaps.
// Example:
// Input sparse_map_: (x indicates -1)
// x x 0 x 2 x x 4 x 0 x 2 x
// Output sparse_map_:
// x x 0 x 1 x x 2 x 0 x 1 x
// Output compact_map_:
// 2 4 7.
void IndexMapBiDi::CompleteMerges() {
// Ensure each sparse_map_entry contains a master compact_map_ index.
int compact_size = 0;
for (int i = 0; i < sparse_map_.size(); ++i) {
int compact_index = MasterCompactIndex(sparse_map_[i]);
sparse_map_[i] = compact_index;
if (compact_index >= compact_size)
compact_size = compact_index + 1;
}
// Re-generate the compact_map leaving holes for unused indices.
compact_map_.init_to_size(compact_size, -1);
for (int i = 0; i < sparse_map_.size(); ++i) {
if (sparse_map_[i] >= 0) {
if (compact_map_[sparse_map_[i]] == -1)
compact_map_[sparse_map_[i]] = i;
}
}
// Compact the compact_map, leaving tmp_compact_map saying where each
// index went to in the compacted map.
GenericVector<inT32> tmp_compact_map;
tmp_compact_map.init_to_size(compact_size, -1);
compact_size = 0;
for (int i = 0; i < compact_map_.size(); ++i) {
if (compact_map_[i] >= 0) {
tmp_compact_map[i] = compact_size;
compact_map_[compact_size++] = compact_map_[i];
}
}
compact_map_.truncate(compact_size);
// Now modify the entries in the sparse map to point to the new locations.
for (int i = 0; i < sparse_map_.size(); ++i) {
if (sparse_map_[i] >= 0) {
sparse_map_[i] = tmp_compact_map[sparse_map_[i]];
}
}
}
// Writes to the given file. Returns false in case of error.
bool IndexMapBiDi::Serialize(FILE* fp) const {
if (!IndexMap::Serialize(fp)) return false;
// Make a vector containing the rest of the map. If the map is many-to-one
// then each additional sparse entry needs to be stored.
// Normally we store only the compact map to save space.
GenericVector<inT32> remaining_pairs;
for (int i = 0; i < sparse_map_.size(); ++i) {
if (sparse_map_[i] >= 0 && compact_map_[sparse_map_[i]] != i) {
remaining_pairs.push_back(i);
remaining_pairs.push_back(sparse_map_[i]);
}
}
if (!remaining_pairs.Serialize(fp)) return false;
return true;
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool IndexMapBiDi::DeSerialize(bool swap, FILE* fp) {
if (!IndexMap::DeSerialize(swap, fp)) return false;
GenericVector<inT32> remaining_pairs;
if (!remaining_pairs.DeSerialize(swap, fp)) return false;
sparse_map_.init_to_size(sparse_size_, -1);
for (int i = 0; i < compact_map_.size(); ++i) {
sparse_map_[compact_map_[i]] = i;
}
for (int i = 0; i < remaining_pairs.size(); ++i) {
int sparse_index = remaining_pairs[i++];
sparse_map_[sparse_index] = remaining_pairs[i];
}
return true;
}
// Bulk calls to SparseToCompact.
// Maps the given array of sparse indices to an array of compact indices.
// Assumes the input is sorted. The output indices are sorted and uniqued.
// Return value is the number of "missed" features, being features that
// don't map to the compact feature space.
int IndexMapBiDi::MapFeatures(const GenericVector<int>& sparse,
GenericVector<int>* compact) const {
compact->truncate(0);
int num_features = sparse.size();
int missed_features = 0;
int prev_good_feature = -1;
for (int f = 0; f < num_features; ++f) {
int feature = sparse_map_[sparse[f]];
if (feature >= 0) {
if (feature != prev_good_feature) {
compact->push_back(feature);
prev_good_feature = feature;
}
} else {
++missed_features;
}
}
return missed_features;
}
} // namespace tesseract.