@@ -26,85 +26,103 @@ namespace rime {
26
26
27
27
// Constructs a compiler for `dictionary`: copies its name, pack list, prism
// and table list into the corresponding members and stores `prefix`, which is
// later passed to RelocateToUserDirectory() when output files are placed.
// `dictionary` is dereferenced unconditionally, so it must not be null.
DictCompiler::DictCompiler (Dictionary *dictionary, const string& prefix)
28
28
: dict_name_(dictionary->name ()),
29
+ packs_(dictionary->packs ()),
29
30
prism_(dictionary->prism ()),
30
- table_ (dictionary->table ()),
31
+ tables_ (dictionary->tables ()),
31
32
prefix_(prefix) {
32
33
}
33
34
34
- static string LocateFile (const string& file_name) {
35
+ static string locate_file (const string& file_name) {
35
36
the<ResourceResolver> resolver (
36
37
Service::instance ().CreateResourceResolver ({" build_source" , " " , " " }));
37
38
return resolver->ResolvePath (file_name).string ();
38
39
}
39
40
41
+ static bool load_dict_settings_from_file (DictSettings* settings,
42
+ const string& dict_file) {
43
+ std::ifstream fin (dict_file.c_str ());
44
+ bool success = settings->LoadDictHeader (fin);
45
+ fin.close ();
46
+ return success;
47
+ }
48
+
49
+ static bool get_dict_files_from_settings (vector<string>* dict_files,
50
+ DictSettings& settings) {
51
+ if (auto tables = settings.GetTables ()) {
52
+ for (auto it = tables->begin (); it != tables->end (); ++it) {
53
+ string dict_name = As<ConfigValue>(*it)->str ();
54
+ string dict_file = locate_file (dict_name + " .dict.yaml" );
55
+ if (!boost::filesystem::exists (dict_file)) {
56
+ LOG (ERROR) << " source file '" << dict_file << " ' does not exist." ;
57
+ return false ;
58
+ }
59
+ dict_files->push_back (dict_file);
60
+ }
61
+ }
62
+ return true ;
63
+ }
64
+
65
+ static uint32_t compute_dict_file_checksum (uint32_t initial_checksum,
66
+ const vector<string>& dict_files,
67
+ DictSettings& settings) {
68
+ if (dict_files.empty ()) {
69
+ return initial_checksum;
70
+ }
71
+ ChecksumComputer cc (initial_checksum);
72
+ for (const auto & file_name : dict_files) {
73
+ cc.ProcessFile (file_name);
74
+ }
75
+ if (settings.use_preset_vocabulary ()) {
76
+ cc.ProcessFile (PresetVocabulary::DictFilePath (settings.vocabulary ()));
77
+ }
78
+ return cc.Checksum ();
79
+ }
80
+
40
81
bool DictCompiler::Compile (const string &schema_file) {
41
82
LOG (INFO) << " compiling dictionary for " << schema_file;
42
83
bool build_table_from_source = true ;
43
84
DictSettings settings;
44
- string dict_file = LocateFile (dict_name_ + " .dict.yaml" );
85
+ string dict_file = locate_file (dict_name_ + " .dict.yaml" );
45
86
if (!boost::filesystem::exists (dict_file)) {
46
87
LOG (ERROR) << " source file '" << dict_file << " ' does not exist." ;
47
88
build_table_from_source = false ;
48
89
}
49
- else {
50
- std::ifstream fin (dict_file.c_str ());
51
- if (!settings.LoadDictHeader (fin)) {
52
- LOG (ERROR) << " failed to load settings from '" << dict_file << " '." ;
53
- return false ;
54
- }
55
- fin.close ();
56
- LOG (INFO) << " dict name: " << settings.dict_name ();
57
- LOG (INFO) << " dict version: " << settings.dict_version ();
90
+ else if (!load_dict_settings_from_file (&settings, dict_file)) {
91
+ LOG (ERROR) << " failed to load settings from '" << dict_file << " '." ;
92
+ return false ;
58
93
}
59
94
vector<string> dict_files;
60
- auto tables = settings.GetTables ();
61
- for (auto it = tables->begin (); it != tables->end (); ++it) {
62
- if (!Is<ConfigValue>(*it))
63
- continue ;
64
- string dict_name = As<ConfigValue>(*it)->str ();
65
- string dict_file = LocateFile (dict_name + " .dict.yaml" );
66
- if (!boost::filesystem::exists (dict_file)) {
67
- LOG (ERROR) << " source file '" << dict_file << " ' does not exist." ;
68
- return false ;
69
- }
70
- dict_files.push_back (dict_file);
71
- }
72
- uint32_t dict_file_checksum = 0 ;
73
- if (!dict_files.empty ()) {
74
- ChecksumComputer cc;
75
- for (const auto & file_name : dict_files) {
76
- cc.ProcessFile (file_name);
77
- }
78
- if (settings.use_preset_vocabulary ()) {
79
- cc.ProcessFile (PresetVocabulary::DictFilePath (settings.vocabulary ()));
80
- }
81
- dict_file_checksum = cc.Checksum ();
95
+ if (!get_dict_files_from_settings (&dict_files, settings)) {
96
+ return false ;
82
97
}
98
+ uint32_t dict_file_checksum =
99
+ compute_dict_file_checksum (0 , dict_files, settings);
83
100
uint32_t schema_file_checksum =
84
101
schema_file.empty () ? 0 : Checksum (schema_file);
85
- bool rebuild_table = true ;
86
- bool rebuild_prism = true ;
87
- if (table_-> Exists () && table_-> Load ()) {
88
- if (!build_table_from_source ) {
89
- dict_file_checksum = table_-> dict_file_checksum ();
90
- LOG (INFO) << " reuse existing table: " << table_-> file_name () ;
91
- }
92
- if (table_-> dict_file_checksum () == dict_file_checksum) {
93
- rebuild_table = false ;
102
+ bool rebuild_table = false ;
103
+ bool rebuild_prism = false ;
104
+ const auto & primary_table = tables_[ 0 ];
105
+ if (primary_table-> Exists () && primary_table-> Load () ) {
106
+ if (build_table_from_source) {
107
+ rebuild_table = primary_table-> dict_file_checksum () != dict_file_checksum ;
108
+ } else {
109
+ dict_file_checksum = primary_table-> dict_file_checksum ();
110
+ LOG (INFO) << " reuse existing table: " << primary_table-> file_name () ;
94
111
}
95
- table_->Close ();
96
- }
97
- else if (!build_table_from_source) {
112
+ primary_table->Close ();
113
+ } else if (build_table_from_source) {
114
+ rebuild_table = true ;
115
+ } else {
98
116
LOG (ERROR) << " neither " << dict_name_ << " .dict.yaml nor "
99
117
<< dict_name_ << " .table.bin exists." ;
100
118
return false ;
101
119
}
102
120
if (prism_->Exists () && prism_->Load ()) {
103
- if (prism_->dict_file_checksum () == dict_file_checksum &&
104
- prism_->schema_file_checksum () == schema_file_checksum) {
105
- rebuild_prism = false ;
106
- }
121
+ rebuild_prism = prism_->dict_file_checksum () != dict_file_checksum ||
122
+ prism_->schema_file_checksum () != schema_file_checksum;
107
123
prism_->Close ();
124
+ } else {
125
+ rebuild_prism = true ;
108
126
}
109
127
LOG (INFO) << dict_file << " [" << dict_files.size () << " file(s)]"
110
128
<< " (" << dict_file_checksum << " )" ;
@@ -126,11 +144,55 @@ bool DictCompiler::Compile(const string &schema_file) {
126
144
if (options_ & kRebuildPrism ) {
127
145
rebuild_prism = true ;
128
146
}
129
- if (rebuild_table && !BuildTable (&settings, dict_files, dict_file_checksum))
130
- return false ;
131
- if (rebuild_prism && !BuildPrism (schema_file,
132
- dict_file_checksum, schema_file_checksum))
147
+ Syllabary syllabary;
148
+ if (rebuild_table) {
149
+ EntryCollector collector;
150
+ if (!BuildTable (0 ,
151
+ collector,
152
+ &settings,
153
+ dict_files,
154
+ dict_file_checksum)) {
155
+ return false ;
156
+ }
157
+ syllabary = std::move (collector.syllabary );
158
+ }
159
+ if (rebuild_prism &&
160
+ !BuildPrism (schema_file,
161
+ syllabary,
162
+ dict_file_checksum,
163
+ schema_file_checksum)) {
133
164
return false ;
165
+ }
166
+ if (rebuild_table) {
167
+ for (int table_index = 1 ; table_index < tables_.size (); ++table_index) {
168
+ const auto & pack_name = packs_[table_index - 1 ];
169
+ EntryCollector collector (std::move (syllabary));
170
+ DictSettings settings;
171
+ string dict_file = locate_file (pack_name + " .dict.yaml" );
172
+ if (!boost::filesystem::exists (dict_file)) {
173
+ LOG (ERROR) << " source file '" << dict_file << " ' does not exist." ;
174
+ continue ;
175
+ }
176
+ if (!load_dict_settings_from_file (&settings, dict_file)) {
177
+ LOG (ERROR) << " failed to load settings from '" << dict_file << " '." ;
178
+ continue ;
179
+ }
180
+ vector<string> dict_files;
181
+ if (!get_dict_files_from_settings (&dict_files, settings)) {
182
+ continue ;
183
+ }
184
+ uint32_t pack_file_checksum =
185
+ compute_dict_file_checksum (dict_file_checksum, dict_files, settings);
186
+ if (!BuildTable (table_index,
187
+ collector,
188
+ &settings,
189
+ dict_files,
190
+ pack_file_checksum)) {
191
+ LOG (ERROR) << " failed to build pack: " << pack_name;
192
+ }
193
+ syllabary = std::move (collector.syllabary );
194
+ }
195
+ }
134
196
// done!
135
197
return true ;
136
198
}
@@ -143,17 +205,20 @@ static string RelocateToUserDirectory(const string& prefix,
143
205
return resolver.ResolvePath (resource_id).string ();
144
206
}
145
207
146
- bool DictCompiler::BuildTable (DictSettings* settings,
208
+ bool DictCompiler::BuildTable (int table_index,
209
+ EntryCollector& collector,
210
+ DictSettings* settings,
147
211
const vector<string>& dict_files,
148
212
uint32_t dict_file_checksum) {
149
- LOG (INFO) << " building table..." ;
150
- table_ = New<Table>(RelocateToUserDirectory (prefix_, table_->file_name ()));
213
+ auto & table = tables_[table_index];
214
+ auto path = RelocateToUserDirectory (prefix_, table->file_name ());
215
+ LOG (INFO) << " building table: " << path;
216
+ table = New<Table>(path);
151
217
152
- EntryCollector collector;
153
218
collector.Configure (settings);
154
219
collector.Collect (dict_files);
155
220
if (options_ & kDump ) {
156
- boost::filesystem::path path (table_ ->file_name ());
221
+ boost::filesystem::path path (table ->file_name ());
157
222
path.replace_extension (" .txt" );
158
223
collector.Dump (path.string ());
159
224
}
@@ -184,16 +249,34 @@ bool DictCompiler::BuildTable(DictSettings* settings,
184
249
if (settings->sort_order () != " original" ) {
185
250
vocabulary.SortHomophones ();
186
251
}
187
- table_->Remove ();
188
- if (!table_->Build (collector.syllabary , vocabulary, collector.num_entries ,
189
- dict_file_checksum) ||
190
- !table_->Save ()) {
252
+ table->Remove ();
253
+ if (!table->Build (collector.syllabary ,
254
+ vocabulary,
255
+ collector.num_entries ,
256
+ dict_file_checksum) ||
257
+ !table->Save ()) {
191
258
return false ;
192
259
}
193
260
}
261
+ // build reverse db for the primary table
262
+ if (table_index == 0 &&
263
+ !BuildReverseDb (settings,
264
+ collector,
265
+ vocabulary,
266
+ dict_file_checksum)) {
267
+ return false ;
268
+ }
269
+ return true ;
270
+ }
271
+
272
+ bool DictCompiler::BuildReverseDb (DictSettings* settings,
273
+ const EntryCollector& collector,
274
+ const Vocabulary& vocabulary,
275
+ uint32_t dict_file_checksum) {
194
276
// build .reverse.bin
195
- ReverseDb reverse_db (RelocateToUserDirectory (prefix_,
196
- dict_name_ + " .reverse.bin" ));
277
+ auto path = RelocateToUserDirectory (prefix_,
278
+ dict_name_ + " .reverse.bin" );
279
+ ReverseDb reverse_db (path);
197
280
if (!reverse_db.Build (settings,
198
281
collector.syllabary ,
199
282
vocabulary,
@@ -206,15 +289,12 @@ bool DictCompiler::BuildTable(DictSettings* settings,
206
289
}
207
290
208
291
bool DictCompiler::BuildPrism (const string &schema_file,
292
+ const Syllabary& syllabary,
209
293
uint32_t dict_file_checksum,
210
294
uint32_t schema_file_checksum) {
211
295
LOG (INFO) << " building prism..." ;
212
296
prism_ = New<Prism>(RelocateToUserDirectory (prefix_, prism_->file_name ()));
213
297
214
- // get syllabary from table
215
- Syllabary syllabary;
216
- if (!table_->Load () || !table_->GetSyllabary (&syllabary) || syllabary.empty ())
217
- return false ;
218
298
// apply spelling algebra and prepare corrections (if enabled)
219
299
Script script;
220
300
if (!schema_file.empty ()) {
0 commit comments