forked from mozilla/DeepSpeech
-
Notifications
You must be signed in to change notification settings - Fork 0
/
alphabet.h
83 lines (73 loc) · 1.96 KB
/
alphabet.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#ifndef ALPHABET_H
#define ALPHABET_H
#include <cassert>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>
/*
 * Loads a text file describing a mapping of labels to strings, one string per
 * line. This is used by the decoder, client and Python scripts to convert the
 * output of the decoder to a human-readable string and vice-versa.
 *
 * File format, as parsed by the constructor:
 *   - every kept line is one label string; labels are numbered from 0 in the
 *     order the lines appear
 *   - a line starting with '#' is a comment and is skipped (it gets no label)
 *   - a line that is exactly "\#" stands for the literal label "#"
 *   - a line that is exactly one space character marks the space label
 *   - a single trailing '\r' is removed from each line, so files saved with
 *     CRLF line endings produce the same labels as LF files
 */
class Alphabet {
public:
  /*
   * Builds the alphabet from the file at config_file.
   *
   * If the file cannot be opened, a message is written to stderr and the
   * alphabet is left empty (GetSize() == 0); no exception is thrown.
   */
  Alphabet(const char *config_file) {
    std::ifstream in(config_file, std::ios::in);
    if (!in.is_open()) {
      std::cerr << "Unable to open alphabet file " << config_file << std::endl;
    }

    unsigned int label = 0;
    // Sentinel used while no " " line has been seen. Converting -2 to
    // unsigned int wraps to a very large value that no real label reaches.
    space_label_ = static_cast<unsigned int>(-2);

    for (std::string line; std::getline(in, line);) {
      // std::getline only strips '\n'; drop the '\r' left behind by CRLF
      // files so labels, the space marker and the "\#" escape still match.
      if (!line.empty() && line.back() == '\r') {
        line.pop_back();
      }

      if (line == "\\#") {
        // Escaped hash: the label is a literal '#', not a comment.
        line = "#";
      } else if (!line.empty() && line[0] == '#') {
        // Comment line: skipped without consuming a label index.
        continue;
      }

      //TODO: we should probably do something more i18n-aware here
      if (line == " ") {
        space_label_ = label;
      }

      label_to_str_.push_back(line);
      // If the same string appears twice, the later label wins in this map.
      str_to_label_[line] = label;
      ++label;
    }

    size_ = label;
    // The stream is closed by its destructor when it goes out of scope.
  }

  /*
   * Returns the string stored for label. The label must be smaller than
   * GetSize(); this is only checked by assert().
   */
  const std::string& StringFromLabel(unsigned int label) const {
    assert(label < size_);
    return label_to_str_[label];
  }

  /*
   * Returns the label stored for string. If the string is not part of the
   * alphabet, an error is printed to stderr and the process is aborted.
   */
  unsigned int LabelFromString(const std::string& string) const {
    auto it = str_to_label_.find(string);
    if (it == str_to_label_.end()) {
      std::cerr << "Invalid label " << string << std::endl;
      std::abort();
    }
    return it->second;
  }

  /* Returns the number of labels read from the file. */
  size_t GetSize() const {
    return size_;
  }

  /* Returns true if label is the label of the " " line. */
  bool IsSpace(unsigned int label) const {
    return label == space_label_;
  }

  /*
   * Returns the label of the " " line, or the sentinel value
   * static_cast<unsigned int>(-2) if the file contained no such line.
   */
  unsigned int GetSpaceLabel() const {
    return space_label_;
  }

  /*
   * Concatenates the strings of all labels in input, in order, and returns
   * the result. Each element is looked up through StringFromLabel().
   */
  template <typename T>
  std::string LabelsToString(const std::vector<T>& input) const {
    std::string word;
    for (auto ind : input) {
      word += StringFromLabel(ind);
    }
    return word;
  }

private:
  size_t size_;                  // number of labels read
  unsigned int space_label_;     // label of the " " line, or the -2 sentinel
  std::vector<std::string> label_to_str_;                       // label -> string
  std::unordered_map<std::string, unsigned int> str_to_label_;  // string -> label
};
#endif //ALPHABET_H