Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow compilation with Visual Studio; add flag for output file instead of stdout; document flags in usage arg; allow forced align to produce same output format as train+align. #34

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
Open
Prev Previous commit
Next Next commit
Fix doc msg for forced align; add switch to only dump alignments when
doing forced alignment (to make output format identical to train + align
output)
  • Loading branch information
anthonyaue committed Oct 25, 2018
commit f06ccf2b4f55f30e765ef3a40549a696151ab2c4
45 changes: 33 additions & 12 deletions src/fast_align.cc
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ string conditional_probability_filename = "";
// Command-line option state. Values shown are the defaults used when the
// corresponding switch is absent.

// NOTE(review): presumably the probability table given with -f for forced
// alignment; the assignment is not visible in this excerpt -- confirm.
string input_model_file = "";
// Scales src.size() in the Poisson length term of the forced-alignment
// log-probability (usage text: "-m: set mean source length multiplier").
double mean_srclen_multiplier = 1.0;
// Set to 1 by -r. When non-zero, src and trg are swapped before scoring and
// each link is printed as "j-(a_j - 1)" instead of "(a_j - 1)-j".
int is_reverse = 0;
// Set to 1 by -A. When non-zero, forced alignment writes only the alignment
// links per line, omitting the "src ||| trg |||" prefix and the
// " ||| log_prob" suffix.
// NOTE(review): the long-option entry passes &print_alignments_only as the
// getopt_long flag, so the long form stores 'A' here and getopt_long returns 0
// rather than 'A'. Treat this strictly as a boolean (non-zero = enabled).
int print_alignments_only = 0;
// Overwritten with atoi(optarg) by -I.
int ITERATIONS = 5;
// NOTE(review): presumably toggled by -d; the handler is outside this excerpt.
int favor_diagonal = 0;
// Usage text documents this as "-t: set beam threshold"; the handler is
// outside this excerpt.
double beam_threshold = -4.0;
Expand Down Expand Up @@ -101,16 +102,18 @@ struct option options[] = {
{"thread_buffer_size", required_argument, 0, 'b'},
{"output_file", required_argument, 0, 'O'},
{"num_threads", required_argument, 0, 'n'},
{"print_alignments_only",no_argument, &print_alignments_only,'A'},
{0,0,0,0}
};

bool InitCommandLine(int argc, char** argv) {
while (1) {
int oi;
int c = getopt_long(argc, argv, "i:rI:df:m:t:q:T:ova:Np:b:sO:n:", options, &oi);
int c = getopt_long(argc, argv, "i:rI:df:m:t:q:T:ova:Np:b:sO:n:A", options, &oi);
if (c == -1) break;
cerr << "ARG=" << (char)c << endl;
switch(c) {
case 'A': print_alignments_only = 1; break;
case 'i': input = optarg; break;
case 'r': is_reverse = 1; break;
case 'I': ITERATIONS = atoi(optarg); break;
Expand Down Expand Up @@ -316,7 +319,7 @@ void InitialPass(const unsigned kNULL, const bool use_null, TTable* s2t,

int main(int argc, char** argv) {
if (!InitCommandLine(argc, argv)) {
cerr << "Usage: " << argv[0] << " -i file.fr-en\n"
cerr << "Usage: " << argv[0] << " -i file.fr-en\n"
<< " Standard options ([USE] = strongly recommended):\n"
<< " -i: [REQ] Input parallel corpus\n"
<< " -v: [USE] Use Dirichlet prior on lexical translation distributions\n"
Expand All @@ -332,7 +335,13 @@ int main(int argc, char** argv) {
<< " -N: No null word\n"
<< " -a: alpha parameter for optional Dirichlet prior (default = 0.01)\n"
<< " -T: starting lambda for diagonal distance parameter (default = 4)\n"
<< " -s: print alignment scores (alignment ||| score, disabled by default)\n";
<< " -s: print alignment scores (alignment ||| score, disabled by default)\n"
<< " -f: force align, using specified input probability table (obtained via training with -p switch)\n"
<< " -A: print alignments only (only applies to forced align, where default is to dump src|||tgt|||align|||p(align)"
<< " -m: set mean source length multiplier\n"
<< " -t: set beam threshold\n"
<< " -a: set alpha parameter\n"
<< " -b: set thread buffer size\n";
return 1;
}
const bool use_null = !no_null_word;
Expand Down Expand Up @@ -462,17 +471,21 @@ int main(int argc, char** argv) {
while(getline(in, line)) {
++lc;
ParseLine(line, &src, &trg);
for (auto s : src) cout << d.Convert(s) << ' ';
cout << "|||";
for (auto t : trg) cout << ' ' << d.Convert(t);
cout << " |||";
if (!print_alignments_only)
{
for (auto s : src) *outputStream << d.Convert(s) << ' ';
*outputStream << "|||";
for (auto t : trg) *outputStream << ' ' << d.Convert(t);
*outputStream << " |||";
}
if (is_reverse)
swap(src, trg);
if (src.size() == 0 || trg.size() == 0) {
cerr << "Error in line " << lc << endl;
return 1;
}
double log_prob = Md::log_poisson(trg.size(), 0.05 + src.size() * mean_srclen_multiplier);
bool first = true;

// compute likelihood
for (unsigned j = 0; j < trg.size(); ++j) {
Expand All @@ -499,16 +512,24 @@ int main(int argc, char** argv) {
log_prob += log(sum);
if (true) {
if (a_j > 0) {
cout << ' ';
if (is_reverse)
cout << j << '-' << (a_j - 1);
if (!first) {
*outputStream << ' ';
}
if (is_reverse) {
*outputStream << j << '-' << (a_j - 1);
}
else
cout << (a_j - 1) << '-' << j;
*outputStream << (a_j - 1) << '-' << j;
first = false;
}
}
}
tlp += log_prob;
cout << " ||| " << log_prob << endl << flush;
if(!print_alignments_only)
{
*outputStream << " ||| " << log_prob;
}
*outputStream << endl << flush;
} // loop over test set sentences
cerr << "TOTAL LOG PROB " << tlp << endl;
}
Expand Down