Skip to content

Commit

Permalink
use boost program_options for command line arguments, and remove the …
Browse files Browse the repository at this point in the history
…example dir.
  • Loading branch information
Waleed Ammar committed Sep 11, 2013
1 parent 7ccc44c commit aac76a8
Show file tree
Hide file tree
Showing 151 changed files with 194 additions and 1,406,936 deletions.
5 changes: 2 additions & 3 deletions Makefile-latentCrfAligner
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
CC=mpiCC
SINGLE=-c
BEFORE=-x c++ -std=c++11
LIBS=-llbfgs -lfst -ldl -lboost_mpi -lboost_serialization -lboost_thread -lboost_system -lcmph
LIBS=-llbfgs -lfst -ldl -lboost_mpi -lboost_serialization -lboost_thread -lboost_system -lboost_program_options -lcmph
OPT=-O3 -g
INC=-I/usr/local/packages/gcc/4.7.2/include/c++/4.7.2/

all: train-latentCrfAligner

Expand All @@ -12,7 +11,7 @@ train-latentCrfAligner: train-latentCrfAligner.o
$(CC) train-latentCrfAligner.o IbmModel1.o FstUtils.o LatentCrfModel.o LatentCrfAligner.o LogLinearParams.o fdict.o simann.o random.o r250.o randgen.o registrar.o rndlcg.o erstream.o $(LIBS) -o train-latentCrfAligner

train-latentCrfAligner.o: IbmModel1.o LatentCrfModel.o LatentCrfAligner.o train-latentCrfAligner.cc ClustersComparer.h StringUtils.h LearningInfo.h
$(CC) $(BEFORE) $(SINGLE) train-latentCrfAligner.cc $(OPT)
$(CC) $(BEFORE) $(SINGLE) -I/opt/tools/boost_1_54_0/include/ train-latentCrfAligner.cc $(OPT)

LatentCrfAligner.o: LatentCrfModel.o LatentCrfAligner.h LatentCrfAligner.cc
$(CC) $(BEFORE) $(SINGLE) LatentCrfAligner.cc $(OPT)
Expand Down
17 changes: 0 additions & 17 deletions batch-files/example-duct.tape

This file was deleted.

10 changes: 10 additions & 0 deletions batch-files/wa-czen-fast_align.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash
#
# Train fast_align on data/<name>.cz-en and score the resulting alignments
# with the czen manual-alignment evaluation script.
#
# Usage: wa-czen-fast_align.sh <name>
#   <name>  corpus prefix; the input read is data/<name>.cz-en
#
# Outputs (relative to the current directory):
#   batch-files/wa-czen-fast_align-dir/out.<name>.labels  alignments
#   batch-files/wa-czen-fast_align-dir/out.<name>.aer     evaluation output

# Stop at the first failing command so a failed alignment run is never scored.
set -euo pipefail

if [ "$#" -lt 1 ] || [ -z "$1" ]; then
  echo "usage: $0 <corpus-prefix>" >&2
  exit 1
fi

corpus="$1"
out_dir="batch-files/wa-czen-fast_align-dir"
labels="$out_dir/out.$corpus.labels"

# The redirections below fail if the output directory is missing.
mkdir -p "$out_dir"

# train and align
~/cdec/word-aligner/fast_align -i "data/$corpus.cz-en" -d -v -o > "$labels"

# compute aer
./data/czen-manual-alignments/eval-czen.pl \
  "./$labels" \
  ./data/czen-manual-alignments/czen.wal \
  > "$out_dir/out.$corpus.aer"
10 changes: 5 additions & 5 deletions batch-files/wa-czen-latentCrfAligner.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,16 @@
make -f Makefile-latentCrfAligner

# use cdec to generate word pair features for a particular test set
#~/cdec/word-aligner/aligner.pl --mkcls=/mal0/tools/mosesdecoder/bin/mkcls example/10k.cz-en
#~/cdec/word-aligner/aligner.pl --mkcls=/mal0/tools/mosesdecoder/bin/mkcls data/10k.cz-en
#mv talign cz-en-talign
#cd cz-en-talign
#make
#cd ../
#mv cz-en-talign/grammars/wordpair-features example/cz-en-wordpair-features
#mv cz-en-talign/grammars/wordpair-features data/cz-en-wordpair-features

# the latent-CRF word alignment model
mpirun -np $2 ./train-latentCrfAligner \
example/10k.cz-en \
data/1k.cz-en \
none none \
batch-files/wa-czen-latentCrfAligner-dir/cz-en-talign/grammars/wordpairs.f-e.features \
batch-files/wa-czen-latentCrfAligner-dir/out.$1 \
Expand All @@ -24,7 +24,7 @@ mpirun -np $2 ./train-latentCrfAligner \
# batch-files/wa-czen-latentCrfAligner-dir/cz-en-talign/grammars/wordpairs.f-e.features.gz

# compute aer
./example/czen-manual-alignments/eval-czen.pl \
./data/czen-manual-alignments/eval-czen.pl \
./batch-files/wa-czen-latentCrfAligner-dir/out.$1.labels \
./example/czen-manual-alignments/czen.wal \
./data/czen-manual-alignments/czen.wal \
> batch-files/wa-czen-latentCrfAligner-dir/out.$1.aer
3 changes: 3 additions & 0 deletions ducttape-files/cdec.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
formalism=scfg
add_pass_through_rules=true
feature_function=WordPenalty
121 changes: 121 additions & 0 deletions ducttape-files/wa-czen-latentCrfAligner.tape
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@

# Forward-direction latent-CRF word alignment.
# Rebuilds train-latentCrfAligner, generates cdec word-pair features for
# data_file, trains with `procs` MPI processes, and copies the resulting
# "<prefix>-fwd.labels" file to the fwd_align output. Trainer stderr is
# captured in out_err.
# NOTE(review): autoencoder_bwd_align runs the same make in the same source
# directory; if both tasks run concurrently the builds may race -- confirm.
task autoencoder_fwd_align :: test_sents_count=$test_sents_count prefix=$prefix procs=$procs data_file=$data_file dir=$fwd > fwd_align out_err="fwd-out.err" {
  pushd /usr0/home/wammar/alignment-with-openfst/
  # compile
  make -f Makefile-latentCrfAligner
  popd

  # use cdec to generate word pair features for a particular test set
  ~/cdec/word-aligner/aligner.pl --mkcls=/mal0/tools/mosesdecoder/bin/mkcls "$data_file"
  pushd talign
  make generate-wordpair-features
  gzip -d grammars/wordpairs.f-e.features.gz
  popd
  # Rename the working directory so it is tagged with the alignment direction.
  echo "$dir-talign"
  mv talign "$dir-talign"

  # the latent-CRF word alignment model
  echo "$dir-talign/grammars/wordpairs.f-e.features"
  echo "$prefix-fwd"
  mpirun -np "$procs" /usr0/home/wammar/alignment-with-openfst/train-latentCrfAligner \
    "$data_file" \
    none none \
    "$dir-talign/grammars/wordpairs.f-e.features" \
    "$prefix-fwd" \
    "$test_sents_count" \
    2> "$out_err"

  # Publish the label file written under the "<prefix>-fwd" output prefix.
  echo "$prefix-fwd.labels"
  cp "$prefix-fwd.labels" "$fwd_align"

  # NOTE(review): leftover sample paths, possibly meant for the two
  # "none none" trainer arguments above -- confirm or remove:
  # example/wa-czen-latentCrfAligner-1k-lambda example/wa-czen-latentCrfAligner-1k-theta
}

# Backward-direction latent-CRF word alignment.
# Rebuilds train-latentCrfAligner, generates cdec word-pair features for
# data_file (bound to the global bwd_data_file), trains with `procs` MPI
# processes, and copies the resulting "<prefix>-bwd.labels" file to the
# bwd_align output. Trainer stderr is captured in out_err.
# NOTE(review): autoencoder_fwd_align runs the same make in the same source
# directory; if both tasks run concurrently the builds may race -- confirm.
task autoencoder_bwd_align :: test_sents_count=$test_sents_count prefix=$prefix procs=$procs data_file=$bwd_data_file dir=$bwd > bwd_align out_err="bwd-out.err" {
  pushd /usr0/home/wammar/alignment-with-openfst/
  # compile
  make -f Makefile-latentCrfAligner
  popd

  # use cdec to generate word pair features for a particular test set
  ~/cdec/word-aligner/aligner.pl --mkcls=/mal0/tools/mosesdecoder/bin/mkcls "$data_file"
  pushd talign
  make generate-wordpair-features
  gzip -d grammars/wordpairs.f-e.features.gz
  popd
  # Rename the working directory so it is tagged with the alignment direction.
  mv talign "$dir-talign"

  # the latent-CRF word alignment model
  mpirun -np "$procs" /usr0/home/wammar/alignment-with-openfst/train-latentCrfAligner \
    "$data_file" \
    none none \
    "$dir-talign/grammars/wordpairs.f-e.features" \
    "$prefix-bwd" \
    "$test_sents_count" \
    2> "$out_err"

  # Publish the label file written under the "<prefix>-bwd" output prefix.
  cp "$prefix-bwd.labels" "$bwd_align"

  # NOTE(review): leftover sample paths, possibly meant for the two
  # "none none" trainer arguments above -- confirm or remove:
  # example/wa-czen-latentCrfAligner-1k-lambda example/wa-czen-latentCrfAligner-1k-theta
}

# Combine the forward and backward autoencoder alignments into sym_align by
# running cdec's atools with the grow-diag-final-and command.
# The data_file parameter is declared but not used in the body.
task autoencoder_align :: data_file=$data_file < fwd_align=$fwd_align@autoencoder_fwd_align bwd_align=$bwd_align@autoencoder_bwd_align > sym_align {
  /usr0/home/wammar/cdec/utils/atools \
    -i $fwd_align \
    -j $bwd_align \
    -c grow-diag-final-and \
    > $sym_align
}

# Baseline aligner: runs cdec's fast_align twice on data_file and merges the
# two outputs with atools (grow-diag-final-and) into sym_align.
task fast_align :: data_file=$data_file > fwd_align bwd_align sym_align {
  cdec_home=/usr0/home/wammar/cdec
  aligner=$cdec_home/word-aligner/fast_align

  # The run with -r is stored as the backward alignment; it executes first.
  $aligner -i $data_file -d -v -r -o > $bwd_align
  # The run without -r is stored as the forward alignment.
  $aligner -i $data_file -d -v -o > $fwd_align

  $cdec_home/utils/atools -i $fwd_align -j $bwd_align -c grow-diag-final-and > $sym_align
}

# Score an alignment against the gold file and write the result to aer_file.
# The alignment scored here is the forward output of the fast_align task
# (fwd_align@fast_align), not the autoencoder alignment.
task eval_align :: eval_script=$eval_script gold_file=$gold_file < fwd_align=$fwd_align@fast_align > aer_file="aer" {
  # compute aer: eval_script <alignment> <gold> > <report>
  $eval_script $fwd_align $gold_file > $aer_file
}

# Prepare everything cdec needs for tuning, starting from the symmetrized
# autoencoder alignment (sym_align@autoencoder_align):
#   1. compile a suffix array and extraction config from data_file + sym_align
#   2. extract per-sentence grammars for dev_file and test_file
#   3. build a 3-gram language model from mono_file and binarize it
#   4. copy cdec_ini_file and append the KLanguageModel feature line
task train_cdec :: procs=$procs data_file=$data_file dev_file=$dev_file test_file=$test_file mono_file=$mono_file cdec_ini_file=$cdec_ini_file < sym_align=$sym_align@autoencoder_align > extract_ini_file klm_file="klm" dev_sgm test_sgm lm_file suffix_array dev_grammar test_grammar cdec_ini_ofile {
  # Make the locally built cdec Python module importable.
  export PYTHONPATH=$(echo ~/cdec/python/build/lib.*)

  echo "create extract ini file and suffix array from training data $data_file and alignments $sym_align"
  # Use the declared output variable rather than a bare file name so the
  # config is always written to (and later read from) the task's output path.
  python -m cdec.sa.compile -b "$data_file" -a "$sym_align" -c "$extract_ini_file" -o "$suffix_array"

  echo "create dev set grammar and suffix array"
  python -m cdec.sa.extract -c "$extract_ini_file" -g "$dev_grammar" -j "$procs" -z < "$dev_file" > "$dev_sgm"

  echo "create test set grammar and suffix array"
  python -m cdec.sa.extract -c "$extract_ini_file" -g "$test_grammar" -j "$procs" -z < "$test_file" > "$test_sgm"

  # Language model: text-format model first, then the binary form cdec loads.
  /usr0/home/wammar/cdec/klm/lm/builder/builder --order 3 < "$mono_file" > "$lm_file"
  /usr0/home/wammar/cdec/klm/lm/build_binary "$lm_file" "$klm_file"

  # Start from the shared cdec config and register the language model feature.
  cp "$cdec_ini_file" "$cdec_ini_ofile"
  echo "feature_function=KLanguageModel $klm_file" >> "$cdec_ini_ofile"
}

# Run cdec's mira.py on the dev/test sgm files and the cdec config produced
# by train_cdec, using `procs` jobs; results go to mira_out_dir.
task tune_mira :: procs=$procs < dev_sgm=$dev_sgm@train_cdec test_sgm=$test_sgm@train_cdec cdec_ini_ofile=$cdec_ini_ofile@train_cdec > mira_out_dir {
  # Collect the options first; expansions stay unquoted exactly as before.
  mira_opts=(-d $dev_sgm -t $test_sgm -c $cdec_ini_ofile -j $procs -o $mira_out_dir)
  /usr0/home/wammar/cdec/training/mira/mira.py "${mira_opts[@]}"
}

# Workflow-wide settings that the tasks in this file bind as parameters
# (for example data_file=$data_file, dir=$fwd).
# NOTE(review): dev_file and test_file point at the same corpus (1k.cz-en),
# so train_cdec/tune_mira receive identical dev and test data -- confirm this
# is intended.
# NOTE(review): src and tgt are not referenced by any task visible in this
# file -- confirm whether they are still needed.
global {
eval_script=/usr0/home/wammar/alignment-with-openfst/data/czen-manual-alignments/eval-czen.pl
gold_file=/usr0/home/wammar/alignment-with-openfst/data/czen-manual-alignments/czen.wal
mono_file=/usr0/home/wammar/alignment-with-openfst/data/10k.cz-en.en
data_file=/usr0/home/wammar/alignment-with-openfst/data/10k.cz-en
bwd_data_file=/usr0/home/wammar/alignment-with-openfst/data/10k.en-cz
dev_file=/usr0/home/wammar/alignment-with-openfst/data/1k.cz-en
test_file=/usr0/home/wammar/alignment-with-openfst/data/1k.cz-en
prefix=10k
cdec_ini_file=/usr0/home/wammar/alignment-with-openfst/ducttape-files/cdec.ini
procs=32
src=cz
tgt=en
fwd=cz-en
bwd=en-cz
ducttape_structure=flat
test_sents_count=515
}

10 changes: 0 additions & 10 deletions example/10.fr-en

This file was deleted.

10 changes: 0 additions & 10 deletions example/10.kin-eng

This file was deleted.

Loading

0 comments on commit aac76a8

Please sign in to comment.