-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
New makefiles and configs for demo purposes
- Loading branch information
Showing
10 changed files
with
448 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# squoia/FreeLingModules/Makefile | ||
# This makefile requires GNU Make. | ||
|
||
# Root of the FreeLing installation; ?= keeps any value already provided. | ||
FREELING_INSTALL_DIR ?= /opt/freeling | ||
|
||
# C++ compiler and its default flags. | ||
CXX ?= g++ | ||
CXXFLAGS ?= -std=gnu++11 -Wall -O0 -g3 | ||
|
||
# Header and library search paths derived from the FreeLing root. | ||
CPPFLAGS = -Iconfig_squoia -I${FREELING_INSTALL_DIR}/include | ||
LDFLAGS = -L${FREELING_INSTALL_DIR}/lib | ||
|
||
# Source files of each program. | ||
analyzer_sources = analyzer_client.cc | ||
nec_sources = nec.cc | ||
server_sources = server_squoia.cc output_crf.cc | ||
|
||
# Object files: every .cc name mapped to the matching .o name. | ||
analyzer_objects = $(patsubst %.cc,%.o,$(analyzer_sources)) | ||
nec_objects = $(patsubst %.cc,%.o,$(nec_sources)) | ||
server_objects = $(patsubst %.cc,%.o,$(server_sources)) | ||
|
||
# Default goal: build the three programs. | ||
all: analyzer_client nec server_squoia | ||
|
||
# all and clean name actions, not files; declaring them phony keeps a | ||
# stray file called "all" or "clean" from masking them. | ||
.PHONY: all clean | ||
|
||
# Compile one C++ source file into the object file of the same name. | ||
# Written as a pattern rule (this makefile already requires GNU Make) | ||
# with the output named explicitly instead of the legacy .cc.o suffix rule. | ||
%.o: %.cc | ||
	$(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $< -o $@ | ||
|
||
# Link rules: $^ is the prerequisite list, i.e. the objects of the program. | ||
analyzer_client: $(analyzer_objects) | ||
	$(CXX) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lfreeling | ||
|
||
nec: $(nec_objects) | ||
	$(CXX) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lfreeling -lboost_system -lpthread | ||
|
||
server_squoia: $(server_objects) | ||
	$(CXX) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lfreeling -lboost_program_options -lboost_system -lpthread | ||
|
||
# Remove the object files and the three linked programs. | ||
clean: | ||
	$(RM) *.o analyzer_client nec server_squoia | ||
|
||
# end squoia/FreeLingModules/Makefile |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
## | ||
#### default configuration file for Spanish analyzer | ||
## | ||
|
||
#### General options | ||
Lang=es | ||
Locale=default | ||
|
||
#### Trace options. Only effective if we have compiled with -DVERBOSE | ||
# | ||
## Possible values for TraceModule (may be OR'ed) | ||
#define SPLIT_TRACE 0x00000001 | ||
#define TOKEN_TRACE 0x00000002 | ||
#define MACO_TRACE 0x00000004 | ||
#define OPTIONS_TRACE 0x00000008 | ||
#define NUMBERS_TRACE 0x00000010 | ||
#define DATES_TRACE 0x00000020 | ||
#define PUNCT_TRACE 0x00000040 | ||
#define DICT_TRACE 0x00000080 | ||
#define SUFF_TRACE 0x00000100 | ||
#define LOCUT_TRACE 0x00000200 | ||
#define NP_TRACE 0x00000400 | ||
#define PROB_TRACE 0x00000800 | ||
#define QUANT_TRACE 0x00001000 | ||
#define NEC_TRACE 0x00002000 | ||
#define AUTOMAT_TRACE 0x00004000 | ||
#define TAGGER_TRACE 0x00008000 | ||
#define HMM_TRACE 0x00010000 | ||
#define RELAX_TRACE 0x00020000 | ||
#define RELAX_TAGGER_TRACE 0x00040000 | ||
#define CONST_GRAMMAR_TRACE 0x00080000 | ||
#define SENSES_TRACE 0x00100000 | ||
#define CHART_TRACE 0x00200000 | ||
#define GRAMMAR_TRACE 0x00400000 | ||
#define DEP_TRACE 0x00800000 | ||
#define UTIL_TRACE 0x01000000 | ||
|
||
TraceLevel=3 | ||
TraceModule=0x0000 | ||
|
||
## Options to control the applied modules. The input may be partially | ||
## processed, or a full analysis may not be wanted. The specific | ||
## formats are a choice of the main program using the library, as well | ||
## as the responsibility of calling only the required modules. | ||
## Valid input/output formats are: plain, token, splitted, morfo, tagged, parsed | ||
#InputFormat=plain | ||
InputFormat=text | ||
OutputFormat=crf | ||
InputLevel=text | ||
OutputLevel=morfo | ||
|
||
# consider each newline as a sentence end | ||
AlwaysFlush=yes | ||
|
||
#### Tokenizer options | ||
TokenizerFile=grammar_es/tokenizer.dat | ||
|
||
#### Splitter options | ||
SplitterFile=grammar_es/splitter.dat | ||
|
||
#### Morfo options | ||
AffixAnalysis=yes | ||
MultiwordsDetection=yes | ||
NumbersDetection=yes | ||
PunctuationDetection=yes | ||
DatesDetection=yes | ||
QuantitiesDetection=yes | ||
DictionarySearch=yes | ||
ProbabilityAssignment=yes | ||
#OrthographicCorrection=no | ||
DecimalPoint=, | ||
ThousandPoint=. | ||
LocutionsFile=grammar_es/locucions_squoia.dat | ||
QuantitiesFile=grammar_es/quantities.dat | ||
AffixFile=grammar_es/afixos_desr.dat | ||
ProbabilityFile=grammar_es/probabilitats.dat | ||
DictionaryFile=grammar_es/dicc_squoia.src | ||
PunctuationFile=common/punct.dat | ||
ProbabilityThreshold=0.001 | ||
|
||
# NER options | ||
NERecognition=yes | ||
NPDataFile=grammar_es/np_desr.dat | ||
## comment line above and uncomment that below, if you want | ||
## a better NE recognizer (higher accuracy, lower speed) | ||
#NPDataFile=$FREELINGSHARE/es/nerc/ner/ner-ab-rich.dat | ||
|
||
#Spelling Corrector config file | ||
#CorrectorFile=$FREELINGSHARE/es/corrector.dat | ||
|
||
## Phonetic encoding of words. | ||
Phonetics=no | ||
PhoneticsFile=grammar_es/phonetics.dat | ||
|
||
## NEC options | ||
NEClassification=yes | ||
NECFile=$FREELINGSHARE/es/nerc/nec/nec-ab-rich.dat | ||
#NECFile=$FREELINGSHARE/as/nec/nec-svm.dat | ||
|
||
## Sense annotation options (none,all,mfs,ukb) | ||
SenseAnnotation=none | ||
SenseConfigFile=$FREELINGSHARE/es/senses.dat | ||
UKBConfigFile=$FREELINGSHARE/es/ukb.dat | ||
|
||
#### Tagger options | ||
Tagger=hmm | ||
TaggerHMMFile=grammar_es/tagger.dat | ||
TaggerRelaxFile=grammar_es/constr_gram_nora.dat | ||
TaggerRelaxMaxIter=500 | ||
TaggerRelaxScaleFactor=670.0 | ||
TaggerRelaxEpsilon=0.001 | ||
TaggerRetokenize=yes | ||
TaggerForceSelect=tagger | ||
|
||
##TODO: don't need this, but has to be specified, otherwise analyzer crashes!! | ||
#### Parser options | ||
GrammarFile=$FREELINGSHARE/es/chunker/grammar-chunk.dat | ||
|
||
#### Dependence Parser options | ||
DepTxalaFile=$FREELINGSHARE/es/dep_txala/dependences.dat | ||
|
||
#### Coreference Solver options | ||
# CoreferenceResolution=no | ||
# CorefFile=$FREELINGSHARE/es/coref/coref.dat |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# squoia/MT_systems/Makefile | ||
|
||
# The three symlinks that translate_demo.cfg refers to by static path. | ||
links = freeling-share maltparser.jar wapiti | ||
|
||
all: $(links) | ||
|
||
# Abort with a clear message when a required variable is empty or unset. | ||
# Without this check an unset variable makes ln create a link to the | ||
# wrong place (for example /share/freeling) instead of failing. | ||
require = $(if $(strip $($(1))),,$(error $(1) is not set)) | ||
|
||
# Create symlinks so that we can use static paths in translate_demo.cfg. | ||
# There is one rule per link, so each rule creates exactly its own target. | ||
# ln -sfn replaces an existing link in place, so re-running make is safe. | ||
freeling-share: | ||
	$(call require,FREELING_INSTALL_DIR) | ||
	ln -sfn "$(FREELING_INSTALL_DIR)/share/freeling" $@ | ||
|
||
maltparser.jar: | ||
	$(call require,MALTPARSER_JAR) | ||
	ln -sfn "$(MALTPARSER_JAR)" $@ | ||
|
||
wapiti: | ||
	$(call require,WAPITI_INSTALL_DIR) | ||
	ln -sfn "$(WAPITI_INSTALL_DIR)" $@ | ||
|
||
.PHONY: all clean | ||
|
||
# Files under storage/ that can be easily regenerated. | ||
regenerable = \ | ||
storage/InterTransferRules \ | ||
storage/IntraTransferRules \ | ||
storage/LexSelRules \ | ||
storage/MorphSelRules \ | ||
storage/PrepSelRules \ | ||
storage/SemLex \ | ||
storage/child2siblingRules \ | ||
storage/config.bin \ | ||
storage/interchunkOrderRules \ | ||
storage/intrachunkOrderRules \ | ||
storage/nodes2chunksRules | ||
|
||
# Remove the symlinks, the tmp directory and the regenerable files. | ||
clean: | ||
	rm -f freeling-share maltparser.jar wapiti | ||
	rm -rf tmp | ||
	rm -f $(regenerable) | ||
|
||
# end squoia/MT_systems/Makefile |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# squoia/MT_systems/maltparser_tools/Makefile | ||
# This makefile requires GNU Make. | ||
|
||
# Path to MaltParser JAR file (placeholder default; override when invoking make) | ||
MALTPARSER_JAR ?= /path/to/maltparser-1.9.0.jar | ||
|
||
JAVAC = javac | ||
|
||
# Java sources compiled together in a single javac run. | ||
sources = \ | ||
src/MaltParserServer.java \ | ||
src/MPClient.java | ||
|
||
# Class file used as the make target for that javac run. | ||
class = bin/MPClient.class | ||
|
||
# all and clean name actions, not files. | ||
.PHONY: all clean | ||
|
||
all: $(class) | ||
|
||
# Recompile when the JAR or any source changes.  $(@D) is the directory | ||
# part of the target (bin); mkdir -p creates it only when it is missing. | ||
$(class): $(MALTPARSER_JAR) $(sources) | ||
	mkdir -p $(@D) | ||
	$(JAVAC) -cp "$(MALTPARSER_JAR)" -d $(@D) $(sources) | ||
|
||
clean: | ||
	rm -rf bin | ||
|
||
# end squoia/MT_systems/maltparser_tools/Makefile |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# squoia/MT_systems/matxin-lex/Makefile | ||
# This makefile requires GNU Make. | ||
|
||
# Installation prefixes of libxml2 and lttoolbox; ?= keeps any value | ||
# already provided. | ||
LIBXML2_INSTALL_DIR ?= /usr | ||
LTTOOLBOX_INSTALL_DIR ?= /usr | ||
|
||
CXX ?= g++ | ||
CXXFLAGS ?= -O0 -g3 | ||
|
||
# Header and library search paths derived from the two prefixes. | ||
CPPFLAGS = -I$(LTTOOLBOX_INSTALL_DIR)/include/lttoolbox-3.3 -I$(LIBXML2_INSTALL_DIR)/include/libxml2 | ||
LDFLAGS = -L$(LTTOOLBOX_INSTALL_DIR)/lib -L$(LIBXML2_INSTALL_DIR)/lib | ||
|
||
all: squoia-xfer-lex | ||
|
||
# The source file is listed as a prerequisite so that the program is | ||
# rebuilt whenever it changes; $< expands to that source file. | ||
squoia-xfer-lex: squoia_xfer_lex.cc | ||
	$(CXX) $(CXXFLAGS) $(CPPFLAGS) $< -o $@ $(LDFLAGS) -llttoolbox3 -lxml2 | ||
|
||
clean: | ||
	rm -f *.o squoia-xfer-lex | ||
|
||
.PHONY: all clean | ||
|
||
# end squoia/MT_systems/matxin-lex/Makefile |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# squoia/MT_systems/squoia/esqu/Makefile | ||
# This makefile requires GNU Make. | ||
|
||
# foma installation prefix (include/ and lib/ below it are used). | ||
FOMA_INSTALL_DIR ?= /opt/foma | ||
|
||
# KenLM source tree (for headers) and build directory (for libraries). | ||
KENLM_SRC_DIR ?= /tmp/kenlm | ||
KENLM_BUILD_DIR ?= /tmp/kenlm/build | ||
|
||
CXX ?= g++ | ||
CXXFLAGS ?= -O0 -g3 | ||
|
||
# Preprocessor flags: KenLM first, then foma. | ||
CPPFLAGS = -I$(KENLM_SRC_DIR) -DKENLM_MAX_ORDER=6 | ||
CPPFLAGS += -I$(FOMA_INSTALL_DIR)/include | ||
|
||
# May need these if KenLM was compiled with compression support | ||
COMPRESS_LIBS = -lbz2 -llzma | ||
|
||
# Headers whose change should trigger a rebuild. | ||
header_deps = \ | ||
$(FOMA_INSTALL_DIR)/include/foma/fomalib.h \ | ||
$(KENLM_SRC_DIR)/lm/model.hh | ||
|
||
# Link line, assembled per component in the original order. | ||
kenlm_libs = -L$(KENLM_BUILD_DIR)/lib -lkenlm -lkenlm_util | ||
foma_libs = -L$(FOMA_INSTALL_DIR)/lib -lfoma | ||
boost_libs = -lboost_regex -lboost_thread -lboost_system | ||
libs = $(kenlm_libs) $(COMPRESS_LIBS) $(foma_libs) $(boost_libs) -lz -pthread | ||
|
||
.PHONY: all clean | ||
|
||
all: outputSentences | ||
|
||
# $< is the first prerequisite, outputSentences.cpp. | ||
outputSentences: outputSentences.cpp $(header_deps) | ||
	$(CXX) $(CXXFLAGS) $(CPPFLAGS) $< -o $@ $(libs) | ||
|
||
clean: | ||
	rm -f *.o outputSentences | ||
|
||
# end squoia/MT_systems/squoia/esqu/Makefile |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# squoia/MT_systems/squoia/esqu/morphgen_foma/Makefile | ||
# This makefile requires GNU Make. | ||
|
||
# Delete the target if its recipe fails, so that a partially written | ||
# .fst is not mistaken for an up-to-date one on the next run. | ||
.DELETE_ON_ERROR: | ||
|
||
# Run the foma script given as the prerequisite ($<). | ||
# NOTE(review): presumably the script saves unificadoTransfer.fst itself; | ||
# confirm that its save command writes this file name. | ||
unificadoTransfer.fst: unificadoTransfer.foma | ||
	foma -f $< | ||
|
||
# Remove every .fst file in this directory. | ||
clean: | ||
	rm -f *.fst | ||
|
||
.PHONY: clean | ||
|
||
# end squoia/MT_systems/squoia/esqu/morphgen_foma/Makefile |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
# Define where to look for the grammar files for the translation | ||
GRAMMAR_DIR=squoia/esqu/grammar | ||
SQUOIA_DIR=. | ||
|
||
direction=esqu | ||
# set output format | ||
# default: print nbest translations, | ||
#other valid options are: | ||
# tagged (wapiti), parsed, conll2xml, | ||
# rdisamb, coref, vdisamb, svm, lextrans, morphdisamb, | ||
# prepdisamb, intraTrans, interTrans, intraOrder, | ||
# interOrder, morph, words | ||
outformat=nbest | ||
informat=senttok | ||
|
||
## set variables for tagging (FreeLing and Wapiti) | ||
# tagging | ||
wapiti=wapiti/bin | ||
wapitiModel=$SQUOIA_DIR/models/3gram_enhancedAncora.model | ||
wapitiPort=9003 | ||
freelingPort=9001 | ||
freelingConf=../FreeLingModules/es_demo.cfg | ||
## config for NE classification (since FL 4 no longer possible in the morphological analyzer) | ||
nec=../FreeLingModules | ||
neccfg=freeling-share/es/nerc/nec/nec-ab-rich.dat | ||
matxin=matxin-lex | ||
|
||
## set variables for desr parser | ||
# desrPort1=5678 | ||
# desrPort2=1234 | ||
# desrModel1=/mnt/storage/hex/projects/clsquoia/parser/desr-1.3.2/spanish_es4.MLP | ||
# desrModel2=/mnt/storage/hex/projects/clsquoia/parser/desr-1.3.2/spanish.MLP | ||
|
||
# set variables for maltparser | ||
maltPort=9002 | ||
maltModel=$SQUOIA_DIR/models/splitDatesModel.mco | ||
maltPath=maltparser.jar | ||
#maltHost=localhost atm: always localhost | ||
|
||
|
||
## set variables for lexical transfer | ||
bidix=$SQUOIA_DIR/squoia/esqu/lexica/es-quz.bin | ||
#verblex=$SQUOIA_DIR/squoia/esqu/lexica/allVerbs.xml | ||
verblex=storable | ||
nounlex=$SQUOIA_DIR/squoia/esqu/lexica/noun_semantics.txt | ||
#wordnet=/mnt/storage/hex/projects/clsquoia/resources/mcr30 | ||
wordnet=storable | ||
|
||
#################### | ||
# Grammar Files # | ||
#################### | ||
|
||
# set variables for translation | ||
#esqu_intrachunk_transfer.rules esqu_lexSelection.rules esqu_morphSelection.rules esqu_prepSelection.rules esqu_semanticTags.rules | ||
SemLex=$GRAMMAR_DIR/esqu_semanticTags.rules | ||
LexSelFile=$GRAMMAR_DIR/esqu_lexSelection.rules | ||
MorphSelFile=$GRAMMAR_DIR/esqu_morphSelection.rules | ||
IntraTransferFile=$GRAMMAR_DIR/esqu_intrachunk_transfer.rules | ||
InterTransferFile=$GRAMMAR_DIR/esqu_interchunk_transfer.rules | ||
PrepFile=$GRAMMAR_DIR/esqu_prepSelection.rules | ||
ChunkOrderFile=$GRAMMAR_DIR/qu_interchunkOrder.rules | ||
NodeOrderFile=$GRAMMAR_DIR/qu_intrachunkOrder.rules | ||
NodeChunkFile=$GRAMMAR_DIR/qu_nodesToChunk.rules | ||
ChildToSiblingFile=$GRAMMAR_DIR/qu_childToSiblingChunk.rules | ||
|
||
## set variables for morphological generation | ||
#XFST_GENERATOR="$SQUOIA_DIR/squoia/esqu/morphgen/unificadoTransfer.fst" | ||
morphgenerator=$SQUOIA_DIR/squoia/esqu/morphgen_foma/unificadoTransfer.fst | ||
fomaFST=$SQUOIA_DIR/squoia/esqu/morphgen_foma/unificadoTransfer.fst | ||
|
||
## set variables for language model | ||
#quModel=$SQUOIA_DIR/models/all_norm_3gram_interpolated_unigrams.lm | ||
quModel=$SQUOIA_DIR/models/cleaned_all_norm_5grams_interpolated.lm | ||
quMorphModel=$SQUOIA_DIR/models/all_morph_5grams_interpolated_unigr.lm | ||
useMorphModel=1 | ||
nbest=3 | ||
|
||
# end translate_demo.cfg |
Oops, something went wrong.