Skip to content

Commit

Permalink
New makefiles and configs for demo purposes
Browse files Browse the repository at this point in the history
  • Loading branch information
iskunk committed Aug 21, 2017
1 parent 8f2b2b5 commit 41e9ef5
Show file tree
Hide file tree
Showing 10 changed files with 448 additions and 0 deletions.
40 changes: 40 additions & 0 deletions FreeLingModules/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# squoia/FreeLingModules/Makefile
# This makefile requires GNU Make.
#
# Builds three programs against the FreeLing library: analyzer_client, nec
# and server_squoia.
#
# Overridable variables:
#   FREELING_INSTALL_DIR  FreeLing prefix; headers are taken from its
#                         include/ directory and libraries from lib/
#   CXX, CXXFLAGS         C++ compiler and compiler flags

FREELING_INSTALL_DIR ?= /opt/freeling

CXX ?= g++
CXXFLAGS ?= -std=gnu++11 -Wall -O0 -g3

CPPFLAGS = -Iconfig_squoia -I$(FREELING_INSTALL_DIR)/include
LDFLAGS = -L$(FREELING_INSTALL_DIR)/lib

# Source and object lists, one pair per program.
analyzer_sources = analyzer_client.cc
analyzer_objects = $(analyzer_sources:.cc=.o)

nec_sources = nec.cc
nec_objects = $(nec_sources:.cc=.o)

server_sources = server_squoia.cc output_crf.cc
server_objects = $(server_sources:.cc=.o)

all: analyzer_client nec server_squoia

# Compile one C++ source file into the object file of the same name.
# A pattern rule with an explicit -o replaces the legacy .cc.o suffix rule.
%.o: %.cc
	$(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $< -o $@

# Link rules: libraries come after the objects so the linker can resolve
# the symbols the objects reference.
analyzer_client: $(analyzer_objects)
	$(CXX) $(CXXFLAGS) -o $@ $(analyzer_objects) $(LDFLAGS) -lfreeling

nec: $(nec_objects)
	$(CXX) $(CXXFLAGS) -o $@ $(nec_objects) $(LDFLAGS) \
	-lfreeling -lboost_system -lpthread

server_squoia: $(server_objects)
	$(CXX) $(CXXFLAGS) -o $@ $(server_objects) $(LDFLAGS) \
	-lfreeling -lboost_program_options -lboost_system -lpthread

# Remove all object files and the three programs.
clean:
	rm -f *.o analyzer_client nec server_squoia

# all and clean name commands, not files; without this declaration a file
# called "all" or "clean" in this directory would disable the target.
.PHONY: all clean

# end squoia/FreeLingModules/Makefile
124 changes: 124 additions & 0 deletions FreeLingModules/es_demo.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
##
#### default configuration file for Spanish analyzer
##

#### General options
Lang=es
Locale=default

#### Trace options. Only effective if we have compiled with -DVERBOSE
#
## Possible values for TraceModule (may be OR'ed)
#define SPLIT_TRACE 0x00000001
#define TOKEN_TRACE 0x00000002
#define MACO_TRACE 0x00000004
#define OPTIONS_TRACE 0x00000008
#define NUMBERS_TRACE 0x00000010
#define DATES_TRACE 0x00000020
#define PUNCT_TRACE 0x00000040
#define DICT_TRACE 0x00000080
#define SUFF_TRACE 0x00000100
#define LOCUT_TRACE 0x00000200
#define NP_TRACE 0x00000400
#define PROB_TRACE 0x00000800
#define QUANT_TRACE 0x00001000
#define NEC_TRACE 0x00002000
#define AUTOMAT_TRACE 0x00004000
#define TAGGER_TRACE 0x00008000
#define HMM_TRACE 0x00010000
#define RELAX_TRACE 0x00020000
#define RELAX_TAGGER_TRACE 0x00040000
#define CONST_GRAMMAR_TRACE 0x00080000
#define SENSES_TRACE 0x00100000
#define CHART_TRACE 0x00200000
#define GRAMMAR_TRACE 0x00400000
#define DEP_TRACE 0x00800000
#define UTIL_TRACE 0x01000000

TraceLevel=3
TraceModule=0x0000

## Options to control the applied modules. The input may be partially
## processed, or a full analysis may not be wanted. The specific
## formats are a choice of the main program using the library, as is
## the responsibility of calling only the required modules.
## Valid input/output formats are: plain, token, splitted, morfo, tagged, parsed
#InputFormat=plain
InputFormat=text
OutputFormat=crf
InputLevel=text
OutputLevel=morfo

# consider each newline as a sentence end
AlwaysFlush=yes

#### Tokenizer options
TokenizerFile=grammar_es/tokenizer.dat

#### Splitter options
SplitterFile=grammar_es/splitter.dat

#### Morfo options
AffixAnalysis=yes
MultiwordsDetection=yes
NumbersDetection=yes
PunctuationDetection=yes
DatesDetection=yes
QuantitiesDetection=yes
DictionarySearch=yes
ProbabilityAssignment=yes
#OrthographicCorrection=no
DecimalPoint=,
ThousandPoint=.
LocutionsFile=grammar_es/locucions_squoia.dat
QuantitiesFile=grammar_es/quantities.dat
AffixFile=grammar_es/afixos_desr.dat
ProbabilityFile=grammar_es/probabilitats.dat
DictionaryFile=grammar_es/dicc_squoia.src
PunctuationFile=common/punct.dat
ProbabilityThreshold=0.001

# NER options
NERecognition=yes
NPDataFile=grammar_es/np_desr.dat
## Comment out the line above and uncomment the one below if you want
## a better NE recognizer (higher accuracy, lower speed)
#NPDataFile=$FREELINGSHARE/es/nerc/ner/ner-ab-rich.dat

#Spelling Corrector config file
#CorrectorFile=$FREELINGSHARE/es/corrector.dat

## Phonetic encoding of words.
Phonetics=no
PhoneticsFile=grammar_es/phonetics.dat

## NEC options
NEClassification=yes
NECFile=$FREELINGSHARE/es/nerc/nec/nec-ab-rich.dat
#NECFile=$FREELINGSHARE/as/nec/nec-svm.dat

## Sense annotation options (none,all,mfs,ukb)
SenseAnnotation=none
SenseConfigFile=$FREELINGSHARE/es/senses.dat
UKBConfigFile=$FREELINGSHARE/es/ukb.dat

#### Tagger options
Tagger=hmm
TaggerHMMFile=grammar_es/tagger.dat
TaggerRelaxFile=grammar_es/constr_gram_nora.dat
TaggerRelaxMaxIter=500
TaggerRelaxScaleFactor=670.0
TaggerRelaxEpsilon=0.001
TaggerRetokenize=yes
TaggerForceSelect=tagger

##TODO: don't need this, but has to be specified, otherwise analyzer crashes!!
#### Parser options
GrammarFile=$FREELINGSHARE/es/chunker/grammar-chunk.dat

#### Dependence Parser options
DepTxalaFile=$FREELINGSHARE/es/dep_txala/dependences.dat

#### Coreference Solver options
# CoreferenceResolution=no
# CorefFile=$FREELINGSHARE/es/coref/coref.dat
30 changes: 30 additions & 0 deletions MT_systems/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# squoia/MT_systems/Makefile

# Symlinks created in this directory so that translate_demo.cfg can use
# static paths.  (This section uses GNU Make functions and order-only
# prerequisites.)
symlinks = freeling-share maltparser.jar wapiti

all: $(symlinks)

# $(call require,VAR): abort with an error when VAR is unset or empty.
# With an empty value ln would create a broken or self-referencing link
# (e.g. "ln -s  maltparser.jar" links maltparser.jar to itself).
require = $(if $(strip $($(1))),,$(error $(1) is not set; pass $(1)=... to make))

# Each link has its own rule, so a link that was deleted is recreated even
# when the others still exist.  -f and -n replace a stale or dangling link
# in place; make treats a dangling symlink as a missing target and re-runs
# the rule, where a plain "ln -s" would fail with "File exists".
freeling-share:
	$(call require,FREELING_INSTALL_DIR)
	ln -sfn $(FREELING_INSTALL_DIR)/share/freeling $@

# The order-only prerequisites keep "make maltparser.jar" creating all
# three links, as the original single rule did.
maltparser.jar: | freeling-share wapiti
	$(call require,MALTPARSER_JAR)
	ln -sfn $(MALTPARSER_JAR) $@

wapiti:
	$(call require,WAPITI_INSTALL_DIR)
	ln -sfn $(WAPITI_INSTALL_DIR) $@

# Files below storage/ that clean deletes; these can be easily regenerated.
regenerable = \
  InterTransferRules \
  IntraTransferRules \
  LexSelRules \
  MorphSelRules \
  PrepSelRules \
  SemLex \
  child2siblingRules \
  config.bin \
  interchunkOrderRules \
  intrachunkOrderRules \
  nodes2chunksRules

# Remove the three symlinks, the tmp directory and the regenerable files.
clean:
	rm -f freeling-share maltparser.jar wapiti
	rm -rf tmp
	rm -f $(addprefix storage/,$(regenerable))

.PHONY: all clean

# end squoia/MT_systems/Makefile
24 changes: 24 additions & 0 deletions MT_systems/maltparser_tools/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# squoia/MT_systems/maltparser_tools/Makefile
# This makefile requires GNU Make.
#
# Compiles the Java sources listed in $(sources) into bin/, with the
# MaltParser JAR on the class path.

# Path to MaltParser JAR file
MALTPARSER_JAR ?= /path/to/maltparser-1.9.0.jar

JAVAC = javac

sources = \
  src/MaltParserServer.java \
  src/MPClient.java

# The one class file make tracks; a single javac run compiles every source.
class = bin/MPClient.class

all: $(class)

# Rebuild when the JAR or any source changes.  mkdir -p succeeds whether or
# not bin already exists.
$(class): $(MALTPARSER_JAR) $(sources)
	mkdir -p bin
	$(JAVAC) -cp $(MALTPARSER_JAR) -d bin $(sources)

# Remove the whole output directory.
clean:
	rm -rf bin

# all and clean name commands, not files; without this declaration a file
# called "all" or "clean" in this directory would disable the target.
.PHONY: all clean

# end squoia/MT_systems/maltparser_tools/Makefile
23 changes: 23 additions & 0 deletions MT_systems/matxin-lex/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# squoia/MT_systems/matxin-lex/Makefile
# This makefile requires GNU Make.
#
# Builds squoia-xfer-lex from squoia_xfer_lex.cc, linking against
# lttoolbox3 and libxml2.
#
# Overridable variables:
#   LIBXML2_INSTALL_DIR    prefix containing include/libxml2 and lib/
#   LTTOOLBOX_INSTALL_DIR  prefix containing include/lttoolbox-3.3 and lib/
#   CXX, CXXFLAGS          C++ compiler and compiler flags

LIBXML2_INSTALL_DIR ?= /usr
LTTOOLBOX_INSTALL_DIR ?= /usr

CXX ?= g++
CXXFLAGS ?= -O0 -g3

CPPFLAGS = -I$(LTTOOLBOX_INSTALL_DIR)/include/lttoolbox-3.3 -I$(LIBXML2_INSTALL_DIR)/include/libxml2
LDFLAGS = -L$(LTTOOLBOX_INSTALL_DIR)/lib -L$(LIBXML2_INSTALL_DIR)/lib

all: squoia-xfer-lex

# The source file is listed as a prerequisite so that editing it triggers a
# rebuild; without it make considers an existing binary always up to date.
squoia-xfer-lex: squoia_xfer_lex.cc
	$(CXX) $(CXXFLAGS) $(CPPFLAGS) $< -o $@ $(LDFLAGS) -llttoolbox3 -lxml2

# Remove object files and the program.
clean:
	rm -f *.o squoia-xfer-lex

.PHONY: all clean

# end squoia/MT_systems/matxin-lex/Makefile
38 changes: 38 additions & 0 deletions MT_systems/squoia/esqu/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# squoia/MT_systems/squoia/esqu/Makefile
# This makefile requires GNU Make.
#
# Builds outputSentences from outputSentences.cpp against KenLM, foma and
# Boost.

# Installation prefix of foma (include/ and lib/ are used).
FOMA_INSTALL_DIR ?= /opt/foma

# KenLM source tree (headers) and build tree (libraries under lib/).
KENLM_SRC_DIR ?= /tmp/kenlm
KENLM_BUILD_DIR ?= /tmp/kenlm/build

CXX ?= g++
CXXFLAGS ?= -O0 -g3

CPPFLAGS = -I$(KENLM_SRC_DIR) -DKENLM_MAX_ORDER=6 -I$(FOMA_INSTALL_DIR)/include

# May need these if KenLM was compiled with compression support
COMPRESS_LIBS = -lbz2 -llzma

# Headers listed as prerequisites of the program: the build stops early if
# one is missing and the program is rebuilt when one changes.
header_deps = $(FOMA_INSTALL_DIR)/include/foma/fomalib.h
header_deps += $(KENLM_SRC_DIR)/lm/model.hh

# Linker arguments, assembled group by group in link order.
libs = -L$(KENLM_BUILD_DIR)/lib -lkenlm -lkenlm_util
libs += $(COMPRESS_LIBS)
libs += -L$(FOMA_INSTALL_DIR)/lib -lfoma
libs += -lboost_regex -lboost_thread -lboost_system
libs += -lz -pthread

all: outputSentences

# $< is outputSentences.cpp, the first prerequisite.
outputSentences: outputSentences.cpp $(header_deps)
	$(CXX) $(CXXFLAGS) $(CPPFLAGS) $< -o $@ $(libs)

# Remove object files and the program.
clean:
	rm -f *.o outputSentences

.PHONY: all clean

# end squoia/MT_systems/squoia/esqu/Makefile
12 changes: 12 additions & 0 deletions MT_systems/squoia/esqu/morphgen_foma/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# squoia/MT_systems/squoia/esqu/morphgen_foma/Makefile
# This makefile requires GNU Make.

fst = unificadoTransfer.fst

# Default goal: run foma on the script with the same base name whenever the
# script is newer than the transducer file.
# NOTE(review): presumably the .foma script itself saves $(fst) - confirm.
$(fst): $(fst:.fst=.foma)
	foma -f $<

# Remove every .fst file in this directory.
clean:
	rm -f *.fst

.PHONY: clean

# end squoia/MT_systems/squoia/esqu/morphgen_foma/Makefile
78 changes: 78 additions & 0 deletions MT_systems/translate_demo.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Define where to look for the grammar files for the translation
GRAMMAR_DIR=squoia/esqu/grammar
SQUOIA_DIR=.

direction=esqu
# set output format
# default: print nbest translations,
#other valid options are:
# tagged (wapiti), parsed, conll2xml,
# rdisamb, coref, vdisamb, svm, lextrans, morphdisamb,
# prepdisamb, intraTrans, interTrans, intraOrder,
# interOrder, morph, words
outformat=nbest
informat=senttok

## set variables for tagging (FreeLing and Wapiti)
# tagging
wapiti=wapiti/bin
wapitiModel=$SQUOIA_DIR/models/3gram_enhancedAncora.model
wapitiPort=9003
freelingPort=9001
freelingConf=../FreeLingModules/es_demo.cfg
## config for NE classification (since FL 4, no longer possible in the morphological analyzer)
nec=../FreeLingModules
neccfg=freeling-share/es/nerc/nec/nec-ab-rich.dat
matxin=matxin-lex

## set variables for desr parser
# desrPort1=5678
# desrPort2=1234
# desrModel1=/mnt/storage/hex/projects/clsquoia/parser/desr-1.3.2/spanish_es4.MLP
# desrModel2=/mnt/storage/hex/projects/clsquoia/parser/desr-1.3.2/spanish.MLP

# set variables for maltparser
maltPort=9002
maltModel=$SQUOIA_DIR/models/splitDatesModel.mco
maltPath=maltparser.jar
#maltHost=localhost atm: always localhost


## set variables for lexical transfer
bidix=$SQUOIA_DIR/squoia/esqu/lexica/es-quz.bin
#verblex=$SQUOIA_DIR/squoia/esqu/lexica/allVerbs.xml
verblex=storable
nounlex=$SQUOIA_DIR/squoia/esqu/lexica/noun_semantics.txt
#wordnet=/mnt/storage/hex/projects/clsquoia/resources/mcr30
wordnet=storable

####################
# Grammar Files #
####################

# set variables for translation
#esqu_intrachunk_transfer.rules esqu_lexSelection.rules esqu_morphSelection.rules esqu_prepSelection.rules esqu_semanticTags.rules
SemLex=$GRAMMAR_DIR/esqu_semanticTags.rules
LexSelFile=$GRAMMAR_DIR/esqu_lexSelection.rules
MorphSelFile=$GRAMMAR_DIR/esqu_morphSelection.rules
IntraTransferFile=$GRAMMAR_DIR/esqu_intrachunk_transfer.rules
InterTransferFile=$GRAMMAR_DIR/esqu_interchunk_transfer.rules
PrepFile=$GRAMMAR_DIR/esqu_prepSelection.rules
ChunkOrderFile=$GRAMMAR_DIR/qu_interchunkOrder.rules
NodeOrderFile=$GRAMMAR_DIR/qu_intrachunkOrder.rules
NodeChunkFile=$GRAMMAR_DIR/qu_nodesToChunk.rules
ChildToSiblingFile=$GRAMMAR_DIR/qu_childToSiblingChunk.rules

## set variables for morphological generation
#XFST_GENERATOR="$SQUOIA_DIR/squoia/esqu/morphgen/unificadoTransfer.fst"
morphgenerator=$SQUOIA_DIR/squoia/esqu/morphgen_foma/unificadoTransfer.fst
fomaFST=$SQUOIA_DIR/squoia/esqu/morphgen_foma/unificadoTransfer.fst

## set variables for language model
#quModel=$SQUOIA_DIR/models/all_norm_3gram_interpolated_unigrams.lm
quModel=$SQUOIA_DIR/models/cleaned_all_norm_5grams_interpolated.lm
quMorphModel=$SQUOIA_DIR/models/all_morph_5grams_interpolated_unigr.lm
useMorphModel=1
nbest=3

# end translate_demo.cfg
Loading

0 comments on commit 41e9ef5

Please sign in to comment.