-
Notifications
You must be signed in to change notification settings - Fork 0
/
Makefile
223 lines (201 loc) · 6.49 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
# This is a makefile that builds the sme-nob translation parser
# It should be rewritten with Twig or something similar instead of
# the shellscript we have now
# =========== Tools: ============= #
# The -utf8 flag is currently disabled for both lexc and xfst. It is kept in
# a comment on its own line so that it cannot leak trailing whitespace into
# the variable values.
#   -utf8
LEXC := lexc
XFST := xfst
# Saxon jar; the tilde is expanded by the shell when the recipe runs.
XSLT := ~/lib/saxon8.jar
# XQuery front end; presumably requires Saxon on the Java CLASSPATH - verify.
XQL  := java net.sf.saxon.Query
JARF := -jar
SSH  := /usr/bin/ssh

# =========== Paths & files: ============= #
GTHOME      := ../../../gt
SMETESTING  := $(GTHOME)/sme/testing
SMENOBMAC   := deliv/macdict/objects
# Contains a space: always quote when used in a recipe.
SMENOBNAME  := Nordsamisk-norsk ordbok.dictionary
SMENOBZIP   := smenob-mac.dictionary.tgz
UPLOADDIR   := sd@giellatekno.uit.no:xtdoc/sd/src/documentation/content/xdocs
DOWNLOADDIR := http://www.divvun.no/static_files

# Individual source parts that are merged into $(SN_XML).
ADJ       := adjective_smenob.xml
ADJSTAT   := adjstatpar_smenob.xml
DIV       := div_statisk_smenob.xml
MWE       := mwe_smenob.xml
NUM       := num_smenob.xml
# NOTE(review): this is the only part with a .stat extension - confirm it is
# not meant to be .xml like the others.
OTHERSTAT := other_stat_smenob.stat
DEM       := pronDem_smenob.xml
INDEF     := pronIndef_smenob.xml
PERS      := pronPers_smenob.xml
REC       := pronRec_smenob.xml
REFL      := pronRefl_smenob.xml
REL       := pronRel_smenob.xml
COP       := verbCop_smenob.xml
NEG       := verbNeg_smenob.xml
SUP       := verbSupNeg_smenob.xml
ADV       := adverb_smenob.xml
NOUNC     := nounCommon_smenob.xml
NOUNP     := nounRevProper_smenob.xml
NOUNA     := nounActor_smenob.xml
NOUNG     := nounG3_smenob.xml
OTHER     := nounProperPl_smenob.xml
VERB      := verb_smenob.xml

# Generated files (written below $(BIN) by the recipes).
SN_XML  := smenob.xml
SN_XSL  := smenob.xsl
SN_LEXC := smenob.lexc
SN_HTML := smenob.html
SN_FST  := smenob.fst
S_DIC   := sme.dic
S_FST   := smedic.fst

# Directories.
SRC     := src
BIN     := bin
SCRIPTS := scripts

# Progress banners used as recipe lines. BEGIN must stay recursively
# expanded (=) because it refers to $@, which only has a value inside a recipe.
BEGIN = @echo "*** Generating the $@-file ***"
END  := @echo "Done."

# =========== Other: ============= #
# Evaluated once at parse time so that every use within a single run sees
# the same date stamp (and `date` is not forked on each expansion).
DATE := $(shell date +%Y%m%d)
# Transducer for the sme-nob dictionary.
#
# Outline:
#   1. A lexc file is produced (see the $(SN_LEXC) rule): its first line is
#      "LEXICON Root", followed by one line per entry of the form
#          smelemma:firstnobtranslation # ;
#   2. xfst loads that file with "read lexc".
#
# Only the first <t node of an entry may be chosen, even though there may be
# several. The result is therefore a transducer that maps each lemma to just
# its first translation. The uninverted net is saved as ismenob.fst, then the
# net is inverted and saved under the target name.
#
# Builds $(BIN)/smenob.fst (and $(BIN)/ismenob.fst as a by-product).
$(SN_FST): $(SN_LEXC)
	@echo
	$(BEGIN)
	@echo
	@printf '%s \n' \
		"read lexc < $(BIN)/$<" \
		"save $(BIN)/ismenob.fst" \
		"invert net" \
		"save $(BIN)/$@" \
		"quit" > ../tmp/smenob-save-script
	$(XFST) < ../tmp/smenob-save-script
	@rm -rf ../tmp/smenob-save-script
	@echo
	$(END)
	@echo
# Automaton for the Sámi words in the dictionary.
#
# Outline:
#   1. Pick the lemmas and print them to a list (see the $(S_DIC) rule).
#   2. Read the list into xfst.
#   3. Save it as an automaton.
# The perlscript for glossing should use smenob.lexc or something similar.
#
# Builds $(BIN)/smedic.fst.
#
# The original recipe used "save stack $>". "$>" is not a GNU make automatic
# variable and expands to nothing, so xfst was given no output file name.
# The stack is now saved to $(BIN)/$@, mirroring the $(SN_FST) rule.
$(S_FST): $(S_DIC)
	@echo
	$(BEGIN)
	@echo
	@printf "read text < $(BIN)/$< \n\
	save stack $(BIN)/$@ \n\
	quit \n" > ../tmp/smedic-save-script
	$(XFST) < ../tmp/smedic-save-script
	@rm -f ../tmp/smedic-save-script
	@# The word list is only an intermediate product; remove it once compiled.
	@rm -f $(BIN)/$<
	@echo
	$(END)
	@echo
# Extract the lemma list ($(BIN)/sme.dic) from $(BIN)/smenob.xml by running
# the get-lemma.xsl stylesheet through Saxon.
$(S_DIC): $(SN_XML)
	@echo
	$(BEGIN)
	@java $(JARF) $(XSLT) \
		$(BIN)/$< \
		$(SCRIPTS)/get-lemma.xsl \
		> $(BIN)/$@
	@echo
	$(END)
	@echo
# Generate $(BIN)/smenob.lexc from $(BIN)/smenob.xml with the XQuery 1.0
# script smenob-pairs.xql. The input is passed to the query as the external
# parameter "smenob"; its path carries a leading "../".
$(SN_LEXC): $(SN_XML)
	@echo
	$(BEGIN)
	@$(XQL) $(SCRIPTS)/smenob-pairs.xql \
		smenob=../$(BIN)/$< \
		> $(BIN)/$@
	@echo
	$(END)
	@echo
# Create the lexc file from the smenob.xml-file (using XSLT 2.0, which is not as nice as XQuery)
# $(SN_LEXC): $(SN_XML)
# @echo
# $(BEGIN)
# @java $(JARF) $(XSLT) $(BIN)/$< $(SCRIPTS)/smenob-pairs.xsl > $(BIN)/$@
# @echo
# $(END)
# @echo
# Render the complete dictionary as a single, simple HTML page for local
# browsing. First prerequisite: the XML dictionary (read from $(BIN));
# second prerequisite: the stylesheet that is applied to it.
$(SN_HTML): $(SN_XML) $(SCRIPTS)/$(SN_XSL)
	@echo
	$(BEGIN)
	@java $(JARF) $(XSLT) \
		$(BIN)/$< \
		$(word 2,$^) \
		> $(BIN)/$@
	@echo
	$(END)
	@echo
# Create $(BIN)/smenob.xml by unifying the individual part files (XQuery 1.0).
# Each part is handed to collect-smenob-parts.xql as a name=path parameter;
# the paths carry a leading "../" in front of $(SRC).
# The rule has no prerequisites, so make does not track changes to the parts.
$(SN_XML):
	@echo
	$(BEGIN)
	@$(XQL) $(SCRIPTS)/collect-smenob-parts.xql \
		adj=../$(SRC)/$(ADJ) \
		adv=../$(SRC)/$(ADV) \
		nounc=../$(SRC)/$(NOUNC) \
		nouna=../$(SRC)/$(NOUNA) \
		noung=../$(SRC)/$(NOUNG) \
		nounp=../$(SRC)/$(NOUNP) \
		other=../$(SRC)/$(OTHER) \
		adjstat=../$(SRC)/$(ADJSTAT) \
		div=../$(SRC)/$(DIV) \
		mwe=../$(SRC)/$(MWE) \
		num=../$(SRC)/$(NUM) \
		otherstat=../$(SRC)/$(OTHERSTAT) \
		dem=../$(SRC)/$(DEM) \
		indef=../$(SRC)/$(INDEF) \
		pers=../$(SRC)/$(PERS) \
		rec=../$(SRC)/$(REC) \
		refl=../$(SRC)/$(REFL) \
		rel=../$(SRC)/$(REL) \
		cop=../$(SRC)/$(COP) \
		neg=../$(SRC)/$(NEG) \
		sup=../$(SRC)/$(SUP) \
		verb=../$(SRC)/$(VERB) \
		> $(BIN)/$@
	@echo
	$(END)
	@echo
# Create the smenob.xml-file by unifying the individual parts (using XSLT 2.0, which is not as nice as XQuery)
# $(SN_XML):
# @echo
# $(BEGIN)
# @java $(JARF) $(XSLT) $(SCRIPTS)/dummy.xml $(SCRIPTS)/collect-smenob-parts.xsl \
# adj=../$(SRC)/$(ADJ) adv=../$(SRC)/$(ADV) nounc=../$(SRC)/$(NOUNC) \
# nounp=../$(SRC)/$(NOUNP) nouna=../$(SRC)/$(NOUNA) \
# noung=../$(SRC)/$(NOUNG) other=../$(SRC)/$(OTHER) verb=../$(SRC)/$(VERB) > $(BIN)/$@
# @echo
# $(END)
# @echo
# Build a MacOS X / Dictionary.app compatible dictionary bundle.
# "macdict" is only an alias for the real file target below, so it is
# declared phony; otherwise a file or directory named "macdict" would make
# it look permanently up to date.
.PHONY: macdict
macdict: macdict/smenob.xml

# Steps performed to build macdict/smenob.xml:
#   1. Pull "lemma pos" pairs out of the <l pos="..."> elements of the source
#      dictionary, keep those whose pos is n, a or v, drop lines starting
#      with "-", and write the sorted unique list to
#      $(SMETESTING)/dictwords.txt.
#   2. Run gen-paradigms.sh on that list inside $(SMETESTING).
#   3. Apply add-paradigm.xsl with Xalan (gtpath points at $(SMETESTING)),
#      writing to a temporary file.
#   4. Convert the temporary file with smenob2macdict.xsl into the target
#      and delete the temporary file.
#   5. Run the sub-make in deliv/macdict.
macdict/smenob.xml: src/smenob.xml \
                    scripts/smenob2macdict.xsl \
                    scripts/add-paradigm.xsl
	@echo
	@echo "*** Extracting words from dictionary file. ***"
	@echo
	grep '<l pos' $< | \
		perl -pe 's/^.*pos="([^"]+)">(.+)<.*$$/\2 \1/' | \
		grep ' [nav]$$' | grep -v '^-' | sort -u \
		> $(SMETESTING)/dictwords.txt
	@echo
	@echo "*** Generating paradigms. ***"
	@echo
	cd $(SMETESTING) && ./gen-paradigms.sh dictwords.txt
	@echo
	@echo "*** Building smenob.xml for MacOS X Dictionary ***"
	@echo
	java -Xmx1024m \
		org.apache.xalan.xslt.Process \
		-in $< \
		-out $@.tmp \
		-xsl scripts/add-paradigm.xsl \
		-param gtpath $(SMETESTING)
	@xsltproc scripts/smenob2macdict.xsl $@.tmp > $@
	rm -f $@.tmp
	@# Use $(MAKE), not a literal "make", so that -j, -n and command-line
	@# variables are propagated to the sub-make.
	@cd deliv/macdict && $(MAKE)
# Package the MacOS X dictionary module and upload it.
#
# Steps:
#   1. tar+gzip the dictionary bundle into $(SMENOBZIP).
#   2. scp the archive to $(UPLOADDIR)/static_files/ under a date-stamped name.
#   3. Over ssh, repoint the undated symlink at the newly uploaded archive.
#   4. Print the dated download URL and the permanent link.
#
# This target names an action, not a file, hence the .PHONY declaration.
#
# NOTE(review): the scp destination uses the directory "static_files" while
# the ssh command changes into "staticfiles/" (and a different host name is
# used). Confirm that both refer to the same location on the server.
.PHONY: upload-macdict
upload-macdict:
	@echo
	@echo "*** TAR-ing and ZIP-ing smenob Mac dictionary ***"
	@echo
	tar -czf $(SMENOBZIP) "$(SMENOBMAC)/$(SMENOBNAME)"
	@echo
	@echo "*** Uploading smenob Mac dictionary to www.divvun.no ***"
	@echo
	@scp $(SMENOBZIP) $(UPLOADDIR)/static_files/$(DATE)-$(SMENOBZIP)
	@$(SSH) sd@divvun.no \
		"cd staticfiles/ && ln -sf $(DATE)-$(SMENOBZIP) $(SMENOBZIP)"
	@echo
	@echo "*** New zip file for smenob Mac dictionary now available at: ***"
	@echo
	@echo "$(DOWNLOADDIR)/$(DATE)-$(SMENOBZIP)"
	@echo
	@echo "*** Permlink to newest version is always: ***"
	@echo
	@echo "$(DOWNLOADDIR)/$(SMENOBZIP)"
	@echo
# Remove generated files from $(BIN) (fst, dic, lexc, list, html, xml, txt).
# Declared phony so that a stray file named "clean" cannot disable the target.
.PHONY: clean
clean:
	rm -f $(BIN)/*fst $(BIN)/*dic $(BIN)/*lexc $(BIN)/*list $(BIN)/*html $(BIN)/*xml $(BIN)/*txt