-
Notifications
You must be signed in to change notification settings - Fork 0
/
_TranslateXML.lua
381 lines (326 loc) · 12 KB
/
_TranslateXML.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
--[[
A system for translating from one XML format to another.
This is done by building an array that describes how to process the source format.
The processing of an element can automatically:
Build a new element, with a possibly different name from the original.
Build new attributes for that new element, with possibly different names.
Process child elements, based on matches from a table.
Call a processing function that can do special-case work for the element.
Skip creating an element and simply process the child elements.
The main function which processes a file takes an array of "processors". Each processor
is a table that is defined as follows:
{
--An element name string or a general function(node) that returns
--`true` or `false`, depending on whether it matches.
--Test matching is always in the order specified in the outer array; the first
--matching test wins the right to process the element.
test = "Input_Element_Name",
--When present, says that all elements that match this `test`
--will be processed together, with a single begin/end.
--The order between elements that match will be preserved.
--By default, all elements that match the `test` will be processed
--in sequence before processing the next element that *doesn't* match
--the test.
collate =
{
--If true, then the collation will only collate consecutive runs of that
--matched element. That is, if you have 3 <name>s followed by a <type>
--followed by 2 <name>s, if the <name> parser uses `consecutive`,
--then it will invoke the processor for the 3 consecutive <name>s, then
--process the <type>, then process the 2 consecutive <name>s.
--So if you use `start` and `stop` elements, you can wrap runs of
--such elements.
consecutive = true,
--Normally, when building a collation, all other processors at this level
--are checked against each node. This means that if earlier tests
--match nodes in `test`, then they will be processed by those
--processors instead of the collation.
--By setting `consume`, once we find a node that matches
--`test`, then the only thing that determines whether subsequent
--nodes go into the collation is whether they match the `consume`
--criteria.
--The `consume` value may be an element name string or
--a general function(node) that returns `true` or `false`,
--depending on whether it matches.
consume = "Consume_Element_Name",
--If present, then processing elements that match this test will take place
--after processing all non-deferred elements. The number provided by this
--property is a priority. Deferred processing will happen
--in the order of the priorities, from highest to lowest.
--If you use both `defer` and `consecutive`, an assert will trigger.
defer = 23,
--Creates the named element before processing any nodes in the collation group,
--which will be the parent of all nodes in the group.
--May also be a function(writer), which can do arbitrary writing.
--Optional.
start = "Element-To-Wrap",
--If `start` was a function, then `stop` will be called after all processing.
stop = function(writer) end,
--Normally, when doing collation, each individual element in the
--collation is processed by the `element` and `children` part of
--the rule.
--However, if this is set to `true`, then `element` processing is skipped.
--Instead, it will match the collated nodes against the `children`
--rules as though the collated nodes were all the child nodes of
--a single parent.
--This makes it possible to do collation on a group of multiple
--kinds of elements, which generate multiple kinds of elements.
--It will still do `start`/`stop` elements.
group = true,
},
--Provokes the creation of an element.
element =
{
--The name of the element being generated, or function taking the
--node and returning the element's name
name = "Element Name",
--Specifies whether formatting should be applied to
--the element's children.
verbatim = true,
--Used for mapping a specific input attribute to a single output attribute.
--Multiple input attributes can map to the same output, but if that happens,
--they cannot all match simultaneously.
map_attribs =
{
--Verbatim copy of value of Input to Output
["Input Attrib Name"] = "Output Attrib Name",
--Verbatim copy of input to output of the same name.
["Input Attrib Name"] = true,
--Arbitrary processing of value of input, but only
--to write a single output attribute.
--Return value is the new attribute's name and the output value.
["Input Attrib Name"] = function(value, node)
return NewAttributeName, NewAttributeValue
end
},
--Used for creating new attributes.
attribs =
{
--Creates a new attribute from a given, unchanging value.
["Output Attrib Name"] = "attribute value",
--Creates a new attribute by calling a function.
--If the function returns `nil`, the attribute will not be written.
["Output Attrib Name"] = function(node)
end
},
--After writing any attributes, performs arbitrary writing.
--You may write attributes here, but obviously before any elements.
--Do not PopElement more times than you PushElement
--within this function.
proc = function(writer, node)
end,
},
--Processes child nodes of this one.
--If there is no `element`, then any elements created here will be children
--of whatever came before.
--If `element` exists, then processing children will happen *after*
--`element.proc` (if any), which happens after `element.attribs` (if any). But
--it will still happen within the element created by `element`.
children =
{
},
}
]]
--Module table; the public API is attached to it at the end of the file.
local funcs = {}
--Forward declaration: `Process` and `ProcessCollation` below call `TranslateXML`
--before its definition appears, so the local must already exist.
local TranslateXML
--Writes a single attribute through `writer`, unless `value` is nil.
--Non-string values are converted with `tostring` before being written.
local function WriteAttribute(writer, name, value)
    if value ~= nil then
        writer:AddAttribute(name, tostring(value))
    end
end

--Translates one `node` according to the processor `proc`.
--  writer: output object. Its `PushElement`, `AddAttribute` and `PopElement`
--      methods are called here.
--  node: the source node. `node.attr` and `node.kids` are read if present.
--  proc: a processor table, as described in the header of this file.
--If `proc.element` exists: opens the output element, writes the attributes from
--`map_attribs` and then `attribs`, runs `element.proc`, translates the children
--inside the element, and finally closes it.
--If `proc.element` is absent, only the children are translated; anything they
--create goes into whatever element is currently open in `writer`.
--Raises an error if the element name (or the name function's result) is nil/false.
local function Process(writer, node, proc)
    local element = proc.element
    if element then
        local name = element.name
        if type(name) == "function" then
            name = name(node)
        end
        if not name then
            --Report which processor failed. A string `test` is raised as-is;
            --a function (or missing) `test` is turned into readable text rather
            --than being raised as a non-string error object.
            local test = proc.test
            if type(test) ~= "string" then
                test = "no element name produced by processor with test: " .. tostring(test)
            end
            error(test, 0)
        end
        writer:PushElement(name, nil, element.verbatim)

        --Don't map attributes for nodes that don't have any.
        local source_attribs = node.attr
        if element.map_attribs and source_attribs then
            for attrib, map in pairs(element.map_attribs) do
                local outname, value
                if type(map) == "string" then
                    --Rename: copy the input value to a differently named output.
                    outname, value = map, source_attribs[attrib]
                elseif map == true then
                    --Verbatim copy under the same name.
                    outname, value = attrib, source_attribs[attrib]
                elseif source_attribs[attrib] then
                    --Function mapping; only invoked if the input attribute exists.
                    outname, value = map(source_attribs[attrib], node)
                end
                WriteAttribute(writer, outname, value)
            end
        end

        if element.attribs then
            for attrib, value in pairs(element.attribs) do
                if type(value) == "function" then
                    --A nil result means "do not write this attribute".
                    value = value(node)
                end
                WriteAttribute(writer, attrib, value)
            end
        end

        if element.proc then
            element.proc(writer, node)
        end
    end

    --Nodes without a `kids` array have no children to translate, so skip
    --them instead of handing nil to TranslateXML.
    if proc.children and node.kids then
        TranslateXML(writer, node.kids, proc.children)
    end

    if element then
        writer:PopElement()
    end
end
--Decides whether `node` matches a test.
--  node: the node being examined.
--  proc: processor table; its `test` field is used when `test` is not given.
--  test: optional override, either an element name or a function(node).
--A string test matches only element nodes with exactly that name.
--Any other test is called with the node, and its result is reduced to a boolean.
--Always returns `true` or `false`.
local function ShouldProcess(node, proc, test)
    local matcher = test or proc.test
    if type(matcher) ~= "string" then
        --Predicate form: any truthy result counts as a match.
        return matcher(node) and true or false
    end
    --Name form: only elements can match by name.
    return node.type == "element" and node.name == matcher
end
--Processes a group of collated nodes as one unit.
--  writer: output object; `PushElement`/`PopElement` are used for string wrappers.
--  proc: the collating processor; `proc.collate` must exist.
--  node_arr: array of the nodes that belong to this collation.
--`collate.start` decides how the group is wrapped:
--  string: the group is placed inside a new element with that name.
--  function: `start(writer)` runs before and `stop(writer)` after the group;
--      a missing `stop` trips an assert before `start` is called.
--  anything else: no wrapping.
--With `collate.group == true` the nodes are matched against `proc.children`
--as if they were siblings; otherwise each node is handled by `Process`.
local function ProcessCollation(writer, proc, node_arr)
    local collate = proc.collate
    local opener = collate.start
    local opener_kind = type(opener)
    local wrap_in_element = (opener_kind == "string")
    local wrap_with_callbacks = (opener_kind == "function")

    --Open the wrapper, if any.
    if wrap_in_element then
        writer:PushElement(opener)
    elseif wrap_with_callbacks then
        assert(collate.stop)
        opener(writer)
    end

    --Emit the group's contents.
    if collate.group ~= true then
        for _, member in ipairs(node_arr) do
            Process(writer, member, proc)
        end
    else
        TranslateXML(writer, node_arr, proc.children)
    end

    --Close the wrapper, mirroring how it was opened.
    if wrap_in_element then
        writer:PopElement()
    elseif wrap_with_callbacks then
        collate.stop(writer)
    end
end
--Gathers the nodes belonging to one collation.
--  start_node_ix: index into `node_arr` of the first matching node; that node
--      is always included in the result.
--  node_arr: the sibling nodes being scanned.
--  test_ix: index into `procs` of the collating processor.
--  procs: the processor array for this level.
--  consecutive: if true, stop scanning at the first node that ends the run.
--Scanning starts at `start_node_ix + 1`.
--Without `collate.consume`, each node goes to the first processor in `procs`
--whose test matches. It is collected if that processor is `procs[test_ix]`.
--A node matched by a different processor ends a consecutive run, while a
--node matched by no processor is ignored and does not end the run.
--With `collate.consume`, only the `consume` test is applied: matching nodes are
--collected, and a non-matching node ends a consecutive run.
--Returns the array of collected nodes and the number of nodes scanned, counting
--the start node but not the node that ended the run.
local function AccumulateCollation(start_node_ix, node_arr, test_ix, procs, consecutive)
    local owner = procs[test_ix]
    local consume = owner.collate.consume
    local collected = { node_arr[start_node_ix] }
    local scanned = 1

    local cursor = start_node_ix + 1
    local last = #node_arr
    while cursor <= last do
        local candidate = node_arr[cursor]
        local belongs, ends_run

        if consume then
            --The consume test alone decides membership.
            belongs = ShouldProcess(candidate, owner, consume)
            ends_run = not belongs
        else
            --Find which processor would win this node.
            local winner
            for proc_ix = 1, #procs do
                local proc = procs[proc_ix]
                if ShouldProcess(candidate, proc) then
                    winner = proc
                    break
                end
            end
            belongs = (winner == owner)
            --Nodes nobody wants are ignored; they never end a run.
            ends_run = (winner ~= nil) and not belongs
        end

        if belongs then
            collected[#collected + 1] = candidate
        elseif ends_run and consecutive then
            break
        end

        scanned = scanned + 1
        cursor = cursor + 1
    end

    return collected, scanned
end
--Translates the nodes in `node_arr` by matching each one against `procs`.
--Call this with `doc.kids`
--  writer: output object handed on to the processing functions.
--  node_arr: array of sibling nodes to translate.
--  procs: array of processor tables (see the header of this file). For each
--      node, the first processor whose test matches handles it; nodes that
--      match no processor are ignored.
--Processors without `collate` handle their node immediately. Collating
--processors are either deferred to the end (`defer`), gathered in consecutive
--runs (`consecutive`), or gathered across the whole array on first match.
--Deferred groups run from highest to lowest priority. Groups with the same
--priority run in the order their first node was encountered.
TranslateXML = function(writer, node_arr, procs)
    --Array of procs that have deferred nodes, and
    --also a map from proc to the array of nodes to be processed.
    local deferments = {}
    --Map from deferred proc to the position at which it was first seen.
    --Used as a tie-breaker, because table.sort is not stable.
    local defer_order = {}
    --If a processor matches and is in this set, then we should skip the node.
    --It has already been processed for collation.
    local skips = {}
    --Nodes taken by a non-consecutive `consume` collation. Such nodes need not
    --match the collating processor's `test`, so `skips` alone would let other
    --processors handle them a second time.
    local consumed = {}

    local node_ix, node_len = 1, #node_arr
    while node_ix <= node_len do
        local node = node_arr[node_ix]
        if not consumed[node] then
            for proc_ix, proc in ipairs(procs) do
                if ShouldProcess(node, proc) then
                    --Did we already process it?
                    if not skips[proc] then
                        local collate = proc.collate
                        if not collate then
                            --Regular processing.
                            Process(writer, node, proc)
                        else
                            assert(not(collate.defer and collate.consecutive))
                            if collate.defer then
                                assert(type(collate.defer) == "number")
                                --Store node for later processing.
                                local pending = deferments[proc]
                                if pending then
                                    pending[#pending + 1] = node
                                else
                                    --New deferment. Array of 1
                                    deferments[proc] = { node }
                                    deferments[#deferments + 1] = proc
                                    defer_order[proc] = #deferments
                                end
                            elseif collate.consecutive then
                                local nodes, skip_count = AccumulateCollation(
                                    node_ix, node_arr, proc_ix, procs, true)
                                ProcessCollation(writer, proc, nodes)
                                --Skip these nodes. The -1 accounts for the
                                --increment at the bottom of the loop.
                                node_ix = node_ix + skip_count - 1
                            else
                                local nodes = AccumulateCollation(
                                    node_ix, node_arr, proc_ix, procs)
                                ProcessCollation(writer, proc, nodes)
                                skips[proc] = true
                                if collate.consume then
                                    for _, taken in ipairs(nodes) do
                                        consumed[taken] = true
                                    end
                                end
                            end
                        end
                    end
                    break --First matching processor wins; stop testing.
                end
            end
        end
        node_ix = node_ix + 1
    end

    --Process deferments, in order highest-to-lowest priority.
    table.sort(deferments, function(lhs, rhs)
        local lhs_priority, rhs_priority = lhs.collate.defer, rhs.collate.defer
        if lhs_priority ~= rhs_priority then
            return lhs_priority > rhs_priority
        end
        return defer_order[lhs] < defer_order[rhs]
    end)
    for _, proc in ipairs(deferments) do
        ProcessCollation(writer, proc, deferments[proc])
    end
end
--Expose the translator entry point and return the module table to `require`.
funcs.TranslateXML = TranslateXML
return funcs