-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathxls2lua.py
452 lines (397 loc) · 14.8 KB
/
xls2lua.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
# coding:utf-8
import sys
import xlrd
import codecs
reload(sys)
sys.setdefaultencoding( "utf-8" )
type = sys.getfilesystemencoding()
# 定义符号
type_array = 'array'
type_dict = 'dict'
type_int = 'int'
type_number = 'number'
type_bool = 'bool'
type_string = 'string'
type_function = 'function'
type_table = 'table'
# 定义简单类型
simple_type_def = {
type_int: True,
type_number: True,
type_bool: True,
type_string: True,
type_table: True,
}
# 定义复合类型
complex_token_def = {
type_array: ('array<', '>'),
type_dict: ('dict<', '>'),
}
# 判断是否简单类型
def is_simple_type(ty):
return ty in simple_type_def
# 将整数转换为excel的列
def to_xls_col(num):
a = ord('A')
result = ''
tmp = num
while True:
factor = tmp / 26
remain = tmp % 26
tmp = factor
result = chr(remain + a) + result
if factor == 0:
break
return result
# 输出解析错误信息并退出程序
def parse_error(msg):
print '[parser error]%s' % msg.decode('utf-8').encode(type)
exit(1)
# 输出求值错误信息并退出程序
def eval_error(msg):
print '[eval error]%s' % msg.decode('utf-8').encode(type)
exit(2)
# 为字符串内容添加双引号
def add_quout(text):
out = ''
for c in text:
if c == '"':
out += '\\"'
elif c == '\\':
out += '\\\\'
elif c == '\n':
out += '\\n'
elif c == '\r':
out += '\\r'
else:
out += c
return '"' + out + '"'
# 表头符号,每个符号由类型和名字组成,用于构造表格数据结构
class Token:
def __init__(self, option, decl_type, name, col, sheet_name):
self.option = option
self.decl_type = decl_type
self.name = name
self.col = col
self.sheet_name = sheet_name
def __str__(self):
if self.decl_type == '':
return self.name
else:
return '%s:%s' % (self.name, self.decl_type)
# 节点解析器,由表头定义构造,根据行数据求值
class NodeParser:
def __init__(self, node_type, token):
self.type = node_type
self.members = []
self.begin_col = -1
self.end_col = -1
self.required = None # 该节点必须有值,对于容器类型则表示全部子项为空时仍会输出容器节点本身
self.unique = None
self.token = token
self.eval_table = {} # 求值存档
self.unique_check = {} # 重复检查
if token is not None:
self.begin_col = token.col
self.end_col = token.col
self.required = (token.option == 'required')
self.unique = (token.option == 'unique')
# 记录塌陷子节点
self.flat_fields = {}
# 添加元素项,数组类型name为None
def add_member(self, name, pt):
self.members.append((name, pt))
def get_member(self, name):
for key, value in self.members:
if key == name:
return value
return None
def is_all_member_nil(self, row, row_cells):
for _, m in self.members:
if m.eval(row, row_cells) != 'nil':
return False
return True
def eval(self, coord, row_data):
if coord in self.eval_table:
return self.eval_table[coord]
eval_str = 'nil'
# 单列类型空值处理
if (self.type != type_array and self.type != type_dict) and row_data[self.begin_col].value == '':
eval_str = 'nil'
else:
if self.type == type_int:
val = row_data[self.begin_col].value
i = int(val)
eval_str = str(i)
elif self.type == type_string:
val = row_data[self.begin_col].value
if val == '':
eval_str = 'nil'
else:
eval_str = add_quout(val)
elif self.type == type_number:
val = row_data[self.begin_col].value
eval_str = str('%g' % val)
elif self.type == type_bool:
val = row_data[self.begin_col].value
lower_str = str(val).lower()
if lower_str == '0' or lower_str == 'false':
eval_str = 'false'
else:
eval_str = 'true'
elif self.type == type_table:
val = row_data[self.begin_col].value
eval_str = '{' + val + '}'
elif self.type == type_function:
val = row_data[self.begin_col].value
eval_str = '%s return %s end' % (self.token.decl_type, val)
elif self.type == type_array:
all_member_nil = self.is_all_member_nil(coord, row_data)
# 全部子项为空,且该节点为可选类型,则不输出
if all_member_nil and not self.required:
eval_str = 'nil'
elif all_member_nil:
eval_str = '{}'
else:
eval_str = '{'
mcount = len(self.members)
for i in range(mcount):
(_, m) = self.members[i]
eval_str += m.eval(coord, row_data)
eval_str += ','
if eval_str.endswith(','):
eval_str = eval_str[:-1]
eval_str += '}'
elif self.type == type_dict:
all_member_nil = self.is_all_member_nil(coord, row_data)
# 全部子项为空,且该节点为可选类型,则不输出
if all_member_nil and not self.required:
eval_str = 'nil'
elif all_member_nil:
eval_str = '{}'
else:
eval_str = '{'
mcount = len(self.members)
for i in range(mcount):
(key, m) = self.members[i]
meval = m.eval(coord, row_data)
if meval != 'nil':
eval_str += '%s=%s' % (key, meval)
eval_str += ','
if eval_str.endswith(','):
eval_str = eval_str[:-1]
eval_str += '}'
else:
eval_error('无法求值列%s未知类型%s' % (to_xls_col(self.begin_col), str(self.type)))
# 空值检查
if self.required and eval_str == 'nil':
eval_error('列%s项%s类型节点不能为空' % (to_xls_col(self.begin_col), str(self.type)))
# 重复赋值检查
if self.unique:
if eval_str in self.unique_check:
exist_coord = self.unique_check[eval_str]
eval_error('列%s项%s表%s第%d行与表%s第%d行重复赋值' %
(to_xls_col(self.begin_col), str(self.token.name), str(coord[0]), coord[1], str(exist_coord[0]), exist_coord[1]))
self.unique_check[eval_str] = coord
self.eval_table[coord] = eval_str
return eval_str
# 塌陷解析树,将所有子节点平铺
def flat(self):
self.flat_fields = {}
self.__flat__(None, self.flat_fields)
def __flat__(self, prefix_path, flat_paths):
for name, m in self.members:
path = name
if prefix_path is not None:
path = prefix_path + '.' + name
flat_paths[path] = m
m.__flat__(path, flat_paths)
m.flat()
# 解析器
class SheetParser:
def __init__(self, ts):
self.tokens = ts
self.pos = 0
# 配置符合类型对应的解析函数
self.parser_cfg = {
type_array: self.parse_array_node,
type_dict: self.parse_dict_node,
}
root_node = NodeParser(type_dict, None)
root_node.begin_col = self.cur_token().col
self.parse_dict_node(root_node)
root_node.end_col = self.tokens[self.pos-1].col
self.root_parser = root_node
root_node.flat()
# 取得当前读取位置的符号
def cur_token(self):
if self.pos >= len(self.tokens):
return None
return self.tokens[self.pos]
# 读取头前进
def skip_token(self):
self.pos += 1
# 解析简单类型
def parse_simple_node(self):
cur = self.cur_token()
pt = NodeParser(cur.decl_type, cur)
pt.begin_col = cur.col
pt.end_col = cur.col
return pt
# 解析复合类型
def parse_complex_node(self, pt):
cur = self.cur_token()
for k, v in complex_token_def.items():
if v[0] == cur.decl_type:
value = NodeParser(k, cur)
value.begin_col = cur.col
self.skip_token()
self.parser_cfg[k](value)
return value
parse_error('解析列%d的%s类型元素时,遇到了位于列%d的意外的符号\'%s\'' % (pt.begin_col, pt.type, cur.col, str(cur)))
return None
# 解析数组类型
def parse_array_node(self, arr_node):
cur = self.cur_token()
(_, end_token) = complex_token_def[type_array]
while cur is not None:
if cur.decl_type == end_token:
arr_node.end_col = cur.col
break
if cur.decl_type == '':
parse_error('列%d项%s没有声明类型' % (cur.col, str(cur.name)))
break
elif is_simple_type(cur.decl_type):
value = NodeParser(cur.decl_type, cur)
self.skip_token()
elif cur.decl_type.startswith(type_function):
value = NodeParser(type_function, cur)
self.skip_token()
else:
value = self.parse_complex_node(arr_node)
self.skip_token()
if value is not None:
arr_node.add_member(cur.name, value)
cur = self.cur_token()
# 解析字典类型
def parse_dict_node(self, dict_node):
cur = self.cur_token()
(_, end_token) = complex_token_def[type_dict]
while cur is not None:
if cur.decl_type == end_token:
dict_node.end_col = cur.col
break
if cur.name == '':
parse_error('解析列%d的%s类型元素时,遇到位于列%d的元素缺少变量名' % (dict_node.begin_col, dict_node.type, cur.col))
break
if cur.decl_type == '':
parse_error('列%d项%s没有声明类型' % (cur.col, str(cur.name)))
break
elif is_simple_type(cur.decl_type):
value = NodeParser(cur.decl_type, cur)
self.skip_token()
elif cur.decl_type.startswith(type_function):
value = NodeParser(type_function, cur)
self.skip_token()
else:
value = self.parse_complex_node(dict_node)
self.skip_token()
if value is not None:
# 路径变量需要解析取得操作对象
path_id = cur.name.split('.')
path_len = len(path_id)
if path_len > 1:
last_node = dict_node
for i in range(path_len-1):
key = path_id[i]
last_node = last_node.get_member(key)
if last_node is None or last_node.type != type_dict:
parse_error('path_id(%s) is invalid' % cur.name)
break
last_node.add_member(path_id[path_len-1], value)
else:
dict_node.add_member(cur.name, value)
cur = self.cur_token()
# 读取xls文件内容,并过滤注释行
def read_sheets_from_xls(file_path):
workbook = xlrd.open_workbook(file_path)
sheets = []
for sheet in workbook.sheets():
if sheet.ncols <= 0:
continue
cells = []
for y in range(0, sheet.nrows):
# 过滤全空白行
all_empty = True
for v in sheet.row_values(y):
if v != '':
all_empty = False
break
if all_empty:
continue
text = sheet.cell_value(y, 0)
# 过滤注释行
if isinstance(text, unicode) and text.startswith('//'):
continue
cells.append(sheet.row(y))
if len(cells) > 0:
sheets.append((sheet.name, cells))
return sheets
def build_parser_tree(sheet_name, sheet_cells):
ts = []
for col in range(len(sheet_cells[0])):
option = sheet_cells[0][col].value
decl_type = sheet_cells[1][col].value
name = sheet_cells[2][col].value
t = Token(option, decl_type, name, col, sheet_name)
ts.append(t)
sp = SheetParser(ts)
return sp.root_parser
def parser_check(from_sheet_name, from_ps, to_sheet_name, to_ps):
for path, m in from_ps.flat_fields.items():
if not m.unique:
continue
if path not in to_ps.flat_fields:
parse_error('unique field %s not define in sheet %s' % (path, to_sheet_name))
else:
unique_node = to_ps.flat_fields[path]
for eval_str in unique_node.unique_check.keys():
if eval_str in m.unique_check:
eval_error('sheet %s unique field %s val=%s is dup to sheet %s' %
(from_sheet_name, path, eval_str, to_sheet_name))
def xls2lua(file_path, out_file_path):
sheets = read_sheets_from_xls(file_path) # 过滤注释行
exist_parsers = {}
# 求值
out = '{\n'
for sheet_name, cells in sheets:
parser = build_parser_tree(sheet_name, cells)
exist_parsers[sheet_name] = parser
_, key_node = parser.members[0] # 约定第一项为key
out += '--%s\n' % sheet_name
for row in range(3, len(cells)):
row_data = cells[row]
coord = (sheet_name, row)
out += '[%s]=%s,\n' % (key_node.eval(coord, row_data), parser.eval(coord, row_data))
out += '}'
# 交叉检查
for from_name, from_ps in exist_parsers.items():
for to_name, to_ps in exist_parsers.items():
if from_ps == to_ps:
continue
parser_check(from_name, from_ps, to_name, to_ps)
parser_check(to_name, to_ps, from_name, from_ps)
# 输出文件
with codecs.open(out_file_path, "w+", "utf-8") as f:
f.write(out)
def main(argv):
argc = len(argv)
if argc < 3:
print 'usage: xls2lua.py <input_file> <output_file>'
exit(3)
else:
input_file = argv[1]
output_file = argv[2]
xls2lua(input_file, output_file)
if __name__ == '__main__':
main(sys.argv)