-
Notifications
You must be signed in to change notification settings - Fork 1
/
compile-db-gen.py
executable file
·478 lines (408 loc) · 16.3 KB
/
compile-db-gen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse
import copy
import json
import os
import re
import subprocess
import sys
# Source-file filters consulted by print_exec_trace(); parse() fills each list
# with regexes compiled from the matching command-line option.
# --include: when non-empty, a source file is kept only if one regex matches.
include_file = []
# --include-dir: same keep-only-on-match rule, searched against the same path.
include_dir = []
# --exclude: a source file matching any of these regexes is dropped.
exclude_file = []
# --exclude-dir: same drop-on-match rule, searched against the same path.
exclude_dir = []
def compiler_call(executable):
    """Tell whether *executable* names a C/C++ compiler driver.

    Accepted names, each optionally preceded by a directory path, are
    ``cc``/``c++``, ``gcc``/``g++`` and ``clang``/``clang++`` (all three allow
    dash-terminated prefixes such as a cross-compile triplet; the last two
    also allow a ``-N[.N[.N]]`` version suffix) and ``llvm-gcc``/``llvm-g++``.
    """
    known_compilers = (
        r'^([^/]*/)*([^-]*-)*c(c|\+\+)$',
        r'^([^/]*/)*([^-]*-)*g(cc|\+\+)(-\d+(\.\d+){0,2})?$',
        r'^([^/]*/)*([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$',
        r'^([^/]*/)*llvm-g(cc|\+\+)$',
    )
    for regex in known_compilers:
        if re.match(regex, executable):
            return True
    return False
def is_source_file(filename):
    """Tell whether *filename* ends in a recognised C-family source extension.

    The extension is compared case-sensitively, so ``.C`` and ``.CC`` are
    accepted while ``.CPP`` is not.
    """
    suffix = os.path.splitext(filename)[1]
    return suffix in (
        '.c', '.C', '.cc', '.CC', '.cxx', '.cp', '.cpp', '.c++', '.m', '.mm',
        '.i', '.ii', '.mii',
    )
def shell_quote(arg):
    """Return *arg* with every backslash, double quote and single quote
    prefixed by a backslash; all other characters are left untouched."""
    escapes = str.maketrans({'\\': '\\\\', '"': '\\"', "'": "\\'"})
    return arg.translate(escapes)
def shell_escape(arg):
    r"""Quote one argument so it can be joined into a single command string.

    An argument containing no quote or backslash is returned unchanged (note
    that whitespace alone does not trigger quoting).  Otherwise the text
    before the first quote/backslash is kept verbatim, and everything from
    that character to the end is backslash-escaped via shell_quote() and
    wrapped in double quotes, e.g. ``-D_KEY="V S"`` -> ``-D_KEY="\"V S\""``.
    """
    # rtags cannot handle "-D_KEY=\"V S\""; it only supports -D_KEY="\"V S\"",
    # hence the split into an unquoted prefix and a quoted tail.
    #
    # Anchor at the start of the string (match + '*') so an argument that
    # itself begins with a quote or backslash keeps its leading characters,
    # and use DOTALL so a tail containing a newline is not cut short.
    res = re.match(r'([^\'\"\\]*)([\'\"\\].*)', arg, re.DOTALL)
    if res is None:
        return arg
    return '%s"%s"' % (res.group(1), shell_quote(res.group(2)))
def join_command(args):
    """Build one space-separated command string from *args*, passing each
    argument through shell_escape() first."""
    return ' '.join(map(shell_escape, args))
# Cache: compiler executable -> list of "-I<dir>" options, filled by
# get_sys_inc() so each compiler is probed at most once.
g_sys_inc = {}


def get_sys_inc(compiler):
    """Return the compiler's built-in include directories as ``-I`` options.

    The compiler is run as ``<compiler> -x <lang> -E -v -`` with empty input
    and the block between "... starts here:" and "End of search list." on its
    stderr is scanned for paths.  ``<lang>`` is "c++" when the compiler name
    contains "++" or ends in "pp", otherwise "c".

    The result is cached per compiler.  An empty list is returned (and
    cached) when the compiler cannot be executed or prints no search list.
    """
    if compiler in g_sys_inc:
        return g_sys_inc[compiler]

    lang = "c++" if re.search(r"\+\+|pp$", compiler) else "c"
    try:
        p = subprocess.Popen([compiler, "-x", lang, "-E", "-v", "-"],
                             stderr=subprocess.PIPE,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE)
    except OSError as err:
        # Auto-detection is a convenience; a compiler that cannot be started
        # from here must not abort the whole database generation.
        print("warning: cannot run %s to detect system include dirs: %s"
              % (compiler, err), file=sys.stderr)
        info = ''
    else:
        # The pipes are binary, so feed bytes; tolerate non-UTF-8 diagnostics.
        info = p.communicate(input=b'')[1].decode('utf-8', errors='replace')

    incs = []
    search_list = re.search(
        r"^.*starts here:((?:.|\n)*?)End of search list.", info, re.MULTILINE)
    if search_list:
        # Each directory is taken from the first "/" to the end of its line.
        dirs = re.findall("/.*$", search_list.group(1), re.MULTILINE)
        incs = ["-I%s" % x for x in dirs]
    # Cache even the empty result so a failing compiler is probed only once.
    g_sys_inc[compiler] = incs
    return incs
class OType:
    """Integer tags stored under the 'type' key of the records built by
    genlineobjs(): process exit, chdir call and execve call."""
    EXIT, CHDIR, EXEC = 1, 2, 3
# Patterns for the trace-log lines this script reacts to.  In every pattern
# group 1 is the leading number of the line, which genlineobjs() uses as the
# pid of the process that made the call.

# chdir(...) that returned 0 or is reported as "<unfinished ...>"; group 2 is
# the raw argument text.
chdir_re = re.compile(r"^(\d+) +chdir\((.*)(\)\s+= 0|<unfinished \.\.\.>)")
# execve(...) that returned 0 or is reported as "<unfinished ...>"; group 2 is
# the raw argument text.
exec_re = re.compile(r"^(\d+) +execve\((.*)(\)\s*= 0|<unfinished \.\.\.>)")
# "+++ exited with ..." / "+++ killed by ..." process-termination lines.
exit_re = re.compile(r"^(\d+) +\+\+\+ (?:exited with|killed by) ")
# fork()/vfork() call; group 2 is the returned number (used as the child pid)
# or None when the call is reported as "<unfinished ...>".
fork_re = re.compile(r"^(\d+) +v?fork\((?:.+?\) += (\d+)| <unfinished \.\.\.>)$")
# "<... fork resumed>" / "<... vfork resumed>" line; group 2 is the returned
# number.
fork_resumed_re = re.compile(r"^(\d+) +<\.\.\. v?fork resumed>\) += (\d+)$")
# clone()/clone3() call, with the same group layout as fork_re.
clone_re = re.compile(r"^(\d+) +clone3?\((?:.+?\) = (\d+)| <unfinished \.\.\.>)$")
# Any line ending in ", child_tidptr=...) = N"; genlineobjs() tries it only
# after clone_re failed to match.  Group 2 is N.
child_re = re.compile(r"^(\d+).+?, child_tidptr=.*\) += (\d+)$")
# Executable name "ccache", optionally with a directory path, dash-terminated
# prefixes and a "-N[.N[.N]]" version suffix.
ccache_re = re.compile(r'^([^/]*/)*([^-]*-)*ccache(-\d+(\.\d+){0,2})?$')
# child pid -> parent pid (both kept as the strings captured from the log);
# written by genlineobjs() and read by parse_exec_trace().
parent = {}
def genlineobjs(fname):
"""Read the trace log *fname* and return its relevant lines as dicts.

Every returned dict has 'line' (1-based line number in the log), 'type'
(an OType value) and 'pid'.  CHDIR records add 'wd'; EXEC records add
'pname' and 'command'.  fork/vfork/clone lines produce no record: they
only update the module-level ``parent`` map (child pid -> parent pid).

NOTE(review): the indentation of this function is missing in this copy of
the file, so the nesting of some statements cannot be read off the text.
"""
obj_list = []
# True between a fork/clone reported as "<unfinished ...>" and the line
# that completes it.
unfinished_fork = False
with open(fname, 'r') as fd: # reduce the raw log to the records of interest
linum = 0 # incremented before use, so the first line is number 1
for line in fd:
linum += 1
m = chdir_re.match(line)
if m: # chdir, record this
pid = m.group(1)
# NOTE(review): eval() executes whatever text the log holds here; if the log
# can come from an untrusted source, ast.literal_eval would be the safer
# parser -- confirm it accepts every form the tracer emits before switching.
wdir = eval(m.group(2))
obj_list.append({
'line': linum,
'type': OType.CHDIR,
'pid': pid,
'wd': wdir})
continue
m = fork_re.match(line)
if m:
pid, cid = m.group(1, 2)
# cid is None when the call was logged as "<unfinished ...>".
if cid is not None:
parent[cid] = pid
else:
unfinished_fork = True
continue
m = fork_resumed_re.match(line)
if m:
# A "resumed" line is only expected after an unfinished fork.
assert unfinished_fork
pid, cid = m.group(1, 2)
parent[cid] = pid
unfinished_fork = False
continue
m = clone_re.match(line)
if m:
pid, cid = m.group(1, 2)
# Same handling as for fork_re above.
if cid is not None:
parent[cid] = pid
else:
unfinished_fork = True
continue
m = child_re.match(line)
if m:
# NOTE(review): with the indentation lost it is unclear whether the
# parent[cid] assignment below is guarded by "if unfinished_fork" or runs
# for every child_re match -- confirm against the original file.
if unfinished_fork:
unfinished_fork = False
pid, cid = m.group(1, 2)
parent[cid] = pid
continue
m = exit_re.match(line)
if m:
pid = m.group(1)
obj_list.append({
'line': linum,
'type': OType.EXIT,
'pid': pid})
continue
m = exec_re.match(line)
if m: # execve, get the compiler
pid = m.group(1)
# for strace <=4.11, format:
# 012 execve("PATH", ["E", "..."], [/* N vars */]) = 0
# for strace 2018, format:
# 012 execve("PATH", ["E", "..."], 0xM /* N vars */) = 0
# Strip the trailing environment argument in either format, leaving
# '"PATH", ["E", "..."]'.
line = re.sub(r", \[/\* [^*]+ \*/\]", "", m.group(2))
line = re.sub(r', 0x[^\)]+', '', line)
# NOTE(review): same eval() concern as for chdir above; the result is
# unpacked as (program path, argv list).
(pname, command) = eval(line)
obj_list.append({
"line": linum,
"type": OType.EXEC,
"pid": pid,
"pname": pname,
"command": command
})
# Every unfinished fork/clone must have been completed by the end of the log.
assert not unfinished_fork
return obj_list
def parse_exec_trace(fname, ppwd, proc_run):
    """Replay the trace log *fname* and build the process tree in *proc_run*.

    *proc_run* maps a pid to a node of the form
        {
            'cwd': '',      # last known working directory; inherited by
                            # processes first seen afterwards
            'child': [],    # finished children, each as {child_pid: node}
            'pname': '',    # program path from the last execve
            'command': '',  # argv from the last execve
        }
    A process with no known parent node starts in *ppwd*.  When a process
    exits, its node is moved from *proc_run* into its parent's 'child' list.

    NOTE(review): the statement nesting was reconstructed from the data flow
    because this copy of the file has lost its indentation.
    """
    def blank_node(workdir):
        return {"cwd": workdir,
                "child": [],
                "pname": "",
                "command": ""}

    for record in genlineobjs(fname):
        kind = record['type']

        if kind == OType.EXIT:
            # A tracked process ended: hang its node under its parent,
            # creating a placeholder parent node if none exists yet.
            done_pid = record['pid']
            if done_pid in proc_run:
                finished = proc_run.pop(done_pid)
                owner = parent.get(done_pid)
                if owner not in proc_run:
                    proc_run[owner] = blank_node(ppwd)
                proc_run[owner]["child"].append({done_pid: finished})
            continue

        pid = record['pid']
        if pid not in proc_run:
            # First record for this pid: start from the parent's directory
            # when the parent is being tracked, else from the startup one.
            ppid = parent.get(pid)
            assert ppid is not None or not proc_run, f"{ppid} {pid} {proc_run} {parent}"
            if ppid in proc_run:
                start_dir = proc_run[ppid]['cwd']
            else:
                start_dir = ppwd
            proc_run[pid] = blank_node(start_dir)

        node = proc_run[pid]
        if kind == OType.EXEC:
            # Remember what the process is now running.
            node['pname'] = record['pname']
            node['command'] = record['command']
        elif kind == OType.CHDIR:
            # os.path.join keeps an absolute target and resolves a relative
            # one against the current directory.
            node["cwd"] = os.path.join(node["cwd"], record['wd'])
def _is_filtered_out(path):
    """Return True when *path* is rejected by the include/exclude regex lists."""
    if include_file and not any(r.search(path) for r in include_file):
        return True
    if exclude_file and any(r.search(path) for r in exclude_file):
        return True
    if include_dir and not any(r.search(path) for r in include_dir):
        return True
    if exclude_dir and any(r.search(path) for r in exclude_dir):
        return True
    return False


def print_exec_trace(proc_run, proc_res, auto_sys_inc=False):
    """Append compilation-database entries for the process tree to *proc_res*.

    *proc_run* maps pids to nodes as built by parse_exec_trace().  Children
    are visited before their parent.  For every compiler invocation (directly
    or through ccache) one entry ``{"directory", "command", "file"}`` is
    appended per source file on its command line that passes the
    include/exclude filters.  With *auto_sys_inc* the options returned by
    get_sys_inc() are appended to each command.

    Nothing is printed and the input tree is not modified.
    """
    for item in proc_run.values():
        # process the children first, get the reverse results
        for child in item["child"]:
            print_exec_trace(child, proc_res, auto_sys_inc)

        pname, command = item['pname'], item['command']
        if ccache_re.match(pname) is not None:
            if len(command) < 2:
                # bare "ccache" with no wrapped compiler: nothing to record
                continue
            # Drop the leading "ccache" slot on a copy, so the tree handed in
            # by the caller stays intact and a second pass gives the same
            # result.
            command = command[1:]
            pname = command[0]
        if not (ccache_re.match(pname) or compiler_call(pname)):
            continue
        if not command:
            # an execve with an empty argv has no compiler to query
            continue
        if len(command) >= 2 and command[1] == "-cc1":
            # ignore the "clang -cc1 ..." call
            continue
        if any(x in ('-M', '-MM') for x in command):
            # ignore -M/-MM runs, which fork a child to do the compile
            continue

        sys_inc = get_sys_inc(command[0]) if auto_sys_inc else []

        # Split the argv into the source files to emit and everything else;
        # source files rejected by the filters are dropped from both lists.
        cmd_files = []
        cmd_opts = []
        for opt in command:
            if not is_source_file(opt):
                cmd_opts.append(opt)
            elif not _is_filtered_out(opt):
                cmd_files.append(opt)

        for fname in cmd_files:
            cmds = join_command(cmd_opts + [fname] + sys_inc)
            jstr = shell_quote(cmds)
            proc_res.append({"directory": item["cwd"],
                             "command": jstr,
                             "file": fname})
def trace(args):
    """Run the build command under strace and return the build's exit status.

    Reads ``args.command`` (the build command line) and ``args.output`` (the
    file strace writes its log to).  Exits the script with status 1 when the
    installed strace reports a version older than 4.8.
    """
    # request strace-4.8 or higher
    # subprocess.run drains and closes the pipe before returning, unlike the
    # Popen + wait() + read() sequence, which can block on a full pipe.
    probe = subprocess.run(["strace", "-V"], stdout=subprocess.PIPE)
    s_ver = probe.stdout.decode('utf-8')
    # for Ubuntu 18.04, the ver string is "version UNKNOWN"; the check is then
    # skipped because the pattern does not match
    m_ver = re.match(r'strace -- version (\d+)\.(\d+)', s_ver)
    if m_ver:
        major = int(m_ver.group(1))
        if major < 4 or (major == 4 and int(m_ver.group(2)) < 8):
            print("strace version should high than 4.8")
            print("Current:" + s_ver)
            sys.exit(1)

    # Let strace print strings up to the system's maximum argument size so
    # command lines are not truncated in the log.
    probe = subprocess.run(["getconf", "ARG_MAX"], stdout=subprocess.PIPE)
    arg_max = str(int(probe.stdout.split(b"\n", 1)[0]))

    command = [
        "strace", "-z", "-f", "-s" + arg_max, "-etrace=%process,chdir", "-o",
        args.output
    ]
    command += args.command
    # TBD: the build's stdout/stderr may be very large, and capturing them
    # through pipes could hang (see the documentation of Popen.wait()), so the
    # build inherits this process's streams instead.
    proc = subprocess.Popen(command)
    proc.wait()
    return proc.returncode
def parse(args):
    """Turn a raw trace log into a JSON compilation database.

    Reads ``args.raw_database`` (the trace log), ``args.startup_dir`` (the
    directory the build started in), the four filter lists ``args.include``,
    ``args.exclude``, ``args.include_dir``, ``args.exclude_dir`` and
    ``args.auto_sys_inc``.  The result is written to ``args.output``, or to
    standard output when that is "" or "-".
    """
    proc_run = {}
    cwd = os.path.abspath(args.startup_dir)
    parse_exec_trace(args.raw_database, cwd, proc_run)

    # The filters live in module-level lists read by print_exec_trace().
    include_file.extend(re.compile(i) for i in args.include)
    exclude_file.extend(re.compile(i) for i in args.exclude)
    include_dir.extend(re.compile(i) for i in args.include_dir)
    exclude_dir.extend(re.compile(i) for i in args.exclude_dir)

    proc_res = []
    print_exec_trace(proc_run, proc_res, args.auto_sys_inc)

    # Open the destination only once the result is complete, and close it
    # deterministically, so a failure above cannot leave a truncated file.
    if args.output in ("", "-"):
        json.dump(proc_res, sys.stdout, indent=4)
    else:
        with open(args.output, "w") as ofs:
            json.dump(proc_res, ofs, indent=4)
def run(args):
    """Trace the build command, then generate the compilation database.

    The trace log goes to "./compile_commands.raw"; the database goes to the
    path the caller put in ``args.output``.  Returns the build's non-zero exit
    status when the traced build fails (no database is written in that case),
    otherwise whatever parse() returns.
    """
    raw_database = "./compile_commands.raw"
    output = args.output
    # trace() writes its log to args.output, so point it at the raw file for
    # the duration of the call and always put the caller's value back.
    args.output = raw_database
    try:
        status = trace(args)
    finally:
        args.output = output
    if status != 0:
        # Propagate the failure instead of reporting success to the shell.
        return status
    args.raw_database = raw_database
    return parse(args)
def add_common_opts_parse(s):
    """Register on parser *s* the options used by the "parse" step: the
    startup directory, the system-include auto-detection switches and the
    four repeatable REGEX filters."""
    s.add_argument("--startup-dir", "-s", default='.',
                   help="the startup directory")

    # Both switches write to the same destination, which defaults to True.
    s.add_argument("--auto-sys-inc", "-a", default=True, action="store_true",
                   help="auto detect the system include path")
    s.add_argument("--no-auto-sys-inc", "-A", action="store_false",
                   dest="auto_sys_inc",
                   help="don't auto detect the system include path")

    regex_filters = (
        ("--include", "-i", "include the file parten"),
        ("--exclude", "-e", "exclude the file patten"),
        ("--include-dir", "-I", "include the dir parten"),
        ("--exclude-dir", "-E", "exclude the dir patten"),
    )
    for long_flag, short_flag, help_text in regex_filters:
        s.add_argument(long_flag, short_flag, metavar="REGEX", default=[],
                       action="append", help=help_text)
def add_common_opts_trace(parser):
    """Register on *parser* the positional used by the "trace" step: the
    build command line, taken as all remaining arguments."""
    command_spec = {
        "metavar": "COMMAND",
        "nargs": argparse.REMAINDER,
        "help": "build command line",
    }
    parser.add_argument("command", **command_spec)
def main():
    """Parse the command line and dispatch to run(), trace() or parse().

    When the first argument is not a known subcommand and no help flag is
    present, "run" is inserted as the default subcommand; with no arguments
    at all the top-level help is shown.  Returns the value returned by the
    selected subcommand function.
    """
    parser = argparse.ArgumentParser(
        description="Generate the compile database from build")
    subparsers = parser.add_subparsers(metavar="SUBCOMMAND")

    # run the compile command and generate the JSON compilation database
    parser_run = subparsers.add_parser(
        "run",
        help="(Default) trace build command, and parse result ",
        description="Create a compilation database by tracing build command.")
    add_common_opts_parse(parser_run)
    add_common_opts_trace(parser_run)
    # For "run" this option names the final JSON database (run() sends the
    # strace log to a fixed raw file), so the help text says so.
    parser_run.add_argument("--output",
                            "-o",
                            default="./compile_commands.json",
                            help="the output compilation database file")
    parser_run.set_defaults(fun=run)

    # trace
    parser_trace = subparsers.add_parser(
        "trace",
        help="trace build command",
        description="Create a compilation database by tracing build command.")
    parser_trace.add_argument("--output",
                              "-o",
                              default="./compile_commands.raw",
                              help="the strace output file")
    add_common_opts_trace(parser_trace)
    parser_trace.set_defaults(fun=trace)

    # parse
    parser_parse = subparsers.add_parser(
        "parse",
        help="parse the strace file",
        description="Create compilation database from the tracking log.")
    add_common_opts_parse(parser_parse)
    parser_parse.add_argument("raw_database",
                              default="./compile_commands.raw",
                              nargs='?',
                              help="the raw database from strace")
    parser_parse.add_argument("output",
                              default="./compile_commands.json",
                              nargs='?',
                              help="the output compilor database")
    parser_parse.set_defaults(fun=parse)

    # no subcommand in argv, set the 'run' as default
    if len(sys.argv) >= 2:
        asks_help = '-h' in sys.argv or '--help' in sys.argv
        has_subcommand = sys.argv[1] in ('trace', 'parse', 'run')
        if not (asks_help or has_subcommand):
            sys.argv.insert(1, 'run')
    else:  # len(sys.argv) == 1
        sys.argv.insert(1, "-h")

    args = parser.parse_args()
    return args.fun(args)
if __name__ == "__main__":
    # Exit with whatever main() returns (None counts as success).
    raise SystemExit(main())