10
10
from typing import Dict , List , Optional , Tuple
11
11
import json
12
12
import re
13
- import struct
14
13
import subprocess
15
14
import html
16
15
17
16
from tree_sitter import Language , Parser , Query
17
+ import tree_sitter_c
18
18
import emoji
19
19
20
20
from binaryninja import *
21
21
22
22
23
- # directory path to the current script
24
- CURRENT_DIR = Path (__file__ ).parent
25
-
26
- # tree-sitter files
27
- TREE_SITTER_C = CURRENT_DIR / "tree-sitter-c"
28
- TREE_SITTER_LIB = CURRENT_DIR / "build" / "tree-sitter-c.so"
29
-
30
23
# query to search tree-sitter's syntax tree for illegal identifier annotations
31
24
FUNC_ANNOT_QUERY_STR = """
32
25
(function_definition
@@ -249,15 +242,9 @@ def __init__(self, bv):
249
242
self .bv , disas_settings
250
243
)
251
244
252
- # initialize tree-sitter
253
- if not TREE_SITTER_LIB .is_file ():
254
- Language .build_library (str (TREE_SITTER_LIB ), [str (TREE_SITTER_C )])
255
- if not TREE_SITTER_LIB .is_file ():
256
- raise Exception ("Failed to build tree-sitter lib" )
257
-
258
- c_language = Language (str (TREE_SITTER_LIB ), "c" )
245
+ c_language = Language (tree_sitter_c .language ())
259
246
self .parser = Parser ()
260
- self .parser .set_language ( c_language )
247
+ self .parser .language = c_language
261
248
self .func_annot_query = c_language .query (FUNC_ANNOT_QUERY_STR )
262
249
263
250
def run (self ):
@@ -284,11 +271,10 @@ def run(self):
284
271
]
285
272
286
273
# generate temp files for pseudo C code and semgrep findings
287
- with NamedTemporaryFile (
288
- suffix = ".c" , mode = "w+"
289
- ) as pseudo_c_out , NamedTemporaryFile (
290
- suffix = ".json" , mode = "w+"
291
- ) as semgrep_results_out :
274
+ with (
275
+ NamedTemporaryFile (suffix = ".c" , mode = "w+" ) as pseudo_c_out ,
276
+ NamedTemporaryFile (suffix = ".json" , mode = "w+" ) as semgrep_results_out ,
277
+ ):
292
278
# serialise pseudo C contents
293
279
for gv in global_vars :
294
280
pseudo_c_out .write (f"{ gv } \n " )
@@ -446,10 +432,12 @@ def remove_function_annotations(self, src: str) -> str:
446
432
captures = self .func_annot_query .captures (tree .root_node )
447
433
src_list = list (src )
448
434
449
- for node , _ in captures :
450
- # replace each annotation with the empty string
451
- for i in range (node .start_byte , node .end_byte ):
452
- src_list [i ] = ""
435
+ # Process all nodes from all capture groups
436
+ for capture_list in captures .values ():
437
+ for node in capture_list :
438
+ # replace each annotation with the empty string
439
+ for i in range (node .start_byte , node .end_byte ):
440
+ src_list [i ] = ""
453
441
454
442
# reconstruct source code from the list
455
443
return "" .join (src_list )
0 commit comments