Skip to content

Commit f8ebdf2

Browse files
fix: use new TS API (#2)
Update to the current tree-sitter Python bindings and pin their versions.
1 parent a56ed9f commit f8ebdf2

File tree

3 files changed: 23 additions & 30 deletions

__init__.py

Lines changed: 13 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -10,23 +10,16 @@
1010
from typing import Dict, List, Optional, Tuple
1111
import json
1212
import re
13-
import struct
1413
import subprocess
1514
import html
1615

1716
from tree_sitter import Language, Parser, Query
17+
import tree_sitter_c
1818
import emoji
1919

2020
from binaryninja import *
2121

2222

23-
# directory path to the current script
24-
CURRENT_DIR = Path(__file__).parent
25-
26-
# tree-sitter files
27-
TREE_SITTER_C = CURRENT_DIR / "tree-sitter-c"
28-
TREE_SITTER_LIB = CURRENT_DIR / "build" / "tree-sitter-c.so"
29-
3023
# query to search tree-sitter's syntax tree for illegal identifier annotations
3124
FUNC_ANNOT_QUERY_STR = """
3225
(function_definition
@@ -249,15 +242,9 @@ def __init__(self, bv):
249242
self.bv, disas_settings
250243
)
251244

252-
# initialize tree-sitter
253-
if not TREE_SITTER_LIB.is_file():
254-
Language.build_library(str(TREE_SITTER_LIB), [str(TREE_SITTER_C)])
255-
if not TREE_SITTER_LIB.is_file():
256-
raise Exception("Failed to build tree-sitter lib")
257-
258-
c_language = Language(str(TREE_SITTER_LIB), "c")
245+
c_language = Language(tree_sitter_c.language())
259246
self.parser = Parser()
260-
self.parser.set_language(c_language)
247+
self.parser.language = c_language
261248
self.func_annot_query = c_language.query(FUNC_ANNOT_QUERY_STR)
262249

263250
def run(self):
@@ -284,11 +271,10 @@ def run(self):
284271
]
285272

286273
# generate temp files for pseudo C code and semgrep findings
287-
with NamedTemporaryFile(
288-
suffix=".c", mode="w+"
289-
) as pseudo_c_out, NamedTemporaryFile(
290-
suffix=".json", mode="w+"
291-
) as semgrep_results_out:
274+
with (
275+
NamedTemporaryFile(suffix=".c", mode="w+") as pseudo_c_out,
276+
NamedTemporaryFile(suffix=".json", mode="w+") as semgrep_results_out,
277+
):
292278
# serialise pseudo C contents
293279
for gv in global_vars:
294280
pseudo_c_out.write(f"{gv}\n")
@@ -446,10 +432,12 @@ def remove_function_annotations(self, src: str) -> str:
446432
captures = self.func_annot_query.captures(tree.root_node)
447433
src_list = list(src)
448434

449-
for node, _ in captures:
450-
# replace each annotation with the empty string
451-
for i in range(node.start_byte, node.end_byte):
452-
src_list[i] = ""
435+
# Process all nodes from all capture groups
436+
for capture_list in captures.values():
437+
for node in capture_list:
438+
# replace each annotation with the empty string
439+
for i in range(node.start_byte, node.end_byte):
440+
src_list[i] = ""
453441

454442
# reconstruct source code from the list
455443
return "".join(src_list)

plugin.json

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,16 @@
11
{
22
"pluginmetadataversion": 2,
33
"name": "semgrep-bn",
4-
"version": "1.0",
4+
"version": "1.0.1",
55
"author": "Samman Palihapitiya",
6-
"type": [ "helper" ],
6+
"type": [
7+
"helper"
8+
],
79
"description": "Search code with Semgrep",
810
"longdescription": "",
9-
"api": [ "python3" ],
11+
"api": [
12+
"python3"
13+
],
1014
"platforms": [
1115
"Darwin",
1216
"Linux",

requirements.txt

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
11
emoji
2-
tree-sitter
3-
semgrep
2+
tree-sitter==0.23.2
3+
tree-sitter-c==0.23.4
4+
semgrep

0 commit comments

Comments
 (0)