-
Notifications
You must be signed in to change notification settings - Fork 106
/
__init__.py
217 lines (182 loc) · 7.34 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
"""Python bindings for tree-sitter."""
import enum
from ctypes import c_void_p, cdll
from os import path
from platform import system
from tempfile import TemporaryDirectory
from typing import Callable, List, Optional
from tree_sitter.binding import (
LookaheadIterator,
LookaheadNamesIterator,
Node,
Parser,
Query,
Range,
Tree,
TreeCursor,
_language_field_count,
_language_field_id_for_name,
_language_field_name_for_id,
_language_query,
_language_state_count,
_language_symbol_count,
_language_symbol_for_name,
_language_symbol_name,
_language_symbol_type,
_language_version,
_lookahead_iterator,
_next_state,
)
# Names bound by a star-import of this module (`from <module> import *`).
# Everything except "Language" is re-exported from `tree_sitter.binding`.
# NOTE(review): `SymbolType` is defined in this module but is not listed
# here -- confirm whether leaving it out of the star-import is intentional.
__all__ = [
"Language",
"Node",
"Parser",
"Query",
"Range",
"Tree",
"TreeCursor",
"LookaheadIterator",
"LookaheadNamesIterator",
]
class SymbolType(enum.IntEnum):
    """Integer-valued classification of a grammar symbol.

    The numeric order is significant: `Language.node_kind_is_visible`
    treats every value up to and including `ANONYMOUS` as visible, and
    `Language.node_kind_is_named` matches `REGULAR` only.
    """

    REGULAR = 0
    """A regular symbol."""

    ANONYMOUS = 1
    """An anonymous symbol."""

    AUXILIARY = 2
    """An auxiliary symbol."""
class Language:
    """A tree-sitter language loaded from a compiled dynamic library."""

    @staticmethod
    def build_library(output_path: str, repo_paths: List[str]) -> bool:
        """
        Build a dynamic library at the given path, based on the parser
        repositories at the given paths.

        Every repository contributes `src/parser.c`. If it also contains
        `src/scanner.cc` (checked first) or `src/scanner.c`, that scanner is
        compiled in as well.

        Returns `True` if the dynamic library was compiled and `False` if
        the library already existed and was modified at least as recently
        as all of the source files (and this module itself).

        Raises `ValueError` if `repo_paths` is empty, and `ImportError` if
        `distutils` cannot be imported when a build is actually needed.
        """
        # Validate the arguments before touching the filesystem.
        if not repo_paths:
            raise ValueError("Must provide at least one language folder")

        # A missing output file is treated as infinitely old.
        output_mtime = path.getmtime(output_path) if path.exists(output_path) else 0

        cpp = False
        source_paths = []
        for repo_path in repo_paths:
            src_path = path.join(repo_path, "src")
            source_paths.append(path.join(src_path, "parser.c"))
            if path.exists(path.join(src_path, "scanner.cc")):
                # A single C++ scanner forces the final link step to use C++.
                cpp = True
                source_paths.append(path.join(src_path, "scanner.cc"))
            elif path.exists(path.join(src_path, "scanner.c")):
                source_paths.append(path.join(src_path, "scanner.c"))

        # This module's own mtime takes part in the comparison, so changing
        # the build logic below also invalidates previously built libraries.
        source_mtimes = [path.getmtime(__file__)]
        source_mtimes.extend(path.getmtime(path_) for path_ in source_paths)
        if max(source_mtimes) <= output_mtime:
            return False

        # local import saves import time in the common case that nothing is
        # compiled
        try:
            from distutils.ccompiler import new_compiler
            from distutils.unixccompiler import UnixCCompiler
        except ImportError as err:
            # distutils was removed from the standard library in Python 3.12
            # (PEP 632); setuptools ships a compatible replacement. Surface
            # that instead of a bare "No module named 'distutils'".
            raise ImportError(
                "Failed to import distutils. You may need to install setuptools."
            ) from err

        compiler = new_compiler()
        if isinstance(compiler, UnixCCompiler):
            compiler.set_executables(compiler_cxx="c++")

        # Loop-invariant: the platform cannot change between source files.
        on_windows = system() == "Windows"

        with TemporaryDirectory(suffix="tree_sitter_language") as out_dir:
            object_paths = []
            for source_path in source_paths:
                if on_windows:
                    # No extra compiler arguments are passed on Windows.
                    flags = None
                else:
                    flags = ["-fPIC"]
                    # The C standard flag only applies to C sources, not to
                    # a C++ scanner.
                    if source_path.endswith(".c"):
                        flags.append("-std=c99")
                compiled = compiler.compile(
                    [source_path],
                    output_dir=out_dir,
                    include_dirs=[path.dirname(source_path)],
                    extra_preargs=flags,
                )
                # One source file in, so the first object is the one we want.
                object_paths.append(compiled[0])

            # Link while the temporary directory (and its objects) still exist.
            compiler.link_shared_object(
                object_paths,
                output_path,
                target_lang="c++" if cpp else "c",
            )
        return True

    def __init__(self, library_path: str, name: str):
        """
        Load the language with the given name from the dynamic library
        at the given path.

        The library must export a function called `tree_sitter_<name>`;
        its return value is stored as `language_id` and handed to every
        binding call made by this object.
        """
        self.name = name
        # Keep the library handle on the instance alongside the language id.
        self.lib = cdll.LoadLibrary(library_path)
        language_function: Callable[[], c_void_p] = getattr(self.lib, f"tree_sitter_{name}")
        # Declare the result as a pointer; ctypes would otherwise treat it
        # as a C int.
        language_function.restype = c_void_p
        self.language_id: c_void_p = language_function()

    @property
    def version(self) -> int:
        """
        Get the ABI version number that indicates which version of the
        Tree-sitter CLI was used to generate this [`Language`].
        """
        return _language_version(self.language_id)

    @property
    def node_kind_count(self) -> int:
        """Get the number of distinct node types in this language."""
        return _language_symbol_count(self.language_id)

    @property
    def parse_state_count(self) -> int:
        """Get the number of valid states in this language."""
        return _language_state_count(self.language_id)

    def node_kind_for_id(self, id: int) -> Optional[str]:
        """Get the name of the node kind for the given numerical id."""
        return _language_symbol_name(self.language_id, id)

    def id_for_node_kind(self, kind: str, named: bool) -> Optional[int]:
        """Get the numerical id for the given node kind."""
        return _language_symbol_for_name(self.language_id, kind, named)

    def node_kind_is_named(self, id: int) -> bool:
        """
        Check if the node type for the given numerical id is named
        (as opposed to an anonymous node type).
        """
        return _language_symbol_type(self.language_id, id) == SymbolType.REGULAR

    def node_kind_is_visible(self, id: int) -> bool:
        """
        Check if the node type for the given numerical id is visible
        (as opposed to an auxiliary node type).
        """
        # REGULAR (0) and ANONYMOUS (1) are visible; AUXILIARY (2) is not.
        return _language_symbol_type(self.language_id, id) <= SymbolType.ANONYMOUS

    @property
    def field_count(self) -> int:
        """Get the number of fields in this language."""
        return _language_field_count(self.language_id)

    def field_name_for_id(self, field_id: int) -> Optional[str]:
        """Get the name of the field for the given numerical id."""
        return _language_field_name_for_id(self.language_id, field_id)

    def field_id_for_name(self, name: str) -> Optional[int]:
        """Return the field id for a field name."""
        return _language_field_id_for_name(self.language_id, name)

    def next_state(self, state: int, id: int) -> int:
        """
        Get the next parse state. Combine this with
        [`lookahead_iterator`](Language.lookahead_iterator) to
        generate completion suggestions or valid symbols in error nodes.
        """
        return _next_state(self.language_id, state, id)

    def lookahead_iterator(self, state: int) -> "Optional[LookaheadIterator]":
        """
        Create a new lookahead iterator for this language and parse state.

        This returns `None` if state is invalid for this language.

        Iterating `LookaheadIterator` will yield valid symbols in the given
        parse state. Newly created lookahead iterators will return the `ERROR`
        symbol from `LookaheadIterator.current_symbol`.

        Lookahead iterators can be useful to generate suggestions and improve
        syntax error diagnostics. To get symbols valid in an ERROR node, use the
        lookahead iterator on its first leaf node state. For `MISSING` nodes, a
        lookahead iterator created on the previous non-extra leaf node may be
        appropriate.
        """
        return _lookahead_iterator(self.language_id, state)

    def query(self, source: str) -> "Query":
        """Create a Query with the given source code."""
        return _language_query(self.language_id, source)