Skip to content

Commit bfb8ba5

Browse files
authored
Merge pull request #9 from BinaryAnalysisPlatform/fix-section-region-parsing
fixes parsing section and region statements
2 parents 4cc8ae7 + c1b609f commit bfb8ba5

File tree

1 file changed

+19
-10
lines changed

1 file changed

+19
-10
lines changed

src/bap/noeval_parser.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,32 @@
22
'''
33
Parser for ADT string from bap that does not use eval
44
5-
The nieve eval-based version runs into out-of-memory conditions on large files
5+
The naive eval-based version runs into out-of-memory conditions on large files
66
'''
77
import gc
88
import sys
99
import time
1010

11-
# NOTE: uses bap.bir, but cannot import at module level (circular references)
11+
from subprocess import check_output
12+
13+
# bap.1.3 breaks the format of the following types. It prints hexes
14+
# without prefixing them with the `0x` escape. To fix it without
15+
# fixing bap, we will treat integers inside these parents as
16+
# hexadecimals if there is no prefix.
17+
BROKEN_TYPES = [
18+
'Section',
19+
'Region'
20+
]
1221

22+
# NOTE: uses bap.bir, but cannot import at module level (circular references)
1323

14-
def toint(string, start, end):
24+
def toint(string, start, end, base=10):
1525
'''
1626
Convert substring string[start:end] to integer/long without eval
1727
1828
Note: may contain leading whitespace
1929
'''
2030
istr = string[start:end].lstrip()
21-
2231
if sys.version_info > (3,): # then longs don't exist
2332
if istr.endswith('L'):
2433
istr = istr.rstrip('L')
@@ -31,7 +40,7 @@ def toint(string, start, end):
3140
if istr.startswith('0x'):
3241
return of_str(istr, 16)
3342
else:
34-
return of_str(istr)
43+
return of_str(istr, base)
3544

3645
def setup_progress(totalitems):
3746
'''
@@ -159,17 +168,19 @@ def _parse_end(in_c, in_s, i, objs, stk):
159168
raise ParserInputError('Mismatched input stream')
160169
j = stk[-1]
161170
parent = objs[j]
171+
ptyp = parent['typ']
162172
assert isinstance(parent, dict)
163173
assert parent, 'parent is empty'
164-
assert parent['typ'] != 'int', 'parent wrong type: %r' % (parent['typ'])
174+
assert ptyp != 'int', 'parent wrong type: %r' % (parent['typ'])
165175
assert 'children' in parent
166176
if top: # add to parent if non empty
167177
# make real int before appending
168178
if top['typ'] == 'd': # int
169179
try:
170-
top = toint(in_s, k, i)
180+
base = 16 if ptyp in BROKEN_TYPES else 10
181+
top = toint(in_s, k, i, base)
171182
except ValueError:
172-
raise ParserInputError("Integer expected between [%d..%d)" % (top, i))
183+
raise ParserInputError("Integer expected between [%d..%d)" % (k, i))
173184
parent['children'].append(top)
174185
if in_c == ',': # add blank object and move on
175186
# next obj
@@ -179,7 +190,6 @@ def _parse_end(in_c, in_s, i, objs, stk):
179190
return i
180191
else: # we are ending a tuple/list/app do it
181192
# maybe handle apply (num and seq are earlier)
182-
ptyp = parent['typ']
183193
if ptyp == '[':
184194
if in_c != ']':
185195
raise ParserInputError('close %r and open %r mismatch' % (in_c, ptyp))
@@ -325,4 +335,3 @@ def parser(input_str, disable_gc=False, logger=None):
325335
'format': 'adt',
326336
'load': parser
327337
}
328-

0 commit comments

Comments
 (0)