Skip to content

Commit c1b609f

Browse files
committed
fixes parsing section and region statements
Note: this is a backport from the bap-1.3.1 package; the fix is already there, but it wasn't committed to the master branch of the upstream repository. The fix enables special handling for the Section and Region types, which use hex numbers without the 0x prefix. Ideally, we shouldn't generate such input, but since this has historically happened, we need to make our parser robust enough to chew such a representation as well.
1 parent 4cc8ae7 commit c1b609f

File tree

1 file changed

+19
-10
lines changed

1 file changed

+19
-10
lines changed

src/bap/noeval_parser.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,32 @@
22
'''
33
Parser for ADT string from bap that does not use eval
44
5-
The nieve eval-based version runs into out-of-memory conditions on large files
5+
The naive eval-based version runs into out-of-memory conditions on large files
66
'''
77
import gc
88
import sys
99
import time
1010

11-
# NOTE: uses bap.bir, but cannot import at module level (circular references)
11+
from subprocess import check_output
12+
13+
# bap.1.3 breaks the format of the following types. it prints hexes
14+
# without prefixing them with the `0x` escape. To fix it without
15+
# fixing bap, we will treat integers inside these parents as
16+
# hexadecimals if there is no prefix.
17+
BROKEN_TYPES = [
18+
'Section',
19+
'Region'
20+
]
1221

22+
# NOTE: uses bap.bir, but cannot import at module level (circular references)
1323

14-
def toint(string, start, end):
24+
def toint(string, start, end, base=10):
1525
'''
1626
Convert substring string[start:end] to integer/long without eval
1727
1828
Note: may contain leading whitespace
1929
'''
2030
istr = string[start:end].lstrip()
21-
2231
if sys.version_info > (3,): # then longs don't exist
2332
if istr.endswith('L'):
2433
istr = istr.rstrip('L')
@@ -31,7 +40,7 @@ def toint(string, start, end):
3140
if istr.startswith('0x'):
3241
return of_str(istr, 16)
3342
else:
34-
return of_str(istr)
43+
return of_str(istr, base)
3544

3645
def setup_progress(totalitems):
3746
'''
@@ -159,17 +168,19 @@ def _parse_end(in_c, in_s, i, objs, stk):
159168
raise ParserInputError('Mismatched input stream')
160169
j = stk[-1]
161170
parent = objs[j]
171+
ptyp = parent['typ']
162172
assert isinstance(parent, dict)
163173
assert parent, 'parent is empty'
164-
assert parent['typ'] != 'int', 'parent wrong type: %r' % (parent['typ'])
174+
assert ptyp != 'int', 'parent wrong type: %r' % (parent['typ'])
165175
assert 'children' in parent
166176
if top: # add to parent if non empty
167177
# make real int before appending
168178
if top['typ'] == 'd': # int
169179
try:
170-
top = toint(in_s, k, i)
180+
base = 16 if ptyp in BROKEN_TYPES else 10
181+
top = toint(in_s, k, i, base)
171182
except ValueError:
172-
raise ParserInputError("Integer expected between [%d..%d)" % (top, i))
183+
raise ParserInputError("Integer expected between [%d..%d)" % (k, i))
173184
parent['children'].append(top)
174185
if in_c == ',': # add blank object and move on
175186
# next obj
@@ -179,7 +190,6 @@ def _parse_end(in_c, in_s, i, objs, stk):
179190
return i
180191
else: # we are ending a tuple/list/app do it
181192
# maybe handle apply (num and seq are earlier)
182-
ptyp = parent['typ']
183193
if ptyp == '[':
184194
if in_c != ']':
185195
raise ParserInputError('close %r and open %r mismatch' % (in_c, ptyp))
@@ -325,4 +335,3 @@ def parser(input_str, disable_gc=False, logger=None):
325335
'format': 'adt',
326336
'load': parser
327337
}
328-

0 commit comments

Comments
 (0)