Skip to content

Commit 8d5733d

Browse files
committed
final set of bug fixes after resolving most issues in datasets
1 parent 70e0df9 commit 8d5733d

File tree

1 file changed

+102
-48
lines changed

1 file changed

+102
-48
lines changed

prd_multidecomp_ida.py

Lines changed: 102 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,9 @@ def get_primitives():
113113

114114
SYSTEM_TYPES=PRIMITIVES+STD_HEADER_TYPES
115115

116+
def get_array_size(x: str) -> int:
    """Return the number of array dimensions declared in *x*.

    Counts every bracket group of the form ``[]`` or ``[<digits>]`` in the
    declarator, so ``"buf[4][8]"`` yields 2, ``"buf[]"`` yields 1 and a plain
    name yields 0.  Note that, despite the name, this is the number of
    bracketed dimensions, not the number of elements.  Brackets holding
    anything other than decimal digits (e.g. ``[N]``) are not counted.
    """
    # Raw string: "\[" and "\d" are regex escapes, not string escapes; a
    # non-raw literal triggers an invalid-escape warning on modern Python.
    return len(re.findall(r"\[\d*\]", x))
118+
116119
def get_basetype_info(field):
117120
# maybe this is a function prototype where there's only the type for each param
118121
# making this the default
@@ -164,6 +167,13 @@ def readpickle(pkl_file):
164167
f=open(pkl_file,'rb')
165168
return pickle.load(f)
166169

170+
def get_function_name(line: str) -> str:
    """Extract the declared symbol name from a C declaration/prototype line.

    Takes the text before the first ``;``, then before the first ``(``,
    and returns its last whitespace-separated token with any leading ``*``
    characters removed (so ``"char **bar(void);"`` gives ``"bar"``).

    Returns an empty string when no token precedes the first ``;`` or ``(``
    (blank line, lone ``;``, or a line that starts with ``(``) rather than
    raising ``IndexError``.
    """
    declarator = line.split(";")[0].split("(")[0]
    tokens = declarator.split()
    if not tokens:
        # Nothing that could be a name; keep callers' list comprehensions alive.
        return ""
    # Pointer stars may be glued to the name ("*foo", "**foo"); drop them.
    return tokens[-1].lstrip("*")
175+
176+
167177
def is_function_ptr(line):
168178
#x=re.match("\s*\*?(\((\s*\*)+\s*\w+\)|\w+)\((.*)\)",line)
169179
# return type (*fn_name) fn_name (params) params
@@ -1411,6 +1421,9 @@ def typedef_resolution(self,structDump):
14111421
undefined.remove(u)
14121422
RESOLVED.add(u)
14131423
del uses_x[u]
1424+
else:
1425+
print(f"DEBUG: {u} is not SIMPLE {u} in all_user_defined_types: {u in all_user_defined_types}; {u} in undefined:{u in undefined}")
1426+
pass
14141427

14151428
orig_x_requires['reduced']=copy.deepcopy(orig_x_requires['original'])
14161429

@@ -1428,6 +1441,8 @@ def typedef_resolution(self,structDump):
14281441

14291442
problems=missing_type_defs+list(problems-set(plist)) # let's make sure the original missing types are ordered first
14301443
prev_undefined=None
1444+
print(f"tiny_page in undefined=> {'tiny_page' in undefined}; tiny_page in RESOLVED => {'tiny_page' in RESOLVED}")
1445+
print(f"malloc_t requires : {x_requires['malloc_t']} (uses tiny_page: {uses_x['tiny_page']})")
14311446
while(len(undefined)>0):
14321447
prev_undefined=copy.copy(undefined)
14331448
updated=False
@@ -1454,14 +1469,33 @@ def typedef_resolution(self,structDump):
14541469
x_requires[n].remove(r)
14551470
undefined=undefined-set(RESOLVED)
14561471
if not updated:
1457-
print(f"ERROR! We should be resolving at least one type per iteration")
1458-
print(f"NOTE: These are the remaining unresolved types: {undefined}")
1459-
print(f"NOTE: Exiting to debug")
1472+
changed=False
14601473
for u in undefined:
14611474
u_info=(u in fwd_decl_types,u in simple_types,u in collective_types,u in RESOLVED)
14621475
x_requires_u=x_requires.get(u,set())
1463-
dprint(f"{u}:{x_requires_u}")
1464-
assert updated
1476+
new_x_requires_u=set()
1477+
for xx in x_requires_u:
1478+
if xx in RESOLVED:
1479+
pass
1480+
else:
1481+
new_x_requires_u.add(xx)
1482+
x_requires[u]=new_x_requires_u
1483+
if (x_requires_u!=new_x_requires_u):
1484+
changed=True
1485+
dprint(f"[CHANGED] {u}:{new_x_requires_u}")
1486+
else:
1487+
dprint(f"[UNCHANGED] {u}:{x_requires_u}")
1488+
if not changed:
1489+
print(f"WARNING!! Proceeding although we have circular references we are unable to resolve.")
1490+
print(f"ERROR! We should be resolving at least one type per iteration")
1491+
print(f"NOTE: These are the remaining unresolved types: {undefined}")
1492+
#print(f"NOTE: Exiting to debug")
1493+
for u in undefined:
1494+
RESOLVED.add(u)
1495+
undefined=undefined-set(RESOLVED)
1496+
assert len(undefined)==0, f"ERROR! we have these things undefined {undefined} "
1497+
1498+
14651499

14661500
print("DONE -- with dependencies",flush=True)
14671501

@@ -1547,7 +1581,7 @@ def reorder_(self,resolvable,x_requires,orig_x_requires,fnptr_types):
15471581
direct_unresolved = list(resolvable & reduced_x_requires[o])
15481582
dprint(f"DEBUG: CHECK(0) for {o}: direct_unresolved=> {direct_unresolved} [reduced_x_requires: {reduced_x_requires[o]}]")
15491583
if o in reduced_x_requires[o]:
1550-
if o in direct_unresolved and o in initial_x_requires[o]:
1584+
if o in direct_unresolved and (o in initial_x_requires[o]|x_requires[o]):
15511585
if len(direct_unresolved)>1:
15521586
# let's put self-referencing type declarations that depend on other unresolved types at the end
15531587
dprint(f"DEBUG: FOURTH(1): {o} [{x_requires[o]}] => {direct_unresolved} [{initial_x_requires[o]}]")
@@ -1810,15 +1844,15 @@ def process_datalines(self, dataLines, data_syms, gdataMap:dict):
18101844
print("{} [DATA SYMBOL]".format(base_dataName))
18111845
ext_syms.add(base_dataName)
18121846

1813-
array_size=len(re.findall("\[\d*\]",dataName))
1847+
array_size=get_array_size(dataName)
18141848
print("Array Size:", array_size)
18151849
defLine=""
18161850
if array_size>=2:
1817-
print("// --- WARNING! Two-dimensional array objects are not yet supported")
1851+
print(f"// --- WARNING! Two-dimensional array objects are not yet supported => {dataName}")
18181852
defLine += "%s *(p%s);\n" %(dataType, dataName)
18191853
dataName = dataName.split("[")[0] # handle arrays
18201854
defLine += "#define %s (*p%s)\n" % (dataName, dataName)
1821-
print(" // --- END OF WARNING!\n")
1855+
18221856
elif array_size==1 and (("*" not in dataType) or ("*" in dataType and "[]" in dataName)):
18231857
dataName = dataName.split("[")[0] # handle arrays
18241858
defLine = "%s *(p%s);\n" %(dataType, dataName)
@@ -2080,6 +2114,7 @@ def get_stubs(self, lines, stubs, funcs, decomp_re, global_decomp,
20802114
# hex-rays either gets rid of prepended _ character or appends _\d+ for inlined functions
20812115
if sym_name == "patchmain":
20822116
translate_dict[sym_name]="main"
2117+
translate_dict["main"]="patchmain"
20832118
nm_sym_name="main"
20842119
elif new_sym != sym_name and new_sym in fn_symbols:
20852120
#line = re.sub(r'\b'+sym_name+r'\b',new_sym,line)
@@ -2470,8 +2505,8 @@ def generate_wrapper(self, target_list, funcs, stubMap, dataMap, detour_prefix,
24702505
#trans_targ=rev_trans.get(target,target) if target != "main" else "main"
24712506
trans_targ=translation_dict.get(target,target)
24722507
ltarget=target
2473-
if trans_targ=="main":
2474-
ltarget="patchmain"
2508+
print(f"DEBUG: ORiginal target = {ltarget}, Translated Target = {trans_targ}")
2509+
24752510
detour_target="{}{}".format(detour_prefix,trans_targ)
24762511
# if 'main' function exists locally, then we'll have a collision with new main during compilation
24772512
# renaming to 'patchmain'
@@ -2521,17 +2556,17 @@ def generate_wrapper(self, target_list, funcs, stubMap, dataMap, detour_prefix,
25212556
size=len(argArray)
25222557
while j < size:
25232558
fn_ptr=False
2524-
print("DEBUG : {} [{}] ".format(argArray[j],j))
2559+
dprint("DEBUG : {} [{}] ".format(argArray[j],j))
25252560
arg=argArray[j]
25262561
# this looks like a function pointer
25272562
while arg.count('(') != arg.count(')') and j+1 < size:
25282563
j+=1
25292564
arg+=","+argArray[j]
2530-
print("DEBUG [fnptr]: arg = '{}'".format(arg))
2565+
dprint("DEBUG [fnptr]: arg = '{}'".format(arg))
25312566
fn_ptr=True
25322567
arg = arg.strip()
25332568
argTuple = self.getTypeAndLabel(arg,fn_ptr)
2534-
print("DEBUG: arg = {} [{}] [argTuple = {}]".format(arg,argLine,argTuple))
2569+
dprint("DEBUG: arg = {} [{}] [argTuple = {}]".format(arg,argLine,argTuple))
25352570
args.append(argTuple)
25362571
j+=1
25372572
break
@@ -2544,10 +2579,10 @@ def generate_wrapper(self, target_list, funcs, stubMap, dataMap, detour_prefix,
25442579
x1=re.search(r"\b("+ltarget+r")\b",targetHeader)
25452580
x2=re.search(r"\b("+trans_targ+r")\b",targetHeader)
25462581
if x1:
2547-
print("Replacing : {} with {} in '{}'".format(ltarget,detour_target,targetHeader))
2582+
print("[x1] Replacing : {} with {} in '{}'".format(ltarget,detour_target,targetHeader))
25482583
targetHeader = re.sub(r"\b("+ltarget+r")\b",detour_target,targetHeader)
25492584
else:
2550-
print("Replacing : {} with {} in '{}'".format(trans_targ,detour_target,targetHeader))
2585+
print("[x2] Replacing : {} with {} in '{}'".format(trans_targ,detour_target,targetHeader))
25512586
#targetHeader = targetHeader.replace(trans_targ,detour_target)
25522587
targetHeader = re.sub(r"\b("+trans_targ+r")\b",detour_target,targetHeader)
25532588
wrapperStub += targetHeader.split("(", maxsplit=1)[0] #remove arguments
@@ -2589,15 +2624,15 @@ def generate_wrapper(self, target_list, funcs, stubMap, dataMap, detour_prefix,
25892624
dataDef = d.split(";")[0]
25902625
dataDef = dataDef.split("=")[0].strip()
25912626
dataType, dataName = self.getTypeAndLabel(dataDef)
2592-
array_size=len(re.findall("\[\d*\]",dataName))
2627+
array_size=get_array_size(dataName)
25932628
if ":" not in call_me[target]:
25942629
call_me[target]+=":"
25952630
else:
25962631
call_me[target]+=","
25972632
if array_size>=2:
25982633
print("SORRY: two-dimensional array objects just aren't working right now")
25992634
print(" ==> "+dataType+" "+dataName)
2600-
wrapperStub += "// --- WARNING! Two-dimensional array objects are not yet supported"
2635+
wrapperStub += "\t// --- WARNING! Two-dimensional array objects are not yet supported\n\t//"
26012636
wrapperStub += "\tvoid* my%s,\n" % dataName
26022637
call_me[target]+=dataName
26032638
#elif array_size==1 and "*" not in dataType:
@@ -2649,12 +2684,14 @@ def generate_wrapper(self, target_list, funcs, stubMap, dataMap, detour_prefix,
26492684
dataDef = dataDef[3:] # handle commented out cases
26502685
dataDef = dataDef.split("=")[0].strip()
26512686
dataType, dataName = self.getTypeAndLabel(dataDef)
2652-
array_size=len(re.findall("\[\d*\]",dataName))
2687+
array_size=get_array_size(dataName)
2688+
26532689
if array_size>=2:
26542690
print("// --- WARNING! Two-dimensional array objects are not yet supported\n")
2691+
wrapperStub += "// UNSUPPORTED : "
26552692
wrapperStub += "\tp%s = (%s*) my%s;\n" % (dataName, dataType, dataName)
26562693
print(" // --- END OF WARNING!\n")
2657-
#elif array_size==1 and "*" not in dataType:
2694+
26582695
elif array_size==1 and (("*" not in dataType) or ("*" in dataType and "[]" in dataName)):
26592696
dataNamex = dataName.split("[")[0] # handle arrays
26602697
wrapperStub += "\tp%s = (%s*) my%s;\n" % (dataNamex, dataType, dataNamex)
@@ -2663,6 +2700,7 @@ def generate_wrapper(self, target_list, funcs, stubMap, dataMap, detour_prefix,
26632700

26642701
for s in stubMap[target].keys():
26652702
name = self.get_stub_name(s)
2703+
array_size=get_array_size(name)
26662704
pname=f"p{name}"
26672705
dname=translation_dict.get(name,name)
26682706
if name in list(VALIST_TRANSFORM.keys()):
@@ -2693,14 +2731,17 @@ def generate_wrapper(self, target_list, funcs, stubMap, dataMap, detour_prefix,
26932731
# there's a weird behavior with IDA, it translates functions prepended with '_' to without
26942732
#wrapperStub += "my%s(\n" % target
26952733
#wrapperStub += "%s(\n" % target
2696-
wrapperStub += "%s(\n" % ltarget
2734+
2735+
wrapperStub += "%s(\n" % trans_targ
2736+
print(f"DEBUG => wrapperStub = {wrapperStub}")
26972737

26982738
for argTuple in args:
2739+
dprint(f"DEBUG: argTuple = {argTuple}")
26992740
argName = argTuple[1]
27002741
wrapperStub += "\t\t%s,\n" % (argName)
2701-
2702-
if args: # list not empty
2703-
wrapperStub = wrapperStub[:-2] #strip ,\n
2742+
2743+
while wrapperStub.rstrip().endswith(','):
2744+
wrapperStub = wrapperStub.rstrip()[:-1] #strip ,\n
27042745

27052746
wrapperStub += "\n\t);\n"
27062747

@@ -2710,19 +2751,6 @@ def generate_wrapper(self, target_list, funcs, stubMap, dataMap, detour_prefix,
27102751
#t="patchmain" if target=="main" else target
27112752
wrapperStub += "\n\t /* ASM STACK "+ltarget+" HERE */\n"
27122753

2713-
# wrapperStub += "\n\tasm(\n"
2714-
2715-
# wrapperStub += "\t\"nop\\n\\t\"\n\t\"nop\\n\\t\"\n\t\"nop\\n\\t\"\n\t\"nop\\n\\t\"\n"
2716-
# wrapperStub += "\t\"add $0x%x,%%esp\\n\\t\"\n" % (numFuncArgs * 4)
2717-
# wrapperStub += "\t\"pop %ebx\\n\\t\"\n"
2718-
# wrapperStub += "\t\"pop %ebp\\n\\t\"\n"
2719-
# wrapperStub += "\t\"pop %ecx\\n\\t\"\n"
2720-
# wrapperStub += "\t\"add $0x%x,%%esp\\n\\t\"\n" % (numStubs * 4)
2721-
# wrapperStub += "\t\"push %ecx\\n\\t\"\n"
2722-
# wrapperStub += "\t\"ret\\n\\t\"\n"
2723-
2724-
# wrapperStub += "\t);\n"
2725-
27262754
# ret and close
27272755
wrapperStub += "\n\treturn"
27282756
if targetRetType != "void" and targetRetType != "void __noreturn":
@@ -2880,11 +2908,19 @@ def get_target_info(self,workdir):
28802908
funcs_=re.sub(":"," ",funcs_)
28812909
funcs=re.sub("_____","::",funcs_)
28822910
# we're now assuming that we're getting mangled symbols as input
2883-
funcList = funcs.split(" ")
2884-
print(f"FUNCLIST='{funcList}'",flush=True)
2885-
for i in funcList:
2911+
funcList_ = funcs.split(" ")
2912+
print(f"FUNCLIST='{funcList_}'",flush=True)
2913+
funcList=list()
2914+
for i in funcList_:
28862915
print(f"{i} ",flush=True)
2887-
print(f" => {self.mang2demLUT[i]}",flush=True)
2916+
if self.mang2demLUT.get(i,None) is not None:
2917+
print(f" => {self.mang2demLUT[i]}",flush=True)
2918+
funcList.append(i)
2919+
else:
2920+
print(f"ERROR: {i} does not exist as a local symbol. SKipping.")
2921+
if len(funcList)==0:
2922+
print(f"Nothing to do. Exiting.")
2923+
import sys;sys.exit(-1);
28882924
detour_funcs= [ (self.mang2demLUT[f],f) for f in funcList ]
28892925
x={'target':target,'path':path,'funcList':funcList,'detour_funcs':detour_funcs,'symbols_lut':symbols_lut}
28902926
self.targets.append(x)
@@ -3046,6 +3082,9 @@ def run(self):
30463082
print(f"DETOUR FUNCS 'detour_funcs[{idx}]' => '{detour_funcs[idx]}'")
30473083
print(f"GUESSED FUNCS: {guessed_protos}")
30483084

3085+
used_symbols=[ get_function_name(x.strip()) for x in data_decls+stubs['prototypes'] if not x.strip().startswith("//")]
3086+
3087+
30493088
for rmdd in rm_decomp_decl:
30503089
print(f"Removing existing weaker conflicting declaration: {rmdd}")
30513090
del decomp_decls[decomp_decls.index(rmdd)]
@@ -3073,7 +3112,11 @@ def run(self):
30733112
basic_finalOutput="\n\n"+"\n".join(basic_)+"\n\n"
30743113

30753114
#let's clean-up the GLIBC references to avoid collision
3076-
decomp_defs = cleaner.prevent_glibc_collision(decomp_defs,CSTDIO_FUNCS+glibc_symbols+ext_symbols)
3115+
# and only clean-up references that are used and external (maybe this should be used everywhere?)
3116+
used_extsymbols = [x for x in used_symbols if x in CSTDIO_FUNCS+glibc_symbols+ext_symbols]
3117+
print(f"DEBUG : USED EXTERNAL SYMBOLS => {used_extsymbols}")
3118+
if len(used_extsymbols)>0:
3119+
decomp_defs = cleaner.prevent_glibc_collision(decomp_defs,used_extsymbols)
30773120

30783121
# let's uniquify the header lines by the set datatype
30793122
print("\nFUNC_HEADERS:\n{}".format(" -- "+"\n -- ".join(funcHeaders['prototypes'])))
@@ -3194,20 +3237,25 @@ def run(self):
31943237
di=re.sub("::","__",i)
31953238
sym_i=self.dem2mangLUT[i]
31963239
define=f"{di}:{sym_i}"
3240+
ti=translate_dict.get(i,i)
3241+
print(f"DETOURS = {i} vs {ti}")
3242+
31973243
if self.detour_entry_fn_prefix:
3198-
di="{}{}".format(self.detour_entry_fn_prefix,i)
3244+
di="{}{}".format(self.detour_entry_fn_prefix,ti)
31993245
di=re.sub("::","__",di)
32003246
define="{}:{}".format(di,sym_i)
3201-
elif i=="main":
3202-
di="patchmain"
3203-
define="{}:{}".format(di,sym_i)
32043247

32053248
if i=="main":
3206-
define+="+7"
3249+
# Ideally, should read through the disasm and find the correct offset
3250+
# for dynamically linked binaries, this is after the EBX register has been loaded
3251+
if len(ext_symbols)>0:
3252+
define+="+23"
3253+
else:
3254+
# for statically linked binaries, this is after initial population of registers required to return to __libc_start_main
3255+
define+="+7"
32073256
detours.append(define)
32083257

32093258

3210-
#detour_list=[ str(f+":"+self.detour_entry_fn_prefix+f) for f in detour_funcs ]
32113259
makefile_dict={
32123260
"BIN":target,
32133261
"MYSRC":target+"_recomp.c",
@@ -3290,6 +3338,9 @@ def main():
32903338
help='Detour prefix to append to detour entry function')
32913339
parser.add_argument('target_list',
32923340
help='path to the list of target binaries + paths')
3341+
parser.add_argument("--debug",dest='debug',
3342+
default=False,action='store_const',const=True,
3343+
help='Print more debug information')
32933344
parser.add_argument("--strip-binary",dest='strip',
32943345
default=False,action='store_const',const=True,
32953346
help='get decompiled output from stripped version of binary')
@@ -3304,11 +3355,14 @@ def main():
33043355
help='r2ghidra command line <SYM> is symbol to decompile, <C_OUT> is decompile out file')
33053356

33063357
args, unknownargs = parser.parse_known_args()
3358+
global DEBUG
3359+
DEBUG=args.debug
33073360
if not os.path.exists(args.decompdir):
33083361
os.makedirs(args.decompdir) # make sure that the decomp dir exists before using it
33093362
gpd = GenprogDecomp(args.target_list, args.scriptpath, args.ouput_directory,args.detfn_prefix,args.r2,args.strip,args.decompdir,args.version2)
33103363
gpd.get_target_info(args.decompdir)
33113364
gpd.run()
3365+
import sys;sys.exit(0);
33123366

33133367
main()
33143368

0 commit comments

Comments
 (0)