Skip to content

Commit e598acc

Browse files
authored
enables functions with multiple entry (#926)
* enables functions with multiple entry * refactoring * solved through symtab data * minor bug fix * wip * two algorithms for IR without repetitions * finally end up with the last algorithm * seems everything works togetherworks * refactoring * fixed oasis file * fixed minor bug * added a check, that fall call hasn't taken yet * allow instructions to share bytes * refactoring * fixed a minor bug * bug fising * check shared edges * refactoring * it works * few optimizations * refactored * save all edges * renamed function
1 parent 7268622 commit e598acc

File tree

5 files changed

+154
-75
lines changed

5 files changed

+154
-75
lines changed

lib/bap_disasm/bap_disasm_rec.ml

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -123,9 +123,9 @@ type stage1 = {
123123

124124
type stage2 = {
125125
stage1 : stage1;
126-
addrs : mem Addrs.t; (* table of blocks *)
127-
succs : dests Addrs.t;
128-
preds : addr list Addrs.t;
126+
addrs : mem Addrs.t; (* table of blocks *)
127+
succs : dests Addrs.t;
128+
preds : addr list Addrs.t;
129129
disasm : mem -> decoded list;
130130
}
131131

@@ -161,13 +161,14 @@ let ok_nil = function
161161
| Ok xs -> xs
162162
| Error _ -> []
163163

164-
let is_jump s mem insn =
164+
let is_terminator s mem insn =
165165
Dis.Insn.is insn `May_affect_control_flow ||
166-
has_jump (ok_nil (s.lift mem insn))
166+
has_jump (ok_nil (s.lift mem insn)) ||
167+
Set.mem s.inits (Addr.succ (Memory.max_addr mem))
167168

168169
let update s mem insn dests : stage1 =
169170
let s = { s with visited = Visited.add_insn s.visited mem } in
170-
if is_jump s mem insn then
171+
if is_terminator s mem insn then
171172
let () = update_dests s mem dests in
172173
let roots = List.(filter_map ~f:fst dests |> rev_append s.roots) in
173174
{ s with roots }

lib/bap_disasm/bap_disasm_reconstructor.ml

Lines changed: 65 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -71,43 +71,78 @@ let is_unresolved blk cfg =
7171
deg = 0 ||
7272
(deg = 1 && is_fall (Seq.hd_exn (Cfg.Node.outputs blk cfg)))
7373

74-
let add_callnames syms name cfg blk =
75-
if is_call blk then
74+
let add_call symtab blk name label =
75+
Symtab.add_call symtab blk name label
76+
77+
let add_unresolved syms name cfg blk =
78+
if is_unresolved blk cfg then
7679
let call_addr = terminator_addr blk in
77-
if is_unresolved blk cfg then
78-
Symtab.add_call_name syms blk (name call_addr)
79-
else
80-
Seq.fold ~init:syms (Cfg.Node.outputs blk cfg)
81-
~f:(fun syms e ->
82-
if is_fall e then syms
83-
else
84-
Cfg.Edge.dst e |> Block.addr |> name |>
85-
Symtab.add_call_name syms blk)
80+
add_call syms blk (name call_addr) `Fall
8681
else syms
8782

8883
let collect name cfg roots =
8984
Seq.fold (Cfg.nodes cfg) ~init:(Block.Set.empty, Symtab.empty)
90-
~f:(fun (entries,syms) blk ->
91-
Set.union entries (entries_of_block cfg roots blk),
92-
add_callnames syms name cfg blk)
93-
94-
let reconstruct name roots prog =
95-
let roots = Addr.Set.of_list roots in
96-
let entries,syms = collect name prog roots in
97-
let is_call e = Set.mem entries (Cfg.Edge.dst e) in
98-
let rec add cfg node =
99-
let cfg = Cfg.Node.insert node cfg in
100-
Seq.fold (Cfg.Node.outputs node prog) ~init:cfg
101-
~f:(fun cfg edge ->
102-
if is_call edge then cfg
85+
~f:(fun (entries, syms) blk ->
86+
let entries' = entries_of_block cfg roots blk in
87+
Set.union entries entries', add_unresolved syms name cfg blk)
88+
89+
let reachable cfg from =
90+
let rec loop nodes node =
91+
Seq.fold (Cfg.Node.outputs node cfg)
92+
~init:(Set.add nodes node)
93+
~f:(fun nodes edge ->
94+
if Set.mem nodes ( Cfg.Edge.dst edge) then nodes
95+
else loop nodes (Cfg.Edge.dst edge)) in
96+
loop Block.Set.empty from
97+
98+
let sub roots prog start =
99+
let is_call e = Set.mem roots (Cfg.Edge.dst e) in
100+
let update_inputs node init =
101+
Seq.fold ~init (Cfg.Node.inputs node prog) ~f:Set.add in
102+
let rec loop cfg inputs node =
103+
Seq.fold (Cfg.Node.outputs node prog)
104+
~init:(cfg, update_inputs node inputs)
105+
~f:(fun (cfg, inputs) edge ->
106+
if is_call edge then cfg,inputs
103107
else
104108
let cfg' = Cfg.Edge.insert edge cfg in
105-
if Cfg.Node.mem (Cfg.Edge.dst edge) cfg then cfg'
106-
else add cfg' (Cfg.Edge.dst edge)) in
107-
Set.fold entries ~init:syms ~f:(fun syms entry ->
108-
let name = name (Block.addr entry) in
109-
let cfg = add Cfg.empty entry in
110-
Symtab.add_symbol syms (name,entry,cfg))
109+
if Cfg.Node.mem (Cfg.Edge.dst edge) cfg then cfg',inputs
110+
else loop cfg' inputs (Cfg.Edge.dst edge)) in
111+
let cfg = Cfg.Node.insert start Cfg.empty in
112+
loop cfg Cfg.Edge.Set.empty start
113+
114+
let edges_of_seq s = Seq.fold s ~init:Cfg.Edge.Set.empty ~f:Set.add
115+
116+
let reconstruct name initial_roots prog =
117+
let (--) = Set.diff in
118+
let update_symtab syms cfg entry inputs =
119+
let name = name (Block.addr entry) in
120+
let syms = Symtab.add_symbol syms (name,entry,cfg) in
121+
Set.fold inputs ~init:syms ~f:(fun syms e ->
122+
add_call syms (Cfg.Edge.src e) name (Cfg.Edge.label e)) in
123+
let remove_node cfg n = Cfg.Node.remove n cfg in
124+
let remove_reachable cfg from =
125+
let reachable = reachable cfg from in
126+
let cfg = Set.fold reachable ~init:cfg ~f:remove_node in
127+
remove_node cfg from in
128+
let collect_destinations edges =
129+
Set.fold edges ~init:Block.Set.empty ~f:(fun bs e ->
130+
Set.add bs (Cfg.Edge.dst e)) in
131+
let rec loop known_roots syms = function
132+
| [] -> syms,known_roots
133+
| root :: roots ->
134+
let self_inputs = edges_of_seq (Cfg.Node.inputs root prog) in
135+
let cfg, inputs = sub known_roots prog root in
136+
let edges = edges_of_seq (Cfg.edges cfg) in
137+
let calls = collect_destinations (inputs -- edges -- self_inputs) in
138+
let cfg = Set.fold calls ~init:cfg ~f:remove_reachable in
139+
let known = Set.union known_roots calls in
140+
let syms = update_symtab syms cfg root self_inputs in
141+
let syms,known = loop known syms (Set.to_list calls) in
142+
loop known syms roots in
143+
let initial_roots = Addr.Set.of_list initial_roots in
144+
let roots,syms = collect name prog initial_roots in
145+
fst @@ loop roots syms (Set.to_list roots)
111146

112147
let of_blocks syms =
113148
let reconstruct (cfg : cfg) =

lib/bap_disasm/bap_disasm_symtab.ml

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ module Insn = Bap_disasm_insn
1212

1313

1414
type block = Block.t [@@deriving compare, sexp_of]
15+
type edge = Block.edge [@@deriving compare, sexp_of]
1516
type cfg = Cfg.t [@@deriving compare]
1617

1718

@@ -29,11 +30,10 @@ type t = {
2930
addrs : fn Addr.Map.t;
3031
names : fn String.Map.t;
3132
memory : fn Memmap.t;
32-
callnames : string Addr.Map.t;
33+
callees : (string * edge) list Addr.Map.t;
3334
} [@@deriving sexp_of]
3435

3536

36-
3737
let compare t1 t2 =
3838
Addr.Map.compare Fn.compare t1.addrs t2.addrs
3939

@@ -47,7 +47,7 @@ let empty = {
4747
addrs = Addr.Map.empty;
4848
names = String.Map.empty;
4949
memory = Memmap.empty;
50-
callnames = Addr.Map.empty;
50+
callees = Addr.Map.empty;
5151
}
5252

5353
let merge m1 m2 =
@@ -58,16 +58,17 @@ let filter_mem mem name entry =
5858
Memmap.filter mem ~f:(fun (n,e,_) ->
5959
not(String.(name = n) || Block.(entry = e)))
6060

61-
let filter_callnames name =
62-
Map.filter ~f:( fun name' -> String.(name <> name'))
61+
let filter_callees name callees =
62+
Map.map callees
63+
~f:(List.filter ~f:(fun (name',_) -> String.(name <> name')))
6364

6465
let remove t (name,entry,_) : t =
6566
if Map.mem t.addrs (Block.addr entry) then
6667
{
6768
names = Map.remove t.names name;
6869
addrs = Map.remove t.addrs (Block.addr entry);
6970
memory = filter_mem t.memory name entry;
70-
callnames = filter_callnames name t.callnames
71+
callees = filter_callees name t.callees;
7172
}
7273
else t
7374

@@ -96,7 +97,10 @@ let name_of_fn = fst
9697
let entry_of_fn = snd
9798
let span fn = span fn |> Memmap.map ~f:(fun _ -> ())
9899

99-
let add_call_name t b name =
100-
{ t with callnames = Map.set t.callnames (Block.addr b) name }
100+
let add_call t b name edge =
101+
{t with callees = Map.add_multi t.callees (Block.addr b) (name,edge)}
101102

102-
let find_call_name t addr = Map.find t.callnames addr
103+
let enum_calls t addr =
104+
match Map.find t.callees addr with
105+
| None -> []
106+
| Some callees -> callees

lib/bap_disasm/bap_disasm_symtab.mli

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ open Bap_types.Std
33
open Image_internal_std
44

55
type block = Bap_disasm_block.t
6+
type edge = Bap_disasm_block.edge
67
type cfg = Bap_disasm_rec.Cfg.t
78

89
type t [@@deriving compare, sexp_of]
@@ -21,8 +22,10 @@ val intersecting : t -> mem -> fn list
2122
val to_sequence : t -> fn seq
2223
val span : fn -> unit memmap
2324

24-
(* remembers a call to a function from the given block *)
25-
val add_call_name : t -> block -> string -> t
25+
(** [add_call symtab block name edge] remembers a call to a function
26+
[name] from the given block with [edge] *)
27+
val add_call : t -> block -> string -> edge -> t
2628

27-
(* finds if there are any calls from the given block *)
28-
val find_call_name : t -> addr -> string option
29+
(** [enum_calls t addr] returns a list of calls from a block with
30+
the given [addr] *)
31+
val enum_calls : t -> addr -> (string * edge) list

lib/bap_sema/bap_sema_lift.ml

Lines changed: 62 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ let linear_of_stmt ?addr return insn stmt : linear list =
127127
Label finish :: [] in
128128
linearize stmt
129129

130+
130131
let lift_insn ?addr fall init insn =
131132
List.fold (Insn.bil insn) ~init ~f:(fun init stmt ->
132133
List.fold (linear_of_stmt ?addr fall insn stmt) ~init
@@ -152,8 +153,39 @@ let is_conditional_jump jmp =
152153
Insn.(may affect_control_flow) jmp &&
153154
has_jump_under_condition (Insn.bil jmp)
154155

155-
let blk cfg block : blk term list =
156-
let fall_label = label_of_fall cfg block in
156+
let has_called block addr =
157+
let finder =
158+
object inherit [unit] Stmt.finder
159+
method! enter_jmp e r =
160+
match e with
161+
| Bil.Int a when Addr.(a = addr) -> r.return (Some ())
162+
| _ -> r
163+
end in
164+
Bil.exists finder (Insn.bil (Block.terminator block))
165+
166+
let fall_of_symtab symtab block =
167+
Option.(
168+
symtab >>= fun symtab ->
169+
match Symtab.enum_calls symtab (Block.addr block) with
170+
| [] -> None
171+
| calls ->
172+
List.find_map calls
173+
~f:(fun (n,e) -> Option.some_if (e = `Fall) n) >>= fun name ->
174+
Symtab.find_by_name symtab name >>= fun (_,entry,_) ->
175+
Option.some_if Block.(block <> entry) entry >>= fun callee ->
176+
let addr = Block.addr callee in
177+
Option.some_if (not (has_called block addr)) () >>= fun () ->
178+
let bldr = Ir_blk.Builder.create () in
179+
let call = Call.create ~target:(Label.indirect Bil.(int addr)) () in
180+
let () = Ir_blk.Builder.add_jmp bldr (Ir_jmp.create_call call) in
181+
Some (Ir_blk.Builder.result bldr))
182+
183+
let blk ?symtab cfg block : blk term list =
184+
let fall_to_fn = fall_of_symtab symtab block in
185+
let fall_label =
186+
match label_of_fall cfg block, fall_to_fn with
187+
| None, Some b -> Some (Label.direct (Term.tid b))
188+
| fall_label,_ -> fall_label in
157189
List.fold (Block.insns block) ~init:([],Ir_blk.Builder.create ())
158190
~f:(fun init (mem,insn) ->
159191
let addr = Memory.min_addr mem in
@@ -167,7 +199,10 @@ let blk cfg block : blk term list =
167199
| Some dst -> Some (`Jmp (Ir_jmp.create_goto dst)) in
168200
Option.iter fall ~f:(Ir_blk.Builder.add_elt b);
169201
let b = Ir_blk.Builder.result b in
170-
List.rev (b::bs) |> function
202+
let blocks = match fall_to_fn with
203+
| None -> b :: bs
204+
| Some b' -> b' :: b :: bs in
205+
List.rev blocks |> function
171206
| [] -> assert false
172207
| b::bs -> Term.set_attr b address (Block.addr block) :: bs
173208

@@ -214,14 +249,14 @@ let remove_false_jmps blk =
214249

215250
let unbound _ = true
216251

217-
let lift_sub entry cfg =
252+
let lift_sub ?symtab entry cfg =
218253
let addrs = Addr.Table.create () in
219254
let recons acc b =
220255
let addr = Block.addr b in
221-
let blks = blk cfg b in
256+
let blks = blk ?symtab cfg b in
222257
Option.iter (List.hd blks) ~f:(fun blk ->
223258
Hashtbl.add_exn addrs ~key:addr ~data:(Term.tid blk));
224-
acc @ blks in
259+
acc @ blks in
225260
let blocks = Graphlib.reverse_postorder_traverse
226261
(module Cfg) ~start:entry cfg in
227262
let blks = Seq.fold blocks ~init:[] ~f:recons in
@@ -248,23 +283,23 @@ let indirect_target jmp =
248283
let is_indirect_call jmp = Option.is_some (indirect_target jmp)
249284

250285
let with_address t ~f ~default =
251-
match Term.get_attr t address with
252-
| None -> default
253-
| Some a -> f a
286+
Option.value_map ~default ~f (Term.get_attr t address)
254287

255-
let find_call_name symtab blk =
256-
with_address blk ~default:None ~f:(Symtab.find_call_name symtab)
288+
let with_address_opt t ~f ~default =
289+
let g a = Option.value (f a) ~default in
290+
with_address t ~f:g ~default
257291

258292
let update_unresolved symtab unresolved exts sub =
259293
let iter cls t ~f = Term.to_sequence cls t |> Seq.iter ~f in
260294
let symbol_exists name =
261295
Option.is_some (Symtab.find_by_name symtab name) in
262296
let is_known a = Option.is_some (Symtab.find_by_start symtab a) in
263297
let is_unknown name = not (symbol_exists name) in
264-
let add_external name =
265-
Hashtbl.update exts name ~f:(function
266-
| None -> create_synthetic name
267-
| Some x -> x) in
298+
let add_external (name,_) =
299+
if is_unknown name then
300+
Hashtbl.update exts name ~f:(function
301+
| None -> create_synthetic name
302+
| Some x -> x) in
268303
iter blk_t sub ~f:(fun blk ->
269304
iter jmp_t blk ~f:(fun jmp ->
270305
match indirect_target jmp with
@@ -273,24 +308,24 @@ let update_unresolved symtab unresolved exts sub =
273308
| _ ->
274309
with_address blk ~default:() ~f:(fun addr ->
275310
Hash_set.add unresolved addr;
276-
match Symtab.find_call_name symtab addr with
277-
| Some name when is_unknown name -> add_external name
278-
| _ -> ())))
311+
Symtab.enum_calls symtab addr |>
312+
List.iter ~f:add_external)))
279313

280314
let resolve_indirect symtab exts blk jmp =
281315
let update_target tar =
316+
Option.some @@
282317
match Ir_jmp.kind jmp with
283318
| Call c -> Ir_jmp.with_kind jmp (Call (Call.with_target c tar))
284319
| _ -> jmp in
285-
match find_call_name symtab blk with
286-
| None -> jmp
287-
| Some name ->
320+
let resolve_name (name,_) =
288321
match Symtab.find_by_name symtab name with
289322
| Some (_,b,_) -> update_target (Indirect (Int (Block.addr b)))
290-
| None ->
291-
match Hashtbl.find exts name with
323+
| _ -> match Hashtbl.find exts name with
292324
| Some s -> update_target (Direct (Term.tid s))
293-
| None -> jmp
325+
| None -> None in
326+
with_address_opt blk ~default:jmp ~f:(fun addr ->
327+
Symtab.enum_calls symtab addr |>
328+
List.find_map ~f:resolve_name)
294329

295330
let program symtab =
296331
let b = Ir_program.Builder.create () in
@@ -299,7 +334,7 @@ let program symtab =
299334
let unresolved = Addr.Hash_set.create () in
300335
Seq.iter (Symtab.to_sequence symtab) ~f:(fun (name,entry,cfg) ->
301336
let addr = Block.addr entry in
302-
let sub = lift_sub entry cfg in
337+
let sub = lift_sub ~symtab entry cfg in
303338
Ir_program.Builder.add_sub b (Ir_sub.with_name sub name);
304339
Tid.set_name (Term.tid sub) name;
305340
Hashtbl.add_exn addrs ~key:addr ~data:(Term.tid sub);
@@ -318,7 +353,8 @@ let program symtab =
318353
else j in
319354
resolve_jmp ~local:false addrs j)))
320355

321-
let sub = lift_sub
356+
let sub = lift_sub ?symtab:None
357+
let blk = blk ?symtab:None
322358

323359
let insn insn =
324360
lift_insn None ([], Ir_blk.Builder.create ()) insn |>

0 commit comments

Comments
 (0)