Skip to content

Commit a2812fb

Browse files
authored
error handling and performance tweaks (#1375)
* error handling and performance tweaks Sorry for a little bit dirty pull request featuring several independent changes, but it is really tedious to split it into several PRs. The most important and controversial change is that we now capture conflicts when disassemble. The conflict is reported, the instruction is marked as invalid and the whole chain instructions that led to it, is rejected. The previous behavior was terminating the program with an error message, which to my taste is more correct, but is unsatisfying from the user-perspective. We might return the old behavior after the release, in the development version of bap, as most of the conflicts are programmers' errors. The motivation for capturing conflicts was introducing pattern-based instruction encoding identification, which naturally can have false positives (which we occasionally see in some ARM interworked binaries). The other changes concern how our lifters report and handle errors. Since we can now have multiple lifters each supplying semantics for a subset of the instruction set, we no longer assume that an unhandled instruction is an error (it might be handled by some other lifter). We still report errors on instructions that a lifter claim to lift but failed to lift. And since the log file is now much cleaner, I was able to detect some missing cases, mostly for thumb2 (via arm) instructions, which are now handled. Finally, this PR removes the `is-valid` promise from thumb, which wasn't necessary but was introducing a little bit of slowdown. * fixes the empty instruction BIL intrinsic
1 parent 2c871e9 commit a2812fb

File tree

9 files changed

+98
-111
lines changed

9 files changed

+98
-111
lines changed

lib/arm/arm_lifter.ml

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -357,13 +357,15 @@ let lift_bits mem ops (insn : bits_insn ) =
357357
let lift_mult ops insn =
358358
let open Mul in
359359
match insn,ops with
360-
| `MUL, [|`Reg dest; src1; src2; cond; _rflag; wflag|] ->
360+
| `MUL, [|`Reg dest; src1; src2; cond; _; wflag|]
361+
| `MUL, [|`Reg dest; src1; src2; cond; wflag|] ->
361362
let flags = Flags.set_nzf Bil.(var (Env.of_reg dest)) reg32_t in
362363
exec [
363364
assn (Env.of_reg dest) Bil.(exp_of_op src1 * exp_of_op src2)
364365
] ~flags ~wflag cond
365366

366-
| `MLA, [|`Reg dest; src1; src2; addend; cond; _rflag; wflag|] ->
367+
| `MLA, [|`Reg dest; src1; src2; addend; cond; _; wflag|]
368+
| `MLA, [|`Reg dest; src1; src2; addend; cond; wflag|] ->
367369
let flags = Flags.set_nzf Bil.(var Bil.(Env.of_reg dest)) reg32_t in
368370
exec [
369371
assn (Env.of_reg dest)
@@ -376,18 +378,24 @@ let lift_mult ops insn =
376378
Bil.(exp_of_op addend - exp_of_op src1 * exp_of_op src2)
377379
] cond
378380

379-
| `UMULL, [|lodest; hidest; src1; src2; cond; _rflag; wflag|] ->
381+
| `UMULL, [|lodest; hidest; src1; src2; cond; _; wflag|]
382+
| `UMULL, [|lodest; hidest; src1; src2; cond; wflag|] ->
380383
lift_mull ~lodest ~hidest ~src1 ~src2 Unsigned ~wflag cond
381384

382-
| `SMULL, [|lodest; hidest; src1; src2; cond; _rflag; wflag|] ->
385+
| `SMULL, [|lodest; hidest; src1; src2; cond; _; wflag|]
386+
| `SMULL, [|lodest; hidest; src1; src2; cond; wflag|] ->
383387
lift_mull ~lodest ~hidest ~src1 ~src2 Signed ~wflag cond
384388

385389
| `UMLAL, [|lodest; hidest; src1; src2;
386-
_loadd; _hiadd; cond; _rflag; wflag|] ->
390+
_loadd; _hiadd; cond; _; wflag|]
391+
| `UMLAL, [|lodest; hidest; src1; src2;
392+
_loadd; _hiadd; cond; wflag|] ->
387393
lift_mull ~lodest ~hidest ~src1 ~src2 Unsigned ~addend:true ~wflag cond
388394

389395
| `SMLAL, [|lodest; hidest; src1; src2;
390-
_loadd; _hiadd; cond; _rflag; wflag|] ->
396+
_loadd; _hiadd; cond; _; wflag|]
397+
| `SMLAL, [|lodest; hidest; src1; src2;
398+
_loadd; _hiadd; cond; wflag|] ->
391399
lift_mull ~lodest ~hidest ~src1 ~src2 Signed ~addend:true ~wflag cond
392400

393401
(* signed 16bit mul plus a 32bit bit accum, Q *)
@@ -876,6 +884,13 @@ let lift_mem ops insn =
876884
in
877885
exec insns cond
878886

887+
| `LDREX, [|dest1; `Reg _ as dest2; base; cond; _|] ->
888+
let insns =
889+
Mem_shift.lift_r_op ~dest1 ~dest2 ~base ~offset:(`Imm (word 0))
890+
Offset Unsigned W Ld
891+
in
892+
exec insns cond
893+
879894
| `LDREXB, [|dest1; base; cond; _|] ->
880895
let insns =
881896
Mem_shift.lift_r_op ~dest1 ~base ~offset:(`Imm (word 0))
@@ -905,6 +920,13 @@ let lift_mem ops insn =
905920
let result = [Bil.move (Env.of_reg dest1) (int32 0)] in
906921
exec (insns @ result) cond
907922

923+
| `STREX, [|`Reg dest1; src1; `Reg _ as src2; base; cond; _|] ->
924+
let insns =
925+
Mem_shift.lift_r_op ~dest1:src1 ~dest2:src2 ~base ~offset:(`Imm (word 0))
926+
Offset Unsigned W St in
927+
let result = [Bil.move (Env.of_reg dest1) (int32 0)] in
928+
exec (insns @ result) cond
929+
908930
| `STREXB, [|`Reg dest1; src1; base; cond; _|] ->
909931
let insns =
910932
Mem_shift.lift_r_op ~dest1:src1 ~base ~offset:(`Imm (word 0))
@@ -1108,13 +1130,12 @@ let resolve_pc mem = Stmt.map (object
11081130
end)
11091131

11101132
let insn_exn mem insn : bil Or_error.t =
1111-
let name = Basic.Insn.name insn in
11121133
let encoding = Theory.Language.read ~package:"bap" (Basic.Insn.encoding insn) in
11131134
Memory.(Addr.Int_err.(!$(max_addr mem) - !$(min_addr mem)))
11141135
>>= Word.to_int >>= fun s -> Size.of_int ((s+1) * 8) >>= fun scale ->
11151136
Memory.get ~scale mem >>= fun word ->
11161137
match Arm_insn.of_basic insn with
1117-
| None -> errorf "unsupported opcode: %s" name
1138+
| None -> Ok []
11181139
| Some arm_insn -> match arm_ops (Basic.Insn.ops insn) with
11191140
| Error _ as fail -> fail
11201141
| Ok ops -> Result.return @@ match arm_insn with

lib/bap_disasm/bap_disasm_driver.ml

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ open Bap_types.Std
33
open Bap_core_theory
44
open Bap_image_std
55

6+
include Bap_main.Loggers()
7+
68
open KB.Syntax
79

810
module Dis = Bap_disasm_basic
@@ -422,6 +424,7 @@ let disassemble ~code ~data ~funs debt base : Machine.state KB.t =
422424
else f state current mem
423425
else f state encoding mem
424426
and skip state addr f =
427+
info "encoding for %a is unknown, skipping" Addr.pp addr;
425428
Machine.view (Machine.skipped state addr) base
426429
~empty:KB.return
427430
~ready:(fun state current mem ->
@@ -446,17 +449,25 @@ let disassemble ~code ~data ~funs debt base : Machine.state KB.t =
446449
~return:KB.return ~init:(Machine.switch init encoding)
447450
~stopped:(fun d s -> step d (Machine.stopped s encoding))
448451
~hit:(fun d mem insn s ->
449-
new_insn mem insn >>= fun label ->
450-
let encoding = Machine.encoding s in
451-
KB.provide Theory.Label.encoding label encoding.coding >>= fun () ->
452-
collect_dests encoding label >>= fun dests ->
453-
if Set.is_empty dests.resolved &&
454-
not dests.indirect then
455-
step d @@ Machine.moved s encoding mem
456-
else
457-
delay mem insn >>= fun delay ->
458-
step d @@ Machine.jumped s encoding mem dests delay)
459-
~invalid:(fun d _ s ->
452+
KB.catch begin
453+
new_insn mem insn >>= fun label ->
454+
let encoding = Machine.encoding s in
455+
KB.provide Theory.Label.encoding label encoding.coding >>= fun () ->
456+
collect_dests encoding label >>= fun dests ->
457+
if Set.is_empty dests.resolved &&
458+
not dests.indirect then
459+
step d @@ Machine.moved s encoding mem
460+
else
461+
delay mem insn >>= fun delay ->
462+
step d @@ Machine.jumped s encoding mem dests
463+
delay
464+
end @@ fun problem ->
465+
warning "rejecting %a due to a conflict %a"
466+
Memory.pp mem KB.Conflict.pp problem;
467+
step d (Machine.failed s encoding s.addr))
468+
~invalid:(fun d mem s ->
469+
info "rejecting %a as an invalid instruction"
470+
Memory.pp mem;
460471
step d (Machine.failed s encoding s.addr)))
461472
~empty:KB.return
462473

lib/bap_disasm/bap_disasm_target_factory.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ include Bap_disasm_target_intf
55

66
let create_stub_target arch =
77
let module Lifter = struct
8-
let lift _ _ = Or_error.error_string "not implemented"
8+
let lift _ _ = Ok []
99
let addr_size = Arch.addr_size arch
1010

1111
module CPU = struct

plugins/bil/bil_lifter.ml

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -274,10 +274,8 @@ let lift ~enable_intrinsics:{for_all; for_unk; for_special; predicates}
274274
else
275275
let module Target = (val target_of_arch arch) in
276276
match Target.lift mem insn with
277-
| Error _ as err ->
278-
if for_unk
279-
then Ok (create_intrinsic target mem insn)
280-
else err
277+
| Error _ as err -> err
278+
| Ok [] when for_unk -> Ok (create_intrinsic target mem insn)
281279
| Ok bil ->
282280
if for_special && has_special bil
283281
then Ok (create_intrinsic target mem insn)
@@ -297,7 +295,7 @@ let provide_bil ~enable_intrinsics () =
297295
let* insn = obj-->?Disasm_expert.Basic.Insn.slot in
298296
match lift ~enable_intrinsics target arch mem insn with
299297
| Error err ->
300-
info "BIL: the BIL lifter failed with %a" Error.pp err;
298+
warning "BIL: the BIL lifter failed with %a" Error.pp err;
301299
KB.return []
302300
| Ok [] -> KB.return []
303301
| Ok bil ->
@@ -327,12 +325,12 @@ let provide_lifter ~with_fp () =
327325
else base_context in
328326
let is_empty = KB.Domain.is_empty Bil.domain in
329327
let lifter obj =
330-
Theory.Label.target obj >>= fun target ->
331-
Theory.instance ~context:(context target) () >>=
332-
Theory.require >>= fun (module Core) ->
333328
KB.collect Bil.code obj >>= fun bil ->
334329
if is_empty bil then !!Insn.empty
335330
else
331+
Theory.Label.target obj >>= fun target ->
332+
Theory.instance ~context:(context target) () >>=
333+
Theory.require >>= fun (module Core) ->
336334
let module Lifter = Theory.Parser.Make(Core) in
337335
Lifter.run Bil.Theory.parser bil in
338336
KB.Rule.(declare ~package "bil-semantics" |>

plugins/radare2/radare2_main.ml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ let provide_roots file funcs =
3434
Bitvec.((addr - bias) mod modulus bits) in
3535
Option.some_if (Hashtbl.mem funcs addr) true
3636
else None in
37-
promise_property Theory.Label.is_valid;
3837
promise_property Theory.Label.is_subroutine
3938

4039
let strip str =

plugins/thumb/thumb_main.ml

Lines changed: 9 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ module Thumb(CT : Theory.Core) = struct
106106
cmpr (reg rn) (reg rm)
107107
| `tORR, [|Reg rd; _; _; Reg rm; Imm c; _|] ->
108108
lorr (reg rd) (reg rm) (cnd c)
109-
| insn ->
109+
| #opmov,_ as insn ->
110110
info "unhandled move instruction: %a: %a" Bitvec.pp addr pp_insn insn;
111111
!!Insn.empty
112112

@@ -151,7 +151,8 @@ module Thumb(CT : Theory.Core) = struct
151151
strhr (reg rd) (reg rm) (reg rn) (cnd c)
152152
| #opmem_multi as op,ops ->
153153
begin match op, Array.to_list ops with
154-
| `tSTMIA_UPD, Reg rd :: Imm c :: _ :: ar ->
154+
| `tSTMIA_UPD, Reg rd :: Imm c :: _ :: ar
155+
| `tSTMIA_UPD, Reg rd :: Reg _ :: Imm c :: _ :: ar ->
155156
stm (reg rd) (regs ar) (cnd c)
156157
| `tLDMIA, Reg rd :: Imm c :: _ :: ar ->
157158
ldm (reg rd) (regs ar) (cnd c)
@@ -166,7 +167,7 @@ module Thumb(CT : Theory.Core) = struct
166167
info "unhandled multi-reg instruction: %a" pp_insn (op,ops);
167168
!!Insn.empty
168169
end
169-
| insn ->
170+
| #opmem,_ as insn ->
170171
info "unhandled memory operation: %a" pp_insn insn;
171172
!!Insn.empty
172173

@@ -177,7 +178,7 @@ module Thumb(CT : Theory.Core) = struct
177178
| `tSXTH, [|Reg rd; Reg rm; Imm c; _|] -> sx (reg rd) (reg rm) (cnd c)
178179
| `tUXTB, [|Reg rd; Reg rm; Imm c; _|] -> ux (reg rd) (reg rm) (cnd c)
179180
| `tUXTH, [|Reg rd; Reg rm; Imm c; _|] -> ux (reg rd) (reg rm) (cnd c)
180-
| insn ->
181+
| #opbit,_ as insn ->
181182
info "unhandled bit-wise instruction: %a" pp_insn insn;
182183
!!Insn.empty
183184

@@ -192,7 +193,8 @@ module Thumb(CT : Theory.Core) = struct
192193
| `tB, [|Imm dst; _; _|] -> b pc (imm dst)
193194
| `tBcc, [|Imm dst; Imm c; _|] -> bcc pc (cnd c) (imm dst)
194195
| `tBL, [|_; _; Imm dst; _|]
195-
| `tBLXi, [|_; _; Imm dst|] -> bli pc (imm dst)
196+
| `tBLXi, ([|_; _; Imm dst|] | [|_; _; Imm dst; _|] ) ->
197+
bli pc (imm dst)
196198
| `tBLXr, [|_; _; Reg dst|]when is_pc (reg dst) ->
197199
(* blx pc is unpredictable in all versions of ARM *)
198200
Thumb.ctrl unpredictable
@@ -201,7 +203,7 @@ module Thumb(CT : Theory.Core) = struct
201203
| `tBX, [|Reg dst;_;_|] -> bxr (reg dst)
202204
| `tCBNZ, [|Reg rn; Imm c|] -> cbnz pc (reg rn) (imm c)
203205
| `tCBZ, [|Reg rn; Imm c|] -> cbz pc (reg rn) (imm c)
204-
| insn ->
206+
| #opbranch,_ as insn ->
205207
info "unhandled branch: %a" pp_insn insn;
206208
!!Insn.empty
207209

@@ -216,22 +218,6 @@ end
216218
module Main = struct
217219
open Bap.Std
218220

219-
let cmnz_opcode = Word.of_int 10 0x10b
220-
221-
(* this is a temporary fix since bap-mc disassembler couldn't recognize CMN *)
222-
let fix_cmnz insn mem = match insn with
223-
| Some insn -> Some insn
224-
| None ->
225-
let opcode =
226-
let open Or_error.Monad_infix in
227-
let scale = Size.of_int_exn 16 in
228-
Memory.get ~scale mem >>=
229-
Word.extract ~hi:15 ~lo:6 in
230-
match opcode with
231-
| Ok opcode when Word.equal opcode cmnz_opcode ->
232-
Some `tCMNz
233-
| _ -> None
234-
235221
let load () =
236222
KB.promise Theory.Semantics.slot @@ fun label ->
237223
KB.collect Theory.Label.encoding label >>= fun encoding ->
@@ -242,13 +228,7 @@ module Main = struct
242228
let module Thumb = Thumb(Core) in
243229
let addr = Word.to_bitvec@@Memory.min_addr mem in
244230
match decode_opcode (MC.Insn.name insn) with
245-
| None ->
246-
info "failed to decode MC instruction, unknown opcode: \
247-
%a: %s => %a"
248-
Memory.pp mem
249-
(MC.Insn.asm insn)
250-
Sexp.pp_hum (MC.Insn.sexp_of_t insn);
251-
!!Insn.empty
231+
| None -> !!Insn.empty
252232
| Some opcode ->
253233
try
254234
Thumb.lift_insn addr opcode insn >>| fun sema ->

plugins/thumb/thumb_opcodes.ml

Lines changed: 20 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -3,43 +3,24 @@ open Bap.Std
33

44
(* all the `mov` series, registers marked with `e` means extended *)
55
type opmov = [
6-
| `tMOVi8 (* Rd imm8 *)
7-
| `tMOVSr (* Rd Rm affect CSPR *)
8-
| `tMVN (* Rd Rm *)
9-
| `tADC (* Rd Rn Rm *)
10-
| `tADDi3 (* Rd Rs imm *)
11-
| `tADDi8 (* Rd imm *)
12-
| `tADDrr (* Rd Rn Rm *)
13-
| `tADDhirr (* Rde Rse *)
14-
| `tADR (* Rd imm *)
15-
| `tADDrSPi (* Rd imm *)
16-
| `tADDspi (* imm *)
17-
| `tAND
6+
| `tADC
7+
| `tADDi3
8+
| `tADDi8
9+
| `tADDrSPi
10+
| `tADDrr
11+
| `tADDspi
1812
| `tASRri
19-
| `tASRrr
20-
| `tBIC
21-
| `tCMNz
2213
| `tCMPi8
2314
| `tCMPr
24-
| `tCMPhir
25-
| `tEOR
2615
| `tLSLri
27-
| `tLSLrr
2816
| `tLSRri
29-
| `tLSRrr
17+
| `tMOVSr
18+
| `tMOVi8
3019
| `tORR
31-
| `tRSB (* NEG *)
32-
| `tREV
33-
| `tREV16
34-
| `tREVSH
35-
| `tROR
36-
| `tSBC (* Rd Rm *)
37-
| `tSUBi3 (* See the corresponding ADD insns. *)
20+
| `tSUBi3
3821
| `tSUBi8
3922
| `tSUBrr
4023
| `tSUBspi
41-
| `tTST
42-
| `tMUL (* Rd Rn *)
4324
] [@@deriving bin_io, compare, sexp, enumerate]
4425

4526
type opbit = [
@@ -61,36 +42,29 @@ type opmem_multi = [
6142

6243

6344
type opmem = [
64-
| `tLDRi
65-
| `tLDRr
66-
| `tLDRpci | `t2LDRpci
67-
| `tLDRspi
45+
| opmem_multi
46+
| `t2LDRpci
6847
| `tLDRBi
6948
| `tLDRBr
7049
| `tLDRHi
7150
| `tLDRHr
7251
| `tLDRSB
7352
| `tLDRSH
74-
| `tSTRi
75-
| `tSTRr
76-
| `tSTRspi
53+
| `tLDRi
54+
| `tLDRpci
55+
| `tLDRr
56+
| `tLDRspi
7757
| `tSTRBi
7858
| `tSTRBr
7959
| `tSTRHi
8060
| `tSTRHr
81-
| opmem_multi
61+
| `tSTRi
62+
| `tSTRr
63+
| `tSTRspi
8264
] [@@deriving bin_io, compare, sexp, enumerate]
8365

84-
type opbranch = [
85-
| `tBcc
86-
| `tB
87-
| `tBL
88-
| `tBLXi
89-
| `tBLXr
90-
| `tBX
91-
| `tCBNZ
92-
| `tCBZ
93-
] [@@deriving bin_io, compare, sexp, enumerate]
66+
type opbranch = [ `tB | `tBL | `tBLXi | `tBLXr | `tBX | `tBcc | `tCBNZ | `tCBZ ]
67+
[@@deriving bin_io, compare, sexp, enumerate]
9468

9569
type opcode = [
9670
| opmov

0 commit comments

Comments
 (0)