Skip to content

Commit 2c028b3

Browse files
committed
Refactor peephole pass (ocaml-flambda#3797)
1 parent d106170 commit 2c028b3

File tree

5 files changed

+125
-117
lines changed

5 files changed

+125
-117
lines changed

backend/cfg_selectgen.ml

+3
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ module SU = Select_utils
2929
module V = Backend_var
3030
module VP = Backend_var.With_provenance
3131

32+
(* CR-soon gyorsh: This functor must not have state, because it is instantiated
33+
twice with the same [Target] (see [Asmgen] and [Peephole_utils] to avoid
34+
dependency cycles. *)
3235
module Make (Target : Cfg_selectgen_target_intf.S) = struct
3336
(* A syntactic criterion used in addition to judgements about (co)effects as
3437
to whether the evaluation of a given expression may be deferred by

backend/cfg_selectgen.mli

+2
Original file line numberDiff line numberDiff line change
@@ -32,4 +32,6 @@ module Make (Target : Cfg_selectgen_target_intf.S) : sig
3232
future_funcnames:Misc.Stdlib.String.Set.t ->
3333
Cmm.fundecl ->
3434
Cfg_with_layout.t
35+
36+
val is_immediate : Operation.integer_operation -> int -> bool
3537
end

backend/peephole/peephole_rules.ml

+46-92
Original file line numberDiff line numberDiff line change
@@ -72,112 +72,68 @@ let remove_useless_mov (cell : Cfg.basic Cfg.instruction DLL.cell) =
7272

7373
(** Logical condition for simplifying the following case:
7474
{|
75-
<op 1> const1, r
76-
<op 2> const2, r
75+
<op1> const1, r
76+
<op2> const2, r
7777
|}
7878
7979
to:
8080
{|
81-
<op 1> (const1 <op 2> const2), r
81+
<op1> (const1 <op2> const2), r
8282
|}
8383
84-
Where <op 1> and <op 2> can be any two binary operators that are associative and commutative
85-
and const1 and const2 are immediate values. *)
84+
where
85+
const1 and const2 are immediate values, and
86+
<op1> and <op2> are associative binary operators such
87+
that either <op1> is the same as <op2>, or <op1> is the inverse of <op2>,
88+
or there exists const3 such that <op1 const1> can be expressed as <op2 const3>
89+
or <op2 const2> can be expressed as <op1 const3> *)
8690

8791
let are_compatible op1 op2 imm1 imm2 :
8892
(Operation.integer_operation * int) option =
8993
match
9094
(op1 : Operation.integer_operation), (op2 : Operation.integer_operation)
9195
with
92-
(* Folding two bitwise operations such as (AND, OR, XOR) should never produce
93-
an overflow so we assert this conditon. *)
94-
| Iand, Iand ->
95-
assert (U.amd64_imm32_within_bounds imm1 imm2 ( land ));
96-
Some (Iand, imm1 land imm2)
97-
| Ior, Ior ->
98-
assert (U.amd64_imm32_within_bounds imm1 imm2 ( lor ));
99-
Some (Ior, imm1 lor imm2)
100-
| Ixor, Ixor ->
101-
assert (U.amd64_imm32_within_bounds imm1 imm2 ( lxor ));
102-
Some (Ixor, imm1 lxor imm2)
96+
| Iand, Iand -> U.bitwise_immediates op1 imm1 imm2 ( land )
97+
| Ior, Ior -> U.bitwise_immediates op1 imm1 imm2 ( lor )
98+
| Ixor, Ixor -> U.bitwise_immediates op1 imm1 imm2 ( lxor )
10399
(* For the following three cases we have the issue that in some situations,
104100
one or both immediate values could be out of bounds, but the result might
105101
be within bounds (e.g. imm1 = -4 and imm2 = 65, their sum being 61). This
106102
should not happen at all since the immediate values should always be within
107103
the bounds [0, Sys.int_size]. *)
108-
| Ilsl, Ilsl ->
109-
if Misc.no_overflow_add imm1 imm2 && imm1 + imm2 <= Sys.int_size
110-
then (
111-
U.bitwise_shift_assert imm1 imm2;
112-
Some (Ilsl, imm1 + imm2))
113-
else None
114-
| Ilsr, Ilsr ->
115-
if Misc.no_overflow_add imm1 imm2 && imm1 + imm2 <= Sys.int_size
116-
then (
117-
U.bitwise_shift_assert imm1 imm2;
118-
Some (Ilsr, imm1 + imm2))
119-
else None
120-
| Iasr, Iasr ->
121-
if Misc.no_overflow_add imm1 imm2 && imm1 + imm2 <= Sys.int_size
122-
then (
123-
U.bitwise_shift_assert imm1 imm2;
124-
Some (Iasr, imm1 + imm2))
125-
else None
126-
(* for the amd64 instruction set the `ADD` `SUB` `MUL` opperations take at
127-
most an imm32 as the second argument, so we need to check for overflows on
128-
32-bit signed ints. *)
129-
(* CR-someday gtulba-lecu: This condition is architecture specific and should
130-
either live in amd64 specific code or this module should contain
131-
information about the architecture target. *)
132-
| Iadd, Iadd ->
133-
if Misc.no_overflow_add imm1 imm2
134-
&& U.amd64_imm32_within_bounds imm1 imm2 ( + )
135-
then Some (Iadd, imm1 + imm2)
136-
else None
104+
| Ilsl, Ilsl | Ilsr, Ilsr | Iasr, Iasr | Iadd, Iadd ->
105+
U.add_immediates op1 imm1 imm2
137106
| Iadd, Isub ->
107+
(* The following transformation changes the order of operations on [r] and
108+
therefore might change the overflow behavior: if [r+c1] overflows, but
109+
r-[c2-c1] does not overflow. This is fine, other compiler transformations
110+
may also do it. The code below only ensures that immediates that the
111+
compiler emits do not overflow. *)
138112
if imm1 >= imm2
139-
then
140-
if Misc.no_overflow_sub imm1 imm2
141-
&& U.amd64_imm32_within_bounds imm1 imm2 ( - )
142-
then Some (Iadd, imm1 - imm2)
143-
else None
144-
else if Misc.no_overflow_sub imm2 imm1
145-
&& U.amd64_imm32_within_bounds imm2 imm1 ( - )
146-
then Some (Isub, imm2 - imm1)
147-
else None
148-
| Isub, Isub ->
149-
if Misc.no_overflow_add imm1 imm2
150-
&& U.amd64_imm32_within_bounds imm1 imm2 ( + )
151-
then Some (Isub, imm1 + imm2)
152-
else None
113+
then U.sub_immediates Iadd imm1 imm2
114+
else U.sub_immediates Isub imm2 imm1
115+
| Isub, Isub (* r - (imm1 + imm2 *) -> U.add_immediates Isub imm1 imm2
153116
| Isub, Iadd ->
154117
if imm1 >= imm2
155-
then
156-
if Misc.no_overflow_sub imm1 imm2
157-
&& U.amd64_imm32_within_bounds imm1 imm2 ( - )
158-
then Some (Isub, imm1 - imm2)
159-
else None
160-
else if Misc.no_overflow_sub imm2 imm1
161-
&& U.amd64_imm32_within_bounds imm2 imm1 ( - )
162-
then Some (Iadd, imm2 - imm1)
163-
else None
118+
then U.sub_immediates Isub imm1 imm2
119+
else U.sub_immediates Iadd imm2 imm1
164120
| Ilsl, Imul ->
165-
if imm1 >= 0 && imm1 < 31
166-
&& Misc.no_overflow_mul (1 lsl imm1) imm2
167-
&& U.amd64_imm32_within_bounds (1 lsl imm1) imm2 ( * )
168-
then Some (Imul, (1 lsl imm1) * imm2)
121+
(* [imm1] is guaranteed to be within bounds for [Ilsl], but [1 lsl imm1] may
122+
not be within bounds for [Imul]. *)
123+
U.assert_within_range Ilsl imm1;
124+
let imm1 = 1 lsl imm1 in
125+
if U.is_immediate_for_intop Imul imm1
126+
then U.mul_immediates Imul imm1 imm2
169127
else None
170128
| Imul, Ilsl ->
171-
if imm2 >= 0 && imm2 < 31
172-
&& Misc.no_overflow_mul imm1 (1 lsl imm2)
173-
&& U.amd64_imm32_within_bounds imm1 (1 lsl imm2) ( * )
174-
then Some (Imul, imm1 * (1 lsl imm2))
175-
else None
176-
| Imul, Imul ->
177-
if Misc.no_overflow_mul imm1 imm2
178-
&& U.amd64_imm32_within_bounds imm1 imm2 ( * )
179-
then Some (Imul, imm1 * imm2)
129+
(* [imm2] is guaranteed to be within bounds for [Ilsl], but [1 lsl imm2] may
130+
not be within bounds for [Imul]. *)
131+
U.assert_within_range Ilsl imm2;
132+
let imm2 = 1 lsl imm2 in
133+
if U.is_immediate_for_intop Imul imm2
134+
then U.mul_immediates Imul imm1 imm2
180135
else None
136+
| Imul, Imul -> U.mul_immediates op1 imm1 imm2
181137
(* CR-soon gtulba-lecu: check this last case | Imod, Imod -> if imm1 mod imm2
182138
= 0 then Some (Imod, imm2) else None
183139
@@ -199,11 +155,8 @@ let fold_intop_imm (cell : Cfg.basic Cfg.instruction DLL.cell) =
199155
let snd_val = DLL.value snd in
200156
(* The following check does the following: 1. Ensures that both instructions
201157
use the same source register; 2. Ensures that both instructions output
202-
the result to the source register, this is redundant for amd64 since
203-
there are no instructions that invalidate this condition. *)
204-
(* CR-someday gtulba-lecu: This condition is architecture specific and
205-
should either live in amd64 specific code or this module should contain
206-
information about the architecture target. *)
158+
the result to the source register. This is currently redundant for amd64
159+
since there are no instructions that invalidate this condition. *)
207160
if Array.length fst_val.arg = 1
208161
&& Array.length snd_val.arg = 1
209162
&& Array.length fst_val.res = 1
@@ -235,9 +188,10 @@ let fold_intop_imm (cell : Cfg.basic Cfg.instruction DLL.cell) =
235188
| _ -> None
236189

237190
let apply cell =
238-
match remove_overwritten_mov cell with
239-
| None -> (
240-
match remove_useless_mov cell with
241-
| None -> ( match fold_intop_imm cell with None -> None | res -> res)
242-
| res -> res)
243-
| res -> res
191+
let[@inline always] if_none_do f o =
192+
match o with Some _ -> o | None -> f cell
193+
in
194+
None
195+
|> if_none_do remove_overwritten_mov
196+
|> if_none_do remove_useless_mov
197+
|> if_none_do fold_intop_imm

backend/peephole/peephole_utils.ml

+46-22
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,10 @@
11
module DLL = Flambda_backend_utils.Doubly_linked_list
22
open! Int_replace_polymorphic_compare
33

4-
(* CR-someday gtulba-lecu: make sure that this comparison is correct and
5-
sufficent. Take into consideration using Proc.regs_are_volatile in the
6-
future. As we only support amd64 and Proc.regs_are_volatile is always false
7-
in amd64 this is not necessary for now. See backend/cfg/cfg_deadcode.ml for
8-
more details.*)
94
let are_equal_regs (reg1 : Reg.t) (reg2 : Reg.t) =
105
Reg.same_loc reg1 reg2 && Cmm.equal_machtype_component reg1.typ reg2.typ
116

12-
(* CR-soon gtulba-lecu: Delete this when imeplementing auto-generated rules. *)
7+
(* CR-soon gtulba-lecu: Delete this when implementing auto-generated rules. *)
138
let go_back_const = 1
149

1510
let rec prev_at_most steps cell =
@@ -32,19 +27,48 @@ let get_cells cell size =
3227
assert (size > 0);
3328
get_cells' (DLL.next cell) (size - 1) [cell]
3429

35-
let is_bitwise_op (op : Operation.integer_operation) =
36-
match op with Iand | Ior | Ixor | Ilsl | Ilsr | Iasr -> true | _ -> false
37-
[@@ocaml.warning "-4"]
38-
39-
let bitwise_shift_assert (imm1 : int) (imm2 : int) =
40-
if imm1 < 0 || imm1 > Sys.int_size || imm2 < 0 || imm2 > Sys.int_size
41-
then assert false
42-
[@@inline]
43-
44-
(* CR-someday gtulba-lecu: This is architecture specific and should be moved in
45-
a different part of the compiler that is specific to the amd64 architecture.
46-
This is fine for now as we only support amd64. *)
47-
let amd64_imm32_within_bounds imm1 imm2 op =
48-
let imm = op imm1 imm2 in
49-
Int32.to_int Int32.min_int <= imm && imm <= Int32.to_int Int32.max_int
50-
[@@inline]
30+
(* CR-soon gyorsh: This functor is also instantiated in
31+
[Asmgen.compile_fundecl]. Find a shared place to put it, instead of
32+
instantiating twice. May require restructuring the backend to avoid
33+
dependency cycles. *)
34+
module Cfg_selection = Cfg_selectgen.Make (Cfg_selection)
35+
36+
let is_immediate_for_intop op n = Cfg_selection.is_immediate op n
37+
38+
let assert_within_range integer_operation imm =
39+
if not (is_immediate_for_intop integer_operation imm)
40+
then
41+
Misc.fatal_errorf "Peephole: unexpected immediate %d for operation %s" imm
42+
(Operation.string_of_integer_operation integer_operation)
43+
44+
let[@inline] op_immediates integer_operation imm1 imm2 no_overflow op =
45+
(* [no_overflow imm1 imm2] operation may assume that each of the immediates on
46+
its own is within bounds. *)
47+
assert_within_range integer_operation imm1;
48+
assert_within_range integer_operation imm2;
49+
let res = op imm1 imm2 in
50+
if no_overflow imm1 imm2 && is_immediate_for_intop integer_operation res
51+
then Some (integer_operation, res)
52+
else None
53+
54+
let add_immediates integer_operation imm1 imm2 =
55+
op_immediates integer_operation imm1 imm2 Misc.no_overflow_add ( + )
56+
57+
let sub_immediates integer_operation imm1 imm2 =
58+
op_immediates integer_operation imm1 imm2 Misc.no_overflow_sub ( - )
59+
60+
let mul_immediates integer_operation imm1 imm2 =
61+
op_immediates integer_operation imm1 imm2 Misc.no_overflow_mul ( * )
62+
63+
let never_overflow _ _ = true
64+
65+
let bitwise_immediates integer_operation imm1 imm2 op =
66+
(* Bitwise operations on immediates within range cannot produce immediates
67+
outside of range. Bitwise operations do not need overflow check. *)
68+
match op_immediates integer_operation imm1 imm2 never_overflow op with
69+
| None ->
70+
Misc.fatal_errorf
71+
"Peephole: cannot rewrite immediates for %s: combining %d %d = %d"
72+
(Operation.string_of_integer_operation integer_operation)
73+
imm1 imm2 (op imm1 imm2)
74+
| Some _ as res -> res

backend/peephole/peephole_utils.mli

+28-3
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,33 @@ val get_cells :
1313
int ->
1414
Cfg.basic Cfg.instruction DLL.cell list
1515

16-
val is_bitwise_op : Operation.integer_operation -> bool
16+
(** The following functions check for overflow and ranges of immediates w.r.t. the
17+
operation and optionally rewrite the operation. *)
18+
val add_immediates :
19+
Operation.integer_operation ->
20+
int ->
21+
int ->
22+
(Operation.integer_operation * int) option
23+
24+
val sub_immediates :
25+
Operation.integer_operation ->
26+
int ->
27+
int ->
28+
(Operation.integer_operation * int) option
29+
30+
val mul_immediates :
31+
Operation.integer_operation ->
32+
int ->
33+
int ->
34+
(Operation.integer_operation * int) option
35+
36+
val bitwise_immediates :
37+
Operation.integer_operation ->
38+
int ->
39+
int ->
40+
(int -> int -> int) ->
41+
(Operation.integer_operation * int) option
1742

18-
val bitwise_shift_assert : int -> int -> unit
43+
val assert_within_range : Operation.integer_operation -> int -> unit
1944

20-
val amd64_imm32_within_bounds : int -> int -> (int -> int -> int) -> bool
45+
val is_immediate_for_intop : Operation.integer_operation -> int -> bool

0 commit comments

Comments
 (0)