-
Notifications
You must be signed in to change notification settings - Fork 86
/
Copy pathCSEgen.ml
239 lines (217 loc) · 10.8 KB
/
CSEgen.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
(**************************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
(* *)
(* Copyright 2014 Institut National de Recherche en Informatique et *)
(* en Automatique. *)
(* *)
(* All rights reserved. This file is distributed under the terms of *)
(* the GNU Lesser General Public License version 2.1, with the *)
(* special exception on linking described in the file LICENSE. *)
(* *)
(**************************************************************************)
(* Common subexpression elimination by value numbering over extended
basic blocks. *)
open Mach
open CSE_utils
module CSE_utils_numbering = Make (struct type t = Mach.operation end)
open CSE_utils_numbering
(* Prepend a set of moves before [i] to assign [srcs] to [dsts]. *)
let insert_single_move i src dst =
instr_cons_debug (Iop Imove) [|src|] [|dst|] i.dbg i
let insert_move srcs dsts i =
match Array.length srcs with
| 0 -> i
| 1 -> instr_cons_debug (Iop Imove) srcs dsts i.dbg i
| _ -> (* Parallel move: first copy srcs into tmps one by one,
then copy tmps into dsts one by one *)
let tmps = Reg.createv_like srcs in
let i1 = Misc.Stdlib.Array.fold_left2 insert_single_move i tmps dsts in
Misc.Stdlib.Array.fold_left2 insert_single_move i1 srcs tmps
let rec split4 = function
[] -> ([], [], [], [])
| (x,y,z,w)::l ->
let (rx, ry, rz, rw) = split4 l in (x::rx, y::ry, z::rz, w::rw)
let rec combine4 l1 l2 l3 l4 =
match (l1, l2, l3, l4) with
([], [], [], []) -> []
| (a1::l1, a2::l2, a3::l3, a4::l4) -> (a1, a2, a3, a4) :: combine4 l1 l2 l3 l4
| (_, _, _, _) -> invalid_arg "combine4"
class cse_generic = object (self)
(* Default classification of operations. Can be overridden in
processor-specific files to classify specific operations better. *)
method class_of_operation op =
match op with
| Imove | Ispill | Ireload -> assert false (* treated specially *)
| Iconst_int _ | Iconst_float32 _ | Iconst_float _
| Iconst_symbol _ | Iconst_vec128 _ -> Op_pure
| Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _
| Iextcall _ | Iprobe _ | Iopaque -> assert false (* treated specially *)
| Istackoffset _ -> Op_other
| Iload { mutability; is_atomic } ->
(* #12173: disable CSE for atomic loads.
#12825: atomic loads cannot be treated as Op_other
because they update our view / the frontier of the
non-atomic locations, so past non-atomic (mutable) loads
may be not be valid anymore.
We conservatively treat them as non-initializing stores.
(the above are upstream PR numbers)
*)
if is_atomic then Op_store true
else Op_load (match mutability with
| Mutable -> Mutable
| Immutable -> Immutable)
| Istore(_,_,asg) -> Op_store asg
| Ialloc _ | Ipoll _ -> assert false (* treated specially *)
| Iintop _ -> Op_pure
| Iintop_imm(_, _) -> Op_pure
| Iintop_atomic _ -> Op_store true
| Ifloatop _
| Icsel _
| Istatic_cast _
| Ireinterpret_cast _ -> Op_pure
| Ispecific _ -> Op_other
| Iname_for_debugger _ -> Op_other
| Iprobe_is_enabled _ -> Op_other
| Ibeginregion | Iendregion -> Op_other
| Idls_get -> Op_load Mutable
(* Operations that are so cheap that it isn't worth factoring them. *)
method is_cheap_operation op =
match op with
| Iconst_int _ -> true
| _ -> false
(* Forget all equations involving mutable memory loads.
Performed after a non-initializing store *)
method private kill_loads n =
remove_mutable_load_numbering n
(* Perform CSE on the given instruction [i] and its successors.
[n] is the value numbering current at the beginning of [i]. *)
method private cse n i k =
match i.desc with
| Iend | Ireturn _ | Iop(Itailcall_ind) | Iop(Itailcall_imm _)
| Iexit _ | Iraise _ ->
k i
| Iop (Imove | Ispill | Ireload) ->
(* For moves, we associate the same value number to the result reg
as to the argument reg. *)
let n1 = set_move n i.arg.(0) i.res.(0) in
self#cse n1 i.next (fun next -> k { i with next; })
| Iop (Icall_ind | Icall_imm _ | Iextcall _ | Iprobe _) ->
(* For function calls and probes, we should at least forget:
- equations involving memory loads, since the callee can
perform arbitrary memory stores;
- equations involving arithmetic operations that can
produce [Addr]-typed derived pointers into the heap
(see below for Ialloc);
- mappings from hardware registers to value numbers,
since the callee does not preserve these registers.
That doesn't leave much usable information: checkbounds
could be kept, but won't be usable for CSE as one of their
arguments is always a memory load. For simplicity, we
just forget everything. *)
self#cse empty_numbering i.next (fun next -> k { i with next; })
| Iop Iopaque ->
(* Assume arbitrary side effects from Iopaque *)
self#cse empty_numbering i.next (fun next -> k { i with next; })
| Iop (Ialloc _) | Iop (Ipoll _) ->
(* For allocations, we must avoid extending the live range of a
pseudoregister across the allocation if this pseudoreg
is a derived heap pointer (a pointer into the heap that does
not point to the beginning of a Caml block). PR#6484 is an
example of this situation. Such pseudoregs have type [Addr].
Pseudoregs with types other than [Addr] can be kept.
Moreover, allocations and polls can trigger the asynchronous execution
of arbitrary Caml code (finalizer, signal handler, context
switch), which can contain non-initializing stores.
Hence, all equations over mutable loads must be removed. *)
let n1 = kill_addr_regs (self#kill_loads n) in
let n2 = set_unknown_regs n1 i.res in
self#cse n2 i.next (fun next -> k { i with next; })
| Iop op ->
begin match self#class_of_operation op with
| (Op_pure | Op_load _) as op_class ->
let (n1, varg) = valnum_regs n i.arg in
let n2 = set_unknown_regs n1 (Proc.destroyed_at_oper i.desc) in
begin match find_equation op_class n1 (op, varg) with
| Some vres ->
(* This operation was computed earlier. *)
(* Are there registers that hold the results computed earlier? *)
begin match find_regs_containing n1 vres with
| Some res when (not (self#is_cheap_operation op)) ->
(* We can replace res <- op args with r <- move res,
provided res are stable (non-volatile) registers.
If the operation is very cheap to compute, e.g.
an integer constant, don't bother. *)
let n3 = set_known_regs n1 i.res vres in
(* This is n1 above and not n2 because the move
does not destroy any regs *)
self#cse n3 i.next (fun next ->
k (insert_move res i.res next))
| _ ->
(* We already computed the operation but lost its
results. Associate the result registers to
the result valnums of the previous operation. *)
let n3 = set_known_regs n2 i.res vres in
self#cse n3 i.next (fun next -> k { i with next; })
end
| None ->
(* This operation produces a result we haven't seen earlier. *)
let n3 = set_fresh_regs n2 i.res (op, varg) op_class in
self#cse n3 i.next (fun next -> k { i with next; })
end
| Op_store false | Op_other ->
(* An initializing store or an "other" operation do not invalidate
any equations, but we do not know anything about the results. *)
let n1 = set_unknown_regs n (Proc.destroyed_at_oper i.desc) in
let n2 = set_unknown_regs n1 i.res in
self#cse n2 i.next (fun next -> k { i with next; })
| Op_store true ->
(* A non-initializing store can invalidate
anything we know about prior mutable loads. *)
let n1 = set_unknown_regs n (Proc.destroyed_at_oper i.desc) in
let n2 = set_unknown_regs n1 i.res in
let n3 = self#kill_loads n2 in
self#cse n3 i.next (fun next -> k { i with next; })
end
(* For control structures, we set the numbering to empty at every
join point, but propagate the current numbering across fork points. *)
| Iifthenelse(test, ifso, ifnot) ->
let n1 = set_unknown_regs n (Proc.destroyed_at_oper i.desc) in
self#cse n1 ifso (fun ifso ->
self#cse n1 ifnot (fun ifnot ->
self#cse empty_numbering i.next (fun next ->
k { i with desc = Iifthenelse(test, ifso, ifnot); next; })))
| Iswitch(index, cases) ->
let n1 = set_unknown_regs n (Proc.destroyed_at_oper i.desc) in
self#cse_array n1 cases (fun cases ->
self#cse empty_numbering i.next (fun next ->
k { i with desc = Iswitch(index, cases); next; }))
| Icatch(rec_flag, ts, handlers, body) ->
let nfail, t, handler_code, is_cold = split4 handlers in
self#cse_list empty_numbering handler_code (fun handler_code ->
let handlers = combine4 nfail t handler_code is_cold in
self#cse n body (fun body ->
self#cse empty_numbering i.next (fun next ->
k { i with desc = Icatch(rec_flag, ts, handlers, body); next; })))
| Itrywith(body, kind, (ts, handler)) ->
self#cse n body (fun body ->
self#cse empty_numbering handler (fun handler ->
self#cse empty_numbering i.next (fun next ->
k { i with desc = Itrywith(body, kind, (ts, handler)); next; })))
method private cse_array n is k =
self#cse_list n (Array.to_list is) (fun is -> k (Array.of_list is))
method private cse_list0 n is acc k =
match is with
| [] -> k acc
| i::is -> self#cse n i (fun i -> self#cse_list0 n is (i :: acc) k)
method private cse_list n is k =
self#cse_list0 n is [] (fun is_rev -> k (List.rev is_rev))
method fundecl f =
(* CSE can trigger bad register allocation behaviors, see MPR#7630 *)
if List.mem Cmm.No_CSE f.fun_codegen_options then
f
else
{ f with fun_body = self#cse empty_numbering f.fun_body Fun.id }
end