Skip to content

Commit 558f40e

Browse files
committed
New back-end optimization pass: common subexpression elimination (CSE).
(Reuses results of previous computations instead of recomputing them.) (Cherry-picked from branch backend-optim.) Tested on amd64/linux and i386/linux. Other back-ends compile (after assorted updates) but are untested. git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@14688 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
1 parent 95d98cd commit 558f40e

40 files changed

+660
-82
lines changed

.depend

+7-2
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,7 @@ bytecomp/typeopt.cmo : typing/types.cmi typing/typedtree.cmi \
576576
bytecomp/typeopt.cmx : typing/types.cmx typing/typedtree.cmx \
577577
typing/predef.cmx typing/path.cmx bytecomp/lambda.cmx typing/ident.cmx \
578578
typing/env.cmx typing/ctype.cmx bytecomp/typeopt.cmi
579+
asmcomp/CSEgen.cmi : asmcomp/mach.cmi
579580
asmcomp/asmgen.cmi : bytecomp/lambda.cmi asmcomp/cmm.cmi
580581
asmcomp/asmlibrarian.cmi :
581582
asmcomp/asmlink.cmi : asmcomp/cmx_format.cmi
@@ -618,6 +619,10 @@ asmcomp/selection.cmi : asmcomp/mach.cmi asmcomp/cmm.cmi
618619
asmcomp/spill.cmi : asmcomp/mach.cmi
619620
asmcomp/split.cmi : asmcomp/mach.cmi
620621
asmcomp/strmatch.cmi : asmcomp/cmm.cmi
622+
asmcomp/CSE.cmo : asmcomp/mach.cmi asmcomp/CSEgen.cmi asmcomp/arch.cmo
623+
asmcomp/CSE.cmx : asmcomp/mach.cmx asmcomp/CSEgen.cmx asmcomp/arch.cmx
624+
asmcomp/CSEgen.cmo : asmcomp/reg.cmi asmcomp/mach.cmi asmcomp/CSEgen.cmi
625+
asmcomp/CSEgen.cmx : asmcomp/reg.cmx asmcomp/mach.cmx asmcomp/CSEgen.cmi
621626
asmcomp/arch.cmo :
622627
asmcomp/arch.cmx :
623628
asmcomp/asmgen.cmo : bytecomp/translmod.cmi asmcomp/split.cmi \
@@ -629,7 +634,7 @@ asmcomp/asmgen.cmo : bytecomp/translmod.cmi asmcomp/split.cmi \
629634
asmcomp/emitaux.cmi asmcomp/emit.cmi asmcomp/deadcode.cmi \
630635
utils/config.cmi asmcomp/compilenv.cmi asmcomp/comballoc.cmi \
631636
asmcomp/coloring.cmi asmcomp/cmmgen.cmi asmcomp/cmm.cmi \
632-
asmcomp/closure.cmi utils/clflags.cmi asmcomp/asmgen.cmi
637+
asmcomp/closure.cmi utils/clflags.cmi asmcomp/CSE.cmo asmcomp/asmgen.cmi
633638
asmcomp/asmgen.cmx : bytecomp/translmod.cmx asmcomp/split.cmx \
634639
asmcomp/spill.cmx asmcomp/selection.cmx asmcomp/scheduling.cmx \
635640
asmcomp/reload.cmx asmcomp/reg.cmx asmcomp/proc.cmx asmcomp/printmach.cmx \
@@ -639,7 +644,7 @@ asmcomp/asmgen.cmx : bytecomp/translmod.cmx asmcomp/split.cmx \
639644
asmcomp/emitaux.cmx asmcomp/emit.cmx asmcomp/deadcode.cmx \
640645
utils/config.cmx asmcomp/compilenv.cmx asmcomp/comballoc.cmx \
641646
asmcomp/coloring.cmx asmcomp/cmmgen.cmx asmcomp/cmm.cmx \
642-
asmcomp/closure.cmx utils/clflags.cmx asmcomp/asmgen.cmi
647+
asmcomp/closure.cmx utils/clflags.cmx asmcomp/CSE.cmx asmcomp/asmgen.cmi
643648
asmcomp/asmlibrarian.cmo : utils/misc.cmi parsing/location.cmi \
644649
utils/config.cmi asmcomp/compilenv.cmi asmcomp/cmx_format.cmi \
645650
utils/clflags.cmi asmcomp/clambda.cmi utils/ccomp.cmi asmcomp/asmlink.cmi \

Changes

+2
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ Compilers:
3939
int32/int64/nativeint arithmetic. Constant propagation for floats
4040
can be turned off with option -no-float-const-prop, for codes that
4141
change FP rounding modes at run-time.
42+
- New back-end optimization pass: common subexpression elimination (CSE).
43+
(Reuses results of previous computations instead of recomputing them.)
4244
- New back-end optimization pass: dead code elimination.
4345
(Removes arithmetic and load instructions whose results are unused.)
4446
- PR#6269 Optimization of string matching (patch by Benoit Vaugon

Makefile

+11-1
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,9 @@ ASMCOMP=asmcomp/arch.cmo asmcomp/debuginfo.cmo \
8484
asmcomp/clambda.cmo asmcomp/printclambda.cmo asmcomp/compilenv.cmo \
8585
asmcomp/closure.cmo asmcomp/strmatch.cmo asmcomp/cmmgen.cmo \
8686
asmcomp/printmach.cmo asmcomp/selectgen.cmo asmcomp/selection.cmo \
87-
asmcomp/comballoc.cmo asmcomp/liveness.cmo \
87+
asmcomp/comballoc.cmo \
88+
asmcomp/CSEgen.cmo asmcomp/CSE.cmo \
89+
asmcomp/liveness.cmo \
8890
asmcomp/spill.cmo asmcomp/split.cmo \
8991
asmcomp/interf.cmo asmcomp/coloring.cmo \
9092
asmcomp/reloadgen.cmo asmcomp/reload.cmo \
@@ -589,6 +591,14 @@ partialclean::
589591

590592
beforedepend:: asmcomp/selection.ml
591593

594+
asmcomp/CSE.ml: asmcomp/$(ARCH)/CSE.ml
595+
ln -s $(ARCH)/CSE.ml asmcomp/CSE.ml
596+
597+
partialclean::
598+
rm -f asmcomp/CSE.ml
599+
600+
beforedepend:: asmcomp/CSE.ml
601+
592602
asmcomp/reload.ml: asmcomp/$(ARCH)/reload.ml
593603
ln -s $(ARCH)/reload.ml asmcomp/reload.ml
594604

asmcomp/CSEgen.ml

+258
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,258 @@
1+
(***********************************************************************)
2+
(* *)
3+
(* OCaml *)
4+
(* *)
5+
(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
6+
(* *)
7+
(* Copyright 2014 Institut National de Recherche en Informatique et *)
8+
(* en Automatique. All rights reserved. This file is distributed *)
9+
(* under the terms of the Q Public License version 1.0. *)
10+
(* *)
11+
(***********************************************************************)
12+
13+
(* Common subexpression elimination by value numbering over extended
14+
basic blocks. *)
15+
16+
open Mach
17+
18+
(* A value number is an integer used as the name of one computed value. *)
type valnum = int
19+
20+
(* We maintain sets of equations of the form
21+
valnums = operation(valnums)
22+
plus a mapping from registers to value numbers. *)
23+
24+
(* Right-hand side of an equation: an operation paired with the value
   numbers of its arguments. *)
type rhs = operation * valnum array
25+
26+
(* Finite maps keyed by [rhs]. Keys are ordered with the polymorphic
   comparison [Pervasives.compare]. *)
module Equations =
27+
Map.Make(struct type t = rhs let compare = Pervasives.compare end)
28+
29+
(* The state threaded through the pass. *)
type numbering =
30+
{ num_next: int; (* next fresh value number *)
31+
num_eqs: valnum array Equations.t; (* mapping rhs -> valnums *)
32+
num_reg: valnum Reg.Map.t } (* mapping register -> valnum *)
33+
34+
(* Initial state: no equation, no register mapped, and 0 as the first
   fresh value number. *)
let empty_numbering =
35+
{ num_next = 0; num_eqs = Equations.empty; num_reg = Reg.Map.empty }
36+
37+
(** [valnum_reg n r] returns the value number for the contents of
38+
register [r]. If none exists, a fresh value number is returned
39+
and associated with register [r]. The possibly updated numbering
40+
is also returned. [valnum_regs] is similar, but for an array of
41+
registers. *)
(* The result is the pair (numbering, value number). *)
42+
43+
let valnum_reg n r =
44+
try
45+
(* [r] is already mapped: the numbering is returned unchanged. *)
(n, Reg.Map.find r n.num_reg)
46+
with Not_found ->
47+
(* [r] is not mapped: use [n.num_next] as its value number and
   advance the counter by one. *)
let v = n.num_next in
48+
({n with num_next = v + 1; num_reg = Reg.Map.add r v n.num_reg}, v)
49+
50+
(* [valnum_regs n rs] applies [valnum_reg] to every register of the array
   [rs], from index 0 upwards, threading the numbering through the calls.
   Returns the final numbering and the array of value numbers, in the
   same order as [rs]. *)
let valnum_regs n rs =
51+
let l = Array.length rs in
52+
(* 0 is only a placeholder: every slot is overwritten by the loop. *)
let vs = Array.make l 0 in
53+
(* Local mutable cell holding the numbering as it is updated; this
   shadows the parameter [n]. *)
let n = ref n in
54+
for i = 0 to l-1 do
55+
let (ni, vi) = valnum_reg !n rs.(i) in
56+
vs.(i) <- vi;
57+
n := ni
58+
done;
59+
(!n, vs)
60+
61+
(* Look up the set of equations for an equation with the given rhs.
62+
Return [Some res] if there is one, where [res] is the lhs. *)
(* [rhs] is a pair (operation, argument value numbers) and [res] is the
   array of result value numbers stored for it in [n.num_eqs].
   Returns [None] when no binding exists. The numbering [n] is only
   read, never modified. *)
63+
64+
let find_equation n rhs =
65+
try
66+
Some(Equations.find rhs n.num_eqs)
67+
with Not_found ->
68+
None
69+
70+
(* Find a set of registers containing the given value numbers. *)
(* Returns [Some regs] with one register per value number of [vs], or
   [None] if no register currently mapped in [n.num_reg] holds the value.
   Only arrays of length 0 or 1 are handled; any longer array reaches
   [assert false]. *)
71+
72+
let find_regs_containing n vs =
73+
match Array.length vs with
74+
(* No value to find: trivially satisfied by the empty register array. *)
| 0 -> Some [||]
75+
| 1 -> let v = vs.(0) in
76+
(* The fold visits every binding and overwrites the accumulator on
   each match, so when several registers hold [v] the one visited last
   by [Reg.Map.fold] is returned. *)
Reg.Map.fold (fun r v' res -> if v' = v then Some [|r|] else res)
77+
n.num_reg None
78+
| _ -> assert false
79+
80+
(* Associate the given value numbers to the given result registers,
81+
without adding new equations. *)
(* [rs] is the array of result registers and [vs] the array of value
   numbers. The dispatch is on the length of [rs] only: with no result
   register the numbering is returned unchanged, with one register it is
   bound to [vs.(0)] (replacing any previous binding), and more than one
   register reaches [assert false]. *)
82+
83+
let set_known_regs n rs vs =
84+
match Array.length rs with
85+
| 0 -> n
86+
| 1 -> { n with num_reg = Reg.Map.add rs.(0) vs.(0) n.num_reg }
87+
| _ -> assert false
88+
89+
(* Record the effect of a move: no new equations, but the result reg
90+
maps to the same value number as the argument reg. *)
(* If [src] has no value number yet, [valnum_reg] gives it a fresh one
   first, so the returned numbering may also contain a new binding for
   [src]. Any previous binding of [dst] is replaced. *)
91+
92+
let set_move n src dst =
93+
let (n1, v) = valnum_reg n src in
94+
{ n1 with num_reg = Reg.Map.add dst v n1.num_reg }
95+
96+
(* Record the equation [fresh valnums = rhs] and associate the given
97+
result registers [rs] to [fresh valnums]. *)
(* The dispatch is on the length of [rs]; more than one result register
   reaches [assert false]. *)
98+
99+
let set_fresh_regs n rs rhs =
100+
match Array.length rs with
101+
(* No result register: the equation is recorded with an empty array of
   result value numbers, and the counter and register map are unchanged. *)
| 0 -> { n with num_eqs = Equations.add rhs [||] n.num_eqs }
102+
(* One result register: take [n.num_next] as the fresh value number,
   record the equation, bind the register to it, and advance the
   counter. *)
| 1 -> let v = n.num_next in
103+
{ num_next = v + 1;
104+
num_eqs = Equations.add rhs [|v|] n.num_eqs;
105+
num_reg = Reg.Map.add rs.(0) v n.num_reg }
106+
| _ -> assert false
107+
108+
(* Forget everything we know about the given result registers,
109+
which are receiving unpredictable values at run-time. *)
(* Only the register map is changed: each register of [rs] is removed
   from [n.num_reg]. The equations and the fresh-number counter are kept
   as they are. An empty [rs] leaves the register map without any
   removal. *)
110+
111+
let set_unknown_regs n rs =
112+
{ n with num_reg = Array.fold_right Reg.Map.remove rs n.num_reg }
113+
114+
(* Keep only the equations satisfying the given predicate. *)
(* [pred] is applied to the operation of each equation only; the
   argument value numbers and the result value numbers are ignored.
   The register map and the fresh-number counter of [n] are unchanged. *)
115+
116+
let filter_equations pred n =
117+
{ n with num_eqs = Equations.filter (fun (op,_) res -> pred op) n.num_eqs }
118+
119+
(* Prepend a reg-reg move *)
(* [insert_move srcs dsts i] returns an instruction sequence that starts
   with a move from [srcs] to [dsts] and continues with [i]. The dispatch
   is on the length of [srcs] only: with no source register there is
   nothing to move and [i] is returned as is; more than one source
   register reaches [assert false]. *)
120+
121+
let insert_move srcs dsts i =
122+
match Array.length srcs with
123+
| 0 -> i
124+
| 1 -> instr_cons (Iop Imove) srcs dsts i
125+
| _ -> assert false
126+
127+
(* Classification of operations *)
(* The class of an operation decides how the pass treats it: operations
   of the first three classes are looked up in, and recorded into, the
   set of equations; a store with argument [true] makes the pass drop
   the equations on loads; for the remaining cases the pass only forgets
   what it knows about the result registers. *)
128+
129+
type op_class =
130+
| Op_pure (* pure, produce one result *)
131+
| Op_checkbound (* checkbound-style: no result, can raise an exn *)
132+
| Op_load (* memory load *)
133+
| Op_store of bool (* memory store, false = init, true = assign *)
134+
| Op_other (* anything else that does not store in memory *)
135+
136+
(* Generic CSE pass. It is a class so that the two classification
   methods [class_of_operation] and [is_cheap_operation] can be
   overridden; [fundecl] is the entry point. *)
class cse_generic = object (self)
137+
138+
(* Default classification of operations. Can be overridden in
139+
processor-specific files to classify specific operations better. *)
140+
141+
method class_of_operation op =
142+
match op with
143+
| Imove | Ispill | Ireload -> assert false (* treated specially *)
144+
| Iconst_int _ | Iconst_float _ | Iconst_symbol _
145+
| Iconst_blockheader _ -> Op_pure
146+
| Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _
147+
| Iextcall _ -> assert false (* treated specially *)
148+
| Istackoffset _ -> Op_other
149+
| Iload(_,_) -> Op_load
150+
(* The third component of a store is passed on as the init/assign flag. *)
| Istore(_,_,asg) -> Op_store asg
151+
| Ialloc _ -> Op_other
152+
(* The checkbound cases must come before the generic integer cases
   below, which would otherwise classify them as pure. *)
| Iintop(Icheckbound) -> Op_checkbound
153+
| Iintop _ -> Op_pure
154+
| Iintop_imm(Icheckbound, _) -> Op_checkbound
155+
| Iintop_imm(_, _) -> Op_pure
156+
| Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf
157+
| Ifloatofint | Iintoffloat -> Op_pure
158+
| Ispecific _ -> Op_other
159+
160+
(* Operations that are so cheap that it isn't worth factoring them. *)
(* By default only integer constants and block header constants. *)
161+
162+
method is_cheap_operation op =
163+
match op with
164+
| Iconst_int _ | Iconst_blockheader _ -> true
165+
| _ -> false
166+
167+
(* Forget all equations involving memory loads. Performed after a
168+
non-initializing store *)
169+
170+
method private kill_loads n =
171+
filter_equations (fun o -> self#class_of_operation o <> Op_load) n
172+
173+
(* Keep only equations involving checkbounds, and forget register values.
174+
Performed across a call. *)
(* The fresh-number counter [num_next] is preserved, so value numbers
   handed out later stay distinct from those in the kept equations. *)
175+
176+
method private keep_checkbounds n =
177+
filter_equations (fun o -> self#class_of_operation o = Op_checkbound)
178+
{n with num_reg = Reg.Map.empty }
179+
180+
(* Perform CSE on the given instruction [i] and its successors.
181+
[n] is the value numbering current at the beginning of [i]. *)
(* Returns the rewritten instruction sequence. *)
182+
183+
method private cse n i =
184+
match i.desc with
185+
(* These instructions are returned unchanged and [i.next] is not
   visited. *)
| Iend | Ireturn | Iop(Itailcall_ind) | Iop(Itailcall_imm _)
186+
| Iexit _ | Iraise _ ->
187+
i
188+
| Iop (Imove | Ispill | Ireload) ->
189+
(* For moves, we associate the same value number to the result reg
190+
as to the argument reg. *)
191+
let n1 = set_move n i.arg.(0) i.res.(0) in
192+
{i with next = self#cse n1 i.next}
193+
| Iop (Icall_ind | Icall_imm _ | Iextcall _) ->
194+
(* We don't perform CSE across function calls, as it increases
195+
register pressure too much. We do remember the checkbound
196+
instructions already performed, though, since their reuse
197+
cannot increase register pressure. *)
198+
let n1 = self#keep_checkbounds n in
199+
{i with next = self#cse n1 i.next}
200+
| Iop op ->
201+
begin match self#class_of_operation op with
202+
| Op_pure | Op_checkbound | Op_load ->
203+
(* The helper functions above only support zero or one result. *)
assert (Array.length i.res <= 1);
204+
(* Number the arguments first; this may allocate fresh value numbers. *)
let (n1, varg) = valnum_regs n i.arg in
205+
begin match find_equation n1 (op, varg) with
206+
| Some vres ->
207+
(* This operation was computed earlier. *)
208+
(* [n2] binds the result register to the earlier value number. The
   register lookup below uses [n1], i.e. the state before [i.res] is
   rebound. *)
let n2 = set_known_regs n1 i.res vres in
209+
begin match find_regs_containing n1 vres with
210+
| Some res when not (self#is_cheap_operation op) ->
211+
(* We can replace res <- op args with r <- move res.
212+
If the operation is very cheap to compute, e.g.
213+
an integer constant, don't bother. *)
214+
(* For an operation without result, [res] is the empty array and
   [insert_move] returns the rewritten successor as is, so the
   redundant instruction disappears from the sequence. *)
insert_move res i.res (self#cse n2 i.next)
215+
| _ ->
216+
(* No register holds the value any more, or the operation is cheap:
   keep the instruction but continue with the updated numbering. *)
{i with next = self#cse n2 i.next}
217+
end
218+
| None ->
219+
(* This operation produces a result we haven't seen earlier. *)
220+
let n2 = set_fresh_regs n1 i.res (op, varg) in
221+
{i with next = self#cse n2 i.next}
222+
end
223+
| Op_store false | Op_other ->
224+
(* An initializing store or an "other" operation do not invalidate
225+
any equations, but we do not know anything about the results. *)
226+
let n1 = set_unknown_regs n i.res in
227+
{i with next = self#cse n1 i.next}
228+
| Op_store true ->
229+
(* A non-initializing store: it can invalidate
230+
anything we know about prior loads. *)
231+
let n1 = set_unknown_regs (self#kill_loads n) i.res in
232+
{i with next = self#cse n1 i.next}
233+
end
234+
(* For control structures, we set the numbering to empty at every
235+
join point, but propagate the current numbering across fork points. *)
(* Concretely: in every case below the continuation [i.next] restarts
   from [empty_numbering]. Branches, switch cases and the bodies of
   catch and try blocks start from [n]; handlers and loop bodies start
   from [empty_numbering]. *)
236+
| Iifthenelse(test, ifso, ifnot) ->
237+
{i with desc = Iifthenelse(test, self#cse n ifso, self#cse n ifnot);
238+
next = self#cse empty_numbering i.next}
239+
| Iswitch(index, cases) ->
240+
{i with desc = Iswitch(index, Array.map (self#cse n) cases);
241+
next = self#cse empty_numbering i.next}
242+
| Iloop(body) ->
243+
{i with desc = Iloop(self#cse empty_numbering body);
244+
next = self#cse empty_numbering i.next}
245+
| Icatch(nfail, body, handler) ->
246+
{i with desc = Icatch(nfail, self#cse n body, self#cse empty_numbering handler);
247+
next = self#cse empty_numbering i.next}
248+
| Itrywith(body, handler) ->
249+
{i with desc = Itrywith(self#cse n body, self#cse empty_numbering handler);
250+
next = self#cse empty_numbering i.next}
251+
252+
(* Entry point: returns [f] with its body rewritten by [cse], starting
   from the empty numbering. The other fields of [f] are unchanged. *)
method fundecl f =
253+
{f with fun_body = self#cse empty_numbering f.fun_body}
254+
255+
end
256+
257+
258+

asmcomp/CSEgen.mli

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
(***********************************************************************)
2+
(* *)
3+
(* OCaml *)
4+
(* *)
5+
(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
6+
(* *)
7+
(* Copyright 2014 Institut National de Recherche en Informatique et *)
8+
(* en Automatique. All rights reserved. This file is distributed *)
9+
(* under the terms of the Q Public License version 1.0. *)
10+
(* *)
11+
(***********************************************************************)
12+
13+
(* Common subexpression elimination by value numbering over extended
14+
basic blocks. *)
15+
16+
(* Classification of operations, as returned by the method
   [class_of_operation] of the class declared below. *)
type op_class =
17+
| Op_pure (* pure, produce one result *)
18+
| Op_checkbound (* checkbound-style: no result, can raise an exn *)
19+
| Op_load (* memory load *)
20+
| Op_store of bool (* memory store, false = init, true = assign *)
21+
| Op_other (* anything else that does not store in memory *)
22+
23+
(* Generic CSE pass. Only the three methods listed here are part of the
   interface. *)
class cse_generic : object
24+
(* The following methods can be overridden to handle processor-specific
25+
operations. *)
26+
27+
(* Classifies an operation into one of the [op_class] cases. *)
method class_of_operation: Mach.operation -> op_class
28+
29+
method is_cheap_operation: Mach.operation -> bool
30+
(* Operations that are so cheap that it isn't worth factoring them. *)
31+
32+
(* The following method is the entry point and should not be overridden *)
33+
(* Takes a function declaration and returns the transformed one. *)
method fundecl: Mach.fundecl -> Mach.fundecl
34+
35+
end
36+
37+
38+

0 commit comments

Comments
 (0)