Skip to content

Commit

Permalink
Port Comballoc to work on CFG values (ocaml-flambda#2163)
Browse files Browse the repository at this point in the history
  • Loading branch information
xclerc authored Feb 12, 2024
1 parent 1ac79a9 commit c22043e
Show file tree
Hide file tree
Showing 9 changed files with 243 additions and 9 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ jobs:
- name: gi
config: --enable-middle-end=flambda2
os: ubuntu-latest
build_ocamlparam: '_,w=-46,regalloc=gi,regalloc-param=SPLIT_LIVE_RANGES:on,regalloc-param=GI_PRIORITY_HEURISTICS:interval-length,regalloc-param=GI_SELECTION_HEURISTICS:first-available,regalloc-param=GI_SPILLING_HEURISTICS:flat-uses,regalloc-validate=1'
build_ocamlparam: '_,w=-46,regalloc=gi,regalloc-param=SPLIT_LIVE_RANGES:on,regalloc-param=GI_PRIORITY_HEURISTICS:interval-length,regalloc-param=GI_SELECTION_HEURISTICS:first-available,regalloc-param=GI_SPILLING_HEURISTICS:flat-uses,regalloc-validate=1,cfg-cse-optimize=1'
check_arch: true

- name: build_upstream_closure
Expand Down
24 changes: 17 additions & 7 deletions backend/asmgen.ml
Original file line number Diff line number Diff line change
Expand Up @@ -279,18 +279,25 @@ let compile_fundecl ~ppf_dump ~funcnames fd_cmm =
++ Compiler_hooks.execute_and_pipe Compiler_hooks.Mach_polling
++ Profile.record ~accumulate:true "checkmach"
(Checkmach.fundecl ~future_funcnames:funcnames ppf_dump)
++ Profile.record ~accumulate:true "comballoc" Comballoc.fundecl
++ Compiler_hooks.execute_and_pipe Compiler_hooks.Mach_combine
++ pass_dump_if ppf_dump dump_combine "After allocation combining"
++ (fun fd ->
++ (fun fd ->
match !Flambda_backend_flags.cfg_cse_optimize with
| false ->
fd
++ pass_dump_if ppf_dump dump_combine "Before allocation combining"
++ Profile.record ~accumulate:true "comballoc" Comballoc.fundecl
++ Compiler_hooks.execute_and_pipe Compiler_hooks.Mach_combine
++ pass_dump_if ppf_dump dump_combine "After allocation combining"
++ Profile.record ~accumulate:true "cse" CSE.fundecl
++ Compiler_hooks.execute_and_pipe Compiler_hooks.Mach_cse
++ pass_dump_if ppf_dump dump_cse "After CSE"
| true ->
fd)
| true ->
(* Will happen after `Cfgize`. *)
(match register_allocator fd with
| Upstream ->
fatal_error "-cfg-cse-optimize should only be used with a CFG register allocator"
| GI | IRC | LS ->
());
fd)
++ Profile.record ~accumulate:true "regalloc" (fun (fd : Mach.fundecl) ->
match register_allocator fd with
| ((GI | IRC | LS) as regalloc) ->
Expand All @@ -304,7 +311,10 @@ let compile_fundecl ~ppf_dump ~funcnames fd_cmm =
| false -> cfg_with_layout
| true ->
cfg_with_layout
++ Profile.record ~accumulate:true "cse" CSE.cfg_with_layout)
++ Profile.record ~accumulate:true "cfg_comballoc" Cfg_comballoc.run
++ Compiler_hooks.execute_and_pipe Compiler_hooks.Cfg_combine
++ Profile.record ~accumulate:true "cfg_cse" CSE.cfg_with_layout
++ Compiler_hooks.execute_and_pipe Compiler_hooks.Cfg_cse)
++ Cfg_with_infos.make
++ Profile.record ~accumulate:true "cfg_deadcode" Cfg_deadcode.run
in
Expand Down
196 changes: 196 additions & 0 deletions backend/cfg/cfg_comballoc.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
[@@@ocaml.warning "+a-30-40-41-42"]

module List = ListLabels
module DLL = Flambda_backend_utils.Doubly_linked_list

type cell = Cfg.basic Cfg.instruction DLL.cell

(* Description of an allocation: has the fields of a [Cfg.Alloc _] value, and a
cell so that the instruction can be modified. *)
type allocation =
{ bytes : int;
dbginfo : Debuginfo.alloc_dbginfo;
mode : Lambda.locality_mode;
cell : cell
}

(* Description of allocations that can be folded into a previous one, and a cell
indicating where to continue the process. *)
type compatible_allocations =
{ allocations : allocation list;
next_cell : cell option
}

(* [max_instr_id cfg] returns the maximum instruction identifier in [cfg]. *)
let max_instr_id : Cfg.t -> int =
fun cfg ->
(* CR-someday xclerc for xclerc: factor out with similar function in
regalloc/. *)
Cfg.fold_blocks cfg ~init:Int.min_int ~f:(fun _label block max_id ->
let max_id =
DLL.fold_left block.body ~init:max_id ~f:(fun max_id instr ->
Int.max max_id instr.Cfg.id)
in
Int.max max_id block.terminator.id)

(* [find_next_allocation cell] returns the first allocation found by iterating
from [cell]. *)
let rec find_next_allocation : cell option -> allocation option =
fun cell ->
match cell with
| None -> None
| Some cell -> (
let instr = DLL.value cell in
match instr.desc with
| Op (Alloc { bytes; dbginfo; mode }) -> Some { bytes; dbginfo; mode; cell }
| Op
( Move | Spill | Reload | Const_int _ | Const_float _ | Const_symbol _
| Const_vec128 _ | Stackoffset _ | Load _ | Store _ | Intop _
| Intop_imm _ | Intop_atomic _ | Negf | Absf | Addf | Subf | Mulf | Divf
| Compf _ | Csel _ | Floatofint | Intoffloat | Valueofint | Intofvalue
| Vectorcast _ | Scalarcast _ | Probe_is_enabled _ | Opaque
| Begin_region | End_region | Specific _ | Name_for_debugger _ | Dls_get
| Poll )
| Reloadretaddr | Pushtrap _ | Poptrap | Prologue ->
find_next_allocation (DLL.next cell))

(* [find_compatible_allocations cell ~curr_mode ~curr_size] returns the
allocations compatible with mode [curr_mode] and total size [curr_size]. *)
let find_compatible_allocations :
cell option ->
curr_mode:Lambda.locality_mode ->
curr_size:int ->
compatible_allocations =
fun cell ~curr_mode ~curr_size ->
let rec loop (allocations : allocation list) (cell : cell option)
~(curr_mode : Lambda.alloc_mode) ~(curr_size : int) :
compatible_allocations =
match cell with
| None -> { allocations = List.rev allocations; next_cell = None }
| Some cell -> (
let instr = DLL.value cell in
let return () =
{ allocations = List.rev allocations; next_cell = Some cell }
in
match instr.desc with
| Op (Alloc { bytes; dbginfo; mode }) ->
let is_compatible =
Lambda.equal_alloc_mode mode curr_mode
&& (curr_size + bytes
<= (Config.max_young_wosize + 1) * Arch.size_addr
|| Lambda.is_local_mode mode)
in
if is_compatible
then
let allocation = { bytes; dbginfo; mode; cell } in
loop
(allocation :: allocations)
(DLL.next cell) ~curr_mode ~curr_size:(curr_size + bytes)
else { allocations = List.rev allocations; next_cell = Some cell }
| Op (Begin_region | End_region) -> (
match curr_mode with
| Lambda.Alloc_local -> return ()
| Lambda.Alloc_heap ->
loop allocations (DLL.next cell) ~curr_mode ~curr_size)
| Op Poll -> return ()
| Reloadretaddr | Poptrap | Prologue | Pushtrap _ ->
(* CR-soon xclerc for xclerc: is it too conservative? (note: only the
`Pushtrap` case may be too conservative) *)
{ allocations = List.rev allocations; next_cell = Some cell }
| Op
( Move | Spill | Reload | Negf | Absf | Addf | Subf | Mulf | Divf
| Floatofint | Intoffloat | Valueofint | Intofvalue | Vectorcast _
| Opaque | Const_int _ | Const_float _ | Const_vec128 _
| Const_symbol _ | Stackoffset _ | Load _
| Store (_, _, _)
| Compf _ | Csel _ | Specific _ | Name_for_debugger _
| Probe_is_enabled _ | Scalarcast _ | Dls_get
| Intop
( Iadd | Isub | Imul | Idiv | Imod | Iand | Ior | Ixor | Ilsl
| Ilsr | Iasr | Ipopcnt | Imulh _ | Iclz _ | Ictz _ | Icomp _ )
| Intop_imm
( ( Iadd | Isub | Imul | Idiv | Imod | Iand | Ior | Ixor | Ilsl
| Ilsr | Iasr | Ipopcnt | Imulh _ | Iclz _ | Ictz _ | Icomp _ ),
_ )
| Intop_atomic _ ) ->
loop allocations (DLL.next cell) ~curr_mode ~curr_size)
in
loop [] cell ~curr_mode ~curr_size

(** [combine ~max_instr_id cell] combines allocations, starting from [cell] and
using [max_instr_id] as the counter to get new instruction identifiers.
The allocation are combined by repeatedly:
- 1. looking for a "first" allocation;
- 2. looking for all subsequent allocations compatible with the "first" one;
- 3. continuing the process at step 1. from the instruction after the last
one seen at step 2.
When steps 1 and 2 are both successful, allocations are effectively
combined. This means that:
- the "first" allocation is made bigger to account for all allocations;
- the other allocations are replaced with a reference to the result of the
previous allocation, with a different offset. *)
let rec combine : max_instr_id:int ref -> cell option -> unit =
fun ~max_instr_id cell ->
let first_allocation = find_next_allocation cell in
match first_allocation with
| None -> ()
| Some { bytes; dbginfo; mode; cell } ->
assert (List.length dbginfo = 1);
let compatible_allocs =
find_compatible_allocations (DLL.next cell) ~curr_mode:mode
~curr_size:bytes
in
(match compatible_allocs.allocations with
| [] -> ()
| other_allocations ->
let first_allocation_instr = DLL.value cell in
let first_allocation_res0 = first_allocation_instr.res.(0) in
(* First, replace the "other" allocations with a reference to the result
of the previous allocation and compute the total size. *)
let total_size_of_other_allocations, dbginfo_of_other_allocations, _ =
List.fold_left other_allocations ~init:(0, [], first_allocation_res0)
~f:(fun (size, dbginfos, prev_res0) other_allocation ->
let other_allocation_instr = DLL.value other_allocation.cell in
let res0 = other_allocation_instr.res.(0) in
DLL.set_value other_allocation.cell
{ other_allocation_instr with
desc = Cfg.Op (Intop_imm (Mach.Iadd, -other_allocation.bytes));
arg = [| prev_res0 |]
};
( size + other_allocation.bytes,
other_allocation.dbginfo @ dbginfos,
res0 ))
in
(* Then, change the size of the first allocation so that it is the sum of
all allocations, and update the debug info. *)
DLL.set_value cell
{ first_allocation_instr with
desc =
Cfg.Op
(Alloc
{ bytes = bytes + total_size_of_other_allocations;
dbginfo = dbginfo_of_other_allocations @ dbginfo;
mode
})
};
incr max_instr_id;
DLL.insert_after cell
{ first_allocation_instr with
desc = Cfg.Op (Intop_imm (Mach.Iadd, total_size_of_other_allocations));
arg = [| first_allocation_res0 |];
res = [| first_allocation_res0 |];
id = !max_instr_id
});
combine ~max_instr_id compatible_allocs.next_cell

let run : Cfg_with_layout.t -> Cfg_with_layout.t =
fun cfg_with_layout ->
let cfg = Cfg_with_layout.cfg cfg_with_layout in
let max_instr_id = ref (max_instr_id cfg) in
Cfg.iter_blocks cfg ~f:(fun _label block ->
combine ~max_instr_id (DLL.hd_cell block.body));
cfg_with_layout
4 changes: 4 additions & 0 deletions backend/cfg/cfg_comballoc.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[@@@ocaml.warning "+a-30-40-41-42"]

(* Combine heap allocations occurring in the same basic block. *)
val run : Cfg_with_layout.t -> Cfg_with_layout.t
12 changes: 12 additions & 0 deletions driver/compiler_hooks.ml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ type _ pass =
| Mach_sel : Mach.fundecl pass
| Mach_split : Mach.fundecl pass
| Linear : Linear.fundecl pass
| Cfg_combine : Cfg_with_layout.t pass
| Cfg_cse : Cfg_with_layout.t pass
| Cfg : Cfg_with_layout.t pass
| Cmm : Cmm.phrase list pass

Expand All @@ -53,6 +55,8 @@ type t = {
mutable mach_sel : (Mach.fundecl -> unit) list;
mutable mach_split : (Mach.fundecl -> unit) list;
mutable linear : (Linear.fundecl -> unit) list;
mutable cfg_combine : (Cfg_with_layout.t -> unit) list;
mutable cfg_cse : (Cfg_with_layout.t -> unit) list;
mutable cfg : (Cfg_with_layout.t -> unit) list;
mutable cmm : (Cmm.phrase list -> unit) list;
mutable inlining_tree : (Flambda2_simplify_shared.Inlining_report.Inlining_tree.t -> unit) list;
Expand All @@ -76,6 +80,8 @@ let hooks : t = {
mach_sel = [];
mach_split = [];
linear = [];
cfg_combine = [];
cfg_cse = [];
cfg = [];
cmm = [];
inlining_tree = [];
Expand Down Expand Up @@ -106,6 +112,8 @@ let register : type a. a pass -> (a -> unit) -> unit =
| Mach_sel -> hooks.mach_sel <- f :: hooks.mach_sel
| Mach_split -> hooks.mach_split <- f :: hooks.mach_split
| Linear -> hooks.linear <- f :: hooks.linear
| Cfg_combine -> hooks.cfg_combine <- f :: hooks.cfg_combine
| Cfg_cse -> hooks.cfg_cse <- f :: hooks.cfg_cse
| Cfg -> hooks.cfg <- f :: hooks.cfg
| Cmm -> hooks.cmm <- f :: hooks.cmm
| Inlining_tree -> hooks.inlining_tree <- f :: hooks.inlining_tree
Expand All @@ -132,6 +140,8 @@ let execute : type a. a pass -> a -> unit =
| Mach_sel -> execute_hooks hooks.mach_sel arg
| Mach_split -> execute_hooks hooks.mach_split arg
| Linear -> execute_hooks hooks.linear arg
| Cfg_combine -> execute_hooks hooks.cfg_combine arg
| Cfg_cse -> execute_hooks hooks.cfg_cse arg
| Cfg -> execute_hooks hooks.cfg arg
| Cmm -> execute_hooks hooks.cmm arg
| Inlining_tree -> execute_hooks hooks.inlining_tree arg
Expand All @@ -158,6 +168,8 @@ let clear : type a. a pass -> unit =
| Mach_sel -> hooks.mach_sel <- []
| Mach_split -> hooks.mach_split <- []
| Linear -> hooks.linear <- []
| Cfg_combine -> hooks.cfg_combine <- []
| Cfg_cse -> hooks.cfg_cse <- []
| Cfg -> hooks.cfg <- []
| Cmm -> hooks.cmm <- []
| Inlining_tree -> hooks.inlining_tree <- []
Expand Down
2 changes: 2 additions & 0 deletions driver/compiler_hooks.mli
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ type _ pass =
| Mach_sel : Mach.fundecl pass
| Mach_split : Mach.fundecl pass
| Linear : Linear.fundecl pass
| Cfg_combine : Cfg_with_layout.t pass
| Cfg_cse : Cfg_with_layout.t pass
| Cfg : Cfg_with_layout.t pass
| Cmm : Cmm.phrase list pass

Expand Down
1 change: 1 addition & 0 deletions dune
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@
cfg_to_linear
cfg_with_layout
cfg_with_infos
cfg_comballoc
cfg_dataflow
cfg_deadcode
cfg_liveness
Expand Down
9 changes: 8 additions & 1 deletion utils/doubly_linked_list.ml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
type 'a node =
| Empty
| Node of
{ value : 'a;
{ mutable value : 'a;
mutable prev : 'a node;
mutable next : 'a node
}
Expand Down Expand Up @@ -72,6 +72,13 @@ let value cell =
assert false
| Node node -> node.value

let set_value cell v =
match cell.node with
| Empty ->
(* internal invariant: cell's nodes are not empty *)
assert false
| Node node -> node.value <- v

let prev cell =
match cell.node with
| Empty ->
Expand Down
2 changes: 2 additions & 0 deletions utils/doubly_linked_list.mli
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ val insert_after : 'a cell -> 'a -> unit

val value : 'a cell -> 'a

val set_value : 'a cell -> 'a -> unit

val prev : 'a cell -> 'a cell option

val next : 'a cell -> 'a cell option
Expand Down

0 comments on commit c22043e

Please sign in to comment.