Skip to content

Commit 9b3b050

Browse files
authored
Greedy register allocator (POC) (oxcaml#2225)
1 parent 1ac25f3 commit 9b3b050

File tree

11 files changed

+1281
-2
lines changed

11 files changed

+1281
-2
lines changed

.github/workflows/build.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,12 @@ jobs:
8282
build_ocamlparam: '_,w=-46,regalloc=ls,regalloc-param=SPLIT_LIVE_RANGES:on,regalloc-param=LS_ORDER:layout,regalloc-validate=1'
8383
check_arch: true
8484

85+
- name: gi
86+
config: --enable-middle-end=flambda2
87+
os: ubuntu-latest
88+
build_ocamlparam: '_,w=-46,regalloc=gi,regalloc-param=SPLIT_LIVE_RANGES:on,regalloc-param=GI_PRIORITY_HEURISTICS:interval-length,regalloc-param=GI_SELECTION_HEURISTICS:first-available,regalloc-param=GI_SPILLING_HEURISTICS:flat-uses,regalloc-validate=1'
89+
check_arch: true
90+
8591
- name: build_upstream_closure
8692
config: --enable-middle-end=upstream-closure
8793
os: ubuntu-20.04

backend/asmgen.ml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,7 @@ let cfgize (f : Mach.fundecl) : Cfg_with_layout.t =
247247

248248
type register_allocator =
249249
| Upstream
250+
| GI
250251
| IRC
251252
| LS
252253

@@ -255,6 +256,7 @@ let default_allocator = Upstream
255256
let register_allocator fd : register_allocator =
256257
match String.lowercase_ascii !Flambda_backend_flags.regalloc with
257258
| "cfg" -> if should_use_linscan fd then LS else IRC
259+
| "gi" -> GI
258260
| "irc" -> IRC
259261
| "ls" -> LS
260262
| "upstream" -> Upstream
@@ -285,7 +287,7 @@ let compile_fundecl ~ppf_dump ~funcnames fd_cmm =
285287
++ pass_dump_if ppf_dump dump_cse "After CSE"
286288
++ Profile.record ~accumulate:true "regalloc" (fun (fd : Mach.fundecl) ->
287289
match register_allocator fd with
288-
| ((IRC | LS) as regalloc) ->
290+
| ((GI | IRC | LS) as regalloc) ->
289291
fd
290292
++ Profile.record ~accumulate:true "cfg" (fun fd ->
291293
let cfg =
@@ -299,6 +301,7 @@ let compile_fundecl ~ppf_dump ~funcnames fd_cmm =
299301
in
300302
cfg
301303
++ begin match regalloc with
304+
| GI -> Profile.record ~accumulate:true "cfg_gi" Regalloc_gi.run
302305
| IRC -> Profile.record ~accumulate:true "cfg_irc" Regalloc_irc.run
303306
| LS -> Profile.record ~accumulate:true "cfg_ls" Regalloc_ls.run
304307
| Upstream -> assert false

backend/regalloc/regalloc_gi.ml

Lines changed: 272 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,272 @@
1+
[@@@ocaml.warning "+a-4-30-40-41-42"]
2+
3+
open! Regalloc_utils
4+
open! Regalloc_gi_utils
5+
module State = Regalloc_gi_state
6+
7+
module Utils = struct
8+
include Regalloc_gi_utils
9+
10+
let debug = gi_debug
11+
12+
let invariants = gi_invariants
13+
14+
let log = log
15+
16+
let log_body_and_terminator = log_body_and_terminator
17+
18+
let is_spilled reg = reg.Reg.spill
19+
20+
let set_spilled _reg = ()
21+
end
22+
23+
let rewrite : State.t -> Cfg_with_infos.t -> spilled_nodes:Reg.t list -> bool =
24+
fun state cfg_with_infos ~spilled_nodes ->
25+
let new_temporaries, block_inserted =
26+
Regalloc_rewrite.rewrite_gen
27+
(module State)
28+
(module Utils)
29+
state cfg_with_infos ~spilled_nodes
30+
in
31+
if new_temporaries <> []
32+
then Cfg_with_infos.invalidate_liveness cfg_with_infos;
33+
if block_inserted
34+
then Cfg_with_infos.invalidate_dominators_and_loop_infos cfg_with_infos;
35+
match new_temporaries with
36+
| [] -> false
37+
| _ :: _ ->
38+
State.add_introduced_temporaries_list state new_temporaries;
39+
State.clear_assignments state;
40+
true
41+
42+
let update_register_locations : State.t -> unit =
43+
fun state ->
44+
if gi_debug then log ~indent:0 "update_register_locations";
45+
let update_register (reg : Reg.t) : unit =
46+
match reg.Reg.loc with
47+
| Reg _ -> ()
48+
| Stack _ -> ()
49+
| Unknown -> (
50+
match State.find_assignment state reg with
51+
| None ->
52+
(* a register may "disappear" because of split/rename *)
53+
()
54+
| Some location ->
55+
if gi_debug
56+
then
57+
log ~indent:1 "updating %a to %a" Printmach.reg reg
58+
Hardware_register.print_location location;
59+
reg.Reg.loc <- Hardware_register.reg_location_of_location location)
60+
in
61+
List.iter (Reg.all_registers ()) ~f:update_register
62+
63+
module Prio_queue = Make_max_priority_queue (Int)
64+
65+
type prio_queue = (Reg.t * Interval.t) Prio_queue.t
66+
67+
let priority_heuristics : Reg.t -> Interval.t -> int =
68+
fun _reg itv ->
69+
match Lazy.force Priority_heuristics.value with
70+
| Priority_heuristics.Interval_length -> Interval.length itv
71+
72+
let make_hardware_registers_and_prio_queue (cfg_with_infos : Cfg_with_infos.t) :
73+
Hardware_registers.t * prio_queue =
74+
if gi_debug then log ~indent:0 "creating registers and queue";
75+
let intervals = build_intervals cfg_with_infos in
76+
let hardware_registers = Hardware_registers.make () in
77+
let prio_queue =
78+
(* CR-soon xclerc for xclerc: use the number of temporaries. *)
79+
Prio_queue.make ~initial_capacity:256
80+
in
81+
Reg.Tbl.iter
82+
(fun reg interval ->
83+
match reg.loc with
84+
| Reg _ ->
85+
if gi_debug
86+
then (
87+
log ~indent:1 "pre-assigned register %a" Printmach.reg reg;
88+
log ~indent:2 "%a" Interval.print interval);
89+
let hardware_reg = Hardware_registers.of_reg hardware_registers reg in
90+
Hardware_register.add_non_evictable hardware_reg reg interval
91+
| Unknown ->
92+
let priority = priority_heuristics reg interval in
93+
if gi_debug
94+
then (
95+
log ~indent:1 "register %a" Printmach.reg reg;
96+
log ~indent:2 "%a" Interval.print interval;
97+
log ~indent:2 "priority=%d" priority);
98+
Prio_queue.add prio_queue ~priority ~data:(reg, interval)
99+
| Stack _ ->
100+
if gi_debug
101+
then (
102+
log ~indent:1 "stack register %a" Printmach.reg reg;
103+
log ~indent:2 "%a" Interval.print interval);
104+
())
105+
intervals;
106+
hardware_registers, prio_queue
107+
108+
(* CR xclerc for xclerc: try to find a reasonable threshold. *)
109+
let max_rounds = 32
110+
111+
(* CR xclerc for xclerc: the `round` parameter is temporary; this is an hybrid
112+
version of "greedy" using the `rewrite` function from IRC when it needs to
113+
spill. *)
114+
let rec main : round:int -> State.t -> Cfg_with_infos.t -> unit =
115+
fun ~round state cfg_with_infos ->
116+
if round > max_rounds
117+
then
118+
fatal "register allocation was not succesful after %d rounds (%s)"
119+
max_rounds (Cfg_with_infos.cfg cfg_with_infos).fun_name;
120+
if gi_debug
121+
then (
122+
log ~indent:0 "main, round #%d" round;
123+
log_cfg_with_infos ~indent:0 cfg_with_infos);
124+
if gi_debug then log ~indent:0 "updating spilling costs";
125+
let flat =
126+
match Lazy.force Spilling_heuristics.value with
127+
| Flat_uses -> true
128+
| Hierarchical_uses -> false
129+
in
130+
update_spill_cost cfg_with_infos ~flat ();
131+
State.iter_introduced_temporaries state ~f:(fun (reg : Reg.t) ->
132+
reg.Reg.spill_cost <- reg.Reg.spill_cost + 10_000);
133+
if gi_debug
134+
then (
135+
log ~indent:0 "spilling costs";
136+
List.iter (Reg.all_registers ()) ~f:(fun (reg : Reg.t) ->
137+
reg.Reg.spill <- false;
138+
log ~indent:1 "%a: %d" Printmach.reg reg reg.spill_cost));
139+
let hardware_registers, prio_queue =
140+
make_hardware_registers_and_prio_queue cfg_with_infos
141+
in
142+
let step = ref 0 in
143+
let spilling = ref ([] : (Reg.t * Interval.t) list) in
144+
while not (Prio_queue.is_empty prio_queue) do
145+
incr step;
146+
if gi_debug
147+
then log ~indent:1 "step #%d (size=%d)" !step (Prio_queue.size prio_queue);
148+
let { Prio_queue.priority; data = reg, interval } =
149+
Prio_queue.get_and_remove prio_queue
150+
in
151+
if gi_debug
152+
then log ~indent:2 "got register %a (prio=%d)" Printmach.reg reg priority;
153+
match Hardware_registers.find_available hardware_registers reg interval with
154+
| For_assignment { hardware_reg } ->
155+
if gi_debug
156+
then
157+
log ~indent:3 "assigning %a to %a" Printmach.reg reg
158+
Hardware_register.print_location hardware_reg.location;
159+
State.add_assignment state reg ~to_:hardware_reg.location;
160+
hardware_reg.assigned
161+
<- { Hardware_register.pseudo_reg = reg; interval; evictable = true }
162+
:: hardware_reg.assigned
163+
| For_eviction { hardware_reg; evicted_regs } ->
164+
if gi_debug
165+
then
166+
log ~indent:3 "evicting %a from %a" Printmach.regs
167+
(Array.of_list
168+
(List.map evicted_regs
169+
~f:(fun { Hardware_register.pseudo_reg; _ } -> pseudo_reg)))
170+
Hardware_register.print_location hardware_reg.location;
171+
List.iter evicted_regs
172+
~f:(fun
173+
{ Hardware_register.pseudo_reg = evict_reg;
174+
interval = evict_interval;
175+
evictable
176+
}
177+
->
178+
if not evictable
179+
then
180+
fatal
181+
"register %a has been picked up for eviction, but is not \
182+
evictable"
183+
Printmach.reg evict_reg;
184+
State.remove_assignment state evict_reg;
185+
Prio_queue.add prio_queue
186+
~priority:(priority_heuristics evict_reg evict_interval)
187+
~data:(evict_reg, evict_interval));
188+
State.add_assignment state reg ~to_:hardware_reg.location;
189+
(* CR xclerc for xclerc: very inefficient. *)
190+
hardware_reg.assigned
191+
<- { Hardware_register.pseudo_reg = reg; interval; evictable = true }
192+
:: List.filter hardware_reg.assigned
193+
~f:(fun { Hardware_register.pseudo_reg = r; _ } ->
194+
not
195+
(List.exists evicted_regs
196+
~f:(fun { Hardware_register.pseudo_reg = r'; _ } ->
197+
Reg.same r r')))
198+
| Split_or_spill ->
199+
(* CR xclerc for xclerc: we should actually try to split. *)
200+
if gi_debug then log ~indent:3 "spilling %a" Printmach.reg reg;
201+
reg.Reg.spill <- true;
202+
spilling := (reg, interval) :: !spilling
203+
done;
204+
match !spilling with
205+
| [] -> ()
206+
| _ :: _ as spilled_nodes -> (
207+
if gi_debug
208+
then (
209+
log_cfg_with_infos ~indent:0 cfg_with_infos;
210+
log ~indent:1 "stack slots";
211+
Regalloc_stack_slots.iter (State.stack_slots state)
212+
~f:(fun (reg : Reg.t) (slot : int) ->
213+
log ~indent:2 " - %a ~> %d" Printmach.reg reg slot);
214+
log ~indent:1 "needs to spill %d registers:" (List.length !spilling);
215+
List.iter !spilling ~f:(fun (_reg, interval) ->
216+
log ~indent:2 " - %a" Interval.print interval);
217+
Cfg.iter_blocks (Cfg_with_infos.cfg cfg_with_infos)
218+
~f:(fun (_ : Label.t) (block : Cfg.basic_block) ->
219+
let occurs =
220+
List.exists spilled_nodes ~f:(fun (reg, _) ->
221+
occurs_block block reg)
222+
in
223+
if occurs
224+
then (
225+
let dummy_liveness_for_log = Cfg_dataflow.Instr.Tbl.create 12 in
226+
log ~indent:0 "block %d has an occurrence of a spilling register"
227+
block.start;
228+
log_body_and_terminator ~indent:1 block.body block.terminator
229+
dummy_liveness_for_log)));
230+
match
231+
rewrite state cfg_with_infos
232+
~spilled_nodes:(List.map spilled_nodes ~f:fst)
233+
with
234+
| false -> if gi_debug then log ~indent:1 "(end of main)"
235+
| true -> main ~round:(succ round) state cfg_with_infos)
236+
237+
let run : Cfg_with_infos.t -> Cfg_with_infos.t =
238+
fun cfg_with_infos ->
239+
let cfg_with_layout = Cfg_with_infos.cfg_with_layout cfg_with_infos in
240+
let cfg_infos, stack_slots =
241+
Regalloc_rewrite.prelude
242+
(module Utils)
243+
~on_fatal_callback:(fun () -> save_cfg "gi" cfg_with_layout)
244+
cfg_with_infos
245+
in
246+
(* CR xclerc for xclerc: consider moving the computation of temporaries and
247+
the creation of the state to `prelude`. *)
248+
let all_temporaries = Reg.Set.union cfg_infos.arg cfg_infos.res in
249+
if gi_debug
250+
then log ~indent:0 "#temporaries=%d" (Reg.Set.cardinal all_temporaries);
251+
let state =
252+
State.make ~stack_slots
253+
~next_instruction_id:(succ cfg_infos.max_instruction_id)
254+
in
255+
let spilling_because_unused = Reg.Set.diff cfg_infos.res cfg_infos.arg in
256+
(match Reg.Set.elements spilling_because_unused with
257+
| [] -> ()
258+
| _ :: _ as spilled_nodes ->
259+
List.iter spilled_nodes ~f:(fun reg -> reg.Reg.spill <- true);
260+
(* note: rewrite will remove the `spilling` registers from the "spilled"
261+
work list and set the field to unknown. *)
262+
let (_ : bool) = rewrite state cfg_with_infos ~spilled_nodes in
263+
Cfg_with_infos.invalidate_liveness cfg_with_infos);
264+
main ~round:1 state cfg_with_infos;
265+
if gi_debug then log_cfg_with_infos ~indent:1 cfg_with_infos;
266+
Regalloc_rewrite.postlude
267+
(module State)
268+
(module Utils)
269+
state
270+
~f:(fun () -> update_register_locations state)
271+
cfg_with_infos;
272+
cfg_with_infos

backend/regalloc/regalloc_gi.mli

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[@@@ocaml.warning "+a-4-30-40-41-42"]
2+
3+
val run : Cfg_with_infos.t -> Cfg_with_infos.t

backend/regalloc/regalloc_gi_state.ml

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
[@@@ocaml.warning "+a-4-30-40-41-42"]
2+
3+
open! Regalloc_utils
4+
open! Regalloc_gi_utils
5+
6+
type t =
7+
{ mutable assignments : Hardware_register.location Reg.Map.t;
8+
mutable introduced_temporaries : Reg.Set.t;
9+
stack_slots : Regalloc_stack_slots.t;
10+
mutable next_instruction_id : Instruction.id
11+
}
12+
13+
let[@inline] make ~stack_slots ~next_instruction_id =
14+
let assignments = Reg.Map.empty in
15+
let introduced_temporaries = Reg.Set.empty in
16+
{ assignments; introduced_temporaries; stack_slots; next_instruction_id }
17+
18+
let[@inline] add_assignment state reg ~to_ =
19+
state.assignments <- Reg.Map.add reg to_ state.assignments
20+
21+
let[@inline] remove_assignment state reg =
22+
state.assignments <- Reg.Map.remove reg state.assignments
23+
24+
let[@inline] find_assignment state reg = Reg.Map.find_opt reg state.assignments
25+
26+
let[@inline] clear_assignments state = state.assignments <- Reg.Map.empty
27+
28+
let[@inline] add_introduced_temporaries_list state l =
29+
state.introduced_temporaries
30+
<- List.fold_left l ~init:state.introduced_temporaries ~f:(fun set reg ->
31+
Reg.Set.add reg set)
32+
33+
let[@inline] mem_introduced_temporaries state reg =
34+
Reg.Set.mem reg state.introduced_temporaries
35+
36+
let[@inline] iter_introduced_temporaries state ~f =
37+
Reg.Set.iter f state.introduced_temporaries
38+
39+
let[@inline] stack_slots state = state.stack_slots
40+
41+
let[@inline] get_and_incr_instruction_id state =
42+
let res = state.next_instruction_id in
43+
state.next_instruction_id <- succ res;
44+
res
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
[@@@ocaml.warning "+a-4-30-40-41-42"]
2+
3+
open! Regalloc_utils
4+
open! Regalloc_gi_utils
5+
6+
type t
7+
8+
val make :
9+
stack_slots:Regalloc_stack_slots.t -> next_instruction_id:Instruction.id -> t
10+
11+
val add_assignment : t -> Reg.t -> to_:Hardware_register.location -> unit
12+
13+
val remove_assignment : t -> Reg.t -> unit
14+
15+
val find_assignment : t -> Reg.t -> Hardware_register.location option
16+
17+
val clear_assignments : t -> unit
18+
19+
val add_introduced_temporaries_list : t -> Reg.t list -> unit
20+
21+
val mem_introduced_temporaries : t -> Reg.t -> bool
22+
23+
val iter_introduced_temporaries : t -> f:(Reg.t -> unit) -> unit
24+
25+
val stack_slots : t -> Regalloc_stack_slots.t
26+
27+
val get_and_incr_instruction_id : t -> Instruction.id

0 commit comments

Comments
 (0)