Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Do not cache young_limit in a processor register (upstream PR 9876) #315

Merged
merged 1 commit into from
Oct 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 10 additions & 11 deletions backend/arm64/emit.mlp
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,11 @@ let fastcode_flag = ref true

(* Names for special regs *)

let reg_domain_state_ptr = phys_reg 22
let reg_trap_ptr = phys_reg 23
let reg_alloc_ptr = phys_reg 24
let reg_alloc_limit = phys_reg 25
let reg_tmp1 = phys_reg 26
let reg_x8 = phys_reg 8
let reg_domain_state_ptr = phys_reg 25 (* x28 *)
let reg_trap_ptr = phys_reg 23 (* x26 *)
let reg_alloc_ptr = phys_reg 24 (* x27 *)
let reg_tmp1 = phys_reg 26 (* x16 *)
let reg_x8 = phys_reg 8 (* x8 *)

(* Output a label *)

Expand Down Expand Up @@ -504,10 +503,8 @@ module BR = Branch_relaxation.Make (struct
| Lop (Iload (size, addr)) | Lop (Istore (size, addr, _)) ->
let based = match addr with Iindexed _ -> 0 | Ibased _ -> 1 in
based + begin match size with Single -> 2 | _ -> 1 end
| Lop (Ialloc {bytes = num_bytes}) when !fastcode_flag ->
if num_bytes <= 0xFFF then 4 else 5
| Lop (Ispecific (Ifar_alloc {bytes = num_bytes})) when !fastcode_flag ->
if num_bytes <= 0xFFF then 5 else 6
| Lop (Ialloc _) when !fastcode_flag -> 5
| Lop (Ispecific (Ifar_alloc _)) when !fastcode_flag -> 6
| Lop (Ialloc { bytes = num_bytes; _ })
| Lop (Ispecific (Ifar_alloc { bytes = num_bytes; _ })) ->
begin match num_bytes with
Expand Down Expand Up @@ -597,8 +594,10 @@ let assembly_code_for_allocation i ~n ~far ~dbginfo =
so it is reasonable to assume n < 0x1_000. This makes
the generated code simpler. *)
assert (16 <= n && n < 0x1_000 && n land 0x7 = 0);
let offset = Domainstate.(idx_of_field Domain_young_limit) * 8 in
` ldr {emit_reg reg_tmp1}, [{emit_reg reg_domain_state_ptr}, #{emit_int offset}]\n`;
` sub {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, #{emit_int n}\n`;
` cmp {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_limit}\n`;
` cmp {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp1}\n`;
if not far then begin
` b.lo {emit_label lbl_call_gc}\n`
end else begin
Expand Down
30 changes: 15 additions & 15 deletions backend/arm64/proc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,10 @@ let word_addressed = false
x0 - x15 general purpose (caller-save)
x16, x17 temporaries (used by call veneers)
x18 platform register (reserved)
x19 - x24 general purpose (callee-save)
x25 domain state pointer
x19 - x25 general purpose (callee-save)
x26 trap pointer
x27 alloc pointer
x28 alloc limit
x28 domain state pointer
x29 frame pointer
x30 return address
sp / xzr stack pointer / zero register
Expand All @@ -49,10 +48,11 @@ let word_addressed = false
*)

let int_reg_name =
[| "x0"; "x1"; "x2"; "x3"; "x4"; "x5"; "x6"; "x7";
"x8"; "x9"; "x10"; "x11"; "x12"; "x13"; "x14"; "x15";
"x19"; "x20"; "x21"; "x22"; "x23"; "x24";
"x25"; "x26"; "x27"; "x28"; "x16"; "x17" |]
[| "x0"; "x1"; "x2"; "x3"; "x4"; "x5"; "x6"; "x7"; (* 0 - 7 *)
"x8"; "x9"; "x10"; "x11"; "x12"; "x13"; "x14"; "x15"; (* 8 - 15 *)
"x19"; "x20"; "x21"; "x22"; "x23"; "x24"; "x25"; (* 16 - 22 *)
"x26"; "x27"; "x28"; (* 23 - 25 *)
"x16"; "x17" |] (* 26 - 27 *)

let float_reg_name =
[| "d0"; "d1"; "d2"; "d3"; "d4"; "d5"; "d6"; "d7";
Expand All @@ -68,7 +68,7 @@ let register_class r =
| Float -> 1

let num_available_registers =
[| 22; 32 |] (* first 22 int regs allocatable; all float regs allocatable *)
[| 23; 32 |] (* first 23 int regs allocatable; all float regs allocatable *)

let first_available_register =
[| 0; 100 |]
Expand Down Expand Up @@ -270,16 +270,16 @@ let destroyed_at_reloadretaddr = [| |]
(* Maximal register pressure *)

let safe_register_pressure = function
| Iextcall _ -> 8
| Ialloc _ -> 24
| _ -> 25
| Iextcall _ -> 7
| Ialloc _ -> 22
| _ -> 23

let max_register_pressure = function
| Iextcall _ -> [| 10; 8 |]
| Ialloc _ -> [| 24; 32 |]
| Iextcall _ -> [| 7; 8 |] (* 7 integer callee-saves, 8 FP callee-saves *)
| Ialloc _ -> [| 22; 32 |]
| Iintoffloat | Ifloatofint
| Iload(Single, _) | Istore(Single, _, _) -> [| 25; 31 |]
| _ -> [| 25; 32 |]
| Iload(Single, _) | Istore(Single, _, _) -> [| 23; 31 |]
| _ -> [| 23; 32 |]

(* Pure operations (without any side effect besides updating their result
registers). *)
Expand Down
48 changes: 28 additions & 20 deletions backend/power/emit.mlp
Original file line number Diff line number Diff line change
Expand Up @@ -508,8 +508,8 @@ module BR = Branch_relaxation.Make (struct
then load_store_size addr + 1
else load_store_size addr
| Lop(Istore(_chunk, addr, _)) -> load_store_size addr
| Lop(Ialloc _) -> 4
| Lop(Ispecific(Ialloc_far _)) -> 5
| Lop(Ialloc _) -> 5
| Lop(Ispecific(Ialloc_far _)) -> 6
| Lop(Iintop Imod) -> 3
| Lop(Iintop(Icomp _)) -> 4
| Lop(Icompf _) -> 5
Expand Down Expand Up @@ -550,6 +550,26 @@ module BR = Branch_relaxation.Make (struct
let relax_intop_imm_checkbound ~bound:_ = assert false
end)

(* Assembly code for inlined allocation.

   [emit_alloc i bytes dbginfo far] emits the inline allocation sequence
   shared by the Ialloc and Ialloc_far cases of [emit_instr]:
   - [i] is the instruction being emitted; [i.live] is passed to
     [record_frame] and [i.res.(0)] receives the result address;
   - [bytes] is the amount subtracted from register 31;
   - [dbginfo] is recorded with the frame as [Dbg_alloc dbginfo];
   - [far] selects the longer sequence that reaches the GC call through
     an unconditional [bl] instead of a conditional branch-and-link.

   Register 31 is the allocation pointer and register 30 the domain state
   pointer (see the register map in proc.ml).  The limit is reloaded from
   the domain state on every allocation instead of living in a register.

   The sequence is 5 instructions when [far] is false and 6 when it is
   true; the sizes declared for Ialloc / Ialloc_far in the
   branch-relaxation module must stay in sync with this. *)

let emit_alloc i bytes dbginfo far =
(* The label of the GC call is created on first use; 0 means not yet
   allocated. *)
if !call_gc_label = 0 then call_gc_label := new_label ();
(* Offset of the young_limit field in the domain state; the field index is
   scaled by 8 (presumably 8 bytes per field -- TODO confirm for ELF32). *)
let offset = Domainstate.(idx_of_field Domain_young_limit) * 8 in
(* Load the current limit from the domain state into register 0. *)
` {emit_string lg} 0, {emit_int offset}(30)\n`;
(* Bump the allocation pointer down by [bytes]. *)
` addi 31, 31, {emit_int(-bytes)}\n`;
(* Compare the new allocation pointer against the limit held in register 0. *)
` {emit_string cmplg} 31, 0\n`;
if not far then begin
(* Near case: conditional branch-and-link to the GC call when the
   allocation pointer is below the limit. *)
` bltl {emit_label !call_gc_label}\n`;
record_frame i.live (Dbg_alloc dbginfo);
(* Result is the allocation pointer advanced by [size_addr]. *)
` addi {emit_reg i.res.(0)}, 31, {emit_int size_addr}\n`
end else begin
(* Far case: skip over an unconditional call to the GC when the
   allocation pointer is at or above the limit. *)
let lbl = new_label() in
` bge {emit_label lbl}\n`;
` bl {emit_label !call_gc_label}\n`;
record_frame i.live (Dbg_alloc dbginfo);
`{emit_label lbl}: addi {emit_reg i.res.(0)}, 31, {emit_int size_addr}\n`
end

(* Output the assembly code for an instruction *)

let emit_instr i =
Expand Down Expand Up @@ -782,22 +802,10 @@ let emit_instr i =
| Single -> "stfs"
| Double -> "stfd" in
emit_load_store storeinstr addr i.arg 1 i.arg.(0)
| Lop(Ialloc { bytes = n; dbginfo }) ->
if !call_gc_label = 0 then call_gc_label := new_label ();
` addi 31, 31, {emit_int(-n)}\n`;
` {emit_string cmplg} 31, 30\n`;
` bltl {emit_label !call_gc_label}\n`;
record_frame i.live (Dbg_alloc dbginfo);
` addi {emit_reg i.res.(0)}, 31, {emit_int size_addr}\n`;
| Lop(Ispecific(Ialloc_far { bytes = n; dbginfo })) ->
if !call_gc_label = 0 then call_gc_label := new_label ();
let lbl = new_label() in
` addi 31, 31, {emit_int(-n)}\n`;
` {emit_string cmplg} 31, 30\n`;
` bge {emit_label lbl}\n`;
` bl {emit_label !call_gc_label}\n`;
record_frame i.live (Dbg_alloc dbginfo);
`{emit_label lbl}: addi {emit_reg i.res.(0)}, 31, {emit_int size_addr}\n`
| Lop(Ialloc { bytes; dbginfo }) ->
emit_alloc i bytes dbginfo false
| Lop(Ispecific(Ialloc_far { bytes; dbginfo })) ->
emit_alloc i bytes dbginfo true
| Lop(Iintop Isub) -> (* subfc has swapped arguments *)
` subfc {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`
| Lop(Iintop Imod) ->
Expand Down Expand Up @@ -1009,8 +1017,8 @@ let emit_instr i =
Domainstate.(idx_of_field Domain_backtrace_pos)
in
begin match abi with
| ELF32 -> ` stw 0, {emit_int (backtrace_pos * 8)}(28)\n`
| _ -> ` std 0, {emit_int (backtrace_pos * 8)}(28)\n`
| ELF32 -> ` stw 0, {emit_int (backtrace_pos * 8)}(30)\n`
| _ -> ` std 0, {emit_int (backtrace_pos * 8)}(30)\n`
end;
emit_call "caml_raise_exn";
record_frame Reg.Set.empty (Dbg_raise i.dbg);
Expand Down
19 changes: 9 additions & 10 deletions backend/power/proc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,9 @@ let word_addressed = false
3 - 10 function arguments and results
11 - 12 temporaries
13 pointer to small data area
14 - 27 general purpose, preserved by C
28 domain state pointer
14 - 28 general purpose, preserved by C
29 trap pointer
30 allocation limit
30 domain state pointer
31 allocation pointer
Floating-point register map:
0 temporary
Expand All @@ -47,9 +46,9 @@ let word_addressed = false
*)

let int_reg_name =
[| "3"; "4"; "5"; "6"; "7"; "8"; "9"; "10";
"14"; "15"; "16"; "17"; "18"; "19"; "20"; "21";
"22"; "23"; "24"; "25"; "26"; "27" |]
[| "3"; "4"; "5"; "6"; "7"; "8"; "9"; "10"; (* 0 - 7 *)
"14"; "15"; "16"; "17"; "18"; "19"; "20"; "21"; (* 8 - 15 *)
"22"; "23"; "24"; "25"; "26"; "27"; "28" |] (* 16 - 22 *)

let float_reg_name =
[| "1"; "2"; "3"; "4"; "5"; "6"; "7"; "8";
Expand All @@ -64,7 +63,7 @@ let register_class r =
| Val | Int | Addr -> 0
| Float -> 1

let num_available_registers = [| 22; 31 |]
let num_available_registers = [| 23; 31 |]

let first_available_register = [| 0; 100 |]

Expand All @@ -76,7 +75,7 @@ let rotate_registers = true
(* Representation of hard registers by pseudo-registers *)

(* Pseudo-registers standing for the allocatable integer hard registers:
   entry [i] is an [Int] pseudo-register located in hard register [i].
   The size (23) must match [num_available_registers.(0)] and the number
   of entries in [int_reg_name]. *)
let hard_int_reg =
  let v = Array.make 23 Reg.dummy in
  (* Derive the upper bound from the array itself so that every slot is
     initialised.  A hard-coded bound of 21 left the last entry (index 22,
     register 28) as [Reg.dummy] once the array grew to 23 entries. *)
  for i = 0 to Array.length v - 1 do
    v.(i) <- Reg.at_location Int (Reg i)
  done;
  v

let hard_float_reg =
Expand Down Expand Up @@ -315,11 +314,11 @@ let destroyed_at_reloadretaddr = [| phys_reg 11 |]

let safe_register_pressure = function
Iextcall _ -> 14
| _ -> 22
| _ -> 23

let max_register_pressure = function
Iextcall _ -> [| 14; 18 |]
| _ -> [| 22; 30 |]
| _ -> [| 23; 30 |]

(* Pure operations (without any side effect besides updating their result
registers). *)
Expand Down
9 changes: 5 additions & 4 deletions backend/riscv/emit.mlp
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,11 @@ let rodata_space =

(* Names for special regs *)

let reg_tmp = phys_reg 22
let reg_tmp = phys_reg 23
let reg_t2 = phys_reg 16
let reg_domain_state_ptr = phys_reg 23
let reg_domain_state_ptr = phys_reg 26
let reg_trap = phys_reg 24
let reg_alloc_ptr = phys_reg 25
let reg_alloc_lim = phys_reg 26

(* Output a pseudo-register *)

Expand Down Expand Up @@ -392,13 +391,15 @@ let emit_instr i =
let lbl_after_alloc = new_label () in
let lbl_call_gc = new_label () in
let n = -bytes in
let offset = Domainstate.(idx_of_field Domain_young_limit) * 8 in
if is_immediate n then
` addi {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, {emit_int n}\n`
else begin
` li {emit_reg reg_tmp}, {emit_int n}\n`;
` add {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp}\n`
end;
` bltu {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_lim}, {emit_label lbl_call_gc}\n`;
` ld {emit_reg reg_tmp}, {emit_int offset}({emit_reg reg_domain_state_ptr})\n`;
` bltu {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp}, {emit_label lbl_call_gc}\n`;
`{emit_label lbl_after_alloc}:\n`;
` addi {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, {emit_int size_addr}\n`;
call_gc_sites :=
Expand Down
40 changes: 21 additions & 19 deletions backend/riscv/proc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,12 @@ let word_addressed = false
a0-a7 0-7 arguments/results
s2-s9 8-15 arguments/results (preserved by C)
t2-t6 16-20 temporary
t0 21 temporary
t1 22 temporary (used by code generator)
s0 23 domain pointer (preserved by C)
s0 21 general purpose (preserved by C)
t0 22 temporary
t1 23 temporary (used by code generator)
s1 24 trap pointer (preserved by C)
s10 25 allocation pointer (preserved by C)
s11 26 allocation limit (preserved by C)
s11 26 domain pointer (preserved by C)

Floating-point register map
---------------------------
Expand All @@ -66,11 +66,12 @@ let word_addressed = false
*)

let int_reg_name =
[| "a0"; "a1"; "a2"; "a3"; "a4"; "a5"; "a6"; "a7";
"s2"; "s3"; "s4"; "s5"; "s6"; "s7"; "s8"; "s9";
"t2"; "t3"; "t4"; "t5"; "t6";
"t0"; "t1";
"s0"; "s1"; "s10"; "s11" |]
[| "a0"; "a1"; "a2"; "a3"; "a4"; "a5"; "a6"; "a7"; (* 0 - 7 *)
"s2"; "s3"; "s4"; "s5"; "s6"; "s7"; "s8"; "s9"; (* 8 - 15 *)
"t2"; "t3"; "t4"; "t5"; "t6"; (* 16 - 20 *)
"s0"; (* 21 *)
"t0"; "t1"; (* 22 - 23 *)
"s1"; "s10"; "s11" |] (* 24 - 26 *)

let float_reg_name =
[| "ft0"; "ft1"; "ft2"; "ft3"; "ft4"; "ft5"; "ft6"; "ft7";
Expand All @@ -86,7 +87,7 @@ let register_class r =
| Val | Int | Addr -> 0
| Float -> 1

let num_available_registers = [| 22; 32 |]
let num_available_registers = [| 23; 32 |]

let first_available_register = [| 0; 100 |]

Expand Down Expand Up @@ -235,21 +236,21 @@ let regs_are_volatile _ = false
let destroyed_at_c_call =
(* s0-s11 and fs0-fs11 are callee-save *)
Array.of_list(List.map phys_reg
[0; 1; 2; 3; 4; 5; 6; 7; 16; 17; 18; 19; 20; 21;
[0; 1; 2; 3; 4; 5; 6; 7; 16; 17; 18; 19; 20; 22;
100; 101; 102; 103; 104; 105; 106; 107; 110; 111; 112; 113; 114; 115; 116;
117; 128; 129; 130; 131])

let destroyed_at_alloc =
(* t0-t3 are used for PLT stubs *)
if !Clflags.dlcode then Array.map phys_reg [|16; 17; 18; 19; 20; 21|]
(* t0-t6 are used for PLT stubs *)
if !Clflags.dlcode then Array.map phys_reg [|16; 17; 18; 19; 20; 22|]
else [| |]

let destroyed_at_oper = function
| Iop(Icall_ind | Icall_imm _ | Iextcall{alloc = true; _}) -> all_phys_regs
| Iop(Iextcall{alloc = false; _}) -> destroyed_at_c_call
| Iop(Ialloc _) -> destroyed_at_alloc
| Iop(Istore(Single, _, _)) -> [| phys_reg 100 |]
| Iswitch _ -> [| phys_reg 21 |]
| Iswitch _ -> [| phys_reg 22 |] (* t0 *)
| _ -> [||]

let destroyed_at_raise = all_phys_regs
Expand All @@ -259,12 +260,12 @@ let destroyed_at_reloadretaddr = [| |]
(* Maximal register pressure *)

let safe_register_pressure = function
| Iextcall _ -> 15
| _ -> 22
| Iextcall _ -> 9
| _ -> 23

let max_register_pressure = function
| Iextcall _ -> [| 15; 18 |]
| _ -> [| 22; 30 |]
| Iextcall _ -> [| 9; 12 |]
| _ -> [| 23; 30 |]

(* Pure operations (without any side effect besides updating their result
registers). *)
Expand Down Expand Up @@ -293,8 +294,9 @@ let int_dwarf_reg_numbers =
[| 10; 11; 12; 13; 14; 15; 16; 17;
18; 19; 20; 21; 22; 23; 24; 25;
7; 28; 29; 30; 31;
8;
5; 6;
8; 9; 26; 27;
9; 26; 27;
|]

let float_dwarf_reg_numbers =
Expand Down
Loading