Skip to content

Commit

Permalink
flambda-backend: Runtime 5 forward & backward porting (#2027)
Browse files Browse the repository at this point in the history
* Resolve sigprocmask cr

* Backport amd64 backend changes

* Fix systhreads5 install

* Forward port ft relative retaddr

* Backport cmm changes

* Ignore nnp link config in 5

* Fix ocamltest build with 5 runtime

* Forward port unboxed int64 in closure

* Forward port is_last closinfo flag

* Backport lazy implementation

* Forward port fl2 root scanning fix

* Make atomics compatible with 4 and 5

Once the atomic primitives are backported, switch to them.

* Backport marshal change

* Implement more of the domain API using DLS

* Backport filename change

* Backport format changes

* Ignore backport gc change

* Backport hashtbl change

* Add stubs that runtime5 needs for linking with stdlib

* CR for SIMD register save

* Address code review comments

* Don't align for runtime5

---------

Co-authored-by: Mark Shinwell <mshinwell@pm.me>
  • Loading branch information
TheNumbat and mshinwell authored Nov 20, 2023
1 parent 7917f60 commit b1933f2
Show file tree
Hide file tree
Showing 50 changed files with 826 additions and 797 deletions.
101 changes: 22 additions & 79 deletions asmcomp/amd64/emit.mlp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ let cfi_endproc () =
let cfi_adjust_cfa_offset n =
if Config.asm_cfi_supported then D.cfi_adjust_cfa_offset n

(* BACKPORT
let cfi_remember_state () =
if Config.asm_cfi_supported then D.cfi_remember_state ()

Expand All @@ -71,7 +70,6 @@ let cfi_restore_state () =

let cfi_def_cfa_register reg =
if Config.asm_cfi_supported then D.cfi_def_cfa_register reg
*)

let emit_debug_info dbg =
emit_debug_info_gen dbg D.file D.loc
Expand All @@ -90,13 +88,7 @@ let frame_size env = (* includes return address *)
+ 8
+ (if fp then 8 else 0))
in
(* BACKPORT BEGIN *)
Misc.align
(* BACKPORT END *)
sz
(* BACKPORT BEGIN *)
16
(* BACKPORT END *)
if Config.runtime5 then sz else Misc.align sz 16
end else
env.stack_offset + 8

Expand Down Expand Up @@ -676,25 +668,19 @@ let emit_instr env fallthrough i =
end
end
end
| Lop(Iextcall { func; alloc; stack_ofs
(* BACKPORT BEGIN *)
= _
(* BACKPORT END *)
}) ->
| Lop(Iextcall { func; alloc; stack_ofs }) ->
add_used_symbol func;
(* BEGIN BACKPORT
if stack_ofs > 0 then begin
if Config.runtime5 && stack_ofs > 0 then begin
I.mov rsp r13;
I.lea (mem64 QWORD stack_ofs RSP) r12;
load_symbol_addr func rax;
emit_call "caml_c_call_stack_args";
record_frame env i.live (Dbg_other i.dbg);
end else *) if alloc then begin
end else if alloc then begin
load_symbol_addr func rax;
emit_call "caml_c_call";
record_frame env i.live (Dbg_other i.dbg);
(* BEGIN BACKPORT *)
if system <> S_win64 then begin
if not Config.runtime5 && system <> S_win64 then begin

(* In amd64.S, "caml_c_call" tail-calls the C function (in order to
produce nicer backtraces), so we need to restore r15 manually after
Expand All @@ -706,24 +692,21 @@ let emit_instr env fallthrough i =

I.mov (domain_field Domainstate.Domain_young_ptr) r15
end
(* END BACKPORT *)
end else begin
(* BEGIN BACKPORT
I.mov rsp rbx;
cfi_remember_state ();
cfi_def_cfa_register "rbx";
(* NB: gdb has asserts on contiguous stacks that mean it
will not unwind through this unless we were to tag this
calling frame with cfi_signal_frame in it's definition. *)
I.mov (domain_field Domainstate.Domain_c_stack) rsp; *)
(* END BACKPORT *)
emit_call func
(* BEGIN BACKPORT
;
I.mov rbx rsp;
cfi_restore_state ();
*)
(* END BACKPORT *)
if Config.runtime5 then begin
I.mov rsp rbx;
cfi_remember_state ();
cfi_def_cfa_register "rbx";
(* NB: gdb has asserts on contiguous stacks that mean it
will not unwind through this unless we were to tag this
calling frame with cfi_signal_frame in it's definition. *)
I.mov (domain_field Domainstate.Domain_c_stack) rsp;
end;
emit_call func;
if Config.runtime5 then begin
I.mov rbx rsp;
cfi_restore_state ();
end;
end
| Lop(Istackoffset n) ->
emit_stack_offset env n
Expand Down Expand Up @@ -947,12 +930,10 @@ let emit_instr env fallthrough i =
I.cmp (int 0) (res16 i 0);
I.set (cond (Iunsigned Cne)) (res8 i 0);
I.movzx (res8 i 0) (res i 0)
(* BACKPORT BEGIN
| Lop (Idls_get) ->
I.mov (domain_field Domainstate.Domain_dls_root) (res i 0)
*)
| Lop (Idls_get) -> Misc.fatal_error "Idls_get not implemented"
(* BACKPORT END *)
if Config.runtime5
then I.mov (domain_field Domainstate.Domain_dls_root) (res i 0)
else Misc.fatal_error "Idls_get not implemented in runtime4."
| Lreloadretaddr ->
()
| Lreturn ->
Expand Down Expand Up @@ -1046,43 +1027,6 @@ let emit_instr env fallthrough i =
let delta = 16 * delta_traps in
cfi_adjust_cfa_offset delta;
env.stack_offset <- env.stack_offset + delta
(* BACKPORT BEGIN
(exception handling)
| Lpushtrap { lbl_handler; } ->
let load_label_addr s arg =
if !Clflags.pic_code then
I.lea (mem64_rip NONE (emit_label s)) arg
else
I.mov (sym (emit_label s)) arg
in
load_label_addr lbl_handler r11;
I.push r11;
cfi_adjust_cfa_offset 8;
I.push (domain_field Domainstate.Domain_exn_handler);
cfi_adjust_cfa_offset 8;
I.mov rsp (domain_field Domainstate.Domain_exn_handler);
env.stack_offset <- env.stack_offset + 16;
| Lpoptrap ->
I.pop (domain_field Domainstate.Domain_exn_handler);
cfi_adjust_cfa_offset (-8);
I.add (int 8) rsp;
cfi_adjust_cfa_offset (-8);
env.stack_offset <- env.stack_offset - 16
| Lraise k ->
begin match k with
| Lambda.Raise_regular ->
emit_call "caml_raise_exn";
record_frame env Reg.Set.empty (Dbg_raise i.dbg)
| Lambda.Raise_reraise ->
emit_call "caml_reraise_exn";
record_frame env Reg.Set.empty (Dbg_raise i.dbg)
| Lambda.Raise_notrace ->
I.mov (domain_field Domainstate.Domain_exn_handler) rsp;
I.pop (domain_field Domainstate.Domain_exn_handler);
I.pop r11;
I.jmp r11
end
*)
| Lpushtrap { lbl_handler; } ->
let load_label_addr s arg =
if !Clflags.pic_code then
Expand Down Expand Up @@ -1118,7 +1062,6 @@ let emit_instr env fallthrough i =
I.pop r11;
I.jmp r11
end
(* BACKPORT END *)

let rec emit_all env fallthrough i =
match i.desc with
Expand Down
17 changes: 7 additions & 10 deletions asmcomp/amd64/proc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -238,11 +238,7 @@ let win64_float_external_arguments =
let win64_loc_external_arguments arg =
let loc = Array.make (Array.length arg) Reg.dummy in
let reg = ref 0
(* BACKPORT BEGIN
and ofs = ref 0 in
*)
and ofs = ref 32 in
(* BACKPORT END *)
and ofs = ref (if Config.runtime5 then 0 else 32) in
for i = 0 to Array.length arg - 1 do
match arg.(i) with
| Val | Int | Addr as ty ->
Expand Down Expand Up @@ -293,8 +289,7 @@ let stack_ptr_dwarf_register_number = 7

(* Registers destroyed by operations *)

(* BACKPORT BEGIN
let destroyed_at_c_call =
let destroyed_at_c_call5 =
(* C calling conventions preserve rbx, but it is clobbered
by the code sequence used for C calls in emit.mlp, so it
is marked as destroyed. *)
Expand All @@ -309,8 +304,8 @@ let destroyed_at_c_call =
[0;1;2;3;4;5;6;7;10;11;
100;101;102;103;104;105;106;107;
108;109;110;111;112;113;114;115])
*)
let destroyed_at_c_call =

let destroyed_at_c_call4 =
if win64 then
(* Win64: rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15 preserved *)
Array.of_list(List.map phys_reg
Expand All @@ -322,7 +317,9 @@ let destroyed_at_c_call =
[0;2;3;4;5;6;7;10;11;
100;101;102;103;104;105;106;107;
108;109;110;111;112;113;114;115])
(* BACKPORT END *)

let destroyed_at_c_call =
if Config.runtime5 then destroyed_at_c_call5 else destroyed_at_c_call4

let destroyed_at_alloc_or_poll =
if X86_proc.use_plt then
Expand Down
6 changes: 5 additions & 1 deletion asmcomp/asmlink.ml
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,11 @@ let add_ccobjs origin l =
end

let runtime_lib () =
let libname = "libasmrun" ^ !Clflags.runtime_variant ^ ext_lib in
let variant =
if Config.runtime5 && !Clflags.runtime_variant = "nnp" then ""
else !Clflags.runtime_variant
in
let libname = "libasmrun" ^ variant ^ ext_lib in
try
if !Clflags.nopervasives || not !Clflags.with_runtime then []
else [ Load_path.find libname ]
Expand Down
34 changes: 11 additions & 23 deletions asmcomp/cmm_helpers.ml
Original file line number Diff line number Diff line change
Expand Up @@ -707,12 +707,9 @@ let get_header ptr dbg =
loads can be marked as [Immutable], since the runtime should ensure that
there is no data race on headers. This saves performance with
ThreadSanitizer instrumentation by avoiding to instrument header loads. *)
Cop(
(* BACKPORT BEGIN
mk_load_immut Word_int,
*)
mk_load_mut Word_int,
(* BACKPORT END *)
Cop((if Config.runtime5
then mk_load_immut Word_int
else mk_load_mut Word_int),
[Cop(Cadda, [ptr; Cconst_int(-size_int, dbg)], dbg)], dbg)

let get_header_masked ptr dbg =
Expand All @@ -730,12 +727,9 @@ let get_tag ptr dbg =
Cop(Cand, [get_header ptr dbg; Cconst_int (255, dbg)], dbg)
else (* If byte loads are efficient *)
(* Same comment as [get_header] above *)
Cop(
(* BACKPORT BEGIN
mk_load_immut Byte_unsigned,
*)
mk_load_mut Byte_unsigned,
(* BACKPORT END *)
Cop((if Config.runtime5
then mk_load_immut Byte_unsigned
else mk_load_mut Byte_unsigned),
[Cop(Cadda, [ptr; Cconst_int(tag_offset, dbg)], dbg)], dbg)

let get_size ptr dbg =
Expand Down Expand Up @@ -1012,11 +1006,9 @@ let make_alloc_generic ~mode set_fn dbg tag wordsize args =
| e1::el -> Csequence(set_fn (Cvar id) (Cconst_int (idx, dbg)) e1 dbg,
fill_fields (idx + 2) el) in
Clet(VP.create id,
(* BACKPORT BEGIN
Cop(Cextcall("caml_alloc_shr_check_gc", typ_val, [], true),
*)
Cop(Cextcall("caml_alloc", typ_val, [], true),
(* BACKPORT END *)
Cop(Cextcall((if Config.runtime5
then "caml_alloc_shr_check_gc"
else "caml_alloc"), typ_val, [], true),
[Cconst_int (wordsize, dbg); Cconst_int (tag, dbg)], dbg),
fill_fields 1 args)
end
Expand Down Expand Up @@ -2554,13 +2546,9 @@ let assignment_kind
| Assignment Modify_maybe_stack, Pointer ->
assert Config.stack_allocation;
Caml_modify_local
(* BACKPORT BEGIN
| Heap_initialization, Pointer
| Root_initialization, Pointer -> Caml_initialize
*)
| Heap_initialization, Pointer -> Caml_initialize
| Root_initialization, Pointer -> Simple Initialization
(* BACKPORT END *)
| Root_initialization, Pointer ->
if Config.runtime5 then Caml_initialize else Simple Initialization
| (Assignment _), Immediate -> Simple Assignment
| Heap_initialization, Immediate
| Root_initialization, Immediate -> Simple Initialization
Expand Down
31 changes: 16 additions & 15 deletions asmcomp/cmmgen.ml
Original file line number Diff line number Diff line change
Expand Up @@ -116,21 +116,22 @@ let mut_from_env env ptr =
else Asttypes.Mutable
| _ -> Asttypes.Mutable

(* BACKPORT
(* Minimum of two [mutable_flag] values, assuming [Immutable < Mutable]. *)
let min_mut x y =
match x,y with
| Immutable,_ | _,Immutable -> Immutable
| Mutable,Mutable -> Mutable
*)

(* BACKPORT BEGIN
let get_field env mut ptr n dbg =
let mut = min_mut mut (mut_from_env env ptr) in
*)
let get_field env layout ptr n dbg =
let mut = mut_from_env env ptr in
(* BACKPORT END *)
| Asttypes.Immutable, _
| _, Asttypes.Immutable -> Asttypes.Immutable
| Asttypes.Mutable, Asttypes.Mutable -> Asttypes.Mutable

let mut_from_lambda = function
| Lambda.Immutable -> Asttypes.Immutable
| Lambda.Immutable_unique -> Asttypes.Immutable
| Lambda.Mutable -> Asttypes.Mutable

let get_field env mut layout ptr n dbg =
let mut = if Config.runtime5
then min_mut (mut_from_lambda mut) (mut_from_env env ptr)
else mut_from_env env ptr in
let memory_chunk =
match layout with
| Pvalue Pintval | Punboxed_int _ -> Word_int
Expand Down Expand Up @@ -921,13 +922,13 @@ and transl_prim_1 env p arg dbg =
Popaque ->
opaque (transl env arg) dbg
(* Heap operations *)
| Pfield (n, layout, _, _) ->
get_field env layout (transl env arg) n dbg
| Pfield (n, layout, _, mut) ->
get_field env mut layout (transl env arg) n dbg
| Pfloatfield (n,mode) ->
let ptr = transl env arg in
box_float dbg mode (floatfield n ptr dbg)
| Pufloatfield n ->
get_field env Punboxed_float (transl env arg) n dbg
get_field env Mutable Punboxed_float (transl env arg) n dbg
| Pint_as_pointer _ ->
int_as_pointer (transl env arg) dbg
(* Exceptions *)
Expand Down
Loading

0 comments on commit b1933f2

Please sign in to comment.