Skip to content

Commit b07af9b

Browse files
committed
Merge branch 'layered-persistent_env' into as-argument-for
2 parents 6d22bc4 + b3be73d commit b07af9b

File tree

692 files changed

+22343
-6601
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

692 files changed

+22343
-6601
lines changed

.github/workflows/build.yml

+6-18
Original file line numberDiff line numberDiff line change
@@ -13,28 +13,16 @@ jobs:
1313
fail-fast: false
1414
matrix:
1515
include:
16-
- name: closure
17-
config: --enable-middle-end=closure
18-
os: ubuntu-latest
19-
build_ocamlparam: ''
20-
check_arch: true
21-
use_runtime: d
22-
ocamlrunparam: "v=0,V=1"
23-
24-
- name: flambda1
25-
config: --enable-middle-end=flambda
26-
os: ubuntu-latest
27-
build_ocamlparam: ''
28-
29-
- name: flambda1_frame_pointers
30-
config: --enable-middle-end=flambda --enable-frame-pointers --enable-poll-insertion
16+
- name: flambda2_runtime5
17+
config: --enable-middle-end=flambda2 --enable-runtime5
3118
os: ubuntu-latest
32-
build_ocamlparam: ''
3319

34-
- name: flambda2_runtime5
20+
- name: flambda2_debug_runtime5
3521
config: --enable-middle-end=flambda2 --enable-runtime5
3622
os: ubuntu-latest
37-
expected_fail: true
23+
build_ocamlparam: ''
24+
use_runtime: d
25+
ocamlrunparam: "v=0,V=1"
3826

3927
- name: flambda2_debug_runtime
4028
config: --enable-middle-end=flambda2

backend/CSEgen.ml

+6-2
Original file line numberDiff line numberDiff line change
@@ -238,8 +238,12 @@ method class_of_operation op =
238238
| Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _
239239
| Iextcall _ | Iprobe _ | Iopaque -> assert false (* treated specially *)
240240
| Istackoffset _ -> Op_other
241-
| Iload { mutability = Mutable } -> Op_load Mutable
242-
| Iload { mutability = Immutable } -> Op_load Immutable
241+
| Iload { mutability; is_atomic } ->
242+
(* #12173: disable CSE for atomic loads. *)
243+
if is_atomic then Op_other
244+
else Op_load (match mutability with
245+
| Mutable -> Mutable
246+
| Immutable -> Immutable)
243247
| Istore(_,_,asg) -> Op_store asg
244248
| Ialloc _ | Ipoll _ -> assert false (* treated specially *)
245249
| Iintop(Icheckbound|Icheckalign _) -> Op_checkbound

backend/amd64/emit.mlp

+66-65
Original file line numberDiff line numberDiff line change
@@ -74,13 +74,13 @@ let cfi_endproc () =
7474
let cfi_adjust_cfa_offset n =
7575
if Config.asm_cfi_supported then D.cfi_adjust_cfa_offset n
7676

77-
let _cfi_remember_state () =
77+
let cfi_remember_state () =
7878
if Config.asm_cfi_supported then D.cfi_remember_state ()
7979

80-
let _cfi_restore_state () =
80+
let cfi_restore_state () =
8181
if Config.asm_cfi_supported then D.cfi_restore_state ()
8282

83-
let _cfi_def_cfa_register reg =
83+
let cfi_def_cfa_register reg =
8484
if Config.asm_cfi_supported then D.cfi_def_cfa_register reg
8585

8686
let emit_debug_info ?discriminator dbg =
@@ -94,7 +94,7 @@ let emit_debug_info_linear i =
9494

9595
let fp = Config.with_frame_pointers
9696

97-
let _stack_threshold_size = Config.stack_threshold * 8 (* bytes *)
97+
let stack_threshold_size = Config.stack_threshold * 8 (* bytes *)
9898

9999
(* Tradeoff between code size and code speed *)
100100

@@ -1034,6 +1034,10 @@ let emit_simd_instr op i =
10341034
| SSE2 Avg_unsigned_i8 -> I.pavgb (arg i 1) (res i 0)
10351035
| SSE2 Avg_unsigned_i16 -> I.pavgw (arg i 1) (res i 0)
10361036
| SSE2 SAD_unsigned_i8 -> I.psadbw (arg i 1) (res i 0)
1037+
| SSE2 Mulhi_i16 -> I.pmulhw (arg i 1) (res i 0)
1038+
| SSE2 Mulhi_unsigned_i16 -> I.pmulhuw (arg i 1) (res i 0)
1039+
| SSE2 Mullo_i16 -> I.pmullw (arg i 1) (res i 0)
1040+
| SSE2 Mul_hadd_i16_to_i32 -> I.pmaddwd (arg i 1) (res i 0)
10371041
| SSE2 And_bits -> I.pand (arg i 1) (res i 0)
10381042
| SSE2 Andnot_bits -> I.pandnot (arg i 1) (res i 0)
10391043
| SSE2 Or_bits -> I.por (arg i 1) (res i 0)
@@ -1108,6 +1112,8 @@ let emit_simd_instr op i =
11081112
| SSSE3 Mulsign_i32 -> I.psignd (arg i 1) (res i 0)
11091113
| SSSE3 Alignr_i8 n -> I.palignr (X86_dsl.int n) (arg i 1) (res i 0)
11101114
| SSSE3 Shuffle_8 -> I.pshufb (arg i 1) (res i 0)
1115+
| SSSE3 Mul_unsigned_hadd_saturating_i8_to_i16 ->
1116+
I.pmaddubsw (arg i 1) (res i 0)
11111117
| SSE41 (Blend_16 n) -> I.pblendw (X86_dsl.int n) (arg i 1) (res i 0)
11121118
| SSE41 (Blend_32 n) -> I.blendps (X86_dsl.int n) (arg i 1) (res i 0)
11131119
| SSE41 (Blend_64 n) -> I.blendpd (X86_dsl.int n) (arg i 1) (res i 0)
@@ -1160,6 +1166,7 @@ let emit_simd_instr op i =
11601166
| SSE41 (Round_f32 n) -> I.roundps n (arg i 0) (res i 0)
11611167
| SSE41 (Multi_sad_unsigned_i8 n) -> I.mpsadbw (X86_dsl.int n) (arg i 1) (res i 0)
11621168
| SSE41 Minpos_unsigned_i16 -> I.phminposuw (arg i 0) (res i 0)
1169+
| SSE41 Mullo_i32 -> I.pmulld (arg i 1) (res i 0)
11631170
| SSE42 Cmpgt_i64 -> I.pcmpgtq (arg i 1) (res i 0)
11641171
| SSE42 Crc32_64 -> I.crc32 (arg i 1) (res i 0)
11651172
| SSE42 (Cmpestrm n) -> I.pcmpestrm (X86_dsl.int n) (arg i 1) (arg i 0)
@@ -1269,25 +1276,19 @@ let emit_instr fallthrough i =
12691276
end
12701277
end
12711278
end
1272-
| Lop(Iextcall { func; alloc; stack_ofs
1273-
(* BACKPORT BEGIN *)
1274-
= _
1275-
(* BACKPORT END *)
1276-
}) ->
1279+
| Lop(Iextcall { func; alloc; stack_ofs }) ->
12771280
add_used_symbol func;
1278-
(* BEGIN BACKPORT
1279-
if stack_ofs > 0 then begin
1281+
if Config.runtime5 && stack_ofs > 0 then begin
12801282
I.mov rsp r13;
12811283
I.lea (mem64 QWORD stack_ofs RSP) r12;
1282-
load_symbol_addr func rax;
1283-
emit_call "caml_c_call_stack_args";
1284-
record_frame env i.live (Dbg_other i.dbg);
1285-
end else *) if alloc then begin
1284+
load_symbol_addr (Cmm.global_symbol func) rax;
1285+
emit_call (Cmm.global_symbol "caml_c_call_stack_args");
1286+
record_frame i.live (Dbg_other i.dbg);
1287+
end else if alloc then begin
12861288
load_symbol_addr (Cmm.global_symbol func) rax;
12871289
emit_call (Cmm.global_symbol "caml_c_call");
12881290
record_frame i.live (Dbg_other i.dbg);
1289-
(* BEGIN BACKPORT *)
1290-
if system <> S_win64 then begin
1291+
if not Config.runtime5 && system <> S_win64 then begin
12911292

12921293
(* In amd64.S, "caml_c_call" tail-calls the C function (in order to
12931294
produce nicer backtraces), so we need to restore r15 manually after
@@ -1299,24 +1300,21 @@ let emit_instr fallthrough i =
12991300

13001301
I.mov (domain_field Domainstate.Domain_young_ptr) r15
13011302
end
1302-
(* END BACKPORT *)
13031303
end else begin
1304-
(* BEGIN BACKPORT
1305-
I.mov rsp rbx;
1306-
cfi_remember_state ();
1307-
cfi_def_cfa_register "rbx";
1308-
(* NB: gdb has asserts on contiguous stacks that mean it
1309-
will not unwind through this unless we were to tag this
1310-
calling frame with cfi_signal_frame in it's definition. *)
1311-
I.mov (domain_field Domainstate.Domain_c_stack) rsp; *)
1312-
(* END BACKPORT *)
1313-
emit_call (Cmm.global_symbol func)
1314-
(* BEGIN BACKPORT
1315-
;
1316-
I.mov rbx rsp;
1317-
cfi_restore_state ();
1318-
*)
1319-
(* END BACKPORT *)
1304+
if Config.runtime5 then begin
1305+
I.mov rsp rbx;
1306+
cfi_remember_state ();
1307+
cfi_def_cfa_register "rbx";
1308+
(* NB: gdb has asserts on contiguous stacks that mean it
1309+
will not unwind through this unless we were to tag this
1310+
calling frame with cfi_signal_frame in it's definition. *)
1311+
I.mov (domain_field Domainstate.Domain_c_stack) rsp;
1312+
end;
1313+
emit_call (Cmm.global_symbol func);
1314+
if Config.runtime5 then begin
1315+
I.mov rbx rsp;
1316+
cfi_restore_state ();
1317+
end;
13201318
end
13211319
| Lop(Istackoffset n) ->
13221320
emit_stack_offset n
@@ -1690,8 +1688,9 @@ let emit_instr fallthrough i =
16901688
I.set (cond (Iunsigned Cne)) (res8 i 0);
16911689
I.movzx (res8 i 0) (res i 0)
16921690
| Lop (Idls_get) ->
1693-
Misc.fatal_error "Dls is currently not supported";
1694-
(* I.mov (domain_field Domainstate.Domain_dls_root) (res i 0) *)
1691+
if Config.runtime5
1692+
then I.mov (domain_field Domainstate.Domain_dls_root) (res i 0)
1693+
else Misc.fatal_error "Dls is not supported in runtime4.";
16951694
| Lreloadretaddr ->
16961695
()
16971696
| Lreturn ->
@@ -1775,16 +1774,12 @@ let emit_instr fallthrough i =
17751774
| Lraise k ->
17761775
begin match k with
17771776
| Lambda.Raise_regular ->
1778-
(* BACKPORT BEGIN *)
17791777
I.mov (int 0) (domain_field Domainstate.Domain_backtrace_pos);
1780-
(* BACKPORT END *)
17811778
emit_call (Cmm.global_symbol "caml_raise_exn");
17821779
record_frame Reg.Set.empty (Dbg_raise i.dbg)
17831780
| Lambda.Raise_reraise ->
1784-
(* BACKPORT BEGIN *)
1785-
(* emit_call (Cmm.global_symbol "caml_reraise_exn"); *)
1786-
emit_call (Cmm.global_symbol "caml_raise_exn");
1787-
(* BACKPORT END *)
1781+
emit_call (Cmm.global_symbol
1782+
(if Config.runtime5 then "caml_reraise_exn" else "caml_raise_exn"));
17881783
record_frame Reg.Set.empty (Dbg_raise i.dbg)
17891784
| Lambda.Raise_notrace ->
17901785
I.mov (domain_field Domainstate.Domain_exn_handler) rsp;
@@ -1853,33 +1848,40 @@ let fundecl fundecl =
18531848
D.label (label_name (emit_symbol fundecl.fun_name));
18541849
emit_debug_info fundecl.fun_dbg;
18551850
cfi_startproc ();
1856-
(* BACKPORT BEGIN *)
1857-
(* if !Clflags.runtime_variant = "d" then
1858-
emit_call (Cmm.global_symbol "caml_assert_stack_invariants");
1859-
let { max_frame_size; contains_nontail_calls} =
1860-
preproc_stack_check
1861-
~fun_body:fundecl.fun_body ~frame_size:(frame_size ()) ~trap_size:16
1851+
let handle_overflow_and_max_frame_size =
1852+
(* CR mshinwell: this should be conditionalized on a specific
1853+
"stack checks enabled" config option, so we can backport to 4.x *)
1854+
if not Config.runtime5 then None
1855+
else (
1856+
if !Clflags.runtime_variant = "d" then
1857+
emit_call (Cmm.global_symbol "caml_assert_stack_invariants");
1858+
let { max_frame_size; contains_nontail_calls} =
1859+
preproc_stack_check
1860+
~fun_body:fundecl.fun_body ~frame_size:(frame_size ()) ~trap_size:16
1861+
in
1862+
let handle_overflow =
1863+
if contains_nontail_calls || max_frame_size >= stack_threshold_size then begin
1864+
let overflow = new_label () and ret = new_label () in
1865+
let threshold_offset = Domainstate.stack_ctx_words * 8 + stack_threshold_size in
1866+
I.lea (mem64 NONE (-(max_frame_size + threshold_offset)) RSP) r10;
1867+
I.cmp (domain_field Domainstate.Domain_current_stack) r10;
1868+
I.jb (label overflow);
1869+
def_label ret;
1870+
Some (overflow, ret)
1871+
end else None
1872+
in
1873+
match handle_overflow with
1874+
| None -> None
1875+
| Some handle_overflow -> Some (handle_overflow, max_frame_size)
1876+
)
18621877
in
1863-
let handle_overflow =
1864-
if contains_nontail_calls || max_frame_size >= stack_threshold_size then begin
1865-
let overflow = new_label () and ret = new_label () in
1866-
let threshold_offset = Domainstate.stack_ctx_words * 8 + stack_threshold_size in
1867-
I.lea (mem64 NONE (-(max_frame_size + threshold_offset)) RSP) r10;
1868-
I.cmp (domain_field Domainstate.Domain_current_stack) r10;
1869-
I.jb (label overflow);
1870-
def_label ret;
1871-
Some (overflow, ret)
1872-
end else None
1873-
in*)
1874-
(* BACKPORT END *)
18751878
emit_all true fundecl.fun_body;
18761879
List.iter emit_call_gc !call_gc_sites;
18771880
List.iter emit_local_realloc !local_realloc_sites;
18781881
emit_call_safety_errors ();
1879-
(* BACKPORT BEGIN *)
1880-
(*begin match handle_overflow with
1882+
begin match handle_overflow_and_max_frame_size with
18811883
| None -> ()
1882-
| Some (overflow,ret) -> begin
1884+
| Some ((overflow,ret), max_frame_size) -> begin
18831885
def_label overflow;
18841886
(* Pass the desired frame size on the stack, since all of the
18851887
argument-passing registers may be in use.
@@ -1892,8 +1894,7 @@ let fundecl fundecl =
18921894
cfi_adjust_cfa_offset (-8);
18931895
I.jmp (label ret)
18941896
end
1895-
end;*)
1896-
(* BACKPORT END *)
1897+
end;
18971898
if !frame_required then begin
18981899
let n = frame_size() - 8 - (if fp then 8 else 0) in
18991900
if n <> 0

backend/amd64/proc.ml

+9-6
Original file line numberDiff line numberDiff line change
@@ -309,10 +309,7 @@ let win64_float_external_arguments =
309309
let win64_loc_external_arguments arg =
310310
let loc = Array.make (Array.length arg) Reg.dummy in
311311
let reg = ref 0
312-
(* BACKPORT BEGIN *)
313-
(* and ofs = ref 0 in *)
314-
and ofs = ref 32 in
315-
(* BACKPORT END *)
312+
and ofs = ref (if Config.runtime5 then 0 else 32) in
316313
for i = 0 to Array.length arg - 1 do
317314
match arg.(i) with
318315
| Val | Int | Addr as ty ->
@@ -374,10 +371,16 @@ let domainstate_ptr_dwarf_register_number = 14
374371

375372
(* Registers destroyed by operations *)
376373

374+
let int_regs_destroyed_at_c_call_win64 =
375+
if Config.runtime5 then [|0;1;4;5;6;7;10;11;12|] else [|0;4;5;6;7;10;11|]
376+
377+
let int_regs_destroyed_at_c_call =
378+
if Config.runtime5 then [|0;1;2;3;4;5;6;7;10;11|] else [|0;2;3;4;5;6;7;10;11|]
379+
377380
let destroyed_at_c_call_win64 =
378381
(* Win64: rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15 preserved *)
379382
let basic_regs = Array.append
380-
(Array.map (phys_reg Int) [|0;4;5;6;7;10;11|] )
383+
(Array.map (phys_reg Int) int_regs_destroyed_at_c_call_win64)
381384
(Array.sub hard_float_reg 0 6)
382385
in
383386
fun () -> if simd_regalloc_disabled ()
@@ -387,7 +390,7 @@ let destroyed_at_c_call_win64 =
387390
let destroyed_at_c_call_unix =
388391
(* Unix: rbx, rbp, r12-r15 preserved *)
389392
let basic_regs = Array.append
390-
(Array.map (phys_reg Int) [|0;2;3;4;5;6;7;10;11|])
393+
(Array.map (phys_reg Int) int_regs_destroyed_at_c_call)
391394
hard_float_reg
392395
in
393396
fun () -> if simd_regalloc_disabled ()

0 commit comments

Comments
 (0)