@@ -74,13 +74,13 @@ let cfi_endproc () =
74
74
let cfi_adjust_cfa_offset n =
75
75
if Config.asm_cfi_supported then D.cfi_adjust_cfa_offset n
76
76
77
- let _cfi_remember_state () =
77
+ let cfi_remember_state () =
78
78
if Config.asm_cfi_supported then D.cfi_remember_state ()
79
79
80
- let _cfi_restore_state () =
80
+ let cfi_restore_state () =
81
81
if Config.asm_cfi_supported then D.cfi_restore_state ()
82
82
83
- let _cfi_def_cfa_register reg =
83
+ let cfi_def_cfa_register reg =
84
84
if Config.asm_cfi_supported then D.cfi_def_cfa_register reg
85
85
86
86
let emit_debug_info ?discriminator dbg =
@@ -94,7 +94,7 @@ let emit_debug_info_linear i =
94
94
95
95
let fp = Config.with_frame_pointers
96
96
97
- let _stack_threshold_size = Config.stack_threshold * 8 (* bytes *)
97
+ let stack_threshold_size = Config.stack_threshold * 8 (* bytes *)
98
98
99
99
(* Tradeoff between code size and code speed *)
100
100
@@ -1034,6 +1034,10 @@ let emit_simd_instr op i =
1034
1034
| SSE2 Avg_unsigned_i8 -> I.pavgb (arg i 1) (res i 0)
1035
1035
| SSE2 Avg_unsigned_i16 -> I.pavgw (arg i 1) (res i 0)
1036
1036
| SSE2 SAD_unsigned_i8 -> I.psadbw (arg i 1) (res i 0)
1037
+ | SSE2 Mulhi_i16 -> I.pmulhw (arg i 1) (res i 0)
1038
+ | SSE2 Mulhi_unsigned_i16 -> I.pmulhuw (arg i 1) (res i 0)
1039
+ | SSE2 Mullo_i16 -> I.pmullw (arg i 1) (res i 0)
1040
+ | SSE2 Mul_hadd_i16_to_i32 -> I.pmaddwd (arg i 1) (res i 0)
1037
1041
| SSE2 And_bits -> I.pand (arg i 1) (res i 0)
1038
1042
| SSE2 Andnot_bits -> I.pandnot (arg i 1) (res i 0)
1039
1043
| SSE2 Or_bits -> I.por (arg i 1) (res i 0)
@@ -1108,6 +1112,8 @@ let emit_simd_instr op i =
1108
1112
| SSSE3 Mulsign_i32 -> I.psignd (arg i 1) (res i 0)
1109
1113
| SSSE3 Alignr_i8 n -> I.palignr (X86_dsl.int n) (arg i 1) (res i 0)
1110
1114
| SSSE3 Shuffle_8 -> I.pshufb (arg i 1) (res i 0)
1115
+ | SSSE3 Mul_unsigned_hadd_saturating_i8_to_i16 ->
1116
+ I.pmaddubsw (arg i 1) (res i 0)
1111
1117
| SSE41 (Blend_16 n) -> I.pblendw (X86_dsl.int n) (arg i 1) (res i 0)
1112
1118
| SSE41 (Blend_32 n) -> I.blendps (X86_dsl.int n) (arg i 1) (res i 0)
1113
1119
| SSE41 (Blend_64 n) -> I.blendpd (X86_dsl.int n) (arg i 1) (res i 0)
@@ -1160,6 +1166,7 @@ let emit_simd_instr op i =
1160
1166
| SSE41 (Round_f32 n) -> I.roundps n (arg i 0) (res i 0)
1161
1167
| SSE41 (Multi_sad_unsigned_i8 n) -> I.mpsadbw (X86_dsl.int n) (arg i 1) (res i 0)
1162
1168
| SSE41 Minpos_unsigned_i16 -> I.phminposuw (arg i 0) (res i 0)
1169
+ | SSE41 Mullo_i32 -> I.pmulld (arg i 1) (res i 0)
1163
1170
| SSE42 Cmpgt_i64 -> I.pcmpgtq (arg i 1) (res i 0)
1164
1171
| SSE42 Crc32_64 -> I.crc32 (arg i 1) (res i 0)
1165
1172
| SSE42 (Cmpestrm n) -> I.pcmpestrm (X86_dsl.int n) (arg i 1) (arg i 0)
@@ -1269,25 +1276,19 @@ let emit_instr fallthrough i =
1269
1276
end
1270
1277
end
1271
1278
end
1272
- | Lop(Iextcall { func; alloc; stack_ofs
1273
- (* BACKPORT BEGIN *)
1274
- = _
1275
- (* BACKPORT END *)
1276
- }) ->
1279
+ | Lop(Iextcall { func; alloc; stack_ofs }) ->
1277
1280
add_used_symbol func;
1278
- (* BEGIN BACKPORT
1279
- if stack_ofs > 0 then begin
1281
+ if Config.runtime5 && stack_ofs > 0 then begin
1280
1282
I.mov rsp r13;
1281
1283
I.lea (mem64 QWORD stack_ofs RSP) r12;
1282
- load_symbol_addr func rax;
1283
- emit_call "caml_c_call_stack_args";
1284
- record_frame env i.live (Dbg_other i.dbg);
1285
- end else *) if alloc then begin
1284
+ load_symbol_addr (Cmm.global_symbol func) rax;
1285
+ emit_call (Cmm.global_symbol "caml_c_call_stack_args") ;
1286
+ record_frame i.live (Dbg_other i.dbg);
1287
+ end else if alloc then begin
1286
1288
load_symbol_addr (Cmm.global_symbol func) rax;
1287
1289
emit_call (Cmm.global_symbol "caml_c_call");
1288
1290
record_frame i.live (Dbg_other i.dbg);
1289
- (* BEGIN BACKPORT *)
1290
- if system <> S_win64 then begin
1291
+ if not Config.runtime5 && system <> S_win64 then begin
1291
1292
1292
1293
(* In amd64.S, "caml_c_call" tail-calls the C function (in order to
1293
1294
produce nicer backtraces), so we need to restore r15 manually after
@@ -1299,24 +1300,21 @@ let emit_instr fallthrough i =
1299
1300
1300
1301
I.mov (domain_field Domainstate.Domain_young_ptr) r15
1301
1302
end
1302
- (* END BACKPORT *)
1303
1303
end else begin
1304
- (* BEGIN BACKPORT
1305
- I.mov rsp rbx;
1306
- cfi_remember_state ();
1307
- cfi_def_cfa_register "rbx";
1308
- (* NB: gdb has asserts on contiguous stacks that mean it
1309
- will not unwind through this unless we were to tag this
1310
- calling frame with cfi_signal_frame in it's definition. *)
1311
- I.mov (domain_field Domainstate.Domain_c_stack) rsp; *)
1312
- (* END BACKPORT *)
1313
- emit_call (Cmm.global_symbol func)
1314
- (* BEGIN BACKPORT
1315
- ;
1316
- I.mov rbx rsp;
1317
- cfi_restore_state ();
1318
- *)
1319
- (* END BACKPORT *)
1304
+ if Config.runtime5 then begin
1305
+ I.mov rsp rbx;
1306
+ cfi_remember_state ();
1307
+ cfi_def_cfa_register "rbx";
1308
+ (* NB: gdb has asserts on contiguous stacks that mean it
1309
+ will not unwind through this unless we were to tag this
1310
+ calling frame with cfi_signal_frame in it's definition. *)
1311
+ I.mov (domain_field Domainstate.Domain_c_stack) rsp;
1312
+ end;
1313
+ emit_call (Cmm.global_symbol func);
1314
+ if Config.runtime5 then begin
1315
+ I.mov rbx rsp;
1316
+ cfi_restore_state ();
1317
+ end;
1320
1318
end
1321
1319
| Lop(Istackoffset n) ->
1322
1320
emit_stack_offset n
@@ -1690,8 +1688,9 @@ let emit_instr fallthrough i =
1690
1688
I.set (cond (Iunsigned Cne)) (res8 i 0);
1691
1689
I.movzx (res8 i 0) (res i 0)
1692
1690
| Lop (Idls_get) ->
1693
- Misc.fatal_error "Dls is currently not supported";
1694
- (* I.mov (domain_field Domainstate.Domain_dls_root) (res i 0) *)
1691
+ if Config.runtime5
1692
+ then I.mov (domain_field Domainstate.Domain_dls_root) (res i 0)
1693
+ else Misc.fatal_error "Dls is not supported in runtime4.";
1695
1694
| Lreloadretaddr ->
1696
1695
()
1697
1696
| Lreturn ->
@@ -1775,16 +1774,12 @@ let emit_instr fallthrough i =
1775
1774
| Lraise k ->
1776
1775
begin match k with
1777
1776
| Lambda.Raise_regular ->
1778
- (* BACKPORT BEGIN *)
1779
1777
I.mov (int 0) (domain_field Domainstate.Domain_backtrace_pos);
1780
- (* BACKPORT END *)
1781
1778
emit_call (Cmm.global_symbol "caml_raise_exn");
1782
1779
record_frame Reg.Set.empty (Dbg_raise i.dbg)
1783
1780
| Lambda.Raise_reraise ->
1784
- (* BACKPORT BEGIN *)
1785
- (* emit_call (Cmm.global_symbol "caml_reraise_exn"); *)
1786
- emit_call (Cmm.global_symbol "caml_raise_exn");
1787
- (* BACKPORT END *)
1781
+ emit_call (Cmm.global_symbol
1782
+ (if Config.runtime5 then "caml_reraise_exn" else "caml_raise_exn"));
1788
1783
record_frame Reg.Set.empty (Dbg_raise i.dbg)
1789
1784
| Lambda.Raise_notrace ->
1790
1785
I.mov (domain_field Domainstate.Domain_exn_handler) rsp;
@@ -1853,33 +1848,40 @@ let fundecl fundecl =
1853
1848
D.label (label_name (emit_symbol fundecl.fun_name));
1854
1849
emit_debug_info fundecl.fun_dbg;
1855
1850
cfi_startproc ();
1856
- (* BACKPORT BEGIN *)
1857
- (* if !Clflags.runtime_variant = "d" then
1858
- emit_call (Cmm.global_symbol "caml_assert_stack_invariants");
1859
- let { max_frame_size; contains_nontail_calls} =
1860
- preproc_stack_check
1861
- ~fun_body:fundecl.fun_body ~frame_size:(frame_size ()) ~trap_size:16
1851
+ let handle_overflow_and_max_frame_size =
1852
+ (* CR mshinwell: this should be conditionalized on a specific
1853
+ "stack checks enabled" config option, so we can backport to 4.x *)
1854
+ if not Config.runtime5 then None
1855
+ else (
1856
+ if !Clflags.runtime_variant = "d" then
1857
+ emit_call (Cmm.global_symbol "caml_assert_stack_invariants");
1858
+ let { max_frame_size; contains_nontail_calls} =
1859
+ preproc_stack_check
1860
+ ~fun_body:fundecl.fun_body ~frame_size:(frame_size ()) ~trap_size:16
1861
+ in
1862
+ let handle_overflow =
1863
+ if contains_nontail_calls || max_frame_size >= stack_threshold_size then begin
1864
+ let overflow = new_label () and ret = new_label () in
1865
+ let threshold_offset = Domainstate.stack_ctx_words * 8 + stack_threshold_size in
1866
+ I.lea (mem64 NONE (-(max_frame_size + threshold_offset)) RSP) r10;
1867
+ I.cmp (domain_field Domainstate.Domain_current_stack) r10;
1868
+ I.jb (label overflow);
1869
+ def_label ret;
1870
+ Some (overflow, ret)
1871
+ end else None
1872
+ in
1873
+ match handle_overflow with
1874
+ | None -> None
1875
+ | Some handle_overflow -> Some (handle_overflow, max_frame_size)
1876
+ )
1862
1877
in
1863
- let handle_overflow =
1864
- if contains_nontail_calls || max_frame_size >= stack_threshold_size then begin
1865
- let overflow = new_label () and ret = new_label () in
1866
- let threshold_offset = Domainstate.stack_ctx_words * 8 + stack_threshold_size in
1867
- I.lea (mem64 NONE (-(max_frame_size + threshold_offset)) RSP) r10;
1868
- I.cmp (domain_field Domainstate.Domain_current_stack) r10;
1869
- I.jb (label overflow);
1870
- def_label ret;
1871
- Some (overflow, ret)
1872
- end else None
1873
- in*)
1874
- (* BACKPORT END *)
1875
1878
emit_all true fundecl.fun_body;
1876
1879
List.iter emit_call_gc !call_gc_sites;
1877
1880
List.iter emit_local_realloc !local_realloc_sites;
1878
1881
emit_call_safety_errors ();
1879
- (* BACKPORT BEGIN *)
1880
- (*begin match handle_overflow with
1882
+ begin match handle_overflow_and_max_frame_size with
1881
1883
| None -> ()
1882
- | Some (overflow,ret) -> begin
1884
+ | Some (( overflow,ret), max_frame_size ) -> begin
1883
1885
def_label overflow;
1884
1886
(* Pass the desired frame size on the stack, since all of the
1885
1887
argument-passing registers may be in use.
@@ -1892,8 +1894,7 @@ let fundecl fundecl =
1892
1894
cfi_adjust_cfa_offset (-8);
1893
1895
I.jmp (label ret)
1894
1896
end
1895
- end;*)
1896
- (* BACKPORT END *)
1897
+ end;
1897
1898
if !frame_required then begin
1898
1899
let n = frame_size() - 8 - (if fp then 8 else 0) in
1899
1900
if n <> 0
0 commit comments