ocaml-flambda · gretay-js · Oct 13, 2023 · Jul 28, 2023 · Jul 28, 2023 · Jul 28, 2023
diff --git a/backend/CSEgen.ml b/backend/CSEgen.ml
@@ -241,9 +241,9 @@ method class_of_operation op =
   | Iload(_,_,mut) -> Op_load mut
   | Istore(_,_,asg) -> Op_store asg
   | Ialloc _ | Ipoll _ -> assert false     (* treated specially *)
-  | Iintop(Icheckbound) -> Op_checkbound
+  | Iintop(Icheckbound|Icheckalign _) -> Op_checkbound
   | Iintop _ -> Op_pure
-  | Iintop_imm(Icheckbound, _) -> Op_checkbound
+  | Iintop_imm((Icheckbound|Icheckalign _), _) -> Op_checkbound
   | Iintop_imm(_, _) -> Op_pure
   | Iintop_atomic _ -> Op_store true
   | Icompf _

diff --git a/backend/amd64/emit.mlp b/backend/amd64/emit.mlp
@@ -423,47 +423,72 @@ let emit_local_realloc lr =
   emit_call (Cmm.global_symbol "caml_call_local_realloc");
   I.jmp (label lr.lr_return_lbl)
 
-(* Record calls to caml_ml_array_bound_error.
-   In -g mode we maintain one call to
-   caml_ml_array_bound_error per bound check site.  Without -g, we can share
+(* Record calls to caml_ml_array_bound_error and caml_ml_array_align_error.
+   In -g mode we maintain one call per check site.  Without -g, we can share
    a single call. *)
 
-type bound_error_call =
-  { bd_lbl: label;                      (* Entry label *)
-    bd_frame: label;                    (* Label of frame descriptor *)
-    bd_dbg: Debuginfo.t;
-    (* As for [gc_call]. *)
+type safety_check = Bound_check | Align_check  (* Kind of runtime check whose failure call is emitted *)
+
+type safety_check_failure = {  (* One failing-check call site; recorded when Clflags.debug is set *)
+    sc_lbl: label;              (* Entry label *)
+    sc_frame: label;            (* Label of frame descriptor *)
+    sc_dbg: Debuginfo.t;        (* As for [gc_call]. *)
+  }
+
+type safety_check_sites = {  (* Pending failure calls of one kind *)
+    mutable sc_sites: safety_check_failure list;  (* Per-site calls, used when Clflags.debug is set *)
+    mutable sc_call: label;  (* Label of the shared call otherwise; 0 = not allocated yet *)
   }
 
-let bound_error_sites = ref ([] : bound_error_call list)
-let bound_error_call = ref 0
+let bound_checks = { sc_sites = []; sc_call = 0 }  (* Failures that call caml_ml_array_bound_error *)
+let align_checks = { sc_sites = []; sc_call = 0 }  (* Failures that call caml_ml_array_align_error *)
 
-let bound_error_label fdo dbg =
+let safety_check_failure_label kind fdo dbg =  (* Label to branch to when a check of this kind fails; fdo is unused in this body *)
   if !Clflags.debug then begin
-    let lbl_bound_error = new_label() in
+    let lbl_error = new_label() in  (* fresh entry label for this site *)
     let lbl_frame = record_frame_label Reg.Set.empty (Dbg_other dbg) in
-    bound_error_sites :=
-      { bd_lbl = lbl_bound_error;
-        bd_frame = lbl_frame;
-        bd_dbg = dbg;
-      } :: !bound_error_sites;
-    lbl_bound_error
+    let info =  (* per-site record: entry label, frame label and debug info *)
+      { sc_lbl = lbl_error;
+        sc_frame = lbl_frame;
+        sc_dbg = dbg } in
+    (match kind with  (* queue the site on the list for its kind *)
+    | Bound_check -> bound_checks.sc_sites <- info :: bound_checks.sc_sites
+    | Align_check -> align_checks.sc_sites <- info :: align_checks.sc_sites);
+    lbl_error
   end else begin
-    if !bound_error_call = 0 then bound_error_call := new_label();
-    !bound_error_call
+    match kind with  (* no debug info: one shared label per kind, allocated on first use *)
+    | Bound_check ->
+      if bound_checks.sc_call = 0 then bound_checks.sc_call <- new_label();
+      bound_checks.sc_call
+    | Align_check ->
+      if align_checks.sc_call = 0 then align_checks.sc_call <- new_label();
+      align_checks.sc_call
   end
 
-let emit_call_bound_error bd =
-  def_label bd.bd_lbl;
-  emit_debug_info bd.bd_dbg;
-  emit_call (Cmm.global_symbol "caml_ml_array_bound_error");
-  def_label bd.bd_frame
-
-let emit_call_bound_errors () =
-  List.iter emit_call_bound_error !bound_error_sites;
-  if !bound_error_call > 0 then begin
-    def_label !bound_error_call;
+let emit_call_safety_error kind sc =  (* Emit the failure call recorded for one check site *)
+  def_label sc.sc_lbl;
+  emit_debug_info sc.sc_dbg;
+  (match kind with  (* call the runtime error function matching the kind *)
+  | Bound_check -> emit_call (Cmm.global_symbol "caml_ml_array_bound_error")
+  | Align_check -> emit_call (Cmm.global_symbol "caml_ml_array_align_error"));
+  def_label sc.sc_frame  (* frame descriptor label is defined right after the call *)
+
+let clear_safety_checks () =  (* Reset the recorded sites and shared labels of both kinds *)
+  bound_checks.sc_sites <- [];
+  bound_checks.sc_call <- 0;
+  align_checks.sc_sites <- [];
+  align_checks.sc_call <- 0
+
+let emit_call_safety_errors () =  (* Emit all recorded failure calls: bound checks first, then alignment checks *)
+  List.iter (emit_call_safety_error Bound_check) bound_checks.sc_sites;
+  if bound_checks.sc_call > 0 then begin  (* shared call, only if its label was allocated *)
+    def_label bound_checks.sc_call;
     emit_call (Cmm.global_symbol "caml_ml_array_bound_error")
+  end;
+  List.iter (emit_call_safety_error Align_check) align_checks.sc_sites;
+  if align_checks.sc_call > 0 then begin  (* shared call, only if its label was allocated *)
+    def_label align_checks.sc_call;
+    emit_call (Cmm.global_symbol "caml_ml_array_align_error")
   end
 
 (* Record jump tables *)
@@ -1265,8 +1290,10 @@ let emit_instr fallthrough i =
           I.mov (addressing addr DWORD i 0) (res32 i 0)
       | Thirtytwo_signed ->
           I.movsxd (addressing addr DWORD i 0) dest
-      | Onetwentyeight ->
+      | Onetwentyeight_unaligned ->
           I.movupd (addressing addr VEC128 i 0) dest
+      | Onetwentyeight_aligned ->
+          I.movapd (addressing addr VEC128 i 0) dest
       | Single ->
           I.cvtss2sd (addressing addr REAL4 i 0) dest
       | Double ->
@@ -1282,8 +1309,10 @@ let emit_instr fallthrough i =
           I.mov (arg16 i 0) (addressing addr WORD i 1)
       | Thirtytwo_signed | Thirtytwo_unsigned ->
           I.mov (arg32 i 0) (addressing addr DWORD i 1)
-      | Onetwentyeight ->
+      | Onetwentyeight_unaligned ->
           I.movupd (arg i 0) (addressing addr VEC128 i 1)
+      | Onetwentyeight_aligned ->
+          I.movapd (arg i 0) (addressing addr VEC128 i 1)
       | Single ->
           I.cvtsd2ss (arg i 0) xmm15;
           I.movss xmm15 (addressing addr REAL4 i 1)
@@ -1367,12 +1396,20 @@ let emit_instr fallthrough i =
       I.cmp (int n) (arg i 0);
       I.set (cond cmp) al;
       I.movzx al (res i 0)
+  | Lop(Iintop (Icheckalign { bytes_pow2 })) ->
+      let lbl = safety_check_failure_label Align_check i.fdo i.dbg in
+      let mask = bytes_pow2 - 1 in
+      I.test (Imm (Int64.of_int mask)) (arg i 0);
+      I.jne (label lbl)
+  | Lop(Iintop_imm(Icheckalign { bytes_pow2 }, n)) ->
+      let mask = bytes_pow2 - 1 in
+      if (n land mask) <> 0 then Misc.fatal_errorf "Alignment check on known int failed."
   | Lop(Iintop (Icheckbound)) ->
-      let lbl = bound_error_label i.fdo i.dbg in
+      let lbl = safety_check_failure_label Bound_check i.fdo i.dbg in
       I.cmp (arg i 1) (arg i 0);
       I.jbe (label lbl)
   | Lop(Iintop_imm(Icheckbound, n)) ->
-      let lbl = bound_error_label i.fdo i.dbg in
+      let lbl = safety_check_failure_label Bound_check i.fdo i.dbg in
       I.cmp (int n) (arg i 0);
       I.jbe (label lbl)
   | Lop(Iintop_imm (Iand, n)) when n >= 0 && n <= 0xFFFF_FFFF && Reg.is_reg i.res.(0) ->
@@ -1747,8 +1784,7 @@ let fundecl fundecl =
   stack_offset := 0;
   call_gc_sites := [];
   local_realloc_sites := [];
-  bound_error_sites := [];
-  bound_error_call := 0;
+  clear_safety_checks ();
   for i = 0 to Proc.num_stack_slot_classes - 1 do
     num_stack_slots.(i) <- fundecl.fun_num_stack_slots.(i);
   done;
@@ -1777,7 +1813,7 @@ let fundecl fundecl =
   emit_all true fundecl.fun_body;
   List.iter emit_call_gc !call_gc_sites;
   List.iter emit_local_realloc !local_realloc_sites;
-  emit_call_bound_errors ();
+  emit_call_safety_errors ();
   if !frame_required then begin
     let n = frame_size() - 8 - (if fp then 8 else 0) in
     if n <> 0

diff --git a/backend/amd64/proc.ml b/backend/amd64/proc.ml
@@ -438,14 +438,14 @@ let destroyed_at_oper = function
                  | Ifloat_iround | Ifloat_min | Ifloat_max
                  | Ifloatarithmem (_, _) | Ibswap _ | Ifloatsqrtf _))
   | Iop(Iintop(Iadd | Isub | Imul | Iand | Ior | Ixor | Ilsl | Ilsr | Iasr
-              | Ipopcnt | Iclz _ | Ictz _ | Icheckbound))
+              | Ipopcnt | Iclz _ | Ictz _ | Icheckbound | Icheckalign _))
   | Iop(Iintop_imm((Iadd | Isub | Imul | Imulh _ | Iand | Ior | Ixor | Ilsl
                    | Ilsr | Iasr | Ipopcnt | Iclz _ | Ictz _
-                   | Icheckbound),_))
+                   | Icheckbound | Icheckalign _),_))
   | Iop(Iintop_atomic _)
   | Iop(Istore((Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed
                | Thirtytwo_unsigned | Thirtytwo_signed | Word_int | Word_val
-               | Double | Onetwentyeight ), _, _))
+               | Double | Onetwentyeight_aligned | Onetwentyeight_unaligned), _, _))
   | Iop(Imove | Ispill | Ireload | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf
        | Icompf _
        | Icsel _
@@ -484,15 +484,16 @@ let destroyed_at_basic (basic : Cfg_intf.S.basic) =
     [| rax |]
   | Op (Specific (Irdtsc | Irdpmc)) ->
     [| rax; rdx |]
-  | Op (Intop Icheckbound | Intop_imm (Icheckbound, _)) ->
+  | Op (Intop (Icheckbound | Icheckalign _)
+  | Intop_imm ((Icheckbound | Icheckalign _), _)) ->
     assert false
   | Op (Move | Spill | Reload
        | Const_int _ | Const_float _ | Const_symbol _ | Const_vec128 _
        | Stackoffset _
        | Load _ | Store ((Byte_unsigned | Byte_signed | Sixteen_unsigned
                          | Sixteen_signed | Thirtytwo_unsigned
                          | Thirtytwo_signed | Word_int | Word_val
-                         | Double | Onetwentyeight ), _, _)
+                         | Double | Onetwentyeight_aligned | Onetwentyeight_unaligned), _, _)
        | Intop (Iadd | Isub | Imul | Iand | Ior | Ixor | Ilsl | Ilsr
                | Iasr | Ipopcnt | Iclz _ | Ictz _)
        | Intop_imm ((Iadd | Isub | Imul | Imulh _ | Iand | Ior | Ixor
@@ -528,7 +529,7 @@ let destroyed_at_terminator (terminator : Cfg_intf.S.terminator) =
     destroyed_at_alloc_or_poll
   | Always _ | Parity_test _ | Truth_test _ | Float_test _ | Int_test _
   | Return | Raise _ | Tailcall_self  _ | Tailcall_func _
-  | Prim {op = Checkbound _ | Probe _; _}
+  | Prim {op = Checkbound _ | Checkalign _ | Probe _; _}
   ->
     if fp then [| rbp |] else [||]
   | Switch _ ->
@@ -561,7 +562,7 @@ let is_destruction_point ~(more_destruction_points : bool) (terminator : Cfg_int
     false
   | Always _ | Parity_test _ | Truth_test _ | Float_test _ | Int_test _
   | Return | Raise _ | Tailcall_self  _ | Tailcall_func _
-  | Prim {op = Checkbound _ | Probe _; _} ->
+  | Prim {op = (Checkbound _ | Checkalign _) | Probe _; _} ->
     false
   | Switch _ ->
     false
@@ -621,13 +622,13 @@ let max_register_pressure =
   | Istore(Single, _, _) | Icompf _ ->
     consumes ~int:0 ~float:1
   | Iintop(Iadd | Isub | Imul | Imulh _ | Iand | Ior | Ixor | Ilsl | Ilsr | Iasr
-           | Ipopcnt|Iclz _| Ictz _|Icheckbound)
+           | Ipopcnt|Iclz _| Ictz _|Icheckbound|Icheckalign _)
   | Iintop_imm((Iadd | Isub | Imul | Imulh _ | Iand | Ior | Ixor | Ilsl | Ilsr
-                | Iasr | Ipopcnt | Iclz _| Ictz _|Icheckbound), _)
+                | Iasr | Ipopcnt | Iclz _| Ictz _|Icheckbound|Icheckalign _), _)
   | Iintop_atomic _
   | Istore((Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed
             | Thirtytwo_unsigned | Thirtytwo_signed | Word_int | Word_val
-            | Double | Onetwentyeight ),
+            | Double | Onetwentyeight_aligned | Onetwentyeight_unaligned),
             _, _)
   | Imove | Ispill | Ireload | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf
   | Icsel _
@@ -734,6 +735,7 @@ let operation_supported = function
   | Ccmpf _
   | Craise _
   | Ccheckbound
+  | Ccheckalign _
   | Cvectorcast _ | Cscalarcast _
   | Cprobe _ | Cprobe_is_enabled _ | Copaque | Cbeginregion | Cendregion
   | Ctuple_field _

diff --git a/backend/amd64/regalloc_stack_operands.ml b/backend/amd64/regalloc_stack_operands.ml
@@ -255,8 +255,8 @@ let basic (map : spilled_map) (instr : Cfg.basic Cfg.instruction) =
   | Prologue ->
     (* no rewrite *)
     May_still_have_spilled_registers
-  | Op (Intop Icheckbound)
-  | Op (Intop_imm ((Ipopcnt | Iclz _ | Ictz _ | Icheckbound), _)) ->
+  | Op (Intop (Icheckbound | Icheckalign _))
+  | Op (Intop_imm ((Ipopcnt | Iclz _ | Ictz _ | Icheckbound | Icheckalign _), _)) ->
     (* should not happen *)
     fatal "unexpected instruction"
   end
@@ -268,11 +268,16 @@ let terminator (map : spilled_map) (term : Cfg.terminator Cfg.instruction) =
   | Int_test { lt = _; eq = _; gt =_; is_signed = _; imm = None; }
   | Prim  {op = Checkbound { immediate = None; }; _} ->
     binary_operation map term No_result
+  | Prim  {op = Checkalign { immediate = None; _ }; _} ->
+    may_use_stack_operand_for_only_argument ~has_result:false map term
   | Int_test { lt = _; eq = _; gt =_; is_signed = _; imm = Some _; }
   | Parity_test { ifso = _; ifnot = _; }
   | Truth_test { ifso = _; ifnot = _; }
   | Prim {op = Checkbound { immediate = Some _; }; _} ->
     may_use_stack_operand_for_only_argument ~has_result:false map term
+  | Prim {op = Checkalign { immediate = Some _; _ }; _} ->
+    if debug then check_lengths term ~of_arg:0 ~of_res:0;
+    All_spilled_registers_rewritten
   | Float_test _ ->
     (* CR-someday xclerc for xclerc: this could be optimized, but the representation
        makes it more difficult than the cases above, because (i) multiple

diff --git a/backend/amd64/reload.ml b/backend/amd64/reload.ml
@@ -76,7 +76,7 @@ inherit Reloadgen.reload_generic as super
 
 method! reload_operation op arg res =
   match op with
-  | Iintop(Iadd|Isub|Iand|Ior|Ixor|Icheckbound) ->
+  | Iintop(Iadd|Isub|Iand|Ior|Ixor|Icheckbound|Icheckalign _) ->  (* NOTE(review): Icheckalign looks unary (amd64 emit reads only arg 0); arg.(1) below may be out of bounds -- confirm *)
       (* One of the two arguments can reside in the stack, but not both *)
       if stackp arg.(0) && stackp arg.(1)
       then ([|arg.(0); self#makereg arg.(1)|], res)
@@ -110,7 +110,7 @@ method! reload_operation op arg res =
       arg, res
   | Iintop(Imulh _ | Idiv | Imod | Ilsl | Ilsr | Iasr)
   | Iintop_imm((Iadd | Isub | Iand | Ior | Ixor | Ilsl | Ilsr | Iasr
-               | Imulh _ | Idiv | Imod | Icheckbound), _) ->
+               | Imulh _ | Idiv | Imod | Icheckbound | Icheckalign _), _) ->
       (* The argument(s) and results can be either in register or on stack *)
       (* Note: Imulh, Idiv, Imod: arg(0) and res(0) already forced in regs
                Ilsl, Ilsr, Iasr: arg(1) already forced in regs *)

diff --git a/backend/amd64/selection.ml b/backend/amd64/selection.ml
@@ -163,8 +163,8 @@ let pseudoregs_for_operation op arg res =
     arg.(len-1) <- res.(0);
     (arg, res)
   (* Other instructions are regular *)
-  | Iintop (Ipopcnt|Iclz _|Ictz _|Icomp _|Icheckbound)
-  | Iintop_imm ((Imulh _|Idiv|Imod|Icomp _|Icheckbound
+  | Iintop (Ipopcnt|Iclz _|Ictz _|Icomp _|Icheckbound|Icheckalign _)
+  | Iintop_imm ((Imulh _|Idiv|Imod|Icomp _|Icheckbound|Icheckalign _
                 |Ipopcnt|Iclz _|Ictz _), _)
   | Ispecific (Isqrtf|Isextend32|Izextend32|Ilea _|Istore_int (_, _, _)
               |Ifloat_iround|Ifloat_round _

diff --git a/backend/arm64/emit.mlp b/backend/arm64/emit.mlp
@@ -743,7 +743,7 @@ let emit_instr i =
     | Lop(Iintop_atomic _) ->
       (* Never generated; builtins are not yet translated to atomics *)
       assert false
-    | Lop(Ivectorcast _ | Iscalarcast _) -> 
+    | Lop(Ivectorcast _ | Iscalarcast _) ->
       (* Never generated; SIMD instructions are not yet translated *)
       assert false
     | Lop(Imove | Ispill | Ireload) ->
@@ -829,8 +829,9 @@ let emit_instr i =
             `	fcvt	{emit_reg dst}, s7\n`
         | Word_int | Word_val | Double ->
             `	ldr	{emit_reg dst}, {emit_addressing addr base}\n`
-        (* CR mslater: (SIMD) arm64 *)
-        | Onetwentyeight -> fatal_error "arm64: got 128 bit memory chunk"
+        | Onetwentyeight_aligned | Onetwentyeight_unaligned ->
+            (* CR mslater: (SIMD) arm64 *)
+            fatal_error "arm64: got 128 bit memory chunk"
         end
     | Lop(Istore(size, addr, _)) ->
         let src = i.arg.(0) in
@@ -853,8 +854,9 @@ let emit_instr i =
             `	str	s7, {emit_addressing addr base}\n`;
         | Word_int | Word_val | Double ->
             `	str	{emit_reg src}, {emit_addressing addr base}\n`
-        (* CR mslater: (SIMD) arm64 *)
-        | Onetwentyeight -> fatal_error "arm64: got 128 bit memory chunk"
+        | Onetwentyeight_aligned | Onetwentyeight_unaligned ->
+            (* CR mslater: (SIMD) arm64 *)
+            fatal_error "arm64: got 128 bit memory chunk"
         end
     | Lop(Ialloc { bytes = n; dbginfo; mode = Alloc_heap }) ->
         assembly_code_for_allocation i ~n ~far:false ~dbginfo
@@ -880,6 +882,9 @@ let emit_instr i =
     | Lop(Iintop_imm(Icomp cmp, n)) ->
         emit_cmpimm i.arg.(0) n;
         `	cset	{emit_reg i.res.(0)}, {emit_string (name_for_comparison cmp)}\n`
+    | Lop(Iintop (Icheckalign _) | Iintop_imm(Icheckalign _, _)) ->
+        (* CR mslater: (SIMD) arm64 *)
+        fatal_error "arm64: got 128 bit alignment check"
     | Lop(Iintop (Icheckbound)) ->
         let lbl = bound_error_label i.dbg in
         `	cmp	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;

diff --git a/backend/arm64/proc.ml b/backend/arm64/proc.ml
@@ -351,7 +351,7 @@ let destroyed_at_terminator (terminator : Cfg_intf.S.terminator) =
     [| reg_x8 |]
   | Always _ | Parity_test _ | Truth_test _ | Float_test _
   | Int_test _ | Switch _ | Return | Raise _ | Tailcall_self _
-  | Tailcall_func _ | Prim {op = Checkbound _ | Probe _; _}
+  | Tailcall_func _ | Prim {op = (Checkbound _ | Checkalign _) | Probe _; _}
   | Specific_can_raise _ ->
     [||]
   | Call_no_return { func_symbol = _; alloc; ty_res = _; ty_args = _; }
@@ -373,7 +373,7 @@ let is_destruction_point ~(more_destruction_points : bool) (terminator : Cfg_int
     false
   | Always _ | Parity_test _ | Truth_test _ | Float_test _
   | Int_test _ | Switch _ | Return | Raise _ | Tailcall_self _
-  | Tailcall_func _ | Prim {op = Checkbound _ | Probe _; _}
+  | Tailcall_func _ | Prim {op = (Checkbound _ | Checkalign _) | Probe _; _}
   | Specific_can_raise _ ->
     false
   | Call_no_return { func_symbol = _; alloc; ty_res = _; ty_args = _; }
@@ -436,6 +436,7 @@ let operation_supported = function
   | Cclz _ | Cctz _ | Cpopcnt
   | Cprefetch _ | Catomic _
   | Cvectorcast _ | Cscalarcast _
+  | Ccheckalign _
     -> false   (* Not implemented *)
   | Cbswap _
   | Capply _ | Cextcall _ | Cload _ | Calloc _ | Cstore _

diff --git a/backend/arm64/selection.ml b/backend/arm64/selection.ml
@@ -35,7 +35,8 @@ let is_offset chunk n =
     | Word_int | Word_val | Double ->
         n land 7 = 0 && n lsr 3 < 0x1000
     (* CR mslater: (SIMD) arm64 *)
-    | Onetwentyeight -> Misc.fatal_error "arm64: got 128 bit memory chunk")
+    | Onetwentyeight_aligned | Onetwentyeight_unaligned ->
+        Misc.fatal_error "arm64: got 128 bit memory chunk")
 
 let is_logical_immediate n =
   Arch.is_logical_immediate (Nativeint.of_int n)