Skip to content

128-bit Array Load/Store #1682

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 77 commits into from
Oct 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
77 commits
Select commit Hold shift + click to select a range
bf8af43
squash for review
TheNumbat Jul 28, 2023
a8994c2
restore consts test
TheNumbat Jul 28, 2023
a4b2c11
squash for review
TheNumbat Jul 28, 2023
72e32cc
masm fix
TheNumbat Jul 28, 2023
41dcda3
Merge branch 'simd-intrins' into simd-intrins2
TheNumbat Jul 28, 2023
06ff1cc
merge
TheNumbat Jul 28, 2023
a906a01
Merge branch 'simd-intrins' into simd-intrins2
TheNumbat Jul 28, 2023
c115d80
int64x2 tests
TheNumbat Jul 28, 2023
8129ee7
int32x4 tests
TheNumbat Jul 28, 2023
377662c
most int16x8 tests
TheNumbat Jul 28, 2023
b832bc4
finish int16x8 tests
TheNumbat Jul 31, 2023
248a6e5
int8x16 tests
TheNumbat Jul 31, 2023
0e2111a
utility tests
TheNumbat Jul 31, 2023
b90063b
string instrs + tests
TheNumbat Jul 31, 2023
cfbe737
add + test exotic ops
TheNumbat Jul 31, 2023
f65e53e
Update simd.ml
TheNumbat Jul 31, 2023
0259b48
address comments
TheNumbat Aug 1, 2023
3de0702
merge
TheNumbat Aug 1, 2023
7f8cf23
remove arg duping
TheNumbat Aug 1, 2023
8503711
add warnings
TheNumbat Aug 1, 2023
c9ddc5c
more warnings
TheNumbat Aug 1, 2023
b1b6c29
merge
TheNumbat Aug 1, 2023
fa668e9
allow f64 cast either stack op
TheNumbat Aug 1, 2023
99da902
add real unop case
TheNumbat Aug 1, 2023
2878f5f
Merge branch 'simd-intrins' into simd-intrins2
TheNumbat Aug 1, 2023
149bb72
Update backend/amd64/regalloc_stack_operands.ml
TheNumbat Aug 2, 2023
18a16e7
Merge branch 'simd-intrins' into simd-intrins2
TheNumbat Aug 2, 2023
f7a0bf6
Merge branch 'main' into simd-intrins
TheNumbat Aug 10, 2023
dcc61fb
Merge branch 'simd-intrins' into simd-intrins2
TheNumbat Aug 10, 2023
f01e32b
squash
TheNumbat Aug 11, 2023
00d5ca4
Merge branch 'main' into simd-intrins
TheNumbat Aug 14, 2023
d958cf0
Merge branch 'simd-intrins' into simd-intrins2
TheNumbat Aug 14, 2023
136e5e8
merge
TheNumbat Aug 14, 2023
2655421
error on string/bytes safe aligned case (not exposed)
TheNumbat Aug 14, 2023
be5a927
print
TheNumbat Aug 14, 2023
f9d8b00
fix dbg info order?
TheNumbat Aug 14, 2023
63930e7
remove unaligned access on string/bytes; assume ba is 16b aligned fro…
TheNumbat Aug 18, 2023
f509ee4
Merge branch 'main' into simd-intrins
TheNumbat Aug 22, 2023
c256097
Update middle_end/flambda2/from_lambda/lambda_to_flambda_primitives.ml
TheNumbat Aug 22, 2023
3f7cfe7
address comments
TheNumbat Aug 22, 2023
a0c197f
Merge branch 'simd-array-ops' of https://github.com/ocaml-flambda/fla…
TheNumbat Aug 22, 2023
5d4febc
Merge branch 'simd-intrins' into simd-intrins2
TheNumbat Aug 22, 2023
ecdc256
merge
TheNumbat Aug 22, 2023
6a237e2
add simd class_of_operation
TheNumbat Aug 23, 2023
659a44d
classes
TheNumbat Aug 23, 2023
1baf8ae
format
TheNumbat Aug 23, 2023
208e1f8
merge
TheNumbat Aug 23, 2023
2bdb8c0
merge
TheNumbat Aug 23, 2023
83a1bc7
Merge branch 'simd-intrins2' into simd-array-ops
TheNumbat Aug 23, 2023
751953e
random whitespace?
TheNumbat Aug 23, 2023
c4ac43c
improve tests
TheNumbat Aug 24, 2023
f61f493
closure check bound before align
TheNumbat Aug 24, 2023
4b011ab
Update backend/amd64/simd.ml
TheNumbat Aug 24, 2023
5924bcc
Merge branch 'simd-intrins' into simd-intrins2
TheNumbat Aug 24, 2023
74a4a5a
Merge branch 'simd-intrins2' into simd-array-ops
TheNumbat Aug 24, 2023
b951c38
merge
TheNumbat Aug 25, 2023
3c4ae41
Merge branch 'simd-intrins2' into simd-array-ops
TheNumbat Aug 25, 2023
298a8ac
address comments
TheNumbat Oct 9, 2023
1cb257e
address comments
TheNumbat Oct 9, 2023
ec8022d
address comments
TheNumbat Oct 9, 2023
9c464be
address comments
TheNumbat Oct 9, 2023
cb068c7
address comments
TheNumbat Oct 9, 2023
ed95a6c
Merge branch 'main' into simd-intrins2
TheNumbat Oct 9, 2023
29c8d36
merge
TheNumbat Oct 9, 2023
2c71f7b
use noexc for simd rounding codes
TheNumbat Oct 9, 2023
8ea31d5
Merge branch 'simd-intrins2' into simd-array-ops
TheNumbat Oct 9, 2023
18270cd
alignment comments
TheNumbat Oct 10, 2023
fa64161
Apply suggestions from code review
TheNumbat Oct 10, 2023
6e0fd67
simd_reload.ml
TheNumbat Oct 10, 2023
382aa46
Merge branch 'simd-intrins2' into simd-array-ops
TheNumbat Oct 10, 2023
55299d3
address comments
TheNumbat Oct 10, 2023
2e01e07
address comments
TheNumbat Oct 10, 2023
5fcef97
restore res/arg check
TheNumbat Oct 11, 2023
adba9cb
Merge branch 'simd-intrins2' of https://github.com/ocaml-flambda/flam…
TheNumbat Oct 11, 2023
72be21f
Merge branch 'simd-intrins2' into simd-array-ops
TheNumbat Oct 11, 2023
1654808
Merge branch 'main' into simd-array-ops
TheNumbat Oct 12, 2023
ecfac2c
label args
TheNumbat Oct 12, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions backend/CSEgen.ml
Original file line number Diff line number Diff line change
Expand Up @@ -241,9 +241,9 @@ method class_of_operation op =
| Iload(_,_,mut) -> Op_load mut
| Istore(_,_,asg) -> Op_store asg
| Ialloc _ | Ipoll _ -> assert false (* treated specially *)
| Iintop(Icheckbound) -> Op_checkbound
| Iintop(Icheckbound|Icheckalign _) -> Op_checkbound
| Iintop _ -> Op_pure
| Iintop_imm(Icheckbound, _) -> Op_checkbound
| Iintop_imm((Icheckbound|Icheckalign _), _) -> Op_checkbound
| Iintop_imm(_, _) -> Op_pure
| Iintop_atomic _ -> Op_store true
| Icompf _
Expand Down
110 changes: 73 additions & 37 deletions backend/amd64/emit.mlp
Original file line number Diff line number Diff line change
Expand Up @@ -423,47 +423,72 @@ let emit_local_realloc lr =
emit_call (Cmm.global_symbol "caml_call_local_realloc");
I.jmp (label lr.lr_return_lbl)

(* Record calls to caml_ml_array_bound_error.
In -g mode we maintain one call to
caml_ml_array_bound_error per bound check site. Without -g, we can share
(* Record calls to caml_ml_array_bound_error and caml_ml_array_align_error.
In -g mode we maintain one call per check site (bound or alignment).
Without -g, we can share a single call per kind of check. *)

type bound_error_call =
{ bd_lbl: label; (* Entry label *)
bd_frame: label; (* Label of frame descriptor *)
bd_dbg: Debuginfo.t;
(* As for [gc_call]. *)
(* Which runtime safety check a failure site belongs to; it selects the
   runtime error routine that is called when the check fails. *)
type safety_check = Bound_check | Align_check

(* One out-of-line failure call. A value of this type is recorded for each
   check site when [!Clflags.debug] is set. *)
type safety_check_failure = {
sc_lbl: label; (* Entry label *)
sc_frame: label; (* Label of frame descriptor *)
sc_dbg: Debuginfo.t; (* As for [gc_call]. *)
}

(* Pending failure calls for one kind of safety check. *)
type safety_check_sites = {
(* Individual failure calls, one per check site (filled in debug mode). *)
mutable sc_sites: safety_check_failure list;
(* Label of the single shared failure call used outside debug mode;
   0 means no shared label has been allocated yet. *)
mutable sc_call: label;
}

let bound_error_sites = ref ([] : bound_error_call list)
let bound_error_call = ref 0
(* Bookkeeping for failed bound checks and failed alignment checks,
   respectively. Both start with no recorded sites and no shared label, and
   are put back into that state by [clear_safety_checks]. *)
let bound_checks = { sc_sites = []; sc_call = 0 }
let align_checks = { sc_sites = []; sc_call = 0 }

let bound_error_label fdo dbg =
let safety_check_failure_label kind fdo dbg =
if !Clflags.debug then begin
let lbl_bound_error = new_label() in
let lbl_error = new_label() in
let lbl_frame = record_frame_label Reg.Set.empty (Dbg_other dbg) in
bound_error_sites :=
{ bd_lbl = lbl_bound_error;
bd_frame = lbl_frame;
bd_dbg = dbg;
} :: !bound_error_sites;
lbl_bound_error
let info =
{ sc_lbl = lbl_error;
sc_frame = lbl_frame;
sc_dbg = dbg } in
(match kind with
| Bound_check -> bound_checks.sc_sites <- info :: bound_checks.sc_sites
| Align_check -> align_checks.sc_sites <- info :: align_checks.sc_sites);
lbl_error
end else begin
if !bound_error_call = 0 then bound_error_call := new_label();
!bound_error_call
match kind with
| Bound_check ->
if bound_checks.sc_call = 0 then bound_checks.sc_call <- new_label();
bound_checks.sc_call
| Align_check ->
if align_checks.sc_call = 0 then align_checks.sc_call <- new_label();
align_checks.sc_call
end

let emit_call_bound_error bd =
def_label bd.bd_lbl;
emit_debug_info bd.bd_dbg;
emit_call (Cmm.global_symbol "caml_ml_array_bound_error");
def_label bd.bd_frame

let emit_call_bound_errors () =
List.iter emit_call_bound_error !bound_error_sites;
if !bound_error_call > 0 then begin
def_label !bound_error_call;
(* Emit the out-of-line failure call for one recorded check site: define its
   entry label, attach its debug info, call the runtime error routine that
   matches [kind], then define the label of its frame descriptor. *)
let emit_call_safety_error kind sc =
  let handler =
    match kind with
    | Bound_check -> "caml_ml_array_bound_error"
    | Align_check -> "caml_ml_array_align_error"
  in
  def_label sc.sc_lbl;
  emit_debug_info sc.sc_dbg;
  emit_call (Cmm.global_symbol handler);
  def_label sc.sc_frame

(* Forget every recorded bound-check and alignment-check failure site and
   drop both shared failure labels (0 means "not allocated"). *)
let clear_safety_checks () =
  let reset checks =
    checks.sc_sites <- [];
    checks.sc_call <- 0
  in
  reset bound_checks;
  reset align_checks

(* Emit all pending failure calls: first those for bound checks, then those
   for alignment checks. For each kind, the per-site calls are emitted first,
   followed by the shared call if a shared label was allocated. *)
let emit_call_safety_errors () =
  let emit_for kind checks symbol =
    List.iter (emit_call_safety_error kind) checks.sc_sites;
    if checks.sc_call > 0 then begin
      def_label checks.sc_call;
      emit_call (Cmm.global_symbol symbol)
    end
  in
  emit_for Bound_check bound_checks "caml_ml_array_bound_error";
  emit_for Align_check align_checks "caml_ml_array_align_error"

(* Record jump tables *)
Expand Down Expand Up @@ -1265,8 +1290,10 @@ let emit_instr fallthrough i =
I.mov (addressing addr DWORD i 0) (res32 i 0)
| Thirtytwo_signed ->
I.movsxd (addressing addr DWORD i 0) dest
| Onetwentyeight ->
| Onetwentyeight_unaligned ->
I.movupd (addressing addr VEC128 i 0) dest
| Onetwentyeight_aligned ->
I.movapd (addressing addr VEC128 i 0) dest
| Single ->
I.cvtss2sd (addressing addr REAL4 i 0) dest
| Double ->
Expand All @@ -1282,8 +1309,10 @@ let emit_instr fallthrough i =
I.mov (arg16 i 0) (addressing addr WORD i 1)
| Thirtytwo_signed | Thirtytwo_unsigned ->
I.mov (arg32 i 0) (addressing addr DWORD i 1)
| Onetwentyeight ->
| Onetwentyeight_unaligned ->
I.movupd (arg i 0) (addressing addr VEC128 i 1)
| Onetwentyeight_aligned ->
I.movapd (arg i 0) (addressing addr VEC128 i 1)
| Single ->
I.cvtsd2ss (arg i 0) xmm15;
I.movss xmm15 (addressing addr REAL4 i 1)
Expand Down Expand Up @@ -1367,12 +1396,20 @@ let emit_instr fallthrough i =
I.cmp (int n) (arg i 0);
I.set (cond cmp) al;
I.movzx al (res i 0)
| Lop(Iintop (Icheckalign { bytes_pow2 })) ->
let lbl = safety_check_failure_label Align_check i.fdo i.dbg in
let mask = bytes_pow2 - 1 in
I.test (Imm (Int64.of_int mask)) (arg i 0);
I.jne (label lbl)
| Lop(Iintop_imm(Icheckalign { bytes_pow2 }, n)) ->
let mask = bytes_pow2 - 1 in
if (n land mask) <> 0 then Misc.fatal_errorf "Alignment check on known int failed."
| Lop(Iintop (Icheckbound)) ->
let lbl = bound_error_label i.fdo i.dbg in
let lbl = safety_check_failure_label Bound_check i.fdo i.dbg in
I.cmp (arg i 1) (arg i 0);
I.jbe (label lbl)
| Lop(Iintop_imm(Icheckbound, n)) ->
let lbl = bound_error_label i.fdo i.dbg in
let lbl = safety_check_failure_label Bound_check i.fdo i.dbg in
I.cmp (int n) (arg i 0);
I.jbe (label lbl)
| Lop(Iintop_imm (Iand, n)) when n >= 0 && n <= 0xFFFF_FFFF && Reg.is_reg i.res.(0) ->
Expand Down Expand Up @@ -1747,8 +1784,7 @@ let fundecl fundecl =
stack_offset := 0;
call_gc_sites := [];
local_realloc_sites := [];
bound_error_sites := [];
bound_error_call := 0;
clear_safety_checks ();
for i = 0 to Proc.num_stack_slot_classes - 1 do
num_stack_slots.(i) <- fundecl.fun_num_stack_slots.(i);
done;
Expand Down Expand Up @@ -1777,7 +1813,7 @@ let fundecl fundecl =
emit_all true fundecl.fun_body;
List.iter emit_call_gc !call_gc_sites;
List.iter emit_local_realloc !local_realloc_sites;
emit_call_bound_errors ();
emit_call_safety_errors ();
if !frame_required then begin
let n = frame_size() - 8 - (if fp then 8 else 0) in
if n <> 0
Expand Down
22 changes: 12 additions & 10 deletions backend/amd64/proc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -438,14 +438,14 @@ let destroyed_at_oper = function
| Ifloat_iround | Ifloat_min | Ifloat_max
| Ifloatarithmem (_, _) | Ibswap _ | Ifloatsqrtf _))
| Iop(Iintop(Iadd | Isub | Imul | Iand | Ior | Ixor | Ilsl | Ilsr | Iasr
| Ipopcnt | Iclz _ | Ictz _ | Icheckbound))
| Ipopcnt | Iclz _ | Ictz _ | Icheckbound | Icheckalign _))
| Iop(Iintop_imm((Iadd | Isub | Imul | Imulh _ | Iand | Ior | Ixor | Ilsl
| Ilsr | Iasr | Ipopcnt | Iclz _ | Ictz _
| Icheckbound),_))
| Icheckbound | Icheckalign _),_))
| Iop(Iintop_atomic _)
| Iop(Istore((Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed
| Thirtytwo_unsigned | Thirtytwo_signed | Word_int | Word_val
| Double | Onetwentyeight ), _, _))
| Double | Onetwentyeight_aligned | Onetwentyeight_unaligned), _, _))
| Iop(Imove | Ispill | Ireload | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf
| Icompf _
| Icsel _
Expand Down Expand Up @@ -484,15 +484,16 @@ let destroyed_at_basic (basic : Cfg_intf.S.basic) =
[| rax |]
| Op (Specific (Irdtsc | Irdpmc)) ->
[| rax; rdx |]
| Op (Intop Icheckbound | Intop_imm (Icheckbound, _)) ->
| Op (Intop (Icheckbound | Icheckalign _)
| Intop_imm ((Icheckbound | Icheckalign _), _)) ->
assert false
| Op (Move | Spill | Reload
| Const_int _ | Const_float _ | Const_symbol _ | Const_vec128 _
| Stackoffset _
| Load _ | Store ((Byte_unsigned | Byte_signed | Sixteen_unsigned
| Sixteen_signed | Thirtytwo_unsigned
| Thirtytwo_signed | Word_int | Word_val
| Double | Onetwentyeight ), _, _)
| Double | Onetwentyeight_aligned | Onetwentyeight_unaligned), _, _)
| Intop (Iadd | Isub | Imul | Iand | Ior | Ixor | Ilsl | Ilsr
| Iasr | Ipopcnt | Iclz _ | Ictz _)
| Intop_imm ((Iadd | Isub | Imul | Imulh _ | Iand | Ior | Ixor
Expand Down Expand Up @@ -528,7 +529,7 @@ let destroyed_at_terminator (terminator : Cfg_intf.S.terminator) =
destroyed_at_alloc_or_poll
| Always _ | Parity_test _ | Truth_test _ | Float_test _ | Int_test _
| Return | Raise _ | Tailcall_self _ | Tailcall_func _
| Prim {op = Checkbound _ | Probe _; _}
| Prim {op = Checkbound _ | Checkalign _ | Probe _; _}
->
if fp then [| rbp |] else [||]
| Switch _ ->
Expand Down Expand Up @@ -561,7 +562,7 @@ let is_destruction_point ~(more_destruction_points : bool) (terminator : Cfg_int
false
| Always _ | Parity_test _ | Truth_test _ | Float_test _ | Int_test _
| Return | Raise _ | Tailcall_self _ | Tailcall_func _
| Prim {op = Checkbound _ | Probe _; _} ->
| Prim {op = (Checkbound _ | Checkalign _) | Probe _; _} ->
false
| Switch _ ->
false
Expand Down Expand Up @@ -621,13 +622,13 @@ let max_register_pressure =
| Istore(Single, _, _) | Icompf _ ->
consumes ~int:0 ~float:1
| Iintop(Iadd | Isub | Imul | Imulh _ | Iand | Ior | Ixor | Ilsl | Ilsr | Iasr
| Ipopcnt|Iclz _| Ictz _|Icheckbound)
| Ipopcnt|Iclz _| Ictz _|Icheckbound|Icheckalign _)
| Iintop_imm((Iadd | Isub | Imul | Imulh _ | Iand | Ior | Ixor | Ilsl | Ilsr
| Iasr | Ipopcnt | Iclz _| Ictz _|Icheckbound), _)
| Iasr | Ipopcnt | Iclz _| Ictz _|Icheckbound|Icheckalign _), _)
| Iintop_atomic _
| Istore((Byte_unsigned | Byte_signed | Sixteen_unsigned | Sixteen_signed
| Thirtytwo_unsigned | Thirtytwo_signed | Word_int | Word_val
| Double | Onetwentyeight ),
| Double | Onetwentyeight_aligned | Onetwentyeight_unaligned),
_, _)
| Imove | Ispill | Ireload | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf
| Icsel _
Expand Down Expand Up @@ -734,6 +735,7 @@ let operation_supported = function
| Ccmpf _
| Craise _
| Ccheckbound
| Ccheckalign _
| Cvectorcast _ | Cscalarcast _
| Cprobe _ | Cprobe_is_enabled _ | Copaque | Cbeginregion | Cendregion
| Ctuple_field _
Expand Down
9 changes: 7 additions & 2 deletions backend/amd64/regalloc_stack_operands.ml
Original file line number Diff line number Diff line change
Expand Up @@ -255,8 +255,8 @@ let basic (map : spilled_map) (instr : Cfg.basic Cfg.instruction) =
| Prologue ->
(* no rewrite *)
May_still_have_spilled_registers
| Op (Intop Icheckbound)
| Op (Intop_imm ((Ipopcnt | Iclz _ | Ictz _ | Icheckbound), _)) ->
| Op (Intop (Icheckbound | Icheckalign _))
| Op (Intop_imm ((Ipopcnt | Iclz _ | Ictz _ | Icheckbound | Icheckalign _), _)) ->
(* should not happen *)
fatal "unexpected instruction"
end
Expand All @@ -268,11 +268,16 @@ let terminator (map : spilled_map) (term : Cfg.terminator Cfg.instruction) =
| Int_test { lt = _; eq = _; gt =_; is_signed = _; imm = None; }
| Prim {op = Checkbound { immediate = None; }; _} ->
binary_operation map term No_result
| Prim {op = Checkalign { immediate = None; _ }; _} ->
may_use_stack_operand_for_only_argument ~has_result:false map term
| Int_test { lt = _; eq = _; gt =_; is_signed = _; imm = Some _; }
| Parity_test { ifso = _; ifnot = _; }
| Truth_test { ifso = _; ifnot = _; }
| Prim {op = Checkbound { immediate = Some _; }; _} ->
may_use_stack_operand_for_only_argument ~has_result:false map term
| Prim {op = Checkalign { immediate = Some _; _ }; _} ->
if debug then check_lengths term ~of_arg:0 ~of_res:0;
All_spilled_registers_rewritten
| Float_test _ ->
(* CR-someday xclerc for xclerc: this could be optimized, but the representation
makes it more difficult than the cases above, because (i) multiple
Expand Down
4 changes: 2 additions & 2 deletions backend/amd64/reload.ml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ inherit Reloadgen.reload_generic as super

method! reload_operation op arg res =
match op with
| Iintop(Iadd|Isub|Iand|Ior|Ixor|Icheckbound) ->
| Iintop(Iadd|Isub|Iand|Ior|Ixor|Icheckbound|Icheckalign _) ->
(* One of the two arguments can reside in the stack, but not both *)
if stackp arg.(0) && stackp arg.(1)
then ([|arg.(0); self#makereg arg.(1)|], res)
Expand Down Expand Up @@ -110,7 +110,7 @@ method! reload_operation op arg res =
arg, res
| Iintop(Imulh _ | Idiv | Imod | Ilsl | Ilsr | Iasr)
| Iintop_imm((Iadd | Isub | Iand | Ior | Ixor | Ilsl | Ilsr | Iasr
| Imulh _ | Idiv | Imod | Icheckbound), _) ->
| Imulh _ | Idiv | Imod | Icheckbound | Icheckalign _), _) ->
(* The argument(s) and results can be either in register or on stack *)
(* Note: Imulh, Idiv, Imod: arg(0) and res(0) already forced in regs
Ilsl, Ilsr, Iasr: arg(1) already forced in regs *)
Expand Down
4 changes: 2 additions & 2 deletions backend/amd64/selection.ml
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,8 @@ let pseudoregs_for_operation op arg res =
arg.(len-1) <- res.(0);
(arg, res)
(* Other instructions are regular *)
| Iintop (Ipopcnt|Iclz _|Ictz _|Icomp _|Icheckbound)
| Iintop_imm ((Imulh _|Idiv|Imod|Icomp _|Icheckbound
| Iintop (Ipopcnt|Iclz _|Ictz _|Icomp _|Icheckbound|Icheckalign _)
| Iintop_imm ((Imulh _|Idiv|Imod|Icomp _|Icheckbound|Icheckalign _
|Ipopcnt|Iclz _|Ictz _), _)
| Ispecific (Isqrtf|Isextend32|Izextend32|Ilea _|Istore_int (_, _, _)
|Ifloat_iround|Ifloat_round _
Expand Down
15 changes: 10 additions & 5 deletions backend/arm64/emit.mlp
Original file line number Diff line number Diff line change
Expand Up @@ -743,7 +743,7 @@ let emit_instr i =
| Lop(Iintop_atomic _) ->
(* Never generated; builtins are not yet translated to atomics *)
assert false
| Lop(Ivectorcast _ | Iscalarcast _) ->
| Lop(Ivectorcast _ | Iscalarcast _) ->
(* Never generated; SIMD instructions are not yet translated *)
assert false
| Lop(Imove | Ispill | Ireload) ->
Expand Down Expand Up @@ -829,8 +829,9 @@ let emit_instr i =
` fcvt {emit_reg dst}, s7\n`
| Word_int | Word_val | Double ->
` ldr {emit_reg dst}, {emit_addressing addr base}\n`
(* CR mslater: (SIMD) arm64 *)
| Onetwentyeight -> fatal_error "arm64: got 128 bit memory chunk"
| Onetwentyeight_aligned | Onetwentyeight_unaligned ->
(* CR mslater: (SIMD) arm64 *)
fatal_error "arm64: got 128 bit memory chunk"
end
| Lop(Istore(size, addr, _)) ->
let src = i.arg.(0) in
Expand All @@ -853,8 +854,9 @@ let emit_instr i =
` str s7, {emit_addressing addr base}\n`;
| Word_int | Word_val | Double ->
` str {emit_reg src}, {emit_addressing addr base}\n`
(* CR mslater: (SIMD) arm64 *)
| Onetwentyeight -> fatal_error "arm64: got 128 bit memory chunk"
| Onetwentyeight_aligned | Onetwentyeight_unaligned ->
(* CR mslater: (SIMD) arm64 *)
fatal_error "arm64: got 128 bit memory chunk"
end
| Lop(Ialloc { bytes = n; dbginfo; mode = Alloc_heap }) ->
assembly_code_for_allocation i ~n ~far:false ~dbginfo
Expand All @@ -880,6 +882,9 @@ let emit_instr i =
| Lop(Iintop_imm(Icomp cmp, n)) ->
emit_cmpimm i.arg.(0) n;
` cset {emit_reg i.res.(0)}, {emit_string (name_for_comparison cmp)}\n`
| Lop(Iintop (Icheckalign _) | Iintop_imm(Icheckalign _, _)) ->
(* CR mslater: (SIMD) arm64 *)
fatal_error "arm64: got 128 bit alignment check"
| Lop(Iintop (Icheckbound)) ->
let lbl = bound_error_label i.dbg in
` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
Expand Down
5 changes: 3 additions & 2 deletions backend/arm64/proc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ let destroyed_at_terminator (terminator : Cfg_intf.S.terminator) =
[| reg_x8 |]
| Always _ | Parity_test _ | Truth_test _ | Float_test _
| Int_test _ | Switch _ | Return | Raise _ | Tailcall_self _
| Tailcall_func _ | Prim {op = Checkbound _ | Probe _; _}
| Tailcall_func _ | Prim {op = (Checkbound _ | Checkalign _) | Probe _; _}
| Specific_can_raise _ ->
[||]
| Call_no_return { func_symbol = _; alloc; ty_res = _; ty_args = _; }
Expand All @@ -373,7 +373,7 @@ let is_destruction_point ~(more_destruction_points : bool) (terminator : Cfg_int
false
| Always _ | Parity_test _ | Truth_test _ | Float_test _
| Int_test _ | Switch _ | Return | Raise _ | Tailcall_self _
| Tailcall_func _ | Prim {op = Checkbound _ | Probe _; _}
| Tailcall_func _ | Prim {op = (Checkbound _ | Checkalign _) | Probe _; _}
| Specific_can_raise _ ->
false
| Call_no_return { func_symbol = _; alloc; ty_res = _; ty_args = _; }
Expand Down Expand Up @@ -436,6 +436,7 @@ let operation_supported = function
| Cclz _ | Cctz _ | Cpopcnt
| Cprefetch _ | Catomic _
| Cvectorcast _ | Cscalarcast _
| Ccheckalign _
-> false (* Not implemented *)
| Cbswap _
| Capply _ | Cextcall _ | Cload _ | Calloc _ | Cstore _
Expand Down
3 changes: 2 additions & 1 deletion backend/arm64/selection.ml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ let is_offset chunk n =
| Word_int | Word_val | Double ->
n land 7 = 0 && n lsr 3 < 0x1000
(* CR mslater: (SIMD) arm64 *)
| Onetwentyeight -> Misc.fatal_error "arm64: got 128 bit memory chunk")
| Onetwentyeight_aligned | Onetwentyeight_unaligned ->
Misc.fatal_error "arm64: got 128 bit memory chunk")

let is_logical_immediate n =
Arch.is_logical_immediate (Nativeint.of_int n)
Expand Down
Loading