Skip to content

Commit

Permalink
flambda-backend: 128-bit load/store primitives for GC'd arrays (#2247)
Browse files Browse the repository at this point in the history
  • Loading branch information
TheNumbat authored Feb 28, 2024
1 parent e787c94 commit 70819a5
Show file tree
Hide file tree
Showing 8 changed files with 280 additions and 2 deletions.
14 changes: 13 additions & 1 deletion bytecomp/bytegen.ml
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,12 @@ let preserve_tailcall_for_prim = function
| Pbytes_set_16 _ | Pbytes_set_32 _ | Pbytes_set_64 _ | Pbytes_set_128 _
| Pbigstring_load_16 _ | Pbigstring_load_32 _ | Pbigstring_load_64 _
| Pbigstring_load_128 _ | Pbigstring_set_16 _ | Pbigstring_set_32 _
| Pfloatarray_load_128 _ | Pfloat_array_load_128 _ | Pint_array_load_128 _
| Punboxed_float_array_load_128 _ | Punboxed_int32_array_load_128 _
| Punboxed_int64_array_load_128 _ | Punboxed_nativeint_array_load_128 _
| Pfloatarray_set_128 _ | Pfloat_array_set_128 _ | Pint_array_set_128 _
| Punboxed_float_array_set_128 _ | Punboxed_int32_array_set_128 _
| Punboxed_int64_array_set_128 _ | Punboxed_nativeint_array_set_128 _
| Pbigstring_set_64 _ | Pbigstring_set_128 _
| Pprobe_is_enabled _ | Pobj_dup
| Pctconst _ | Pbswap16 | Pbbswap _ | Pint_as_pointer _
Expand Down Expand Up @@ -593,7 +599,13 @@ let comp_primitive stack_info p sz args =
| Patomic_fetch_add -> Kccall("caml_atomic_fetch_add", 2)
| Pdls_get -> Kccall("caml_domain_dls_get", 1)
| Pstring_load_128 _ | Pbytes_load_128 _ | Pbytes_set_128 _
| Pbigstring_load_128 _ | Pbigstring_set_128 _ ->
| Pbigstring_load_128 _ | Pbigstring_set_128 _
| Pfloatarray_load_128 _ | Pfloat_array_load_128 _ | Pint_array_load_128 _
| Punboxed_float_array_load_128 _ | Punboxed_int32_array_load_128 _
| Punboxed_int64_array_load_128 _ | Punboxed_nativeint_array_load_128 _
| Pfloatarray_set_128 _ | Pfloat_array_set_128 _ | Pint_array_set_128 _
| Punboxed_float_array_set_128 _ | Punboxed_int32_array_set_128 _
| Punboxed_int64_array_set_128 _ | Punboxed_nativeint_array_set_128 _ ->
fatal_error "128-bit load/store is not supported in bytecode mode."
(* The cases below are handled in [comp_expr] before the [comp_primitive] call
(in the order in which they appear below),
Expand Down
46 changes: 45 additions & 1 deletion lambda/lambda.ml
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,21 @@ type primitive =
| Pbigstring_set_32 of { unsafe : bool; boxed : bool }
| Pbigstring_set_64 of { unsafe : bool; boxed : bool }
| Pbigstring_set_128 of { aligned : bool; unsafe : bool; boxed : bool }
(* load/set SIMD vectors in GC-managed arrays *)
| Pfloatarray_load_128 of { unsafe : bool; mode : alloc_mode }
| Pfloat_array_load_128 of { unsafe : bool; mode : alloc_mode }
| Pint_array_load_128 of { unsafe : bool; mode : alloc_mode }
| Punboxed_float_array_load_128 of { unsafe : bool; mode : alloc_mode }
| Punboxed_int32_array_load_128 of { unsafe : bool; mode : alloc_mode }
| Punboxed_int64_array_load_128 of { unsafe : bool; mode : alloc_mode }
| Punboxed_nativeint_array_load_128 of { unsafe : bool; mode : alloc_mode }
| Pfloatarray_set_128 of { unsafe : bool }
| Pfloat_array_set_128 of { unsafe : bool }
| Pint_array_set_128 of { unsafe : bool }
| Punboxed_float_array_set_128 of { unsafe : bool }
| Punboxed_int32_array_set_128 of { unsafe : bool }
| Punboxed_int64_array_set_128 of { unsafe : bool }
| Punboxed_nativeint_array_set_128 of { unsafe : bool }
(* Compile time constants *)
| Pctconst of compile_time_constant
(* byte swap *)
Expand Down Expand Up @@ -402,6 +417,12 @@ let equal_boxed_vector_size v1 v2 =
match v1, v2 with
| Pvec128 _, Pvec128 _ -> true

let compare_boxed_vector = Stdlib.compare

let print_boxed_vector ppf t =
match t with
| Pvec128 v -> Format.pp_print_string ppf (vec128_name v)

let join_vec128_types v1 v2 =
match v1, v2 with
| Unknown128, _ | _, Unknown128 -> Unknown128
Expand Down Expand Up @@ -1596,6 +1617,13 @@ let primitive_may_allocate : primitive -> alloc_mode option = function
| Pstring_load_32 (_, m) | Pbytes_load_32 (_, m)
| Pstring_load_64 (_, m) | Pbytes_load_64 (_, m)
| Pstring_load_128 { mode = m; _ } | Pbytes_load_128 { mode = m; _ }
| Pfloatarray_load_128 { mode = m; _ }
| Pfloat_array_load_128 { mode = m; _ }
| Pint_array_load_128 { mode = m; _ }
| Punboxed_float_array_load_128 { mode = m; _ }
| Punboxed_int32_array_load_128 { mode = m; _ }
| Punboxed_int64_array_load_128 { mode = m; _ }
| Punboxed_nativeint_array_load_128 { mode = m; _ }
| Pget_header m -> Some m
| Pbytes_set_16 _ | Pbytes_set_32 _ | Pbytes_set_64 _ | Pbytes_set_128 _ -> None
| Pbigstring_load_16 _ -> None
Expand All @@ -1606,7 +1634,10 @@ let primitive_may_allocate : primitive -> alloc_mode option = function
| Pbigstring_load_64 { boxed = false; _ }
| Pbigstring_load_128 { boxed = false; _ } -> None
| Pbigstring_set_16 _ | Pbigstring_set_32 _
| Pbigstring_set_64 _ | Pbigstring_set_128 _ -> None
| Pbigstring_set_64 _ | Pbigstring_set_128 _
| Pfloatarray_set_128 _ | Pfloat_array_set_128 _ | Pint_array_set_128 _
| Punboxed_float_array_set_128 _ | Punboxed_int32_array_set_128 _
| Punboxed_int64_array_set_128 _ | Punboxed_nativeint_array_set_128 _ -> None
| Pctconst _ -> None
| Pbswap16 -> None
| Pbbswap (_, m) -> Some m
Expand Down Expand Up @@ -1677,6 +1708,9 @@ let primitive_result_layout (p : primitive) =
| Pbytessetu | Pbytessets | Parraysetu _ | Parraysets _ | Pbigarrayset _
| Pbytes_set_16 _ | Pbytes_set_32 _ | Pbytes_set_64 _ | Pbytes_set_128 _
| Pbigstring_set_16 _ | Pbigstring_set_32 _ | Pbigstring_set_64 _ | Pbigstring_set_128 _
| Pfloatarray_set_128 _ | Pfloat_array_set_128 _ | Pint_array_set_128 _
| Punboxed_float_array_set_128 _ | Punboxed_int32_array_set_128 _
| Punboxed_int64_array_set_128 _ | Punboxed_nativeint_array_set_128 _
-> layout_unit
| Pgetglobal _ | Psetglobal _ | Pgetpredef _ -> layout_module_field
| Pmakeblock _ | Pmakefloatblock _ | Pmakearray _ | Pduprecord _
Expand Down Expand Up @@ -1732,6 +1766,16 @@ let primitive_result_layout (p : primitive) =
| Pbigstring_load_64 { boxed = false; _ } -> layout_unboxed_int Pint64
| Pbigstring_load_128 { boxed = false; _ } ->
layout_unboxed_vector (Pvec128 Int8x16)
| Pfloatarray_load_128 _ | Pfloat_array_load_128 _
| Punboxed_float_array_load_128 _ ->
layout_boxed_vector (Pvec128 Float64x2)
| Pint_array_load_128 _ | Punboxed_int64_array_load_128 _
| Punboxed_nativeint_array_load_128 _ ->
(* 128-bit types are only supported in the x86_64 backend, so we may
assume that nativeint is 64 bits. *)
layout_boxed_vector (Pvec128 Int64x2)
| Punboxed_int32_array_load_128 _ ->
layout_boxed_vector (Pvec128 Int32x4)
| Pbigarrayref (_, _, kind, _) ->
begin match kind with
| Pbigarray_unknown -> layout_any_value
Expand Down
19 changes: 19 additions & 0 deletions lambda/lambda.mli
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,21 @@ type primitive =
| Pbigstring_set_32 of { unsafe : bool; boxed : bool }
| Pbigstring_set_64 of { unsafe : bool; boxed : bool }
| Pbigstring_set_128 of { aligned : bool; unsafe : bool; boxed : bool }
(* load/set SIMD vectors in GC-managed arrays *)
| Pfloatarray_load_128 of { unsafe : bool; mode : alloc_mode }
| Pfloat_array_load_128 of { unsafe : bool; mode : alloc_mode }
| Pint_array_load_128 of { unsafe : bool; mode : alloc_mode }
| Punboxed_float_array_load_128 of { unsafe : bool; mode : alloc_mode }
| Punboxed_int32_array_load_128 of { unsafe : bool; mode : alloc_mode }
| Punboxed_int64_array_load_128 of { unsafe : bool; mode : alloc_mode }
| Punboxed_nativeint_array_load_128 of { unsafe : bool; mode : alloc_mode }
| Pfloatarray_set_128 of { unsafe : bool }
| Pfloat_array_set_128 of { unsafe : bool }
| Pint_array_set_128 of { unsafe : bool }
| Punboxed_float_array_set_128 of { unsafe : bool }
| Punboxed_int32_array_set_128 of { unsafe : bool }
| Punboxed_int64_array_set_128 of { unsafe : bool }
| Punboxed_nativeint_array_set_128 of { unsafe : bool }
(* Compile time constants *)
| Pctconst of compile_time_constant
(* byte swap *)
Expand Down Expand Up @@ -380,6 +395,10 @@ val equal_boxed_integer : boxed_integer -> boxed_integer -> bool

val equal_boxed_vector_size : boxed_vector -> boxed_vector -> bool

val compare_boxed_vector : boxed_vector -> boxed_vector -> int

val print_boxed_vector : Format.formatter -> boxed_vector -> unit

val must_be_value : layout -> value_kind

(* This is the layout of ocaml values used as arguments to or returned from
Expand Down
56 changes: 56 additions & 0 deletions lambda/printlambda.ml
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,48 @@ let primitive ppf = function
fprintf ppf "bigarray.array1.unaligned_set128"
| Pbigstring_set_128 {unsafe = false; aligned = true} ->
fprintf ppf "bigarray.array1.aligned_set128"
| Pfloatarray_load_128 {unsafe; mode} ->
if unsafe then fprintf ppf "floatarray.unsafe_get128%s" (alloc_kind mode)
else fprintf ppf "floatarray.get128%s" (alloc_kind mode)
| Pfloat_array_load_128 {unsafe; mode} ->
if unsafe then fprintf ppf "float_array.unsafe_get128%s" (alloc_kind mode)
else fprintf ppf "float_array.get128%s" (alloc_kind mode)
| Pint_array_load_128 {unsafe; mode} ->
if unsafe then fprintf ppf "int_array.unsafe_get128%s" (alloc_kind mode)
else fprintf ppf "int_array.get128%s" (alloc_kind mode)
| Punboxed_float_array_load_128 {unsafe; mode} ->
if unsafe then fprintf ppf "unboxed_float_array.unsafe_get128%s" (alloc_kind mode)
else fprintf ppf "unboxed_float_array.get128%s" (alloc_kind mode)
| Punboxed_int32_array_load_128 {unsafe; mode} ->
if unsafe then fprintf ppf "unboxed_int32_array.unsafe_get128%s" (alloc_kind mode)
else fprintf ppf "unboxed_int32_array.get128%s" (alloc_kind mode)
| Punboxed_int64_array_load_128 {unsafe; mode} ->
if unsafe then fprintf ppf "unboxed_int64_array.unsafe_get128%s" (alloc_kind mode)
else fprintf ppf "unboxed_int64_array.get128%s" (alloc_kind mode)
| Punboxed_nativeint_array_load_128 {unsafe; mode} ->
if unsafe then fprintf ppf "unboxed_nativeint_array.unsafe_get128%s" (alloc_kind mode)
else fprintf ppf "unboxed_nativeint_array.get128%s" (alloc_kind mode)
| Pfloatarray_set_128 {unsafe} ->
if unsafe then fprintf ppf "floatarray.unsafe_set128"
else fprintf ppf "floatarray.set128"
| Pfloat_array_set_128 {unsafe} ->
if unsafe then fprintf ppf "float_array.unsafe_set128"
else fprintf ppf "float_array.set128"
| Pint_array_set_128 {unsafe} ->
if unsafe then fprintf ppf "int_array.unsafe_set128"
else fprintf ppf "int_array.set128"
| Punboxed_float_array_set_128 {unsafe} ->
if unsafe then fprintf ppf "unboxed_float_array.unsafe_set128"
else fprintf ppf "unboxed_float_array.set128"
| Punboxed_int32_array_set_128 {unsafe} ->
if unsafe then fprintf ppf "unboxed_int32_array.unsafe_set128"
else fprintf ppf "unboxed_int32_array.set128"
| Punboxed_int64_array_set_128 {unsafe} ->
if unsafe then fprintf ppf "unboxed_int64_array.unsafe_set128"
else fprintf ppf "unboxed_int64_array.set128"
| Punboxed_nativeint_array_set_128 {unsafe} ->
if unsafe then fprintf ppf "unboxed_nativeint_array.unsafe_set128"
else fprintf ppf "unboxed_nativeint_array.set128"
| Pbswap16 -> fprintf ppf "bswap16"
| Pbbswap(bi,m) -> print_boxed_integer "bswap" ppf bi m
| Pint_as_pointer m -> fprintf ppf "int_as_pointer%s" (alloc_kind m)
Expand Down Expand Up @@ -750,6 +792,20 @@ let name_of_primitive = function
| Pbigstring_set_32 _ -> "Pbigstring_set_32"
| Pbigstring_set_64 _ -> "Pbigstring_set_64"
| Pbigstring_set_128 _ -> "Pbigstring_set_128"
| Pfloatarray_load_128 _ -> "Pfloatarray_load_128"
| Pfloat_array_load_128 _ -> "Pfloat_array_load_128"
| Pint_array_load_128 _ -> "Pint_array_load_128"
| Punboxed_float_array_load_128 _ -> "Punboxed_float_array_load_128"
| Punboxed_int32_array_load_128 _ -> "Punboxed_int32_array_load_128"
| Punboxed_int64_array_load_128 _ -> "Punboxed_int64_array_load_128"
| Punboxed_nativeint_array_load_128 _ -> "Punboxed_nativeint_array_load_128"
| Pfloatarray_set_128 _ -> "Pfloatarray_set_128"
| Pfloat_array_set_128 _ -> "Pfloat_array_set_128"
| Pint_array_set_128 _ -> "Pint_array_set_128"
| Punboxed_float_array_set_128 _ -> "Punboxed_float_array_set_128"
| Punboxed_int32_array_set_128 _ -> "Punboxed_int32_array_set_128"
| Punboxed_int64_array_set_128 _ -> "Punboxed_int64_array_set_128"
| Punboxed_nativeint_array_set_128 _ -> "Punboxed_nativeint_array_set_128"
| Pbswap16 -> "Pbswap16"
| Pbbswap _ -> "Pbbswap"
| Pint_as_pointer _ -> "Pint_as_pointer"
Expand Down
14 changes: 14 additions & 0 deletions lambda/tmc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -942,6 +942,20 @@ let rec choice ctx t =
| Pbigstring_load_16 _ | Pbigstring_load_32 _ | Pbigstring_load_64 _
| Pbigstring_load_128 _ | Pbigstring_set_16 _ | Pbigstring_set_32 _
| Pbigstring_set_64 _ | Pbigstring_set_128 _
| Pfloatarray_load_128 _
| Pfloat_array_load_128 _
| Pint_array_load_128 _
| Punboxed_float_array_load_128 _
| Punboxed_int32_array_load_128 _
| Punboxed_int64_array_load_128 _
| Punboxed_nativeint_array_load_128 _
| Pfloatarray_set_128 _
| Pfloat_array_set_128 _
| Pint_array_set_128 _
| Punboxed_float_array_set_128 _
| Punboxed_int32_array_set_128 _
| Punboxed_int64_array_set_128 _
| Punboxed_nativeint_array_set_128 _
| Pget_header _
| Pctconst _
| Pbswap16
Expand Down
62 changes: 62 additions & 0 deletions lambda/translprim.ml
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,62 @@ let lookup_primitive loc ~poly_mode ~poly_sort pos p =
| "%caml_bigstring_seta128u#" ->
Primitive ((Pbigstring_set_128 {aligned = true; unsafe = true;
boxed = false}), 3)
| "%caml_float_array_get128" ->
Primitive ((Pfloat_array_load_128 {unsafe = false; mode}), 2)
| "%caml_float_array_get128u" ->
Primitive ((Pfloat_array_load_128 {unsafe = true; mode}), 2)
| "%caml_floatarray_get128" ->
Primitive ((Pfloatarray_load_128 {unsafe = false; mode}), 2)
| "%caml_floatarray_get128u" ->
Primitive ((Pfloatarray_load_128 {unsafe = true; mode}), 2)
| "%caml_unboxed_float_array_get128" ->
Primitive ((Punboxed_float_array_load_128 {unsafe = false; mode}), 2)
| "%caml_unboxed_float_array_get128u" ->
Primitive ((Punboxed_float_array_load_128 {unsafe = true; mode}), 2)
| "%caml_int_array_get128" ->
Primitive ((Pint_array_load_128 {unsafe = false; mode}), 2)
| "%caml_int_array_get128u" ->
Primitive ((Pint_array_load_128 {unsafe = true; mode}), 2)
| "%caml_unboxed_int64_array_get128" ->
Primitive ((Punboxed_int64_array_load_128 {unsafe = false; mode}), 2)
| "%caml_unboxed_int64_array_get128u" ->
Primitive ((Punboxed_int64_array_load_128 {unsafe = true; mode}), 2)
| "%caml_unboxed_int32_array_get128" ->
Primitive ((Punboxed_int32_array_load_128 {unsafe = false; mode}), 2)
| "%caml_unboxed_int32_array_get128u" ->
Primitive ((Punboxed_int32_array_load_128 {unsafe = true; mode}), 2)
| "%caml_unboxed_nativeint_array_get128" ->
Primitive ((Punboxed_nativeint_array_load_128 {unsafe = false; mode}), 2)
| "%caml_unboxed_nativeint_array_get128u" ->
Primitive ((Punboxed_nativeint_array_load_128 {unsafe = true; mode}), 2)
| "%caml_float_array_set128" ->
Primitive ((Pfloat_array_set_128 {unsafe = false}), 3)
| "%caml_float_array_set128u" ->
Primitive ((Pfloat_array_set_128 {unsafe = true}), 3)
| "%caml_floatarray_set128" ->
Primitive ((Pfloatarray_set_128 {unsafe = false}), 3)
| "%caml_floatarray_set128u" ->
Primitive ((Pfloatarray_set_128 {unsafe = true}), 3)
| "%caml_unboxed_float_array_set128" ->
Primitive ((Punboxed_float_array_set_128 {unsafe = false}), 3)
| "%caml_unboxed_float_array_set128u" ->
Primitive ((Punboxed_float_array_set_128 {unsafe = true}), 3)
| "%caml_int_array_set128" ->
Primitive ((Pint_array_set_128 {unsafe = false}), 3)
| "%caml_int_array_set128u" ->
Primitive ((Pint_array_set_128 {unsafe = true}), 3)
| "%caml_unboxed_int64_array_set128" ->
Primitive ((Punboxed_int64_array_set_128 {unsafe = false}), 3)
| "%caml_unboxed_int64_array_set128u" ->
Primitive ((Punboxed_int64_array_set_128 {unsafe = true}), 3)
| "%caml_unboxed_int32_array_set128" ->
Primitive ((Punboxed_int32_array_set_128 {unsafe = false}), 3)
| "%caml_unboxed_int32_array_set128u" ->
Primitive ((Punboxed_int32_array_set_128 {unsafe = true}), 3)
| "%caml_unboxed_nativeint_array_set128" ->
Primitive ((Punboxed_nativeint_array_set_128 {unsafe = false}), 3)
| "%caml_unboxed_nativeint_array_set128u" ->
Primitive ((Punboxed_nativeint_array_set_128 {unsafe = true}), 3)
| "%bswap16" -> Primitive (Pbswap16, 1)
| "%bswap_int32" -> Primitive ((Pbbswap(Pint32, mode)), 1)
| "%bswap_int64" -> Primitive ((Pbbswap(Pint64, mode)), 1)
Expand Down Expand Up @@ -1248,6 +1304,12 @@ let lambda_primitive_needs_event_after = function
| Pbytes_set_32 _ | Pbytes_set_64 _ | Pbytes_set_128 _ | Pbigstring_load_16 _
| Pbigstring_load_32 _ | Pbigstring_load_64 _ | Pbigstring_load_128 _
| Pbigstring_set_16 _ | Pbigstring_set_32 _ | Pbigstring_set_64 _ | Pbigstring_set_128 _
| Pfloatarray_load_128 _ | Pfloat_array_load_128 _ | Pint_array_load_128 _
| Punboxed_float_array_load_128 _ | Punboxed_int32_array_load_128 _
| Punboxed_int64_array_load_128 _ | Punboxed_nativeint_array_load_128 _
| Pfloatarray_set_128 _ | Pfloat_array_set_128 _ | Pint_array_set_128 _
| Punboxed_float_array_set_128 _ | Punboxed_int32_array_set_128 _
| Punboxed_int64_array_set_128 _ | Punboxed_nativeint_array_set_128 _
| Prunstack | Pperform | Preperform | Presume
| Pbbswap _ | Pobj_dup | Pget_header _ -> true

Expand Down
14 changes: 14 additions & 0 deletions middle_end/convert_primitives.ml
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,20 @@ let convert (prim : Lambda.primitive) : Clambda_primitives.primitive =
| Pstring_load_128 _
| Pbytes_load_128 _
| Pbytes_set_128 _
| Pfloatarray_load_128 _
| Pfloat_array_load_128 _
| Pint_array_load_128 _
| Punboxed_float_array_load_128 _
| Punboxed_int32_array_load_128 _
| Punboxed_int64_array_load_128 _
| Punboxed_nativeint_array_load_128 _
| Pfloatarray_set_128 _
| Pfloat_array_set_128 _
| Pint_array_set_128 _
| Punboxed_float_array_set_128 _
| Punboxed_int32_array_set_128 _
| Punboxed_int64_array_set_128 _
| Punboxed_nativeint_array_set_128 _
->
Misc.fatal_errorf "lambda primitive %a can't be converted to \
clambda primitive"
Expand Down
Loading

0 comments on commit 70819a5

Please sign in to comment.