Skip to content

float32 backend plumbing/casts #2391

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 73 commits into from
Apr 26, 2024
Merged
Show file tree
Hide file tree
Changes from 62 commits
Commits
Show all changes
73 commits
Select commit Hold shift + click to select a range
430b6b7
float32 in flambda2
TheNumbat Mar 14, 2024
627203b
fixes
TheNumbat Mar 14, 2024
296e7b8
arm
TheNumbat Mar 14, 2024
a0a2282
more crs
TheNumbat Mar 14, 2024
7458491
primitives
TheNumbat Mar 14, 2024
8644e75
format
TheNumbat Mar 14, 2024
d0342f1
upstream build
TheNumbat Mar 14, 2024
797a7a6
delete array case
TheNumbat Mar 19, 2024
fad1816
casts
TheNumbat Mar 19, 2024
0bd3e82
format
TheNumbat Mar 19, 2024
f195d02
Merge branch 'f32-fl2' into f32-backend
TheNumbat Mar 19, 2024
9c8d442
testsuite cmm parser
TheNumbat Mar 19, 2024
a2af96d
format
TheNumbat Mar 19, 2024
9451a8f
cast fl2
TheNumbat Mar 19, 2024
84e23ff
num conv
TheNumbat Mar 19, 2024
39b36ca
merge
TheNumbat Mar 19, 2024
e42799a
fix name in cfg
TheNumbat Mar 19, 2024
0b86abd
float32 static consts
TheNumbat Mar 19, 2024
2fe6ea4
add test
TheNumbat Mar 19, 2024
7e93f52
fixes
TheNumbat Mar 19, 2024
8d56965
Merge branch 'f32-fl2' into f32-backend
TheNumbat Mar 19, 2024
8ade880
another fix
TheNumbat Mar 19, 2024
7f64da8
Merge branch 'f32-fl2' into f32-backend
TheNumbat Mar 19, 2024
0d24bdd
fix nan test
TheNumbat Mar 19, 2024
0937fc4
actually use cconst_float32
TheNumbat Mar 20, 2024
2ea3d51
separate storage single and actual float32
TheNumbat Mar 22, 2024
4375a5d
merge
TheNumbat Mar 22, 2024
b6662e4
fix cmm test parser again
TheNumbat Mar 22, 2024
d83be31
comments
TheNumbat Mar 22, 2024
d8663f1
parsecmm again
TheNumbat Mar 22, 2024
96403b6
Merge branch 'main' into f32-fl2
TheNumbat Mar 22, 2024
4068688
merge
TheNumbat Mar 22, 2024
2d2498d
parsecmm again
TheNumbat Mar 22, 2024
5ea8e3e
arm
TheNumbat Mar 22, 2024
9fa47cd
unify single mem chunks
TheNumbat Apr 2, 2024
d9347fe
fix size_expr f32 const
TheNumbat Apr 2, 2024
ccfbe36
merge fix from later pr
TheNumbat Apr 2, 2024
ae04a8e
parsecmm...
TheNumbat Apr 2, 2024
01c848f
arm build
TheNumbat Apr 2, 2024
b13bd15
new machtype component
TheNumbat Apr 2, 2024
1b19d02
fix ext args
TheNumbat Apr 2, 2024
7eaa6ec
Apply suggestions from code review
TheNumbat Apr 3, 2024
f5766a3
comments
TheNumbat Apr 3, 2024
a42f8f7
use 4b size in selectgen
TheNumbat Apr 3, 2024
e38b256
real f32 ops in f32_by_bit_pattern
TheNumbat Apr 8, 2024
a7b32de
float32 parsing (untested)
TheNumbat Apr 8, 2024
050b46b
install stubs for testsuite
TheNumbat Apr 8, 2024
f899698
merge
TheNumbat Apr 8, 2024
d08462a
merge
TheNumbat Apr 8, 2024
5c150cd
merge arm changes
TheNumbat Apr 8, 2024
7d45d1f
Update ocaml/middle_end/flambda/closure_conversion.ml
TheNumbat Apr 9, 2024
64b2663
address comments
TheNumbat Apr 9, 2024
d165586
Merge branch 'f32-fl2' of github.com:ocaml-flambda/flambda-backend in…
TheNumbat Apr 9, 2024
62d2c75
Merge branch 'f32-fl2' into f32-backend
TheNumbat Apr 9, 2024
9eec365
comment
TheNumbat Apr 9, 2024
0f9dae1
Merge branch 'f32-fl2' into f32-backend
TheNumbat Apr 9, 2024
cbd71a1
simplify locale defs
TheNumbat Apr 9, 2024
fa7870b
Merge branch 'f32-fl2' into f32-backend
TheNumbat Apr 9, 2024
8e9bdf8
move f32 lib to external
TheNumbat Apr 11, 2024
7f69249
fix build system
TheNumbat Apr 11, 2024
9ac6060
format
TheNumbat Apr 11, 2024
eacd280
Merge branch 'f32-fl2' into f32-backend
TheNumbat Apr 11, 2024
b78a2e4
fix compare
TheNumbat Apr 16, 2024
f551594
Merge branch 'f32-fl2' into f32-backend
TheNumbat Apr 16, 2024
4daf82d
fix static const
TheNumbat Apr 16, 2024
04bd989
typo
TheNumbat Apr 16, 2024
77e83d9
Merge branch 'f32-fl2' into f32-backend
TheNumbat Apr 16, 2024
2a3a4f2
typo
TheNumbat Apr 16, 2024
fc02a69
Merge branch 'f32-fl2' into f32-backend
TheNumbat Apr 16, 2024
df6688e
another tagging fix
TheNumbat Apr 16, 2024
c50b64f
Merge branch 'f32-fl2' into f32-backend
TheNumbat Apr 16, 2024
b0ed404
merge
TheNumbat Apr 26, 2024
ac288c1
fix arm build
TheNumbat Apr 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion backend/CSEgen.ml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ class cse_generic = object (self)
method class_of_operation op =
match op with
| Imove | Ispill | Ireload -> assert false (* treated specially *)
| Iconst_int _ | Iconst_float _ | Iconst_symbol _ | Iconst_vec128 _ -> Op_pure
| Iconst_int _ | Iconst_float32 _ | Iconst_float _
| Iconst_symbol _ | Iconst_vec128 _ -> Op_pure
| Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _
| Iextcall _ | Iprobe _ | Iopaque -> assert false (* treated specially *)
| Istackoffset _ -> Op_other
Expand Down
4 changes: 2 additions & 2 deletions backend/afl_instrument.ml
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,8 @@ and instrument = function
| Cexit (ex, args, traps) -> Cexit (ex, List.map instrument args, traps)

(* these are base cases and have no logging *)
| Cconst_int _ | Cconst_natint _ | Cconst_float _ | Cconst_vec128 _
| Cconst_symbol _
| Cconst_int _ | Cconst_natint _ | Cconst_float32 _ | Cconst_float _
| Cconst_vec128 _ | Cconst_symbol _
| Cvar _ as c -> c

let instrument_function c dbg =
Expand Down
6 changes: 4 additions & 2 deletions backend/amd64/CSE.ml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ method! class_of_operation op =
| Ifloatop _
| Icsel _
| Ivalueofint | Iintofvalue | Ivectorcast _ | Iscalarcast _
| Iconst_int _ | Iconst_float _ | Iconst_symbol _ | Iconst_vec128 _
| Iconst_int _ | Iconst_float32 _ | Iconst_float _
| Iconst_symbol _ | Iconst_vec128 _
| Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ | Iextcall _
| Istackoffset _ | Iload _ | Istore _ | Ialloc _
| Iintop _ | Iintop_imm _ | Iintop_atomic _
Expand Down Expand Up @@ -89,7 +90,8 @@ class cfg_cse = object
| Floatop _
| Csel _
| Valueofint | Intofvalue | Vectorcast _ | Scalarcast _
| Const_int _ | Const_float _ | Const_symbol _ | Const_vec128 _
| Const_int _ | Const_float32 _ | Const_float _
| Const_symbol _ | Const_vec128 _
| Stackoffset _ | Load _ | Store _ | Alloc _
| Intop _ | Intop_imm _ | Intop_atomic _
| Name_for_debugger _ | Probe_is_enabled _ | Opaque
Expand Down
4 changes: 4 additions & 0 deletions backend/amd64/arch.ml
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,10 @@ let assert_simd_enabled () =
if not (Language_extension.is_enabled SIMD) then
Misc.fatal_error "SIMD is not enabled."

(* Guard for float32 code paths: does nothing when the [Small_numbers]
   language extension is enabled, and otherwise reports a fatal error
   through [Misc.fatal_error]. *)
let assert_float32_enabled () =
  match Language_extension.is_enabled Small_numbers with
  | true -> ()
  | false -> Misc.fatal_error "float32 is not enabled."

(* Specific operations for the AMD64 processor *)

open Format
Expand Down
2 changes: 2 additions & 0 deletions backend/amd64/arch.mli
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ end
val trap_notes : bool ref
val arch_check_symbols : bool ref
val command_line_options : (string * Arg.spec * string) list

(** [assert_simd_enabled ()] returns unit when the [SIMD] language extension
    is enabled; otherwise it calls [Misc.fatal_error] with the message
    ["SIMD is not enabled."]. *)
val assert_simd_enabled : unit -> unit

(** [assert_float32_enabled ()] returns unit when the [Small_numbers]
    language extension is enabled; otherwise it calls [Misc.fatal_error]
    with the message ["float32 is not enabled."]. *)
val assert_float32_enabled : unit -> unit

(* Specific operations for the AMD64 processor *)

Expand Down
62 changes: 50 additions & 12 deletions backend/amd64/emit.mlp
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,15 @@ let int_reg_name =
let float_reg_name = Array.init 16 (fun i -> XMM i)

let register_name typ r =
match typ with
match (typ : machtype_component) with
| Int | Val | Addr -> Reg64 (int_reg_name.(r))
| Float -> Regf (float_reg_name.(r - 100))
| Vec128 -> assert_simd_enabled (); Regf (float_reg_name.(r - 100))
| Vec128 ->
assert_simd_enabled ();
Regf (float_reg_name.(r - 100))
| Float32 ->
assert_float32_enabled ();
Regf (float_reg_name.(r - 100))

let phys_rax = phys_reg Int 0
let phys_rdx = phys_reg Int 4
Expand Down Expand Up @@ -300,10 +305,11 @@ let emit_Llabel fallthrough lbl section_name =

(* Output a pseudo-register *)

let x86_data_type_for_stack_slot = function
let x86_data_type_for_stack_slot : machtype_component -> data_type = function
| Float -> REAL8
| Vec128 -> VEC128
| Int | Addr | Val -> QWORD
| Float32 -> REAL4

let reg = function
| { loc = Reg.Reg r; typ = ty } -> register_name ty r
Expand Down Expand Up @@ -745,12 +751,14 @@ let emit_global_label s =
let move (src : Reg.t) (dst : Reg.t) =
if src.loc <> dst.loc then
begin match src.typ, src.loc, dst.typ, dst.loc with
| Float, Reg.Reg _, Float, Reg.Reg _
| (Float | Float32), Reg.Reg _, (Float | Float32), Reg.Reg _
| Vec128, _, Vec128, _ ->
(* Vec128 stack slots are always aligned. *)
I.movapd (reg src) (reg dst)
| Float, _, Float, _ ->
I.movsd (reg src) (reg dst)
| Float32, _, Float32, _ ->
I.movss (reg src) (reg dst)
| Float, _, Int, _ | Int, _, Float, _ ->
(* CR-soon gyorsh: this case is used by the bits_of_float/float_of_bits intrinsics.
They should instead generate a separate Ispecific and this case should be
Expand All @@ -762,6 +770,10 @@ let move (src : Reg.t) (dst : Reg.t) =
Misc.fatal_errorf
"Illegal move between a vector and non-vector register (%s to %s)\n"
(Reg.name src) (Reg.name dst)
| Float32, _, _, _ | _, _, Float32, _ ->
Misc.fatal_errorf
"Illegal move between a float32 and non-float32 register (%s to %s)\n"
(Reg.name src) (Reg.name dst)
| Float, _, (Val | Addr), _ | (Val | Addr), _, Float, _ ->
Misc.fatal_errorf
"Illegal move between a float and val/addr register (%s to %s)\n"
Expand All @@ -776,7 +788,7 @@ let stack_to_stack_move (src : Reg.t) (dst : Reg.t) =
(* Not calling move because r15 is not in int_reg_name. *)
I.mov (reg src) r15;
I.mov r15 (reg dst)
| Float | Addr | Vec128 ->
| Float | Addr | Vec128 | Float32 ->
Misc.fatal_errorf
"Unexpected register type for stack to stack move: from %s to %s\n"
(Reg.name src) (Reg.name dst)
Expand Down Expand Up @@ -1237,6 +1249,15 @@ let emit_instr fallthrough i =
I.mov (nat n) (res i 0)
end else
I.mov (nat n) (res i 0)
| Lop(Iconst_float32 f) ->
begin match f with
| 0x0000_0000l -> (* +0.0 *)
I.xorpd (res i 0) (res i 0)
| _ ->
(* float32 constants still take up 8 bytes; we load the lower half. *)
let lbl = add_float_constant (Int64.of_int32 f) in
I.movss (mem64_rip REAL4 (emit_label lbl)) (res i 0)
end
| Lop(Iconst_float f) ->
begin match f with
| 0x0000_0000_0000_0000L -> (* +0.0 *)
Expand Down Expand Up @@ -1341,8 +1362,10 @@ let emit_instr fallthrough i =
I.movupd (addressing addressing_mode VEC128 i 0) dest
| Onetwentyeight_aligned ->
I.movapd (addressing addressing_mode VEC128 i 0) dest
| Single ->
| Single { reg = Float64 } ->
I.cvtss2sd (addressing addressing_mode REAL4 i 0) dest
| Single { reg = Float32 } ->
I.movss (addressing addressing_mode REAL4 i 0) dest
| Double ->
I.movsd (addressing addressing_mode REAL8 i 0) dest
end
Expand All @@ -1360,9 +1383,11 @@ let emit_instr fallthrough i =
I.movupd (arg i 0) (addressing addr VEC128 i 1)
| Onetwentyeight_aligned ->
I.movapd (arg i 0) (addressing addr VEC128 i 1)
| Single ->
| Single { reg = Float64 } ->
I.cvtsd2ss (arg i 0) xmm15;
I.movss xmm15 (addressing addr REAL4 i 1)
| Single { reg = Float32 } ->
I.movss (arg i 0) (addressing addr REAL4 i 1)
| Double ->
I.movsd (arg i 0) (addressing addr REAL8 i 1)
end
Expand Down Expand Up @@ -1485,10 +1510,18 @@ let emit_instr fallthrough i =
instr_for_floatop floatop (arg i 1) (res i 0)
| Lop(Iintofvalue | Ivalueofint | Ivectorcast Bits128) ->
move i.arg.(0) i.res.(0)
| Lop(Iscalarcast Float_of_int) ->
| Lop(Iscalarcast (Float_of_int Float64)) ->
I.cvtsi2sd (arg i 0) (res i 0)
| Lop(Iscalarcast Float_to_int) ->
| Lop(Iscalarcast (Float_to_int Float64)) ->
I.cvttsd2si (arg i 0) (res i 0)
| Lop(Iscalarcast (Float_of_int Float32)) ->
I.cvtsi2ss (arg i 0) (res i 0)
| Lop(Iscalarcast (Float_to_int Float32)) ->
I.cvttss2si (arg i 0) (res i 0)
| Lop(Iscalarcast Float_of_float32) ->
I.cvtss2sd (arg i 0) (res i 0)
| Lop(Iscalarcast Float_to_float32) ->
I.cvtsd2ss (arg i 0) (res i 0)
| Lop(Iscalarcast (V128_of_scalar Float64x2 | V128_to_scalar Float64x2)) ->
I.movsd (arg i 0) (res i 0)
| Lop(Iscalarcast (V128_to_scalar Int64x2 | V128_of_scalar Int64x2)) ->
Expand Down Expand Up @@ -2011,6 +2044,7 @@ let make_stack_loc ~offset n (r : Reg.t) =
(* Manufacture stack entry with this register's type *)
(match r.typ with
| Int | Val | Addr | Float -> ()
| Float32 -> assert_float32_enabled ()
| Vec128 -> assert_simd_enabled ());
Reg.at_location r.typ loc

Expand Down Expand Up @@ -2067,15 +2101,19 @@ let size_of_regs regs =
(fun r acc ->
match r.Reg.typ with
| Int | Addr | Val -> acc + size_int
| Float -> acc + size_float
| Float | Float32 ->
(* Float32 slots still take up a full word *)
acc + size_float
| Vec128 -> acc + size_vec128)
regs 0

let stack_locations ~offset regs =
let _, locs = Array.fold_right (fun r (n, offsets) ->
let next = n + match r.Reg.typ with
| Int | Val | Addr -> size_int
| Float -> size_float
| Float | Float32 ->
(* Float32 slots still take up a full word *)
size_float
| Vec128 -> size_vec128 in
next, (make_stack_loc n r ~offset :: offsets)) regs (0, []) in
locs |> Array.of_list
Expand Down Expand Up @@ -2161,7 +2199,7 @@ let emit_probe_handler_wrapper p =
| Stack (Outgoing k) ->
(match r.typ with
| Val -> k::acc
| Int | Float | Vec128 -> acc
| Int | Float | Vec128 | Float32 -> acc
| Addr -> Misc.fatal_error ("bad GC root " ^ Reg.name r))
| _ -> assert false)
saved_live
Expand Down
Loading
Loading