ocaml-flambda · TheNumbat · Apr 26, 2024 · Mar 14, 2024 · Mar 14, 2024 · Mar 14, 2024
diff --git a/backend/CSEgen.ml b/backend/CSEgen.ml
@@ -56,7 +56,8 @@ class cse_generic = object (self)
 method class_of_operation op =
   match op with
   | Imove | Ispill | Ireload -> assert false   (* treated specially *)
-  | Iconst_int _ | Iconst_float _ | Iconst_symbol _ | Iconst_vec128 _ -> Op_pure
+  | Iconst_int _ | Iconst_float32 _ | Iconst_float _
+  | Iconst_symbol _ | Iconst_vec128 _ -> Op_pure
   | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _
   | Iextcall _ | Iprobe _ | Iopaque -> assert false  (* treated specially *)
   | Istackoffset _ -> Op_other

diff --git a/backend/afl_instrument.ml b/backend/afl_instrument.ml
@@ -97,8 +97,8 @@ and instrument = function
   | Cexit (ex, args, traps) -> Cexit (ex, List.map instrument args, traps)
 
   (* these are base cases and have no logging *)
-  | Cconst_int _ | Cconst_natint _ | Cconst_float _ | Cconst_vec128 _
-  | Cconst_symbol _
+  | Cconst_int _ | Cconst_natint _ | Cconst_float32 _ | Cconst_float _
+  | Cconst_vec128 _ | Cconst_symbol _
   | Cvar _ as c -> c
 
 let instrument_function c dbg =

diff --git a/backend/amd64/CSE.ml b/backend/amd64/CSE.ml
@@ -47,7 +47,8 @@ method! class_of_operation op =
   | Ifloatop _
   | Icsel _
   | Ivalueofint | Iintofvalue | Ivectorcast _ | Iscalarcast _
-  | Iconst_int _ | Iconst_float _ | Iconst_symbol _ | Iconst_vec128 _
+  | Iconst_int _ | Iconst_float32 _ | Iconst_float _
+  | Iconst_symbol _ | Iconst_vec128 _
   | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ | Iextcall _
   | Istackoffset _ | Iload _ | Istore _ | Ialloc _
   | Iintop _ | Iintop_imm _ | Iintop_atomic _
@@ -89,7 +90,8 @@ class cfg_cse = object
   | Floatop _
   | Csel _
   | Valueofint | Intofvalue | Vectorcast _ | Scalarcast _
-  | Const_int _ | Const_float _ | Const_symbol _ | Const_vec128 _
+  | Const_int _ | Const_float32 _ | Const_float _
+  | Const_symbol _ | Const_vec128 _
   | Stackoffset _ | Load _ | Store _ | Alloc _
   | Intop _ | Intop_imm _ | Intop_atomic _
   | Name_for_debugger _ | Probe_is_enabled _ | Opaque

diff --git a/backend/amd64/arch.ml b/backend/amd64/arch.ml
@@ -112,6 +112,10 @@ let assert_simd_enabled () =
   if not (Language_extension.is_enabled SIMD) then
   Misc.fatal_error "SIMD is not enabled."
 
+let assert_float32_enabled () = (* fatal unless Small_numbers is on *)
+  if not (Language_extension.is_enabled Small_numbers) then
+  Misc.fatal_error "float32 is not enabled."
+
 (* Specific operations for the AMD64 processor *)
 
 open Format

diff --git a/backend/amd64/arch.mli b/backend/amd64/arch.mli
@@ -41,7 +41,9 @@ end
 val trap_notes : bool ref
 val arch_check_symbols : bool ref
 val command_line_options : (string * Arg.spec * string) list
+
 val assert_simd_enabled : unit -> unit
+val assert_float32_enabled : unit -> unit
 
 (* Specific operations for the AMD64 processor *)
 

diff --git a/backend/amd64/emit.mlp b/backend/amd64/emit.mlp
@@ -48,10 +48,15 @@ let int_reg_name =
 let float_reg_name = Array.init 16 (fun i -> XMM i)
 
 let register_name typ r =
-  match typ with
+  match (typ : machtype_component) with
   | Int | Val | Addr -> Reg64 (int_reg_name.(r))
   | Float -> Regf (float_reg_name.(r - 100))
-  | Vec128 -> assert_simd_enabled (); Regf (float_reg_name.(r - 100))
+  | Vec128 ->
+    assert_simd_enabled ();
+    Regf (float_reg_name.(r - 100))
+  | Float32 -> (* same XMM register table as Float and Vec128 *)
+    assert_float32_enabled (); (* fatal if Small_numbers is off *)
+    Regf (float_reg_name.(r - 100))
 
 let phys_rax = phys_reg Int 0
 let phys_rdx = phys_reg Int 4
@@ -300,10 +305,11 @@ let emit_Llabel fallthrough lbl section_name =
 
 (* Output a pseudo-register *)
 
-let x86_data_type_for_stack_slot = function
+let x86_data_type_for_stack_slot : machtype_component -> data_type = function
   | Float -> REAL8
   | Vec128 -> VEC128
   | Int | Addr | Val -> QWORD
+  | Float32 -> REAL4
 
 let reg = function
   | { loc = Reg.Reg r; typ = ty } -> register_name ty r
@@ -745,12 +751,14 @@ let emit_global_label s =
 let move (src : Reg.t) (dst : Reg.t) =
   if src.loc <> dst.loc then
     begin match src.typ, src.loc, dst.typ, dst.loc with
-    | Float, Reg.Reg _, Float, Reg.Reg _
+    | (Float | Float32), Reg.Reg _, (Float | Float32), Reg.Reg _
     | Vec128, _, Vec128, _ ->
       (* Vec128 stack slots are always aligned. *)
       I.movapd (reg src) (reg dst)
     | Float, _, Float, _ ->
       I.movsd (reg src) (reg dst)
+    | Float32, _, Float32, _ ->
+      I.movss (reg src) (reg dst)
     | Float, _, Int, _ | Int, _, Float, _ ->
       (* CR-soon gyorsh: this case is used by the bits_of_float/float_of_bits intrinsics.
          They should instead generate a separate Ispecific and this case should be
@@ -762,6 +770,10 @@ let move (src : Reg.t) (dst : Reg.t) =
       Misc.fatal_errorf
         "Illegal move between a vector and non-vector register (%s to %s)\n"
         (Reg.name src) (Reg.name dst)
+    | Float32, _, _, _ | _, _, Float32, _ ->
+      Misc.fatal_errorf
+        "Illegal move between a float32 and non-float32 register (%s to %s)\n"
+        (Reg.name src) (Reg.name dst)
     | Float, _, (Val | Addr), _ | (Val | Addr), _, Float, _ ->
       Misc.fatal_errorf
         "Illegal move between a float and val/addr register (%s to %s)\n"
@@ -776,7 +788,7 @@ let stack_to_stack_move (src : Reg.t) (dst : Reg.t) =
       (* Not calling move because r15 is not in int_reg_name. *)
       I.mov (reg src) r15;
       I.mov r15 (reg dst)
-    | Float | Addr | Vec128 ->
+    | Float | Addr | Vec128 | Float32 ->
       Misc.fatal_errorf
         "Unexpected register type for stack to stack move: from %s to %s\n"
         (Reg.name src) (Reg.name dst)
@@ -1237,6 +1249,15 @@ let emit_instr fallthrough i =
           I.mov (nat n) (res i 0)
       end else
         I.mov (nat n) (res i 0)
+  | Lop(Iconst_float32 f) ->
+      begin match f with
+      | 0x0000_0000l ->       (* +0.0 only; -0.0 has the sign bit set *)
+          I.xorpd (res i 0) (res i 0)
+      | _ ->
+          (* Stored as 8 bytes (sign-extended); movss loads only the low 4. *)
+          let lbl = add_float_constant (Int64.of_int32 f) in
+          I.movss (mem64_rip REAL4 (emit_label lbl)) (res i 0)
+      end
   | Lop(Iconst_float f) ->
       begin match f with
       | 0x0000_0000_0000_0000L ->       (* +0.0 *)
@@ -1341,8 +1362,10 @@ let emit_instr fallthrough i =
           I.movupd (addressing addressing_mode VEC128 i 0) dest
       | Onetwentyeight_aligned ->
           I.movapd (addressing addressing_mode VEC128 i 0) dest
-      | Single ->
+      | Single { reg = Float64 } ->
           I.cvtss2sd (addressing addressing_mode REAL4 i 0) dest
+      | Single { reg = Float32 } ->
+          I.movss (addressing addressing_mode REAL4 i 0) dest
       | Double ->
           I.movsd (addressing addressing_mode REAL8 i 0) dest
       end
@@ -1360,9 +1383,11 @@ let emit_instr fallthrough i =
           I.movupd (arg i 0) (addressing addr VEC128 i 1)
       | Onetwentyeight_aligned ->
           I.movapd (arg i 0) (addressing addr VEC128 i 1)
-      | Single ->
+      | Single { reg = Float64 } ->
           I.cvtsd2ss (arg i 0) xmm15;
           I.movss xmm15 (addressing addr REAL4 i 1)
+      | Single { reg = Float32 } ->
+          I.movss (arg i 0) (addressing addr REAL4 i 1)
       | Double ->
           I.movsd (arg i 0) (addressing addr REAL8 i 1)
       end
@@ -1485,10 +1510,18 @@ let emit_instr fallthrough i =
       instr_for_floatop floatop (arg i 1) (res i 0)
   | Lop(Iintofvalue | Ivalueofint | Ivectorcast Bits128) ->
       move i.arg.(0) i.res.(0)
-  | Lop(Iscalarcast Float_of_int) ->
+  | Lop(Iscalarcast (Float_of_int Float64)) ->
       I.cvtsi2sd  (arg i 0)  (res i 0)
-  | Lop(Iscalarcast Float_to_int) ->
+  | Lop(Iscalarcast (Float_to_int Float64)) ->
       I.cvttsd2si (arg i 0) (res i 0)
+  | Lop(Iscalarcast (Float_of_int Float32)) ->
+      I.cvtsi2ss  (arg i 0)  (res i 0)
+  | Lop(Iscalarcast (Float_to_int Float32)) ->
+      I.cvttss2si (arg i 0) (res i 0)
+  | Lop(Iscalarcast Float_of_float32) ->
+      I.cvtss2sd  (arg i 0)  (res i 0)
+  | Lop(Iscalarcast Float_to_float32) ->
+      I.cvtsd2ss  (arg i 0) (res i 0)
   | Lop(Iscalarcast (V128_of_scalar Float64x2 | V128_to_scalar Float64x2)) ->
       I.movsd (arg i 0) (res i 0)
   | Lop(Iscalarcast (V128_to_scalar Int64x2 | V128_of_scalar Int64x2)) ->
@@ -2011,6 +2044,7 @@ let make_stack_loc ~offset n (r : Reg.t) =
   (* Manufacture stack entry with this register's type *)
   (match r.typ with
    | Int | Val | Addr | Float -> ()
+   | Float32 -> assert_float32_enabled ()
    | Vec128 -> assert_simd_enabled ());
   Reg.at_location r.typ loc
 
@@ -2067,15 +2101,19 @@ let size_of_regs regs =
     (fun r acc ->
       match r.Reg.typ with
       | Int | Addr | Val -> acc + size_int
-      | Float -> acc + size_float
+      | Float | Float32 ->
+        (* Float32 slots still take up a full word *)
+        acc + size_float
       | Vec128 -> acc + size_vec128)
     regs 0
 
 let stack_locations ~offset regs =
   let _, locs = Array.fold_right (fun r (n, offsets) ->
     let next = n + match r.Reg.typ with
       | Int | Val | Addr -> size_int
-      | Float -> size_float
+      | Float | Float32 ->
+        (* Float32 slots still take up a full word *)
+        size_float
       | Vec128 -> size_vec128 in
     next, (make_stack_loc n r ~offset :: offsets)) regs (0, []) in
   locs |> Array.of_list
@@ -2161,7 +2199,7 @@ let emit_probe_handler_wrapper p =
       | Stack (Outgoing k) ->
         (match r.typ with
         | Val -> k::acc
-        | Int | Float | Vec128 -> acc
+        | Int | Float | Vec128 | Float32 -> acc
         | Addr -> Misc.fatal_error ("bad GC root " ^ Reg.name r))
       | _ -> assert false)
     saved_live