From 12ba4d92a9901a74e07a6d38d7516298459a17e5 Mon Sep 17 00:00:00 2001 From: Max Slater Date: Thu, 13 Jun 2024 12:07:01 -0400 Subject: [PATCH] Float32 min/max/rounding intrinsics (#2684) --- backend/amd64/emit.mlp | 15 +++- backend/amd64/simd.ml | 91 ++++++++++++++--------- backend/amd64/simd_proc.ml | 13 ++-- backend/amd64/simd_selection.ml | 15 +++- backend/x86_ast.mli | 3 + backend/x86_binary_emitter.ml | 18 +++++ backend/x86_dsl.ml | 3 + backend/x86_dsl.mli | 3 + backend/x86_gas.ml | 3 + backend/x86_masm.ml | 3 + ocaml/otherlibs/stdlib_beta/float32.ml | 28 +++++++ ocaml/otherlibs/stdlib_beta/float32.mli | 57 ++++++++++++-- ocaml/otherlibs/stdlib_beta/float32_u.ml | 16 ++++ ocaml/otherlibs/stdlib_beta/float32_u.mli | 40 ++++++++++ ocaml/runtime/float32.c | 45 +++++++++++ ocaml/runtime4/float32.c | 45 +++++++++++ tests/small_numbers/float32_lib.ml | 17 +++++ tests/small_numbers/float32_u_lib.ml | 18 +++++ tests/small_numbers/stubs.c | 24 ++++++ 19 files changed, 404 insertions(+), 53 deletions(-) diff --git a/backend/amd64/emit.mlp b/backend/amd64/emit.mlp index 3cde513ca31..7a5213c0cf8 100644 --- a/backend/amd64/emit.mlp +++ b/backend/amd64/emit.mlp @@ -1086,6 +1086,13 @@ let emit_simd_instr op i = | CLMUL (Clmul_64 n) -> I.pclmulqdq (X86_dsl.int n) (arg i 1) (res i 0) | BMI2 Extract_64 -> I.pext (arg i 1) (arg i 0) (res i 0) | BMI2 Deposit_64 -> I.pdep (arg i 1) (arg i 0) (res i 0) + | SSE Round_current_f32_i64 -> I.cvtss2si (arg i 0) (res i 0) + | SSE Sqrt_scalar_f32 -> + if arg i 0 <> res i 0 then + I.xorpd (res i 0) (res i 0); (* avoid partial register stall *) + I.sqrtss (arg i 0) (res i 0) + | SSE Max_scalar_f32 -> I.maxss (arg i 1) (res i 0) + | SSE Min_scalar_f32 -> I.minss (arg i 1) (res i 0) | SSE (Cmp_f32 n) -> I.cmpps n (arg i 1) (res i 0) | SSE Add_f32 -> I.addps (arg i 1) (res i 0) | SSE Sub_f32 -> I.subps (arg i 1) (res i 0) @@ -1109,10 +1116,6 @@ let emit_simd_instr op i = if arg i 0 <> res i 0 then I.xorpd (res i 0) (res i 0); (* avoid partial register stall *) I.sqrtsd (arg i 0) (res i 0) - | SSE2 Sqrt_scalar_f32 -> - if arg i 0 <> res i 0 then - I.xorpd (res i 0) (res i 0); (* avoid partial register stall *) - I.sqrtss (arg i 0) (res i 0) | SSE2 Sqrt_f64 -> I.sqrtpd (arg i 0) (res i 0) | SSE2 Add_i8 -> I.paddb (arg i 1) (res i 0) | SSE2 Add_i16 -> I.paddw (arg i 1) (res i 0) @@ -1271,6 +1274,10 @@ let emit_simd_instr op i = if arg i 0 <> res i 0 then I.xorpd (res i 0) (res i 0); (* avoid partial register stall *) I.roundsd n (arg i 0) (res i 0) + | SSE41 (Round_scalar_f32 n) -> + if arg i 0 <> res i 0 then + I.xorpd (res i 0) (res i 0); (* avoid partial register stall *) + I.roundss n (arg i 0) (res i 0) | SSE41 (Round_f64 n) -> I.roundpd n (arg i 0) (res i 0) | SSE41 (Round_f32 n) -> I.roundps n (arg i 0) (res i 0) | SSE41 (Multi_sad_unsigned_i8 n) -> I.mpsadbw (X86_dsl.int n) (arg i 1) (res i 0) diff --git a/backend/amd64/simd.ml b/backend/amd64/simd.ml index d64c2e67576..0828956b1de 100644 --- a/backend/amd64/simd.ml +++ b/backend/amd64/simd.ml @@ -68,6 +68,10 @@ type bmi2_operation = | Extract_64 type sse_operation = + | Round_current_f32_i64 + | Sqrt_scalar_f32 + | Min_scalar_f32 + | Max_scalar_f32 | Cmp_f32 of float_condition | Add_f32 | Sub_f32 @@ -89,7 +93,6 @@ type sse_operation = type sse2_operation = | Round_current_f64_i64 | Sqrt_scalar_f64 - | Sqrt_scalar_f32 | Min_scalar_f64 | Max_scalar_f64 | Sqrt_f64 @@ -207,6 +210,7 @@ type ssse3_operation = type sse41_operation = | Round_scalar_f64 of float_rounding + | Round_scalar_f32 of float_rounding | Blend_16 of int | Blend_32 of int | Blend_64 of int @@ -289,6 +293,10 @@ let equal_operation_bmi2 l r = let equal_operation_sse l r = match l, r with + | Round_current_f32_i64, Round_current_f32_i64 + | Sqrt_scalar_f32, Sqrt_scalar_f32 + | Min_scalar_f32, Min_scalar_f32 + | Max_scalar_f32, Max_scalar_f32 | Add_f32, Add_f32 | Sub_f32, Sub_f32 | Mul_f32, Mul_f32 @@ -307,10 +315,11 @@ let equal_operation_sse l r = true | Cmp_f32 l, Cmp_f32 r when float_condition_equal l r -> true | Shuffle_32 l, Shuffle_32 r when Int.equal l r -> true - | ( ( Add_f32 | Sub_f32 | Mul_f32 | Div_f32 | Max_f32 | Min_f32 | Rcp_f32 - | Sqrt_f32 | Rsqrt_f32 | High_64_to_low_64 | Low_64_to_high_64 - | Interleave_high_32 | Interleave_low_32_regs | Interleave_low_32 - | Movemask_32 | Cmp_f32 _ | Shuffle_32 _ ), + | ( ( Round_current_f32_i64 | Sqrt_scalar_f32 | Min_scalar_f32 + | Max_scalar_f32 | Add_f32 | Sub_f32 | Mul_f32 | Div_f32 | Max_f32 + | Min_f32 | Rcp_f32 | Sqrt_f32 | Rsqrt_f32 | High_64_to_low_64 + | Low_64_to_high_64 | Interleave_high_32 | Interleave_low_32_regs + | Interleave_low_32 | Movemask_32 | Cmp_f32 _ | Shuffle_32 _ ), _ ) -> false @@ -320,7 +329,6 @@ let equal_operation_sse2 l r = | Min_scalar_f64, Min_scalar_f64 | Max_scalar_f64, Max_scalar_f64 | Sqrt_scalar_f64, Sqrt_scalar_f64 - | Sqrt_scalar_f32, Sqrt_scalar_f32 | Sqrt_f64, Sqrt_f64 | Add_i8, Add_i8 | Add_i16, Add_i16 @@ -409,26 +417,25 @@ let equal_operation_sse2 l r = true | Cmp_f64 l, Cmp_f64 r when float_condition_equal l r -> true | ( ( Add_i8 | Add_i16 | Add_i32 | Add_i64 | Add_f64 | Min_scalar_f64 - | Max_scalar_f64 | Round_current_f64_i64 | Sqrt_scalar_f64 - | Sqrt_scalar_f32 | Sqrt_f64 | Add_saturating_unsigned_i8 - | Add_saturating_unsigned_i16 | Add_saturating_i8 | Add_saturating_i16 - | Sub_i8 | Sub_i16 | Sub_i32 | Sub_i64 | Sub_f64 - | Sub_saturating_unsigned_i8 | Sub_saturating_unsigned_i16 - | Sub_saturating_i8 | Sub_saturating_i16 | Max_unsigned_i8 | Max_i16 - | Max_f64 | Min_unsigned_i8 | Min_i16 | Min_f64 | Mul_f64 | Div_f64 - | And_bits | Andnot_bits | Or_bits | Xor_bits | Movemask_8 | Movemask_64 - | Cmpeq_i8 | Cmpeq_i16 | Cmpeq_i32 | Cmpgt_i8 | Cmpgt_i16 | Cmpgt_i32 - | I32_to_f64 | I32_to_f32 | F64_to_i32 | F64_to_f32 | F32_to_i32 - | F32_to_f64 | SLL_i16 | SLL_i32 | SLL_i64 | SRL_i16 | SRL_i32 | SRL_i64 - | SRA_i16 | SRA_i32 | I16_to_i8 | I32_to_i16 | I16_to_unsigned_i8 - | I32_to_unsigned_i16 | Avg_unsigned_i8 | Avg_unsigned_i16 - | SAD_unsigned_i8 | Interleave_high_8 | Interleave_high_16 - | Interleave_high_64 | Interleave_low_8 | Interleave_low_16 - | Interleave_low_64 | SLLi_i16 _ | SLLi_i32 _ | SLLi_i64 _ | SRLi_i16 _ - | SRLi_i32 _ | SRLi_i64 _ | SRAi_i16 _ | SRAi_i32 _ | Shift_left_bytes _ - | Shift_right_bytes _ | Cmp_f64 _ | Shuffle_64 _ | Shuffle_high_16 _ - | Shuffle_low_16 _ | Mulhi_i16 | Mulhi_unsigned_i16 | Mullo_i16 - | Mul_hadd_i16_to_i32 ), + | Max_scalar_f64 | Round_current_f64_i64 | Sqrt_scalar_f64 | Sqrt_f64 + | Add_saturating_unsigned_i8 | Add_saturating_unsigned_i16 + | Add_saturating_i8 | Add_saturating_i16 | Sub_i8 | Sub_i16 | Sub_i32 + | Sub_i64 | Sub_f64 | Sub_saturating_unsigned_i8 + | Sub_saturating_unsigned_i16 | Sub_saturating_i8 | Sub_saturating_i16 + | Max_unsigned_i8 | Max_i16 | Max_f64 | Min_unsigned_i8 | Min_i16 + | Min_f64 | Mul_f64 | Div_f64 | And_bits | Andnot_bits | Or_bits + | Xor_bits | Movemask_8 | Movemask_64 | Cmpeq_i8 | Cmpeq_i16 | Cmpeq_i32 + | Cmpgt_i8 | Cmpgt_i16 | Cmpgt_i32 | I32_to_f64 | I32_to_f32 | F64_to_i32 + | F64_to_f32 | F32_to_i32 | F32_to_f64 | SLL_i16 | SLL_i32 | SLL_i64 + | SRL_i16 | SRL_i32 | SRL_i64 | SRA_i16 | SRA_i32 | I16_to_i8 | I32_to_i16 + | I16_to_unsigned_i8 | I32_to_unsigned_i16 | Avg_unsigned_i8 + | Avg_unsigned_i16 | SAD_unsigned_i8 | Interleave_high_8 + | Interleave_high_16 | Interleave_high_64 | Interleave_low_8 + | Interleave_low_16 | Interleave_low_64 | SLLi_i16 _ | SLLi_i32 _ + | SLLi_i64 _ | SRLi_i16 _ | SRLi_i32 _ | SRLi_i64 _ | SRAi_i16 _ + | SRAi_i32 _ | Shift_left_bytes _ | Shift_right_bytes _ | Cmp_f64 _ + | Shuffle_64 _ | Shuffle_high_16 _ | Shuffle_low_16 _ | Mulhi_i16 + | Mulhi_unsigned_i16 | Mullo_i16 | Mul_hadd_i16_to_i32 ), _ ) -> false @@ -521,6 +528,7 @@ let equal_operation_sse41 l r = when Int.equal l r -> true | Round_scalar_f64 l, Round_scalar_f64 r + | Round_scalar_f32 l, Round_scalar_f32 r | Round_f64 l, Round_f64 r | Round_f32 l, Round_f32 r when float_rounding_equal l r -> @@ -533,7 +541,7 @@ let equal_operation_sse41 l r = | Blend_16 _ | Blend_32 _ | Blend_64 _ | Dp_f32 _ | Dp_f64 _ | Mullo_i32 | Extract_i8 _ | Extract_i16 _ | Extract_i32 _ | Extract_i64 _ | Insert_i8 _ | Insert_i16 _ | Insert_i32 _ | Insert_i64 _ | Round_f64 _ - | Round_scalar_f64 _ | Round_f32 _ ), + | Round_scalar_f64 _ | Round_scalar_f32 _ | Round_f32 _ ), _ ) -> false @@ -607,6 +615,13 @@ let print_operation_bmi2 printreg op ppf arg = let print_operation_sse printreg op ppf arg = match op with + | Round_current_f32_i64 -> + fprintf ppf "round_current_f32_i64 %a" printreg arg.(0) + | Sqrt_scalar_f32 -> fprintf ppf "sqrt_scalar_f32 %a" printreg arg.(0) + | Min_scalar_f32 -> + fprintf ppf "min_scalar_f32 %a %a" printreg arg.(0) printreg arg.(1) + | Max_scalar_f32 -> + fprintf ppf "max_scalar_f32 %a %a" printreg arg.(0) printreg arg.(1) | Cmp_f32 i -> fprintf ppf "cmp_f32[%a] %a %a" print_float_condition i printreg arg.(0) printreg arg.(1) @@ -636,7 +651,6 @@ let print_operation_sse printreg op ppf arg = let print_operation_sse2 printreg op ppf arg = match op with | Sqrt_scalar_f64 -> fprintf ppf "sqrt_scalar_f64 %a" printreg arg.(0) - | Sqrt_scalar_f32 -> fprintf ppf "sqrt_scalar_f32 %a" printreg arg.(0) | Min_scalar_f64 -> fprintf ppf "min_scalar_f64 %a %a" printreg arg.(0) printreg arg.(1) | Max_scalar_f64 -> @@ -853,6 +867,9 @@ let print_operation_sse41 printreg op ppf arg = | Round_scalar_f64 i -> fprintf ppf "round_scalar_f64[%a] %a" print_float_rounding i printreg arg.(0) + | Round_scalar_f32 i -> + fprintf ppf "round_scalar_f32[%a] %a" print_float_rounding i printreg + arg.(0) | Round_f64 i -> fprintf ppf "round_f64[%a] %a" print_float_rounding i printreg arg.(0) | Round_f32 i -> @@ -912,18 +929,20 @@ let class_of_operation_clmul = function Clmul_64 _ -> Pure let class_of_operation_bmi2 = function Deposit_64 | Extract_64 -> Pure let class_of_operation_sse = function - | Cmp_f32 _ | Add_f32 | Sub_f32 | Mul_f32 | Div_f32 | Max_f32 | Min_f32 - | Rcp_f32 | Sqrt_f32 | Rsqrt_f32 | High_64_to_low_64 | Low_64_to_high_64 - | Interleave_high_32 | Interleave_low_32 | Interleave_low_32_regs - | Movemask_32 | Shuffle_32 _ -> + | Round_current_f32_i64 + (* CR-someday mslater: (SIMD) reads current rounding mode *) + | Sqrt_scalar_f32 | Min_scalar_f32 | Max_scalar_f32 | Cmp_f32 _ | Add_f32 + | Sub_f32 | Mul_f32 | Div_f32 | Max_f32 | Min_f32 | Rcp_f32 | Sqrt_f32 + | Rsqrt_f32 | High_64_to_low_64 | Low_64_to_high_64 | Interleave_high_32 + | Interleave_low_32 | Interleave_low_32_regs | Movemask_32 | Shuffle_32 _ -> Pure let class_of_operation_sse2 = function | Round_current_f64_i64 (* CR-someday mslater: (SIMD) reads current rounding mode *) | Add_i8 | Add_i16 | Add_i32 | Add_i64 | Add_f64 | Add_saturating_i8 - | Min_scalar_f64 | Max_scalar_f64 | Sqrt_scalar_f64 | Sqrt_scalar_f32 - | Sqrt_f64 | Add_saturating_i16 | Add_saturating_unsigned_i8 + | Min_scalar_f64 | Max_scalar_f64 | Sqrt_scalar_f64 | Sqrt_f64 + | Add_saturating_i16 | Add_saturating_unsigned_i8 | Add_saturating_unsigned_i16 | Sub_i8 | Sub_i16 | Sub_i32 | Sub_i64 | Sub_f64 | Sub_saturating_i8 | Sub_saturating_i16 | Sub_saturating_unsigned_i8 | Sub_saturating_unsigned_i16 | Max_unsigned_i8 | Max_i16 | Max_f64 @@ -962,8 +981,8 @@ let class_of_operation_sse41 = function | Extract_i32 _ | Extract_i64 _ | Insert_i8 _ | Insert_i16 _ | Insert_i32 _ | Insert_i64 _ | Max_i8 | Max_i32 | Max_unsigned_i16 | Max_unsigned_i32 | Min_i8 | Min_i32 | Min_unsigned_i16 | Min_unsigned_i32 | Round_f64 _ - | Round_scalar_f64 _ | Round_f32 _ | Multi_sad_unsigned_i8 _ - | Minpos_unsigned_i16 | Mullo_i32 -> + | Round_scalar_f64 _ | Round_scalar_f32 _ | Round_f32 _ + | Multi_sad_unsigned_i8 _ | Minpos_unsigned_i16 | Mullo_i32 -> Pure let class_of_operation_sse42 = function diff --git a/backend/amd64/simd_proc.ml b/backend/amd64/simd_proc.ml index 24ba56b241e..e8a752cb31a 100644 --- a/backend/amd64/simd_proc.ml +++ b/backend/amd64/simd_proc.ml @@ -39,10 +39,12 @@ let register_behavior_clmul = function Clmul_64 _ -> R_RM_to_fst let register_behavior_bmi2 = function Extract_64 | Deposit_64 -> R_RM_to_R let register_behavior_sse = function - | Cmp_f32 _ | Add_f32 | Sub_f32 | Mul_f32 | Div_f32 | Max_f32 | Min_f32 - | Interleave_low_32 | Interleave_high_32 | Shuffle_32 _ -> + | Min_scalar_f32 | Max_scalar_f32 | Cmp_f32 _ | Add_f32 | Sub_f32 | Mul_f32 + | Div_f32 | Max_f32 | Min_f32 | Interleave_low_32 | Interleave_high_32 + | Shuffle_32 _ -> R_RM_to_fst - | Rcp_f32 | Sqrt_f32 | Rsqrt_f32 -> RM_to_R + | Round_current_f32_i64 | Sqrt_scalar_f32 | Rcp_f32 | Sqrt_f32 | Rsqrt_f32 -> + RM_to_R | Interleave_low_32_regs | High_64_to_low_64 | Low_64_to_high_64 -> R_R_to_fst | Movemask_32 -> R_to_R @@ -64,7 +66,7 @@ let register_behavior_sse2 = function R_RM_to_fst | Shuffle_high_16 _ | Shuffle_low_16 _ | I32_to_f64 | I32_to_f32 | F64_to_i32 | Round_current_f64_i64 | F64_to_f32 | F32_to_i32 | F32_to_f64 | Sqrt_f64 - | Sqrt_scalar_f64 | Sqrt_scalar_f32 -> + | Sqrt_scalar_f64 -> RM_to_R | SLLi_i16 _ | SLLi_i32 _ | SLLi_i64 _ | SRLi_i16 _ | SRLi_i32 _ | SRLi_i64 _ | SRAi_i16 _ | SRAi_i32 _ | Shift_left_bytes _ | Shift_right_bytes _ -> @@ -91,7 +93,8 @@ let register_behavior_sse41 = function R_RM_to_fst | I8_sx_i16 | I8_sx_i32 | I8_sx_i64 | I16_sx_i32 | I16_sx_i64 | I32_sx_i64 | I8_zx_i16 | I8_zx_i32 | I8_zx_i64 | I16_zx_i32 | I16_zx_i64 | I32_zx_i64 - | Round_f64 _ | Round_f32 _ | Minpos_unsigned_i16 | Round_scalar_f64 _ -> + | Round_f64 _ | Round_f32 _ | Minpos_unsigned_i16 | Round_scalar_f64 _ + | Round_scalar_f32 _ -> RM_to_R | Blendv_8 | Blendv_32 | Blendv_64 -> R_RM_xmm0_to_fst | Extract_i64 _ | Extract_i32 _ -> R_to_RM diff --git a/backend/amd64/simd_selection.ml b/backend/amd64/simd_selection.ml index a054a219c50..f6cf760eeca 100644 --- a/backend/amd64/simd_selection.ml +++ b/backend/amd64/simd_selection.ml @@ -78,6 +78,10 @@ let select_operation_bmi2 op args = let select_operation_sse op args = match op with + | "caml_sse_float32_sqrt" | "sqrtf" -> Some (Sqrt_scalar_f32, args) + | "caml_sse_float32_max" -> Some (Max_scalar_f32, args) + | "caml_sse_float32_min" -> Some (Min_scalar_f32, args) + | "caml_sse_cast_float32_int64" -> Some (Round_current_f32_i64, args) | "caml_sse_float32x4_cmp" -> let i, args = extract_constant args ~max:7 op in Some (Cmp_f32 (float_condition_of_int i), args) @@ -103,7 +107,6 @@ let select_operation_sse op args = let select_operation_sse2 op args = match op with | "caml_sse2_float64_sqrt" | "sqrt" -> Some (Sqrt_scalar_f64, args) - | "caml_sse2_float32_sqrt" | "sqrtf" -> Some (Sqrt_scalar_f32, args) | "caml_sse2_float64_max" -> Some (Max_scalar_f64, args) | "caml_sse2_float64_min" -> Some (Min_scalar_f64, args) | "caml_sse2_cast_float64_int64" -> Some (Round_current_f64_i64, args) @@ -335,8 +338,18 @@ let select_operation_sse41 op args = let i, args = extract_constant args ~max:15 op in Some (Round_f64 (float_rounding_of_int i), args) | "caml_sse41_float64_round" -> + (* CR-someday mslater: the following CR also applies here, but this + builtin is not exposed by any of the stdlib libraries. *) let i, args = extract_constant args ~max:15 op in Some (Round_scalar_f64 (float_rounding_of_int i), args) + | "caml_sse41_float32_round" -> + (* CR-someday mslater: this builtin is exposed by float32.ml, so must + actually be cross-platform. Currently, non-amd64 architectures will + fall back to a C implementation. If we want the arm64 backend to + specialize it, we should redefine the constant mapping from the amd64 + values to a new sum type. *) + let i, args = extract_constant args ~max:15 op in + Some (Round_scalar_f32 (float_rounding_of_int i), args) | "caml_sse41_int8x16_max" -> Some (Max_i8, args) | "caml_sse41_int32x4_max" -> Some (Max_i32, args) | "caml_sse41_int16x8_max_unsigned" -> Some (Max_unsigned_i16, args) diff --git a/backend/x86_ast.mli b/backend/x86_ast.mli index a200c0b43e4..a19a3d20362 100644 --- a/backend/x86_ast.mli +++ b/backend/x86_ast.mli @@ -109,6 +109,8 @@ type arg = | Mem64_RIP of data_type * string * int type sse_instruction = + | MINSS of arg * arg + | MAXSS of arg * arg | CMPPS of float_condition * arg * arg | SHUFPS of arg * arg * arg | ADDPS of arg * arg @@ -280,6 +282,7 @@ type sse41_instruction = | PMINUD of arg * arg | ROUNDPD of rounding * arg * arg | ROUNDPS of rounding * arg * arg + | ROUNDSS of rounding * arg * arg | MPSADBW of arg * arg * arg | PHMINPOSUW of arg * arg | PMULLD of arg * arg diff --git a/backend/x86_binary_emitter.ml b/backend/x86_binary_emitter.ml index 097a3d42210..f6c08a9ac56 100644 --- a/backend/x86_binary_emitter.ml +++ b/backend/x86_binary_emitter.ml @@ -1091,6 +1091,7 @@ let emit_dpps = suffix emit_osize_rf_rfm_3A 0x40 let emit_dppd = suffix emit_osize_rf_rfm_3A 0x41 let emit_roundps = suffix emit_osize_rf_rfm_3A 0x08 let emit_roundpd = suffix emit_osize_rf_rfm_3A 0x09 +let emit_roundss = suffix emit_osize_rf_rfm_3A 0x0A let emit_pmulhw = emit_osize_rf_rfm 0xE5 let emit_pmulhuw = emit_osize_rf_rfm 0xE4 @@ -1754,6 +1755,20 @@ let emit_mfence b = buf_opcodes b [ 0x0F; 0xAE; 0xF0 ] let emit_leave b = buf_int8 b 0xC9 +let emit_maxss b ~dst ~src = + match (dst, src) with + | (Regf reg, ((Regf _ | Mem _ | Mem64_RIP _) as rm)) -> + buf_int8 b 0xF3; + emit_mod_rm_reg b no_rex [ 0x0F; 0x5F ] rm (rd_of_regf reg) + | _ -> assert false + +let emit_minss b ~dst ~src = + match (dst, src) with + | (Regf reg, ((Regf _ | Mem _ | Mem64_RIP _) as rm)) -> + buf_int8 b 0xF3; + emit_mod_rm_reg b no_rex [ 0x0F; 0x5D ] rm (rd_of_regf reg) + | _ -> assert false + let emit_maxsd b ~dst ~src = match (dst, src) with | (Regf reg, ((Regf _ | Mem _ | Mem64_RIP _) as rm)) -> @@ -1933,6 +1948,8 @@ let assemble_instr b loc = function | XORPS (src, dst) -> emit_xor_float ~width:Cmm.Float32 b dst src | ANDPS (src, dst) -> emit_and_float ~width:Cmm.Float32 b dst src | CMPSS (condition, src, dst) -> emit_cmp_float ~width:Cmm.Float32 b ~condition ~dst ~src + | SSE MINSS (src, dst) -> emit_minss b ~dst ~src + | SSE MAXSS (src, dst) -> emit_maxss b ~dst ~src | SSE CMPPS (cmp, src, dst) -> emit_cmpps b (imm8_of_float_condition cmp) dst src | SSE ADDPS (src, dst) -> emit_addps b dst src | SSE SUBPS (src, dst) -> emit_subps b dst src @@ -2095,6 +2112,7 @@ let assemble_instr b loc = function | SSE41 PMINUD (src, dst) -> emit_pminud b dst src | SSE41 ROUNDPD (n, src, dst) -> emit_roundpd b (imm8_of_rounding n) dst src | SSE41 ROUNDPS (n, src, dst) -> emit_roundps b (imm8_of_rounding n) dst src + | SSE41 ROUNDSS (n, src, dst) -> emit_roundss b (imm8_of_rounding n) dst src | SSE41 PHMINPOSUW (src, dst) -> emit_phminposuw b dst src | SSE41 PMULLD (src, dst) -> emit_pmulld b dst src | SSE41 MPSADBW (n, src, dst) -> emit_mpsadbw b (imm n) dst src diff --git a/backend/x86_dsl.ml b/backend/x86_dsl.ml index ab9d37c6e0f..bd4cdadfe87 100644 --- a/backend/x86_dsl.ml +++ b/backend/x86_dsl.ml @@ -205,6 +205,8 @@ module I = struct let andps x y = emit (ANDPS (x, y)) let cmpss i x y = emit (CMPSS (i, x, y)) + let minss x y = emit (SSE (MINSS (x, y))) + let maxss x y = emit (SSE (MAXSS (x, y))) let cmpps i x y = emit (SSE (CMPPS (i, x, y))) let shufps i x y = emit (SSE (SHUFPS (i, x, y))) let addps x y = emit (SSE (ADDPS (x, y))) @@ -372,6 +374,7 @@ module I = struct let pminud x y = emit (SSE41 (PMINUD (x, y))) let roundpd i x y = emit (SSE41 (ROUNDPD (i, x, y))) let roundps i x y = emit (SSE41 (ROUNDPS (i, x, y))) + let roundss i x y = emit (SSE41 (ROUNDSS (i, x, y))) let mpsadbw i x y = emit (SSE41 (MPSADBW (i, x, y))) let phminposuw x y = emit (SSE41 (PHMINPOSUW (x, y))) let pmulld x y = emit (SSE41 (PMULLD (x, y))) diff --git a/backend/x86_dsl.mli b/backend/x86_dsl.mli index 58bef19032e..08152d9296b 100644 --- a/backend/x86_dsl.mli +++ b/backend/x86_dsl.mli @@ -200,6 +200,8 @@ module I : sig (* Float32 arithmetic *) + val minss: arg -> arg -> unit + val maxss: arg -> arg -> unit val addss: arg -> arg -> unit val subss: arg -> arg -> unit val mulss: arg -> arg -> unit @@ -388,6 +390,7 @@ module I : sig val pminud: arg -> arg -> unit val roundpd: rounding -> arg -> arg -> unit val roundps: rounding -> arg -> arg -> unit + val roundss: rounding -> arg -> arg -> unit val mpsadbw: arg -> arg -> arg -> unit val phminposuw: arg -> arg -> unit val pmulld: arg -> arg -> unit diff --git a/backend/x86_gas.ml b/backend/x86_gas.ml index d8babc4f606..9bd0661b65f 100644 --- a/backend/x86_gas.ml +++ b/backend/x86_gas.ml @@ -219,6 +219,8 @@ let print_instr b = function | XORPS (arg1, arg2) -> i2 b "xorps" arg1 arg2 | ANDPS (arg1, arg2) -> i2 b "andps" arg1 arg2 | CMPSS (cmp, arg1, arg2) -> i2 b ("cmp" ^ string_of_float_condition cmp ^ "ss") arg1 arg2 + | SSE MINSS (arg1, arg2) -> i2 b "minss" arg1 arg2 + | SSE MAXSS (arg1, arg2) -> i2 b "maxss" arg1 arg2 | SSE CMPPS (cmp, arg1, arg2) -> i2 b ("cmp" ^ string_of_float_condition cmp ^ "ps") arg1 arg2 | SSE SHUFPS (shuf, arg1, arg2) -> i3 b "shufps" shuf arg1 arg2 | SSE ADDPS (arg1, arg2) -> i2 b "addps" arg1 arg2 @@ -382,6 +384,7 @@ let print_instr b = function | SSE41 PMINUD (arg1, arg2) -> i2 b "pminud" arg1 arg2 | SSE41 ROUNDPD (rd, arg1, arg2) -> i3 b "roundpd" (imm_of_rounding rd) arg1 arg2 | SSE41 ROUNDPS (rd, arg1, arg2) -> i3 b "roundps" (imm_of_rounding rd) arg1 arg2 + | SSE41 ROUNDSS (rd, arg1, arg2) -> i3 b "roundss" (imm_of_rounding rd) arg1 arg2 | SSE41 MPSADBW (n, arg1, arg2) -> i3 b "mpsadbw" n arg1 arg2 | SSE41 PHMINPOSUW (arg1, arg2) -> i2 b "phminposuw" arg1 arg2 | SSE41 PMULLD (arg1, arg2) -> i2 b "pmulld" arg1 arg2 diff --git a/backend/x86_masm.ml b/backend/x86_masm.ml index ed9527b2331..a510690c351 100644 --- a/backend/x86_masm.ml +++ b/backend/x86_masm.ml @@ -221,6 +221,8 @@ let print_instr b = function | CMPSS (cmp, arg1, arg2) -> i2 b ("cmp" ^ string_of_float_condition cmp ^ "ss") arg1 arg2 | SSE CMPPS (cmp, arg1, arg2) -> i2 b ("cmp" ^ string_of_float_condition cmp ^ "ps") arg1 arg2 | SSE SHUFPS (shuf, arg1, arg2) -> i3 b "shufps" shuf arg1 arg2 + | SSE MINSS (arg1, arg2) -> i2 b "minss" arg1 arg2 + | SSE MAXSS (arg1, arg2) -> i2 b "maxss" arg1 arg2 | SSE ADDPS (arg1, arg2) -> i2 b "addps" arg1 arg2 | SSE SUBPS (arg1, arg2) -> i2 b "subps" arg1 arg2 | SSE MULPS (arg1, arg2) -> i2 b "mulps" arg1 arg2 @@ -382,6 +384,7 @@ let print_instr b = function | SSE41 PMINUD (arg1, arg2) -> i2 b "pminud" arg1 arg2 | SSE41 ROUNDPD (rd, arg1, arg2) -> i3 b "roundpd" (imm_of_rounding rd) arg1 arg2 | SSE41 ROUNDPS (rd, arg1, arg2) -> i3 b "roundps" (imm_of_rounding rd) arg1 arg2 + | SSE41 ROUNDSS (rd, arg1, arg2) -> i3 b "roundss" (imm_of_rounding rd) arg1 arg2 | SSE41 MPSADBW (n, arg1, arg2) -> i3 b "mpsadbw" n arg1 arg2 | SSE41 PHMINPOSUW (arg1, arg2) -> i2 b "phminposuw" arg1 arg2 | SSE41 PMULLD (arg1, arg2) -> i2 b "pmulld" arg1 arg2 diff --git a/ocaml/otherlibs/stdlib_beta/float32.ml b/ocaml/otherlibs/stdlib_beta/float32.ml index 949a17ef321..bfa26e8d864 100644 --- a/ocaml/otherlibs/stdlib_beta/float32.ml +++ b/ocaml/otherlibs/stdlib_beta/float32.ml @@ -246,6 +246,16 @@ let[@inline] max (x : t) (y : t) = else if is_nan y then y else x +module With_weird_nan_behavior = struct + external min : t -> t -> t + = "caml_sse_float32_min_bytecode" "caml_sse_float32_min" + [@@noalloc] [@@unboxed] [@@builtin] + + external max : t -> t -> t + = "caml_sse_float32_max_bytecode" "caml_sse_float32_max" + [@@noalloc] [@@unboxed] [@@builtin] +end + let[@inline] min_max (x : t) (y : t) = if is_nan x || is_nan y then (nan, nan) else if y > x || ((not (sign_bit y)) && sign_bit x) then (x, y) @@ -267,6 +277,24 @@ let[@inline] min_max_num (x : t) (y : t) = else if y > x || ((not (sign_bit y)) && sign_bit x) then (x, y) else (y, x) +external iround_half_to_even : t -> int64 + = "caml_sse_cast_float32_int64_bytecode" "caml_sse_cast_float32_int64" + [@@noalloc] [@@unboxed] [@@builtin] + +external round_intrinsic : (int[@untagged]) -> (t[@unboxed]) -> (t[@unboxed]) + = "caml_sse41_float32_round_bytecode" "caml_sse41_float32_round" + [@@noalloc] [@@builtin] + +(* On amd64, these constants also imply _MM_FROUND_NO_EXC (suppress exceptions). *) +let round_neg_inf = 0x9 +let round_pos_inf = 0xA +let round_zero = 0xB +let round_current_mode = 0xC +let[@inline] round_half_to_even x = round_intrinsic round_current_mode x +let[@inline] round_down x = round_intrinsic round_neg_inf x +let[@inline] round_up x = round_intrinsic round_pos_inf x +let[@inline] round_towards_zero x = round_intrinsic round_zero x + external seeded_hash_param : int -> int -> int -> 'a -> int = "caml_hash_exn" [@@noalloc] diff --git a/ocaml/otherlibs/stdlib_beta/float32.mli b/ocaml/otherlibs/stdlib_beta/float32.mli index 3f90464182b..10fa57c795b 100644 --- a/ocaml/otherlibs/stdlib_beta/float32.mli +++ b/ocaml/otherlibs/stdlib_beta/float32.mli @@ -374,17 +374,14 @@ external erfc : t -> t = "caml_erfc_float32_bytecode" "erfcf" external trunc : t -> t = "caml_trunc_float32_bytecode" "truncf" [@@unboxed] [@@noalloc] (** [trunc x] rounds [x] to the nearest integer whose absolute value is - less than or equal to [x]. *) + less than or equal to [x]. *) external round : t -> t = "caml_round_float32_bytecode" "roundf" [@@unboxed] [@@noalloc] (** [round x] rounds [x] to the nearest integer with ties (fractional - values of 0.5s) rounded away from zero, regardless of the current - rounding direction. If [x] is an integer, [+0.s], [-0.s], [nan], or - infinite, [x] itself is returned. - - On 64-bit mingw-w64, this function may be emulated owing to a bug in the - C runtime library (CRT) on this platform. *) + values of 0.5s) rounded away from zero, regardless of the current + rounding direction. If [x] is an integer, [+0.s], [-0.s], [nan], or + infinite, [x] itself is returned. *) external ceil : t -> t = "caml_ceil_float32_bytecode" "ceilf" [@@unboxed] [@@noalloc] @@ -461,6 +458,24 @@ val max : t -> t -> t (** [max x y] returns the maximum of [x] and [y]. It returns [nan] when [x] or [y] is [nan]. Moreover [max (-0.s) (+0.s) = +0.s] *) +module With_weird_nan_behavior : sig + external min : t -> t -> t + = "caml_sse_float32_min_bytecode" "caml_sse_float32_min" + [@@noalloc] [@@unboxed] [@@builtin] + (** [min x y] returns the minimum of [x] and [y]. + If either [x] or [y] is [nan], [y] is returned. + If both [x] and [y] equal zero, [y] is returned. + The amd64 flambda-backend compiler translates this call to MINSS. *) + + external max : t -> t -> t + = "caml_sse_float32_max_bytecode" "caml_sse_float32_max" + [@@noalloc] [@@unboxed] [@@builtin] + (** [max x y] returns the maximum of [x] and [y]. + If either [x] or [y] is [nan], [y] is returned. + If both [x] and [y] equal zero, [y] is returned. + The amd64 flambda-backend compiler translates this call to MAXSS. *) +end + val min_max : t -> t -> t * t (** [min_max x y] is [(min x y, max x y)], just more efficient. *) @@ -479,6 +494,34 @@ val min_max_num : t -> t -> t * t efficient. Note that in particular [min_max_num x nan = (x, x)] and [min_max_num nan y = (y, y)]. *) +external iround_half_to_even : t -> int64 + = "caml_sse_cast_float32_int64_bytecode" "caml_sse_cast_float32_int64" + [@@noalloc] [@@unboxed] [@@builtin] +(** Rounds a [float32] to an [int64] using the current rounding mode. The default + rounding mode is "round half to even", and we expect that no program will + change the rounding mode. + If the argument is NaN or infinite or if the rounded value cannot be + represented, then the result is unspecified. + The amd64 flambda-backend compiler translates this call to CVTSS2SI. *) + +val round_half_to_even : t -> t +(** Rounds a [float32] to an integer [float32] using the current rounding + mode. The default rounding mode is "round half to even", and we + expect that no program will change the rounding mode. + The amd64 flambda-backend compiler translates this call to ROUNDSS. *) + +val round_down : t -> t +(** Rounds a [float32] down to the next integer [float32] toward negative infinity. + The amd64 flambda-backend compiler translates this call to ROUNDSS.*) + +val round_up : t -> t +(** Rounds a [float32] up to the next integer [float32] toward positive infinity. + The amd64 flambda-backend compiler translates this call to ROUNDSS.*) + +val round_towards_zero : t -> t +(** Rounds a [float32] to the next integer [float32] toward zero. + The amd64 flambda-backend compiler translates this call to ROUNDSS.*) + val seeded_hash : int -> t -> int (** A seeded hash function for floats, with the same output value as {!Hashtbl.seeded_hash}. This function allows this module to be passed as diff --git a/ocaml/otherlibs/stdlib_beta/float32_u.ml b/ocaml/otherlibs/stdlib_beta/float32_u.ml index 5fc35f491c6..8ae6fcea3d5 100644 --- a/ocaml/otherlibs/stdlib_beta/float32_u.ml +++ b/ocaml/otherlibs/stdlib_beta/float32_u.ml @@ -184,6 +184,22 @@ let[@inline always] min x y = of_float32 (Float32.min (to_float32 x) (to_float32 let[@inline always] max x y = of_float32 (Float32.max (to_float32 x) (to_float32 y)) +module With_weird_nan_behavior = struct + let[@inline always] min x y = of_float32 (Float32.With_weird_nan_behavior.min (to_float32 x) (to_float32 y)) + + let[@inline always] max x y = of_float32 (Float32.With_weird_nan_behavior.max (to_float32 x) (to_float32 y)) +end + let[@inline always] min_num x y = of_float32 (Float32.min_num (to_float32 x) (to_float32 y)) let[@inline always] max_num x y = of_float32 (Float32.max_num (to_float32 x) (to_float32 y)) + +let iround_half_to_even x = unbox_int64 (Float32.iround_half_to_even (to_float32 x)) + +let round_half_to_even x = of_float32 (Float32.round_half_to_even (to_float32 x)) + +let round_down x = of_float32 (Float32.round_down (to_float32 x)) + +let round_up x = of_float32 (Float32.round_up (to_float32 x)) + +let round_towards_zero x = of_float32 (Float32.round_towards_zero (to_float32 x)) diff --git a/ocaml/otherlibs/stdlib_beta/float32_u.mli b/ocaml/otherlibs/stdlib_beta/float32_u.mli index 154755ee386..0ce0538626f 100644 --- a/ocaml/otherlibs/stdlib_beta/float32_u.mli +++ b/ocaml/otherlibs/stdlib_beta/float32_u.mli @@ -364,6 +364,20 @@ val max : t -> t -> t (** [max x y] returns the maximum of [x] and [y]. It returns [nan] when [x] or [y] is [nan]. Moreover [max #-0.s #+0.s = #+0.s] *) +module With_weird_nan_behavior : sig + val min : t -> t -> t + (** [min x y] returns the minimum of [x] and [y]. + If either [x] or [y] is [nan], [y] is returned. + If both [x] and [y] equal zero, [y] is returned. + The amd64 flambda-backend compiler translates this call to MINSS. *) + + val max : t -> t -> t + (** [max x y] returns the maximum of [x] and [y]. + If either [x] or [y] is [nan], [y] is returned. + If both [x] and [y] equal zero, [y] is returned. + The amd64 flambda-backend compiler translates this call to MAXSS. *) +end + val min_num : t -> t -> t (** [min_num x y] returns the minimum of [x] and [y] treating [nan] as missing values. If both [x] and [y] are [nan], [nan] is returned. @@ -374,5 +388,31 @@ val max_num : t -> t -> t missing values. If both [x] and [y] are [nan] [nan] is returned. Moreover [max_num #-0.s #+0.s = #+0.s] *) +val iround_half_to_even : t -> int64# +(** Rounds a [float32#] to an [int64#] using the current rounding mode. The default + rounding mode is "round half to even", and we expect that no program will + change the rounding mode. + If the argument is NaN or infinite or if the rounded value cannot be + represented, then the result is unspecified. + The amd64 flambda-backend compiler translates this call to CVTSS2SI. *) + +val round_half_to_even : t -> t +(** Rounds a [float32#] to an integer [float32#] using the current rounding + mode. The default rounding mode is "round half to even", and we + expect that no program will change the rounding mode. + The amd64 flambda-backend compiler translates this call to ROUNDSS. *) + +val round_down : t -> t +(** Rounds a [float32#] down to the next integer [float32#] toward negative infinity. + The amd64 flambda-backend compiler translates this call to ROUNDSS.*) + +val round_up : t -> t +(** Rounds a [float32#] up to the next integer [float32#] toward positive infinity. + The amd64 flambda-backend compiler translates this call to ROUNDSS.*) + +val round_towards_zero : t -> t +(** Rounds a [float32#] to the next integer [float32#] toward zero. + The amd64 flambda-backend compiler translates this call to ROUNDSS.*) + (* CR layouts v5: add back hash when we deal with the ad-hoc polymorphic functions. *) diff --git a/ocaml/runtime/float32.c b/ocaml/runtime/float32.c index 58e029f9364..6dd946a3ab5 100644 --- a/ocaml/runtime/float32.c +++ b/ocaml/runtime/float32.c @@ -309,6 +309,51 @@ CAMLprim value caml_ldexp_float32_bytecode(value f, value i) return caml_copy_float32(caml_ldexp_float32(Float32_val(f), Int_val(i))); } +float caml_sse_float32_min(float x, float y) { + return x < y ? x : y; +} + +CAMLprim value caml_sse_float32_min_bytecode(value x, value y) { + return Float32_val(x) < Float32_val(y) ? x : y; +} + +float caml_sse_float32_max(float x, float y) { + return x > y ? x : y; +} + +CAMLprim value caml_sse_float32_max_bytecode(value x, value y) { + return Float32_val(x) > Float32_val(y) ? x : y; +} + +int64_t caml_sse_cast_float32_int64(float f) +{ + return llrintf(f); +} + +CAMLprim value caml_sse_cast_float32_int64_bytecode(value f) +{ + return caml_copy_int64(caml_sse_cast_float32_int64(Float32_val(f))); +} + +#define ROUND_NEG_INF 0x9 +#define ROUND_POS_INF 0xA +#define ROUND_ZERO 0xB +#define ROUND_CURRENT 0xC + +float caml_sse41_float32_round(int mode, float f) { + switch(mode) { + case ROUND_NEG_INF: return floorf(f); + case ROUND_POS_INF: return ceilf(f); + case ROUND_ZERO: return truncf(f); + case ROUND_CURRENT: return rintf(f); + default: caml_fatal_error("Unknown rounding mode."); + } +} + +CAMLprim value caml_sse41_float32_round_bytecode(value mode, value f) { + return caml_copy_float32(caml_sse41_float32_round(Int_val(mode), Float32_val(f))); +} + enum { FP_normal, FP_subnormal, FP_zero, FP_infinite, FP_nan }; value caml_classify_float32(float vf) diff --git a/ocaml/runtime4/float32.c b/ocaml/runtime4/float32.c index 931dd960978..228492e2540 100644 --- a/ocaml/runtime4/float32.c +++ b/ocaml/runtime4/float32.c @@ -309,6 +309,51 @@ CAMLprim value caml_ldexp_float32_bytecode(value f, value i) return caml_copy_float32(caml_ldexp_float32(Float32_val(f), Int_val(i))); } +float caml_sse_float32_min(float x, float y) { + return x < y ? x : y; +} + +CAMLprim value caml_sse_float32_min_bytecode(value x, value y) { + return Float32_val(x) < Float32_val(y) ? x : y; +} + +float caml_sse_float32_max(float x, float y) { + return x > y ? x : y; +} + +CAMLprim value caml_sse_float32_max_bytecode(value x, value y) { + return Float32_val(x) > Float32_val(y) ? x : y; +} + +int64_t caml_sse_cast_float32_int64(float f) +{ + return llrintf(f); +} + +CAMLprim value caml_sse_cast_float32_int64_bytecode(value f) +{ + return caml_copy_int64(caml_sse_cast_float32_int64(Float32_val(f))); +} + +#define ROUND_NEG_INF 0x9 +#define ROUND_POS_INF 0xA +#define ROUND_ZERO 0xB +#define ROUND_CURRENT 0xC + +float caml_sse41_float32_round(int mode, float f) { + switch(mode) { + case ROUND_NEG_INF: return floorf(f); + case ROUND_POS_INF: return ceilf(f); + case ROUND_ZERO: return truncf(f); + case ROUND_CURRENT: return rintf(f); + default: caml_fatal_error("Unknown rounding mode."); + } +} + +CAMLprim value caml_sse41_float32_round_bytecode(value mode, value f) { + return caml_copy_float32(caml_sse41_float32_round(Int_val(mode), Float32_val(f))); +} + enum { FP_normal, FP_subnormal, FP_zero, FP_infinite, FP_nan }; value caml_classify_float32(float vf) diff --git a/tests/small_numbers/float32_lib.ml b/tests/small_numbers/float32_lib.ml index 327e0c98f93..022a47ad2f1 100644 --- a/tests/small_numbers/float32_lib.ml +++ b/tests/small_numbers/float32_lib.ml @@ -102,11 +102,16 @@ module CF32 = struct external min : t -> t -> t = "float32_min_boxed" external max : t -> t -> t = "float32_max_boxed" + external min_weird : t -> t -> t = "float32_min_weird_boxed" + external max_weird : t -> t -> t = "float32_max_weird_boxed" external min_num : t -> t -> t = "float32_min_num_boxed" external max_num : t -> t -> t = "float32_max_num_boxed" external min_max : t -> t -> t * t = "float32_min_max_boxed" external min_max_num : t -> t -> t * t = "float32_min_max_num_boxed" + external round_current : t -> t = "float32_round_current_boxed" + external iround_current : t -> int64 = "float32_iround_current_boxed" + external compare : t -> t -> int = "float32_compare_boxed" [@@noalloc] let equal x y = compare x y = 0 @@ -230,6 +235,8 @@ let () = bit_eq (F32.copy_sign f1 f2) (CF32.copy_sign f1 f2); bit_eq (F32.min f1 f2) (CF32.min f1 f2); bit_eq (F32.max f1 f2) (CF32.max f1 f2); + bit_eq (F32.With_weird_nan_behavior.min f1 f2) (CF32.min_weird f1 f2); + bit_eq (F32.With_weird_nan_behavior.max f1 f2) (CF32.max_weird f1 f2); bit_eq (F32.min_num f1 f2) (CF32.min_num f1 f2); bit_eq (F32.max_num f1 f2) (CF32.max_num f1 f2); assert((F32.compare f1 f2) = (CF32.compare f1 f2)); @@ -237,6 +244,16 @@ let () = ) ;; +let () = + CF32.check_float32s (fun f _ -> + bit_eq (F32.round_up f) (CF32.ceil f); + bit_eq (F32.round_down f) (CF32.floor f); + bit_eq (F32.round_half_to_even f) (CF32.round_current f); + (* Returns int64, so can compare directly. *) + assert ((F32.iround_half_to_even f) = (CF32.iround_current f)); + ) +;; + let () = CF32.check_float32s (fun f _ -> let m, i = F32.frexp f in diff --git a/tests/small_numbers/float32_u_lib.ml b/tests/small_numbers/float32_u_lib.ml index 787a5fadd30..3fbbe6aec55 100644 --- a/tests/small_numbers/float32_u_lib.ml +++ b/tests/small_numbers/float32_u_lib.ml @@ -109,11 +109,16 @@ module CF32 = struct external min : t -> t -> t = "float32_min_boxed" external max : t -> t -> t = "float32_max_boxed" + external min_weird : t -> t -> t = "float32_min_weird_boxed" + external max_weird : t -> t -> t = "float32_max_weird_boxed" external min_num : t -> t -> t = "float32_min_num_boxed" external max_num : t -> t -> t = "float32_max_num_boxed" external min_max : t -> t -> t * t = "float32_min_max_boxed" external min_max_num : t -> t -> t * t = "float32_min_max_num_boxed" + external round_current : t -> t = "float32_round_current_boxed" + external iround_current : t -> int64 = "float32_iround_current_boxed" + external compare : t -> t -> int = "float32_compare_boxed" [@@noalloc] let equal x y = compare x y = 0 @@ -222,6 +227,8 @@ let () = bit_eq (F32.copy_sign u1 u2) (CF32.copy_sign f1 f2); bit_eq (F32.min u1 u2) (CF32.min f1 f2); bit_eq (F32.max u1 u2) (CF32.max f1 f2); + bit_eq (F32.With_weird_nan_behavior.min u1 u2) (CF32.min_weird f1 f2); + bit_eq (F32.With_weird_nan_behavior.max u1 u2) (CF32.max_weird f1 f2); bit_eq (F32.min_num u1 u2) (CF32.min_num f1 f2); bit_eq (F32.max_num u1 u2) (CF32.max_num f1 f2); assert((F32.compare u1 u2) = (CF32.compare f1 f2)); @@ -229,6 +236,17 @@ let () = ) ;; +let () = + CF32.check_float32s (fun f _ -> + let u = F32.of_float32 f in + bit_eq (F32.round_up u) (CF32.ceil f); + bit_eq (F32.round_down u) (CF32.floor f); + bit_eq (F32.round_half_to_even u) (CF32.round_current f); + (* Returns int64, so can compare directly. *) + assert (box_int64 (F32.iround_half_to_even u) = (CF32.iround_current f)); + ) +;; + let () = CF32.check_float32s (fun f _ -> let u = F32.of_float32 f in diff --git a/tests/small_numbers/stubs.c b/tests/small_numbers/stubs.c index be933a47efb..17c2db69709 100644 --- a/tests/small_numbers/stubs.c +++ b/tests/small_numbers/stubs.c @@ -192,6 +192,20 @@ value float32_max_boxed(value l, value r) return caml_copy_float32(fmaxf(f, g)); } +value float32_min_weird_boxed(value l, value r) +{ + float f = Float32_val(l); + float g = Float32_val(r); + return caml_copy_float32(f < g ? f : g); +} + +value float32_max_weird_boxed(value l, value r) +{ + float f = Float32_val(l); + float g = Float32_val(r); + return caml_copy_float32(f > g ? f : g); +} + value float32_min_num_boxed(value l, value r) { float f = Float32_val(l); @@ -312,3 +326,13 @@ value float32_classify_boxed(value f) { return float32_classify(Float32_val(f)); } + +value float32_round_current_boxed(value f) +{ + return caml_copy_float32(rintf(Float32_val(f))); +} + +value float32_iround_current_boxed(value f) +{ + return caml_copy_int64(llrintf(Float32_val(f))); +}