Skip to content

Commit

Permalink
flambda-backend: Float32 min/max/rounding intrinsics (#2684)
Browse files Browse the repository at this point in the history
  • Loading branch information
TheNumbat authored Jun 13, 2024
1 parent 93f2ff8 commit e48d83c
Show file tree
Hide file tree
Showing 6 changed files with 224 additions and 7 deletions.
28 changes: 28 additions & 0 deletions otherlibs/stdlib_beta/float32.ml
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,16 @@ let[@inline] max (x : t) (y : t) =
else if is_nan y then y
else x

module With_weird_nan_behavior = struct
external min : t -> t -> t
= "caml_sse_float32_min_bytecode" "caml_sse_float32_min"
[@@noalloc] [@@unboxed] [@@builtin]

external max : t -> t -> t
= "caml_sse_float32_max_bytecode" "caml_sse_float32_max"
[@@noalloc] [@@unboxed] [@@builtin]
end

let[@inline] min_max (x : t) (y : t) =
if is_nan x || is_nan y then (nan, nan)
else if y > x || ((not (sign_bit y)) && sign_bit x) then (x, y)
Expand All @@ -267,6 +277,24 @@ let[@inline] min_max_num (x : t) (y : t) =
else if y > x || ((not (sign_bit y)) && sign_bit x) then (x, y)
else (y, x)

external iround_half_to_even : t -> int64
= "caml_sse_cast_float32_int64_bytecode" "caml_sse_cast_float32_int64"
[@@noalloc] [@@unboxed] [@@builtin]

external round_intrinsic : (int[@untagged]) -> (t[@unboxed]) -> (t[@unboxed])
= "caml_sse41_float32_round_bytecode" "caml_sse41_float32_round"
[@@noalloc] [@@builtin]

(* On amd64, these constants also imply _MM_FROUND_NO_EXC (suppress exceptions). *)
let round_neg_inf = 0x9
let round_pos_inf = 0xA
let round_zero = 0xB
let round_current_mode = 0xC
let[@inline] round_half_to_even x = round_intrinsic round_current_mode x
let[@inline] round_down x = round_intrinsic round_neg_inf x
let[@inline] round_up x = round_intrinsic round_pos_inf x
let[@inline] round_towards_zero x = round_intrinsic round_zero x

external seeded_hash_param : int -> int -> int -> 'a -> int = "caml_hash_exn"
[@@noalloc]

Expand Down
57 changes: 50 additions & 7 deletions otherlibs/stdlib_beta/float32.mli
Original file line number Diff line number Diff line change
Expand Up @@ -374,17 +374,14 @@ external erfc : t -> t = "caml_erfc_float32_bytecode" "erfcf"
external trunc : t -> t = "caml_trunc_float32_bytecode" "truncf"
[@@unboxed] [@@noalloc]
(** [trunc x] rounds [x] to the nearest integer whose absolute value is
less than or equal to [x]. *)
less than or equal to [x]. *)

external round : t -> t = "caml_round_float32_bytecode" "roundf"
[@@unboxed] [@@noalloc]
(** [round x] rounds [x] to the nearest integer with ties (fractional
values of 0.5s) rounded away from zero, regardless of the current
rounding direction. If [x] is an integer, [+0.s], [-0.s], [nan], or
infinite, [x] itself is returned.
On 64-bit mingw-w64, this function may be emulated owing to a bug in the
C runtime library (CRT) on this platform. *)
values of 0.5s) rounded away from zero, regardless of the current
rounding direction. If [x] is an integer, [+0.s], [-0.s], [nan], or
infinite, [x] itself is returned. *)

external ceil : t -> t = "caml_ceil_float32_bytecode" "ceilf"
[@@unboxed] [@@noalloc]
Expand Down Expand Up @@ -461,6 +458,24 @@ val max : t -> t -> t
(** [max x y] returns the maximum of [x] and [y]. It returns [nan]
when [x] or [y] is [nan]. Moreover [max (-0.s) (+0.s) = +0.s] *)

module With_weird_nan_behavior : sig
external min : t -> t -> t
= "caml_sse_float32_min_bytecode" "caml_sse_float32_min"
[@@noalloc] [@@unboxed] [@@builtin]
(** [min x y] returns the minimum of [x] and [y].
If either [x] or [y] is [nan], [y] is returned.
If both [x] and [y] equal zero, [y] is returned.
The amd64 flambda-backend compiler translates this call to MINSS. *)

external max : t -> t -> t
= "caml_sse_float32_max_bytecode" "caml_sse_float32_max"
[@@noalloc] [@@unboxed] [@@builtin]
(** [max x y] returns the maximum of [x] and [y].
If either [x] or [y] is [nan], [y] is returned.
If both [x] and [y] equal zero, [y] is returned.
The amd64 flambda-backend compiler translates this call to MAXSS. *)
end

val min_max : t -> t -> t * t
(** [min_max x y] is [(min x y, max x y)], just more efficient. *)

Expand All @@ -479,6 +494,34 @@ val min_max_num : t -> t -> t * t
efficient. Note that in particular [min_max_num x nan = (x, x)]
and [min_max_num nan y = (y, y)]. *)

external iround_half_to_even : t -> int64
= "caml_sse_cast_float32_int64_bytecode" "caml_sse_cast_float32_int64"
[@@noalloc] [@@unboxed] [@@builtin]
(** Rounds a [float32] to an [int64] using the current rounding mode. The default
rounding mode is "round half to even", and we expect that no program will
change the rounding mode.
If the argument is NaN or infinite or if the rounded value cannot be
represented, then the result is unspecified.
The amd64 flambda-backend compiler translates this call to CVTSS2SI. *)

val round_half_to_even : t -> t
(** Rounds a [float32] to an integer [float32] using the current rounding
mode. The default rounding mode is "round half to even", and we
expect that no program will change the rounding mode.
The amd64 flambda-backend compiler translates this call to ROUNDSS. *)

val round_down : t -> t
(** Rounds a [float32] down to the next integer [float32] toward negative infinity.
The amd64 flambda-backend compiler translates this call to ROUNDSS.*)

val round_up : t -> t
(** Rounds a [float32] up to the next integer [float32] toward positive infinity.
The amd64 flambda-backend compiler translates this call to ROUNDSS.*)

val round_towards_zero : t -> t
(** Rounds a [float32] to the next integer [float32] toward zero.
The amd64 flambda-backend compiler translates this call to ROUNDSS.*)

val seeded_hash : int -> t -> int
(** A seeded hash function for floats, with the same output value as
{!Hashtbl.seeded_hash}. This function allows this module to be passed as
Expand Down
16 changes: 16 additions & 0 deletions otherlibs/stdlib_beta/float32_u.ml
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,22 @@ let[@inline always] min x y = of_float32 (Float32.min (to_float32 x) (to_float32

let[@inline always] max x y = of_float32 (Float32.max (to_float32 x) (to_float32 y))

module With_weird_nan_behavior = struct
let[@inline always] min x y = of_float32 (Float32.With_weird_nan_behavior.min (to_float32 x) (to_float32 y))

let[@inline always] max x y = of_float32 (Float32.With_weird_nan_behavior.max (to_float32 x) (to_float32 y))
end

let[@inline always] min_num x y = of_float32 (Float32.min_num (to_float32 x) (to_float32 y))

let[@inline always] max_num x y = of_float32 (Float32.max_num (to_float32 x) (to_float32 y))

let iround_half_to_even x = unbox_int64 (Float32.iround_half_to_even (to_float32 x))

let round_half_to_even x = of_float32 (Float32.round_half_to_even (to_float32 x))

let round_down x = of_float32 (Float32.round_down (to_float32 x))

let round_up x = of_float32 (Float32.round_up (to_float32 x))

let round_towards_zero x = of_float32 (Float32.round_towards_zero (to_float32 x))
40 changes: 40 additions & 0 deletions otherlibs/stdlib_beta/float32_u.mli
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,20 @@ val max : t -> t -> t
(** [max x y] returns the maximum of [x] and [y]. It returns [nan]
when [x] or [y] is [nan]. Moreover [max #-0.s #+0.s = #+0.s] *)

module With_weird_nan_behavior : sig
val min : t -> t -> t
(** [min x y] returns the minimum of [x] and [y].
If either [x] or [y] is [nan], [y] is returned.
If both [x] and [y] equal zero, [y] is returned.
The amd64 flambda-backend compiler translates this call to MINSS. *)

val max : t -> t -> t
(** [max x y] returns the maximum of [x] and [y].
If either [x] or [y] is [nan], [y] is returned.
If both [x] and [y] equal zero, [y] is returned.
The amd64 flambda-backend compiler translates this call to MAXSS. *)
end

val min_num : t -> t -> t
(** [min_num x y] returns the minimum of [x] and [y] treating [nan] as
missing values. If both [x] and [y] are [nan], [nan] is returned.
Expand All @@ -374,5 +388,31 @@ val max_num : t -> t -> t
missing values. If both [x] and [y] are [nan] [nan] is returned.
Moreover [max_num #-0.s #+0.s = #+0.s] *)

val iround_half_to_even : t -> int64#
(** Rounds a [float32#] to an [int64#] using the current rounding mode. The default
rounding mode is "round half to even", and we expect that no program will
change the rounding mode.
If the argument is NaN or infinite or if the rounded value cannot be
represented, then the result is unspecified.
The amd64 flambda-backend compiler translates this call to CVTSS2SI. *)

val round_half_to_even : t -> t
(** Rounds a [float32#] to an integer [float32#] using the current rounding
mode. The default rounding mode is "round half to even", and we
expect that no program will change the rounding mode.
The amd64 flambda-backend compiler translates this call to ROUNDSS. *)

val round_down : t -> t
(** Rounds a [float32#] down to the next integer [float32#] toward negative infinity.
The amd64 flambda-backend compiler translates this call to ROUNDSS.*)

val round_up : t -> t
(** Rounds a [float32#] up to the next integer [float32#] toward positive infinity.
The amd64 flambda-backend compiler translates this call to ROUNDSS.*)

val round_towards_zero : t -> t
(** Rounds a [float32#] to the next integer [float32#] toward zero.
The amd64 flambda-backend compiler translates this call to ROUNDSS.*)

(* CR layouts v5: add back hash when we deal with the ad-hoc polymorphic
functions. *)
45 changes: 45 additions & 0 deletions runtime/float32.c
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,51 @@ CAMLprim value caml_ldexp_float32_bytecode(value f, value i)
return caml_copy_float32(caml_ldexp_float32(Float32_val(f), Int_val(i)));
}

float caml_sse_float32_min(float x, float y) {
return x < y ? x : y;
}

CAMLprim value caml_sse_float32_min_bytecode(value x, value y) {
return Float32_val(x) < Float32_val(y) ? x : y;
}

float caml_sse_float32_max(float x, float y) {
return x > y ? x : y;
}

CAMLprim value caml_sse_float32_max_bytecode(value x, value y) {
return Float32_val(x) > Float32_val(y) ? x : y;
}

int64_t caml_sse_cast_float32_int64(float f)
{
return llrintf(f);
}

CAMLprim value caml_sse_cast_float32_int64_bytecode(value f)
{
return caml_copy_int64(caml_sse_cast_float32_int64(Float32_val(f)));
}

#define ROUND_NEG_INF 0x9
#define ROUND_POS_INF 0xA
#define ROUND_ZERO 0xB
#define ROUND_CURRENT 0xC

float caml_sse41_float32_round(int mode, float f) {
switch(mode) {
case ROUND_NEG_INF: return floorf(f);
case ROUND_POS_INF: return ceilf(f);
case ROUND_ZERO: return truncf(f);
case ROUND_CURRENT: return rintf(f);
default: caml_fatal_error("Unknown rounding mode.");
}
}

CAMLprim value caml_sse41_float32_round_bytecode(value mode, value f) {
return caml_copy_float32(caml_sse41_float32_round(Int_val(mode), Float32_val(f)));
}

enum { FP_normal, FP_subnormal, FP_zero, FP_infinite, FP_nan };

value caml_classify_float32(float vf)
Expand Down
45 changes: 45 additions & 0 deletions runtime4/float32.c
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,51 @@ CAMLprim value caml_ldexp_float32_bytecode(value f, value i)
return caml_copy_float32(caml_ldexp_float32(Float32_val(f), Int_val(i)));
}

float caml_sse_float32_min(float x, float y) {
return x < y ? x : y;
}

CAMLprim value caml_sse_float32_min_bytecode(value x, value y) {
return Float32_val(x) < Float32_val(y) ? x : y;
}

float caml_sse_float32_max(float x, float y) {
return x > y ? x : y;
}

CAMLprim value caml_sse_float32_max_bytecode(value x, value y) {
return Float32_val(x) > Float32_val(y) ? x : y;
}

int64_t caml_sse_cast_float32_int64(float f)
{
return llrintf(f);
}

CAMLprim value caml_sse_cast_float32_int64_bytecode(value f)
{
return caml_copy_int64(caml_sse_cast_float32_int64(Float32_val(f)));
}

#define ROUND_NEG_INF 0x9
#define ROUND_POS_INF 0xA
#define ROUND_ZERO 0xB
#define ROUND_CURRENT 0xC

float caml_sse41_float32_round(int mode, float f) {
switch(mode) {
case ROUND_NEG_INF: return floorf(f);
case ROUND_POS_INF: return ceilf(f);
case ROUND_ZERO: return truncf(f);
case ROUND_CURRENT: return rintf(f);
default: caml_fatal_error("Unknown rounding mode.");
}
}

CAMLprim value caml_sse41_float32_round_bytecode(value mode, value f) {
return caml_copy_float32(caml_sse41_float32_round(Int_val(mode), Float32_val(f)));
}

enum { FP_normal, FP_subnormal, FP_zero, FP_infinite, FP_nan };

value caml_classify_float32(float vf)
Expand Down

0 comments on commit e48d83c

Please sign in to comment.