Refactor peephole pass (ocaml-flambda#3797)

gretay-js · gretay-js · commit 2c028b3cb899 · 2025-04-21T12:54:56.000+01:00
diff --git a/backend/cfg_selectgen.ml b/backend/cfg_selectgen.ml
@@ -29,6 +29,9 @@ module SU = Select_utils
 module V = Backend_var
 module VP = Backend_var.With_provenance
 
+(* CR-soon gyorsh: This functor must not have state, because it is instantiated
+   twice with the same [Target] (see [Asmgen] and [Peephole_utils]) to avoid
+   dependency cycles. *)
 module Make (Target : Cfg_selectgen_target_intf.S) = struct
   (* A syntactic criterion used in addition to judgements about (co)effects as
      to whether the evaluation of a given expression may be deferred by
diff --git a/backend/cfg_selectgen.mli b/backend/cfg_selectgen.mli
@@ -32,4 +32,6 @@ module Make (Target : Cfg_selectgen_target_intf.S) : sig
     future_funcnames:Misc.Stdlib.String.Set.t ->
     Cmm.fundecl ->
     Cfg_with_layout.t
+
+  val is_immediate : Operation.integer_operation -> int -> bool
 end
diff --git a/backend/peephole/peephole_rules.ml b/backend/peephole/peephole_rules.ml
@@ -72,112 +72,68 @@ let remove_useless_mov (cell : Cfg.basic Cfg.instruction DLL.cell) =
 
 (** Logical condition for simplifying the following case:
   {|
-    <op 1> const1, r
-    <op 2> const2, r
+    <op1> const1, r
+    <op2> const2, r
   |}
 
   to:
   {|
-    <op 1> (const1 <op 2> const2), r
+    <op1> (const1 <op2> const2), r
   |}
 
-   Where <op 1> and <op 2> can be any two binary operators that are associative and commutative
-   and const1 and const2 are immediate values. *)
+    where
+    const1 and const2 are immediate values, and
+    <op1> and <op2> are associative binary operators such
+    that either <op1> is the same as <op2>, or <op1> is the inverse of <op2>,
+    or there exists const3 such that <op1 const1> can be expressed as <op2 const3>
+    or <op2 const2> can be expressed as <op1 const3> *)
 
 let are_compatible op1 op2 imm1 imm2 :
     (Operation.integer_operation * int) option =
   match
     (op1 : Operation.integer_operation), (op2 : Operation.integer_operation)
   with
-  (* Folding two bitwise operations such as (AND, OR, XOR) should never produce
-     an overflow so we assert this conditon. *)
-  | Iand, Iand ->
-    assert (U.amd64_imm32_within_bounds imm1 imm2 ( land ));
-    Some (Iand, imm1 land imm2)
-  | Ior, Ior ->
-    assert (U.amd64_imm32_within_bounds imm1 imm2 ( lor ));
-    Some (Ior, imm1 lor imm2)
-  | Ixor, Ixor ->
-    assert (U.amd64_imm32_within_bounds imm1 imm2 ( lxor ));
-    Some (Ixor, imm1 lxor imm2)
+  | Iand, Iand -> U.bitwise_immediates op1 imm1 imm2 ( land )
+  | Ior, Ior -> U.bitwise_immediates op1 imm1 imm2 ( lor )
+  | Ixor, Ixor -> U.bitwise_immediates op1 imm1 imm2 ( lxor )
   (* For the following three cases we have the issue that in some situations,
      one or both immediate values could be out of bounds, but the result might
      be within bounds (e.g. imm1 = -4 and imm2 = 65, their sum being 61). This
      should not happen at all since the immediate values should always be within
      the bounds [0, Sys.int_size]. *)
-  | Ilsl, Ilsl ->
-    if Misc.no_overflow_add imm1 imm2 && imm1 + imm2 <= Sys.int_size
-    then (
-      U.bitwise_shift_assert imm1 imm2;
-      Some (Ilsl, imm1 + imm2))
-    else None
-  | Ilsr, Ilsr ->
-    if Misc.no_overflow_add imm1 imm2 && imm1 + imm2 <= Sys.int_size
-    then (
-      U.bitwise_shift_assert imm1 imm2;
-      Some (Ilsr, imm1 + imm2))
-    else None
-  | Iasr, Iasr ->
-    if Misc.no_overflow_add imm1 imm2 && imm1 + imm2 <= Sys.int_size
-    then (
-      U.bitwise_shift_assert imm1 imm2;
-      Some (Iasr, imm1 + imm2))
-    else None
-  (* for the amd64 instruction set the `ADD` `SUB` `MUL` opperations take at
-     most an imm32 as the second argument, so we need to check for overflows on
-     32-bit signed ints. *)
-  (* CR-someday gtulba-lecu: This condition is architecture specific and should
-     either live in amd64 specific code or this module should contain
-     information about the architecture target. *)
-  | Iadd, Iadd ->
-    if Misc.no_overflow_add imm1 imm2
-       && U.amd64_imm32_within_bounds imm1 imm2 ( + )
-    then Some (Iadd, imm1 + imm2)
-    else None
+  | Ilsl, Ilsl | Ilsr, Ilsr | Iasr, Iasr | Iadd, Iadd ->
+    U.add_immediates op1 imm1 imm2
   | Iadd, Isub ->
+    (* The following transformation changes the order of operations on [r] and
+       therefore might change the overflow behavior: e.g., [r+c1] may overflow
+       while [r-(c2-c1)] does not. This is fine; other compiler transformations
+       may also do it. The code below only ensures that the immediates the
+       compiler emits do not overflow. *)
     if imm1 >= imm2
-    then
-      if Misc.no_overflow_sub imm1 imm2
-         && U.amd64_imm32_within_bounds imm1 imm2 ( - )
-      then Some (Iadd, imm1 - imm2)
-      else None
-    else if Misc.no_overflow_sub imm2 imm1
-            && U.amd64_imm32_within_bounds imm2 imm1 ( - )
-    then Some (Isub, imm2 - imm1)
-    else None
-  | Isub, Isub ->
-    if Misc.no_overflow_add imm1 imm2
-       && U.amd64_imm32_within_bounds imm1 imm2 ( + )
-    then Some (Isub, imm1 + imm2)
-    else None
+    then U.sub_immediates Iadd imm1 imm2
+    else U.sub_immediates Isub imm2 imm1
+  | Isub, Isub (* r - (imm1 + imm2) *) -> U.add_immediates Isub imm1 imm2
   | Isub, Iadd ->
     if imm1 >= imm2
-    then
-      if Misc.no_overflow_sub imm1 imm2
-         && U.amd64_imm32_within_bounds imm1 imm2 ( - )
-      then Some (Isub, imm1 - imm2)
-      else None
-    else if Misc.no_overflow_sub imm2 imm1
-            && U.amd64_imm32_within_bounds imm2 imm1 ( - )
-    then Some (Iadd, imm2 - imm1)
-    else None
+    then U.sub_immediates Isub imm1 imm2
+    else U.sub_immediates Iadd imm2 imm1
   | Ilsl, Imul ->
-    if imm1 >= 0 && imm1 < 31
-       && Misc.no_overflow_mul (1 lsl imm1) imm2
-       && U.amd64_imm32_within_bounds (1 lsl imm1) imm2 ( * )
-    then Some (Imul, (1 lsl imm1) * imm2)
+    (* [imm1] is guaranteed to be within bounds for [Ilsl], but [1 lsl imm1] may
+       not be within bounds for [Imul]. *)
+    U.assert_within_range Ilsl imm1;
+    let imm1 = 1 lsl imm1 in
+    if U.is_immediate_for_intop Imul imm1
+    then U.mul_immediates Imul imm1 imm2
     else None
   | Imul, Ilsl ->
-    if imm2 >= 0 && imm2 < 31
-       && Misc.no_overflow_mul imm1 (1 lsl imm2)
-       && U.amd64_imm32_within_bounds imm1 (1 lsl imm2) ( * )
-    then Some (Imul, imm1 * (1 lsl imm2))
-    else None
-  | Imul, Imul ->
-    if Misc.no_overflow_mul imm1 imm2
-       && U.amd64_imm32_within_bounds imm1 imm2 ( * )
-    then Some (Imul, imm1 * imm2)
+    (* [imm2] is guaranteed to be within bounds for [Ilsl], but [1 lsl imm2] may
+       not be within bounds for [Imul]. *)
+    U.assert_within_range Ilsl imm2;
+    let imm2 = 1 lsl imm2 in
+    if U.is_immediate_for_intop Imul imm2
+    then U.mul_immediates Imul imm1 imm2
     else None
+  | Imul, Imul -> U.mul_immediates op1 imm1 imm2
   (* CR-soon gtulba-lecu: check this last case | Imod, Imod -> if imm1 mod imm2
      = 0 then Some (Imod, imm2) else None
 
@@ -199,11 +155,8 @@ let fold_intop_imm (cell : Cfg.basic Cfg.instruction DLL.cell) =
     let snd_val = DLL.value snd in
     (* The following check does the following: 1. Ensures that both instructions
        use the same source register; 2. Ensures that both instructions output
-       the result to the source register, this is redundant for amd64 since
-       there are no instructions that invalidate this condition. *)
-    (* CR-someday gtulba-lecu: This condition is architecture specific and
-       should either live in amd64 specific code or this module should contain
-       information about the architecture target. *)
+       the result to the source register. This is currently redundant for amd64
+       since there are no instructions that invalidate this condition. *)
     if Array.length fst_val.arg = 1
        && Array.length snd_val.arg = 1
        && Array.length fst_val.res = 1
@@ -235,9 +188,10 @@ let fold_intop_imm (cell : Cfg.basic Cfg.instruction DLL.cell) =
   | _ -> None
 
 let apply cell =
-  match remove_overwritten_mov cell with
-  | None -> (
-    match remove_useless_mov cell with
-    | None -> ( match fold_intop_imm cell with None -> None | res -> res)
-    | res -> res)
-  | res -> res
+  let[@inline always] if_none_do f o =
+    match o with Some _ -> o | None -> f cell
+  in
+  None
+  |> if_none_do remove_overwritten_mov
+  |> if_none_do remove_useless_mov
+  |> if_none_do fold_intop_imm
diff --git a/backend/peephole/peephole_utils.ml b/backend/peephole/peephole_utils.ml
@@ -1,15 +1,10 @@
 module DLL = Flambda_backend_utils.Doubly_linked_list
 open! Int_replace_polymorphic_compare
 
-(* CR-someday gtulba-lecu: make sure that this comparison is correct and
-   sufficent. Take into consideration using Proc.regs_are_volatile in the
-   future. As we only support amd64 and Proc.regs_are_volatile is always false
-   in amd64 this is not necessary for now. See backend/cfg/cfg_deadcode.ml for
-   more details.*)
 let are_equal_regs (reg1 : Reg.t) (reg2 : Reg.t) =
   Reg.same_loc reg1 reg2 && Cmm.equal_machtype_component reg1.typ reg2.typ
 
-(* CR-soon gtulba-lecu: Delete this when imeplementing auto-generated rules. *)
+(* CR-soon gtulba-lecu: Delete this when implementing auto-generated rules. *)
 let go_back_const = 1
 
 let rec prev_at_most steps cell =
@@ -32,19 +27,48 @@ let get_cells cell size =
   assert (size > 0);
   get_cells' (DLL.next cell) (size - 1) [cell]
 
-let is_bitwise_op (op : Operation.integer_operation) =
-  match op with Iand | Ior | Ixor | Ilsl | Ilsr | Iasr -> true | _ -> false
-  [@@ocaml.warning "-4"]
-
-let bitwise_shift_assert (imm1 : int) (imm2 : int) =
-  if imm1 < 0 || imm1 > Sys.int_size || imm2 < 0 || imm2 > Sys.int_size
-  then assert false
-  [@@inline]
-
-(* CR-someday gtulba-lecu: This is architecture specific and should be moved in
-   a different part of the compiler that is specific to the amd64 architecture.
-   This is fine for now as we only support amd64. *)
-let amd64_imm32_within_bounds imm1 imm2 op =
-  let imm = op imm1 imm2 in
-  Int32.to_int Int32.min_int <= imm && imm <= Int32.to_int Int32.max_int
-  [@@inline]
+(* CR-soon gyorsh: This functor is also instantiated in
+   [Asmgen.compile_fundecl]. Find a shared place to put it, instead of
+   instantiating twice. May require restructuring the backend to avoid
+   dependency cycles. *)
+module Cfg_selection = Cfg_selectgen.Make (Cfg_selection)
+
+let is_immediate_for_intop op n = Cfg_selection.is_immediate op n
+
+let assert_within_range integer_operation imm =
+  if not (is_immediate_for_intop integer_operation imm)
+  then
+    Misc.fatal_errorf "Peephole: unexpected immediate %d for operation %s" imm
+      (Operation.string_of_integer_operation integer_operation)
+
+let[@inline] op_immediates integer_operation imm1 imm2 no_overflow op =
+  (* [no_overflow imm1 imm2] may assume that each of the immediates on its own
+     is within bounds. *)
+  assert_within_range integer_operation imm1;
+  assert_within_range integer_operation imm2;
+  let res = op imm1 imm2 in
+  if no_overflow imm1 imm2 && is_immediate_for_intop integer_operation res
+  then Some (integer_operation, res)
+  else None
+
+let add_immediates integer_operation imm1 imm2 =
+  op_immediates integer_operation imm1 imm2 Misc.no_overflow_add ( + )
+
+let sub_immediates integer_operation imm1 imm2 =
+  op_immediates integer_operation imm1 imm2 Misc.no_overflow_sub ( - )
+
+let mul_immediates integer_operation imm1 imm2 =
+  op_immediates integer_operation imm1 imm2 Misc.no_overflow_mul ( * )
+
+let never_overflow _ _ = true
+
+let bitwise_immediates integer_operation imm1 imm2 op =
+  (* Bitwise operations on immediates within range cannot produce immediates
+     outside of range. Bitwise operations do not need overflow check. *)
+  match op_immediates integer_operation imm1 imm2 never_overflow op with
+  | None ->
+    Misc.fatal_errorf
+      "Peephole: cannot rewrite immediates for %s: combining %d %d = %d"
+      (Operation.string_of_integer_operation integer_operation)
+      imm1 imm2 (op imm1 imm2)
+  | Some _ as res -> res
diff --git a/backend/peephole/peephole_utils.mli b/backend/peephole/peephole_utils.mli
@@ -13,8 +13,33 @@ val get_cells :
   int ->
   Cfg.basic Cfg.instruction DLL.cell list
 
-val is_bitwise_op : Operation.integer_operation -> bool
+(** The following functions check for overflow and ranges of immediates
+    w.r.t. the operation and optionally rewrite the operation. *)
+val add_immediates :
+  Operation.integer_operation ->
+  int ->
+  int ->
+  (Operation.integer_operation * int) option
+
+val sub_immediates :
+  Operation.integer_operation ->
+  int ->
+  int ->
+  (Operation.integer_operation * int) option
+
+val mul_immediates :
+  Operation.integer_operation ->
+  int ->
+  int ->
+  (Operation.integer_operation * int) option
+
+val bitwise_immediates :
+  Operation.integer_operation ->
+  int ->
+  int ->
+  (int -> int -> int) ->
+  (Operation.integer_operation * int) option
 
-val bitwise_shift_assert : int -> int -> unit
+val assert_within_range : Operation.integer_operation -> int -> unit
 
-val amd64_imm32_within_bounds : int -> int -> (int -> int -> int) -> bool
+val is_immediate_for_intop : Operation.integer_operation -> int -> bool