|
| 1 | +(* CR-someday: see whether the `-4` can be dropped. *) |
| 2 | +[@@@ocaml.warning "+a-29-40-41-42-4"] |
| 3 | + |
| 4 | +module DLL = Flambda_backend_utils.Doubly_linked_list |
| 5 | +module U = Peephole_utils |
| 6 | + |
| 7 | +(** Logical condition for simplifying the following case: |
| 8 | + {| |
| 9 | + mov x, y |
| 10 | + mov y, x |
| 11 | + |} |
| 12 | +
|
| 13 | + In this case, the second instruction should be removed *) |
| 14 | + |
| 15 | +let remove_useless_mov (cell : Cfg.basic Cfg.instruction DLL.cell) = |
| 16 | + match U.get_cells cell 2 with |
| 17 | + | [fst; snd] -> ( |
| 18 | + let fst_val = DLL.value fst in |
| 19 | + let snd_val = DLL.value snd in |
| 20 | + match fst_val.desc with |
| 21 | + | Op (Move | Spill | Reload) -> ( |
| 22 | + let fst_src, fst_dst = fst_val.arg.(0), fst_val.res.(0) in |
| 23 | + match snd_val.desc with |
| 24 | + | Op (Move | Spill | Reload) -> |
| 25 | + let snd_src, snd_dst = snd_val.arg.(0), snd_val.res.(0) in |
| 26 | + if U.are_equal_regs fst_src snd_dst && U.are_equal_regs fst_dst snd_src |
| 27 | + then ( |
| 28 | + DLL.delete_curr snd; |
| 29 | + Some (U.prev_at_most U.go_back_const fst)) |
| 30 | + else None |
| 31 | + | _ -> None) |
| 32 | + | _ -> None) |
| 33 | + | _ -> None |
| 34 | + |
| 35 | +(** Logical condition for simplifying the following case: |
| 36 | + {| |
| 37 | + <op 1> const1, r |
| 38 | + <op 2> const2, r |
| 39 | + |} |
| 40 | +
|
| 41 | + to: |
| 42 | + {| |
| 43 | + <op 1> (const1 <op 2> const2), r |
| 44 | + |} |
| 45 | +
|
| 46 | + Where <op 1> and <op 2> can be any two binary operators that are associative and commutative |
| 47 | + and const1 and const2 are immediate values. *) |
| 48 | + |
| 49 | +let are_compatible op1 op2 imm1 imm2 = |
| 50 | + match (op1 : Mach.integer_operation), (op2 : Mach.integer_operation) with |
| 51 | + (* Folding two bitwise operations such as (AND, OR, XOR) should never produce |
| 52 | + an overflow so we assert this conditon. *) |
| 53 | + | Mach.Iand, Mach.Iand -> |
| 54 | + assert (U.amd64_imm32_within_bounds imm1 imm2 ( land )); |
| 55 | + Some (Mach.Iand, imm1 land imm2) |
| 56 | + | Ior, Ior -> |
| 57 | + assert (U.amd64_imm32_within_bounds imm1 imm2 ( lor )); |
| 58 | + Some (Mach.Ior, imm1 lor imm2) |
| 59 | + | Ixor, Ixor -> |
| 60 | + assert (U.amd64_imm32_within_bounds imm1 imm2 ( lxor )); |
| 61 | + Some (Mach.Ixor, imm1 lxor imm2) |
| 62 | + (* For the following three cases we have the issue that in some situations, |
| 63 | + one or both immediate values could be out of bounds, but the result might |
| 64 | + be within bounds (e.g. imm1 = -4 and imm2 = 65, their sum being 61). This |
| 65 | + should not happen at all since the immediate values should always be within |
| 66 | + the bounds [0, Sys.int_size]. *) |
| 67 | + | Ilsl, Ilsl -> |
| 68 | + if Misc.no_overflow_add imm1 imm2 && imm1 + imm2 <= Sys.int_size |
| 69 | + then ( |
| 70 | + U.bitwise_shift_assert imm1 imm2; |
| 71 | + Some (Mach.Ilsl, imm1 + imm2)) |
| 72 | + else None |
| 73 | + | Ilsr, Ilsr -> |
| 74 | + if Misc.no_overflow_add imm1 imm2 && imm1 + imm2 <= Sys.int_size |
| 75 | + then ( |
| 76 | + U.bitwise_shift_assert imm1 imm2; |
| 77 | + Some (Mach.Ilsr, imm1 + imm2)) |
| 78 | + else None |
| 79 | + | Iasr, Iasr -> |
| 80 | + if Misc.no_overflow_add imm1 imm2 && imm1 + imm2 <= Sys.int_size |
| 81 | + then ( |
| 82 | + U.bitwise_shift_assert imm1 imm2; |
| 83 | + Some (Mach.Iasr, imm1 + imm2)) |
| 84 | + else None |
| 85 | + (* for the amd64 instruction set the `ADD` `SUB` `MUL` opperations take at |
| 86 | + most an imm32 as the second argument, so we need to check for overflows on |
| 87 | + 32-bit signed ints. *) |
| 88 | + (* CR-someday gtulba-lecu: This condition is architecture specific and should |
| 89 | + either live in amd64 specific code or this module should contain |
| 90 | + information about the architecture target. *) |
| 91 | + | Iadd, Iadd -> |
| 92 | + if Misc.no_overflow_add imm1 imm2 |
| 93 | + && U.amd64_imm32_within_bounds imm1 imm2 ( + ) |
| 94 | + then Some (Mach.Iadd, imm1 + imm2) |
| 95 | + else None |
| 96 | + | Iadd, Isub -> |
| 97 | + if imm1 >= imm2 |
| 98 | + then |
| 99 | + if Misc.no_overflow_sub imm1 imm2 |
| 100 | + && U.amd64_imm32_within_bounds imm1 imm2 ( - ) |
| 101 | + then Some (Mach.Iadd, imm1 - imm2) |
| 102 | + else None |
| 103 | + else if Misc.no_overflow_sub imm2 imm1 |
| 104 | + && U.amd64_imm32_within_bounds imm2 imm1 ( - ) |
| 105 | + then Some (Mach.Isub, imm2 - imm1) |
| 106 | + else None |
| 107 | + | Isub, Isub -> |
| 108 | + if Misc.no_overflow_add imm1 imm2 |
| 109 | + && U.amd64_imm32_within_bounds imm1 imm2 ( + ) |
| 110 | + then Some (Mach.Isub, imm1 + imm2) |
| 111 | + else None |
| 112 | + | Isub, Iadd -> |
| 113 | + if imm1 >= imm2 |
| 114 | + then |
| 115 | + if Misc.no_overflow_sub imm1 imm2 |
| 116 | + && U.amd64_imm32_within_bounds imm1 imm2 ( - ) |
| 117 | + then Some (Mach.Isub, imm1 - imm2) |
| 118 | + else None |
| 119 | + else if Misc.no_overflow_sub imm2 imm1 |
| 120 | + && U.amd64_imm32_within_bounds imm2 imm1 ( - ) |
| 121 | + then Some (Mach.Iadd, imm2 - imm1) |
| 122 | + else None |
| 123 | + | Ilsl, Imul -> |
| 124 | + if imm1 >= 0 && imm1 < 31 |
| 125 | + && Misc.no_overflow_mul (1 lsl imm1) imm2 |
| 126 | + && U.amd64_imm32_within_bounds (1 lsl imm1) imm2 ( * ) |
| 127 | + then Some (Mach.Imul, (1 lsl imm1) * imm2) |
| 128 | + else None |
| 129 | + | Imul, Ilsl -> |
| 130 | + if imm2 >= 0 && imm2 < 31 |
| 131 | + && Misc.no_overflow_mul imm1 (1 lsl imm2) |
| 132 | + && U.amd64_imm32_within_bounds imm1 (1 lsl imm2) ( * ) |
| 133 | + then Some (Mach.Imul, imm1 * (1 lsl imm2)) |
| 134 | + else None |
| 135 | + | Imul, Imul -> |
| 136 | + if Misc.no_overflow_mul imm1 imm2 |
| 137 | + && U.amd64_imm32_within_bounds imm1 imm2 ( * ) |
| 138 | + then Some (Mach.Imul, imm1 * imm2) |
| 139 | + else None |
| 140 | + (* CR-soon gtulba-lecu: check this last case | Imod, Imod -> if imm1 mod imm2 |
| 141 | + = 0 then Some (Mach.Imod, imm2) else None |
| 142 | +
|
| 143 | + The integer modulo imm2 group is a subgroup of the integer modulo imm1 iff |
| 144 | + imm2 divides imm1 |
| 145 | +
|
| 146 | + This is because the operations in the groups are addition modulo n and m |
| 147 | + respectively. If n divides m, then every result of the operation (addition) |
| 148 | + in the n group will also be a legal result in the m group, which is |
| 149 | + essentially the definition of a subgroup. If n does not divide m, there |
| 150 | + will be some results in the n group that are not acceptable in the m |
| 151 | + group. *) |
| 152 | + | _ -> None |
| 153 | + |
| 154 | +let fold_intop_imm (cell : Cfg.basic Cfg.instruction DLL.cell) = |
| 155 | + match U.get_cells cell 2 with |
| 156 | + | [fst; snd] -> |
| 157 | + let fst_val = DLL.value fst in |
| 158 | + let snd_val = DLL.value snd in |
| 159 | + (* The following check does the following: 1. Ensures that both instructions |
| 160 | + use the same source register; 2. Ensures that both instructions output |
| 161 | + the result to the source register, this is redundant for amd64 since |
| 162 | + there are no instructions that invalidate this condition. *) |
| 163 | + (* CR-someday gtulba-lecu: This condition is architecture specific and |
| 164 | + should either live in amd64 specific code or this module should contain |
| 165 | + information about the architecture target. *) |
| 166 | + if Array.length fst_val.arg = 1 |
| 167 | + && Array.length snd_val.arg = 1 |
| 168 | + && Array.length fst_val.res = 1 |
| 169 | + && Array.length snd_val.res = 1 |
| 170 | + && U.are_equal_regs |
| 171 | + (Array.unsafe_get fst_val.arg 0) |
| 172 | + (Array.unsafe_get snd_val.arg 0) |
| 173 | + && U.are_equal_regs |
| 174 | + (Array.unsafe_get fst_val.arg 0) |
| 175 | + (Array.unsafe_get fst_val.res 0) |
| 176 | + && U.are_equal_regs |
| 177 | + (Array.unsafe_get snd_val.arg 0) |
| 178 | + (Array.unsafe_get snd_val.res 0) |
| 179 | + then |
| 180 | + match fst_val.desc, snd_val.desc with |
| 181 | + | Op (Intop_imm (op1, imm1)), Op (Intop_imm (op2, imm2)) -> ( |
| 182 | + match are_compatible op1 op2 imm1 imm2 with |
| 183 | + | Some (op, imm) -> |
| 184 | + let new_cell = |
| 185 | + DLL.insert_and_return_before fst |
| 186 | + { fst_val with desc = Cfg.Op (Intop_imm (op, imm)) } |
| 187 | + in |
| 188 | + DLL.delete_curr fst; |
| 189 | + DLL.delete_curr snd; |
| 190 | + Some ((U.prev_at_most U.go_back_const) new_cell) |
| 191 | + | _ -> None) |
| 192 | + | _ -> None |
| 193 | + else None |
| 194 | + | _ -> None |
| 195 | + |
| 196 | +let apply cell = |
| 197 | + match remove_useless_mov cell with |
| 198 | + | None -> ( match fold_intop_imm cell with None -> None | res -> res) |
| 199 | + | res -> res |
0 commit comments