Skip to content

Commit 859949c

Browse files
authored
Vectorize [Specific.Istore_int] (#3450)
Used for array initialization (amd64)
1 parent 50f73cb commit 859949c

File tree

4 files changed

+71
-8
lines changed

4 files changed

+71
-8
lines changed

backend/amd64/simd_selection.ml

+48-3
Original file line numberDiff line numberDiff line change
@@ -855,9 +855,54 @@ let vectorize_operation (width_type : Vectorize_utils.Width_in_bits.t)
855855
| W32 -> None (* See previous comment *)
856856
| W16 -> None
857857
| W8 -> None)
858-
| Istore_int _ | Ioffset_loc _ | Ifloatarithmem _ | Ibswap _ | Irdtsc
859-
| Irdpmc | Ilfence | Isfence | Imfence | Ipause | Isimd _ | Iprefetch _
860-
| Icldemote _ ->
858+
| Ifloatarithmem _ -> None
859+
| Istore_int (_n, addressing_mode, is_assignment) -> (
860+
if not (Vectorize_utils.Width_in_bits.equal width_type W64)
861+
then None
862+
else
863+
let extract_store_int_imm (op : Operation.t) =
864+
match op with
865+
| Specific (Istore_int (n, _addr, _is_assign)) -> Int64.of_nativeint n
866+
| Specific
867+
( Ifloatarithmem _ | Ioffset_loc _ | Iprefetch _ | Icldemote _
868+
| Irdtsc | Irdpmc | Ilfence | Isfence | Imfence | Ipause | Isimd _
869+
| Ilea _ | Ibswap _ | Isextend32 | Izextend32 )
870+
| Intop_imm _ | Move | Load _ | Store _ | Intop _ | Alloc _
871+
| Reinterpret_cast _ | Static_cast _ | Spill | Reload | Const_int _
872+
| Const_float32 _ | Const_float _ | Const_symbol _ | Const_vec128 _
873+
| Stackoffset _ | Intop_atomic _ | Floatop _ | Csel _
874+
| Probe_is_enabled _ | Opaque | Begin_region | End_region
875+
| Name_for_debugger _ | Dls_get | Poll ->
876+
assert false
877+
in
878+
let consts = List.map extract_store_int_imm cfg_ops in
879+
match create_const_vec consts with
880+
| None -> None
881+
| Some [const_instruction] ->
882+
let num_args_addressing = Arch.num_args_addressing addressing_mode in
883+
assert (arg_count = num_args_addressing);
884+
assert (res_count = 0);
885+
assert (Array.length const_instruction.results = 1);
886+
let new_reg = Vectorize_utils.Vectorized_instruction.New 0 in
887+
const_instruction.results.(0) <- new_reg;
888+
let address_args =
889+
Array.init num_args_addressing (fun i ->
890+
Vectorize_utils.Vectorized_instruction.Original i)
891+
in
892+
let store_operation =
893+
Operation.Store
894+
(Onetwentyeight_unaligned, addressing_mode, is_assignment)
895+
in
896+
let store_instruction : Vectorize_utils.Vectorized_instruction.t =
897+
{ operation = store_operation;
898+
arguments = Array.append [| new_reg |] address_args;
899+
results = [||]
900+
}
901+
in
902+
Some [const_instruction; store_instruction]
903+
| Some _ -> None)
904+
| Ioffset_loc _ | Ibswap _ | Irdtsc | Irdpmc | Ilfence | Isfence | Imfence
905+
| Ipause | Isimd _ | Iprefetch _ | Icldemote _ ->
861906
None)
862907
| Alloc _ | Reinterpret_cast _ | Static_cast _ | Spill | Reload
863908
| Const_float32 _ | Const_float _ | Const_symbol _ | Const_vec128 _

backend/amd64/vectorize_specific.ml

+10
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,13 @@ let memory_access : Arch.specific_operation -> Memory_access.t option =
5353
the moment. *)
5454
if Simd.is_pure op then None else create Memory_access.Arbitrary
5555
| Ilea _ | Ibswap _ | Isextend32 | Izextend32 -> None
56+
57+
(** [is_seed_store op] returns [Some width] when the amd64-specific operation
    [op] is treated as a seed store, and [None] otherwise.

    Only [Istore_int] yields a width, and that width is always [W64]. Every
    other specific operation is listed explicitly (no wildcard case) and maps
    to [None].

    NOTE(review): the [W64] returned here presumably has to stay in agreement
    with the [W64] check applied to [Istore_int] in the vectorization of
    specific operations; confirm before changing either side. *)
let is_seed_store :
58+
Arch.specific_operation -> Vectorize_utils.Width_in_bits.t option =
59+
fun op ->
60+
match op with
61+
| Istore_int _ -> Some W64
62+
| Ifloatarithmem _ | Ioffset_loc _ | Iprefetch _ | Icldemote _ | Irdtsc
63+
| Irdpmc | Ilfence | Isfence | Imfence | Ipause | Isimd _ | Ilea _ | Ibswap _
64+
| Isextend32 | Izextend32 ->
65+
None

backend/arm64/vectorize_specific.ml

+7
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,10 @@ let memory_access : Arch.specific_operation -> Memory_access.t option =
2121
(* Conservative. we don't have any specific operations with memory
2222
operations at the moment. *)
2323
if Arch.operation_is_pure op then None else create Memory_access.Arbitrary
24+
25+
(** [is_seed_store op] always returns [None] on arm64: none of the
    arm64-specific operations is treated as a seed store.

    Every constructor is listed explicitly instead of using a wildcard case.
    NOTE(review): presumably this is so that a newly added specific operation
    makes the match non-exhaustive and has to be classified here; confirm. *)
let is_seed_store (op : Arch.specific_operation) =
26+
match op with
27+
| Ifar_poll _ | Ifar_alloc _ | Ishiftarith _ | Imuladd | Imulsub | Inegmulf
28+
| Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf | Isqrtf | Ibswap _
29+
| Imove32 | Isignext _ ->
30+
None

backend/cfg/vectorize.ml

+6-5
Original file line numberDiff line numberDiff line change
@@ -2251,13 +2251,15 @@ end = struct
22512251
| None -> None
22522252
| Some op -> (
22532253
match op with
2254-
| Store (chunk, _, _) -> Some chunk
2254+
| Store (chunk, _, _) ->
2255+
Some (Vectorize_utils.Width_in_bits.of_memory_chunk chunk)
2256+
| Specific s -> Vectorize_specific.is_seed_store s
22552257
| Alloc _ | Load _ | Move | Reinterpret_cast _ | Static_cast _ | Spill
22562258
| Reload | Const_int _ | Const_float32 _ | Const_float _
22572259
| Const_symbol _ | Const_vec128 _ | Stackoffset _ | Intop _
22582260
| Intop_imm _ | Intop_atomic _ | Floatop _ | Csel _ | Probe_is_enabled _
2259-
| Opaque | Begin_region | End_region | Specific _ | Name_for_debugger _
2260-
| Dls_get | Poll ->
2261+
| Opaque | Begin_region | End_region | Name_for_debugger _ | Dls_get
2262+
| Poll ->
22612263
None)
22622264

22632265
let from_block (block : Block.t) deps : t list =
@@ -2280,8 +2282,7 @@ end = struct
22802282
DLL.fold_right body ~init:[] ~f:(fun i acc ->
22812283
let i = Instruction.basic i in
22822284
match is_store i with
2283-
| Some chunk ->
2284-
(Vectorize_utils.Width_in_bits.of_memory_chunk chunk, i) :: acc
2285+
| Some width -> (width, i) :: acc
22852286
| None -> acc)
22862287
in
22872288
Format.(

0 commit comments

Comments
 (0)