|
2 | 2 | ; RUN: opt < %s -passes='sroa' -S | FileCheck %s
|
3 | 3 | target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
|
4 | 4 |
|
5 |
| -%ptr_pair = type { ptr, ptr } |
6 |
| - |
7 |
| -%struct.a = type { <32 x i8> } |
8 |
| -define void @vector_promote_memset_a(ptr %arg0) { |
9 |
| -; CHECK-LABEL: @vector_promote_memset_a( |
10 |
| -; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0:%.*]], align 8 |
11 |
| -; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1 |
12 |
| -; CHECK-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <32 x i8> zeroinitializer, i8 [[TMP3]], i32 0 |
13 |
| -; CHECK-NEXT: ret void |
| 5 | +%struct_a = type { [32 x i8] } |
| 6 | +define i8 @vector_promote_a(ptr %arg0) { |
| 7 | +; CHECK-LABEL: @vector_promote_a( |
| 8 | +; CHECK-NEXT: [[V0:%.*]] = load i8, ptr [[ARG0:%.*]], align 1 |
| 9 | +; CHECK-NEXT: [[A0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <32 x i8> zeroinitializer, i8 [[V0]], i32 0 |
| 10 | +; CHECK-NEXT: [[A0_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[A0_SROA_0_0_VEC_INSERT]], i32 4 |
| 11 | +; CHECK-NEXT: ret i8 [[A0_SROA_0_4_VEC_EXTRACT]] |
14 | 12 | ;
|
15 |
| - %a0 = alloca %struct.a, align 32 |
16 |
| - %a1 = alloca %ptr_pair, align 8 |
| 13 | + %a0 = alloca %struct_a, align 32 |
17 | 14 | call void @llvm.memset.p0.i64(ptr align 32 %a0, i8 0, i64 32, i1 false)
|
18 |
| - |
19 |
| - store ptr %a0, ptr %a1, align 8 |
20 |
| - |
21 |
| - %p1 = getelementptr inbounds %ptr_pair, ptr %a1, i64 0, i32 1 |
22 |
| - %v0 = load ptr, ptr %arg0, align 8 |
23 |
| - store ptr %v0, ptr %p1, align 8 |
24 |
| - |
25 |
| - %p2 = getelementptr inbounds i8, ptr %a1, i32 8 |
26 |
| - %v1 = load ptr, ptr %p2, align 8 |
27 |
| - |
28 |
| - %v2 = load i8, ptr %v1, align 1 |
29 |
| - store i8 %v2, ptr %a0, align 32 |
30 |
| - |
31 |
| - ret void |
| 15 | + %v0 = load i8, ptr %arg0, align 1 |
| 16 | + store i8 %v0, ptr %a0, align 1 |
| 17 | + %p0 = getelementptr inbounds i8, ptr %a0, i64 4 |
| 18 | + %v1 = load i8, ptr %p0, align 1 |
| 19 | + ret i8 %v1 |
32 | 20 | }
|
33 | 21 |
|
34 |
| -%struct.b = type { <16 x i16> } |
35 |
| -define void @vector_promote_memset_b(ptr %arg0) { |
36 |
| -; CHECK-LABEL: @vector_promote_memset_b( |
37 |
| -; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0:%.*]], align 8 |
38 |
| -; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 1 |
39 |
| -; CHECK-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <16 x i16> zeroinitializer, i16 [[TMP3]], i32 0 |
40 |
| -; CHECK-NEXT: ret void |
| 22 | +%struct_b = type { [16 x i16] } |
| 23 | +define i16 @vector_promote_b(ptr %arg0) { |
| 24 | +; CHECK-LABEL: @vector_promote_b( |
| 25 | +; CHECK-NEXT: [[V0:%.*]] = load i16, ptr [[ARG0:%.*]], align 1 |
| 26 | +; CHECK-NEXT: [[A0_SROA_0_20_VEC_INSERT:%.*]] = insertelement <16 x i16> zeroinitializer, i16 [[V0]], i32 10 |
| 27 | +; CHECK-NEXT: [[A0_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <16 x i16> [[A0_SROA_0_20_VEC_INSERT]], i32 2 |
| 28 | +; CHECK-NEXT: ret i16 [[A0_SROA_0_4_VEC_EXTRACT]] |
41 | 29 | ;
|
42 |
| - %a0 = alloca %struct.b, align 16 |
43 |
| - %a1 = alloca %ptr_pair, align 8 |
| 30 | + %a0 = alloca %struct_b, align 32 |
44 | 31 | call void @llvm.memset.p0.i64(ptr align 32 %a0, i8 0, i64 32, i1 false)
|
45 |
| - |
46 |
| - store ptr %a0, ptr %a1, align 8 |
47 |
| - |
48 |
| - %p1 = getelementptr inbounds %ptr_pair, ptr %a1, i64 0, i32 1 |
49 |
| - %v0 = load ptr, ptr %arg0, align 8 |
50 |
| - store ptr %v0, ptr %p1, align 8 |
51 |
| - |
52 |
| - %p2 = getelementptr inbounds i8, ptr %a1, i32 8 |
53 |
| - %v1 = load ptr, ptr %p2, align 8 |
54 |
| - |
55 |
| - %v2 = load i16, ptr %v1, align 1 |
56 |
| - store i16 %v2, ptr %a0, align 16 |
57 |
| - |
58 |
| - ret void |
| 32 | + %v0 = load i16, ptr %arg0, align 1 |
| 33 | + %p0 = getelementptr inbounds i16, ptr %a0, i64 10 |
| 34 | + store i16 %v0, ptr %p0, align 1 |
| 35 | + %p1 = getelementptr inbounds i16, ptr %a0, i64 2 |
| 36 | + %v1 = load i16, ptr %p1, align 1 |
| 37 | + ret i16 %v1 |
59 | 38 | }
|
60 | 39 |
|
61 |
| -%struct.c = type { <4 x i32> } |
62 |
| -define void @vector_promote_memset_c(ptr %arg0) { |
63 |
| -; CHECK-LABEL: @vector_promote_memset_c( |
64 |
| -; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0:%.*]], align 8 |
65 |
| -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 1 |
66 |
| -; CHECK-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[TMP3]], i32 2 |
67 |
| -; CHECK-NEXT: ret void |
| 40 | +%struct_c = type { [4 x i32] } |
| 41 | +define i32 @vector_promote_c(ptr %arg0) { |
| 42 | +; CHECK-LABEL: @vector_promote_c( |
| 43 | +; CHECK-NEXT: [[V0:%.*]] = load i32, ptr [[ARG0:%.*]], align 1 |
| 44 | +; CHECK-NEXT: [[A0_SROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[V0]], i32 3 |
| 45 | +; CHECK-NEXT: [[A0_SROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[A0_SROA_0_12_VEC_INSERT]], i32 2 |
| 46 | +; CHECK-NEXT: ret i32 [[A0_SROA_0_8_VEC_EXTRACT]] |
68 | 47 | ;
|
69 |
| - %a0 = alloca %struct.c, align 4 |
70 |
| - %a1 = alloca %ptr_pair, align 8 |
| 48 | + %a0 = alloca %struct_c, align 32 |
71 | 49 | call void @llvm.memset.p0.i64(ptr align 32 %a0, i8 0, i64 16, i1 false)
|
72 |
| - |
73 |
| - store ptr %a0, ptr %a1, align 8 |
74 |
| - |
75 |
| - %p1 = getelementptr inbounds %ptr_pair, ptr %a1, i64 0, i32 1 |
76 |
| - %v0 = load ptr, ptr %arg0, align 8 |
77 |
| - store ptr %v0, ptr %p1, align 8 |
78 |
| - |
79 |
| - %p2 = getelementptr inbounds i8, ptr %a1, i32 8 |
80 |
| - %v1 = load ptr, ptr %p2, align 8 |
81 |
| - |
82 |
| - %v2 = load i32, ptr %v1, align 1 |
83 |
| - |
84 |
| - %p3 = getelementptr inbounds i32, ptr %a0, i32 2 |
85 |
| - store i32 %v2, ptr %p3, align 4 |
86 |
| - |
87 |
| - ret void |
| 50 | + %v0 = load i32, ptr %arg0, align 1 |
| 51 | + %p0 = getelementptr inbounds i32, ptr %a0, i64 3 |
| 52 | + store i32 %v0, ptr %p0, align 1 |
| 53 | + %p1 = getelementptr inbounds i32, ptr %a0, i64 2 |
| 54 | + %v1 = load i32, ptr %p1, align 1 |
| 55 | + ret i32 %v1 |
88 | 56 | }
|
89 | 57 |
|
90 | 58 | ; We currently prevent promotion if the vector would require padding (here the alloca is [6 x i32], i.e. 24 bytes).
|
91 |
| -%struct.d = type { <6 x i32> } |
92 |
| -define void @vector_promote_memset_d(ptr %arg0) { |
93 |
| -; CHECK-LABEL: @vector_promote_memset_d( |
94 |
| -; CHECK-NEXT: [[DOTSROA_2:%.*]] = alloca [3 x i32], align 4 |
95 |
| -; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[DOTSROA_2]], i8 0, i64 12, i1 false) |
96 |
| -; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0:%.*]], align 8 |
97 |
| -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 1 |
98 |
| -; CHECK-NEXT: ret void |
| 59 | +%struct_d = type { [6 x i32] } |
| 60 | +define i32 @vector_promote_d(ptr %arg0) { |
| 61 | +; CHECK-LABEL: @vector_promote_d( |
| 62 | +; CHECK-NEXT: [[A0_SROA_3:%.*]] = alloca [3 x i32], align 4 |
| 63 | +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A0_SROA_3]], i8 0, i64 12, i1 false) |
| 64 | +; CHECK-NEXT: [[V0:%.*]] = load i32, ptr [[ARG0:%.*]], align 1 |
| 65 | +; CHECK-NEXT: ret i32 0 |
99 | 66 | ;
|
100 |
| - %a0 = alloca %struct.d, align 4 |
101 |
| - %a1 = alloca %ptr_pair, align 8 |
| 67 | + %a0 = alloca %struct_d, align 32 |
102 | 68 | call void @llvm.memset.p0.i64(ptr align 32 %a0, i8 0, i64 24, i1 false)
|
103 |
| - |
104 |
| - store ptr %a0, ptr %a1, align 8 |
105 |
| - |
106 |
| - %p1 = getelementptr inbounds %ptr_pair, ptr %a1, i64 0, i32 1 |
107 |
| - %v0 = load ptr, ptr %arg0, align 8 |
108 |
| - store ptr %v0, ptr %p1, align 8 |
109 |
| - |
110 |
| - %p2 = getelementptr inbounds i8, ptr %a1, i32 8 |
111 |
| - %v1 = load ptr, ptr %p2, align 8 |
112 |
| - |
113 |
| - %v2 = load i32, ptr %v1, align 1 |
114 |
| - |
115 |
| - %p3 = getelementptr inbounds i32, ptr %a0, i32 2 |
116 |
| - store i32 %v2, ptr %p3, align 4 |
117 |
| - |
118 |
| - ret void |
| 69 | + %v0 = load i32, ptr %arg0, align 1 |
| 70 | + %p0 = getelementptr inbounds i32, ptr %a0, i64 1 |
| 71 | + store i32 %v0, ptr %p0, align 1 |
| 72 | + %p1 = getelementptr inbounds i32, ptr %a0, i64 2 |
| 73 | + %v1 = load i32, ptr %p1, align 1 |
| 74 | + ret i32 %v1 |
119 | 75 | }
|
120 | 76 |
|
121 |
| - |
122 |
| -; We shouldn't promote large memsets. |
123 |
| -%struct.e = type { [65536 x i8] } |
124 |
| -define void @vector_promote_memset_e(ptr %arg0) { |
125 |
| -; CHECK-LABEL: @vector_promote_memset_e( |
126 |
| -; CHECK-NEXT: [[A0_SROA_2:%.*]] = alloca [65524 x i8], align 4 |
127 |
| -; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A0_SROA_2]], i8 0, i64 65524, i1 false) |
128 |
| -; CHECK-NEXT: [[V0:%.*]] = load ptr, ptr [[ARG0:%.*]], align 8 |
129 |
| -; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[V0]], align 1 |
130 |
| -; CHECK-NEXT: ret void |
| 77 | +; We shouldn't promote memsets that would need more vector elements than the max value of `unsigned short` (65535); this one covers 65536 i8 elements. |
| 78 | +; See getMaxNumFixedVectorElements(). |
| 79 | +%struct_e = type { [65536 x i8] } |
| 80 | +define i8 @vector_promote_e(ptr %arg0) { |
| 81 | +; CHECK-LABEL: @vector_promote_e( |
| 82 | +; CHECK-NEXT: [[A0_SROA_3:%.*]] = alloca [65532 x i8], align 4 |
| 83 | +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A0_SROA_3]], i8 0, i64 65532, i1 false) |
| 84 | +; CHECK-NEXT: [[V0:%.*]] = load i8, ptr [[ARG0:%.*]], align 1 |
| 85 | +; CHECK-NEXT: ret i8 0 |
131 | 86 | ;
|
132 |
| - %a0 = alloca %struct.e, align 4 |
133 |
| - %a1 = alloca %ptr_pair, align 8 |
| 87 | + %a0 = alloca %struct_e, align 32 |
134 | 88 | call void @llvm.memset.p0.i64(ptr align 32 %a0, i8 0, i64 65536, i1 false)
|
135 |
| - |
136 |
| - store ptr %a0, ptr %a1, align 8 |
137 |
| - |
138 |
| - %p1 = getelementptr inbounds %ptr_pair, ptr %a1, i64 0, i32 1 |
139 |
| - %v0 = load ptr, ptr %arg0, align 8 |
140 |
| - store ptr %v0, ptr %p1, align 8 |
141 |
| - |
142 |
| - %p2 = getelementptr inbounds i8, ptr %a1, i32 8 |
143 |
| - %v1 = load ptr, ptr %p2, align 8 |
144 |
| - |
145 |
| - %v2 = load i32, ptr %v1, align 1 |
146 |
| - |
147 |
| - %p3 = getelementptr inbounds i32, ptr %a0, i32 2 |
148 |
| - store i32 %v2, ptr %p3, align 4 |
149 |
| - |
150 |
| - ret void |
| 89 | + %v0 = load i8, ptr %arg0, align 1 |
| 90 | + %p0 = getelementptr inbounds i8, ptr %a0, i64 3 |
| 91 | + store i8 %v0, ptr %p0, align 1 |
| 92 | + %p1 = getelementptr inbounds i8, ptr %a0, i64 2 |
| 93 | + %v1 = load i8, ptr %p1, align 1 |
| 94 | + ret i8 %v1 |
151 | 95 | }
|
152 | 96 |
|
153 |
| - |
| 97 | +; Largest memset we currently promote: 32768 bytes, rewritten as a <32768 x i8> vector. |
| 98 | +%struct_f = type { [32768 x i8] } |
| 99 | +define i8 @vector_promote_f(ptr %arg0) { |
| 100 | +; CHECK-LABEL: @vector_promote_f( |
| 101 | +; CHECK-NEXT: [[V0:%.*]] = load i8, ptr [[ARG0:%.*]], align 1 |
| 102 | +; CHECK-NEXT: [[A0_SROA_0_12345_VEC_INSERT:%.*]] = insertelement <32768 x i8> zeroinitializer, i8 [[V0]], i32 12345 |
| 103 | +; CHECK-NEXT: [[A0_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <32768 x i8> [[A0_SROA_0_12345_VEC_INSERT]], i32 2 |
| 104 | +; CHECK-NEXT: ret i8 [[A0_SROA_0_2_VEC_EXTRACT]] |
| 105 | +; |
| 106 | + %a0 = alloca %struct_f, align 32 |
| 107 | + call void @llvm.memset.p0.i64(ptr align 32 %a0, i8 0, i64 32768, i1 false) |
| 108 | + %v0 = load i8, ptr %arg0, align 1 |
| 109 | + %p0 = getelementptr inbounds i8, ptr %a0, i64 12345 |
| 110 | + store i8 %v0, ptr %p0, align 1 |
| 111 | + %p1 = getelementptr inbounds i8, ptr %a0, i64 2 |
| 112 | + %v1 = load i8, ptr %p1, align 1 |
| 113 | + ret i8 %v1 |
| 114 | +} |
154 | 115 |
|
155 | 116 | ; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
|
156 | 117 | declare void @llvm.memset.p0.i64(ptr writeonly captures(none), i8, i64, i1 immarg) #0
|
|
0 commit comments