Skip to content

Commit fe79e66

Browse files
committed
fixup! [SROA] Vector promote some memsets
1 parent 9322a4f commit fe79e66

File tree

1 file changed

+89
-128
lines changed

1 file changed

+89
-128
lines changed

llvm/test/Transforms/SROA/vector-promotion-memset.ll

Lines changed: 89 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -2,155 +2,116 @@
22
; RUN: opt < %s -passes='sroa' -S | FileCheck %s
33
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
44

5-
%ptr_pair = type { ptr, ptr }
6-
7-
%struct.a = type { <32 x i8> }
8-
define void @vector_promote_memset_a(ptr %arg0) {
9-
; CHECK-LABEL: @vector_promote_memset_a(
10-
; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0:%.*]], align 8
11-
; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1
12-
; CHECK-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <32 x i8> zeroinitializer, i8 [[TMP3]], i32 0
13-
; CHECK-NEXT: ret void
5+
%struct_a = type { [32 x i8] }
6+
define i8 @vector_promote_a(ptr %arg0) {
7+
; CHECK-LABEL: @vector_promote_a(
8+
; CHECK-NEXT: [[V0:%.*]] = load i8, ptr [[ARG0:%.*]], align 1
9+
; CHECK-NEXT: [[A0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <32 x i8> zeroinitializer, i8 [[V0]], i32 0
10+
; CHECK-NEXT: [[A0_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[A0_SROA_0_0_VEC_INSERT]], i32 4
11+
; CHECK-NEXT: ret i8 [[A0_SROA_0_4_VEC_EXTRACT]]
1412
;
15-
%a0 = alloca %struct.a, align 32
16-
%a1 = alloca %ptr_pair, align 8
13+
%a0 = alloca %struct_a, align 32
1714
call void @llvm.memset.p0.i64(ptr align 32 %a0, i8 0, i64 32, i1 false)
18-
19-
store ptr %a0, ptr %a1, align 8
20-
21-
%p1 = getelementptr inbounds %ptr_pair, ptr %a1, i64 0, i32 1
22-
%v0 = load ptr, ptr %arg0, align 8
23-
store ptr %v0, ptr %p1, align 8
24-
25-
%p2 = getelementptr inbounds i8, ptr %a1, i32 8
26-
%v1 = load ptr, ptr %p2, align 8
27-
28-
%v2 = load i8, ptr %v1, align 1
29-
store i8 %v2, ptr %a0, align 32
30-
31-
ret void
15+
%v0 = load i8, ptr %arg0, align 1
16+
store i8 %v0, ptr %a0, align 1
17+
%p0 = getelementptr inbounds i8, ptr %a0, i64 4
18+
%v1 = load i8, ptr %p0, align 1
19+
ret i8 %v1
3220
}
3321

34-
%struct.b = type { <16 x i16> }
35-
define void @vector_promote_memset_b(ptr %arg0) {
36-
; CHECK-LABEL: @vector_promote_memset_b(
37-
; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0:%.*]], align 8
38-
; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 1
39-
; CHECK-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <16 x i16> zeroinitializer, i16 [[TMP3]], i32 0
40-
; CHECK-NEXT: ret void
22+
%struct_b = type { [16 x i16] }
23+
define i16 @vector_promote_b(ptr %arg0) {
24+
; CHECK-LABEL: @vector_promote_b(
25+
; CHECK-NEXT: [[V0:%.*]] = load i16, ptr [[ARG0:%.*]], align 1
26+
; CHECK-NEXT: [[A0_SROA_0_20_VEC_INSERT:%.*]] = insertelement <16 x i16> zeroinitializer, i16 [[V0]], i32 10
27+
; CHECK-NEXT: [[A0_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <16 x i16> [[A0_SROA_0_20_VEC_INSERT]], i32 2
28+
; CHECK-NEXT: ret i16 [[A0_SROA_0_4_VEC_EXTRACT]]
4129
;
42-
%a0 = alloca %struct.b, align 16
43-
%a1 = alloca %ptr_pair, align 8
30+
%a0 = alloca %struct_b, align 32
4431
call void @llvm.memset.p0.i64(ptr align 32 %a0, i8 0, i64 32, i1 false)
45-
46-
store ptr %a0, ptr %a1, align 8
47-
48-
%p1 = getelementptr inbounds %ptr_pair, ptr %a1, i64 0, i32 1
49-
%v0 = load ptr, ptr %arg0, align 8
50-
store ptr %v0, ptr %p1, align 8
51-
52-
%p2 = getelementptr inbounds i8, ptr %a1, i32 8
53-
%v1 = load ptr, ptr %p2, align 8
54-
55-
%v2 = load i16, ptr %v1, align 1
56-
store i16 %v2, ptr %a0, align 16
57-
58-
ret void
32+
%v0 = load i16, ptr %arg0, align 1
33+
%p0 = getelementptr inbounds i16, ptr %a0, i64 10
34+
store i16 %v0, ptr %p0, align 1
35+
%p1 = getelementptr inbounds i16, ptr %a0, i64 2
36+
%v1 = load i16, ptr %p1, align 1
37+
ret i16 %v1
5938
}
6039

61-
%struct.c = type { <4 x i32> }
62-
define void @vector_promote_memset_c(ptr %arg0) {
63-
; CHECK-LABEL: @vector_promote_memset_c(
64-
; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0:%.*]], align 8
65-
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 1
66-
; CHECK-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[TMP3]], i32 2
67-
; CHECK-NEXT: ret void
40+
%struct_c = type { [4 x i32] }
41+
define i32 @vector_promote_c(ptr %arg0) {
42+
; CHECK-LABEL: @vector_promote_c(
43+
; CHECK-NEXT: [[V0:%.*]] = load i32, ptr [[ARG0:%.*]], align 1
44+
; CHECK-NEXT: [[A0_SROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[V0]], i32 3
45+
; CHECK-NEXT: [[A0_SROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[A0_SROA_0_12_VEC_INSERT]], i32 2
46+
; CHECK-NEXT: ret i32 [[A0_SROA_0_8_VEC_EXTRACT]]
6847
;
69-
%a0 = alloca %struct.c, align 4
70-
%a1 = alloca %ptr_pair, align 8
48+
%a0 = alloca %struct_c, align 32
7149
call void @llvm.memset.p0.i64(ptr align 32 %a0, i8 0, i64 16, i1 false)
72-
73-
store ptr %a0, ptr %a1, align 8
74-
75-
%p1 = getelementptr inbounds %ptr_pair, ptr %a1, i64 0, i32 1
76-
%v0 = load ptr, ptr %arg0, align 8
77-
store ptr %v0, ptr %p1, align 8
78-
79-
%p2 = getelementptr inbounds i8, ptr %a1, i32 8
80-
%v1 = load ptr, ptr %p2, align 8
81-
82-
%v2 = load i32, ptr %v1, align 1
83-
84-
%p3 = getelementptr inbounds i32, ptr %a0, i32 2
85-
store i32 %v2, ptr %p3, align 4
86-
87-
ret void
50+
%v0 = load i32, ptr %arg0, align 1
51+
%p0 = getelementptr inbounds i32, ptr %a0, i64 3
52+
store i32 %v0, ptr %p0, align 1
53+
%p1 = getelementptr inbounds i32, ptr %a0, i64 2
54+
%v1 = load i32, ptr %p1, align 1
55+
ret i32 %v1
8856
}
8957

9058
; We currently prevent promotion if the promoted vector type would require padding.
91-
%struct.d = type { <6 x i32> }
92-
define void @vector_promote_memset_d(ptr %arg0) {
93-
; CHECK-LABEL: @vector_promote_memset_d(
94-
; CHECK-NEXT: [[DOTSROA_2:%.*]] = alloca [3 x i32], align 4
95-
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[DOTSROA_2]], i8 0, i64 12, i1 false)
96-
; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0:%.*]], align 8
97-
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 1
98-
; CHECK-NEXT: ret void
59+
%struct_d = type { [6 x i32] }
60+
define i32 @vector_promote_d(ptr %arg0) {
61+
; CHECK-LABEL: @vector_promote_d(
62+
; CHECK-NEXT: [[A0_SROA_3:%.*]] = alloca [3 x i32], align 4
63+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A0_SROA_3]], i8 0, i64 12, i1 false)
64+
; CHECK-NEXT: [[V0:%.*]] = load i32, ptr [[ARG0:%.*]], align 1
65+
; CHECK-NEXT: ret i32 0
9966
;
100-
%a0 = alloca %struct.d, align 4
101-
%a1 = alloca %ptr_pair, align 8
67+
%a0 = alloca %struct_d, align 32
10268
call void @llvm.memset.p0.i64(ptr align 32 %a0, i8 0, i64 24, i1 false)
103-
104-
store ptr %a0, ptr %a1, align 8
105-
106-
%p1 = getelementptr inbounds %ptr_pair, ptr %a1, i64 0, i32 1
107-
%v0 = load ptr, ptr %arg0, align 8
108-
store ptr %v0, ptr %p1, align 8
109-
110-
%p2 = getelementptr inbounds i8, ptr %a1, i32 8
111-
%v1 = load ptr, ptr %p2, align 8
112-
113-
%v2 = load i32, ptr %v1, align 1
114-
115-
%p3 = getelementptr inbounds i32, ptr %a0, i32 2
116-
store i32 %v2, ptr %p3, align 4
117-
118-
ret void
69+
%v0 = load i32, ptr %arg0, align 1
70+
%p0 = getelementptr inbounds i32, ptr %a0, i64 1
71+
store i32 %v0, ptr %p0, align 1
72+
%p1 = getelementptr inbounds i32, ptr %a0, i64 2
73+
%v1 = load i32, ptr %p1, align 1
74+
ret i32 %v1
11975
}
12076

121-
122-
; We shouldn't promote large memsets.
123-
%struct.e = type { [65536 x i8] }
124-
define void @vector_promote_memset_e(ptr %arg0) {
125-
; CHECK-LABEL: @vector_promote_memset_e(
126-
; CHECK-NEXT: [[A0_SROA_2:%.*]] = alloca [65524 x i8], align 4
127-
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A0_SROA_2]], i8 0, i64 65524, i1 false)
128-
; CHECK-NEXT: [[V0:%.*]] = load ptr, ptr [[ARG0:%.*]], align 8
129-
; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[V0]], align 1
130-
; CHECK-NEXT: ret void
77+
; We shouldn't promote memsets larger than the max value of `unsigned short`.
78+
; See getMaxNumFixedVectorElements().
79+
%struct_e = type { [65536 x i8] }
80+
define i8 @vector_promote_e(ptr %arg0) {
81+
; CHECK-LABEL: @vector_promote_e(
82+
; CHECK-NEXT: [[A0_SROA_3:%.*]] = alloca [65532 x i8], align 4
83+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A0_SROA_3]], i8 0, i64 65532, i1 false)
84+
; CHECK-NEXT: [[V0:%.*]] = load i8, ptr [[ARG0:%.*]], align 1
85+
; CHECK-NEXT: ret i8 0
13186
;
132-
%a0 = alloca %struct.e, align 4
133-
%a1 = alloca %ptr_pair, align 8
87+
%a0 = alloca %struct_e, align 32
13488
call void @llvm.memset.p0.i64(ptr align 32 %a0, i8 0, i64 65536, i1 false)
135-
136-
store ptr %a0, ptr %a1, align 8
137-
138-
%p1 = getelementptr inbounds %ptr_pair, ptr %a1, i64 0, i32 1
139-
%v0 = load ptr, ptr %arg0, align 8
140-
store ptr %v0, ptr %p1, align 8
141-
142-
%p2 = getelementptr inbounds i8, ptr %a1, i32 8
143-
%v1 = load ptr, ptr %p2, align 8
144-
145-
%v2 = load i32, ptr %v1, align 1
146-
147-
%p3 = getelementptr inbounds i32, ptr %a0, i32 2
148-
store i32 %v2, ptr %p3, align 4
149-
150-
ret void
89+
%v0 = load i8, ptr %arg0, align 1
90+
%p0 = getelementptr inbounds i8, ptr %a0, i64 3
91+
store i8 %v0, ptr %p0, align 1
92+
%p1 = getelementptr inbounds i8, ptr %a0, i64 2
93+
%v1 = load i8, ptr %p1, align 1
94+
ret i8 %v1
15195
}
15296

153-
97+
; This is the largest memset we currently promote (32768 bytes).
98+
%struct_f = type { [32768 x i8] }
99+
define i8 @vector_promote_f(ptr %arg0) {
100+
; CHECK-LABEL: @vector_promote_f(
101+
; CHECK-NEXT: [[V0:%.*]] = load i8, ptr [[ARG0:%.*]], align 1
102+
; CHECK-NEXT: [[A0_SROA_0_12345_VEC_INSERT:%.*]] = insertelement <32768 x i8> zeroinitializer, i8 [[V0]], i32 12345
103+
; CHECK-NEXT: [[A0_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <32768 x i8> [[A0_SROA_0_12345_VEC_INSERT]], i32 2
104+
; CHECK-NEXT: ret i8 [[A0_SROA_0_2_VEC_EXTRACT]]
105+
;
106+
%a0 = alloca %struct_f, align 32
107+
call void @llvm.memset.p0.i64(ptr align 32 %a0, i8 0, i64 32768, i1 false)
108+
%v0 = load i8, ptr %arg0, align 1
109+
%p0 = getelementptr inbounds i8, ptr %a0, i64 12345
110+
store i8 %v0, ptr %p0, align 1
111+
%p1 = getelementptr inbounds i8, ptr %a0, i64 2
112+
%v1 = load i8, ptr %p1, align 1
113+
ret i8 %v1
114+
}
154115

155116
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
156117
declare void @llvm.memset.p0.i64(ptr writeonly captures(none), i8, i64, i1 immarg) #0

0 commit comments

Comments
 (0)