@@ -10,20 +10,20 @@ define void @splat_loads_double(double *%array1, double *%array2, double *%ptrA,
10
10
; CHECK-NEXT: entry:
11
11
; CHECK-NEXT: [[GEP_1_0:%.*]] = getelementptr inbounds double, double* [[ARRAY1:%.*]], i64 0
12
12
; CHECK-NEXT: [[GEP_2_0:%.*]] = getelementptr inbounds double, double* [[ARRAY2:%.*]], i64 0
13
- ; CHECK-NEXT: [[GEP_2_1:%.*]] = getelementptr inbounds double, double* [[ARRAY2]], i64 1
14
- ; CHECK-NEXT: [[LD_2_0:%.*]] = load double, double* [[GEP_2_0]], align 8
15
- ; CHECK-NEXT: [[LD_2_1:%.*]] = load double, double* [[GEP_2_1]], align 8
16
13
; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>*
17
14
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
18
- ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[LD_2_0]], i32 0
19
- ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[LD_2_0]], i32 1
20
- ; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
21
- ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[LD_2_1]], i32 0
22
- ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[LD_2_1]], i32 1
23
- ; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP1]], [[TMP6]]
24
- ; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP4]], [[TMP7]]
25
- ; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>*
26
- ; CHECK-NEXT: store <2 x double> [[TMP8]], <2 x double>* [[TMP9]], align 8
15
+ ; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[GEP_2_0]] to <2 x double>*
16
+ ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
17
+ ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
18
+ ; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]]
19
+ ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 1
20
+ ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
21
+ ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 0
22
+ ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP7]], i32 1
23
+ ; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP1]], [[TMP8]]
24
+ ; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP4]], [[TMP9]]
25
+ ; CHECK-NEXT: [[TMP11:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>*
26
+ ; CHECK-NEXT: store <2 x double> [[TMP10]], <2 x double>* [[TMP11]], align 8
27
27
; CHECK-NEXT: ret void
28
28
;
29
29
entry:
@@ -57,20 +57,20 @@ define void @splat_loads_float(float *%array1, float *%array2, float *%ptrA, flo
57
57
; CHECK-NEXT: entry:
58
58
; CHECK-NEXT: [[GEP_1_0:%.*]] = getelementptr inbounds float, float* [[ARRAY1:%.*]], i64 0
59
59
; CHECK-NEXT: [[GEP_2_0:%.*]] = getelementptr inbounds float, float* [[ARRAY2:%.*]], i64 0
60
- ; CHECK-NEXT: [[GEP_2_1:%.*]] = getelementptr inbounds float, float* [[ARRAY2]], i64 1
61
- ; CHECK-NEXT: [[LD_2_0:%.*]] = load float, float* [[GEP_2_0]], align 8
62
- ; CHECK-NEXT: [[LD_2_1:%.*]] = load float, float* [[GEP_2_1]], align 8
63
60
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[GEP_1_0]] to <2 x float>*
64
61
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* [[TMP0]], align 8
65
- ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[LD_2_0]], i32 0
66
- ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[LD_2_0]], i32 1
67
- ; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP1]], [[TMP3]]
68
- ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[LD_2_1]], i32 0
69
- ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[LD_2_1]], i32 1
70
- ; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[TMP1]], [[TMP6]]
71
- ; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x float> [[TMP4]], [[TMP7]]
72
- ; CHECK-NEXT: [[TMP9:%.*]] = bitcast float* [[GEP_1_0]] to <2 x float>*
73
- ; CHECK-NEXT: store <2 x float> [[TMP8]], <2 x float>* [[TMP9]], align 4
62
+ ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[GEP_2_0]] to <2 x float>*
63
+ ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 8
64
+ ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
65
+ ; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP1]], [[SHUFFLE]]
66
+ ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[SHUFFLE]], i32 1
67
+ ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0
68
+ ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[SHUFFLE]], i32 0
69
+ ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP7]], i32 1
70
+ ; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x float> [[TMP1]], [[TMP8]]
71
+ ; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x float> [[TMP4]], [[TMP9]]
72
+ ; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[GEP_1_0]] to <2 x float>*
73
+ ; CHECK-NEXT: store <2 x float> [[TMP10]], <2 x float>* [[TMP11]], align 4
74
74
; CHECK-NEXT: ret void
75
75
;
76
76
entry:
@@ -104,20 +104,20 @@ define void @splat_loads_i64(i64 *%array1, i64 *%array2, i64 *%ptrA, i64 *%ptrB)
104
104
; CHECK-NEXT: entry:
105
105
; CHECK-NEXT: [[GEP_1_0:%.*]] = getelementptr inbounds i64, i64* [[ARRAY1:%.*]], i64 0
106
106
; CHECK-NEXT: [[GEP_2_0:%.*]] = getelementptr inbounds i64, i64* [[ARRAY2:%.*]], i64 0
107
- ; CHECK-NEXT: [[GEP_2_1:%.*]] = getelementptr inbounds i64, i64* [[ARRAY2]], i64 1
108
- ; CHECK-NEXT: [[LD_2_0:%.*]] = load i64, i64* [[GEP_2_0]], align 8
109
- ; CHECK-NEXT: [[LD_2_1:%.*]] = load i64, i64* [[GEP_2_1]], align 8
110
107
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[GEP_1_0]] to <2 x i64>*
111
108
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
112
- ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[LD_2_0]], i32 0
113
- ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[LD_2_0]], i32 1
114
- ; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i64> [[TMP1]], [[TMP3]]
115
- ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> poison, i64 [[LD_2_1]], i32 0
116
- ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[LD_2_1]], i32 1
117
- ; CHECK-NEXT: [[TMP7:%.*]] = or <2 x i64> [[TMP1]], [[TMP6]]
118
- ; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i64> [[TMP4]], [[TMP7]]
119
- ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64* [[GEP_1_0]] to <2 x i64>*
120
- ; CHECK-NEXT: store <2 x i64> [[TMP8]], <2 x i64>* [[TMP9]], align 4
109
+ ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[GEP_2_0]] to <2 x i64>*
110
+ ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[TMP2]], align 8
111
+ ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> poison, <2 x i32> <i32 1, i32 0>
112
+ ; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i64> [[TMP1]], [[SHUFFLE]]
113
+ ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[SHUFFLE]], i32 1
114
+ ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i32 0
115
+ ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[SHUFFLE]], i32 0
116
+ ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> [[TMP6]], i64 [[TMP7]], i32 1
117
+ ; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i64> [[TMP1]], [[TMP8]]
118
+ ; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i64> [[TMP4]], [[TMP9]]
119
+ ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64* [[GEP_1_0]] to <2 x i64>*
120
+ ; CHECK-NEXT: store <2 x i64> [[TMP10]], <2 x i64>* [[TMP11]], align 4
121
121
; CHECK-NEXT: ret void
122
122
;
123
123
entry:
@@ -151,20 +151,20 @@ define void @splat_loads_i32(i32 *%array1, i32 *%array2, i32 *%ptrA, i32 *%ptrB)
151
151
; CHECK-NEXT: entry:
152
152
; CHECK-NEXT: [[GEP_1_0:%.*]] = getelementptr inbounds i32, i32* [[ARRAY1:%.*]], i64 0
153
153
; CHECK-NEXT: [[GEP_2_0:%.*]] = getelementptr inbounds i32, i32* [[ARRAY2:%.*]], i64 0
154
- ; CHECK-NEXT: [[GEP_2_1:%.*]] = getelementptr inbounds i32, i32* [[ARRAY2]], i64 1
155
- ; CHECK-NEXT: [[LD_2_0:%.*]] = load i32, i32* [[GEP_2_0]], align 8
156
- ; CHECK-NEXT: [[LD_2_1:%.*]] = load i32, i32* [[GEP_2_1]], align 8
157
154
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[GEP_1_0]] to <2 x i32>*
158
155
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 8
159
- ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> poison, i32 [[LD_2_0]], i32 0
160
- ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[LD_2_0]], i32 1
161
- ; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[TMP1]], [[TMP3]]
162
- ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[LD_2_1]], i32 0
163
- ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[LD_2_1]], i32 1
164
- ; CHECK-NEXT: [[TMP7:%.*]] = or <2 x i32> [[TMP1]], [[TMP6]]
165
- ; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP4]], [[TMP7]]
166
- ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[GEP_1_0]] to <2 x i32>*
167
- ; CHECK-NEXT: store <2 x i32> [[TMP8]], <2 x i32>* [[TMP9]], align 4
156
+ ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[GEP_2_0]] to <2 x i32>*
157
+ ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[TMP2]], align 8
158
+ ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
159
+ ; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[TMP1]], [[SHUFFLE]]
160
+ ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[SHUFFLE]], i32 1
161
+ ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0
162
+ ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[SHUFFLE]], i32 0
163
+ ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP7]], i32 1
164
+ ; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i32> [[TMP1]], [[TMP8]]
165
+ ; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP4]], [[TMP9]]
166
+ ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[GEP_1_0]] to <2 x i32>*
167
+ ; CHECK-NEXT: store <2 x i32> [[TMP10]], <2 x i32>* [[TMP11]], align 4
168
168
; CHECK-NEXT: ret void
169
169
;
170
170
entry:
0 commit comments