@@ -24,3 +24,46 @@ define void @fdiv_2x2(ptr %num, ptr %denom, ptr %out) {
24
24
store <4 x double > %divtt , ptr %out
25
25
ret void
26
26
}
27
+
28
; Floating-point absolute value applied to a <4 x double> that is subsequently
; treated as a 2x2 matrix by a pair of back-to-back transposes.
;
; The expectations below describe the output as two <2 x double> column
; loads from %in, one llvm.fabs.v2f64 call per column, and two column
; stores to %out, with no transpose left over.
define void @fabs_2x2f64(ptr %in, ptr %out) {
; CHECK-LABEL: @fabs_2x2f64(
; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, ptr [[IN:%.*]], align 32
; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, ptr [[IN]], i64 2
; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[COL_LOAD]])
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[COL_LOAD1]])
; CHECK-NEXT: store <2 x double> [[TMP1]], ptr [[OUT:%.*]], align 32
; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr double, ptr [[OUT]], i64 2
; CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[VEC_GEP2]], align 16
; CHECK-NEXT: ret void
;
  ; Flat 4-element input vector.
  %matrix = load <4 x double>, ptr %in

  ; Element-wise |x| on the flat vector; no shape is attached here.
  %magnitude = call <4 x double> @llvm.fabs.v4f64(<4 x double> %matrix)

  ; Two consecutive 2x2 transposes of the fabs result.
  %flipped = call <4 x double> @llvm.matrix.transpose(<4 x double> %magnitude, i32 2, i32 2)
  %restored = call <4 x double> @llvm.matrix.transpose(<4 x double> %flipped, i32 2, i32 2)

  store <4 x double> %restored, ptr %out
  ret void
}
47
+
48
; Integer absolute value (llvm.abs, despite the "fabs" in the function name)
; applied both before and after a pair of back-to-back 2x2 transposes on a
; <4 x i32>.
;
; The expectations below describe the output as two <2 x i32> column loads
; from %in, then per column an llvm.abs.v2i32 call with `i1 false` feeding
; an llvm.abs.v2i32 call with `i1 true`, then two column stores to %out.
; No transpose is left over, and each call keeps the i1 flag of the
; corresponding input call.
define void @fabs_2x2i32(ptr %in, ptr %out) {
; CHECK-LABEL: @fabs_2x2i32(
; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x i32>, ptr [[IN:%.*]], align 16
; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr i32, ptr [[IN]], i64 2
; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x i32>, ptr [[VEC_GEP]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[COL_LOAD]], i1 false)
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[COL_LOAD1]], i1 false)
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[TMP1]], i1 true)
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[TMP2]], i1 true)
; CHECK-NEXT: store <2 x i32> [[TMP3]], ptr [[OUT:%.*]], align 16
; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr i32, ptr [[OUT]], i64 2
; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr [[VEC_GEP2]], align 8
; CHECK-NEXT: ret void
;
  ; Flat 4-element input vector.
  %matrix = load <4 x i32>, ptr %in

  ; First abs: sits upstream of the transposes, flag operand is false.
  %inner = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %matrix, i1 false)

  ; Two consecutive 2x2 transposes of the first abs result.
  %flipped = call <4 x i32> @llvm.matrix.transpose(<4 x i32> %inner, i32 2, i32 2)
  %restored = call <4 x i32> @llvm.matrix.transpose(<4 x i32> %flipped, i32 2, i32 2)

  ; Second abs: sits downstream of the transposes, flag operand is true.
  %outer = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %restored, i1 true)

  store <4 x i32> %outer, ptr %out
  ret void
}
0 commit comments