@@ -2979,3 +2979,223 @@ loop:
2979
2979
exit:
2980
2980
ret i32 0
2981
2981
}
2982
+
2983
; test_widening_instr_mull_64: loop that multiplies two zero-extended vectors
; as <16 x i64> and stores the product.
;   %p1 - base pointer; each iteration loads <16 x i8> from it and stores the
;         <16 x i64> product back to the same address.
;   %p2 - base pointer of the <16 x i16> operand.
;   %h  - value %iv.next is compared against to leave the loop.
; Always returns 0.
; The expected code below widens the inputs with ushll/ushll2 and performs the
; multiply with umull/umull2.
; NOTE(review): the first expectation block uses `;` asm comments and LBB
; labels, the -BE block uses `//` and .LBB labels -- presumably a little-endian
; and a big-endian RUN line; the RUN lines are not visible here, confirm.
; NOTE(review): the expectation lines look machine-generated -- presumably they
; should be regenerated rather than edited by hand; confirm.
+ define i32 @test_widening_instr_mull_64 (ptr %p1 , ptr %p2 , i32 %h ) {
2984
+ ; CHECK-LABEL: test_widening_instr_mull_64:
2985
+ ; CHECK: ; %bb.0: ; %entry
2986
+ ; CHECK-NEXT: LBB25_1: ; %loop
2987
+ ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
2988
+ ; CHECK-NEXT: ldr q0, [x0]
2989
+ ; CHECK-NEXT: subs w2, w2, #1
2990
+ ; CHECK-NEXT: ldr q1, [x1]
2991
+ ; CHECK-NEXT: ldr q2, [x1, #16]!
2992
+ ; CHECK-NEXT: ushll2.8h v3, v0, #0
2993
+ ; CHECK-NEXT: ushll.8h v0, v0, #0
2994
+ ; CHECK-NEXT: ushll2.4s v4, v2, #0
2995
+ ; CHECK-NEXT: ushll2.4s v5, v3, #0
2996
+ ; CHECK-NEXT: ushll.4s v2, v2, #0
2997
+ ; CHECK-NEXT: ushll.4s v3, v3, #0
2998
+ ; CHECK-NEXT: umull2.2d v6, v5, v4
2999
+ ; CHECK-NEXT: umull.2d v4, v5, v4
3000
+ ; CHECK-NEXT: umull2.2d v5, v3, v2
3001
+ ; CHECK-NEXT: ushll2.4s v7, v1, #0
3002
+ ; CHECK-NEXT: ushll.4s v1, v1, #0
3003
+ ; CHECK-NEXT: stp q4, q6, [x0, #96]
3004
+ ; CHECK-NEXT: ushll2.4s v6, v0, #0
3005
+ ; CHECK-NEXT: str q5, [x0, #80]
3006
+ ; CHECK-NEXT: ushll.4s v0, v0, #0
3007
+ ; CHECK-NEXT: umull2.2d v4, v6, v7
3008
+ ; CHECK-NEXT: umull.2d v5, v6, v7
3009
+ ; CHECK-NEXT: umull2.2d v6, v0, v1
3010
+ ; CHECK-NEXT: umull.2d v0, v0, v1
3011
+ ; CHECK-NEXT: umull.2d v1, v3, v2
3012
+ ; CHECK-NEXT: stp q5, q4, [x0, #32]
3013
+ ; CHECK-NEXT: stp q0, q6, [x0]
3014
+ ; CHECK-NEXT: str q1, [x0, #64]!
3015
+ ; CHECK-NEXT: b.ne LBB25_1
3016
+ ; CHECK-NEXT: ; %bb.2: ; %exit
3017
+ ; CHECK-NEXT: mov w0, wzr
3018
+ ; CHECK-NEXT: ret
3019
+ ;
3020
+ ; CHECK-BE-LABEL: test_widening_instr_mull_64:
3021
+ ; CHECK-BE: // %bb.0: // %entry
3022
+ ; CHECK-BE-NEXT: .LBB25_1: // %loop
3023
+ ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
3024
+ ; CHECK-BE-NEXT: ld1 { v0.16b }, [x0]
3025
+ ; CHECK-BE-NEXT: add x8, x0, #48
3026
+ ; CHECK-BE-NEXT: add x9, x0, #112
3027
+ ; CHECK-BE-NEXT: add x10, x0, #16
3028
+ ; CHECK-BE-NEXT: ld1 { v1.8h }, [x1]
3029
+ ; CHECK-BE-NEXT: add x1, x1, #16
3030
+ ; CHECK-BE-NEXT: subs w2, w2, #1
3031
+ ; CHECK-BE-NEXT: ushll v2.8h, v0.8b, #0
3032
+ ; CHECK-BE-NEXT: ld1 { v6.8h }, [x1]
3033
+ ; CHECK-BE-NEXT: ushll2 v0.8h, v0.16b, #0
3034
+ ; CHECK-BE-NEXT: ushll2 v3.4s, v1.8h, #0
3035
+ ; CHECK-BE-NEXT: ushll2 v4.4s, v2.8h, #0
3036
+ ; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
3037
+ ; CHECK-BE-NEXT: umull2 v5.2d, v4.4s, v3.4s
3038
+ ; CHECK-BE-NEXT: ushll2 v7.4s, v6.8h, #0
3039
+ ; CHECK-BE-NEXT: ushll v6.4s, v6.4h, #0
3040
+ ; CHECK-BE-NEXT: ushll v2.4s, v2.4h, #0
3041
+ ; CHECK-BE-NEXT: st1 { v5.2d }, [x8]
3042
+ ; CHECK-BE-NEXT: add x8, x0, #96
3043
+ ; CHECK-BE-NEXT: ushll2 v5.4s, v0.8h, #0
3044
+ ; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
3045
+ ; CHECK-BE-NEXT: umull2 v16.2d, v5.4s, v7.4s
3046
+ ; CHECK-BE-NEXT: umull v5.2d, v5.2s, v7.2s
3047
+ ; CHECK-BE-NEXT: umull2 v7.2d, v0.4s, v6.4s
3048
+ ; CHECK-BE-NEXT: umull v0.2d, v0.2s, v6.2s
3049
+ ; CHECK-BE-NEXT: st1 { v16.2d }, [x9]
3050
+ ; CHECK-BE-NEXT: add x9, x0, #80
3051
+ ; CHECK-BE-NEXT: umull v16.2d, v2.2s, v1.2s
3052
+ ; CHECK-BE-NEXT: st1 { v5.2d }, [x8]
3053
+ ; CHECK-BE-NEXT: umull v3.2d, v4.2s, v3.2s
3054
+ ; CHECK-BE-NEXT: add x8, x0, #32
3055
+ ; CHECK-BE-NEXT: st1 { v7.2d }, [x9]
3056
+ ; CHECK-BE-NEXT: add x9, x0, #64
3057
+ ; CHECK-BE-NEXT: umull2 v1.2d, v2.4s, v1.4s
3058
+ ; CHECK-BE-NEXT: st1 { v16.2d }, [x0]
3059
+ ; CHECK-BE-NEXT: mov x0, x9
3060
+ ; CHECK-BE-NEXT: st1 { v0.2d }, [x9]
3061
+ ; CHECK-BE-NEXT: st1 { v3.2d }, [x8]
3062
+ ; CHECK-BE-NEXT: st1 { v1.2d }, [x10]
3063
+ ; CHECK-BE-NEXT: b.ne .LBB25_1
3064
+ ; CHECK-BE-NEXT: // %bb.2: // %exit
3065
+ ; CHECK-BE-NEXT: mov w0, wzr
3066
+ ; CHECK-BE-NEXT: ret
3067
+ entry:
3068
+ br label %loop
3069
+
3070
+ loop:
3071
; %iv counts iterations from 0 in steps of 1.
+ %iv = phi i32 [ 0 , %entry ], [ %iv.next , %loop ]
3072
; %p1 is indexed with a <16 x i32> element type (64-byte step), although the
; load below is <16 x i8> and the store is <16 x i64>; the expected code
; advances x0 by #64 accordingly.
+ %gep.1 = getelementptr inbounds <16 x i32 >, ptr %p1 , i32 %iv
3073
; %p2 is indexed with a <16 x i8> element type (16-byte step), although the
; load below is <16 x i16>; the expected code advances x1 by #16 accordingly.
+ %gep.2 = getelementptr inbounds <16 x i8 >, ptr %p2 , i32 %iv
3074
+ %l1 = load <16 x i8 >, ptr %gep.1
3075
+ %z2 = zext <16 x i8 > %l1 to <16 x i64 >
3076
+ %l4 = load <16 x i16 >, ptr %gep.2
3077
+ %z5 = zext <16 x i16 > %l4 to <16 x i64 >
3078
; Both multiplicands are zero-extended values.
+ %mul = mul <16 x i64 > %z2 , %z5
3079
; The product overwrites the memory the <16 x i8> operand was loaded from.
+ store <16 x i64 > %mul , ptr %gep.1
3080
+ %iv.next = add nuw nsw i32 %iv , 1
3081
; Leave the loop once the incremented counter equals %h.
+ %exitcond.not = icmp eq i32 %iv.next , %h
3082
+ br i1 %exitcond.not , label %exit , label %loop
3083
+
3084
+ exit:
3085
+ ret i32 0
3086
+ }
3087
+
3088
; test_widening_instr_mull_2: loop that multiplies a <16 x i32> vector loaded
; from %p1 by a <16 x i8> vector loaded from %p2 and zero-extended to
; <16 x i32>, storing the product back to the %p1 address.
;   %p1 - base pointer of the <16 x i32> operand and of the result.
;   %p2 - base pointer of the <16 x i8> operand.
;   %h  - value %iv.next is compared against to leave the loop.
; Always returns 0.
; Only one multiplicand is a zext here. The expected code below loads four
; constant-pool vectors before the loop, applies tbl to the %p2 data with each
; of them (presumably implementing the zext -- confirm), and multiplies with a
; plain mul on 4s lanes; no umull/umull2 appears.
; NOTE(review): the first expectation block uses `;` asm comments and LBB/Lloh
; labels, the -BE block uses `//` and .LBB labels -- presumably a little-endian
; and a big-endian RUN line; the RUN lines are not visible here, confirm.
; NOTE(review): the expectation lines look machine-generated -- presumably they
; should be regenerated rather than edited by hand; confirm.
+ define i32 @test_widening_instr_mull_2 (ptr %p1 , ptr %p2 , i32 %h ) {
3089
+ ; CHECK-LABEL: test_widening_instr_mull_2:
3090
+ ; CHECK: ; %bb.0: ; %entry
3091
+ ; CHECK-NEXT: Lloh54:
3092
+ ; CHECK-NEXT: adrp x8, lCPI26_0@PAGE
3093
+ ; CHECK-NEXT: Lloh55:
3094
+ ; CHECK-NEXT: adrp x9, lCPI26_1@PAGE
3095
+ ; CHECK-NEXT: Lloh56:
3096
+ ; CHECK-NEXT: adrp x10, lCPI26_2@PAGE
3097
+ ; CHECK-NEXT: Lloh57:
3098
+ ; CHECK-NEXT: adrp x11, lCPI26_3@PAGE
3099
+ ; CHECK-NEXT: Lloh58:
3100
+ ; CHECK-NEXT: ldr q0, [x8, lCPI26_0@PAGEOFF]
3101
+ ; CHECK-NEXT: mov x8, x0
3102
+ ; CHECK-NEXT: Lloh59:
3103
+ ; CHECK-NEXT: ldr q1, [x9, lCPI26_1@PAGEOFF]
3104
+ ; CHECK-NEXT: Lloh60:
3105
+ ; CHECK-NEXT: ldr q2, [x10, lCPI26_2@PAGEOFF]
3106
+ ; CHECK-NEXT: Lloh61:
3107
+ ; CHECK-NEXT: ldr q3, [x11, lCPI26_3@PAGEOFF]
3108
+ ; CHECK-NEXT: LBB26_1: ; %loop
3109
+ ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
3110
+ ; CHECK-NEXT: ldr q4, [x1], #16
3111
+ ; CHECK-NEXT: ldp q5, q6, [x0, #32]
3112
+ ; CHECK-NEXT: subs w2, w2, #1
3113
+ ; CHECK-NEXT: tbl.16b v16, { v4 }, v0
3114
+ ; CHECK-NEXT: tbl.16b v18, { v4 }, v1
3115
+ ; CHECK-NEXT: tbl.16b v19, { v4 }, v2
3116
+ ; CHECK-NEXT: tbl.16b v4, { v4 }, v3
3117
+ ; CHECK-NEXT: ldr q7, [x0]
3118
+ ; CHECK-NEXT: ldr q17, [x8, #16]!
3119
+ ; CHECK-NEXT: mul.4s v5, v5, v16
3120
+ ; CHECK-NEXT: mul.4s v6, v6, v18
3121
+ ; CHECK-NEXT: mul.4s v7, v7, v19
3122
+ ; CHECK-NEXT: mul.4s v4, v17, v4
3123
+ ; CHECK-NEXT: stp q5, q6, [x0, #32]
3124
+ ; CHECK-NEXT: str q7, [x0]
3125
+ ; CHECK-NEXT: mov x0, x8
3126
+ ; CHECK-NEXT: str q4, [x8]
3127
+ ; CHECK-NEXT: b.ne LBB26_1
3128
+ ; CHECK-NEXT: ; %bb.2: ; %exit
3129
+ ; CHECK-NEXT: mov w0, wzr
3130
+ ; CHECK-NEXT: ret
3131
+ ; CHECK-NEXT: .loh AdrpLdr Lloh57, Lloh61
3132
+ ; CHECK-NEXT: .loh AdrpLdr Lloh56, Lloh60
3133
+ ; CHECK-NEXT: .loh AdrpLdr Lloh55, Lloh59
3134
+ ; CHECK-NEXT: .loh AdrpLdr Lloh54, Lloh58
3135
+ ;
3136
+ ; CHECK-BE-LABEL: test_widening_instr_mull_2:
3137
+ ; CHECK-BE: // %bb.0: // %entry
3138
+ ; CHECK-BE-NEXT: adrp x8, .LCPI26_0
3139
+ ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI26_0
3140
+ ; CHECK-BE-NEXT: ld1 { v0.16b }, [x8]
3141
+ ; CHECK-BE-NEXT: adrp x8, .LCPI26_1
3142
+ ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI26_1
3143
+ ; CHECK-BE-NEXT: ld1 { v1.16b }, [x8]
3144
+ ; CHECK-BE-NEXT: adrp x8, .LCPI26_2
3145
+ ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI26_2
3146
+ ; CHECK-BE-NEXT: ld1 { v2.16b }, [x8]
3147
+ ; CHECK-BE-NEXT: adrp x8, .LCPI26_3
3148
+ ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI26_3
3149
+ ; CHECK-BE-NEXT: ld1 { v3.16b }, [x8]
3150
+ ; CHECK-BE-NEXT: .LBB26_1: // %loop
3151
+ ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
3152
+ ; CHECK-BE-NEXT: ld1 { v4.16b }, [x1]
3153
+ ; CHECK-BE-NEXT: add x8, x0, #32
3154
+ ; CHECK-BE-NEXT: add x9, x0, #48
3155
+ ; CHECK-BE-NEXT: add x10, x0, #16
3156
+ ; CHECK-BE-NEXT: ld1 { v6.4s }, [x0]
3157
+ ; CHECK-BE-NEXT: subs w2, w2, #1
3158
+ ; CHECK-BE-NEXT: add x1, x1, #16
3159
+ ; CHECK-BE-NEXT: ld1 { v16.4s }, [x8]
3160
+ ; CHECK-BE-NEXT: tbl v5.16b, { v4.16b }, v1.16b
3161
+ ; CHECK-BE-NEXT: tbl v7.16b, { v4.16b }, v0.16b
3162
+ ; CHECK-BE-NEXT: ld1 { v18.4s }, [x10]
3163
+ ; CHECK-BE-NEXT: tbl v17.16b, { v4.16b }, v3.16b
3164
+ ; CHECK-BE-NEXT: tbl v4.16b, { v4.16b }, v2.16b
3165
+ ; CHECK-BE-NEXT: rev32 v5.16b, v5.16b
3166
+ ; CHECK-BE-NEXT: rev32 v7.16b, v7.16b
3167
+ ; CHECK-BE-NEXT: rev32 v17.16b, v17.16b
3168
+ ; CHECK-BE-NEXT: rev32 v4.16b, v4.16b
3169
+ ; CHECK-BE-NEXT: mul v5.4s, v6.4s, v5.4s
3170
+ ; CHECK-BE-NEXT: ld1 { v6.4s }, [x9]
3171
+ ; CHECK-BE-NEXT: mul v7.4s, v18.4s, v7.4s
3172
+ ; CHECK-BE-NEXT: st1 { v5.4s }, [x0]
3173
+ ; CHECK-BE-NEXT: mov x0, x10
3174
+ ; CHECK-BE-NEXT: mul v5.4s, v16.4s, v17.4s
3175
+ ; CHECK-BE-NEXT: st1 { v7.4s }, [x10]
3176
+ ; CHECK-BE-NEXT: mul v4.4s, v6.4s, v4.4s
3177
+ ; CHECK-BE-NEXT: st1 { v5.4s }, [x8]
3178
+ ; CHECK-BE-NEXT: st1 { v4.4s }, [x9]
3179
+ ; CHECK-BE-NEXT: b.ne .LBB26_1
3180
+ ; CHECK-BE-NEXT: // %bb.2: // %exit
3181
+ ; CHECK-BE-NEXT: mov w0, wzr
3182
+ ; CHECK-BE-NEXT: ret
3183
+ entry:
3184
+ br label %loop
3185
+
3186
+ loop:
3187
; %iv counts iterations from 0 in steps of 1.
+ %iv = phi i32 [ 0 , %entry ], [ %iv.next , %loop ]
3188
; %p1 is indexed with a <16 x i8> element type (16-byte step), although the
; load and store below are <16 x i32>; the expected code advances the %p1
; address by #16 accordingly.
+ %gep.1 = getelementptr inbounds <16 x i8 >, ptr %p1 , i32 %iv
3189
+ %gep.2 = getelementptr inbounds <16 x i8 >, ptr %p2 , i32 %iv
3190
; This multiplicand is loaded at full <16 x i32> width; it is not a zext.
+ %l1 = load <16 x i32 >, ptr %gep.1
3191
+ %l4 = load <16 x i8 >, ptr %gep.2
3192
+ %z5 = zext <16 x i8 > %l4 to <16 x i32 >
3193
+ %mul = mul <16 x i32 > %l1 , %z5
3194
; The product overwrites the <16 x i32> operand it was computed from.
+ store <16 x i32 > %mul , ptr %gep.1
3195
+ %iv.next = add nuw nsw i32 %iv , 1
3196
; Leave the loop once the incremented counter equals %h.
+ %exitcond.not = icmp eq i32 %iv.next , %h
3197
+ br i1 %exitcond.not , label %exit , label %loop
3198
+
3199
+ exit:
3200
+ ret i32 0
3201
+ }
0 commit comments