Skip to content

Commit 75cdd1c

Browse files
committed
[AArch64] Add additional zext/mull tests.
Extra test coverage for D150482.
1 parent 1efbef4 commit 75cdd1c

File tree

1 file changed

+220
-0
lines changed

1 file changed

+220
-0
lines changed

llvm/test/CodeGen/AArch64/zext-to-tbl.ll

Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2979,3 +2979,223 @@ loop:
29792979
exit:
29802980
ret i32 0
29812981
}
2982+
2983+
; Loop test: every iteration zero-extends a <16 x i8> load and a <16 x i16>
; load to <16 x i64>, multiplies the two, and stores the <16 x i64> product
; back through %gep.1. Both multiply operands are zero-extended, and the
; expected code below uses ushll/ushll2 for the extends and umull/umull2
; producing .2d results for the multiply. The function always returns 0.
; NOTE(review): the check lines look autogenerated (presumably by
; update_llc_test_checks.py - confirm against the file header); regenerate
; them rather than hand-editing if the IR is changed.
define i32 @test_widening_instr_mull_64(ptr %p1, ptr %p2, i32 %h) {
2984+
; CHECK-LABEL: test_widening_instr_mull_64:
2985+
; CHECK: ; %bb.0: ; %entry
2986+
; CHECK-NEXT: LBB25_1: ; %loop
2987+
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
2988+
; CHECK-NEXT: ldr q0, [x0]
2989+
; CHECK-NEXT: subs w2, w2, #1
2990+
; CHECK-NEXT: ldr q1, [x1]
2991+
; CHECK-NEXT: ldr q2, [x1, #16]!
2992+
; CHECK-NEXT: ushll2.8h v3, v0, #0
2993+
; CHECK-NEXT: ushll.8h v0, v0, #0
2994+
; CHECK-NEXT: ushll2.4s v4, v2, #0
2995+
; CHECK-NEXT: ushll2.4s v5, v3, #0
2996+
; CHECK-NEXT: ushll.4s v2, v2, #0
2997+
; CHECK-NEXT: ushll.4s v3, v3, #0
2998+
; CHECK-NEXT: umull2.2d v6, v5, v4
2999+
; CHECK-NEXT: umull.2d v4, v5, v4
3000+
; CHECK-NEXT: umull2.2d v5, v3, v2
3001+
; CHECK-NEXT: ushll2.4s v7, v1, #0
3002+
; CHECK-NEXT: ushll.4s v1, v1, #0
3003+
; CHECK-NEXT: stp q4, q6, [x0, #96]
3004+
; CHECK-NEXT: ushll2.4s v6, v0, #0
3005+
; CHECK-NEXT: str q5, [x0, #80]
3006+
; CHECK-NEXT: ushll.4s v0, v0, #0
3007+
; CHECK-NEXT: umull2.2d v4, v6, v7
3008+
; CHECK-NEXT: umull.2d v5, v6, v7
3009+
; CHECK-NEXT: umull2.2d v6, v0, v1
3010+
; CHECK-NEXT: umull.2d v0, v0, v1
3011+
; CHECK-NEXT: umull.2d v1, v3, v2
3012+
; CHECK-NEXT: stp q5, q4, [x0, #32]
3013+
; CHECK-NEXT: stp q0, q6, [x0]
3014+
; CHECK-NEXT: str q1, [x0, #64]!
3015+
; CHECK-NEXT: b.ne LBB25_1
3016+
; CHECK-NEXT: ; %bb.2: ; %exit
3017+
; CHECK-NEXT: mov w0, wzr
3018+
; CHECK-NEXT: ret
3019+
;
3020+
; CHECK-BE-LABEL: test_widening_instr_mull_64:
3021+
; CHECK-BE: // %bb.0: // %entry
3022+
; CHECK-BE-NEXT: .LBB25_1: // %loop
3023+
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
3024+
; CHECK-BE-NEXT: ld1 { v0.16b }, [x0]
3025+
; CHECK-BE-NEXT: add x8, x0, #48
3026+
; CHECK-BE-NEXT: add x9, x0, #112
3027+
; CHECK-BE-NEXT: add x10, x0, #16
3028+
; CHECK-BE-NEXT: ld1 { v1.8h }, [x1]
3029+
; CHECK-BE-NEXT: add x1, x1, #16
3030+
; CHECK-BE-NEXT: subs w2, w2, #1
3031+
; CHECK-BE-NEXT: ushll v2.8h, v0.8b, #0
3032+
; CHECK-BE-NEXT: ld1 { v6.8h }, [x1]
3033+
; CHECK-BE-NEXT: ushll2 v0.8h, v0.16b, #0
3034+
; CHECK-BE-NEXT: ushll2 v3.4s, v1.8h, #0
3035+
; CHECK-BE-NEXT: ushll2 v4.4s, v2.8h, #0
3036+
; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
3037+
; CHECK-BE-NEXT: umull2 v5.2d, v4.4s, v3.4s
3038+
; CHECK-BE-NEXT: ushll2 v7.4s, v6.8h, #0
3039+
; CHECK-BE-NEXT: ushll v6.4s, v6.4h, #0
3040+
; CHECK-BE-NEXT: ushll v2.4s, v2.4h, #0
3041+
; CHECK-BE-NEXT: st1 { v5.2d }, [x8]
3042+
; CHECK-BE-NEXT: add x8, x0, #96
3043+
; CHECK-BE-NEXT: ushll2 v5.4s, v0.8h, #0
3044+
; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
3045+
; CHECK-BE-NEXT: umull2 v16.2d, v5.4s, v7.4s
3046+
; CHECK-BE-NEXT: umull v5.2d, v5.2s, v7.2s
3047+
; CHECK-BE-NEXT: umull2 v7.2d, v0.4s, v6.4s
3048+
; CHECK-BE-NEXT: umull v0.2d, v0.2s, v6.2s
3049+
; CHECK-BE-NEXT: st1 { v16.2d }, [x9]
3050+
; CHECK-BE-NEXT: add x9, x0, #80
3051+
; CHECK-BE-NEXT: umull v16.2d, v2.2s, v1.2s
3052+
; CHECK-BE-NEXT: st1 { v5.2d }, [x8]
3053+
; CHECK-BE-NEXT: umull v3.2d, v4.2s, v3.2s
3054+
; CHECK-BE-NEXT: add x8, x0, #32
3055+
; CHECK-BE-NEXT: st1 { v7.2d }, [x9]
3056+
; CHECK-BE-NEXT: add x9, x0, #64
3057+
; CHECK-BE-NEXT: umull2 v1.2d, v2.4s, v1.4s
3058+
; CHECK-BE-NEXT: st1 { v16.2d }, [x0]
3059+
; CHECK-BE-NEXT: mov x0, x9
3060+
; CHECK-BE-NEXT: st1 { v0.2d }, [x9]
3061+
; CHECK-BE-NEXT: st1 { v3.2d }, [x8]
3062+
; CHECK-BE-NEXT: st1 { v1.2d }, [x10]
3063+
; CHECK-BE-NEXT: b.ne .LBB25_1
3064+
; CHECK-BE-NEXT: // %bb.2: // %exit
3065+
; CHECK-BE-NEXT: mov w0, wzr
3066+
; CHECK-BE-NEXT: ret
3067+
entry:
3068+
br label %loop
3069+
3070+
loop:
3071+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
3072+
; %gep.1 steps by one <16 x i32> (64 bytes) per iteration and %gep.2 by one
; <16 x i8> (16 bytes), while the accesses below use <16 x i8>/<16 x i64> and
; <16 x i16>; the expected code advances x0 by #64 and x1 by #16 accordingly.
; NOTE(review): the stride/access type mismatch is presumably intentional for
; this codegen test - confirm before changing it, since the expected output
; depends on it.
%gep.1 = getelementptr inbounds <16 x i32>, ptr %p1, i32 %iv
3073+
%gep.2 = getelementptr inbounds <16 x i8>, ptr %p2, i32 %iv
3074+
%l1 = load <16 x i8>, ptr %gep.1
3075+
%z2 = zext <16 x i8> %l1 to <16 x i64>
3076+
%l4 = load <16 x i16>, ptr %gep.2
3077+
%z5 = zext <16 x i16> %l4 to <16 x i64>
3078+
%mul = mul <16 x i64> %z2, %z5
3079+
store <16 x i64> %mul, ptr %gep.1
3080+
%iv.next= add nuw nsw i32 %iv, 1
3081+
%exitcond.not = icmp eq i32 %iv.next, %h
3082+
br i1 %exitcond.not, label %exit, label %loop
3083+
3084+
exit:
3085+
ret i32 0
3086+
}
3087+
3088+
; Loop test: every iteration multiplies a <16 x i32> loaded through %gep.1 by
; a <16 x i8> loaded through %gep.2 and zero-extended to <16 x i32>, then
; stores the <16 x i32> product back through %gep.1. Only one multiply operand
; is an extend here; the expected code below performs that extend with four
; tbl shuffles (masks loaded from the constant pool before the loop) followed
; by plain 4s multiplies, not widening multiplies. The function always
; returns 0.
; NOTE(review): the check lines look autogenerated (presumably by
; update_llc_test_checks.py - confirm against the file header); regenerate
; them rather than hand-editing if the IR is changed.
define i32 @test_widening_instr_mull_2(ptr %p1, ptr %p2, i32 %h) {
3089+
; CHECK-LABEL: test_widening_instr_mull_2:
3090+
; CHECK: ; %bb.0: ; %entry
3091+
; CHECK-NEXT: Lloh54:
3092+
; CHECK-NEXT: adrp x8, lCPI26_0@PAGE
3093+
; CHECK-NEXT: Lloh55:
3094+
; CHECK-NEXT: adrp x9, lCPI26_1@PAGE
3095+
; CHECK-NEXT: Lloh56:
3096+
; CHECK-NEXT: adrp x10, lCPI26_2@PAGE
3097+
; CHECK-NEXT: Lloh57:
3098+
; CHECK-NEXT: adrp x11, lCPI26_3@PAGE
3099+
; CHECK-NEXT: Lloh58:
3100+
; CHECK-NEXT: ldr q0, [x8, lCPI26_0@PAGEOFF]
3101+
; CHECK-NEXT: mov x8, x0
3102+
; CHECK-NEXT: Lloh59:
3103+
; CHECK-NEXT: ldr q1, [x9, lCPI26_1@PAGEOFF]
3104+
; CHECK-NEXT: Lloh60:
3105+
; CHECK-NEXT: ldr q2, [x10, lCPI26_2@PAGEOFF]
3106+
; CHECK-NEXT: Lloh61:
3107+
; CHECK-NEXT: ldr q3, [x11, lCPI26_3@PAGEOFF]
3108+
; CHECK-NEXT: LBB26_1: ; %loop
3109+
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
3110+
; CHECK-NEXT: ldr q4, [x1], #16
3111+
; CHECK-NEXT: ldp q5, q6, [x0, #32]
3112+
; CHECK-NEXT: subs w2, w2, #1
3113+
; CHECK-NEXT: tbl.16b v16, { v4 }, v0
3114+
; CHECK-NEXT: tbl.16b v18, { v4 }, v1
3115+
; CHECK-NEXT: tbl.16b v19, { v4 }, v2
3116+
; CHECK-NEXT: tbl.16b v4, { v4 }, v3
3117+
; CHECK-NEXT: ldr q7, [x0]
3118+
; CHECK-NEXT: ldr q17, [x8, #16]!
3119+
; CHECK-NEXT: mul.4s v5, v5, v16
3120+
; CHECK-NEXT: mul.4s v6, v6, v18
3121+
; CHECK-NEXT: mul.4s v7, v7, v19
3122+
; CHECK-NEXT: mul.4s v4, v17, v4
3123+
; CHECK-NEXT: stp q5, q6, [x0, #32]
3124+
; CHECK-NEXT: str q7, [x0]
3125+
; CHECK-NEXT: mov x0, x8
3126+
; CHECK-NEXT: str q4, [x8]
3127+
; CHECK-NEXT: b.ne LBB26_1
3128+
; CHECK-NEXT: ; %bb.2: ; %exit
3129+
; CHECK-NEXT: mov w0, wzr
3130+
; CHECK-NEXT: ret
3131+
; CHECK-NEXT: .loh AdrpLdr Lloh57, Lloh61
3132+
; CHECK-NEXT: .loh AdrpLdr Lloh56, Lloh60
3133+
; CHECK-NEXT: .loh AdrpLdr Lloh55, Lloh59
3134+
; CHECK-NEXT: .loh AdrpLdr Lloh54, Lloh58
3135+
;
3136+
; CHECK-BE-LABEL: test_widening_instr_mull_2:
3137+
; CHECK-BE: // %bb.0: // %entry
3138+
; CHECK-BE-NEXT: adrp x8, .LCPI26_0
3139+
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI26_0
3140+
; CHECK-BE-NEXT: ld1 { v0.16b }, [x8]
3141+
; CHECK-BE-NEXT: adrp x8, .LCPI26_1
3142+
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI26_1
3143+
; CHECK-BE-NEXT: ld1 { v1.16b }, [x8]
3144+
; CHECK-BE-NEXT: adrp x8, .LCPI26_2
3145+
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI26_2
3146+
; CHECK-BE-NEXT: ld1 { v2.16b }, [x8]
3147+
; CHECK-BE-NEXT: adrp x8, .LCPI26_3
3148+
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI26_3
3149+
; CHECK-BE-NEXT: ld1 { v3.16b }, [x8]
3150+
; CHECK-BE-NEXT: .LBB26_1: // %loop
3151+
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
3152+
; CHECK-BE-NEXT: ld1 { v4.16b }, [x1]
3153+
; CHECK-BE-NEXT: add x8, x0, #32
3154+
; CHECK-BE-NEXT: add x9, x0, #48
3155+
; CHECK-BE-NEXT: add x10, x0, #16
3156+
; CHECK-BE-NEXT: ld1 { v6.4s }, [x0]
3157+
; CHECK-BE-NEXT: subs w2, w2, #1
3158+
; CHECK-BE-NEXT: add x1, x1, #16
3159+
; CHECK-BE-NEXT: ld1 { v16.4s }, [x8]
3160+
; CHECK-BE-NEXT: tbl v5.16b, { v4.16b }, v1.16b
3161+
; CHECK-BE-NEXT: tbl v7.16b, { v4.16b }, v0.16b
3162+
; CHECK-BE-NEXT: ld1 { v18.4s }, [x10]
3163+
; CHECK-BE-NEXT: tbl v17.16b, { v4.16b }, v3.16b
3164+
; CHECK-BE-NEXT: tbl v4.16b, { v4.16b }, v2.16b
3165+
; CHECK-BE-NEXT: rev32 v5.16b, v5.16b
3166+
; CHECK-BE-NEXT: rev32 v7.16b, v7.16b
3167+
; CHECK-BE-NEXT: rev32 v17.16b, v17.16b
3168+
; CHECK-BE-NEXT: rev32 v4.16b, v4.16b
3169+
; CHECK-BE-NEXT: mul v5.4s, v6.4s, v5.4s
3170+
; CHECK-BE-NEXT: ld1 { v6.4s }, [x9]
3171+
; CHECK-BE-NEXT: mul v7.4s, v18.4s, v7.4s
3172+
; CHECK-BE-NEXT: st1 { v5.4s }, [x0]
3173+
; CHECK-BE-NEXT: mov x0, x10
3174+
; CHECK-BE-NEXT: mul v5.4s, v16.4s, v17.4s
3175+
; CHECK-BE-NEXT: st1 { v7.4s }, [x10]
3176+
; CHECK-BE-NEXT: mul v4.4s, v6.4s, v4.4s
3177+
; CHECK-BE-NEXT: st1 { v5.4s }, [x8]
3178+
; CHECK-BE-NEXT: st1 { v4.4s }, [x9]
3179+
; CHECK-BE-NEXT: b.ne .LBB26_1
3180+
; CHECK-BE-NEXT: // %bb.2: // %exit
3181+
; CHECK-BE-NEXT: mov w0, wzr
3182+
; CHECK-BE-NEXT: ret
3183+
entry:
3184+
br label %loop
3185+
3186+
loop:
3187+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
3188+
; Both pointers step by one <16 x i8> (16 bytes) per iteration, but %gep.1 is
; read and written as a 64-byte <16 x i32>; the expected code advances x0 and
; x1 by #16 accordingly.
; NOTE(review): the 16-byte stride with 64-byte accesses is presumably
; intentional for this codegen test - confirm before changing it, since the
; expected output depends on it.
%gep.1 = getelementptr inbounds <16 x i8>, ptr %p1, i32 %iv
3189+
%gep.2 = getelementptr inbounds <16 x i8>, ptr %p2, i32 %iv
3190+
%l1 = load <16 x i32>, ptr %gep.1
3191+
%l4 = load <16 x i8>, ptr %gep.2
3192+
%z5 = zext <16 x i8> %l4 to <16 x i32>
3193+
%mul = mul <16 x i32> %l1, %z5
3194+
store <16 x i32> %mul, ptr %gep.1
3195+
%iv.next= add nuw nsw i32 %iv, 1
3196+
%exitcond.not = icmp eq i32 %iv.next, %h
3197+
br i1 %exitcond.not, label %exit, label %loop
3198+
3199+
exit:
3200+
ret i32 0
3201+
}

0 commit comments

Comments
 (0)