-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[NFC][AArch64] test for sdiv with fixed-width vectors, pow2-divisor and SVE enabled #130252
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-aarch64 Author: Sushant Gokhale (sushgokh) Changes: With SVE enabled, this should generate the asrd instruction. A subsequent patch will address this. Full diff: https://github.com/llvm/llvm-project/pull/130252.diff 1 file affected:
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
index 21a5abdeaa4d5..e6ee64861c76b 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
@@ -1,10 +1,33 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_128
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
target triple = "aarch64-unknown-linux-gnu"
+define <4 x i32> @sdiv_v4i32_packed(<4 x i32> %op1) vscale_range(1,0) #0 {
+; CHECK-LABEL: sdiv_v4i32_packed:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v1.4s, v0.4s, #0
+; CHECK-NEXT: usra v0.4s, v1.4s, #29
+; CHECK-NEXT: sshr v0.4s, v0.4s, #3
+; CHECK-NEXT: ret
+ %res = sdiv <4 x i32> %op1, splat (i32 8)
+ ret <4 x i32> %res
+}
+
+define <2 x i32> @sdiv_v2i32_unpacked(<2 x i32> %op1) vscale_range(1,0) #0 {
+; CHECK-LABEL: sdiv_v2i32_unpacked:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v1.2s, v0.2s, #0
+; CHECK-NEXT: usra v0.2s, v1.2s, #29
+; CHECK-NEXT: sshr v0.2s, v0.2s, #3
+; CHECK-NEXT: ret
+ %res = sdiv <2 x i32> %op1, splat (i32 8)
+ ret <2 x i32> %res
+}
+
define <8 x i8> @sdiv_v8i8(<8 x i8> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: sdiv_v8i8:
; CHECK: // %bb.0:
@@ -45,6 +68,26 @@ define void @sdiv_v32i8(ptr %a) vscale_range(2,0) #0 {
}
define void @sdiv_v64i8(ptr %a) #0 {
+; VBITS_GE_128-LABEL: sdiv_v64i8:
+; VBITS_GE_128: // %bb.0:
+; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT: ldp q3, q4, [x0]
+; VBITS_GE_128-NEXT: cmlt v2.16b, v0.16b, #0
+; VBITS_GE_128-NEXT: cmlt v5.16b, v1.16b, #0
+; VBITS_GE_128-NEXT: cmlt v6.16b, v3.16b, #0
+; VBITS_GE_128-NEXT: usra v0.16b, v2.16b, #3
+; VBITS_GE_128-NEXT: cmlt v2.16b, v4.16b, #0
+; VBITS_GE_128-NEXT: usra v1.16b, v5.16b, #3
+; VBITS_GE_128-NEXT: usra v3.16b, v6.16b, #3
+; VBITS_GE_128-NEXT: usra v4.16b, v2.16b, #3
+; VBITS_GE_128-NEXT: sshr v0.16b, v0.16b, #5
+; VBITS_GE_128-NEXT: sshr v1.16b, v1.16b, #5
+; VBITS_GE_128-NEXT: sshr v2.16b, v3.16b, #5
+; VBITS_GE_128-NEXT: sshr v3.16b, v4.16b, #5
+; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT: stp q2, q3, [x0]
+; VBITS_GE_128-NEXT: ret
+;
; VBITS_GE_256-LABEL: sdiv_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
@@ -139,6 +182,26 @@ define void @sdiv_v16i16(ptr %a) vscale_range(2,0) #0 {
}
define void @sdiv_v32i16(ptr %a) #0 {
+; VBITS_GE_128-LABEL: sdiv_v32i16:
+; VBITS_GE_128: // %bb.0:
+; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT: ldp q3, q4, [x0]
+; VBITS_GE_128-NEXT: cmlt v2.8h, v0.8h, #0
+; VBITS_GE_128-NEXT: cmlt v5.8h, v1.8h, #0
+; VBITS_GE_128-NEXT: cmlt v6.8h, v3.8h, #0
+; VBITS_GE_128-NEXT: usra v0.8h, v2.8h, #11
+; VBITS_GE_128-NEXT: cmlt v2.8h, v4.8h, #0
+; VBITS_GE_128-NEXT: usra v1.8h, v5.8h, #11
+; VBITS_GE_128-NEXT: usra v3.8h, v6.8h, #11
+; VBITS_GE_128-NEXT: usra v4.8h, v2.8h, #11
+; VBITS_GE_128-NEXT: sshr v0.8h, v0.8h, #5
+; VBITS_GE_128-NEXT: sshr v1.8h, v1.8h, #5
+; VBITS_GE_128-NEXT: sshr v2.8h, v3.8h, #5
+; VBITS_GE_128-NEXT: sshr v3.8h, v4.8h, #5
+; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT: stp q2, q3, [x0]
+; VBITS_GE_128-NEXT: ret
+;
; VBITS_GE_256-LABEL: sdiv_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
@@ -234,6 +297,26 @@ define void @sdiv_v8i32(ptr %a) vscale_range(2,0) #0 {
}
define void @sdiv_v16i32(ptr %a) #0 {
+; VBITS_GE_128-LABEL: sdiv_v16i32:
+; VBITS_GE_128: // %bb.0:
+; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT: ldp q3, q4, [x0]
+; VBITS_GE_128-NEXT: cmlt v2.4s, v0.4s, #0
+; VBITS_GE_128-NEXT: cmlt v5.4s, v1.4s, #0
+; VBITS_GE_128-NEXT: cmlt v6.4s, v3.4s, #0
+; VBITS_GE_128-NEXT: usra v0.4s, v2.4s, #27
+; VBITS_GE_128-NEXT: cmlt v2.4s, v4.4s, #0
+; VBITS_GE_128-NEXT: usra v1.4s, v5.4s, #27
+; VBITS_GE_128-NEXT: usra v3.4s, v6.4s, #27
+; VBITS_GE_128-NEXT: usra v4.4s, v2.4s, #27
+; VBITS_GE_128-NEXT: sshr v0.4s, v0.4s, #5
+; VBITS_GE_128-NEXT: sshr v1.4s, v1.4s, #5
+; VBITS_GE_128-NEXT: sshr v2.4s, v3.4s, #5
+; VBITS_GE_128-NEXT: sshr v3.4s, v4.4s, #5
+; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT: stp q2, q3, [x0]
+; VBITS_GE_128-NEXT: ret
+;
; VBITS_GE_256-LABEL: sdiv_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
@@ -329,6 +412,26 @@ define void @sdiv_v4i64(ptr %a) vscale_range(2,0) #0 {
}
define void @sdiv_v8i64(ptr %a) #0 {
+; VBITS_GE_128-LABEL: sdiv_v8i64:
+; VBITS_GE_128: // %bb.0:
+; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT: ldp q3, q4, [x0]
+; VBITS_GE_128-NEXT: cmlt v2.2d, v0.2d, #0
+; VBITS_GE_128-NEXT: cmlt v5.2d, v1.2d, #0
+; VBITS_GE_128-NEXT: cmlt v6.2d, v3.2d, #0
+; VBITS_GE_128-NEXT: usra v0.2d, v2.2d, #59
+; VBITS_GE_128-NEXT: cmlt v2.2d, v4.2d, #0
+; VBITS_GE_128-NEXT: usra v1.2d, v5.2d, #59
+; VBITS_GE_128-NEXT: usra v3.2d, v6.2d, #59
+; VBITS_GE_128-NEXT: usra v4.2d, v2.2d, #59
+; VBITS_GE_128-NEXT: sshr v0.2d, v0.2d, #5
+; VBITS_GE_128-NEXT: sshr v1.2d, v1.2d, #5
+; VBITS_GE_128-NEXT: sshr v2.2d, v3.2d, #5
+; VBITS_GE_128-NEXT: sshr v3.2d, v4.2d, #5
+; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT: stp q2, q3, [x0]
+; VBITS_GE_128-NEXT: ret
+;
; VBITS_GE_256-LABEL: sdiv_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
ping
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Other than the suggestion for an extra test case, LGTM.
; CHECK-NEXT: usra v0.2s, v1.2s, #29
; CHECK-NEXT: sshr v0.2s, v0.2s, #3
; CHECK-NEXT: ret
%res = sdiv <2 x i32> %op1, splat (i32 8)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add tests with negative divide amounts too?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, although I don't expect a codegen improvement with negative divide amounts, and hence didn't add them in the first version.
With SVE enabled, this should generate the asrd instruction. A subsequent patch will address this.
cd35e67
to
785b28e
Compare
With SVE enabled, this should generate asrd instruction. Subsequent patch will address this.