Skip to content

[NFC][AArch64] test for sdiv with fixed-width vectors, pow2-divisor and SVE enabled #130252

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 18, 2025

Conversation

sushgokh
Copy link
Contributor

@sushgokh sushgokh commented Mar 7, 2025

With SVE enabled, this should generate an asrd instruction. A subsequent patch will address this.

@llvmbot
Copy link
Member

llvmbot commented Mar 7, 2025

@llvm/pr-subscribers-backend-aarch64

Author: Sushant Gokhale (sushgokh)

Changes

With SVE enabled, this should generate an asrd instruction. A subsequent patch will address this.


Full diff: https://github.com/llvm/llvm-project/pull/130252.diff

1 Files Affected:

  • (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll (+103)
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
index 21a5abdeaa4d5..e6ee64861c76b 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
@@ -1,10 +1,33 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -aarch64-sve-vector-bits-min=128  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_128
 ; RUN: llc -aarch64-sve-vector-bits-min=256  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
 ; RUN: llc -aarch64-sve-vector-bits-min=512  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
 ; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
 
 target triple = "aarch64-unknown-linux-gnu"
 
+define <4 x i32> @sdiv_v4i32_packed(<4 x i32> %op1) vscale_range(1,0) #0 {
+; CHECK-LABEL: sdiv_v4i32_packed:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmlt v1.4s, v0.4s, #0
+; CHECK-NEXT:    usra v0.4s, v1.4s, #29
+; CHECK-NEXT:    sshr v0.4s, v0.4s, #3
+; CHECK-NEXT:    ret
+  %res = sdiv <4 x i32> %op1, splat (i32 8)
+  ret <4 x i32> %res
+}
+
+define <2 x i32> @sdiv_v2i32_unpacked(<2 x i32> %op1) vscale_range(1,0) #0 {
+; CHECK-LABEL: sdiv_v2i32_unpacked:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmlt v1.2s, v0.2s, #0
+; CHECK-NEXT:    usra v0.2s, v1.2s, #29
+; CHECK-NEXT:    sshr v0.2s, v0.2s, #3
+; CHECK-NEXT:    ret
+  %res = sdiv <2 x i32> %op1, splat (i32 8)
+  ret <2 x i32> %res
+}
+
 define <8 x i8> @sdiv_v8i8(<8 x i8> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sdiv_v8i8:
 ; CHECK:       // %bb.0:
@@ -45,6 +68,26 @@ define void @sdiv_v32i8(ptr %a) vscale_range(2,0) #0 {
 }
 
 define void @sdiv_v64i8(ptr %a) #0 {
+; VBITS_GE_128-LABEL: sdiv_v64i8:
+; VBITS_GE_128:       // %bb.0:
+; VBITS_GE_128-NEXT:    ldp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT:    ldp q3, q4, [x0]
+; VBITS_GE_128-NEXT:    cmlt v2.16b, v0.16b, #0
+; VBITS_GE_128-NEXT:    cmlt v5.16b, v1.16b, #0
+; VBITS_GE_128-NEXT:    cmlt v6.16b, v3.16b, #0
+; VBITS_GE_128-NEXT:    usra v0.16b, v2.16b, #3
+; VBITS_GE_128-NEXT:    cmlt v2.16b, v4.16b, #0
+; VBITS_GE_128-NEXT:    usra v1.16b, v5.16b, #3
+; VBITS_GE_128-NEXT:    usra v3.16b, v6.16b, #3
+; VBITS_GE_128-NEXT:    usra v4.16b, v2.16b, #3
+; VBITS_GE_128-NEXT:    sshr v0.16b, v0.16b, #5
+; VBITS_GE_128-NEXT:    sshr v1.16b, v1.16b, #5
+; VBITS_GE_128-NEXT:    sshr v2.16b, v3.16b, #5
+; VBITS_GE_128-NEXT:    sshr v3.16b, v4.16b, #5
+; VBITS_GE_128-NEXT:    stp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT:    stp q2, q3, [x0]
+; VBITS_GE_128-NEXT:    ret
+;
 ; VBITS_GE_256-LABEL: sdiv_v64i8:
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    ptrue p0.b, vl32
@@ -139,6 +182,26 @@ define void @sdiv_v16i16(ptr %a) vscale_range(2,0) #0 {
 }
 
 define void @sdiv_v32i16(ptr %a) #0 {
+; VBITS_GE_128-LABEL: sdiv_v32i16:
+; VBITS_GE_128:       // %bb.0:
+; VBITS_GE_128-NEXT:    ldp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT:    ldp q3, q4, [x0]
+; VBITS_GE_128-NEXT:    cmlt v2.8h, v0.8h, #0
+; VBITS_GE_128-NEXT:    cmlt v5.8h, v1.8h, #0
+; VBITS_GE_128-NEXT:    cmlt v6.8h, v3.8h, #0
+; VBITS_GE_128-NEXT:    usra v0.8h, v2.8h, #11
+; VBITS_GE_128-NEXT:    cmlt v2.8h, v4.8h, #0
+; VBITS_GE_128-NEXT:    usra v1.8h, v5.8h, #11
+; VBITS_GE_128-NEXT:    usra v3.8h, v6.8h, #11
+; VBITS_GE_128-NEXT:    usra v4.8h, v2.8h, #11
+; VBITS_GE_128-NEXT:    sshr v0.8h, v0.8h, #5
+; VBITS_GE_128-NEXT:    sshr v1.8h, v1.8h, #5
+; VBITS_GE_128-NEXT:    sshr v2.8h, v3.8h, #5
+; VBITS_GE_128-NEXT:    sshr v3.8h, v4.8h, #5
+; VBITS_GE_128-NEXT:    stp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT:    stp q2, q3, [x0]
+; VBITS_GE_128-NEXT:    ret
+;
 ; VBITS_GE_256-LABEL: sdiv_v32i16:
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
@@ -234,6 +297,26 @@ define void @sdiv_v8i32(ptr %a) vscale_range(2,0) #0 {
 }
 
 define void @sdiv_v16i32(ptr %a) #0 {
+; VBITS_GE_128-LABEL: sdiv_v16i32:
+; VBITS_GE_128:       // %bb.0:
+; VBITS_GE_128-NEXT:    ldp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT:    ldp q3, q4, [x0]
+; VBITS_GE_128-NEXT:    cmlt v2.4s, v0.4s, #0
+; VBITS_GE_128-NEXT:    cmlt v5.4s, v1.4s, #0
+; VBITS_GE_128-NEXT:    cmlt v6.4s, v3.4s, #0
+; VBITS_GE_128-NEXT:    usra v0.4s, v2.4s, #27
+; VBITS_GE_128-NEXT:    cmlt v2.4s, v4.4s, #0
+; VBITS_GE_128-NEXT:    usra v1.4s, v5.4s, #27
+; VBITS_GE_128-NEXT:    usra v3.4s, v6.4s, #27
+; VBITS_GE_128-NEXT:    usra v4.4s, v2.4s, #27
+; VBITS_GE_128-NEXT:    sshr v0.4s, v0.4s, #5
+; VBITS_GE_128-NEXT:    sshr v1.4s, v1.4s, #5
+; VBITS_GE_128-NEXT:    sshr v2.4s, v3.4s, #5
+; VBITS_GE_128-NEXT:    sshr v3.4s, v4.4s, #5
+; VBITS_GE_128-NEXT:    stp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT:    stp q2, q3, [x0]
+; VBITS_GE_128-NEXT:    ret
+;
 ; VBITS_GE_256-LABEL: sdiv_v16i32:
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
@@ -329,6 +412,26 @@ define void @sdiv_v4i64(ptr %a) vscale_range(2,0) #0 {
 }
 
 define void @sdiv_v8i64(ptr %a) #0 {
+; VBITS_GE_128-LABEL: sdiv_v8i64:
+; VBITS_GE_128:       // %bb.0:
+; VBITS_GE_128-NEXT:    ldp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT:    ldp q3, q4, [x0]
+; VBITS_GE_128-NEXT:    cmlt v2.2d, v0.2d, #0
+; VBITS_GE_128-NEXT:    cmlt v5.2d, v1.2d, #0
+; VBITS_GE_128-NEXT:    cmlt v6.2d, v3.2d, #0
+; VBITS_GE_128-NEXT:    usra v0.2d, v2.2d, #59
+; VBITS_GE_128-NEXT:    cmlt v2.2d, v4.2d, #0
+; VBITS_GE_128-NEXT:    usra v1.2d, v5.2d, #59
+; VBITS_GE_128-NEXT:    usra v3.2d, v6.2d, #59
+; VBITS_GE_128-NEXT:    usra v4.2d, v2.2d, #59
+; VBITS_GE_128-NEXT:    sshr v0.2d, v0.2d, #5
+; VBITS_GE_128-NEXT:    sshr v1.2d, v1.2d, #5
+; VBITS_GE_128-NEXT:    sshr v2.2d, v3.2d, #5
+; VBITS_GE_128-NEXT:    sshr v3.2d, v4.2d, #5
+; VBITS_GE_128-NEXT:    stp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT:    stp q2, q3, [x0]
+; VBITS_GE_128-NEXT:    ret
+;
 ; VBITS_GE_256-LABEL: sdiv_v8i64:
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    ptrue p0.d, vl4

@sushgokh
Copy link
Contributor Author

ping

Copy link
Collaborator

@davemgreen davemgreen left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Other than the suggestion for an extra test case, LGTM.

; CHECK-NEXT: usra v0.2s, v1.2s, #29
; CHECK-NEXT: sshr v0.2s, v0.2s, #3
; CHECK-NEXT: ret
%res = sdiv <2 x i32> %op1, splat (i32 8)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add tests with negative divide amounts too?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, although I don't expect any codegen improvement with negative divide amounts, and hence didn't add them in the first version.

SVE enabled

With SVE enabled, this should generate an asrd instruction. A subsequent
patch will address this.
@sushgokh sushgokh force-pushed the sdiv-pow2-positive-divisor branch from cd35e67 to 785b28e Compare March 18, 2025 05:28
@sushgokh sushgokh merged commit 0f34eba into llvm:main Mar 18, 2025
6 of 10 checks passed
@sushgokh sushgokh deleted the sdiv-pow2-positive-divisor branch April 24, 2025 10:37
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants