-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[NFC][AArch64] test for sdiv with fixed-width vectors, pow2-divisor and SVE enabled #130252
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-aarch64 Author: Sushant Gokhale (sushgokh) Changes: With SVE enabled, this should generate the asrd instruction. A subsequent patch will address this. Full diff: https://github.com/llvm/llvm-project/pull/130252.diff 1 file affected:
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
index 21a5abdeaa4d5..e6ee64861c76b 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
@@ -1,10 +1,33 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_128
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
target triple = "aarch64-unknown-linux-gnu"
+define <4 x i32> @sdiv_v4i32_packed(<4 x i32> %op1) vscale_range(1,0) #0 {
+; CHECK-LABEL: sdiv_v4i32_packed:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v1.4s, v0.4s, #0
+; CHECK-NEXT: usra v0.4s, v1.4s, #29
+; CHECK-NEXT: sshr v0.4s, v0.4s, #3
+; CHECK-NEXT: ret
+ %res = sdiv <4 x i32> %op1, splat (i32 8)
+ ret <4 x i32> %res
+}
+
+define <2 x i32> @sdiv_v2i32_unpacked(<2 x i32> %op1) vscale_range(1,0) #0 {
+; CHECK-LABEL: sdiv_v2i32_unpacked:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v1.2s, v0.2s, #0
+; CHECK-NEXT: usra v0.2s, v1.2s, #29
+; CHECK-NEXT: sshr v0.2s, v0.2s, #3
+; CHECK-NEXT: ret
+ %res = sdiv <2 x i32> %op1, splat (i32 8)
+ ret <2 x i32> %res
+}
+
define <8 x i8> @sdiv_v8i8(<8 x i8> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: sdiv_v8i8:
; CHECK: // %bb.0:
@@ -45,6 +68,26 @@ define void @sdiv_v32i8(ptr %a) vscale_range(2,0) #0 {
}
define void @sdiv_v64i8(ptr %a) #0 {
+; VBITS_GE_128-LABEL: sdiv_v64i8:
+; VBITS_GE_128: // %bb.0:
+; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT: ldp q3, q4, [x0]
+; VBITS_GE_128-NEXT: cmlt v2.16b, v0.16b, #0
+; VBITS_GE_128-NEXT: cmlt v5.16b, v1.16b, #0
+; VBITS_GE_128-NEXT: cmlt v6.16b, v3.16b, #0
+; VBITS_GE_128-NEXT: usra v0.16b, v2.16b, #3
+; VBITS_GE_128-NEXT: cmlt v2.16b, v4.16b, #0
+; VBITS_GE_128-NEXT: usra v1.16b, v5.16b, #3
+; VBITS_GE_128-NEXT: usra v3.16b, v6.16b, #3
+; VBITS_GE_128-NEXT: usra v4.16b, v2.16b, #3
+; VBITS_GE_128-NEXT: sshr v0.16b, v0.16b, #5
+; VBITS_GE_128-NEXT: sshr v1.16b, v1.16b, #5
+; VBITS_GE_128-NEXT: sshr v2.16b, v3.16b, #5
+; VBITS_GE_128-NEXT: sshr v3.16b, v4.16b, #5
+; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT: stp q2, q3, [x0]
+; VBITS_GE_128-NEXT: ret
+;
; VBITS_GE_256-LABEL: sdiv_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
@@ -139,6 +182,26 @@ define void @sdiv_v16i16(ptr %a) vscale_range(2,0) #0 {
}
define void @sdiv_v32i16(ptr %a) #0 {
+; VBITS_GE_128-LABEL: sdiv_v32i16:
+; VBITS_GE_128: // %bb.0:
+; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT: ldp q3, q4, [x0]
+; VBITS_GE_128-NEXT: cmlt v2.8h, v0.8h, #0
+; VBITS_GE_128-NEXT: cmlt v5.8h, v1.8h, #0
+; VBITS_GE_128-NEXT: cmlt v6.8h, v3.8h, #0
+; VBITS_GE_128-NEXT: usra v0.8h, v2.8h, #11
+; VBITS_GE_128-NEXT: cmlt v2.8h, v4.8h, #0
+; VBITS_GE_128-NEXT: usra v1.8h, v5.8h, #11
+; VBITS_GE_128-NEXT: usra v3.8h, v6.8h, #11
+; VBITS_GE_128-NEXT: usra v4.8h, v2.8h, #11
+; VBITS_GE_128-NEXT: sshr v0.8h, v0.8h, #5
+; VBITS_GE_128-NEXT: sshr v1.8h, v1.8h, #5
+; VBITS_GE_128-NEXT: sshr v2.8h, v3.8h, #5
+; VBITS_GE_128-NEXT: sshr v3.8h, v4.8h, #5
+; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT: stp q2, q3, [x0]
+; VBITS_GE_128-NEXT: ret
+;
; VBITS_GE_256-LABEL: sdiv_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
@@ -234,6 +297,26 @@ define void @sdiv_v8i32(ptr %a) vscale_range(2,0) #0 {
}
define void @sdiv_v16i32(ptr %a) #0 {
+; VBITS_GE_128-LABEL: sdiv_v16i32:
+; VBITS_GE_128: // %bb.0:
+; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT: ldp q3, q4, [x0]
+; VBITS_GE_128-NEXT: cmlt v2.4s, v0.4s, #0
+; VBITS_GE_128-NEXT: cmlt v5.4s, v1.4s, #0
+; VBITS_GE_128-NEXT: cmlt v6.4s, v3.4s, #0
+; VBITS_GE_128-NEXT: usra v0.4s, v2.4s, #27
+; VBITS_GE_128-NEXT: cmlt v2.4s, v4.4s, #0
+; VBITS_GE_128-NEXT: usra v1.4s, v5.4s, #27
+; VBITS_GE_128-NEXT: usra v3.4s, v6.4s, #27
+; VBITS_GE_128-NEXT: usra v4.4s, v2.4s, #27
+; VBITS_GE_128-NEXT: sshr v0.4s, v0.4s, #5
+; VBITS_GE_128-NEXT: sshr v1.4s, v1.4s, #5
+; VBITS_GE_128-NEXT: sshr v2.4s, v3.4s, #5
+; VBITS_GE_128-NEXT: sshr v3.4s, v4.4s, #5
+; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT: stp q2, q3, [x0]
+; VBITS_GE_128-NEXT: ret
+;
; VBITS_GE_256-LABEL: sdiv_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
@@ -329,6 +412,26 @@ define void @sdiv_v4i64(ptr %a) vscale_range(2,0) #0 {
}
define void @sdiv_v8i64(ptr %a) #0 {
+; VBITS_GE_128-LABEL: sdiv_v8i64:
+; VBITS_GE_128: // %bb.0:
+; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT: ldp q3, q4, [x0]
+; VBITS_GE_128-NEXT: cmlt v2.2d, v0.2d, #0
+; VBITS_GE_128-NEXT: cmlt v5.2d, v1.2d, #0
+; VBITS_GE_128-NEXT: cmlt v6.2d, v3.2d, #0
+; VBITS_GE_128-NEXT: usra v0.2d, v2.2d, #59
+; VBITS_GE_128-NEXT: cmlt v2.2d, v4.2d, #0
+; VBITS_GE_128-NEXT: usra v1.2d, v5.2d, #59
+; VBITS_GE_128-NEXT: usra v3.2d, v6.2d, #59
+; VBITS_GE_128-NEXT: usra v4.2d, v2.2d, #59
+; VBITS_GE_128-NEXT: sshr v0.2d, v0.2d, #5
+; VBITS_GE_128-NEXT: sshr v1.2d, v1.2d, #5
+; VBITS_GE_128-NEXT: sshr v2.2d, v3.2d, #5
+; VBITS_GE_128-NEXT: sshr v3.2d, v4.2d, #5
+; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT: stp q2, q3, [x0]
+; VBITS_GE_128-NEXT: ret
+;
; VBITS_GE_256-LABEL: sdiv_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
ping
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Other than the suggestion for an extra test case, LGTM.
; CHECK-NEXT: usra v0.2s, v1.2s, #29
; CHECK-NEXT: sshr v0.2s, v0.2s, #3
; CHECK-NEXT: ret
%res = sdiv <2 x i32> %op1, splat (i32 8)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add tests with negative divide amounts too?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, although I don't expect a codegen improvement with negative divide amounts, and hence didn't add them in the first version.
With SVE enabled, this should generate the asrd instruction. A subsequent patch will address this.
cd35e67
to
785b28e
Compare
With SVE enabled, this should generate asrd instruction. Subsequent patch will address this.