Skip to content

Commit 4072fd4

Browse files
author
git apple-llvm automerger
committed
Merge commit '0f34eba48a6b' from llvm.org/main into next
2 parents 2752d5b + 0f34eba commit 4072fd4

File tree

1 file changed

+127
-0
lines changed

1 file changed

+127
-0
lines changed

llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,57 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_128
23
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
34
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
45
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
56

67
target triple = "aarch64-unknown-linux-gnu"
78

9+
; Signed division of every lane of a <4 x i32> vector by -8, a negated power of two.
; The assertions expect no divide instruction. Instead: cmlt builds a per-lane
; "is negative" mask, usra shifts that mask right by 29 and adds it to the
; dividend (a bias of 7 on negative lanes so the result rounds toward zero),
; sshr performs the arithmetic shift by 3, and neg accounts for the negative divisor.
define <4 x i32> @sdiv_v4i32_negative_pow2_divisor_packed(<4 x i32> %op1) vscale_range(1,0) #0 {
10+
; CHECK-LABEL: sdiv_v4i32_negative_pow2_divisor_packed:
11+
; CHECK: // %bb.0:
12+
; CHECK-NEXT: cmlt v1.4s, v0.4s, #0
13+
; CHECK-NEXT: usra v0.4s, v1.4s, #29
14+
; CHECK-NEXT: sshr v0.4s, v0.4s, #3
15+
; CHECK-NEXT: neg v0.4s, v0.4s
16+
; CHECK-NEXT: ret
17+
%res = sdiv <4 x i32> %op1, splat (i32 -8)
18+
ret <4 x i32> %res
19+
}
20+
21+
; Signed division of every lane of a <2 x i32> vector by -8, a negated power of two.
; The assertions expect the same four-step sequence as the <4 x i32> negative case
; (negative-lane mask, shift-accumulate by 29, arithmetic shift by 3, negate),
; but on the two-lane .2s register arrangement.
define <2 x i32> @sdiv_v2i32_negative_pow2_divisor_unpacked(<2 x i32> %op1) vscale_range(1,0) #0 {
22+
; CHECK-LABEL: sdiv_v2i32_negative_pow2_divisor_unpacked:
23+
; CHECK: // %bb.0:
24+
; CHECK-NEXT: cmlt v1.2s, v0.2s, #0
25+
; CHECK-NEXT: usra v0.2s, v1.2s, #29
26+
; CHECK-NEXT: sshr v0.2s, v0.2s, #3
27+
; CHECK-NEXT: neg v0.2s, v0.2s
28+
; CHECK-NEXT: ret
29+
%res = sdiv <2 x i32> %op1, splat (i32 -8)
30+
ret <2 x i32> %res
31+
}
32+
33+
; Signed division of every lane of a <4 x i32> vector by 8, a positive power of two.
; The assertions expect a negative-lane mask (cmlt), a shift-accumulate of that
; mask by 29 to bias negative dividends by 7, and an arithmetic shift right by 3.
; Unlike the negative-divisor variant, no trailing neg is expected.
define <4 x i32> @sdiv_v4i32_positive_pow2_divisor_packed(<4 x i32> %op1) vscale_range(1,0) #0 {
34+
; CHECK-LABEL: sdiv_v4i32_positive_pow2_divisor_packed:
35+
; CHECK: // %bb.0:
36+
; CHECK-NEXT: cmlt v1.4s, v0.4s, #0
37+
; CHECK-NEXT: usra v0.4s, v1.4s, #29
38+
; CHECK-NEXT: sshr v0.4s, v0.4s, #3
39+
; CHECK-NEXT: ret
40+
%res = sdiv <4 x i32> %op1, splat (i32 8)
41+
ret <4 x i32> %res
42+
}
43+
44+
; Signed division of every lane of a <2 x i32> vector by 8, a positive power of two.
; The assertions expect the same three-step sequence as the <4 x i32> positive case
; (negative-lane mask, shift-accumulate by 29, arithmetic shift by 3), on the
; two-lane .2s register arrangement and with no trailing neg.
define <2 x i32> @sdiv_v2i32_positive_pow2_divisor_unpacked(<2 x i32> %op1) vscale_range(1,0) #0 {
45+
; CHECK-LABEL: sdiv_v2i32_positive_pow2_divisor_unpacked:
46+
; CHECK: // %bb.0:
47+
; CHECK-NEXT: cmlt v1.2s, v0.2s, #0
48+
; CHECK-NEXT: usra v0.2s, v1.2s, #29
49+
; CHECK-NEXT: sshr v0.2s, v0.2s, #3
50+
; CHECK-NEXT: ret
51+
%res = sdiv <2 x i32> %op1, splat (i32 8)
52+
ret <2 x i32> %res
53+
}
54+
855
define <8 x i8> @sdiv_v8i8(<8 x i8> %op1) vscale_range(2,0) #0 {
956
; CHECK-LABEL: sdiv_v8i8:
1057
; CHECK: // %bb.0:
@@ -45,6 +92,26 @@ define void @sdiv_v32i8(ptr %a) vscale_range(2,0) #0 {
4592
}
4693

4794
define void @sdiv_v64i8(ptr %a) #0 {
95+
; VBITS_GE_128-LABEL: sdiv_v64i8:
96+
; VBITS_GE_128: // %bb.0:
97+
; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
98+
; VBITS_GE_128-NEXT: ldp q3, q4, [x0]
99+
; VBITS_GE_128-NEXT: cmlt v2.16b, v0.16b, #0
100+
; VBITS_GE_128-NEXT: cmlt v5.16b, v1.16b, #0
101+
; VBITS_GE_128-NEXT: cmlt v6.16b, v3.16b, #0
102+
; VBITS_GE_128-NEXT: usra v0.16b, v2.16b, #3
103+
; VBITS_GE_128-NEXT: cmlt v2.16b, v4.16b, #0
104+
; VBITS_GE_128-NEXT: usra v1.16b, v5.16b, #3
105+
; VBITS_GE_128-NEXT: usra v3.16b, v6.16b, #3
106+
; VBITS_GE_128-NEXT: usra v4.16b, v2.16b, #3
107+
; VBITS_GE_128-NEXT: sshr v0.16b, v0.16b, #5
108+
; VBITS_GE_128-NEXT: sshr v1.16b, v1.16b, #5
109+
; VBITS_GE_128-NEXT: sshr v2.16b, v3.16b, #5
110+
; VBITS_GE_128-NEXT: sshr v3.16b, v4.16b, #5
111+
; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32]
112+
; VBITS_GE_128-NEXT: stp q2, q3, [x0]
113+
; VBITS_GE_128-NEXT: ret
114+
;
48115
; VBITS_GE_256-LABEL: sdiv_v64i8:
49116
; VBITS_GE_256: // %bb.0:
50117
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
@@ -139,6 +206,26 @@ define void @sdiv_v16i16(ptr %a) vscale_range(2,0) #0 {
139206
}
140207

141208
define void @sdiv_v32i16(ptr %a) #0 {
209+
; VBITS_GE_128-LABEL: sdiv_v32i16:
210+
; VBITS_GE_128: // %bb.0:
211+
; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
212+
; VBITS_GE_128-NEXT: ldp q3, q4, [x0]
213+
; VBITS_GE_128-NEXT: cmlt v2.8h, v0.8h, #0
214+
; VBITS_GE_128-NEXT: cmlt v5.8h, v1.8h, #0
215+
; VBITS_GE_128-NEXT: cmlt v6.8h, v3.8h, #0
216+
; VBITS_GE_128-NEXT: usra v0.8h, v2.8h, #11
217+
; VBITS_GE_128-NEXT: cmlt v2.8h, v4.8h, #0
218+
; VBITS_GE_128-NEXT: usra v1.8h, v5.8h, #11
219+
; VBITS_GE_128-NEXT: usra v3.8h, v6.8h, #11
220+
; VBITS_GE_128-NEXT: usra v4.8h, v2.8h, #11
221+
; VBITS_GE_128-NEXT: sshr v0.8h, v0.8h, #5
222+
; VBITS_GE_128-NEXT: sshr v1.8h, v1.8h, #5
223+
; VBITS_GE_128-NEXT: sshr v2.8h, v3.8h, #5
224+
; VBITS_GE_128-NEXT: sshr v3.8h, v4.8h, #5
225+
; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32]
226+
; VBITS_GE_128-NEXT: stp q2, q3, [x0]
227+
; VBITS_GE_128-NEXT: ret
228+
;
142229
; VBITS_GE_256-LABEL: sdiv_v32i16:
143230
; VBITS_GE_256: // %bb.0:
144231
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
@@ -234,6 +321,26 @@ define void @sdiv_v8i32(ptr %a) vscale_range(2,0) #0 {
234321
}
235322

236323
define void @sdiv_v16i32(ptr %a) #0 {
324+
; VBITS_GE_128-LABEL: sdiv_v16i32:
325+
; VBITS_GE_128: // %bb.0:
326+
; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
327+
; VBITS_GE_128-NEXT: ldp q3, q4, [x0]
328+
; VBITS_GE_128-NEXT: cmlt v2.4s, v0.4s, #0
329+
; VBITS_GE_128-NEXT: cmlt v5.4s, v1.4s, #0
330+
; VBITS_GE_128-NEXT: cmlt v6.4s, v3.4s, #0
331+
; VBITS_GE_128-NEXT: usra v0.4s, v2.4s, #27
332+
; VBITS_GE_128-NEXT: cmlt v2.4s, v4.4s, #0
333+
; VBITS_GE_128-NEXT: usra v1.4s, v5.4s, #27
334+
; VBITS_GE_128-NEXT: usra v3.4s, v6.4s, #27
335+
; VBITS_GE_128-NEXT: usra v4.4s, v2.4s, #27
336+
; VBITS_GE_128-NEXT: sshr v0.4s, v0.4s, #5
337+
; VBITS_GE_128-NEXT: sshr v1.4s, v1.4s, #5
338+
; VBITS_GE_128-NEXT: sshr v2.4s, v3.4s, #5
339+
; VBITS_GE_128-NEXT: sshr v3.4s, v4.4s, #5
340+
; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32]
341+
; VBITS_GE_128-NEXT: stp q2, q3, [x0]
342+
; VBITS_GE_128-NEXT: ret
343+
;
237344
; VBITS_GE_256-LABEL: sdiv_v16i32:
238345
; VBITS_GE_256: // %bb.0:
239346
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
@@ -329,6 +436,26 @@ define void @sdiv_v4i64(ptr %a) vscale_range(2,0) #0 {
329436
}
330437

331438
define void @sdiv_v8i64(ptr %a) #0 {
439+
; VBITS_GE_128-LABEL: sdiv_v8i64:
440+
; VBITS_GE_128: // %bb.0:
441+
; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
442+
; VBITS_GE_128-NEXT: ldp q3, q4, [x0]
443+
; VBITS_GE_128-NEXT: cmlt v2.2d, v0.2d, #0
444+
; VBITS_GE_128-NEXT: cmlt v5.2d, v1.2d, #0
445+
; VBITS_GE_128-NEXT: cmlt v6.2d, v3.2d, #0
446+
; VBITS_GE_128-NEXT: usra v0.2d, v2.2d, #59
447+
; VBITS_GE_128-NEXT: cmlt v2.2d, v4.2d, #0
448+
; VBITS_GE_128-NEXT: usra v1.2d, v5.2d, #59
449+
; VBITS_GE_128-NEXT: usra v3.2d, v6.2d, #59
450+
; VBITS_GE_128-NEXT: usra v4.2d, v2.2d, #59
451+
; VBITS_GE_128-NEXT: sshr v0.2d, v0.2d, #5
452+
; VBITS_GE_128-NEXT: sshr v1.2d, v1.2d, #5
453+
; VBITS_GE_128-NEXT: sshr v2.2d, v3.2d, #5
454+
; VBITS_GE_128-NEXT: sshr v3.2d, v4.2d, #5
455+
; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32]
456+
; VBITS_GE_128-NEXT: stp q2, q3, [x0]
457+
; VBITS_GE_128-NEXT: ret
458+
;
332459
; VBITS_GE_256-LABEL: sdiv_v8i64:
333460
; VBITS_GE_256: // %bb.0:
334461
; VBITS_GE_256-NEXT: ptrue p0.d, vl4

0 commit comments

Comments
 (0)