|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
| 2 | +; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_128 |
2 | 3 | ; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
3 | 4 | ; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
4 | 5 | ; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
5 | 6 |
|
6 | 7 | target triple = "aarch64-unknown-linux-gnu"
|
7 | 8 |
|
| | +; Signed division of a 128-bit <4 x i32> vector by a splat of -8 (a negated |
| | +; power of two). The expected code contains no divide instruction: cmlt plus |
| | +; usra #29 add 7 to the negative lanes so that the arithmetic shift right by 3 |
| | +; rounds toward zero, and the trailing neg accounts for the divisor's sign. |
| 9 | +define <4 x i32> @sdiv_v4i32_negative_pow2_divisor_packed(<4 x i32> %op1) vscale_range(1,0) #0 { |
| 10 | +; CHECK-LABEL: sdiv_v4i32_negative_pow2_divisor_packed: |
| 11 | +; CHECK: // %bb.0: |
| 12 | +; CHECK-NEXT: cmlt v1.4s, v0.4s, #0 |
| 13 | +; CHECK-NEXT: usra v0.4s, v1.4s, #29 |
| 14 | +; CHECK-NEXT: sshr v0.4s, v0.4s, #3 |
| 15 | +; CHECK-NEXT: neg v0.4s, v0.4s |
| 16 | +; CHECK-NEXT: ret |
| 17 | + %res = sdiv <4 x i32> %op1, splat (i32 -8) |
| 18 | + ret <4 x i32> %res |
| 19 | +} |
| 20 | + |
| | +; Signed division of a 64-bit <2 x i32> vector by a splat of -8 (a negated |
| | +; power of two). The expected code contains no divide instruction: cmlt plus |
| | +; usra #29 add 7 to the negative lanes so that the arithmetic shift right by 3 |
| | +; rounds toward zero, and the trailing neg accounts for the divisor's sign. |
| 21 | +define <2 x i32> @sdiv_v2i32_negative_pow2_divisor_unpacked(<2 x i32> %op1) vscale_range(1,0) #0 { |
| 22 | +; CHECK-LABEL: sdiv_v2i32_negative_pow2_divisor_unpacked: |
| 23 | +; CHECK: // %bb.0: |
| 24 | +; CHECK-NEXT: cmlt v1.2s, v0.2s, #0 |
| 25 | +; CHECK-NEXT: usra v0.2s, v1.2s, #29 |
| 26 | +; CHECK-NEXT: sshr v0.2s, v0.2s, #3 |
| 27 | +; CHECK-NEXT: neg v0.2s, v0.2s |
| 28 | +; CHECK-NEXT: ret |
| 29 | + %res = sdiv <2 x i32> %op1, splat (i32 -8) |
| 30 | + ret <2 x i32> %res |
| 31 | +} |
| 32 | + |
| | +; Signed division of a 128-bit <4 x i32> vector by a splat of 8 (a positive |
| | +; power of two). The expected code contains no divide instruction: cmlt plus |
| | +; usra #29 add 7 to the negative lanes so that the arithmetic shift right by 3 |
| | +; rounds toward zero; no negation is needed for a positive divisor. |
| 33 | +define <4 x i32> @sdiv_v4i32_positive_pow2_divisor_packed(<4 x i32> %op1) vscale_range(1,0) #0 { |
| 34 | +; CHECK-LABEL: sdiv_v4i32_positive_pow2_divisor_packed: |
| 35 | +; CHECK: // %bb.0: |
| 36 | +; CHECK-NEXT: cmlt v1.4s, v0.4s, #0 |
| 37 | +; CHECK-NEXT: usra v0.4s, v1.4s, #29 |
| 38 | +; CHECK-NEXT: sshr v0.4s, v0.4s, #3 |
| 39 | +; CHECK-NEXT: ret |
| 40 | + %res = sdiv <4 x i32> %op1, splat (i32 8) |
| 41 | + ret <4 x i32> %res |
| 42 | +} |
| 43 | + |
| | +; Signed division of a 64-bit <2 x i32> vector by a splat of 8 (a positive |
| | +; power of two). The expected code contains no divide instruction: cmlt plus |
| | +; usra #29 add 7 to the negative lanes so that the arithmetic shift right by 3 |
| | +; rounds toward zero; no negation is needed for a positive divisor. |
| 44 | +define <2 x i32> @sdiv_v2i32_positive_pow2_divisor_unpacked(<2 x i32> %op1) vscale_range(1,0) #0 { |
| 45 | +; CHECK-LABEL: sdiv_v2i32_positive_pow2_divisor_unpacked: |
| 46 | +; CHECK: // %bb.0: |
| 47 | +; CHECK-NEXT: cmlt v1.2s, v0.2s, #0 |
| 48 | +; CHECK-NEXT: usra v0.2s, v1.2s, #29 |
| 49 | +; CHECK-NEXT: sshr v0.2s, v0.2s, #3 |
| 50 | +; CHECK-NEXT: ret |
| 51 | + %res = sdiv <2 x i32> %op1, splat (i32 8) |
| 52 | + ret <2 x i32> %res |
| 53 | +} |
| 54 | + |
8 | 55 | define <8 x i8> @sdiv_v8i8(<8 x i8> %op1) vscale_range(2,0) #0 {
|
9 | 56 | ; CHECK-LABEL: sdiv_v8i8:
|
10 | 57 | ; CHECK: // %bb.0:
|
@@ -45,6 +92,26 @@ define void @sdiv_v32i8(ptr %a) vscale_range(2,0) #0 {
|
45 | 92 | }
|
46 | 93 |
|
47 | 94 | define void @sdiv_v64i8(ptr %a) #0 {
|
| 95 | +; VBITS_GE_128-LABEL: sdiv_v64i8: |
| 96 | +; VBITS_GE_128: // %bb.0: |
| 97 | +; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32] |
| 98 | +; VBITS_GE_128-NEXT: ldp q3, q4, [x0] |
| 99 | +; VBITS_GE_128-NEXT: cmlt v2.16b, v0.16b, #0 |
| 100 | +; VBITS_GE_128-NEXT: cmlt v5.16b, v1.16b, #0 |
| 101 | +; VBITS_GE_128-NEXT: cmlt v6.16b, v3.16b, #0 |
| 102 | +; VBITS_GE_128-NEXT: usra v0.16b, v2.16b, #3 |
| 103 | +; VBITS_GE_128-NEXT: cmlt v2.16b, v4.16b, #0 |
| 104 | +; VBITS_GE_128-NEXT: usra v1.16b, v5.16b, #3 |
| 105 | +; VBITS_GE_128-NEXT: usra v3.16b, v6.16b, #3 |
| 106 | +; VBITS_GE_128-NEXT: usra v4.16b, v2.16b, #3 |
| 107 | +; VBITS_GE_128-NEXT: sshr v0.16b, v0.16b, #5 |
| 108 | +; VBITS_GE_128-NEXT: sshr v1.16b, v1.16b, #5 |
| 109 | +; VBITS_GE_128-NEXT: sshr v2.16b, v3.16b, #5 |
| 110 | +; VBITS_GE_128-NEXT: sshr v3.16b, v4.16b, #5 |
| 111 | +; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32] |
| 112 | +; VBITS_GE_128-NEXT: stp q2, q3, [x0] |
| 113 | +; VBITS_GE_128-NEXT: ret |
| 114 | +; |
48 | 115 | ; VBITS_GE_256-LABEL: sdiv_v64i8:
|
49 | 116 | ; VBITS_GE_256: // %bb.0:
|
50 | 117 | ; VBITS_GE_256-NEXT: ptrue p0.b, vl32
|
@@ -139,6 +206,26 @@ define void @sdiv_v16i16(ptr %a) vscale_range(2,0) #0 {
|
139 | 206 | }
|
140 | 207 |
|
141 | 208 | define void @sdiv_v32i16(ptr %a) #0 {
|
| 209 | +; VBITS_GE_128-LABEL: sdiv_v32i16: |
| 210 | +; VBITS_GE_128: // %bb.0: |
| 211 | +; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32] |
| 212 | +; VBITS_GE_128-NEXT: ldp q3, q4, [x0] |
| 213 | +; VBITS_GE_128-NEXT: cmlt v2.8h, v0.8h, #0 |
| 214 | +; VBITS_GE_128-NEXT: cmlt v5.8h, v1.8h, #0 |
| 215 | +; VBITS_GE_128-NEXT: cmlt v6.8h, v3.8h, #0 |
| 216 | +; VBITS_GE_128-NEXT: usra v0.8h, v2.8h, #11 |
| 217 | +; VBITS_GE_128-NEXT: cmlt v2.8h, v4.8h, #0 |
| 218 | +; VBITS_GE_128-NEXT: usra v1.8h, v5.8h, #11 |
| 219 | +; VBITS_GE_128-NEXT: usra v3.8h, v6.8h, #11 |
| 220 | +; VBITS_GE_128-NEXT: usra v4.8h, v2.8h, #11 |
| 221 | +; VBITS_GE_128-NEXT: sshr v0.8h, v0.8h, #5 |
| 222 | +; VBITS_GE_128-NEXT: sshr v1.8h, v1.8h, #5 |
| 223 | +; VBITS_GE_128-NEXT: sshr v2.8h, v3.8h, #5 |
| 224 | +; VBITS_GE_128-NEXT: sshr v3.8h, v4.8h, #5 |
| 225 | +; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32] |
| 226 | +; VBITS_GE_128-NEXT: stp q2, q3, [x0] |
| 227 | +; VBITS_GE_128-NEXT: ret |
| 228 | +; |
142 | 229 | ; VBITS_GE_256-LABEL: sdiv_v32i16:
|
143 | 230 | ; VBITS_GE_256: // %bb.0:
|
144 | 231 | ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
@@ -234,6 +321,26 @@ define void @sdiv_v8i32(ptr %a) vscale_range(2,0) #0 {
|
234 | 321 | }
|
235 | 322 |
|
236 | 323 | define void @sdiv_v16i32(ptr %a) #0 {
|
| 324 | +; VBITS_GE_128-LABEL: sdiv_v16i32: |
| 325 | +; VBITS_GE_128: // %bb.0: |
| 326 | +; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32] |
| 327 | +; VBITS_GE_128-NEXT: ldp q3, q4, [x0] |
| 328 | +; VBITS_GE_128-NEXT: cmlt v2.4s, v0.4s, #0 |
| 329 | +; VBITS_GE_128-NEXT: cmlt v5.4s, v1.4s, #0 |
| 330 | +; VBITS_GE_128-NEXT: cmlt v6.4s, v3.4s, #0 |
| 331 | +; VBITS_GE_128-NEXT: usra v0.4s, v2.4s, #27 |
| 332 | +; VBITS_GE_128-NEXT: cmlt v2.4s, v4.4s, #0 |
| 333 | +; VBITS_GE_128-NEXT: usra v1.4s, v5.4s, #27 |
| 334 | +; VBITS_GE_128-NEXT: usra v3.4s, v6.4s, #27 |
| 335 | +; VBITS_GE_128-NEXT: usra v4.4s, v2.4s, #27 |
| 336 | +; VBITS_GE_128-NEXT: sshr v0.4s, v0.4s, #5 |
| 337 | +; VBITS_GE_128-NEXT: sshr v1.4s, v1.4s, #5 |
| 338 | +; VBITS_GE_128-NEXT: sshr v2.4s, v3.4s, #5 |
| 339 | +; VBITS_GE_128-NEXT: sshr v3.4s, v4.4s, #5 |
| 340 | +; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32] |
| 341 | +; VBITS_GE_128-NEXT: stp q2, q3, [x0] |
| 342 | +; VBITS_GE_128-NEXT: ret |
| 343 | +; |
237 | 344 | ; VBITS_GE_256-LABEL: sdiv_v16i32:
|
238 | 345 | ; VBITS_GE_256: // %bb.0:
|
239 | 346 | ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
@@ -329,6 +436,26 @@ define void @sdiv_v4i64(ptr %a) vscale_range(2,0) #0 {
|
329 | 436 | }
|
330 | 437 |
|
331 | 438 | define void @sdiv_v8i64(ptr %a) #0 {
|
| 439 | +; VBITS_GE_128-LABEL: sdiv_v8i64: |
| 440 | +; VBITS_GE_128: // %bb.0: |
| 441 | +; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32] |
| 442 | +; VBITS_GE_128-NEXT: ldp q3, q4, [x0] |
| 443 | +; VBITS_GE_128-NEXT: cmlt v2.2d, v0.2d, #0 |
| 444 | +; VBITS_GE_128-NEXT: cmlt v5.2d, v1.2d, #0 |
| 445 | +; VBITS_GE_128-NEXT: cmlt v6.2d, v3.2d, #0 |
| 446 | +; VBITS_GE_128-NEXT: usra v0.2d, v2.2d, #59 |
| 447 | +; VBITS_GE_128-NEXT: cmlt v2.2d, v4.2d, #0 |
| 448 | +; VBITS_GE_128-NEXT: usra v1.2d, v5.2d, #59 |
| 449 | +; VBITS_GE_128-NEXT: usra v3.2d, v6.2d, #59 |
| 450 | +; VBITS_GE_128-NEXT: usra v4.2d, v2.2d, #59 |
| 451 | +; VBITS_GE_128-NEXT: sshr v0.2d, v0.2d, #5 |
| 452 | +; VBITS_GE_128-NEXT: sshr v1.2d, v1.2d, #5 |
| 453 | +; VBITS_GE_128-NEXT: sshr v2.2d, v3.2d, #5 |
| 454 | +; VBITS_GE_128-NEXT: sshr v3.2d, v4.2d, #5 |
| 455 | +; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32] |
| 456 | +; VBITS_GE_128-NEXT: stp q2, q3, [x0] |
| 457 | +; VBITS_GE_128-NEXT: ret |
| 458 | +; |
332 | 459 | ; VBITS_GE_256-LABEL: sdiv_v8i64:
|
333 | 460 | ; VBITS_GE_256: // %bb.0:
|
334 | 461 | ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
|
0 commit comments