|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| 2 | +; RUN: llc -mattr=+sve < %s | FileCheck %s |
| 3 | +; RUN: llc -mattr=+dotprod,+sve < %s | FileCheck %s --check-prefix=DOT |
| 4 | +; RUN: llc -mattr=+dotprod,+sve -force-streaming-compatible < %s | FileCheck %s --check-prefix=STREAMING-SVE |
| 5 | +; RUN: llc -mattr=+dotprod,+sme -force-streaming < %s | FileCheck %s --check-prefix=STREAMING-SVE |
| 6 | + |
| 7 | +target triple = "aarch64-unknown-linux-gnu" |
| 8 | + |
; Unsigned widening add-reduction: zero-extends the <32 x i8> argument to
; <32 x i32>, sums all lanes with llvm.vector.reduce.add.v32i32 and returns
; the i32 total.
;
; Assertion groups map onto the RUN lines of this file:
;   CHECK          - llc -mattr=+sve
;   DOT            - llc -mattr=+dotprod,+sve
;   STREAMING-SVE  - shared by the -force-streaming-compatible run and the
;                    +sme -force-streaming run
;
; In the DOT expectations the sum is built from two udot instructions against
; an all-ones byte vector (movi v2.16b, #1) accumulating into a zeroed v3,
; followed by a single addv.
;
; The assertions are autogenerated (utils/update_llc_test_checks.py);
; regenerate them rather than editing by hand.
;
; NOTE(review): the function name says v16i8 but the operand is <32 x i8>.
; Presumably the name is a leftover - confirm before renaming, since every
; *-LABEL line has to change together with it.
| 9 | +define i32 @reduce_uaddv_v16i8(<32 x i8> %a) { |
| 10 | +; CHECK-LABEL: reduce_uaddv_v16i8: |
| 11 | +; CHECK: // %bb.0: |
| 12 | +; CHECK-NEXT: ushll2 v2.8h, v1.16b, #0 |
| 13 | +; CHECK-NEXT: ushll2 v3.8h, v0.16b, #0 |
| 14 | +; CHECK-NEXT: ushll v1.8h, v1.8b, #0 |
| 15 | +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 |
| 16 | +; CHECK-NEXT: uaddl2 v4.4s, v3.8h, v2.8h |
| 17 | +; CHECK-NEXT: uaddl v2.4s, v3.4h, v2.4h |
| 18 | +; CHECK-NEXT: uaddl2 v5.4s, v0.8h, v1.8h |
| 19 | +; CHECK-NEXT: uaddl v0.4s, v0.4h, v1.4h |
| 20 | +; CHECK-NEXT: add v1.4s, v5.4s, v4.4s |
| 21 | +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s |
| 22 | +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s |
| 23 | +; CHECK-NEXT: addv s0, v0.4s |
| 24 | +; CHECK-NEXT: fmov w0, s0 |
| 25 | +; CHECK-NEXT: ret |
| 26 | +; |
| 27 | +; DOT-LABEL: reduce_uaddv_v16i8: |
| 28 | +; DOT: // %bb.0: |
| 29 | +; DOT-NEXT: movi v2.16b, #1 |
| 30 | +; DOT-NEXT: movi v3.2d, #0000000000000000 |
| 31 | +; DOT-NEXT: udot v3.4s, v1.16b, v2.16b |
| 32 | +; DOT-NEXT: udot v3.4s, v0.16b, v2.16b |
| 33 | +; DOT-NEXT: addv s0, v3.4s |
| 34 | +; DOT-NEXT: fmov w0, s0 |
| 35 | +; DOT-NEXT: ret |
| 36 | +; |
| 37 | +; STREAMING-SVE-LABEL: reduce_uaddv_v16i8: |
| 38 | +; STREAMING-SVE: // %bb.0: |
| 39 | +; STREAMING-SVE-NEXT: // kill: def $q1 killed $q1 def $z1 |
| 40 | +; STREAMING-SVE-NEXT: uunpklo z2.h, z1.b |
| 41 | +; STREAMING-SVE-NEXT: // kill: def $q0 killed $q0 def $z0 |
| 42 | +; STREAMING-SVE-NEXT: uunpklo z3.h, z0.b |
| 43 | +; STREAMING-SVE-NEXT: ptrue p0.s, vl4 |
| 44 | +; STREAMING-SVE-NEXT: ext z1.b, z1.b, z1.b, #8 |
| 45 | +; STREAMING-SVE-NEXT: ext z0.b, z0.b, z0.b, #8 |
| 46 | +; STREAMING-SVE-NEXT: uunpklo z1.h, z1.b |
| 47 | +; STREAMING-SVE-NEXT: uunpklo z0.h, z0.b |
| 48 | +; STREAMING-SVE-NEXT: uunpklo z4.s, z2.h |
| 49 | +; STREAMING-SVE-NEXT: ext z2.b, z2.b, z2.b, #8 |
| 50 | +; STREAMING-SVE-NEXT: uunpklo z6.s, z3.h |
| 51 | +; STREAMING-SVE-NEXT: ext z3.b, z3.b, z3.b, #8 |
| 52 | +; STREAMING-SVE-NEXT: mov z5.d, z1.d |
| 53 | +; STREAMING-SVE-NEXT: uunpklo z7.s, z0.h |
| 54 | +; STREAMING-SVE-NEXT: ext z0.b, z0.b, z0.b, #8 |
| 55 | +; STREAMING-SVE-NEXT: uunpklo z2.s, z2.h |
| 56 | +; STREAMING-SVE-NEXT: uunpklo z3.s, z3.h |
| 57 | +; STREAMING-SVE-NEXT: add z4.s, z6.s, z4.s |
| 58 | +; STREAMING-SVE-NEXT: ext z5.b, z5.b, z1.b, #8 |
| 59 | +; STREAMING-SVE-NEXT: uunpklo z1.s, z1.h |
| 60 | +; STREAMING-SVE-NEXT: uunpklo z0.s, z0.h |
| 61 | +; STREAMING-SVE-NEXT: add z2.s, z3.s, z2.s |
| 62 | +; STREAMING-SVE-NEXT: uunpklo z5.s, z5.h |
| 63 | +; STREAMING-SVE-NEXT: add z1.s, z7.s, z1.s |
| 64 | +; STREAMING-SVE-NEXT: add z0.s, z0.s, z5.s |
| 65 | +; STREAMING-SVE-NEXT: add z1.s, z4.s, z1.s |
| 66 | +; STREAMING-SVE-NEXT: add z0.s, z2.s, z0.s |
| 67 | +; STREAMING-SVE-NEXT: add z0.s, z1.s, z0.s |
| 68 | +; STREAMING-SVE-NEXT: uaddv d0, p0, z0.s |
| 69 | +; STREAMING-SVE-NEXT: fmov x0, d0 |
| 70 | +; STREAMING-SVE-NEXT: // kill: def $w0 killed $w0 killed $x0 |
| 71 | +; STREAMING-SVE-NEXT: ret |
| 72 | + %1 = zext <32 x i8> %a to <32 x i32> |
| 73 | + %2 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %1) |
| 74 | + ret i32 %2 |
| 75 | +} |
| 76 | + |
; Signed widening add-reduction: sign-extends the <32 x i8> argument to
; <32 x i32>, sums all lanes with llvm.vector.reduce.add.v32i32 and returns
; the i32 total.
;
; Assertion groups map onto the RUN lines of this file:
;   CHECK          - llc -mattr=+sve
;   DOT            - llc -mattr=+dotprod,+sve
;   STREAMING-SVE  - shared by the -force-streaming-compatible run and the
;                    +sme -force-streaming run
;
; In the DOT expectations the sum is built from two sdot instructions against
; an all-ones byte vector (movi v2.16b, #1) accumulating into a zeroed v3,
; followed by a single addv.
;
; The assertions are autogenerated (utils/update_llc_test_checks.py);
; regenerate them rather than editing by hand.
;
; NOTE(review): the function name says v16i8 but the operand is <32 x i8>.
; Presumably the name is a leftover - confirm before renaming, since every
; *-LABEL line has to change together with it.
; NOTE(review): the STREAMING-SVE expectations finish with uaddv although the
; input is sign-extended; presumably harmless because only w0 (the low 32
; bits of the result) is returned - confirm.
| 77 | +define i32 @reduce_saddv_v16i8(<32 x i8> %a) { |
| 78 | +; CHECK-LABEL: reduce_saddv_v16i8: |
| 79 | +; CHECK: // %bb.0: |
| 80 | +; CHECK-NEXT: sshll2 v2.8h, v1.16b, #0 |
| 81 | +; CHECK-NEXT: sshll2 v3.8h, v0.16b, #0 |
| 82 | +; CHECK-NEXT: sshll v1.8h, v1.8b, #0 |
| 83 | +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 |
| 84 | +; CHECK-NEXT: saddl2 v4.4s, v3.8h, v2.8h |
| 85 | +; CHECK-NEXT: saddl v2.4s, v3.4h, v2.4h |
| 86 | +; CHECK-NEXT: saddl2 v5.4s, v0.8h, v1.8h |
| 87 | +; CHECK-NEXT: saddl v0.4s, v0.4h, v1.4h |
| 88 | +; CHECK-NEXT: add v1.4s, v5.4s, v4.4s |
| 89 | +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s |
| 90 | +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s |
| 91 | +; CHECK-NEXT: addv s0, v0.4s |
| 92 | +; CHECK-NEXT: fmov w0, s0 |
| 93 | +; CHECK-NEXT: ret |
| 94 | +; |
| 95 | +; DOT-LABEL: reduce_saddv_v16i8: |
| 96 | +; DOT: // %bb.0: |
| 97 | +; DOT-NEXT: movi v2.16b, #1 |
| 98 | +; DOT-NEXT: movi v3.2d, #0000000000000000 |
| 99 | +; DOT-NEXT: sdot v3.4s, v1.16b, v2.16b |
| 100 | +; DOT-NEXT: sdot v3.4s, v0.16b, v2.16b |
| 101 | +; DOT-NEXT: addv s0, v3.4s |
| 102 | +; DOT-NEXT: fmov w0, s0 |
| 103 | +; DOT-NEXT: ret |
| 104 | +; |
| 105 | +; STREAMING-SVE-LABEL: reduce_saddv_v16i8: |
| 106 | +; STREAMING-SVE: // %bb.0: |
| 107 | +; STREAMING-SVE-NEXT: // kill: def $q1 killed $q1 def $z1 |
| 108 | +; STREAMING-SVE-NEXT: sunpklo z2.h, z1.b |
| 109 | +; STREAMING-SVE-NEXT: // kill: def $q0 killed $q0 def $z0 |
| 110 | +; STREAMING-SVE-NEXT: sunpklo z3.h, z0.b |
| 111 | +; STREAMING-SVE-NEXT: ptrue p0.s, vl4 |
| 112 | +; STREAMING-SVE-NEXT: ext z1.b, z1.b, z1.b, #8 |
| 113 | +; STREAMING-SVE-NEXT: ext z0.b, z0.b, z0.b, #8 |
| 114 | +; STREAMING-SVE-NEXT: sunpklo z1.h, z1.b |
| 115 | +; STREAMING-SVE-NEXT: sunpklo z0.h, z0.b |
| 116 | +; STREAMING-SVE-NEXT: sunpklo z4.s, z2.h |
| 117 | +; STREAMING-SVE-NEXT: ext z2.b, z2.b, z2.b, #8 |
| 118 | +; STREAMING-SVE-NEXT: sunpklo z6.s, z3.h |
| 119 | +; STREAMING-SVE-NEXT: ext z3.b, z3.b, z3.b, #8 |
| 120 | +; STREAMING-SVE-NEXT: mov z5.d, z1.d |
| 121 | +; STREAMING-SVE-NEXT: sunpklo z7.s, z0.h |
| 122 | +; STREAMING-SVE-NEXT: ext z0.b, z0.b, z0.b, #8 |
| 123 | +; STREAMING-SVE-NEXT: sunpklo z2.s, z2.h |
| 124 | +; STREAMING-SVE-NEXT: sunpklo z3.s, z3.h |
| 125 | +; STREAMING-SVE-NEXT: add z4.s, z6.s, z4.s |
| 126 | +; STREAMING-SVE-NEXT: ext z5.b, z5.b, z1.b, #8 |
| 127 | +; STREAMING-SVE-NEXT: sunpklo z1.s, z1.h |
| 128 | +; STREAMING-SVE-NEXT: sunpklo z0.s, z0.h |
| 129 | +; STREAMING-SVE-NEXT: add z2.s, z3.s, z2.s |
| 130 | +; STREAMING-SVE-NEXT: sunpklo z5.s, z5.h |
| 131 | +; STREAMING-SVE-NEXT: add z1.s, z7.s, z1.s |
| 132 | +; STREAMING-SVE-NEXT: add z0.s, z0.s, z5.s |
| 133 | +; STREAMING-SVE-NEXT: add z1.s, z4.s, z1.s |
| 134 | +; STREAMING-SVE-NEXT: add z0.s, z2.s, z0.s |
| 135 | +; STREAMING-SVE-NEXT: add z0.s, z1.s, z0.s |
| 136 | +; STREAMING-SVE-NEXT: uaddv d0, p0, z0.s |
| 137 | +; STREAMING-SVE-NEXT: fmov x0, d0 |
| 138 | +; STREAMING-SVE-NEXT: // kill: def $w0 killed $w0 killed $x0 |
| 139 | +; STREAMING-SVE-NEXT: ret |
| 140 | + %1 = sext <32 x i8> %a to <32 x i32> |
| 141 | + %2 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %1) |
| 142 | + ret i32 %2 |
| 143 | +} |
0 commit comments