|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
|
2 | 2 | ; RUN: llc -mtriple=aarch64 -mattr=+sha3 < %s | FileCheck --check-prefix=SHA3 %s
|
3 | 3 | ; RUN: llc -mtriple=aarch64 -mattr=-sha3 < %s | FileCheck --check-prefix=NOSHA3 %s
|
| 4 | +; RUN: llc -mtriple=aarch64 -mattr=+sve2 < %s | FileCheck --check-prefix=SVE2SHA3 %s |
| 5 | + |
| 6 | +/* 128-bit vectors */ |
4 | 7 |
|
5 | 8 | define <2 x i64> @xar(<2 x i64> %x, <2 x i64> %y) {
|
6 | 9 | ; SHA3-LABEL: xar:
|
@@ -39,14 +42,14 @@ define <1 x i64> @xar_v1i64(<1 x i64> %a, <1 x i64> %b) {
|
39 | 42 | ret <1 x i64> %fshl
|
40 | 43 | }
|
41 | 44 |
|
42 |
| -define <2 x i64> @xar_instead_of_or1(<2 x i64> %r) { |
43 |
| -; SHA3-LABEL: xar_instead_of_or1: |
| 45 | +define <2 x i64> @xar_instead_of_or_v2i64(<2 x i64> %r) { |
| 46 | +; SHA3-LABEL: xar_instead_of_or_v2i64: |
44 | 47 | ; SHA3: // %bb.0: // %entry
|
45 | 48 | ; SHA3-NEXT: movi v1.2d, #0000000000000000
|
46 | 49 | ; SHA3-NEXT: xar v0.2d, v0.2d, v1.2d, #39
|
47 | 50 | ; SHA3-NEXT: ret
|
48 | 51 | ;
|
49 |
| -; NOSHA3-LABEL: xar_instead_of_or1: |
| 52 | +; NOSHA3-LABEL: xar_instead_of_or_v2i64: |
50 | 53 | ; NOSHA3: // %bb.0: // %entry
|
51 | 54 | ; NOSHA3-NEXT: shl v1.2d, v0.2d, #25
|
52 | 55 | ; NOSHA3-NEXT: usra v1.2d, v0.2d, #39
|
@@ -76,63 +79,212 @@ define <1 x i64> @xar_instead_of_or_v1i64(<1 x i64> %v.val) {
|
76 | 79 | ret <1 x i64> %fshl
|
77 | 80 | }
|
78 | 81 |
|
79 |
| -define <4 x i32> @xar_instead_of_or2(<4 x i32> %r) { |
80 |
| -; SHA3-LABEL: xar_instead_of_or2: |
| | +; Rotate every i32 lane of %r left by 25 (equivalently right by 7) with no preceding xor. |
| | +; The NEON-only runs keep the shl+usra pair; the +sve2 run folds the rotate into XAR against a zeroed register. |
| 82 | +define <4 x i32> @xar_instead_of_or_v4i32(<4 x i32> %r) { |
| 83 | +; SHA3-LABEL: xar_instead_of_or_v4i32: |
81 | 84 | ; SHA3: // %bb.0: // %entry
|
82 | 85 | ; SHA3-NEXT: shl v1.4s, v0.4s, #25
|
83 | 86 | ; SHA3-NEXT: usra v1.4s, v0.4s, #7
|
84 | 87 | ; SHA3-NEXT: mov v0.16b, v1.16b
|
85 | 88 | ; SHA3-NEXT: ret
|
86 | 89 | ;
|
87 |
| -; NOSHA3-LABEL: xar_instead_of_or2: |
| 90 | +; NOSHA3-LABEL: xar_instead_of_or_v4i32: |
88 | 91 | ; NOSHA3: // %bb.0: // %entry
|
89 | 92 | ; NOSHA3-NEXT: shl v1.4s, v0.4s, #25
|
90 | 93 | ; NOSHA3-NEXT: usra v1.4s, v0.4s, #7
|
91 | 94 | ; NOSHA3-NEXT: mov v0.16b, v1.16b
|
92 | 95 | ; NOSHA3-NEXT: ret
|
| 96 | +; |
| 97 | +; SVE2SHA3-LABEL: xar_instead_of_or_v4i32: |
| 98 | +; SVE2SHA3: // %bb.0: // %entry |
| 99 | +; SVE2SHA3-NEXT: movi v1.2d, #0000000000000000 |
| 100 | +; SVE2SHA3-NEXT: // kill: def $q0 killed $q0 def $z0 |
| 101 | +; SVE2SHA3-NEXT: xar z0.s, z0.s, z1.s, #7 |
| 102 | +; SVE2SHA3-NEXT: // kill: def $q0 killed $q0 killed $z0 |
| 103 | +; SVE2SHA3-NEXT: ret |
93 | 104 | entry:
|
94 | 105 | %or = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %r, <4 x i32> %r, <4 x i32> splat (i32 25))
|
95 | 106 | ret <4 x i32> %or
|
96 | 107 | }
|
97 | 108 |
|
98 |
| -define <8 x i16> @xar_instead_of_or3(<8 x i16> %r) { |
99 |
| -; SHA3-LABEL: xar_instead_of_or3: |
| | +; Rotate every i16 lane of %r left by 25; fshl takes the amount modulo the lane width, so this is left by 9 / right by 7. |
| | +; The NEON-only runs keep the shl+usra pair; the +sve2 run folds the rotate into XAR against a zeroed register. |
| 109 | +define <8 x i16> @xar_instead_of_or_v8i16(<8 x i16> %r) { |
| 110 | +; SHA3-LABEL: xar_instead_of_or_v8i16: |
100 | 111 | ; SHA3: // %bb.0: // %entry
|
101 | 112 | ; SHA3-NEXT: shl v1.8h, v0.8h, #9
|
102 | 113 | ; SHA3-NEXT: usra v1.8h, v0.8h, #7
|
103 | 114 | ; SHA3-NEXT: mov v0.16b, v1.16b
|
104 | 115 | ; SHA3-NEXT: ret
|
105 | 116 | ;
|
106 |
| -; NOSHA3-LABEL: xar_instead_of_or3: |
| 117 | +; NOSHA3-LABEL: xar_instead_of_or_v8i16: |
107 | 118 | ; NOSHA3: // %bb.0: // %entry
|
108 | 119 | ; NOSHA3-NEXT: shl v1.8h, v0.8h, #9
|
109 | 120 | ; NOSHA3-NEXT: usra v1.8h, v0.8h, #7
|
110 | 121 | ; NOSHA3-NEXT: mov v0.16b, v1.16b
|
111 | 122 | ; NOSHA3-NEXT: ret
|
| 123 | +; |
| 124 | +; SVE2SHA3-LABEL: xar_instead_of_or_v8i16: |
| 125 | +; SVE2SHA3: // %bb.0: // %entry |
| 126 | +; SVE2SHA3-NEXT: movi v1.2d, #0000000000000000 |
| 127 | +; SVE2SHA3-NEXT: // kill: def $q0 killed $q0 def $z0 |
| 128 | +; SVE2SHA3-NEXT: xar z0.h, z0.h, z1.h, #7 |
| 129 | +; SVE2SHA3-NEXT: // kill: def $q0 killed $q0 killed $z0 |
| 130 | +; SVE2SHA3-NEXT: ret |
112 | 131 | entry:
|
113 | 132 | %or = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %r, <8 x i16> %r, <8 x i16> splat (i16 25))
|
114 | 133 | ret <8 x i16> %or
|
115 | 134 | }
|
116 | 135 |
|
117 |
| -define <16 x i8> @xar_instead_of_or4(<16 x i8> %r) { |
118 |
| -; SHA3-LABEL: xar_instead_of_or4: |
| | +; Rotate every i8 lane of %r left by 25; fshl takes the amount modulo the lane width, so this is left by 1 / right by 7. |
| | +; The NEON-only runs emit add (x+x) plus usra; the +sve2 run folds the rotate into XAR against a zeroed register. |
| 136 | +define <16 x i8> @xar_instead_of_or_v16i8(<16 x i8> %r) { |
| 137 | +; SHA3-LABEL: xar_instead_of_or_v16i8: |
119 | 138 | ; SHA3: // %bb.0: // %entry
|
120 | 139 | ; SHA3-NEXT: add v1.16b, v0.16b, v0.16b
|
121 | 140 | ; SHA3-NEXT: usra v1.16b, v0.16b, #7
|
122 | 141 | ; SHA3-NEXT: mov v0.16b, v1.16b
|
123 | 142 | ; SHA3-NEXT: ret
|
124 | 143 | ;
|
125 |
| -; NOSHA3-LABEL: xar_instead_of_or4: |
| 144 | +; NOSHA3-LABEL: xar_instead_of_or_v16i8: |
126 | 145 | ; NOSHA3: // %bb.0: // %entry
|
127 | 146 | ; NOSHA3-NEXT: add v1.16b, v0.16b, v0.16b
|
128 | 147 | ; NOSHA3-NEXT: usra v1.16b, v0.16b, #7
|
129 | 148 | ; NOSHA3-NEXT: mov v0.16b, v1.16b
|
130 | 149 | ; NOSHA3-NEXT: ret
|
| 150 | +; |
| 151 | +; SVE2SHA3-LABEL: xar_instead_of_or_v16i8: |
| 152 | +; SVE2SHA3: // %bb.0: // %entry |
| 153 | +; SVE2SHA3-NEXT: movi v1.2d, #0000000000000000 |
| 154 | +; SVE2SHA3-NEXT: // kill: def $q0 killed $q0 def $z0 |
| 155 | +; SVE2SHA3-NEXT: xar z0.b, z0.b, z1.b, #7 |
| 156 | +; SVE2SHA3-NEXT: // kill: def $q0 killed $q0 killed $z0 |
| 157 | +; SVE2SHA3-NEXT: ret |
131 | 158 | entry:
|
132 | 159 | %or = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %r, <16 x i8> %r, <16 x i8> splat (i8 25))
|
133 | 160 | ret <16 x i8> %or
|
134 | 161 | }
|
135 | 162 |
|
| 163 | +/* 64-bit vectors */ |
| 164 | + |
| | +; xor of two <2 x i32> values, then rotate each i32 lane left by 25 (equivalently right by 7). |
| | +; FIXME(review): the +sha3 expectation below is "xar v0.2d, ..., #7", which rotates 64-bit lanes; confirm that is a valid lowering for 32-bit lanes. |
| 165 | +define <2 x i32> @xar_v2i32(<2 x i32> %x, <2 x i32> %y) { |
| 166 | +; SHA3-LABEL: xar_v2i32: |
| 167 | +; SHA3: // %bb.0: // %entry |
| 168 | +; SHA3-NEXT: // kill: def $d0 killed $d0 def $q0 |
| 169 | +; SHA3-NEXT: // kill: def $d1 killed $d1 def $q1 |
| 170 | +; SHA3-NEXT: xar v0.2d, v0.2d, v1.2d, #7 |
| 171 | +; SHA3-NEXT: // kill: def $d0 killed $d0 killed $q0 |
| 172 | +; SHA3-NEXT: ret |
| 173 | +; |
| 174 | +; NOSHA3-LABEL: xar_v2i32: |
| 175 | +; NOSHA3: // %bb.0: // %entry |
| 176 | +; NOSHA3-NEXT: eor v1.8b, v0.8b, v1.8b |
| 177 | +; NOSHA3-NEXT: shl v0.2s, v1.2s, #25 |
| 178 | +; NOSHA3-NEXT: usra v0.2s, v1.2s, #7 |
| 179 | +; NOSHA3-NEXT: ret |
| 180 | +entry: |
| 181 | + %a = xor <2 x i32> %x, %y |
| 182 | + %b = call <2 x i32> @llvm.fshl(<2 x i32> %a, <2 x i32> %a, <2 x i32> splat (i32 25)) |
| 183 | + ret <2 x i32> %b |
| 184 | +} |
| 185 | + |
| | +; Rotate each i32 lane of %r left by 25 (equivalently right by 7) with no preceding xor. |
| | +; FIXME(review): the +sha3 expectation below is "xar v0.2d, ..., #7", which rotates 64-bit lanes; confirm that is a valid lowering for 32-bit lanes. |
| 186 | +define <2 x i32> @xar_instead_of_or_v2i32(<2 x i32> %r) { |
| 187 | +; SHA3-LABEL: xar_instead_of_or_v2i32: |
| 188 | +; SHA3: // %bb.0: // %entry |
| 189 | +; SHA3-NEXT: movi v1.2d, #0000000000000000 |
| 190 | +; SHA3-NEXT: // kill: def $d0 killed $d0 def $q0 |
| 191 | +; SHA3-NEXT: xar v0.2d, v0.2d, v1.2d, #7 |
| 192 | +; SHA3-NEXT: // kill: def $d0 killed $d0 killed $q0 |
| 193 | +; SHA3-NEXT: ret |
| 194 | +; |
| 195 | +; NOSHA3-LABEL: xar_instead_of_or_v2i32: |
| 196 | +; NOSHA3: // %bb.0: // %entry |
| 197 | +; NOSHA3-NEXT: shl v1.2s, v0.2s, #25 |
| 198 | +; NOSHA3-NEXT: usra v1.2s, v0.2s, #7 |
| 199 | +; NOSHA3-NEXT: fmov d0, d1 |
| 200 | +; NOSHA3-NEXT: ret |
| 201 | +entry: |
| 202 | + %or = call <2 x i32> @llvm.fshl(<2 x i32> %r, <2 x i32> %r, <2 x i32> splat (i32 25)) |
| 203 | + ret <2 x i32> %or |
| 204 | +} |
| 205 | + |
| | +; xor of two <4 x i16> values, then rotate each i16 lane left by 25; fshl takes the amount modulo 16, so this is left by 9 / right by 7. |
| | +; FIXME(review): the +sha3 expectation below is "xar v0.2d, ..., #7", which rotates 64-bit lanes; confirm that is a valid lowering for 16-bit lanes. |
| 206 | +define <4 x i16> @xar_v4i16(<4 x i16> %x, <4 x i16> %y) { |
| 207 | +; SHA3-LABEL: xar_v4i16: |
| 208 | +; SHA3: // %bb.0: // %entry |
| 209 | +; SHA3-NEXT: // kill: def $d0 killed $d0 def $q0 |
| 210 | +; SHA3-NEXT: // kill: def $d1 killed $d1 def $q1 |
| 211 | +; SHA3-NEXT: xar v0.2d, v0.2d, v1.2d, #7 |
| 212 | +; SHA3-NEXT: // kill: def $d0 killed $d0 killed $q0 |
| 213 | +; SHA3-NEXT: ret |
| 214 | +; |
| 215 | +; NOSHA3-LABEL: xar_v4i16: |
| 216 | +; NOSHA3: // %bb.0: // %entry |
| 217 | +; NOSHA3-NEXT: eor v1.8b, v0.8b, v1.8b |
| 218 | +; NOSHA3-NEXT: shl v0.4h, v1.4h, #9 |
| 219 | +; NOSHA3-NEXT: usra v0.4h, v1.4h, #7 |
| 220 | +; NOSHA3-NEXT: ret |
| 221 | +entry: |
| 222 | + %a = xor <4 x i16> %x, %y |
| 223 | + %b = call <4 x i16> @llvm.fshl(<4 x i16> %a, <4 x i16> %a, <4 x i16> splat (i16 25)) |
| 224 | + ret <4 x i16> %b |
| 225 | +} |
| 226 | + |
| | +; Rotate each i16 lane of %r left by 25 with no preceding xor; fshl takes the amount modulo 16, so this is left by 9 / right by 7. |
| | +; FIXME(review): the +sha3 expectation below is "xar v0.2d, ..., #7", which rotates 64-bit lanes; confirm that is a valid lowering for 16-bit lanes. |
| 227 | +define <4 x i16> @xar_instead_of_or_v4i16(<4 x i16> %r) { |
| 228 | +; SHA3-LABEL: xar_instead_of_or_v4i16: |
| 229 | +; SHA3: // %bb.0: // %entry |
| 230 | +; SHA3-NEXT: movi v1.2d, #0000000000000000 |
| 231 | +; SHA3-NEXT: // kill: def $d0 killed $d0 def $q0 |
| 232 | +; SHA3-NEXT: xar v0.2d, v0.2d, v1.2d, #7 |
| 233 | +; SHA3-NEXT: // kill: def $d0 killed $d0 killed $q0 |
| 234 | +; SHA3-NEXT: ret |
| 235 | +; |
| 236 | +; NOSHA3-LABEL: xar_instead_of_or_v4i16: |
| 237 | +; NOSHA3: // %bb.0: // %entry |
| 238 | +; NOSHA3-NEXT: shl v1.4h, v0.4h, #9 |
| 239 | +; NOSHA3-NEXT: usra v1.4h, v0.4h, #7 |
| 240 | +; NOSHA3-NEXT: fmov d0, d1 |
| 241 | +; NOSHA3-NEXT: ret |
| 242 | +entry: |
| 243 | + %or = call <4 x i16> @llvm.fshl(<4 x i16> %r, <4 x i16> %r, <4 x i16> splat (i16 25)) |
| 244 | + ret <4 x i16> %or |
| 245 | +} |
| 246 | + |
| | +; xor of two <8 x i8> values, then rotate each i8 lane left by 25; fshl takes the amount modulo 8, so this is left by 1 / right by 7. |
| | +; FIXME(review): the +sha3 expectation below is "xar v0.2d, ..., #7", which rotates 64-bit lanes; confirm that is a valid lowering for 8-bit lanes. |
| 247 | +define <8 x i8> @xar_v8i8(<8 x i8> %x, <8 x i8> %y) { |
| 248 | +; SHA3-LABEL: xar_v8i8: |
| 249 | +; SHA3: // %bb.0: // %entry |
| 250 | +; SHA3-NEXT: // kill: def $d0 killed $d0 def $q0 |
| 251 | +; SHA3-NEXT: // kill: def $d1 killed $d1 def $q1 |
| 252 | +; SHA3-NEXT: xar v0.2d, v0.2d, v1.2d, #7 |
| 253 | +; SHA3-NEXT: // kill: def $d0 killed $d0 killed $q0 |
| 254 | +; SHA3-NEXT: ret |
| 255 | +; |
| 256 | +; NOSHA3-LABEL: xar_v8i8: |
| 257 | +; NOSHA3: // %bb.0: // %entry |
| 258 | +; NOSHA3-NEXT: eor v1.8b, v0.8b, v1.8b |
| 259 | +; NOSHA3-NEXT: add v0.8b, v1.8b, v1.8b |
| 260 | +; NOSHA3-NEXT: usra v0.8b, v1.8b, #7 |
| 261 | +; NOSHA3-NEXT: ret |
| 262 | +entry: |
| 263 | + %a = xor <8 x i8> %x, %y |
| 264 | + %b = call <8 x i8> @llvm.fshl(<8 x i8> %a, <8 x i8> %a, <8 x i8> splat (i8 25)) |
| 265 | + ret <8 x i8> %b |
| 266 | +} |
| 267 | + |
| | +; Rotate each i8 lane of %r left by 25 with no preceding xor; fshl takes the amount modulo 8, so this is left by 1 / right by 7. |
| | +; FIXME(review): the +sha3 expectation below is "xar v0.2d, ..., #7", which rotates 64-bit lanes; confirm that is a valid lowering for 8-bit lanes. |
| 268 | +define <8 x i8> @xar_instead_of_or_v8i8(<8 x i8> %r) { |
| 269 | +; SHA3-LABEL: xar_instead_of_or_v8i8: |
| 270 | +; SHA3: // %bb.0: // %entry |
| 271 | +; SHA3-NEXT: movi v1.2d, #0000000000000000 |
| 272 | +; SHA3-NEXT: // kill: def $d0 killed $d0 def $q0 |
| 273 | +; SHA3-NEXT: xar v0.2d, v0.2d, v1.2d, #7 |
| 274 | +; SHA3-NEXT: // kill: def $d0 killed $d0 killed $q0 |
| 275 | +; SHA3-NEXT: ret |
| 276 | +; |
| 277 | +; NOSHA3-LABEL: xar_instead_of_or_v8i8: |
| 278 | +; NOSHA3: // %bb.0: // %entry |
| 279 | +; NOSHA3-NEXT: add v1.8b, v0.8b, v0.8b |
| 280 | +; NOSHA3-NEXT: usra v1.8b, v0.8b, #7 |
| 281 | +; NOSHA3-NEXT: fmov d0, d1 |
| 282 | +; NOSHA3-NEXT: ret |
| 283 | +entry: |
| 284 | + %or = call <8 x i8> @llvm.fshl(<8 x i8> %r, <8 x i8> %r, <8 x i8> splat (i8 25)) |
| 285 | + ret <8 x i8> %or |
| 286 | +} |
| 287 | + |
136 | 288 | declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
|
137 | 289 | declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
|
138 | 290 | declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
|
|
0 commit comments