Skip to content

Commit 1e7171f

Browse files
authored
[AArch64] Add tablegen patterns for concat(extract-high, extract-high) (#118286)
A `concat(extract-high(x), extract-high(y))` is equivalent to inserting the top half of x into the bottom half of y. This patch adds a TableGen pattern to make sure that we generate a single i64 lane insert for it.
1 parent 0c8928d commit 1e7171f

File tree

4 files changed

+46
-70
lines changed

4 files changed

+46
-70
lines changed

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,12 @@ def gi_extract_high_v4i32 :
145145

146146
def extract_high_v8f16 :
147147
ComplexPattern<v4f16, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
148+
def extract_high_v8bf16 :
149+
ComplexPattern<v4bf16, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
148150
def extract_high_v4f32 :
149151
ComplexPattern<v2f32, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
152+
def extract_high_v2f64 :
153+
ComplexPattern<v1f64, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
150154

151155
def gi_extract_high_v8f16 :
152156
GIComplexOperandMatcher<v4s16, "selectExtractHigh">,

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7352,7 +7352,8 @@ def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx),
73527352
// All concat_vectors operations are canonicalised to act on i64 vectors for
73537353
// AArch64. In the general case we need an instruction, which might as well be
73547354
// INS.
7355-
multiclass ConcatPat<ValueType DstTy, ValueType SrcTy> {
7355+
multiclass ConcatPat<ValueType DstTy, ValueType SrcTy,
7356+
ComplexPattern ExtractHigh> {
73567357
def : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)),
73577358
(INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1,
73587359
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>;
@@ -7365,16 +7366,22 @@ multiclass ConcatPat<ValueType DstTy, ValueType SrcTy> {
73657366
// If the high lanes are undef we can just ignore them:
73667367
def : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)),
73677368
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>;
7368-
}
73697369

7370-
defm : ConcatPat<v2i64, v1i64>;
7371-
defm : ConcatPat<v2f64, v1f64>;
7372-
defm : ConcatPat<v4i32, v2i32>;
7373-
defm : ConcatPat<v4f32, v2f32>;
7374-
defm : ConcatPat<v8i16, v4i16>;
7375-
defm : ConcatPat<v8f16, v4f16>;
7376-
defm : ConcatPat<v8bf16, v4bf16>;
7377-
defm : ConcatPat<v16i8, v8i8>;
7370+
// Concatenating the high halves of two vectors is an insert of the first's high half
7371+
// into the low half of the second.
7372+
def : Pat<(DstTy (concat_vectors (ExtractHigh (DstTy V128:$Rn)),
7373+
(ExtractHigh (DstTy V128:$Rm)))),
7374+
(INSvi64lane V128:$Rm, (i64 0), V128:$Rn, (i64 1))>;
7375+
}
7376+
7377+
defm : ConcatPat<v2i64, v1i64, extract_high_v2i64>;
7378+
defm : ConcatPat<v2f64, v1f64, extract_high_v2f64>;
7379+
defm : ConcatPat<v4i32, v2i32, extract_high_v4i32>;
7380+
defm : ConcatPat<v4f32, v2f32, extract_high_v4f32>;
7381+
defm : ConcatPat<v8i16, v4i16, extract_high_v8i16>;
7382+
defm : ConcatPat<v8f16, v4f16, extract_high_v8f16>;
7383+
defm : ConcatPat<v8bf16, v4bf16, extract_high_v8bf16>;
7384+
defm : ConcatPat<v16i8, v8i8, extract_high_v16i8>;
73787385

73797386
//----------------------------------------------------------------------------
73807387
// AdvSIMD across lanes instructions

llvm/test/CodeGen/AArch64/concat-vector.ll

Lines changed: 24 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -385,19 +385,11 @@ entry:
385385
}
386386

387387
define <8 x i16> @concat_high_high_v8i16(<8 x i16> %a_vec, <8 x i16> %b_vec) {
388-
; CHECK-SD-LABEL: concat_high_high_v8i16:
389-
; CHECK-SD: // %bb.0: // %entry
390-
; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
391-
; CHECK-SD-NEXT: ext v1.16b, v1.16b, v1.16b, #8
392-
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
393-
; CHECK-SD-NEXT: ret
394-
;
395-
; CHECK-GI-LABEL: concat_high_high_v8i16:
396-
; CHECK-GI: // %bb.0: // %entry
397-
; CHECK-GI-NEXT: mov d0, v0.d[1]
398-
; CHECK-GI-NEXT: mov d1, v1.d[1]
399-
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
400-
; CHECK-GI-NEXT: ret
388+
; CHECK-LABEL: concat_high_high_v8i16:
389+
; CHECK: // %bb.0: // %entry
390+
; CHECK-NEXT: mov v1.d[0], v0.d[1]
391+
; CHECK-NEXT: mov v0.16b, v1.16b
392+
; CHECK-NEXT: ret
401393
entry:
402394
%shuffle.i3 = shufflevector <8 x i16> %a_vec, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
403395
%shuffle.i = shufflevector <8 x i16> %b_vec, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -406,19 +398,11 @@ entry:
406398
}
407399

408400
define <8 x half> @concat_high_high_v8f16(<8 x half> %a_vec, <8 x half> %b_vec) {
409-
; CHECK-SD-LABEL: concat_high_high_v8f16:
410-
; CHECK-SD: // %bb.0: // %entry
411-
; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
412-
; CHECK-SD-NEXT: ext v1.16b, v1.16b, v1.16b, #8
413-
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
414-
; CHECK-SD-NEXT: ret
415-
;
416-
; CHECK-GI-LABEL: concat_high_high_v8f16:
417-
; CHECK-GI: // %bb.0: // %entry
418-
; CHECK-GI-NEXT: mov d0, v0.d[1]
419-
; CHECK-GI-NEXT: mov d1, v1.d[1]
420-
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
421-
; CHECK-GI-NEXT: ret
401+
; CHECK-LABEL: concat_high_high_v8f16:
402+
; CHECK: // %bb.0: // %entry
403+
; CHECK-NEXT: mov v1.d[0], v0.d[1]
404+
; CHECK-NEXT: mov v0.16b, v1.16b
405+
; CHECK-NEXT: ret
422406
entry:
423407
%shuffle.i3 = shufflevector <8 x half> %a_vec, <8 x half> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
424408
%shuffle.i = shufflevector <8 x half> %b_vec, <8 x half> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -427,19 +411,11 @@ entry:
427411
}
428412

429413
define <8 x bfloat> @concat_high_high_v8bf16(<8 x bfloat> %a_vec, <8 x bfloat> %b_vec) {
430-
; CHECK-SD-LABEL: concat_high_high_v8bf16:
431-
; CHECK-SD: // %bb.0: // %entry
432-
; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
433-
; CHECK-SD-NEXT: ext v1.16b, v1.16b, v1.16b, #8
434-
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
435-
; CHECK-SD-NEXT: ret
436-
;
437-
; CHECK-GI-LABEL: concat_high_high_v8bf16:
438-
; CHECK-GI: // %bb.0: // %entry
439-
; CHECK-GI-NEXT: mov d0, v0.d[1]
440-
; CHECK-GI-NEXT: mov d1, v1.d[1]
441-
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
442-
; CHECK-GI-NEXT: ret
414+
; CHECK-LABEL: concat_high_high_v8bf16:
415+
; CHECK: // %bb.0: // %entry
416+
; CHECK-NEXT: mov v1.d[0], v0.d[1]
417+
; CHECK-NEXT: mov v0.16b, v1.16b
418+
; CHECK-NEXT: ret
443419
entry:
444420
%shuffle.i3 = shufflevector <8 x bfloat> %a_vec, <8 x bfloat> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
445421
%shuffle.i = shufflevector <8 x bfloat> %b_vec, <8 x bfloat> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -455,9 +431,8 @@ define <4 x i32> @concat_high_high_v4i32(<4 x i32> %a_vec, <4 x i32> %b_vec) {
455431
;
456432
; CHECK-GI-LABEL: concat_high_high_v4i32:
457433
; CHECK-GI: // %bb.0: // %entry
458-
; CHECK-GI-NEXT: mov d0, v0.d[1]
459-
; CHECK-GI-NEXT: mov d1, v1.d[1]
460-
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
434+
; CHECK-GI-NEXT: mov v1.d[0], v0.d[1]
435+
; CHECK-GI-NEXT: mov v0.16b, v1.16b
461436
; CHECK-GI-NEXT: ret
462437
entry:
463438
%shuffle.i3 = shufflevector <4 x i32> %a_vec, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
@@ -474,9 +449,8 @@ define <4 x float> @concat_high_high_v4f32(<4 x float> %a_vec, <4 x float> %b_ve
474449
;
475450
; CHECK-GI-LABEL: concat_high_high_v4f32:
476451
; CHECK-GI: // %bb.0: // %entry
477-
; CHECK-GI-NEXT: mov d0, v0.d[1]
478-
; CHECK-GI-NEXT: mov d1, v1.d[1]
479-
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
452+
; CHECK-GI-NEXT: mov v1.d[0], v0.d[1]
453+
; CHECK-GI-NEXT: mov v0.16b, v1.16b
480454
; CHECK-GI-NEXT: ret
481455
entry:
482456
%shuffle.i3 = shufflevector <4 x float> %a_vec, <4 x float> poison, <2 x i32> <i32 2, i32 3>
@@ -486,19 +460,11 @@ entry:
486460
}
487461

488462
define <16 x i8> @concat_high_high_v16i8(<16 x i8> %a_vec, <16 x i8> %b_vec) {
489-
; CHECK-SD-LABEL: concat_high_high_v16i8:
490-
; CHECK-SD: // %bb.0: // %entry
491-
; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
492-
; CHECK-SD-NEXT: ext v1.16b, v1.16b, v1.16b, #8
493-
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
494-
; CHECK-SD-NEXT: ret
495-
;
496-
; CHECK-GI-LABEL: concat_high_high_v16i8:
497-
; CHECK-GI: // %bb.0: // %entry
498-
; CHECK-GI-NEXT: mov d0, v0.d[1]
499-
; CHECK-GI-NEXT: mov d1, v1.d[1]
500-
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
501-
; CHECK-GI-NEXT: ret
463+
; CHECK-LABEL: concat_high_high_v16i8:
464+
; CHECK: // %bb.0: // %entry
465+
; CHECK-NEXT: mov v1.d[0], v0.d[1]
466+
; CHECK-NEXT: mov v0.16b, v1.16b
467+
; CHECK-NEXT: ret
502468
entry:
503469
%shuffle.i3 = shufflevector <16 x i8> %a_vec, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
504470
%shuffle.i = shufflevector <16 x i8> %b_vec, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>

llvm/test/CodeGen/AArch64/vecreduce-add.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4885,8 +4885,7 @@ entry:
48854885
define i32 @extract_hi_hi(<8 x i16> %a) {
48864886
; CHECK-SD-LABEL: extract_hi_hi:
48874887
; CHECK-SD: // %bb.0: // %entry
4888-
; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
4889-
; CHECK-SD-NEXT: mov v0.d[1], v0.d[0]
4888+
; CHECK-SD-NEXT: mov v0.d[0], v0.d[1]
48904889
; CHECK-SD-NEXT: uaddlv s0, v0.8h
48914890
; CHECK-SD-NEXT: fmov w0, s0
48924891
; CHECK-SD-NEXT: ret

0 commit comments

Comments
 (0)