-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[AArch64][GlobalISel] Fold buildvector of bitcast #141553
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
dbe1475
to
01c6d24
Compare
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-globalisel Author: David Green (davemgreen) ChangesThis adds a combine for buildvectors from bitcast values, sinking the bitcast and generating a buildvector from the original scalar type.
It helps clean up some of the inefficiencies from widening scalar types. Patch is 25.59 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141553.diff 18 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index c15263e0b06f8..529a9e3430463 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -265,6 +265,13 @@ class CombinerHelper {
bool matchCombineShuffleToBuildVector(MachineInstr &MI) const;
void applyCombineShuffleToBuildVector(MachineInstr &MI) const;
+ /// Combine G_BUILD_VECTOR(G_UNMERGE(G_BITCAST), Undef) to
+ /// G_BITCAST(G_BUILD_VECTOR(..))
+ bool matchCombineBuildVectorOfBitcast(MachineInstr &MI,
+ SmallVector<Register> &Ops) const;
+ void applyCombineBuildVectorOfBitcast(MachineInstr &MI,
+ SmallVector<Register> &Ops) const;
+
/// Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
/// Returns true if MI changed.
///
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index efd88524a159e..ea55f1341b68c 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1579,6 +1579,13 @@ def combine_shuffle_vector_to_build_vector : GICombineRule<
[{ return Helper.matchCombineShuffleToBuildVector(*${root}); }]),
(apply [{ Helper.applyCombineShuffleToBuildVector(*${root}); }])>;
+// Combine a G_BUILD_VECTOR of unmerge(bitcast) scalars into
+// bitcast(build_vector) of the original scalar sources.
+def combine_build_vector_of_bitcast : GICombineRule<
+ (defs root:$root, concat_matchinfo:$matchinfo),
+ (match (G_BUILD_VECTOR $dst, GIVariadic<>:$unused):$root,
+ [{ return Helper.matchCombineBuildVectorOfBitcast(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyCombineBuildVectorOfBitcast(*${root}, ${matchinfo}); }])>;
+
def insert_vector_element_idx_undef : GICombineRule<
(defs root:$root),
(match (G_IMPLICIT_DEF $idx),
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index b1e851183de0d..79af1abafaa07 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -386,6 +386,75 @@ void CombinerHelper::applyCombineConcatVectors(
MI.eraseFromParent();
}
+// Match a G_BUILD_VECTOR whose scalar operands are produced by
+// G_UNMERGE_VALUES of G_BITCAST'd scalars (or are undef), so it can be
+// rewritten as G_BITCAST(G_BUILD_VECTOR(scalar, ...)).  On success, Ops
+// holds one entry per group of Factor operands: the original (pre-bitcast)
+// scalar register, or a null Register for an all-undef group.
+bool CombinerHelper::matchCombineBuildVectorOfBitcast(
+ MachineInstr &MI, SmallVector<Register> &Ops) const {
+ assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR &&
+ "Invalid instruction");
+
+ // Look at the first operand for an unmerge(bitcast) from a scalar type.
+ // The first build_vector operand must be def 0 of the unmerge, so that the
+ // element order of the original scalar is preserved.
+ GUnmerge *Unmerge =
+ dyn_cast<GUnmerge>(MRI.getVRegDef(MI.getOperand(1).getReg()));
+ if (!Unmerge || Unmerge->getReg(0) != MI.getOperand(1).getReg())
+ return false;
+ MachineInstr *BC = MRI.getVRegDef(Unmerge->getSourceReg());
+ if (BC->getOpcode() != TargetOpcode::G_BITCAST)
+ return false;
+ LLT InputTy = MRI.getType(BC->getOperand(1).getReg());
+ // Factor is how many build_vector operands each unmerged scalar supplies;
+ // the operand count (minus the def) must split into whole groups.
+ unsigned Factor = Unmerge->getNumDefs();
+ if (!InputTy.isScalar() || (MI.getNumOperands() - 1) % Factor != 0)
+ return false;
+
+ // Check if the build_vector is legal
+ // NOTE(review): only the new G_BUILD_VECTOR is legality-checked here; the
+ // resulting G_BITCAST is presumably always acceptable post-legalization --
+ // confirm for targets where that may not hold.
+ LLT BVDstTy = LLT::fixed_vector((MI.getNumOperands() - 1) / Factor, InputTy);
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_BUILD_VECTOR, {BVDstTy, InputTy}}))
+ return false;
+
+ // Check all other operands are bitcasts or undef.
+ // Each group of Factor operands must be, lane for lane, either undef or
+ // def J of a single unmerge(bitcast) of InputTy.  Mixed undef/unmerge
+ // groups fold to the bitcast source, which is valid because undef lanes
+ // may take any value.
+ for (unsigned Idx = 0; Idx < MI.getNumOperands() - 1; Idx += Factor) {
+ GUnmerge *Unmerge =
+ dyn_cast<GUnmerge>(MRI.getVRegDef(MI.getOperand(Idx + 1).getReg()));
+ if (!all_of(iota_range<unsigned>(0, Factor, false), [&](unsigned J) {
+ MachineInstr *Src =
+ MRI.getVRegDef(MI.getOperand(Idx + J + 1).getReg());
+ if (Src->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
+ return true;
+ return Unmerge &&
+ MI.getOperand(Idx + J + 1).getReg() == Unmerge->getReg(J);
+ }))
+ return false;
+ if (!Unmerge)
+ // Null register is the sentinel for an all-undef group; the apply step
+ // materializes a G_IMPLICIT_DEF for it.
+ Ops.push_back(0);
+ else {
+ // The group's unmerge must feed from a bitcast of the same scalar type
+ // as the first group, so a single homogeneous build_vector can be made.
+ MachineInstr *BC = MRI.getVRegDef(Unmerge->getSourceReg());
+ if (BC->getOpcode() != TargetOpcode::G_BITCAST ||
+ MRI.getType(BC->getOperand(1).getReg()) != InputTy)
+ return false;
+ Ops.push_back(BC->getOperand(1).getReg());
+ }
+ }
+
+ return true;
+}
+// Rewrite the matched G_BUILD_VECTOR as
+// G_BITCAST(G_BUILD_VECTOR(Ops...)), where Ops are the original scalar
+// sources collected by the matcher (null entries meaning undef).
+void CombinerHelper::applyCombineBuildVectorOfBitcast(
+ MachineInstr &MI, SmallVector<Register> &Ops) const {
+ // Ops[0] is always a real register: the matcher requires the first operand
+ // group to come from an unmerge(bitcast), never all-undef.
+ LLT SrcTy = MRI.getType(Ops[0]);
+ // Build a single shared undef of the scalar type for any null (all-undef)
+ // groups.
+ Register Undef = 0;
+ for (Register &Op : Ops) {
+ if (!Op) {
+ if (!Undef)
+ Undef = Builder.buildUndef(SrcTy).getReg(0);
+ Op = Undef;
+ }
+ }
+
+ // Build the narrower scalar build_vector, then bitcast it back to the
+ // original destination register so all users are preserved.
+ LLT BVDstTy = LLT::fixed_vector(Ops.size(), SrcTy);
+ auto BV = Builder.buildBuildVector(BVDstTy, Ops);
+ Builder.buildBitcast(MI.getOperand(0).getReg(), BV);
+ MI.eraseFromParent();
+}
+
bool CombinerHelper::matchCombineShuffleToBuildVector(MachineInstr &MI) const {
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
"Invalid instruction");
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index f84e83816bf33..b811d3c03b0a7 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -345,6 +345,7 @@ def AArch64PostLegalizerCombiner
ptr_add_immed_chain, overlapping_and,
split_store_zero_128, undef_combines,
select_to_minmax, or_to_bsp, combine_concat_vector,
+ combine_build_vector_of_bitcast,
commute_constant_to_rhs,
push_freeze_to_prevent_poison_from_propagating,
combine_mul_cmlt, combine_use_vector_truncate, extmultomull]> {
diff --git a/llvm/test/CodeGen/AArch64/add.ll b/llvm/test/CodeGen/AArch64/add.ll
index d5bd1b712a2a6..689c4b9c516c0 100644
--- a/llvm/test/CodeGen/AArch64/add.ll
+++ b/llvm/test/CodeGen/AArch64/add.ll
@@ -149,28 +149,10 @@ define void @v4i8(ptr %p1, ptr %p2) {
;
; CHECK-GI-LABEL: v4i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr w8, [x0]
-; CHECK-GI-NEXT: ldr w9, [x1]
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: fmov s1, w9
-; CHECK-GI-NEXT: mov b2, v0.b[1]
-; CHECK-GI-NEXT: mov b3, v1.b[1]
-; CHECK-GI-NEXT: mov b4, v0.b[2]
-; CHECK-GI-NEXT: mov b5, v0.b[3]
-; CHECK-GI-NEXT: fmov w8, s2
-; CHECK-GI-NEXT: mov b2, v1.b[2]
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mov b3, v1.b[3]
-; CHECK-GI-NEXT: mov v0.h[1], w8
-; CHECK-GI-NEXT: mov v1.h[1], w9
-; CHECK-GI-NEXT: fmov w8, s4
-; CHECK-GI-NEXT: fmov w9, s2
-; CHECK-GI-NEXT: mov v0.h[2], w8
-; CHECK-GI-NEXT: mov v1.h[2], w9
-; CHECK-GI-NEXT: fmov w8, s5
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mov v0.h[3], w8
-; CHECK-GI-NEXT: mov v1.h[3], w9
+; CHECK-GI-NEXT: ldr s0, [x0]
+; CHECK-GI-NEXT: ldr s1, [x1]
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT: fmov w8, s0
diff --git a/llvm/test/CodeGen/AArch64/andorxor.ll b/llvm/test/CodeGen/AArch64/andorxor.ll
index f7df1092287bd..04a350b3fc666 100644
--- a/llvm/test/CodeGen/AArch64/andorxor.ll
+++ b/llvm/test/CodeGen/AArch64/andorxor.ll
@@ -439,28 +439,10 @@ define void @and_v4i8(ptr %p1, ptr %p2) {
;
; CHECK-GI-LABEL: and_v4i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr w8, [x0]
-; CHECK-GI-NEXT: ldr w9, [x1]
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: fmov s1, w9
-; CHECK-GI-NEXT: mov b2, v0.b[1]
-; CHECK-GI-NEXT: mov b3, v1.b[1]
-; CHECK-GI-NEXT: mov b4, v0.b[2]
-; CHECK-GI-NEXT: mov b5, v0.b[3]
-; CHECK-GI-NEXT: fmov w8, s2
-; CHECK-GI-NEXT: mov b2, v1.b[2]
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mov b3, v1.b[3]
-; CHECK-GI-NEXT: mov v0.h[1], w8
-; CHECK-GI-NEXT: mov v1.h[1], w9
-; CHECK-GI-NEXT: fmov w8, s4
-; CHECK-GI-NEXT: fmov w9, s2
-; CHECK-GI-NEXT: mov v0.h[2], w8
-; CHECK-GI-NEXT: mov v1.h[2], w9
-; CHECK-GI-NEXT: fmov w8, s5
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mov v0.h[3], w8
-; CHECK-GI-NEXT: mov v1.h[3], w9
+; CHECK-GI-NEXT: ldr s0, [x0]
+; CHECK-GI-NEXT: ldr s1, [x1]
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT: fmov w8, s0
@@ -488,28 +470,10 @@ define void @or_v4i8(ptr %p1, ptr %p2) {
;
; CHECK-GI-LABEL: or_v4i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr w8, [x0]
-; CHECK-GI-NEXT: ldr w9, [x1]
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: fmov s1, w9
-; CHECK-GI-NEXT: mov b2, v0.b[1]
-; CHECK-GI-NEXT: mov b3, v1.b[1]
-; CHECK-GI-NEXT: mov b4, v0.b[2]
-; CHECK-GI-NEXT: mov b5, v0.b[3]
-; CHECK-GI-NEXT: fmov w8, s2
-; CHECK-GI-NEXT: mov b2, v1.b[2]
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mov b3, v1.b[3]
-; CHECK-GI-NEXT: mov v0.h[1], w8
-; CHECK-GI-NEXT: mov v1.h[1], w9
-; CHECK-GI-NEXT: fmov w8, s4
-; CHECK-GI-NEXT: fmov w9, s2
-; CHECK-GI-NEXT: mov v0.h[2], w8
-; CHECK-GI-NEXT: mov v1.h[2], w9
-; CHECK-GI-NEXT: fmov w8, s5
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mov v0.h[3], w8
-; CHECK-GI-NEXT: mov v1.h[3], w9
+; CHECK-GI-NEXT: ldr s0, [x0]
+; CHECK-GI-NEXT: ldr s1, [x1]
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT: fmov w8, s0
@@ -537,28 +501,10 @@ define void @xor_v4i8(ptr %p1, ptr %p2) {
;
; CHECK-GI-LABEL: xor_v4i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr w8, [x0]
-; CHECK-GI-NEXT: ldr w9, [x1]
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: fmov s1, w9
-; CHECK-GI-NEXT: mov b2, v0.b[1]
-; CHECK-GI-NEXT: mov b3, v1.b[1]
-; CHECK-GI-NEXT: mov b4, v0.b[2]
-; CHECK-GI-NEXT: mov b5, v0.b[3]
-; CHECK-GI-NEXT: fmov w8, s2
-; CHECK-GI-NEXT: mov b2, v1.b[2]
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mov b3, v1.b[3]
-; CHECK-GI-NEXT: mov v0.h[1], w8
-; CHECK-GI-NEXT: mov v1.h[1], w9
-; CHECK-GI-NEXT: fmov w8, s4
-; CHECK-GI-NEXT: fmov w9, s2
-; CHECK-GI-NEXT: mov v0.h[2], w8
-; CHECK-GI-NEXT: mov v1.h[2], w9
-; CHECK-GI-NEXT: fmov w8, s5
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mov v0.h[3], w8
-; CHECK-GI-NEXT: mov v1.h[3], w9
+; CHECK-GI-NEXT: ldr s0, [x0]
+; CHECK-GI-NEXT: ldr s1, [x1]
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: eor v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT: fmov w8, s0
diff --git a/llvm/test/CodeGen/AArch64/bitcast-extend.ll b/llvm/test/CodeGen/AArch64/bitcast-extend.ll
index 3133d0efb4b9b..47cb168065bb5 100644
--- a/llvm/test/CodeGen/AArch64/bitcast-extend.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast-extend.ll
@@ -12,15 +12,8 @@ define <4 x i16> @z_i32_v4i16(i32 %x) {
;
; CHECK-GI-LABEL: z_i32_v4i16:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fmov s0, w0
-; CHECK-GI-NEXT: mov b1, v0.b[1]
-; CHECK-GI-NEXT: mov v2.b[0], v0.b[0]
-; CHECK-GI-NEXT: mov b3, v0.b[2]
-; CHECK-GI-NEXT: mov b0, v0.b[3]
-; CHECK-GI-NEXT: mov v2.b[1], v1.b[0]
-; CHECK-GI-NEXT: mov v2.b[2], v3.b[0]
-; CHECK-GI-NEXT: mov v2.b[3], v0.b[0]
-; CHECK-GI-NEXT: ushll v0.8h, v2.8b, #0
+; CHECK-GI-NEXT: mov v0.s[0], w0
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%b = bitcast i32 %x to <4 x i8>
@@ -115,15 +108,8 @@ define <4 x i16> @s_i32_v4i16(i32 %x) {
;
; CHECK-GI-LABEL: s_i32_v4i16:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fmov s0, w0
-; CHECK-GI-NEXT: mov b1, v0.b[1]
-; CHECK-GI-NEXT: mov v2.b[0], v0.b[0]
-; CHECK-GI-NEXT: mov b3, v0.b[2]
-; CHECK-GI-NEXT: mov b0, v0.b[3]
-; CHECK-GI-NEXT: mov v2.b[1], v1.b[0]
-; CHECK-GI-NEXT: mov v2.b[2], v3.b[0]
-; CHECK-GI-NEXT: mov v2.b[3], v0.b[0]
-; CHECK-GI-NEXT: sshll v0.8h, v2.8b, #0
+; CHECK-GI-NEXT: mov v0.s[0], w0
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%b = bitcast i32 %x to <4 x i8>
diff --git a/llvm/test/CodeGen/AArch64/bitcast.ll b/llvm/test/CodeGen/AArch64/bitcast.ll
index d54cc4adb81b3..442471951bab5 100644
--- a/llvm/test/CodeGen/AArch64/bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast.ll
@@ -79,16 +79,8 @@ define <4 x i8> @bitcast_i32_v4i8(i32 %a, i32 %b){
; CHECK-GI-LABEL: bitcast_i32_v4i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: add w8, w0, w1
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: mov b1, v0.b[1]
-; CHECK-GI-NEXT: mov b2, v0.b[2]
-; CHECK-GI-NEXT: fmov w8, s1
-; CHECK-GI-NEXT: mov b1, v0.b[3]
-; CHECK-GI-NEXT: mov v0.h[1], w8
-; CHECK-GI-NEXT: fmov w8, s2
-; CHECK-GI-NEXT: mov v0.h[2], w8
-; CHECK-GI-NEXT: fmov w8, s1
-; CHECK-GI-NEXT: mov v0.h[3], w8
+; CHECK-GI-NEXT: mov v0.s[0], w8
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%c = add i32 %a, %b
@@ -131,11 +123,8 @@ define <2 x i16> @bitcast_i32_v2i16(i32 %a, i32 %b){
; CHECK-GI-LABEL: bitcast_i32_v2i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: add w8, w0, w1
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: mov h1, v0.h[1]
; CHECK-GI-NEXT: mov v0.s[0], w8
-; CHECK-GI-NEXT: fmov w8, s1
-; CHECK-GI-NEXT: mov v0.s[1], w8
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%c = add i32 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/ctlz.ll b/llvm/test/CodeGen/AArch64/ctlz.ll
index a4863d1f74200..512095863e0aa 100644
--- a/llvm/test/CodeGen/AArch64/ctlz.ll
+++ b/llvm/test/CodeGen/AArch64/ctlz.ll
@@ -87,16 +87,8 @@ define void @v4i8(ptr %p1) {
;
; CHECK-GI-LABEL: v4i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr w8, [x0]
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: mov b1, v0.b[1]
-; CHECK-GI-NEXT: mov v2.b[0], v0.b[0]
-; CHECK-GI-NEXT: mov b3, v0.b[2]
-; CHECK-GI-NEXT: mov b0, v0.b[3]
-; CHECK-GI-NEXT: mov v2.b[1], v1.b[0]
-; CHECK-GI-NEXT: mov v2.b[2], v3.b[0]
-; CHECK-GI-NEXT: mov v2.b[3], v0.b[0]
-; CHECK-GI-NEXT: clz v0.8b, v2.8b
+; CHECK-GI-NEXT: ldr s0, [x0]
+; CHECK-GI-NEXT: clz v0.8b, v0.8b
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: str w8, [x0]
; CHECK-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/ctpop.ll b/llvm/test/CodeGen/AArch64/ctpop.ll
index 55f75b6bc3f27..356e668d050c9 100644
--- a/llvm/test/CodeGen/AArch64/ctpop.ll
+++ b/llvm/test/CodeGen/AArch64/ctpop.ll
@@ -85,16 +85,8 @@ define void @v4i8(ptr %p1) {
;
; CHECK-GI-LABEL: v4i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr w8, [x0]
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: mov b1, v0.b[1]
-; CHECK-GI-NEXT: mov v2.b[0], v0.b[0]
-; CHECK-GI-NEXT: mov b3, v0.b[2]
-; CHECK-GI-NEXT: mov b0, v0.b[3]
-; CHECK-GI-NEXT: mov v2.b[1], v1.b[0]
-; CHECK-GI-NEXT: mov v2.b[2], v3.b[0]
-; CHECK-GI-NEXT: mov v2.b[3], v0.b[0]
-; CHECK-GI-NEXT: cnt v0.8b, v2.8b
+; CHECK-GI-NEXT: ldr s0, [x0]
+; CHECK-GI-NEXT: cnt v0.8b, v0.8b
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: str w8, [x0]
; CHECK-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/cttz.ll b/llvm/test/CodeGen/AArch64/cttz.ll
index 60125f8a19811..60b589885c111 100644
--- a/llvm/test/CodeGen/AArch64/cttz.ll
+++ b/llvm/test/CodeGen/AArch64/cttz.ll
@@ -114,24 +114,15 @@ define void @v4i8(ptr %p1) {
;
; CHECK-GI-LABEL: v4i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr w9, [x0]
; CHECK-GI-NEXT: mov w8, #255 // =0xff
-; CHECK-GI-NEXT: fmov s0, w9
-; CHECK-GI-NEXT: mov b1, v0.b[1]
-; CHECK-GI-NEXT: mov b2, v0.b[2]
-; CHECK-GI-NEXT: mov b3, v0.b[3]
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: fmov s1, w8
-; CHECK-GI-NEXT: mov v0.h[1], w9
-; CHECK-GI-NEXT: mov v1.h[1], w8
-; CHECK-GI-NEXT: fmov w9, s2
-; CHECK-GI-NEXT: mov v0.h[2], w9
-; CHECK-GI-NEXT: mov v1.h[2], w8
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mov v0.h[3], w9
-; CHECK-GI-NEXT: mov v1.h[3], w8
-; CHECK-GI-NEXT: eor v2.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-GI-NEXT: ldr s1, [x0]
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mov v0.h[1], w8
+; CHECK-GI-NEXT: mov v0.h[2], w8
+; CHECK-GI-NEXT: mov v0.h[3], w8
+; CHECK-GI-NEXT: eor v2.8b, v1.8b, v0.8b
+; CHECK-GI-NEXT: add v0.4h, v1.4h, v0.4h
; CHECK-GI-NEXT: and v0.8b, v2.8b, v0.8b
; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT: cnt v0.8b, v0.8b
diff --git a/llvm/test/CodeGen/AArch64/mul.ll b/llvm/test/CodeGen/AArch64/mul.ll
index 8d9a6e6b92914..0109c716737dd 100644
--- a/llvm/test/CodeGen/AArch64/mul.ll
+++ b/llvm/test/CodeGen/AArch64/mul.ll
@@ -161,28 +161,10 @@ define void @v4i8(ptr %p1, ptr %p2) {
;
; CHECK-GI-LABEL: v4i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr w8, [x0]
-; CHECK-GI-NEXT: ldr w9, [x1]
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: fmov s1, w9
-; CHECK-GI-NEXT: mov b2, v0.b[1]
-; CHECK-GI-NEXT: mov b3, v1.b[1]
-; CHECK-GI-NEXT: mov b4, v0.b[2]
-; CHECK-GI-NEXT: mov b5, v0.b[3]
-; CHECK-GI-NEXT: fmov w8, s2
-; CHECK-GI-NEXT: mov b2, v1.b[2]
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mov b3, v1.b[3]
-; CHECK-GI-NEXT: mov v0.h[1], w8
-; CHECK-GI-NEXT: mov v1.h[1], w9
-; CHECK-GI-NEXT: fmov w8, s4
-; CHECK-GI-NEXT: fmov w9, s2
-; CHECK-GI-NEXT: mov v0.h[2], w8
-; CHECK-GI-NEXT: mov v1.h[2], w9
-; CHECK-GI-NEXT: fmov w8, s5
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mov v0.h[3], w8
-; CHECK-GI-NEXT: mov v1.h[3], w9
+; CHECK-GI-NEXT: ldr s0, [x0]
+; CHECK-GI-NEXT: ldr s1, [x1]
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: mul v0.4h, v0.4h, v1.4h
; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT: fmov w8, s0
diff --git a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll
index 2d3fda704908e..daae69fd4a949 100644
--- a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll
+++ b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll
@@ -93,16 +93,8 @@ define i32 @test_udot_v4i8_nomla(ptr nocapture readonly %a1) {
;
; CHECK-GI-LABEL: test_udot_v4i8_nomla:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr w8, [x0]
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: mov b1, v0.b[1]
-; CHECK-GI-NEXT: mov v2.b[0], v0.b[0]
-; CHECK-GI-NEXT: mov b3, v0.b[2]
-; CHECK-GI-NEXT: mov b0, v0.b[3]
-; CHECK-GI-NEXT: mov v2.b[1], v1.b[0]
-; CHECK-GI-NEXT: mov v2.b[2], v3.b[0]
-; CHECK-GI-NEXT: mov v2.b[3], v0.b[0]
-; CHECK-GI-NEXT: ushll v0.8h, v2.8b, #0
+; CHECK-GI-NEXT: ldr s0, [x0]
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: uaddlv s0, v0.4h
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: and w0, w8, #0xffff
diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
index e1018bbee7893..af205a8352ad2 100644
--- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
@@ -158,25 +158,9 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind {
;
; CHECK-GI-LABEL: v4i8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr w8, [x0]
-; CHECK-GI-NEXT: ldr w9, [x1]
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: fmov s1, w9
-; CHECK-GI-NEXT: mov b2, v0.b[1]
-; CHECK-GI-NEXT: mov v3.b[0], v0.b[0]
-; CHECK-GI-NEXT: mov b4, v1.b[1]
-; CHECK-GI-NEXT: mov v5.b[0], v1.b[0]
-; CHECK-GI-NEXT: mov v3.b[1], v2.b[0]
-; CHECK-GI-NEXT: mov b2, v0.b[2]
-; CHECK-GI-NEXT: mov b0, v0.b[3]
-; CHECK-GI-NEXT: mov v5.b[1], v4.b[0]
-; CHECK-GI-NEXT: mov b4, v1.b[2]
-; CHECK-GI-NEXT: mov b1, v1.b[3]
-; CHECK-GI-NEXT: mov v3....
[truncated]
|
I can't follow the example in the description, the register names and numbers in the result don't match the input, and there's a missing operand to the result bitcast |
In this?
%16 is the same as this input, the G_BITCAST is performed on the G_BUILD_VECTOR and replaces %23. I'll try and update it to be less like pseudocode. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there a reason we don't use getOpcodeDef<Ty>()
here?
This adds a combine for buildvectors from bitcast values, sinking the bitcast and generating a buildvector from the original scalar type. For example: %5:_(<4 x s8>) = G_BITCAST %16:_(s32) %18:_(s8), %19:_(s8), %20:_(s8), %21:_(s8) = G_UNMERGE_VALUES %5:_(<4 x s8>) %22:_(s8) = G_IMPLICIT_DEF %23:_(<8 x s8>) = G_BUILD_VECTOR %18:_(s8), %19:_(s8), %20:_(s8), %21:_(s8), %22:_(s8), %22:_(s8), %22:_(s8), %22:_(s8) is rewritten to: %u:_(s32) = G_IMPLICIT_DEF %bv:_(<2 x s32>) = G_BUILD_VECTOR %16:_(s32), %u:_(s32) %23:_(<8 x s8>) = G_BITCAST %bv:_(<2 x s32>) That is, the new G_BUILD_VECTOR operates on the original scalar %16 (plus undef padding), and the bitcast of it replaces %23. It helps clean up some of the inefficiencies from widening scalar types.
01c6d24
to
2907aae
Compare
This adds a combine for buildvectors from bitcast values, sinking the bitcast and generating a buildvector from the original scalar type.
It helps clean up some of the inefficiencies from widening scalar types.