Reland "[RISCV] AddEdge between mask producer and user of V0 (#146855)" #148566
Conversation
@llvm/pr-subscribers-backend-risc-v

Author: Liao Chunyu (ChunyuLiao)

Changes

The DefMask vector cannot contain instructions that use V0.

Patch is 25.38 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/148566.diff

7 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp b/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp
index be54a8c95a978..64bfe6a2097ea 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp
@@ -10,6 +10,10 @@
// instructions and masked instructions, so that we can reduce the live range
// overlaps of mask registers.
//
+// If there are multiple mask producers followed by multiple masked
+// instructions, then at each masked instruction add dependency edges between
+// every producer and that masked instruction.
+//
// The reason why we need to do this:
// 1. When tracking register pressure, we don't track physical registers.
// 2. We have a RegisterClass for mask register (which is `VMV0`), but we don't
@@ -68,11 +72,26 @@ class RISCVVectorMaskDAGMutation : public ScheduleDAGMutation {
void apply(ScheduleDAGInstrs *DAG) override {
SUnit *NearestUseV0SU = nullptr;
+ SmallVector<SUnit *, 2> DefMask;
for (SUnit &SU : DAG->SUnits) {
const MachineInstr *MI = SU.getInstr();
- if (MI->findRegisterUseOperand(RISCV::V0, TRI))
+ bool UseV0 = MI->findRegisterUseOperand(RISCV::V0, TRI);
+ if (isSoleUseCopyToV0(SU) && !UseV0)
+ DefMask.push_back(&SU);
+
+ if (UseV0) {
NearestUseV0SU = &SU;
+ // Copy may not be a real use, so skip it here.
+ if (DefMask.size() > 1 && !MI->isCopy())
+ for (SUnit *Def : DefMask)
+ if (DAG->canAddEdge(Def, &SU))
+ DAG->addEdge(Def, SDep(&SU, SDep::Artificial));
+
+ if (!DefMask.empty())
+ DefMask.erase(DefMask.begin());
+ }
+
if (NearestUseV0SU && NearestUseV0SU != &SU && isSoleUseCopyToV0(SU) &&
// For LMUL=8 cases, there will be more possibilities to spill.
// FIXME: We should use RegPressureTracker to do fine-grained
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
index 0d8aff306252e..2d4fce68f9545 100644
--- a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
@@ -313,12 +313,12 @@ define i32 @test_nxv128i1(<vscale x 128 x i1> %x) {
; CHECK-NEXT: vslidedown.vx v0, v6, a0
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v6, v7, a1
+; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v7, a0
; CHECK-NEXT: vslidedown.vx v5, v6, a0
-; CHECK-NEXT: vslidedown.vx v4, v7, a0
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vmv1r.v v0, v4
; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t
; CHECK-NEXT: vmv1r.v v0, v5
; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t
@@ -425,13 +425,15 @@ define i32 @test_nxv256i1(<vscale x 256 x i1> %x) {
; CHECK-NEXT: vmerge.vim v16, v8, 1, v0
; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v5, a1
-; CHECK-NEXT: vslidedown.vx v5, v7, a1
-; CHECK-NEXT: vslidedown.vx v4, v6, a1
-; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, mu
+; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vmv1r.v v0, v4
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v6, a1
+; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, mu
; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t
-; CHECK-NEXT: vmv1r.v v0, v5
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v7, a1
+; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, mu
; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t
; CHECK-NEXT: vadd.vv v8, v16, v8
; CHECK-NEXT: addi a2, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
index 796f8dde58f47..15417da962bd3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
@@ -139,21 +139,20 @@ define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind {
; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: sub sp, sp, a3
; RV32-NEXT: andi sp, sp, -64
-; RV32-NEXT: addi a3, sp, 64
; RV32-NEXT: vl8r.v v8, (a0)
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a0, a0, a2
-; RV32-NEXT: vl8r.v v24, (a0)
+; RV32-NEXT: vl8r.v v16, (a0)
; RV32-NEXT: vsetvli a0, zero, e8, m8, ta, ma
; RV32-NEXT: vmseq.vi v0, v8, 0
-; RV32-NEXT: vmv.v.i v16, 0
-; RV32-NEXT: add a1, a3, a1
-; RV32-NEXT: add a2, a3, a2
-; RV32-NEXT: vmseq.vi v8, v24, 0
-; RV32-NEXT: vmerge.vim v24, v16, 1, v0
-; RV32-NEXT: vs8r.v v24, (a3)
-; RV32-NEXT: vmv1r.v v0, v8
-; RV32-NEXT: vmerge.vim v8, v16, 1, v0
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: vmerge.vim v24, v8, 1, v0
+; RV32-NEXT: vmseq.vi v0, v16, 0
+; RV32-NEXT: addi a0, sp, 64
+; RV32-NEXT: add a1, a0, a1
+; RV32-NEXT: add a2, a0, a2
+; RV32-NEXT: vs8r.v v24, (a0)
+; RV32-NEXT: vmerge.vim v8, v8, 1, v0
; RV32-NEXT: vs8r.v v8, (a2)
; RV32-NEXT: lbu a0, 0(a1)
; RV32-NEXT: addi sp, s0, -80
@@ -179,21 +178,20 @@ define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind {
; RV64-NEXT: slli a3, a3, 4
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: andi sp, sp, -64
-; RV64-NEXT: addi a3, sp, 64
; RV64-NEXT: vl8r.v v8, (a0)
; RV64-NEXT: slli a2, a2, 3
; RV64-NEXT: add a0, a0, a2
-; RV64-NEXT: vl8r.v v24, (a0)
+; RV64-NEXT: vl8r.v v16, (a0)
; RV64-NEXT: vsetvli a0, zero, e8, m8, ta, ma
; RV64-NEXT: vmseq.vi v0, v8, 0
-; RV64-NEXT: vmv.v.i v16, 0
-; RV64-NEXT: add a1, a3, a1
-; RV64-NEXT: add a2, a3, a2
-; RV64-NEXT: vmseq.vi v8, v24, 0
-; RV64-NEXT: vmerge.vim v24, v16, 1, v0
-; RV64-NEXT: vs8r.v v24, (a3)
-; RV64-NEXT: vmv1r.v v0, v8
-; RV64-NEXT: vmerge.vim v8, v16, 1, v0
+; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: vmerge.vim v24, v8, 1, v0
+; RV64-NEXT: vmseq.vi v0, v16, 0
+; RV64-NEXT: addi a0, sp, 64
+; RV64-NEXT: add a1, a0, a1
+; RV64-NEXT: add a2, a0, a2
+; RV64-NEXT: vs8r.v v24, (a0)
+; RV64-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-NEXT: vs8r.v v8, (a2)
; RV64-NEXT: lbu a0, 0(a1)
; RV64-NEXT: addi sp, s0, -80
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
index 2587411566a3f..fb070b24a4f34 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
@@ -324,24 +324,23 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
; RV32-NEXT: sw s0, 376(sp) # 4-byte Folded Spill
; RV32-NEXT: addi s0, sp, 384
; RV32-NEXT: andi sp, sp, -128
-; RV32-NEXT: zext.b a1, a1
-; RV32-NEXT: mv a2, sp
-; RV32-NEXT: li a3, 128
-; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV32-NEXT: vle8.v v8, (a0)
; RV32-NEXT: addi a0, a0, 128
; RV32-NEXT: vle8.v v16, (a0)
-; RV32-NEXT: add a1, a2, a1
; RV32-NEXT: vmseq.vi v0, v8, 0
-; RV32-NEXT: vmv.v.i v24, 0
-; RV32-NEXT: vmseq.vi v8, v16, 0
-; RV32-NEXT: vmerge.vim v16, v24, 1, v0
-; RV32-NEXT: vse8.v v16, (a2)
-; RV32-NEXT: vmv1r.v v0, v8
-; RV32-NEXT: vmerge.vim v8, v24, 1, v0
-; RV32-NEXT: addi a0, sp, 128
-; RV32-NEXT: vse8.v v8, (a0)
-; RV32-NEXT: lbu a0, 0(a1)
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: vmerge.vim v24, v8, 1, v0
+; RV32-NEXT: vmseq.vi v0, v16, 0
+; RV32-NEXT: zext.b a0, a1
+; RV32-NEXT: mv a1, sp
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: vse8.v v24, (a1)
+; RV32-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32-NEXT: addi a1, sp, 128
+; RV32-NEXT: vse8.v v8, (a1)
+; RV32-NEXT: lbu a0, 0(a0)
; RV32-NEXT: addi sp, s0, -384
; RV32-NEXT: lw ra, 380(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 376(sp) # 4-byte Folded Reload
@@ -355,24 +354,23 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
; RV64-NEXT: sd s0, 368(sp) # 8-byte Folded Spill
; RV64-NEXT: addi s0, sp, 384
; RV64-NEXT: andi sp, sp, -128
-; RV64-NEXT: zext.b a1, a1
-; RV64-NEXT: mv a2, sp
-; RV64-NEXT: li a3, 128
-; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV64-NEXT: vle8.v v8, (a0)
; RV64-NEXT: addi a0, a0, 128
; RV64-NEXT: vle8.v v16, (a0)
-; RV64-NEXT: add a1, a2, a1
; RV64-NEXT: vmseq.vi v0, v8, 0
-; RV64-NEXT: vmv.v.i v24, 0
-; RV64-NEXT: vmseq.vi v8, v16, 0
-; RV64-NEXT: vmerge.vim v16, v24, 1, v0
-; RV64-NEXT: vse8.v v16, (a2)
-; RV64-NEXT: vmv1r.v v0, v8
-; RV64-NEXT: vmerge.vim v8, v24, 1, v0
-; RV64-NEXT: addi a0, sp, 128
-; RV64-NEXT: vse8.v v8, (a0)
-; RV64-NEXT: lbu a0, 0(a1)
+; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: vmerge.vim v24, v8, 1, v0
+; RV64-NEXT: vmseq.vi v0, v16, 0
+; RV64-NEXT: zext.b a0, a1
+; RV64-NEXT: mv a1, sp
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: vse8.v v24, (a1)
+; RV64-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64-NEXT: addi a1, sp, 128
+; RV64-NEXT: vse8.v v8, (a1)
+; RV64-NEXT: lbu a0, 0(a0)
; RV64-NEXT: addi sp, s0, -384
; RV64-NEXT: ld ra, 376(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 368(sp) # 8-byte Folded Reload
@@ -386,24 +384,23 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
; RV32ZBS-NEXT: sw s0, 376(sp) # 4-byte Folded Spill
; RV32ZBS-NEXT: addi s0, sp, 384
; RV32ZBS-NEXT: andi sp, sp, -128
-; RV32ZBS-NEXT: zext.b a1, a1
-; RV32ZBS-NEXT: mv a2, sp
-; RV32ZBS-NEXT: li a3, 128
-; RV32ZBS-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32ZBS-NEXT: li a2, 128
+; RV32ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV32ZBS-NEXT: vle8.v v8, (a0)
; RV32ZBS-NEXT: addi a0, a0, 128
; RV32ZBS-NEXT: vle8.v v16, (a0)
-; RV32ZBS-NEXT: add a1, a2, a1
; RV32ZBS-NEXT: vmseq.vi v0, v8, 0
-; RV32ZBS-NEXT: vmv.v.i v24, 0
-; RV32ZBS-NEXT: vmseq.vi v8, v16, 0
-; RV32ZBS-NEXT: vmerge.vim v16, v24, 1, v0
-; RV32ZBS-NEXT: vse8.v v16, (a2)
-; RV32ZBS-NEXT: vmv1r.v v0, v8
-; RV32ZBS-NEXT: vmerge.vim v8, v24, 1, v0
-; RV32ZBS-NEXT: addi a0, sp, 128
-; RV32ZBS-NEXT: vse8.v v8, (a0)
-; RV32ZBS-NEXT: lbu a0, 0(a1)
+; RV32ZBS-NEXT: vmv.v.i v8, 0
+; RV32ZBS-NEXT: vmerge.vim v24, v8, 1, v0
+; RV32ZBS-NEXT: vmseq.vi v0, v16, 0
+; RV32ZBS-NEXT: zext.b a0, a1
+; RV32ZBS-NEXT: mv a1, sp
+; RV32ZBS-NEXT: add a0, a1, a0
+; RV32ZBS-NEXT: vse8.v v24, (a1)
+; RV32ZBS-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32ZBS-NEXT: addi a1, sp, 128
+; RV32ZBS-NEXT: vse8.v v8, (a1)
+; RV32ZBS-NEXT: lbu a0, 0(a0)
; RV32ZBS-NEXT: addi sp, s0, -384
; RV32ZBS-NEXT: lw ra, 380(sp) # 4-byte Folded Reload
; RV32ZBS-NEXT: lw s0, 376(sp) # 4-byte Folded Reload
@@ -417,24 +414,23 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
; RV64ZBS-NEXT: sd s0, 368(sp) # 8-byte Folded Spill
; RV64ZBS-NEXT: addi s0, sp, 384
; RV64ZBS-NEXT: andi sp, sp, -128
-; RV64ZBS-NEXT: zext.b a1, a1
-; RV64ZBS-NEXT: mv a2, sp
-; RV64ZBS-NEXT: li a3, 128
-; RV64ZBS-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64ZBS-NEXT: li a2, 128
+; RV64ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV64ZBS-NEXT: vle8.v v8, (a0)
; RV64ZBS-NEXT: addi a0, a0, 128
; RV64ZBS-NEXT: vle8.v v16, (a0)
-; RV64ZBS-NEXT: add a1, a2, a1
; RV64ZBS-NEXT: vmseq.vi v0, v8, 0
-; RV64ZBS-NEXT: vmv.v.i v24, 0
-; RV64ZBS-NEXT: vmseq.vi v8, v16, 0
-; RV64ZBS-NEXT: vmerge.vim v16, v24, 1, v0
-; RV64ZBS-NEXT: vse8.v v16, (a2)
-; RV64ZBS-NEXT: vmv1r.v v0, v8
-; RV64ZBS-NEXT: vmerge.vim v8, v24, 1, v0
-; RV64ZBS-NEXT: addi a0, sp, 128
-; RV64ZBS-NEXT: vse8.v v8, (a0)
-; RV64ZBS-NEXT: lbu a0, 0(a1)
+; RV64ZBS-NEXT: vmv.v.i v8, 0
+; RV64ZBS-NEXT: vmerge.vim v24, v8, 1, v0
+; RV64ZBS-NEXT: vmseq.vi v0, v16, 0
+; RV64ZBS-NEXT: zext.b a0, a1
+; RV64ZBS-NEXT: mv a1, sp
+; RV64ZBS-NEXT: add a0, a1, a0
+; RV64ZBS-NEXT: vse8.v v24, (a1)
+; RV64ZBS-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64ZBS-NEXT: addi a1, sp, 128
+; RV64ZBS-NEXT: vse8.v v8, (a1)
+; RV64ZBS-NEXT: lbu a0, 0(a0)
; RV64ZBS-NEXT: addi sp, s0, -384
; RV64ZBS-NEXT: ld ra, 376(sp) # 8-byte Folded Reload
; RV64ZBS-NEXT: ld s0, 368(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll
index c11319ff335fd..67584ba8a82cf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll
@@ -143,16 +143,15 @@ define void @deinterleave6_0_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave6_0_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vmv.v.i v0, 2
-; CHECK-NEXT: vmv.v.i v8, 4
; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v9, 8
+; CHECK-NEXT: vslidedown.vi v9, v8, 8
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vslidedown.vi v9, v9, 5, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vrgather.vi v9, v10, 4, v0.t
-; CHECK-NEXT: vse8.v v9, (a1)
+; CHECK-NEXT: vslidedown.vi v8, v8, 5, v0.t
+; CHECK-NEXT: vmv.v.i v0, 4
+; CHECK-NEXT: vrgather.vi v8, v9, 4, v0.t
+; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
entry:
%0 = load <16 x i8>, ptr %in, align 1
@@ -188,16 +187,15 @@ define void @deinterleave7_0_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave7_0_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vmv.v.i v0, 2
-; CHECK-NEXT: vmv.v.i v8, 4
; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v9, 8
+; CHECK-NEXT: vslidedown.vi v9, v8, 8
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vslidedown.vi v9, v9, 6, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vrgather.vi v9, v10, 6, v0.t
-; CHECK-NEXT: vse8.v v9, (a1)
+; CHECK-NEXT: vslidedown.vi v8, v8, 6, v0.t
+; CHECK-NEXT: vmv.v.i v0, 4
+; CHECK-NEXT: vrgather.vi v8, v9, 6, v0.t
+; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
entry:
%0 = load <16 x i8>, ptr %in, align 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
index 206838917d004..ad2ed47e67e64 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
@@ -153,20 +153,19 @@ define <vscale x 2 x i32> @vwop_vscale_sext_i1i32_multiple_users(ptr %x, ptr %y,
; NO_FOLDING: # %bb.0:
; NO_FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu
; NO_FOLDING-NEXT: vlm.v v8, (a0)
-; NO_FOLDING-NEXT: vlm.v v9, (a1)
-; NO_FOLDING-NEXT: vlm.v v10, (a2)
-; NO_FOLDING-NEXT: vmv.v.i v11, 0
+; NO_FOLDING-NEXT: vmv.v.i v10, 0
; NO_FOLDING-NEXT: vmv.v.v v0, v8
-; NO_FOLDING-NEXT: vmerge.vim v12, v11, -1, v0
+; NO_FOLDING-NEXT: vmerge.vim v11, v10, -1, v0
+; NO_FOLDING-NEXT: vlm.v v0, (a1)
+; NO_FOLDING-NEXT: vlm.v v9, (a2)
+; NO_FOLDING-NEXT: vmerge.vim v12, v10, -1, v0
; NO_FOLDING-NEXT: vmv.v.v v0, v9
-; NO_FOLDING-NEXT: vmerge.vim v9, v11, -1, v0
-; NO_FOLDING-NEXT: vmv.v.v v0, v10
-; NO_FOLDING-NEXT: vmerge.vim v10, v11, -1, v0
-; NO_FOLDING-NEXT: vmul.vv v9, v12, v9
-; NO_FOLDING-NEXT: vsub.vv v11, v12, v10
+; NO_FOLDING-NEXT: vmerge.vim v9, v10, -1, v0
+; NO_FOLDING-NEXT: vmul.vv v10, v11, v12
+; NO_FOLDING-NEXT: vsub.vv v11, v11, v9
; NO_FOLDING-NEXT: vmv.v.v v0, v8
-; NO_FOLDING-NEXT: vadd.vi v10, v10, -1, v0.t
-; NO_FOLDING-NEXT: vor.vv v8, v9, v10
+; NO_FOLDING-NEXT: vadd.vi v9, v9, -1, v0.t
+; NO_FOLDING-NEXT: vor.vv v8, v10, v9
; NO_FOLDING-NEXT: vor.vv v8, v8, v11
; NO_FOLDING-NEXT: ret
;
@@ -174,20 +173,19 @@ define <vscale x 2 x i32> @vwop_vscale_sext_i1i32_multiple_users(ptr %x, ptr %y,
; FOLDING: # %bb.0:
; FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu
; FOLDING-NEXT: vlm.v v8, (a0)
-; FOLDING-NEXT: vlm.v v9, (a1)
-; FOLDING-NEXT: vlm.v v10, (a2)
-; FOLDING-NEXT: vmv.v.i v11, 0
+; FOLDING-NEXT: vmv.v.i v10, 0
; FOLDING-NEXT: vmv.v.v v0, v8
-; FOLDING-NEXT: vmerge.vim v12, v11, -1, v0
+; FOLDING-NEXT: vmerge.vim v11, v10, -1, v0
+; FOLDING-NEXT: vlm.v v0, (a1)
+; FOLDING-NEXT: vlm.v v9, (a2)
+; FOLDING-NEXT: vmerge.vim v12, v10, -1, v0
; FOLDING-NEXT: vmv.v.v v0, v9
-; FOLDING-NEXT: vmerge.vim v9, v11, -1, v0
-; FOLDING-NEXT: vmv.v.v v0, v10
-; FOLDING-NEXT: vmerge.vim v10, v11, -1, v0
-; FOLDING-NEXT: vmul.vv v9, v12, v9
-; FOLDING-NEXT: vsub.vv v11, v12, v10
+; FOLDING-NEXT: vmerge.vim v9, v10, -1, v0
+; FOLDING-NEXT: vmul.vv v10, v11, v12
+; FOLDING-NEXT: vsub.vv v11, v11, v9
; FOLDING-NEXT: vmv.v.v v0, v8
-; FOLDING-NEXT: vadd.vi v10, v10, -1, v0.t
-; FOLDING-NEXT: vor.vv v8, v9, v10
+; FOLDING-NEXT: vadd.vi v9, v9, -1, v0.t
+; FOLDING-NEXT: vor.vv v8, v10, v9
; FOLDING-NEXT: vor.vv v8, v8, v11
; FOLDING-NEXT: ret
%a = load <vscale x 2 x i1>, ptr %x
@@ -209,20 +207,19 @@ define <vscale x 2 x i8> @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, p
; NO_FOLDING: # %bb.0:
; NO_FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu
; NO_FOLDING-NEXT: vlm.v v8, (a0)
-; NO_FOLDING-NEXT: vlm.v v9, (a1)
-; NO_FOLDING-NEXT: vlm.v v10, (a2)
-; NO_FOLDING-NEXT: vmv.v.i v11, 0
+; NO_FOLDING-NEXT: vmv.v.i v10, 0
; NO_FOLDING-NEXT: vmv1r.v v0, v8
-; NO_FOLDING-NEXT: vmerge.vim v12, v11, -1, v0
+; NO_FOLDING-NEXT: vmerge.vim v11, v10, -1, v0
+; NO_FOLDING-NEXT: vlm.v v0, (a1)
+; NO_FOLDING-NEXT: vlm.v v9, (a2)
+; NO_FOLDING-NEXT: vmerge.vim v12, v10, -1, v0
; NO_FOLDING-NEXT: vmv1r.v v0, v9
-; NO_FOLDING-NEXT: vmerge.vim v9, v11, -1, v0
-; NO_FOLDING-NEXT: vmv1r.v v0, v10
-; NO_FOLDING-NEXT: vmerge.vim v10, v11, -1, v0
-; NO_FOLDING-NEXT: vmul.vv v9, v12, v9
-; NO_FOLDING-NEXT: vsub.vv v11, v12, v10
+; NO_FOLDING-NEXT: vmerge.vim v9, v10, -1, v0
+; NO_FOLDING-NEXT: vmul.vv v10, v11, v12
+; NO_FOLDING-NEXT: vsub.vv v11, v11, v9
; NO_FOLDING-NEXT: vmv1r.v v0, v8
-; NO_FOLDING-NEXT: vadd.vi v10, v10, -1, v0.t
-; NO_FOLDING-NEXT: vor.vv v8, v9, v10
+; NO_FOLDING-NEXT: vadd.vi v9, v9, -1, v0.t
+; NO_FOLDING-NEXT: vor.vv v8, v10, v9
; NO_FOLDING-NEXT: vor.vv v8, v8, v11
; NO_FOLDING-NEXT: ret
;
@@ -230,20 +227,19 @@ define <vscale x 2 x i8> @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, p
; FOLDING: # %bb.0:
; FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu
; FOLDING-NEXT: vlm.v v8, (a0)
-; FOLDING-NEXT: vlm.v v9, (a1)
-; FOLDING-NEXT: vlm.v v10, (a2)
-; FOLDING-NEXT: vmv.v.i v11, 0
+; FOLDING-NEXT: vmv.v.i v10, 0
; FOLDING-NEXT: vmv1r.v v0, v8
-; FOLDING-NEXT: vmerge.vim v12, v11, -1, v0
+; FOLDING-NEXT: vmerge.vim v11, v10, -1, v0
+; FOLDING-NEXT: vlm.v v0, (a1)
+; FOLDING-NEXT: vlm.v v9, (a2)
+; FOLDING-NEXT: vmerge.vim v12, v10, -1, v0
; FOLDING-NEXT: vmv1r.v v0, v9
-; FOLDING-NEXT: vmerge.vim v9, v11, -1, v0
-; FOLDING-NEXT: vmv1r.v v0, v10
-; FOLDING-NEXT: vmerge.vim v10, v11, -1, v0
-; FOLDING-NEXT: vmul.vv v9, v12, v9
-; FOLDING-NEXT: vsub.vv v11, v12, v10
+; FOLDING-NEXT: vmerge.vim v9, v10, -1, v0
+; FOLDING-NEXT: vmul.vv v10, v11, v12
+; FOLDING-NEXT: vsub.vv v11, v11, v9
; FOLDING-NEXT: vmv1r.v v0, v8
-; FOLDING-NEXT: vadd.vi v10, v10, -1, v0.t
-; FOLDING-NEXT: vor.vv v8, v9, v10
+; FOLDING-NEXT: vadd.vi v9, v9, -1, v0.t
+; FOLDING-NEXT: vor.vv v8, v10, v9
; FOLDING-NEXT: vor.vv v8, v8, v11
; FOLDING-NEXT: ret
%a = load <vscale x 2 x i1>, ptr %x
@@ -444,16 +440,14 @@ define <vscale x 2 x i32> @vwop_vscale_zext_i1i32_multiple_users...
[truncated]
LGTM.
could you add the crash reproducer as a test?
NearestUseV0SU = &SU;
// Copy may not be a real use, so skip it here.
if (DefMask.size() > 1 && !MI->isCopy())
style: could you add curly braces here per https://llvm.org/docs/CodingStandards.html#don-t-use-braces-on-simple-single-statement-bodies-of-if-else-loop-statements
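For reference, a braced version of the quoted snippet might look like the sketch below; this only illustrates the requested style and may not match the exact formatting that landed in the patch.

// Sketch only: the quoted condition with braces added around the multi-line
// body, per the review suggestion above. Final formatting may differ.
if (DefMask.size() > 1 && !MI->isCopy()) {
  for (SUnit *Def : DefMask) {
    if (DAG->canAddEdge(Def, &SU))
      DAG->addEdge(Def, SDep(&SU, SDep::Artificial));
  }
}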
Reduced the MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/mesh.cpp IR and added it to the test case.
declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr captures(none), i32 immarg, <vscale x 4 x i1>, <vscale x 4 x i32>) #1

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>) #2
for future reference and tidiness of the test: you don't need to declare intrinsics anymore
uselistorder ptr @llvm.masked.load.nxv4i32.p0, { 1, 0 }
uselistorder ptr @llvm.vector.reduce.add.nxv4i32, { 1, 0 }

attributes #0 = { "target-features"="+v" }
for future reference and tidiness of the test: you already provided -mattr=+v so this would be redundant
The DefMask vector cannot contain instructions that use V0.
#146855 (comment)

For MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/mesh.cpp:

%173:vrm2nov0 = PseudoVMERGE_VVM_M2 undef %173:vrm2nov0(tied-def 0), %116:vrm2, %173:vrm2nov0, killed $v0, -1, 5

Adding this instruction to the def mask caused the error. I also tested the other reported crashes; there are no crashes now.
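For readers who skip the diff, the core of the change condenses to the guard below, a simplified sketch of the loop body in RISCVVectorMaskDAGMutation::apply() restated from the diff above rather than a drop-in excerpt.

// Condensed sketch of the patched loop body (see the full diff above).
// An SUnit is recorded as a mask producer only if it feeds V0 through a
// copy AND does not itself read V0 -- the invariant the PseudoVMERGE above
// violated before this change.
bool UseV0 = MI->findRegisterUseOperand(RISCV::V0, TRI);
if (isSoleUseCopyToV0(SU) && !UseV0)
  DefMask.push_back(&SU);

if (UseV0) {
  NearestUseV0SU = &SU;
  // A plain COPY into V0 is not a real masked use, so producers are not
  // pinned behind it.
  if (DefMask.size() > 1 && !MI->isCopy())
    for (SUnit *Def : DefMask)
      if (DAG->canAddEdge(Def, &SU))
        DAG->addEdge(Def, SDep(&SU, SDep::Artificial));
  if (!DefMask.empty())
    DefMask.erase(DefMask.begin());
}

The MIR line quoted above is exactly the case the `!UseV0` check excludes: that PseudoVMERGE both produces a value and reads killed $v0, so it is no longer recorded as a pending mask producer.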