Skip to content

Commit 7ba6768

Browse files
committed
Revert "[RISCV] Update V0Defs after moving Src in peepholes (#107359)"
This fixes #107950 and adds a test case for it. The issue was due to us incorrectly assuming that we stored a V0Defs entry for every single instruction. We actually only store them for instructions that use V0, so when we updated the V0Def after moving we sometimes ended up copying nullptr over from an instruction that doesn't use V0 and clearing the V0Def entry inadvertently. Because we don't have V0Defs on instructions that don't use V0, the FIXME was never actually needed in the first place since the bookkeeping wasn't out of sync to begin with. That commit also mentioned that a future unmasked to masked pseudo peephole might need unmasked pseudos to have V0Defs entries, but after working on this locally it turns out we don't. This reverts commit ce36480.
1 parent b71d88c commit 7ba6768

File tree

2 files changed

+36
-8
lines changed

2 files changed

+36
-8
lines changed

llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ class RISCVVectorPeephole : public MachineFunctionPass {
6161
}
6262

6363
private:
64-
bool tryToReduceVL(MachineInstr &MI);
64+
bool tryToReduceVL(MachineInstr &MI) const;
6565
bool convertToVLMAX(MachineInstr &MI) const;
6666
bool convertToWholeRegister(MachineInstr &MI) const;
6767
bool convertToUnmasked(MachineInstr &MI) const;
@@ -73,7 +73,7 @@ class RISCVVectorPeephole : public MachineFunctionPass {
7373
bool hasSameEEW(const MachineInstr &User, const MachineInstr &Src) const;
7474
bool isAllOnesMask(const MachineInstr *MaskDef) const;
7575
std::optional<unsigned> getConstant(const MachineOperand &VL) const;
76-
bool ensureDominates(const MachineOperand &Use, MachineInstr &Src);
76+
bool ensureDominates(const MachineOperand &Use, MachineInstr &Src) const;
7777

7878
/// Maps uses of V0 to the corresponding def of V0.
7979
DenseMap<const MachineInstr *, const MachineInstr *> V0Defs;
@@ -116,7 +116,7 @@ bool RISCVVectorPeephole::hasSameEEW(const MachineInstr &User,
116116
// Attempt to reduce the VL of an instruction whose sole use is feeding a
117117
// instruction with a narrower VL. This currently works backwards from the
118118
// user instruction (which might have a smaller VL).
119-
bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) {
119+
bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const {
120120
// Note that the goal here is a bit multifaceted.
121121
// 1) For store's reducing the VL of the value being stored may help to
122122
// reduce VL toggles. This is somewhat of an artifact of the fact we
@@ -526,18 +526,16 @@ static bool dominates(MachineBasicBlock::const_iterator A,
526526
/// does. Returns false if doesn't dominate and we can't move. \p MO must be in
527527
/// the same basic block as \Src.
528528
bool RISCVVectorPeephole::ensureDominates(const MachineOperand &MO,
529-
MachineInstr &Src) {
529+
MachineInstr &Src) const {
530530
assert(MO.getParent()->getParent() == Src.getParent());
531531
if (!MO.isReg() || MO.getReg() == RISCV::NoRegister)
532532
return true;
533533

534534
MachineInstr *Def = MRI->getVRegDef(MO.getReg());
535535
if (Def->getParent() == Src.getParent() && !dominates(Def, Src)) {
536-
MachineInstr *AfterDef = Def->getNextNode();
537-
if (!isSafeToMove(Src, *AfterDef))
536+
if (!isSafeToMove(Src, *Def->getNextNode()))
538537
return false;
539-
V0Defs[&Src] = V0Defs[AfterDef];
540-
Src.moveBefore(AfterDef);
538+
Src.moveBefore(Def->getNextNode());
541539
}
542540

543541
return true;
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -verify-machineinstrs | FileCheck %s
3+
4+
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
5+
target triple = "riscv64-unknown-linux-gnu"
6+
7+
define void @m(<vscale x 4 x i1> %0) #0 {
8+
; CHECK-LABEL: m:
9+
; CHECK: # %bb.0: # %entry
10+
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu
11+
; CHECK-NEXT: vmv.v.i v8, 0
12+
; CHECK-NEXT: vlse32.v v8, (zero), zero, v0.t
13+
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
14+
; CHECK-NEXT: vse32.v v8, (zero)
15+
; CHECK-NEXT: ret
16+
entry:
17+
%broadcast.splatinsert184 = insertelement <vscale x 4 x ptr> zeroinitializer, ptr null, i64 0
18+
%broadcast.splat185 = shufflevector <vscale x 4 x ptr> %broadcast.splatinsert184, <vscale x 4 x ptr> zeroinitializer, <vscale x 4 x i32> zeroinitializer
19+
%wide.masked.gather186 = tail call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %broadcast.splat185, i32 4, <vscale x 4 x i1> %0, <vscale x 4 x i32> zeroinitializer)
20+
%predphi187 = select <vscale x 4 x i1> %0, <vscale x 4 x i32> %wide.masked.gather186, <vscale x 4 x i32> zeroinitializer
21+
%1 = extractelement <vscale x 4 x i32> %predphi187, i32 0
22+
store i32 %1, ptr null, align 4
23+
ret void
24+
}
25+
26+
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
27+
declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr>, i32 immarg, <vscale x 4 x i1>, <vscale x 4 x i32>) #1
28+
29+
attributes #0 = { "target-features"="+64bit,+d,+f,+relax,+v,+xsifivecdiscarddlone,+zicsr,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b,-a,-b,-c,-e,-experimental-smctr,-experimental-smmpm,-experimental-smnpm,-experimental-ssctr,-experimental-ssnpm,-experimental-sspm,-experimental-supm,-experimental-zacas,-experimental-zalasr,-experimental-zicfilp,-experimental-zicfiss,-experimental-zvbc32e,-experimental-zvkgs,-h,-m,-shcounterenw,-shgatpa,-shtvala,-shvsatpa,-shvstvala,-shvstvecd,-smaia,-smcdeleg,-smcsrind,-smepmp,-smstateen,-ssaia,-ssccfg,-ssccptr,-sscofpmf,-sscounterenw,-sscsrind,-ssqosid,-ssstateen,-ssstrict,-sstc,-sstvala,-sstvecd,-ssu64xl,-svade,-svadu,-svbare,-svinval,-svnapot,-svpbmt,-xcvalu,-xcvbi,-xcvbitmanip,-xcvelw,-xcvmac,-xcvmem,-xcvsimd,-xsfcease,-xsfvcp,-xsfvfnrclipxfqf,-xsfvfwmaccqqq,-xsfvqmaccdod,-xsfvqmaccqoq,-xsifivecflushdlone,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-xwchc,-za128rs,-za64rs,-zaamo,-zabha,-zalrsc,-zama16b,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zca,-zcb,-zcd,-zce,-zcf,-zcmop,-zcmp,-zcmt,-zdinx,-zfa,-zfbfmin,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zic64b,-zicbom,-zicbop,-zicboz,-ziccamoa,-ziccif,-zicclsm,-ziccrse,-zicntr,-zicond,-zifencei,-zihintntl,-zihintpause,-zihpm,-zimop,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-zmmul,-ztso,-zvbb,-zvbc,-zvfbfmin,-zvfbfwma,-zvfh,-zvfhmin,-zvkb,-zvkg,-zvkn,-zvknc,-zvkned,-zvkng,-zvknha,-zvknhb,-zvks,-zvksc,-zvksed,-zvksg,-zvksh,-zvkt,-zvl1024b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl4096b,-zvl512b,-zvl65536b,-zvl8192b" }
30+
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(read) }

0 commit comments

Comments
 (0)