-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[AMDGPU][NFC] Avoid copying MachineOperands #166293
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-amdgpu Author: None (LU-JOHN) Changes: Avoid copying machine operands. Full diff: https://github.com/llvm/llvm-project/pull/166293.diff 6 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 0c977416f1793..74e216d4da457 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -469,10 +469,14 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
const TargetRegisterClass &HalfRC
= IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;
- MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
- MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
- MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
- MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));
+ const MachineOperand &Lo1(
+ getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
+ const MachineOperand &Lo2(
+ getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
+ const MachineOperand &Hi1(
+ getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
+ const MachineOperand &Hi2(
+ getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));
Register DstLo = MRI->createVirtualRegister(&HalfRC);
Register DstHi = MRI->createVirtualRegister(&HalfRC);
@@ -6753,7 +6757,7 @@ bool AMDGPUInstructionSelector::selectSGetBarrierState(
MachineInstr &I, Intrinsic::ID IntrID) const {
MachineBasicBlock *MBB = I.getParent();
const DebugLoc &DL = I.getDebugLoc();
- MachineOperand BarOp = I.getOperand(2);
+ const MachineOperand &BarOp = I.getOperand(2);
std::optional<int64_t> BarValImm =
getIConstantVRegSExtVal(BarOp.getReg(), *MRI);
@@ -6806,8 +6810,8 @@ bool AMDGPUInstructionSelector::selectNamedBarrierInit(
MachineInstr &I, Intrinsic::ID IntrID) const {
MachineBasicBlock *MBB = I.getParent();
const DebugLoc &DL = I.getDebugLoc();
- MachineOperand BarOp = I.getOperand(1);
- MachineOperand CntOp = I.getOperand(2);
+ const MachineOperand &BarOp = I.getOperand(1);
+ const MachineOperand &CntOp = I.getOperand(2);
// BarID = (BarOp >> 4) & 0x3F
Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
diff --git a/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp b/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp
index 4deb2a9485e4d..62172a0bb89db 100644
--- a/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp
@@ -136,7 +136,7 @@ bool GCNPreRAOptimizationsImpl::processReg(Register Reg) {
continue;
if (Def.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
- MachineOperand DefSrcMO = Def.getOperand(1);
+ const MachineOperand &DefSrcMO = Def.getOperand(1);
// Immediates are not an issue and can be propagated in
// postrapseudos pass. Only handle cases where defining
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 8bb28084159e8..fbadee1980159 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5435,14 +5435,14 @@ static MachineBasicBlock *Expand64BitScalarArithmetic(MachineInstr &MI,
Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
- MachineOperand Src0Sub0 = TII->buildExtractSubRegOrImm(
+ const MachineOperand &Src0Sub0 = TII->buildExtractSubRegOrImm(
MI, MRI, Src0, BoolRC, AMDGPU::sub0, &AMDGPU::SReg_32RegClass);
- MachineOperand Src0Sub1 = TII->buildExtractSubRegOrImm(
+ const MachineOperand &Src0Sub1 = TII->buildExtractSubRegOrImm(
MI, MRI, Src0, BoolRC, AMDGPU::sub1, &AMDGPU::SReg_32RegClass);
- MachineOperand Src1Sub0 = TII->buildExtractSubRegOrImm(
+ const MachineOperand &Src1Sub0 = TII->buildExtractSubRegOrImm(
MI, MRI, Src1, BoolRC, AMDGPU::sub0, &AMDGPU::SReg_32RegClass);
- MachineOperand Src1Sub1 = TII->buildExtractSubRegOrImm(
+ const MachineOperand &Src1Sub1 = TII->buildExtractSubRegOrImm(
MI, MRI, Src1, BoolRC, AMDGPU::sub1, &AMDGPU::SReg_32RegClass);
unsigned LoOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
@@ -5603,9 +5603,9 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
const TargetRegisterClass *SrcSubRC =
TRI->getSubRegisterClass(SrcRC, AMDGPU::sub0);
- MachineOperand Op1L = TII->buildExtractSubRegOrImm(
+ const MachineOperand &Op1L = TII->buildExtractSubRegOrImm(
MI, MRI, MI.getOperand(1), SrcRC, AMDGPU::sub0, SrcSubRC);
- MachineOperand Op1H = TII->buildExtractSubRegOrImm(
+ const MachineOperand &Op1H = TII->buildExtractSubRegOrImm(
MI, MRI, MI.getOperand(1), SrcRC, AMDGPU::sub1, SrcSubRC);
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DestSub0)
@@ -5661,9 +5661,9 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
const TargetRegisterClass *Src1SubRC =
TRI->getSubRegisterClass(Src1RC, AMDGPU::sub0);
- MachineOperand Op1L = TII->buildExtractSubRegOrImm(
+ const MachineOperand &Op1L = TII->buildExtractSubRegOrImm(
MI, MRI, MI.getOperand(1), Src1RC, AMDGPU::sub0, Src1SubRC);
- MachineOperand Op1H = TII->buildExtractSubRegOrImm(
+ const MachineOperand &Op1H = TII->buildExtractSubRegOrImm(
MI, MRI, MI.getOperand(1), Src1RC, AMDGPU::sub1, Src1SubRC);
if (Opc == AMDGPU::S_SUB_U64_PSEUDO) {
@@ -5799,9 +5799,9 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
const TargetRegisterClass *SrcSubRC =
TRI->getSubRegisterClass(SrcRC, AMDGPU::sub0);
- MachineOperand Op1L = TII->buildExtractSubRegOrImm(
+ const MachineOperand &Op1L = TII->buildExtractSubRegOrImm(
MI, MRI, MI.getOperand(1), SrcRC, AMDGPU::sub0, SrcSubRC);
- MachineOperand Op1H = TII->buildExtractSubRegOrImm(
+ const MachineOperand &Op1H = TII->buildExtractSubRegOrImm(
MI, MRI, MI.getOperand(1), SrcRC, AMDGPU::sub1, SrcSubRC);
// lane value input should be in an sgpr
BuildMI(*ComputeLoop, I, DL, TII->get(AMDGPU::V_READLANE_B32),
@@ -5839,10 +5839,10 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
const TargetRegisterClass *VSubRegClass =
TRI->getSubRegisterClass(VregClass, AMDGPU::sub0);
Register AccumulatorVReg = MRI.createVirtualRegister(VregClass);
- MachineOperand SrcReg0Sub0 =
+ const MachineOperand &SrcReg0Sub0 =
TII->buildExtractSubRegOrImm(MI, MRI, Accumulator->getOperand(0),
VregClass, AMDGPU::sub0, VSubRegClass);
- MachineOperand SrcReg0Sub1 =
+ const MachineOperand &SrcReg0Sub1 =
TII->buildExtractSubRegOrImm(MI, MRI, Accumulator->getOperand(0),
VregClass, AMDGPU::sub1, VSubRegClass);
BuildMI(*ComputeLoop, I, DL, TII->get(TargetOpcode::REG_SEQUENCE),
@@ -6028,14 +6028,14 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
const TargetRegisterClass *Src1SubRC =
TRI->getSubRegisterClass(Src1RC, AMDGPU::sub1);
- MachineOperand SrcReg0Sub0 = TII->buildExtractSubRegOrImm(
+ const MachineOperand &SrcReg0Sub0 = TII->buildExtractSubRegOrImm(
MI, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC);
- MachineOperand SrcReg1Sub0 = TII->buildExtractSubRegOrImm(
+ const MachineOperand &SrcReg1Sub0 = TII->buildExtractSubRegOrImm(
MI, MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC);
- MachineOperand SrcReg0Sub1 = TII->buildExtractSubRegOrImm(
+ const MachineOperand &SrcReg0Sub1 = TII->buildExtractSubRegOrImm(
MI, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC);
- MachineOperand SrcReg1Sub1 = TII->buildExtractSubRegOrImm(
+ const MachineOperand &SrcReg1Sub1 = TII->buildExtractSubRegOrImm(
MI, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC);
unsigned LoOpc =
@@ -6104,9 +6104,9 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
const TargetRegisterClass *Src2RC = MRI.getRegClass(Src2.getReg());
const TargetRegisterClass *SubRC =
TRI->getSubRegisterClass(Src2RC, AMDGPU::sub0);
- MachineOperand Src2Sub0 = TII->buildExtractSubRegOrImm(
+ const MachineOperand &Src2Sub0 = TII->buildExtractSubRegOrImm(
MII, MRI, Src2, Src2RC, AMDGPU::sub0, SubRC);
- MachineOperand Src2Sub1 = TII->buildExtractSubRegOrImm(
+ const MachineOperand &Src2Sub1 = TII->buildExtractSubRegOrImm(
MII, MRI, Src2, Src2RC, AMDGPU::sub1, SubRC);
Register Src2_32 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
@@ -6252,14 +6252,14 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
const TargetRegisterClass *Src1SubRC =
TRI->getSubRegisterClass(Src1RC, AMDGPU::sub1);
- MachineOperand Src0Sub0 = TII->buildExtractSubRegOrImm(
+ const MachineOperand &Src0Sub0 = TII->buildExtractSubRegOrImm(
MI, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC);
- MachineOperand Src1Sub0 = TII->buildExtractSubRegOrImm(
+ const MachineOperand &Src1Sub0 = TII->buildExtractSubRegOrImm(
MI, MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC);
- MachineOperand Src0Sub1 = TII->buildExtractSubRegOrImm(
+ const MachineOperand &Src0Sub1 = TII->buildExtractSubRegOrImm(
MI, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC);
- MachineOperand Src1Sub1 = TII->buildExtractSubRegOrImm(
+ const MachineOperand &Src1Sub1 = TII->buildExtractSubRegOrImm(
MI, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), SrcCondCopy).addReg(SrcCond);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 45f591927b86e..3c4a3ebe0be55 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -7945,7 +7945,7 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
}
legalizeOperands(*NewInstr, MDT);
int SCCIdx = Inst.findRegisterDefOperandIdx(AMDGPU::SCC, /*TRI=*/nullptr);
- MachineOperand SCCOp = Inst.getOperand(SCCIdx);
+ const MachineOperand &SCCOp = Inst.getOperand(SCCIdx);
addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
Inst.eraseFromParent();
return;
@@ -7985,7 +7985,7 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
legalizeOperandsVALUt16(*NewInstr, MRI);
legalizeOperands(*NewInstr, MDT);
int SCCIdx = Inst.findRegisterDefOperandIdx(AMDGPU::SCC, /*TRI=*/nullptr);
- MachineOperand SCCOp = Inst.getOperand(SCCIdx);
+ const MachineOperand &SCCOp = Inst.getOperand(SCCIdx);
addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
Inst.eraseFromParent();
return;
@@ -8183,7 +8183,7 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
AMDGPU::OpName::src0_modifiers) >= 0)
NewInstr.addImm(0);
if (AMDGPU::hasNamedOperand(NewOpcode, AMDGPU::OpName::src0)) {
- MachineOperand Src = Inst.getOperand(1);
+ const MachineOperand &Src = Inst.getOperand(1);
NewInstr->addOperand(Src);
}
@@ -8555,8 +8555,8 @@ void SIInstrInfo::splitScalar64BitUnaryOp(SIInstrWorklist &Worklist,
const TargetRegisterClass *Src0SubRC =
RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
- MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
- AMDGPU::sub0, Src0SubRC);
+ const MachineOperand &SrcReg0Sub0 =
+ buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC);
const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
@@ -8566,8 +8566,8 @@ void SIInstrInfo::splitScalar64BitUnaryOp(SIInstrWorklist &Worklist,
Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
- MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
- AMDGPU::sub1, Src0SubRC);
+ const MachineOperand &SrcReg0Sub1 =
+ buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC);
Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
@@ -8625,13 +8625,13 @@ void SIInstrInfo::splitScalarSMulU64(SIInstrWorklist &Worklist,
// First, we extract the low 32-bit and high 32-bit values from each of the
// operands.
- MachineOperand Op0L =
+ const MachineOperand &Op0L =
buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC);
- MachineOperand Op1L =
+ const MachineOperand &Op1L =
buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC);
- MachineOperand Op0H =
+ const MachineOperand &Op0H =
buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC);
- MachineOperand Op1H =
+ const MachineOperand &Op1H =
buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC);
// The multilication is done as follows:
@@ -8734,9 +8734,9 @@ void SIInstrInfo::splitScalarSMulPseudo(SIInstrWorklist &Worklist,
// First, we extract the low 32-bit and high 32-bit values from each of the
// operands.
- MachineOperand Op0L =
+ const MachineOperand &Op0L =
buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC);
- MachineOperand Op1L =
+ const MachineOperand &Op1L =
buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC);
unsigned Opc = Inst.getOpcode();
@@ -8795,14 +8795,14 @@ void SIInstrInfo::splitScalar64BitBinaryOp(SIInstrWorklist &Worklist,
const TargetRegisterClass *Src1SubRC =
RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
- MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
- AMDGPU::sub0, Src0SubRC);
- MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
- AMDGPU::sub0, Src1SubRC);
- MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
- AMDGPU::sub1, Src0SubRC);
- MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
- AMDGPU::sub1, Src1SubRC);
+ const MachineOperand &SrcReg0Sub0 =
+ buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC);
+ const MachineOperand &SrcReg1Sub0 =
+ buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC);
+ const MachineOperand &SrcReg0Sub1 =
+ buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC);
+ const MachineOperand &SrcReg1Sub1 =
+ buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC);
const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
@@ -8899,10 +8899,10 @@ void SIInstrInfo::splitScalar64BitBCNT(SIInstrWorklist &Worklist,
const TargetRegisterClass *SrcSubRC =
RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
- MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
- AMDGPU::sub0, SrcSubRC);
- MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
- AMDGPU::sub1, SrcSubRC);
+ const MachineOperand &SrcRegSub0 =
+ buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, AMDGPU::sub0, SrcSubRC);
+ const MachineOperand &SrcRegSub1 =
+ buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, AMDGPU::sub1, SrcSubRC);
BuildMI(MBB, MII, DL, InstDesc, MidReg).add(SrcRegSub0).addImm(0);
@@ -9003,9 +9003,9 @@ void SIInstrInfo::splitScalar64BitCountOp(SIInstrWorklist &Worklist,
const TargetRegisterClass *SrcSubRC =
RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
- MachineOperand SrcRegSub0 =
+ const MachineOperand &SrcRegSub0 =
buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, AMDGPU::sub0, SrcSubRC);
- MachineOperand SrcRegSub1 =
+ const MachineOperand &SrcRegSub1 =
buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, AMDGPU::sub1, SrcSubRC);
Register MidReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
@@ -9199,7 +9199,7 @@ void SIInstrInfo::movePackToVALU(SIInstrWorklist &Worklist,
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}
-void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineOperand &Op,
+void SIInstrInfo::addSCCDefUsersToVALUWorklist(const MachineOperand &Op,
MachineInstr &SCCDefInst,
SIInstrWorklist &Worklist,
Register NewCond) const {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index dc23a21f959ce..0643b532ea04c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -172,7 +172,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
void addUsersToMoveToVALUWorklist(Register Reg, MachineRegisterInfo &MRI,
SIInstrWorklist &Worklist) const;
- void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
+ void addSCCDefUsersToVALUWorklist(const MachineOperand &Op,
MachineInstr &SCCDefInst,
SIInstrWorklist &Worklist,
Register NewCond = Register()) const;
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 7431e111ec862..71fdf5d6e54af 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -640,7 +640,7 @@ void SIPreEmitPeephole::collectUnpackingCandidates(
}
void SIPreEmitPeephole::performF32Unpacking(MachineInstr &I) {
- MachineOperand DstOp = I.getOperand(0);
+ const MachineOperand &DstOp = I.getOperand(0);
uint16_t UnpackedOpcode = mapToUnpackedOpcode(I);
assert(UnpackedOpcode != std::numeric_limits<uint16_t>::max() &&
|
| MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1)); | ||
| MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1)); | ||
| const MachineOperand &Lo1( | ||
| getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can't do this; this is a reference to an immediately dead temporary.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removed dangling refs.
Signed-off-by: John Lu <John.Lu@amd.com>
|
@LU-JOHN I checked this and I see no other uses of dangling references. There are a lot of cases where the instruction is deleted, creating a dangling reference, but that always happens right before the scope ends, so those are not bugs and are not likely to result in bugs during maintenance. LGTM. |
Avoid copying machine operands.