Skip to content

Commit 61396ff

Browse files
committed
AMDGPU/InsertWaitcnts: Cleanup some old cruft (NFCI)
Summary: Remove redundant logic and simplify control flow.
Reviewers: msearles, rampitec, scott.linder, kanarayan
Subscribers: arsenm, kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D54086
llvm-svn: 346363
1 parent 0ab31c9 commit 61396ff

File tree

1 file changed

+71
-91
lines changed

1 file changed

+71
-91
lines changed

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 71 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -880,24 +880,14 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
880880
// Start with an assumption that there is no need to emit.
881881
unsigned int EmitWaitcnt = 0;
882882

883-
// No need to wait before phi. If a phi-move exists, then the wait should
884-
// have been inserted before the move. If a phi-move does not exist, then
885-
// wait should be inserted before the real use. The same is true for
886-
// sc-merge. It is not a coincidence that all these cases correspond to the
887-
// instructions that are skipped in the assembling loop.
888-
bool NeedLineMapping = false; // TODO: Check on this.
889-
890883
// ForceEmitZeroWaitcnt: force a single s_waitcnt 0 due to hw bug
891884
bool ForceEmitZeroWaitcnt = false;
892885

893886
setForceEmitWaitcnt();
894887
bool IsForceEmitWaitcnt = isForceEmitWaitcnt();
895888

896-
if (MI.isDebugInstr() &&
897-
// TODO: any other opcode?
898-
!NeedLineMapping) {
889+
if (MI.isDebugInstr())
899890
return;
900-
}
901891

902892
// See if an s_waitcnt is forced at block entry, or is needed at
903893
// program end.
@@ -1141,7 +1131,6 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
11411131
if (EmitWaitcnt || IsForceEmitWaitcnt) {
11421132
int CntVal[NUM_INST_CNTS];
11431133

1144-
bool UseDefaultWaitcntStrategy = true;
11451134
if (ForceEmitZeroWaitcnt || ForceEmitZeroWaitcnts) {
11461135
// Force all waitcnts to 0.
11471136
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
@@ -1151,10 +1140,7 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
11511140
CntVal[VM_CNT] = 0;
11521141
CntVal[EXP_CNT] = 0;
11531142
CntVal[LGKM_CNT] = 0;
1154-
UseDefaultWaitcntStrategy = false;
1155-
}
1156-
1157-
if (UseDefaultWaitcntStrategy) {
1143+
} else {
11581144
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
11591145
T = (enum InstCounterType)(T + 1)) {
11601146
if (EmitWaitcnt & CNT_MASK(T)) {
@@ -1178,95 +1164,89 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
11781164
}
11791165
}
11801166

1181-
// If we are not waiting on any counter we can skip the wait altogether.
1182-
if (EmitWaitcnt != 0 || IsForceEmitWaitcnt) {
1183-
MachineInstr *OldWaitcnt = ScoreBrackets->getWaitcnt();
1184-
int Imm = (!OldWaitcnt) ? 0 : OldWaitcnt->getOperand(0).getImm();
1185-
if (!OldWaitcnt ||
1186-
(AMDGPU::decodeVmcnt(IV, Imm) !=
1187-
(CntVal[VM_CNT] & AMDGPU::getVmcntBitMask(IV))) ||
1188-
(AMDGPU::decodeExpcnt(IV, Imm) !=
1189-
(CntVal[EXP_CNT] & AMDGPU::getExpcntBitMask(IV))) ||
1190-
(AMDGPU::decodeLgkmcnt(IV, Imm) !=
1191-
(CntVal[LGKM_CNT] & AMDGPU::getLgkmcntBitMask(IV)))) {
1192-
MachineLoop *ContainingLoop = MLI->getLoopFor(MI.getParent());
1193-
if (ContainingLoop) {
1194-
MachineBasicBlock *TBB = ContainingLoop->getHeader();
1195-
BlockWaitcntBrackets *ScoreBracket =
1196-
BlockWaitcntBracketsMap[TBB].get();
1197-
if (!ScoreBracket) {
1198-
assert(!BlockVisitedSet.count(TBB));
1199-
BlockWaitcntBracketsMap[TBB] =
1200-
llvm::make_unique<BlockWaitcntBrackets>(ST);
1201-
ScoreBracket = BlockWaitcntBracketsMap[TBB].get();
1202-
}
1203-
ScoreBracket->setRevisitLoop(true);
1204-
LLVM_DEBUG(dbgs()
1205-
<< "set-revisit2: Block"
1206-
<< ContainingLoop->getHeader()->getNumber() << '\n';);
1167+
MachineInstr *OldWaitcnt = ScoreBrackets->getWaitcnt();
1168+
int Imm = (!OldWaitcnt) ? 0 : OldWaitcnt->getOperand(0).getImm();
1169+
if (!OldWaitcnt ||
1170+
(AMDGPU::decodeVmcnt(IV, Imm) !=
1171+
(CntVal[VM_CNT] & AMDGPU::getVmcntBitMask(IV))) ||
1172+
(AMDGPU::decodeExpcnt(IV, Imm) !=
1173+
(CntVal[EXP_CNT] & AMDGPU::getExpcntBitMask(IV))) ||
1174+
(AMDGPU::decodeLgkmcnt(IV, Imm) !=
1175+
(CntVal[LGKM_CNT] & AMDGPU::getLgkmcntBitMask(IV)))) {
1176+
MachineLoop *ContainingLoop = MLI->getLoopFor(MI.getParent());
1177+
if (ContainingLoop) {
1178+
MachineBasicBlock *TBB = ContainingLoop->getHeader();
1179+
BlockWaitcntBrackets *ScoreBracket = BlockWaitcntBracketsMap[TBB].get();
1180+
if (!ScoreBracket) {
1181+
assert(!BlockVisitedSet.count(TBB));
1182+
BlockWaitcntBracketsMap[TBB] =
1183+
llvm::make_unique<BlockWaitcntBrackets>(ST);
1184+
ScoreBracket = BlockWaitcntBracketsMap[TBB].get();
12071185
}
1186+
ScoreBracket->setRevisitLoop(true);
1187+
LLVM_DEBUG(dbgs() << "set-revisit2: Block"
1188+
<< ContainingLoop->getHeader()->getNumber() << '\n';);
12081189
}
1190+
}
12091191

1210-
// Update an existing waitcount, or make a new one.
1211-
unsigned Enc = AMDGPU::encodeWaitcnt(IV,
1192+
// Update an existing waitcount, or make a new one.
1193+
unsigned Enc = AMDGPU::encodeWaitcnt(IV,
12121194
ForceEmitWaitcnt[VM_CNT] ? 0 : CntVal[VM_CNT],
12131195
ForceEmitWaitcnt[EXP_CNT] ? 0 : CntVal[EXP_CNT],
12141196
ForceEmitWaitcnt[LGKM_CNT] ? 0 : CntVal[LGKM_CNT]);
1215-
// We don't remove waitcnts that existed prior to the waitcnt
1216-
// pass. Check if the waitcnt to-be-inserted can be avoided
1217-
// or if the prev waitcnt can be updated.
1218-
bool insertSWaitInst = true;
1219-
for (MachineBasicBlock::iterator I = MI.getIterator(),
1220-
B = MI.getParent()->begin();
1221-
insertSWaitInst && I != B; --I) {
1222-
if (I == MI.getIterator())
1223-
continue;
1197+
// We don't remove waitcnts that existed prior to the waitcnt
1198+
// pass. Check if the waitcnt to-be-inserted can be avoided
1199+
// or if the prev waitcnt can be updated.
1200+
bool insertSWaitInst = true;
1201+
for (MachineBasicBlock::iterator I = MI.getIterator(),
1202+
B = MI.getParent()->begin();
1203+
insertSWaitInst && I != B; --I) {
1204+
if (I == MI.getIterator())
1205+
continue;
12241206

1225-
switch (I->getOpcode()) {
1226-
case AMDGPU::S_WAITCNT:
1227-
if (isWaitcntStronger(I->getOperand(0).getImm(), Enc))
1228-
insertSWaitInst = false;
1229-
else if (!OldWaitcnt) {
1230-
OldWaitcnt = &*I;
1231-
Enc = combineWaitcnt(I->getOperand(0).getImm(), Enc);
1232-
}
1233-
break;
1234-
// TODO: skip over instructions which never require wait.
1207+
switch (I->getOpcode()) {
1208+
case AMDGPU::S_WAITCNT:
1209+
if (isWaitcntStronger(I->getOperand(0).getImm(), Enc))
1210+
insertSWaitInst = false;
1211+
else if (!OldWaitcnt) {
1212+
OldWaitcnt = &*I;
1213+
Enc = combineWaitcnt(I->getOperand(0).getImm(), Enc);
12351214
}
12361215
break;
1216+
// TODO: skip over instructions which never require wait.
12371217
}
1238-
if (insertSWaitInst) {
1239-
if (OldWaitcnt && OldWaitcnt->getOpcode() == AMDGPU::S_WAITCNT) {
1240-
if (ForceEmitZeroWaitcnts)
1241-
LLVM_DEBUG(
1242-
dbgs()
1243-
<< "Force emit s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)\n");
1244-
if (IsForceEmitWaitcnt)
1245-
LLVM_DEBUG(dbgs()
1246-
<< "Force emit a s_waitcnt due to debug counter\n");
1247-
1248-
OldWaitcnt->getOperand(0).setImm(Enc);
1249-
if (!OldWaitcnt->getParent())
1250-
MI.getParent()->insert(MI, OldWaitcnt);
1251-
1252-
LLVM_DEBUG(dbgs() << "updateWaitcntInBlock\n"
1253-
<< "Old Instr: " << MI << '\n'
1254-
<< "New Instr: " << *OldWaitcnt << '\n');
1255-
} else {
1256-
auto SWaitInst = BuildMI(*MI.getParent(), MI.getIterator(),
1257-
MI.getDebugLoc(), TII->get(AMDGPU::S_WAITCNT))
1218+
break;
1219+
}
1220+
if (insertSWaitInst) {
1221+
if (OldWaitcnt) {
1222+
assert(OldWaitcnt->getOpcode() == AMDGPU::S_WAITCNT);
1223+
if (ForceEmitZeroWaitcnts)
1224+
LLVM_DEBUG(dbgs()
1225+
<< "Force emit s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)\n");
1226+
if (IsForceEmitWaitcnt)
1227+
LLVM_DEBUG(dbgs() << "Force emit a s_waitcnt due to debug counter\n");
1228+
1229+
OldWaitcnt->getOperand(0).setImm(Enc);
1230+
if (!OldWaitcnt->getParent())
1231+
MI.getParent()->insert(MI, OldWaitcnt);
1232+
1233+
LLVM_DEBUG(dbgs() << "updateWaitcntInBlock\n"
1234+
<< "Old Instr: " << MI << '\n'
1235+
<< "New Instr: " << *OldWaitcnt << '\n');
1236+
} else {
1237+
auto SWaitInst = BuildMI(*MI.getParent(), MI.getIterator(),
1238+
MI.getDebugLoc(), TII->get(AMDGPU::S_WAITCNT))
12581239
.addImm(Enc);
1259-
TrackedWaitcntSet.insert(SWaitInst);
1240+
TrackedWaitcntSet.insert(SWaitInst);
12601241

1261-
LLVM_DEBUG(dbgs() << "insertWaitcntInBlock\n"
1262-
<< "Old Instr: " << MI << '\n'
1263-
<< "New Instr: " << *SWaitInst << '\n');
1264-
}
1242+
LLVM_DEBUG(dbgs() << "insertWaitcntInBlock\n"
1243+
<< "Old Instr: " << MI << '\n'
1244+
<< "New Instr: " << *SWaitInst << '\n');
12651245
}
1246+
}
12661247

1267-
if (CntVal[EXP_CNT] == 0) {
1268-
ScoreBrackets->setMixedExpTypes(false);
1269-
}
1248+
if (CntVal[EXP_CNT] == 0) {
1249+
ScoreBrackets->setMixedExpTypes(false);
12701250
}
12711251
}
12721252
}

0 commit comments

Comments
 (0)