@@ -880,24 +880,14 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
880
880
// Start with an assumption that there is no need to emit.
881
881
unsigned int EmitWaitcnt = 0 ;
882
882
883
- // No need to wait before phi. If a phi-move exists, then the wait should
884
- // has been inserted before the move. If a phi-move does not exist, then
885
- // wait should be inserted before the real use. The same is true for
886
- // sc-merge. It is not a coincident that all these cases correspond to the
887
- // instructions that are skipped in the assembling loop.
888
- bool NeedLineMapping = false ; // TODO: Check on this.
889
-
890
883
// ForceEmitZeroWaitcnt: force a single s_waitcnt 0 due to hw bug
891
884
bool ForceEmitZeroWaitcnt = false ;
892
885
893
886
setForceEmitWaitcnt ();
894
887
bool IsForceEmitWaitcnt = isForceEmitWaitcnt ();
895
888
896
- if (MI.isDebugInstr () &&
897
- // TODO: any other opcode?
898
- !NeedLineMapping) {
889
+ if (MI.isDebugInstr ())
899
890
return ;
900
- }
901
891
902
892
// See if an s_waitcnt is forced at block entry, or is needed at
903
893
// program end.
@@ -1141,7 +1131,6 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
1141
1131
if (EmitWaitcnt || IsForceEmitWaitcnt) {
1142
1132
int CntVal[NUM_INST_CNTS];
1143
1133
1144
- bool UseDefaultWaitcntStrategy = true ;
1145
1134
if (ForceEmitZeroWaitcnt || ForceEmitZeroWaitcnts) {
1146
1135
// Force all waitcnts to 0.
1147
1136
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
@@ -1151,10 +1140,7 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
1151
1140
CntVal[VM_CNT] = 0 ;
1152
1141
CntVal[EXP_CNT] = 0 ;
1153
1142
CntVal[LGKM_CNT] = 0 ;
1154
- UseDefaultWaitcntStrategy = false ;
1155
- }
1156
-
1157
- if (UseDefaultWaitcntStrategy) {
1143
+ } else {
1158
1144
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
1159
1145
T = (enum InstCounterType)(T + 1 )) {
1160
1146
if (EmitWaitcnt & CNT_MASK (T)) {
@@ -1178,95 +1164,89 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
1178
1164
}
1179
1165
}
1180
1166
1181
- // If we are not waiting on any counter we can skip the wait altogether.
1182
- if (EmitWaitcnt != 0 || IsForceEmitWaitcnt) {
1183
- MachineInstr *OldWaitcnt = ScoreBrackets->getWaitcnt ();
1184
- int Imm = (!OldWaitcnt) ? 0 : OldWaitcnt->getOperand (0 ).getImm ();
1185
- if (!OldWaitcnt ||
1186
- (AMDGPU::decodeVmcnt (IV, Imm) !=
1187
- (CntVal[VM_CNT] & AMDGPU::getVmcntBitMask (IV))) ||
1188
- (AMDGPU::decodeExpcnt (IV, Imm) !=
1189
- (CntVal[EXP_CNT] & AMDGPU::getExpcntBitMask (IV))) ||
1190
- (AMDGPU::decodeLgkmcnt (IV, Imm) !=
1191
- (CntVal[LGKM_CNT] & AMDGPU::getLgkmcntBitMask (IV)))) {
1192
- MachineLoop *ContainingLoop = MLI->getLoopFor (MI.getParent ());
1193
- if (ContainingLoop) {
1194
- MachineBasicBlock *TBB = ContainingLoop->getHeader ();
1195
- BlockWaitcntBrackets *ScoreBracket =
1196
- BlockWaitcntBracketsMap[TBB].get ();
1197
- if (!ScoreBracket) {
1198
- assert (!BlockVisitedSet.count (TBB));
1199
- BlockWaitcntBracketsMap[TBB] =
1200
- llvm::make_unique<BlockWaitcntBrackets>(ST);
1201
- ScoreBracket = BlockWaitcntBracketsMap[TBB].get ();
1202
- }
1203
- ScoreBracket->setRevisitLoop (true );
1204
- LLVM_DEBUG (dbgs ()
1205
- << " set-revisit2: Block"
1206
- << ContainingLoop->getHeader ()->getNumber () << ' \n ' ;);
1167
+ MachineInstr *OldWaitcnt = ScoreBrackets->getWaitcnt ();
1168
+ int Imm = (!OldWaitcnt) ? 0 : OldWaitcnt->getOperand (0 ).getImm ();
1169
+ if (!OldWaitcnt ||
1170
+ (AMDGPU::decodeVmcnt (IV, Imm) !=
1171
+ (CntVal[VM_CNT] & AMDGPU::getVmcntBitMask (IV))) ||
1172
+ (AMDGPU::decodeExpcnt (IV, Imm) !=
1173
+ (CntVal[EXP_CNT] & AMDGPU::getExpcntBitMask (IV))) ||
1174
+ (AMDGPU::decodeLgkmcnt (IV, Imm) !=
1175
+ (CntVal[LGKM_CNT] & AMDGPU::getLgkmcntBitMask (IV)))) {
1176
+ MachineLoop *ContainingLoop = MLI->getLoopFor (MI.getParent ());
1177
+ if (ContainingLoop) {
1178
+ MachineBasicBlock *TBB = ContainingLoop->getHeader ();
1179
+ BlockWaitcntBrackets *ScoreBracket = BlockWaitcntBracketsMap[TBB].get ();
1180
+ if (!ScoreBracket) {
1181
+ assert (!BlockVisitedSet.count (TBB));
1182
+ BlockWaitcntBracketsMap[TBB] =
1183
+ llvm::make_unique<BlockWaitcntBrackets>(ST);
1184
+ ScoreBracket = BlockWaitcntBracketsMap[TBB].get ();
1207
1185
}
1186
+ ScoreBracket->setRevisitLoop (true );
1187
+ LLVM_DEBUG (dbgs () << " set-revisit2: Block"
1188
+ << ContainingLoop->getHeader ()->getNumber () << ' \n ' ;);
1208
1189
}
1190
+ }
1209
1191
1210
- // Update an existing waitcount, or make a new one.
1211
- unsigned Enc = AMDGPU::encodeWaitcnt (IV,
1192
+ // Update an existing waitcount, or make a new one.
1193
+ unsigned Enc = AMDGPU::encodeWaitcnt (IV,
1212
1194
ForceEmitWaitcnt[VM_CNT] ? 0 : CntVal[VM_CNT],
1213
1195
ForceEmitWaitcnt[EXP_CNT] ? 0 : CntVal[EXP_CNT],
1214
1196
ForceEmitWaitcnt[LGKM_CNT] ? 0 : CntVal[LGKM_CNT]);
1215
- // We don't remove waitcnts that existed prior to the waitcnt
1216
- // pass. Check if the waitcnt to-be-inserted can be avoided
1217
- // or if the prev waitcnt can be updated.
1218
- bool insertSWaitInst = true ;
1219
- for (MachineBasicBlock::iterator I = MI.getIterator (),
1220
- B = MI.getParent ()->begin ();
1221
- insertSWaitInst && I != B; --I) {
1222
- if (I == MI.getIterator ())
1223
- continue ;
1197
+ // We don't remove waitcnts that existed prior to the waitcnt
1198
+ // pass. Check if the waitcnt to-be-inserted can be avoided
1199
+ // or if the prev waitcnt can be updated.
1200
+ bool insertSWaitInst = true ;
1201
+ for (MachineBasicBlock::iterator I = MI.getIterator (),
1202
+ B = MI.getParent ()->begin ();
1203
+ insertSWaitInst && I != B; --I) {
1204
+ if (I == MI.getIterator ())
1205
+ continue ;
1224
1206
1225
- switch (I->getOpcode ()) {
1226
- case AMDGPU::S_WAITCNT:
1227
- if (isWaitcntStronger (I->getOperand (0 ).getImm (), Enc))
1228
- insertSWaitInst = false ;
1229
- else if (!OldWaitcnt) {
1230
- OldWaitcnt = &*I;
1231
- Enc = combineWaitcnt (I->getOperand (0 ).getImm (), Enc);
1232
- }
1233
- break ;
1234
- // TODO: skip over instructions which never require wait.
1207
+ switch (I->getOpcode ()) {
1208
+ case AMDGPU::S_WAITCNT:
1209
+ if (isWaitcntStronger (I->getOperand (0 ).getImm (), Enc))
1210
+ insertSWaitInst = false ;
1211
+ else if (!OldWaitcnt) {
1212
+ OldWaitcnt = &*I;
1213
+ Enc = combineWaitcnt (I->getOperand (0 ).getImm (), Enc);
1235
1214
}
1236
1215
break ;
1216
+ // TODO: skip over instructions which never require wait.
1237
1217
}
1238
- if (insertSWaitInst) {
1239
- if (OldWaitcnt && OldWaitcnt->getOpcode () == AMDGPU::S_WAITCNT) {
1240
- if (ForceEmitZeroWaitcnts)
1241
- LLVM_DEBUG (
1242
- dbgs ()
1243
- << " Force emit s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)\n " );
1244
- if (IsForceEmitWaitcnt)
1245
- LLVM_DEBUG (dbgs ()
1246
- << " Force emit a s_waitcnt due to debug counter\n " );
1247
-
1248
- OldWaitcnt->getOperand (0 ).setImm (Enc);
1249
- if (!OldWaitcnt->getParent ())
1250
- MI.getParent ()->insert (MI, OldWaitcnt);
1251
-
1252
- LLVM_DEBUG (dbgs () << " updateWaitcntInBlock\n "
1253
- << " Old Instr: " << MI << ' \n '
1254
- << " New Instr: " << *OldWaitcnt << ' \n ' );
1255
- } else {
1256
- auto SWaitInst = BuildMI (*MI.getParent (), MI.getIterator (),
1257
- MI.getDebugLoc (), TII->get (AMDGPU::S_WAITCNT))
1218
+ break ;
1219
+ }
1220
+ if (insertSWaitInst) {
1221
+ if (OldWaitcnt) {
1222
+ assert (OldWaitcnt->getOpcode () == AMDGPU::S_WAITCNT);
1223
+ if (ForceEmitZeroWaitcnts)
1224
+ LLVM_DEBUG (dbgs ()
1225
+ << " Force emit s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)\n " );
1226
+ if (IsForceEmitWaitcnt)
1227
+ LLVM_DEBUG (dbgs () << " Force emit a s_waitcnt due to debug counter\n " );
1228
+
1229
+ OldWaitcnt->getOperand (0 ).setImm (Enc);
1230
+ if (!OldWaitcnt->getParent ())
1231
+ MI.getParent ()->insert (MI, OldWaitcnt);
1232
+
1233
+ LLVM_DEBUG (dbgs () << " updateWaitcntInBlock\n "
1234
+ << " Old Instr: " << MI << ' \n '
1235
+ << " New Instr: " << *OldWaitcnt << ' \n ' );
1236
+ } else {
1237
+ auto SWaitInst = BuildMI (*MI.getParent (), MI.getIterator (),
1238
+ MI.getDebugLoc (), TII->get (AMDGPU::S_WAITCNT))
1258
1239
.addImm (Enc);
1259
- TrackedWaitcntSet.insert (SWaitInst);
1240
+ TrackedWaitcntSet.insert (SWaitInst);
1260
1241
1261
- LLVM_DEBUG (dbgs () << " insertWaitcntInBlock\n "
1262
- << " Old Instr: " << MI << ' \n '
1263
- << " New Instr: " << *SWaitInst << ' \n ' );
1264
- }
1242
+ LLVM_DEBUG (dbgs () << " insertWaitcntInBlock\n "
1243
+ << " Old Instr: " << MI << ' \n '
1244
+ << " New Instr: " << *SWaitInst << ' \n ' );
1265
1245
}
1246
+ }
1266
1247
1267
- if (CntVal[EXP_CNT] == 0 ) {
1268
- ScoreBrackets->setMixedExpTypes (false );
1269
- }
1248
+ if (CntVal[EXP_CNT] == 0 ) {
1249
+ ScoreBrackets->setMixedExpTypes (false );
1270
1250
}
1271
1251
}
1272
1252
}
0 commit comments