Skip to content

Commit ab43bf6

Browse files
committed
AMDGPU/InsertWaitcnt: Consistently use uint32_t for scores / time points
Summary:
There is one obsolete reference to using -1 as an indication of "unknown", but that sentinel isn't actually used anywhere. Using an unsigned type makes robust wraparound checks easier.

Reviewers: msearles, rampitec, scott.linder, kanarayan

Subscribers: arsenm, kzhuravl, jvesely, wdng, yaxunl, dstuttard, llvm-commits, tpr, t-tye, hakzsam

Differential Revision: https://reviews.llvm.org/D54230

llvm-svn: 347852
1 parent f96456c commit ab43bf6

File tree

1 file changed

+49
-55
lines changed

1 file changed

+49
-55
lines changed

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 49 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -112,9 +112,9 @@ iterator_range<enum_iterator<InstCounterType>> inst_counter_types() {
112112
using RegInterval = std::pair<signed, signed>;
113113

114114
struct {
115-
int32_t VmcntMax;
116-
int32_t ExpcntMax;
117-
int32_t LgkmcntMax;
115+
uint32_t VmcntMax;
116+
uint32_t ExpcntMax;
117+
uint32_t LgkmcntMax;
118118
int32_t NumVGPRsMax;
119119
int32_t NumSGPRsMax;
120120
} HardwareLimits;
@@ -194,7 +194,7 @@ class BlockWaitcntBrackets {
194194

195195
~BlockWaitcntBrackets() = default;
196196

197-
static int32_t getWaitCountMax(InstCounterType T) {
197+
static uint32_t getWaitCountMax(InstCounterType T) {
198198
switch (T) {
199199
case VM_CNT:
200200
return HardwareLimits.VmcntMax;
@@ -208,33 +208,33 @@ class BlockWaitcntBrackets {
208208
return 0;
209209
}
210210

211-
void setScoreLB(InstCounterType T, int32_t Val) {
211+
void setScoreLB(InstCounterType T, uint32_t Val) {
212212
assert(T < NUM_INST_CNTS);
213213
if (T >= NUM_INST_CNTS)
214214
return;
215215
ScoreLBs[T] = Val;
216216
}
217217

218-
void setScoreUB(InstCounterType T, int32_t Val) {
218+
void setScoreUB(InstCounterType T, uint32_t Val) {
219219
assert(T < NUM_INST_CNTS);
220220
if (T >= NUM_INST_CNTS)
221221
return;
222222
ScoreUBs[T] = Val;
223223
if (T == EXP_CNT) {
224-
int32_t UB = (int)(ScoreUBs[T] - getWaitCountMax(EXP_CNT));
225-
if (ScoreLBs[T] < UB)
224+
uint32_t UB = ScoreUBs[T] - getWaitCountMax(EXP_CNT);
225+
if (ScoreLBs[T] < UB && UB < ScoreUBs[T])
226226
ScoreLBs[T] = UB;
227227
}
228228
}
229229

230-
int32_t getScoreLB(InstCounterType T) const {
230+
uint32_t getScoreLB(InstCounterType T) const {
231231
assert(T < NUM_INST_CNTS);
232232
if (T >= NUM_INST_CNTS)
233233
return 0;
234234
return ScoreLBs[T];
235235
}
236236

237-
int32_t getScoreUB(InstCounterType T) const {
237+
uint32_t getScoreUB(InstCounterType T) const {
238238
assert(T < NUM_INST_CNTS);
239239
if (T >= NUM_INST_CNTS)
240240
return 0;
@@ -251,7 +251,7 @@ class BlockWaitcntBrackets {
251251
return EXP_CNT;
252252
}
253253

254-
void setRegScore(int GprNo, InstCounterType T, int32_t Val) {
254+
void setRegScore(int GprNo, InstCounterType T, uint32_t Val) {
255255
if (GprNo < NUM_ALL_VGPRS) {
256256
if (GprNo > VgprUB) {
257257
VgprUB = GprNo;
@@ -266,7 +266,7 @@ class BlockWaitcntBrackets {
266266
}
267267
}
268268

269-
int32_t getRegScore(int GprNo, InstCounterType T) {
269+
uint32_t getRegScore(int GprNo, InstCounterType T) {
270270
if (GprNo < NUM_ALL_VGPRS) {
271271
return VgprScores[T][GprNo];
272272
}
@@ -291,15 +291,15 @@ class BlockWaitcntBrackets {
291291

292292
void setExpScore(const MachineInstr *MI, const SIInstrInfo *TII,
293293
const SIRegisterInfo *TRI, const MachineRegisterInfo *MRI,
294-
unsigned OpNo, int32_t Val);
294+
unsigned OpNo, uint32_t Val);
295295

296296
int32_t getMaxVGPR() const { return VgprUB; }
297297
int32_t getMaxSGPR() const { return SgprUB; }
298298

299299
bool counterOutOfOrder(InstCounterType T) const;
300300
bool simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const;
301301
bool simplifyWaitcnt(InstCounterType T, unsigned &Count) const;
302-
void determineWait(InstCounterType T, int ScoreToWait,
302+
void determineWait(InstCounterType T, uint32_t ScoreToWait,
303303
AMDGPU::Waitcnt &Wait) const;
304304
void applyWaitcnt(const AMDGPU::Waitcnt &Wait);
305305
void applyWaitcnt(InstCounterType T, unsigned Count);
@@ -342,19 +342,19 @@ class BlockWaitcntBrackets {
342342
const GCNSubtarget *ST = nullptr;
343343
bool RevisitLoop = false;
344344
int32_t PostOrder = 0;
345-
int32_t ScoreLBs[NUM_INST_CNTS] = {0};
346-
int32_t ScoreUBs[NUM_INST_CNTS] = {0};
345+
uint32_t ScoreLBs[NUM_INST_CNTS] = {0};
346+
uint32_t ScoreUBs[NUM_INST_CNTS] = {0};
347347
uint32_t PendingEvents = 0;
348348
bool MixedPendingEvents[NUM_INST_CNTS] = {false};
349349
// Remember the last flat memory operation.
350-
int32_t LastFlat[NUM_INST_CNTS] = {0};
350+
uint32_t LastFlat[NUM_INST_CNTS] = {0};
351351
// wait_cnt scores for every vgpr.
352352
// Keep track of the VgprUB and SgprUB to make merge at join efficient.
353353
int32_t VgprUB = 0;
354354
int32_t SgprUB = 0;
355-
int32_t VgprScores[NUM_INST_CNTS][NUM_ALL_VGPRS];
355+
uint32_t VgprScores[NUM_INST_CNTS][NUM_ALL_VGPRS];
356356
// Wait cnt scores for every sgpr, only lgkmcnt is relevant.
357-
int32_t SgprScores[SQ_MAX_PGM_SGPRS] = {0};
357+
uint32_t SgprScores[SQ_MAX_PGM_SGPRS] = {0};
358358
};
359359

360360
// This is a per-loop-region object that records waitcnt status at the end of
@@ -527,7 +527,7 @@ void BlockWaitcntBrackets::setExpScore(const MachineInstr *MI,
527527
const SIInstrInfo *TII,
528528
const SIRegisterInfo *TRI,
529529
const MachineRegisterInfo *MRI,
530-
unsigned OpNo, int32_t Val) {
530+
unsigned OpNo, uint32_t Val) {
531531
RegInterval Interval = getRegInterval(MI, TII, MRI, TRI, OpNo, false);
532532
LLVM_DEBUG({
533533
const MachineOperand &Opnd = MI->getOperand(OpNo);
@@ -544,7 +544,9 @@ void BlockWaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
544544
WaitEventType E, MachineInstr &Inst) {
545545
const MachineRegisterInfo &MRIA = *MRI;
546546
InstCounterType T = eventCounter(E);
547-
int32_t CurrScore = getScoreUB(T) + 1;
547+
uint32_t CurrScore = getScoreUB(T) + 1;
548+
if (CurrScore == 0)
549+
report_fatal_error("InsertWaitcnt score wraparound");
548550
// PendingEvents and ScoreUB need to be updated regardless of whether this event
549551
// changes the score of a register or not.
550552
// Examples include vm_cnt on a buffer store or lgkm_cnt on a send-message.
@@ -683,8 +685,8 @@ void BlockWaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
683685
void BlockWaitcntBrackets::print(raw_ostream &OS) {
684686
OS << '\n';
685687
for (auto T : inst_counter_types()) {
686-
int LB = getScoreLB(T);
687-
int UB = getScoreUB(T);
688+
uint32_t LB = getScoreLB(T);
689+
uint32_t UB = getScoreUB(T);
688690

689691
switch (T) {
690692
case VM_CNT:
@@ -704,10 +706,10 @@ void BlockWaitcntBrackets::print(raw_ostream &OS) {
704706
if (LB < UB) {
705707
// Print vgpr scores.
706708
for (int J = 0; J <= getMaxVGPR(); J++) {
707-
int RegScore = getRegScore(J, T);
709+
uint32_t RegScore = getRegScore(J, T);
708710
if (RegScore <= LB)
709711
continue;
710-
int RelScore = RegScore - LB - 1;
712+
uint32_t RelScore = RegScore - LB - 1;
711713
if (J < SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS) {
712714
OS << RelScore << ":v" << J << " ";
713715
} else {
@@ -717,10 +719,10 @@ void BlockWaitcntBrackets::print(raw_ostream &OS) {
717719
// Also need to print sgpr scores for lgkm_cnt.
718720
if (T == LGKM_CNT) {
719721
for (int J = 0; J <= getMaxSGPR(); J++) {
720-
int RegScore = getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
722+
uint32_t RegScore = getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
721723
if (RegScore <= LB)
722724
continue;
723-
int RelScore = RegScore - LB - 1;
725+
uint32_t RelScore = RegScore - LB - 1;
724726
OS << RelScore << ":s" << J << " ";
725727
}
726728
}
@@ -740,30 +742,22 @@ bool BlockWaitcntBrackets::simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const {
740742

741743
bool BlockWaitcntBrackets::simplifyWaitcnt(InstCounterType T,
742744
unsigned &Count) const {
743-
const int32_t LB = getScoreLB(T);
744-
const int32_t UB = getScoreUB(T);
745-
if (Count < (unsigned)UB && UB - (int32_t)Count > LB)
745+
const uint32_t LB = getScoreLB(T);
746+
const uint32_t UB = getScoreUB(T);
747+
if (Count < UB && UB - Count > LB)
746748
return true;
747749

748750
Count = ~0u;
749751
return false;
750752
}
751753

752-
void BlockWaitcntBrackets::determineWait(InstCounterType T, int ScoreToWait,
754+
void BlockWaitcntBrackets::determineWait(InstCounterType T,
755+
uint32_t ScoreToWait,
753756
AMDGPU::Waitcnt &Wait) const {
754-
if (ScoreToWait == -1) {
755-
// The score to wait is unknown. This implies that it was not encountered
756-
// during the path of the CFG walk done during the current traversal but
757-
// may be seen on a different path. Emit an s_wait counter with a
758-
// conservative value of 0 for the counter.
759-
addWait(Wait, T, 0);
760-
return;
761-
}
762-
763757
// If the score of src_operand falls within the bracket, we need an
764758
// s_waitcnt instruction.
765-
const int32_t LB = getScoreLB(T);
766-
const int32_t UB = getScoreUB(T);
759+
const uint32_t LB = getScoreLB(T);
760+
const uint32_t UB = getScoreUB(T);
767761
if ((UB >= ScoreToWait) && (ScoreToWait > LB)) {
768762
if ((T == VM_CNT || T == LGKM_CNT) &&
769763
hasPendingFlat() &&
@@ -790,13 +784,13 @@ void BlockWaitcntBrackets::applyWaitcnt(const AMDGPU::Waitcnt &Wait) {
790784
}
791785

792786
void BlockWaitcntBrackets::applyWaitcnt(InstCounterType T, unsigned Count) {
793-
const int32_t UB = getScoreUB(T);
794-
if (Count >= (unsigned)UB)
787+
const uint32_t UB = getScoreUB(T);
788+
if (Count >= UB)
795789
return;
796790
if (Count != 0) {
797791
if (counterOutOfOrder(T))
798792
return;
799-
setScoreLB(T, std::max(getScoreLB(T), UB - (int32_t)Count));
793+
setScoreLB(T, std::max(getScoreLB(T), UB - Count));
800794
} else {
801795
setScoreLB(T, UB);
802796
MixedPendingEvents[T] = false;
@@ -1235,8 +1229,8 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(
12351229
// this merged score bracket is used when adding waitcnts to the Block
12361230
void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) {
12371231
BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&Block].get();
1238-
int32_t MaxPending[NUM_INST_CNTS] = {0};
1239-
int32_t MaxFlat[NUM_INST_CNTS] = {0};
1232+
uint32_t MaxPending[NUM_INST_CNTS] = {0};
1233+
uint32_t MaxFlat[NUM_INST_CNTS] = {0};
12401234

12411235
// For single basic block loops, we need to retain the Block's
12421236
// score bracket to have accurate Pred info. So, make a copy of Block's
@@ -1264,7 +1258,7 @@ void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) {
12641258
if (!Visited)
12651259
continue;
12661260
for (auto T : inst_counter_types()) {
1267-
int span =
1261+
uint32_t span =
12681262
PredScoreBrackets->getScoreUB(T) - PredScoreBrackets->getScoreLB(T);
12691263
MaxPending[T] = std::max(MaxPending[T], span);
12701264
span =
@@ -1291,27 +1285,27 @@ void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) {
12911285

12921286
// Now merge the gpr_reg_score information
12931287
for (auto T : inst_counter_types()) {
1294-
int PredLB = PredScoreBrackets->getScoreLB(T);
1295-
int PredUB = PredScoreBrackets->getScoreUB(T);
1288+
uint32_t PredLB = PredScoreBrackets->getScoreLB(T);
1289+
uint32_t PredUB = PredScoreBrackets->getScoreUB(T);
12961290
if (PredLB < PredUB) {
1297-
int PredScale = MaxPending[T] - PredUB;
1291+
uint32_t PredScale = MaxPending[T] - PredUB;
12981292
// Merge vgpr scores.
12991293
for (int J = 0; J <= PredScoreBrackets->getMaxVGPR(); J++) {
1300-
int PredRegScore = PredScoreBrackets->getRegScore(J, T);
1294+
uint32_t PredRegScore = PredScoreBrackets->getRegScore(J, T);
13011295
if (PredRegScore <= PredLB)
13021296
continue;
1303-
int NewRegScore = PredScale + PredRegScore;
1297+
uint32_t NewRegScore = PredScale + PredRegScore;
13041298
ScoreBrackets->setRegScore(
13051299
J, T, std::max(ScoreBrackets->getRegScore(J, T), NewRegScore));
13061300
}
13071301
// Also need to merge sgpr scores for lgkm_cnt.
13081302
if (T == LGKM_CNT) {
13091303
for (int J = 0; J <= PredScoreBrackets->getMaxSGPR(); J++) {
1310-
int PredRegScore =
1304+
uint32_t PredRegScore =
13111305
PredScoreBrackets->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
13121306
if (PredRegScore <= PredLB)
13131307
continue;
1314-
int NewRegScore = PredScale + PredRegScore;
1308+
uint32_t NewRegScore = PredScale + PredRegScore;
13151309
ScoreBrackets->setRegScore(
13161310
J + NUM_ALL_VGPRS, LGKM_CNT,
13171311
std::max(

0 commit comments

Comments
 (0)