@@ -69,6 +69,25 @@ static cl::opt<unsigned> ForceEmitZeroFlag(
69
69
70
70
namespace {
71
71
72
+ template <typename EnumT>
73
+ class enum_iterator
74
+ : public iterator_facade_base<enum_iterator<EnumT>,
75
+ std::forward_iterator_tag, const EnumT> {
76
+ EnumT Value;
77
+ public:
78
+ enum_iterator () = default ;
79
+ enum_iterator (EnumT Value) : Value(Value) {}
80
+
81
+ enum_iterator &operator ++() {
82
+ Value = static_cast <EnumT>(Value + 1 );
83
+ return *this ;
84
+ }
85
+
86
+ bool operator ==(const enum_iterator &RHS) const { return Value == RHS.Value ; }
87
+
88
+ EnumT operator *() const { return Value; }
89
+ };
90
+
72
91
// Class of object that encapsulates latest instruction counter score
73
92
// associated with the operand. Used for determining whether
74
93
// s_waitcnt instruction needs to be emitted.
@@ -77,6 +96,11 @@ namespace {
77
96
78
97
// Counter kinds walked by inst_counter_types(). NUM_INST_CNTS is used as the
// exclusive end of that iteration.
enum InstCounterType { VM_CNT = 0 , LGKM_CNT, EXP_CNT, NUM_INST_CNTS };
79
98
99
+ iterator_range<enum_iterator<InstCounterType>> inst_counter_types () {
100
+ return make_range (enum_iterator<InstCounterType>(VM_CNT),
101
+ enum_iterator<InstCounterType>(NUM_INST_CNTS));
102
+ }
103
+
80
104
using RegInterval = std::pair<signed , signed >;
81
105
82
106
struct {
@@ -108,6 +132,11 @@ enum WaitEventType {
108
132
NUM_WAIT_EVENTS,
109
133
};
110
134
135
+ iterator_range<enum_iterator<WaitEventType>> wait_event_types () {
136
+ return make_range (enum_iterator<WaitEventType>(VMEM_ACCESS),
137
+ enum_iterator<WaitEventType>(NUM_WAIT_EVENTS));
138
+ }
139
+
111
140
// The mapping is:
112
141
// 0 .. SQ_MAX_PGM_VGPRS-1 real VGPRs
113
142
// SQ_MAX_PGM_VGPRS .. NUM_ALL_VGPRS-1 extra VGPR-like slots
@@ -122,11 +151,6 @@ enum RegisterMapping {
122
151
NUM_ALL_VGPRS = SQ_MAX_PGM_VGPRS + NUM_EXTRA_VGPRS, // Where SGPR starts.
123
152
};
124
153
125
- #define ForAllWaitEventType (w ) \
126
- for (enum WaitEventType w = (enum WaitEventType)0 ; \
127
- (w) < (enum WaitEventType)NUM_WAIT_EVENTS; \
128
- (w) = (enum WaitEventType)((w) + 1 ))
129
-
130
154
void addWait (AMDGPU::Waitcnt &Wait, InstCounterType T, unsigned Count) {
131
155
switch (T) {
132
156
case VM_CNT:
@@ -153,10 +177,8 @@ void addWait(AMDGPU::Waitcnt &Wait, InstCounterType T, unsigned Count) {
153
177
class BlockWaitcntBrackets {
154
178
public:
155
179
BlockWaitcntBrackets (const GCNSubtarget *SubTarget) : ST(SubTarget) {
156
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
157
- T = (enum InstCounterType)(T + 1 )) {
180
+ for (auto T : inst_counter_types ())
158
181
memset (VgprScores[T], 0 , sizeof (VgprScores[T]));
159
- }
160
182
}
161
183
162
184
~BlockWaitcntBrackets () = default ;
@@ -257,10 +279,8 @@ class BlockWaitcntBrackets {
257
279
memset (ScoreLBs, 0 , sizeof (ScoreLBs));
258
280
memset (ScoreUBs, 0 , sizeof (ScoreUBs));
259
281
memset (EventUBs, 0 , sizeof (EventUBs));
260
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
261
- T = (enum InstCounterType)(T + 1 )) {
282
+ for (auto T : inst_counter_types ())
262
283
memset (VgprScores[T], 0 , sizeof (VgprScores[T]));
263
- }
264
284
memset (SgprScores, 0 , sizeof (SgprScores));
265
285
}
266
286
@@ -426,8 +446,7 @@ class SIInsertWaitcnts : public MachineFunctionPass {
426
446
}
427
447
428
448
bool isForceEmitWaitcnt () const {
429
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
430
- T = (enum InstCounterType)(T + 1 ))
449
+ for (auto T : inst_counter_types ())
431
450
if (ForceEmitWaitcnt[T])
432
451
return true ;
433
452
return false ;
@@ -679,8 +698,7 @@ void BlockWaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
679
698
680
699
void BlockWaitcntBrackets::print (raw_ostream &OS) {
681
700
OS << ' \n ' ;
682
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
683
- T = (enum InstCounterType)(T + 1 )) {
701
+ for (auto T : inst_counter_types ()) {
684
702
int LB = getScoreLB (T);
685
703
int UB = getScoreUB (T);
686
704
@@ -1325,8 +1343,7 @@ void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) {
1325
1343
if (!Visited || PredScoreBrackets->getWaitAtBeginning ()) {
1326
1344
continue ;
1327
1345
}
1328
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
1329
- T = (enum InstCounterType)(T + 1 )) {
1346
+ for (auto T : inst_counter_types ()) {
1330
1347
int span =
1331
1348
PredScoreBrackets->getScoreUB (T) - PredScoreBrackets->getScoreLB (T);
1332
1349
MaxPending[T] = std::max (MaxPending[T], span);
@@ -1367,8 +1384,7 @@ void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) {
1367
1384
#endif
1368
1385
1369
1386
// Now set the current Block's brackets to the largest ending bracket.
1370
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
1371
- T = (enum InstCounterType)(T + 1 )) {
1387
+ for (auto T : inst_counter_types ()) {
1372
1388
ScoreBrackets->setScoreUB (T, MaxPending[T]);
1373
1389
ScoreBrackets->setScoreLB (T, 0 );
1374
1390
ScoreBrackets->setLastFlat (T, MaxFlat[T]);
@@ -1386,8 +1402,7 @@ void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) {
1386
1402
BlockWaitcntBracketsMap[Pred].get ();
1387
1403
1388
1404
// Now merge the gpr_reg_score information
1389
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
1390
- T = (enum InstCounterType)(T + 1 )) {
1405
+ for (auto T : inst_counter_types ()) {
1391
1406
int PredLB = PredScoreBrackets->getScoreLB (T);
1392
1407
int PredUB = PredScoreBrackets->getScoreUB (T);
1393
1408
if (PredLB < PredUB) {
@@ -1420,7 +1435,7 @@ void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) {
1420
1435
}
1421
1436
1422
1437
// Also merge the WaitEvent information.
1423
- ForAllWaitEventType (W ) {
1438
+ for ( auto W : wait_event_types () ) {
1424
1439
enum InstCounterType T = PredScoreBrackets->eventCounter (W);
1425
1440
int PredEventUB = PredScoreBrackets->getEventUB (W);
1426
1441
if (PredEventUB > PredScoreBrackets->getScoreLB (T)) {
@@ -1623,8 +1638,7 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
1623
1638
// generating the precise wait count, just wait on 0.
1624
1639
bool HasPending = false ;
1625
1640
MachineInstr *SWaitInst = WaitcntData->getWaitcnt ();
1626
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
1627
- T = (enum InstCounterType)(T + 1 )) {
1641
+ for (auto T : inst_counter_types ()) {
1628
1642
if (ScoreBrackets->getScoreUB (T) > ScoreBrackets->getScoreLB (T)) {
1629
1643
ScoreBrackets->setScoreLB (T, ScoreBrackets->getScoreUB (T));
1630
1644
HasPending = true ;
@@ -1675,8 +1689,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
1675
1689
const SIMachineFunctionInfo *MFI = MF.getInfo <SIMachineFunctionInfo>();
1676
1690
1677
1691
ForceEmitZeroWaitcnts = ForceEmitZeroFlag;
1678
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
1679
- T = (enum InstCounterType)(T + 1 ))
1692
+ for (auto T : inst_counter_types ())
1680
1693
ForceEmitWaitcnt[T] = false ;
1681
1694
1682
1695
HardwareLimits.VmcntMax = AMDGPU::getVmcntBitMask (IV);
0 commit comments