@@ -495,13 +495,6 @@ class SIInsertWaitcnts {
495495 bool isVMEMOrFlatVMEM (const MachineInstr &MI) const ;
496496 bool run (MachineFunction &MF);
497497
498- bool isForceEmitWaitcnt () const {
499- for (auto T : inst_counter_types ())
500- if (ForceEmitWaitcnt[T])
501- return true ;
502- return false ;
503- }
504-
505498 void setForceEmitWaitcnt () {
506499// For non-debug builds, ForceEmitWaitcnt has been initialized to false;
507500// For debug builds, get the debug counter info and adjust if need be
@@ -570,10 +563,6 @@ class SIInsertWaitcnts {
570563 return VmemReadMapping[getVmemType (Inst)];
571564 }
572565
573- bool hasXcnt () const { return ST->hasWaitXCnt (); }
574-
575- bool mayAccessVMEMThroughFlat (const MachineInstr &MI) const ;
576- bool mayAccessLDSThroughFlat (const MachineInstr &MI) const ;
577566 bool isVmemAccess (const MachineInstr &MI) const ;
578567 bool generateWaitcntInstBefore (MachineInstr &MI,
579568 WaitcntBrackets &ScoreBrackets,
@@ -591,7 +580,6 @@ class SIInsertWaitcnts {
591580 WaitcntBrackets &ScoreBrackets);
592581 bool insertWaitcntInBlock (MachineFunction &MF, MachineBasicBlock &Block,
593582 WaitcntBrackets &ScoreBrackets);
594- static bool asynchronouslyWritesSCC (unsigned Opcode);
595583};
596584
597585// This objects maintains the current score brackets of each wait counter, and
@@ -1109,7 +1097,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
11091097 setRegScore (FIRST_LDS_VGPR, T, CurrScore);
11101098 }
11111099
1112- if (Context-> asynchronouslyWritesSCC (Inst.getOpcode ())) {
1100+ if (SIInstrInfo::isSBarrierSCCWrite (Inst.getOpcode ())) {
11131101 setRegScore (SCC, T, CurrScore);
11141102 PendingSCCWrite = &Inst;
11151103 }
@@ -1831,12 +1819,6 @@ bool WaitcntGeneratorGFX12Plus::createNewWaitcnt(
18311819 return Modified;
18321820}
18331821
1834- static bool readsVCCZ (const MachineInstr &MI) {
1835- unsigned Opc = MI.getOpcode ();
1836- return (Opc == AMDGPU::S_CBRANCH_VCCNZ || Opc == AMDGPU::S_CBRANCH_VCCZ) &&
1837- !MI.getOperand (1 ).isUndef ();
1838- }
1839-
18401822// / \returns true if the callee inserts an s_waitcnt 0 on function entry.
18411823static bool callWaitsOnFunctionEntry (const MachineInstr &MI) {
18421824 // Currently all conventions wait, but this may not always be the case.
@@ -2061,7 +2043,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
20612043 ScoreBrackets.determineWait (SmemAccessCounter, Interval, Wait);
20622044 }
20632045
2064- if (hasXcnt () && Op.isDef ())
2046+ if (ST-> hasWaitXCnt () && Op.isDef ())
20652047 ScoreBrackets.determineWait (X_CNT, Interval, Wait);
20662048 }
20672049 }
@@ -2087,10 +2069,9 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
20872069 // TODO: Remove this work-around, enable the assert for Bug 457939
20882070 // after fixing the scheduler. Also, the Shader Compiler code is
20892071 // independent of target.
2090- if (readsVCCZ (MI) && ST->hasReadVCCZBug ()) {
2091- if (ScoreBrackets.hasPendingEvent (SMEM_ACCESS)) {
2092- Wait.DsCnt = 0 ;
2093- }
2072+ if (SIInstrInfo::isCBranchVCCZRead (MI) && ST->hasReadVCCZBug () &&
2073+ ScoreBrackets.hasPendingEvent (SMEM_ACCESS)) {
2074+ Wait.DsCnt = 0 ;
20942075 }
20952076
20962077 // Verify that the wait is actually needed.
@@ -2185,75 +2166,11 @@ bool SIInsertWaitcnts::generateWaitcnt(AMDGPU::Waitcnt Wait,
21852166 return Modified;
21862167}
21872168
2188- // This is a flat memory operation. Check to see if it has memory tokens other
2189- // than LDS. Other address spaces supported by flat memory operations involve
2190- // global memory.
2191- bool SIInsertWaitcnts::mayAccessVMEMThroughFlat (const MachineInstr &MI) const {
2192- assert (TII->isFLAT (MI));
2193-
2194- // All flat instructions use the VMEM counter except prefetch.
2195- if (!TII->usesVM_CNT (MI))
2196- return false ;
2197-
2198- // If there are no memory operands then conservatively assume the flat
2199- // operation may access VMEM.
2200- if (MI.memoperands_empty ())
2201- return true ;
2202-
2203- // See if any memory operand specifies an address space that involves VMEM.
2204- // Flat operations only supported FLAT, LOCAL (LDS), or address spaces
2205- // involving VMEM such as GLOBAL, CONSTANT, PRIVATE (SCRATCH), etc. The REGION
2206- // (GDS) address space is not supported by flat operations. Therefore, simply
2207- // return true unless only the LDS address space is found.
2208- for (const MachineMemOperand *Memop : MI.memoperands ()) {
2209- unsigned AS = Memop->getAddrSpace ();
2210- assert (AS != AMDGPUAS::REGION_ADDRESS);
2211- if (AS != AMDGPUAS::LOCAL_ADDRESS)
2212- return true ;
2213- }
2214-
2215- return false ;
2216- }
2217-
2218- // This is a flat memory operation. Check to see if it has memory tokens for
2219- // either LDS or FLAT.
2220- bool SIInsertWaitcnts::mayAccessLDSThroughFlat (const MachineInstr &MI) const {
2221- assert (TII->isFLAT (MI));
2222-
2223- // Flat instruction such as SCRATCH and GLOBAL do not use the lgkm counter.
2224- if (!TII->usesLGKM_CNT (MI))
2225- return false ;
2226-
2227- // If in tgsplit mode then there can be no use of LDS.
2228- if (ST->isTgSplitEnabled ())
2229- return false ;
2230-
2231- // If there are no memory operands then conservatively assume the flat
2232- // operation may access LDS.
2233- if (MI.memoperands_empty ())
2234- return true ;
2235-
2236- // See if any memory operand specifies an address space that involves LDS.
2237- for (const MachineMemOperand *Memop : MI.memoperands ()) {
2238- unsigned AS = Memop->getAddrSpace ();
2239- if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS)
2240- return true ;
2241- }
2242-
2243- return false ;
2244- }
2245-
22462169bool SIInsertWaitcnts::isVmemAccess (const MachineInstr &MI) const {
2247- return (TII->isFLAT (MI) && mayAccessVMEMThroughFlat (MI)) ||
2170+ return (TII->isFLAT (MI) && TII-> mayAccessVMEMThroughFlat (MI)) ||
22482171 (TII->isVMEM (MI) && !AMDGPU::getMUBUFIsBufferInv (MI.getOpcode ()));
22492172}
22502173
2251- static bool isGFX12CacheInvOrWBInst (MachineInstr &Inst) {
2252- auto Opc = Inst.getOpcode ();
2253- return Opc == AMDGPU::GLOBAL_INV || Opc == AMDGPU::GLOBAL_WB ||
2254- Opc == AMDGPU::GLOBAL_WBINV;
2255- }
2256-
22572174// Return true if the next instruction is S_ENDPGM, following fallthrough
22582175// blocks if necessary.
22592176bool SIInsertWaitcnts::isNextENDPGM (MachineBasicBlock::instr_iterator It,
@@ -2331,7 +2248,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
23312248 ScoreBrackets->updateByEvent (TII, TRI, MRI, LDS_ACCESS, Inst);
23322249 }
23332250 } else if (TII->isFLAT (Inst)) {
2334- if (isGFX12CacheInvOrWBInst (Inst)) {
2251+ if (SIInstrInfo:: isGFX12CacheInvOrWBInst (Inst. getOpcode () )) {
23352252 ScoreBrackets->updateByEvent (TII, TRI, MRI, getVmemWaitEventType (Inst),
23362253 Inst);
23372254 return ;
@@ -2341,14 +2258,14 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
23412258
23422259 int FlatASCount = 0 ;
23432260
2344- if (mayAccessVMEMThroughFlat (Inst)) {
2261+ if (TII-> mayAccessVMEMThroughFlat (Inst)) {
23452262 ++FlatASCount;
23462263 IsVMEMAccess = true ;
23472264 ScoreBrackets->updateByEvent (TII, TRI, MRI, getVmemWaitEventType (Inst),
23482265 Inst);
23492266 }
23502267
2351- if (mayAccessLDSThroughFlat (Inst)) {
2268+ if (TII-> mayAccessLDSThroughFlat (Inst)) {
23522269 ++FlatASCount;
23532270 ScoreBrackets->updateByEvent (TII, TRI, MRI, LDS_ACCESS, Inst);
23542271 }
@@ -2394,7 +2311,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
23942311 ScoreBrackets->updateByEvent (TII, TRI, MRI, EXP_POS_ACCESS, Inst);
23952312 else
23962313 ScoreBrackets->updateByEvent (TII, TRI, MRI, EXP_GPR_LOCK, Inst);
2397- } else if (asynchronouslyWritesSCC (Inst.getOpcode ())) {
2314+ } else if (SIInstrInfo::isSBarrierSCCWrite (Inst.getOpcode ())) {
23982315 ScoreBrackets->updateByEvent (TII, TRI, MRI, SCC_WRITE, Inst);
23992316 } else {
24002317 switch (Inst.getOpcode ()) {
@@ -2413,7 +2330,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
24132330 }
24142331 }
24152332
2416- if (!hasXcnt ())
2333+ if (!ST-> hasWaitXCnt ())
24172334 return ;
24182335
24192336 if (IsVMEMAccess)
@@ -2516,12 +2433,6 @@ static bool isWaitInstr(MachineInstr &Inst) {
25162433 counterTypeForInstr (Opcode).has_value ();
25172434}
25182435
2519- bool SIInsertWaitcnts::asynchronouslyWritesSCC (unsigned Opcode) {
2520- return Opcode == AMDGPU::S_BARRIER_LEAVE ||
2521- Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM ||
2522- Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0;
2523- }
2524-
25252436// Generate s_waitcnt instructions where needed.
25262437bool SIInsertWaitcnts::insertWaitcntInBlock (MachineFunction &MF,
25272438 MachineBasicBlock &Block,
@@ -2578,7 +2489,7 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
25782489 OldWaitcntInstr = nullptr ;
25792490
25802491 // Restore vccz if it's not known to be correct already.
2581- bool RestoreVCCZ = !VCCZCorrect && readsVCCZ (Inst);
2492+ bool RestoreVCCZ = !VCCZCorrect && SIInstrInfo::isCBranchVCCZRead (Inst);
25822493
25832494 // Don't examine operands unless we need to track vccz correctness.
25842495 if (ST->hasReadVCCZBug () || !ST->partialVCCWritesUpdateVCCZ ()) {
@@ -2701,7 +2612,7 @@ bool SIInsertWaitcnts::isPreheaderToFlush(
27012612
27022613bool SIInsertWaitcnts::isVMEMOrFlatVMEM (const MachineInstr &MI) const {
27032614 if (SIInstrInfo::isFLAT (MI))
2704- return mayAccessVMEMThroughFlat (MI);
2615+ return TII-> mayAccessVMEMThroughFlat (MI);
27052616 return SIInstrInfo::isVMEM (MI);
27062617}
27072618
0 commit comments