Skip to content
Open
59 changes: 21 additions & 38 deletions llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,11 @@ static void printLivenessInfo(raw_ostream &OS,
const auto &MRI = BB->getParent()->getRegInfo();

const auto LiveIns = getLiveRegsBefore(*Begin, *LIS);
OS << "LIn RP: " << print(getRegPressure(MRI, LiveIns));
OS << "LIn RP: " << print(getRegPressure(MRI, LiveIns, Begin->getMF()));

const auto BottomMI = End == BB->end() ? std::prev(End) : End;
const auto LiveOuts = getLiveRegsAfter(*BottomMI, *LIS);
OS << "LOt RP: " << print(getRegPressure(MRI, LiveOuts));
OS << "LOt RP: " << print(getRegPressure(MRI, LiveOuts, Begin->getMF()));
}

LLVM_DUMP_METHOD
Expand Down Expand Up @@ -238,11 +238,8 @@ class SchedStrategyStub : public MachineSchedStrategy {

// Constructs the iterative scheduler: hands C and a freshly allocated
// SchedStrategyStub to BaseClass, records the scheduling context and the
// requested strategy kind, and builds the upward pressure tracker from *LIS.
// NOTE(review): this text is a rendered diff, so two member-initializer lists
// appear back to back; only one of them exists in either revision of the file.
GCNIterativeScheduler::GCNIterativeScheduler(MachineSchedContext *C,
StrategyKind S)
// Pre-change initializer list: UPTracker is built from *LIS alone.
: BaseClass(C, std::make_unique<SchedStrategyStub>())
, Context(C)
, Strategy(S)
, UPTracker(*LIS) {
}
// Post-change initializer list: same members, but UPTracker additionally
// receives C->MF.
: BaseClass(C, std::make_unique<SchedStrategyStub>()), Context(C),
Strategy(S), UPTracker(*LIS, C->MF) {}

// returns max pressure for a region
GCNRegPressure
Expand Down Expand Up @@ -281,7 +278,7 @@ template <typename Range> GCNRegPressure
GCNIterativeScheduler::getSchedulePressure(const Region &R,
Range &&Schedule) const {
auto const BBEnd = R.Begin->getParent()->end();
GCNUpwardRPTracker RPTracker(*LIS);
GCNUpwardRPTracker RPTracker(*LIS, &MF);
if (R.End != BBEnd) {
// R.End points to the boundary instruction but the
// schedule doesn't include it
Expand Down Expand Up @@ -447,11 +444,7 @@ void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) {
// BestSchedules aren't deleted on fail.
unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
// TODO: assert Regions are sorted descending by pressure
const auto &ST = MF.getSubtarget<GCNSubtarget>();
const unsigned DynamicVGPRBlockSize =
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
const auto Occ =
Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize);
const auto Occ = Regions.front()->MaxPressure.getOccupancy(MF);
LLVM_DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc
<< ", current = " << Occ << '\n');

Expand All @@ -460,7 +453,7 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
// Always build the DAG to add mutations
BuildDAG DAG(*R, *this);

if (R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize) >= NewOcc)
if (R->MaxPressure.getOccupancy(MF) >= NewOcc)
continue;

LLVM_DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3);
Expand All @@ -471,7 +464,7 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
LLVM_DEBUG(dbgs() << "Occupancy improvement attempt:\n";
printSchedRP(dbgs(), R->MaxPressure, MaxRP));

NewOcc = std::min(NewOcc, MaxRP.getOccupancy(ST, DynamicVGPRBlockSize));
NewOcc = std::min(NewOcc, MaxRP.getOccupancy(MF));
if (NewOcc <= Occ)
break;

Expand All @@ -488,15 +481,12 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
}

void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
bool TryMaximizeOccupancy) {
const auto &ST = MF.getSubtarget<GCNSubtarget>();
bool TryMaximizeOccupancy) {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
auto TgtOcc = MFI->getMinAllowedOccupancy();
unsigned DynamicVGPRBlockSize = MFI->getDynamicVGPRBlockSize();

sortRegionsByPressure(TgtOcc);
auto Occ =
Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize);
auto Occ = Regions.front()->MaxPressure.getOccupancy(MF);

bool IsReentry = false;
if (TryMaximizeOccupancy && Occ < TgtOcc) {
Expand Down Expand Up @@ -527,21 +517,19 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
const auto RP = getRegionPressure(*R);
LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));

if (RP.getOccupancy(ST, DynamicVGPRBlockSize) < TgtOcc) {
if (RP.getOccupancy(MF) < TgtOcc) {
LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
if (R->BestSchedule.get() && R->BestSchedule->MaxPressure.getOccupancy(
ST, DynamicVGPRBlockSize) >= TgtOcc) {
if (R->BestSchedule.get() &&
R->BestSchedule->MaxPressure.getOccupancy(MF) >= TgtOcc) {
LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
scheduleBest(*R);
} else {
LLVM_DEBUG(dbgs() << ", restoring\n");
Ovr.restoreOrder();
assert(R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize) >=
TgtOcc);
assert(R->MaxPressure.getOccupancy(MF) >= TgtOcc);
}
}
FinalOccupancy =
std::min(FinalOccupancy, RP.getOccupancy(ST, DynamicVGPRBlockSize));
FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(MF));
}
}
MFI->limitOccupancy(FinalOccupancy);
Expand Down Expand Up @@ -582,16 +570,12 @@ void GCNIterativeScheduler::scheduleMinReg(bool force) {
///////////////////////////////////////////////////////////////////////////////
// ILP scheduler port

void GCNIterativeScheduler::scheduleILP(
bool TryMaximizeOccupancy) {
const auto &ST = MF.getSubtarget<GCNSubtarget>();
void GCNIterativeScheduler::scheduleILP(bool TryMaximizeOccupancy) {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
auto TgtOcc = MFI->getMinAllowedOccupancy();
unsigned DynamicVGPRBlockSize = MFI->getDynamicVGPRBlockSize();

sortRegionsByPressure(TgtOcc);
auto Occ =
Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize);
auto Occ = Regions.front()->MaxPressure.getOccupancy(MF);

bool IsReentry = false;
if (TryMaximizeOccupancy && Occ < TgtOcc) {
Expand All @@ -612,18 +596,17 @@ void GCNIterativeScheduler::scheduleILP(
const auto RP = getSchedulePressure(*R, ILPSchedule);
LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));

if (RP.getOccupancy(ST, DynamicVGPRBlockSize) < TgtOcc) {
if (RP.getOccupancy(MF) < TgtOcc) {
LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
if (R->BestSchedule.get() && R->BestSchedule->MaxPressure.getOccupancy(
ST, DynamicVGPRBlockSize) >= TgtOcc) {
if (R->BestSchedule.get() &&
R->BestSchedule->MaxPressure.getOccupancy(MF) >= TgtOcc) {
LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
scheduleBest(*R);
}
} else {
scheduleRegion(*R, ILPSchedule, RP);
LLVM_DEBUG(printSchedResult(dbgs(), R, RP));
FinalOccupancy =
std::min(FinalOccupancy, RP.getOccupancy(ST, DynamicVGPRBlockSize));
FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(MF));
}
}
MFI->limitOccupancy(FinalOccupancy);
Expand Down
8 changes: 3 additions & 5 deletions llvm/lib/Target/AMDGPU/GCNIterativeScheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,8 @@ class GCNIterativeScheduler : public ScheduleDAGMILive {
}

void swapIGLPMutations(const Region &R, bool IsReentry);
// Declaration of setBestSchedule as shown by the diff.
// Pre-change form: MaxRP defaults to a default-constructed GCNRegPressure.
void setBestSchedule(Region &R,
ScheduleRef Schedule,
const GCNRegPressure &MaxRP = GCNRegPressure());
// Post-change form: the default argument is gone, so every caller has to pass
// MaxRP explicitly.
void setBestSchedule(Region &R, ScheduleRef Schedule,
const GCNRegPressure &MaxRP);

void scheduleBest(Region &R);

Expand All @@ -105,8 +104,7 @@ class GCNIterativeScheduler : public ScheduleDAGMILive {
void sortRegionsByPressure(unsigned TargetOcc);

// Declaration of the scheduleRegion member template as shown by the diff; the
// template header below is shared by the pre- and post-change declarations.
template <typename Range>
// Pre-change form: MaxRP defaults to a default-constructed GCNRegPressure.
void scheduleRegion(Region &R, Range &&Schedule,
const GCNRegPressure &MaxRP = GCNRegPressure());
// Post-change form: no default, MaxRP must be supplied by the caller.
void scheduleRegion(Region &R, Range &&Schedule, const GCNRegPressure &MaxRP);

unsigned tryMaximizeOccupancy(unsigned TargetOcc =
std::numeric_limits<unsigned>::max());
Expand Down
74 changes: 39 additions & 35 deletions llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,6 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
unsigned OtherVGPRForSGPRSpills =
(OtherExcessSGPR + (WaveSize - 1)) / WaveSize;

unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();

// Unified excess pressure conditions, accounting for VGPRs used for SGPR
// spills
unsigned ExcessVGPR =
Expand All @@ -149,22 +147,26 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
0);
// Arch VGPR excess pressure conditions, accounting for VGPRs used for SGPR
// spills
unsigned ExcessArchVGPR = std::max(
static_cast<int>(getVGPRNum(false) + VGPRForSGPRSpills - MaxArchVGPRs),
0);
unsigned AddressableArchVGPRs = ST.getAddressableNumArchVGPRs();
unsigned ExcessArchVGPR =
std::max(static_cast<int>(getVGPRNum(false) + VGPRForSGPRSpills -
AddressableArchVGPRs),
0);
unsigned OtherExcessArchVGPR =
std::max(static_cast<int>(O.getVGPRNum(false) + OtherVGPRForSGPRSpills -
MaxArchVGPRs),
AddressableArchVGPRs),
0);
// AGPR excess pressure conditions
unsigned ExcessAGPR = std::max(
static_cast<int>(ST.hasGFX90AInsts() ? (getAGPRNum() - MaxArchVGPRs)
: (getAGPRNum() - MaxVGPRs)),
0);
unsigned OtherExcessAGPR = std::max(
static_cast<int>(ST.hasGFX90AInsts() ? (O.getAGPRNum() - MaxArchVGPRs)
: (O.getAGPRNum() - MaxVGPRs)),
0);
unsigned ExcessAGPR =
std::max(static_cast<int>(ST.hasGFX90AInsts()
? (getAGPRNum() - AddressableArchVGPRs)
: (getAGPRNum() - MaxVGPRs)),
0);
unsigned OtherExcessAGPR =
std::max(static_cast<int>(ST.hasGFX90AInsts()
? (O.getAGPRNum() - AddressableArchVGPRs)
: (O.getAGPRNum() - MaxVGPRs)),
0);

bool ExcessRP = ExcessSGPR || ExcessVGPR || ExcessArchVGPR || ExcessAGPR;
bool OtherExcessRP = OtherExcessSGPR || OtherExcessVGPR ||
Expand All @@ -186,12 +188,14 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
unsigned PureExcessVGPR =
std::max(static_cast<int>(getVGPRNum(ST.hasGFX90AInsts()) - MaxVGPRs),
0) +
std::max(static_cast<int>(getVGPRNum(false) - MaxArchVGPRs), 0);
std::max(static_cast<int>(getVGPRNum(false) - AddressableArchVGPRs),
0);
unsigned OtherPureExcessVGPR =
std::max(
static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts()) - MaxVGPRs),
0) +
std::max(static_cast<int>(O.getVGPRNum(false) - MaxArchVGPRs), 0);
std::max(static_cast<int>(O.getVGPRNum(false) - AddressableArchVGPRs),
0);

// If we have a special case where there is a tie in excess VGPR, but one
// of the pressures has VGPR usage from SGPR spills, prefer the pressure
Expand Down Expand Up @@ -229,14 +233,15 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
}

// Give final precedence to lower general RP.
return SGPRImportant ? (getSGPRNum() < O.getSGPRNum()):
(getVGPRNum(ST.hasGFX90AInsts()) <
return SGPRImportant ? (getSGPRNum() < O.getSGPRNum())
: (getVGPRNum(ST.hasGFX90AInsts()) <
O.getVGPRNum(ST.hasGFX90AInsts()));
}

// Returns a Printable that writes a one-line summary of RP to a raw_ostream:
// the arch-VGPR and AGPR counts, the VGPR/SGPR tuple weights, an occupancy
// figure when ST is non-null, and a terminating newline. RP is captured by
// reference, so the Printable has to be streamed while RP is still alive.
// NOTE(review): this text is a rendered diff. Part of the lambda body is hidden
// behind the "Expand All" hunk separator, and old/new lines sit side by side.
Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST,
// Pre-change parameter list tail and lambda capture.
unsigned DynamicVGPRBlockSize) {
return Printable([&RP, ST, DynamicVGPRBlockSize](raw_ostream &OS) {
// Post-change replacement: adds an MF pointer parameter and captures it.
unsigned DynamicVGPRBlockSize,
const MachineFunction *MF) {
return Printable([&RP, ST, DynamicVGPRBlockSize, MF](raw_ostream &OS) {
OS << "VGPRs: " << RP.getArchVGPRNum() << ' '
<< "AGPRs: " << RP.getAGPRNum();
if (ST)
Expand All @@ -250,7 +255,7 @@ Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST,
OS << ", LVGPR WT: " << RP.getVGPRTuplesWeight()
<< ", LSGPR WT: " << RP.getSGPRTuplesWeight();
if (ST)
// Pre-change occupancy query: uses the subtarget guarded by the `if` above.
OS << " -> Occ: " << RP.getOccupancy(*ST, DynamicVGPRBlockSize);
// Post-change occupancy query.
// NOTE(review): the guard still tests ST, but this line dereferences MF;
// confirm MF is guaranteed non-null whenever ST is, or guard on MF instead.
OS << " -> Occ: " << RP.getOccupancy(*MF);
OS << '\n';
});
}
Expand Down Expand Up @@ -473,8 +478,7 @@ GCNRPTracker::LiveRegSet llvm::getLiveRegs(SlotIndex SI,
// Re-initialises the tracker at instruction MI: caches the function's
// MachineRegisterInfo in MRI, adopts *LiveRegsCopy as the live set when one is
// supplied (skipping the copy if it already is LiveRegs), and seeds both
// MaxPressure and CurPressure with the pressure of the resulting live set.
// NOTE(review): this text is a rendered diff. The branch taken when
// LiveRegsCopy is null is partly hidden behind the "Expand All" separator;
// presumably it recomputes LiveRegs from LIS before/after MI depending on
// `After` — confirm against the full file.
void GCNRPTracker::reset(const MachineInstr &MI,
const LiveRegSet *LiveRegsCopy,
bool After) {
// Pre-change: the function is derived from MI itself.
const MachineFunction &MF = *MI.getMF();
MRI = &MF.getRegInfo();
// Post-change: uses an MF pointer that is not declared in this block
// (presumably a tracker member — confirm). This relies on that MF being the
// function that contains MI.
MRI = &MF->getRegInfo();
if (LiveRegsCopy) {
if (&LiveRegs != LiveRegsCopy)
LiveRegs = *LiveRegsCopy;
Expand All @@ -483,15 +487,15 @@ void GCNRPTracker::reset(const MachineInstr &MI,
: getLiveRegsBefore(MI, LIS);
}

// Pre-change seeding of the pressure pair.
MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs);
// Post-change seeding: getRegPressure additionally receives MF.
MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs, MF);
}

// Re-initialises the tracker from an explicit register info and live set:
// stores the address of MRI_, copies LiveRegs_ into LiveRegs, clears
// LastTrackedMI, and seeds both MaxPressure and CurPressure with the pressure
// computed for LiveRegs_.
// NOTE(review): this text is a rendered diff, so the final assignment appears
// twice — once in its pre-change form and once in its replacement.
void GCNRPTracker::reset(const MachineRegisterInfo &MRI_,
const LiveRegSet &LiveRegs_) {
MRI = &MRI_;
LiveRegs = LiveRegs_;
LastTrackedMI = nullptr;
// Pre-change seeding of the pressure pair.
MaxPressure = CurPressure = getRegPressure(MRI_, LiveRegs_);
// Post-change seeding: getRegPressure additionally receives MF, which is not
// declared in this block (presumably a tracker member — confirm).
MaxPressure = CurPressure = getRegPressure(MRI_, LiveRegs_, MF);
}

/// Mostly copy/paste from CodeGen/RegisterPressure.cpp
Expand All @@ -517,7 +521,7 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
return;

// Kill all defs.
GCNRegPressure DefPressure, ECDefPressure;
GCNRegPressure DefPressure(MF), ECDefPressure(MF);
bool HasECDefs = false;
for (const MachineOperand &MO : MI.all_defs()) {
if (!MO.getReg().isVirtual())
Expand Down Expand Up @@ -565,7 +569,7 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
MaxPressure = HasECDefs ? max(CurPressure + ECDefPressure, MaxPressure)
: max(CurPressure, MaxPressure);

assert(CurPressure == getRegPressure(*MRI, LiveRegs));
assert(CurPressure == getRegPressure(*MRI, LiveRegs, MF));
}

////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -800,7 +804,7 @@ bool GCNUpwardRPTracker::isValid() const {
return false;
}

auto LISPressure = getRegPressure(*MRI, LISLR);
auto LISPressure = getRegPressure(*MRI, LISLR, MF);
if (LISPressure != CurPressure) {
dbgs() << "GCNUpwardRPTracker error: Pressure sets different\nTracked: "
<< print(CurPressure) << "LIS rpt: " << print(LISPressure);
Expand Down Expand Up @@ -876,8 +880,8 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {

OS << "---\nname: " << MF.getName() << "\nbody: |\n";

auto printRP = [](const GCNRegPressure &RP) {
return Printable([&RP](raw_ostream &OS) {
auto printRP = [&MF](const GCNRegPressure &RP) {
return Printable([&RP, &MF](raw_ostream &OS) {
OS << format(PFX " %-5d", RP.getSGPRNum())
<< format(" %-5d", RP.getVGPRNum(false));
});
Expand Down Expand Up @@ -906,14 +910,14 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
SlotIndex MBBEndSlot = LIS.getSlotIndexes()->getMBBEndIdx(&MBB);

GCNRPTracker::LiveRegSet LiveIn, LiveOut;
GCNRegPressure RPAtMBBEnd;
GCNRegPressure RPAtMBBEnd(&MF);

if (UseDownwardTracker) {
if (MBB.empty()) {
LiveIn = LiveOut = getLiveRegs(MBBStartSlot, LIS, MRI);
RPAtMBBEnd = getRegPressure(MRI, LiveIn);
RPAtMBBEnd = getRegPressure(MRI, LiveIn, &MF);
} else {
GCNDownwardRPTracker RPT(LIS);
GCNDownwardRPTracker RPT(LIS, &MF);
RPT.reset(MBB.front());

LiveIn = RPT.getLiveRegs();
Expand All @@ -928,7 +932,7 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
RPAtMBBEnd = RPT.getPressure();
}
} else {
GCNUpwardRPTracker RPT(LIS);
GCNUpwardRPTracker RPT(LIS, &MF);
RPT.reset(MRI, MBBEndSlot);

LiveOut = RPT.getLiveRegs();
Expand Down Expand Up @@ -977,7 +981,7 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
}
}
OS << PFX " Live-thr:" << llvm::print(LiveThrough, MRI);
OS << printRP(getRegPressure(MRI, LiveThrough)) << '\n';
OS << printRP(getRegPressure(MRI, LiveThrough, &MF)) << '\n';
}
OS << "...\n";
return false;
Expand Down
Loading