Skip to content

Commit 113052b

Browse files
committed
[AMDGPU] Prefer lower total register usage in regions with spilling
Change-Id: Ia5c434b0945bdcbc357c5e06c3164118fc91df25
1 parent 435e75d commit 113052b

9 files changed

+707
-226
lines changed

llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -409,9 +409,8 @@ void GCNIterativeScheduler::scheduleRegion(Region &R, Range &&Schedule,
409409

410410
// Sort recorded regions by pressure - highest at the front
411411
void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) {
412-
const auto &ST = MF.getSubtarget<GCNSubtarget>();
413-
llvm::sort(Regions, [&ST, TargetOcc](const Region *R1, const Region *R2) {
414-
return R2->MaxPressure.less(ST, R1->MaxPressure, TargetOcc);
412+
llvm::sort(Regions, [this, TargetOcc](const Region *R1, const Region *R2) {
413+
return R2->MaxPressure.less(MF, R1->MaxPressure, TargetOcc);
415414
});
416415
}
417416

@@ -517,26 +516,25 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
517516
// Minimal Register Strategy
518517

519518
void GCNIterativeScheduler::scheduleMinReg(bool force) {
520-
const auto &ST = MF.getSubtarget<GCNSubtarget>();
521519
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
522520
const auto TgtOcc = MFI->getOccupancy();
523521
sortRegionsByPressure(TgtOcc);
524522

525523
auto MaxPressure = Regions.front()->MaxPressure;
526524
for (auto *R : Regions) {
527-
if (!force && R->MaxPressure.less(ST, MaxPressure, TgtOcc))
525+
if (!force && R->MaxPressure.less(MF, MaxPressure, TgtOcc))
528526
break;
529527

530528
BuildDAG DAG(*R, *this);
531529
const auto MinSchedule = makeMinRegSchedule(DAG.getTopRoots(), *this);
532530

533531
const auto RP = getSchedulePressure(*R, MinSchedule);
534-
LLVM_DEBUG(if (R->MaxPressure.less(ST, RP, TgtOcc)) {
532+
LLVM_DEBUG(if (R->MaxPressure.less(MF, RP, TgtOcc)) {
535533
dbgs() << "\nWarning: Pressure becomes worse after minreg!";
536534
printSchedRP(dbgs(), R->MaxPressure, RP);
537535
});
538536

539-
if (!force && MaxPressure.less(ST, RP, TgtOcc))
537+
if (!force && MaxPressure.less(MF, RP, TgtOcc))
540538
break;
541539

542540
scheduleRegion(*R, MinSchedule, RP);

llvm/lib/Target/AMDGPU/GCNRegPressure.cpp

Lines changed: 92 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,9 +88,10 @@ void GCNRegPressure::inc(unsigned Reg,
8888
}
8989
}
9090

91-
bool GCNRegPressure::less(const GCNSubtarget &ST,
92-
const GCNRegPressure& O,
91+
bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
9392
unsigned MaxOccupancy) const {
93+
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
94+
9495
const auto SGPROcc = std::min(MaxOccupancy,
9596
ST.getOccupancyWithNumSGPRs(getSGPRNum()));
9697
const auto VGPROcc =
@@ -104,18 +105,103 @@ bool GCNRegPressure::less(const GCNSubtarget &ST,
104105

105106
const auto Occ = std::min(SGPROcc, VGPROcc);
106107
const auto OtherOcc = std::min(OtherSGPROcc, OtherVGPROcc);
108+
109+
// Give first precedence to the better occupancy.
107110
if (Occ != OtherOcc)
108111
return Occ > OtherOcc;
109112

113+
unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
114+
unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
115+
116+
// SGPR excess pressure conditions
117+
unsigned ExcessSGPR = std::max(static_cast<int>(getSGPRNum() - MaxSGPRs), 0);
118+
unsigned OtherExcessSGPR =
119+
std::max(static_cast<int>(O.getSGPRNum() - MaxSGPRs), 0);
120+
121+
auto WaveSize = ST.getWavefrontSize();
122+
// The number of virtual VGPRs required to handle excess SGPR
123+
unsigned VGPRForSGPRSpills = (ExcessSGPR + (WaveSize - 1)) / WaveSize;
124+
unsigned OtherVGPRForSGPRSpills =
125+
(OtherExcessSGPR + (WaveSize - 1)) / WaveSize;
126+
127+
unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();
128+
129+
// Unified excess pressure conditions, accounting for VGPRs used for SGPR
130+
// spills
131+
unsigned ExcessVGPR =
132+
std::max(static_cast<int>(getVGPRNum(ST.hasGFX90AInsts()) +
133+
VGPRForSGPRSpills - MaxVGPRs),
134+
0);
135+
unsigned OtherExcessVGPR =
136+
std::max(static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts()) +
137+
OtherVGPRForSGPRSpills - MaxVGPRs),
138+
0);
139+
// Arch VGPR excess pressure conditions, accounting for VGPRs used for SGPR
140+
// spills
141+
unsigned ExcessArchVGPR = std::max(
142+
static_cast<int>(getVGPRNum(false) + VGPRForSGPRSpills - MaxArchVGPRs),
143+
0);
144+
unsigned OtherExcessArchVGPR =
145+
std::max(static_cast<int>(O.getVGPRNum(false) + OtherVGPRForSGPRSpills -
146+
MaxArchVGPRs),
147+
0);
148+
// AGPR excess pressure conditions
149+
unsigned ExcessAGPR = std::max(
150+
static_cast<int>(ST.hasGFX90AInsts() ? (getAGPRNum() - MaxArchVGPRs)
151+
: (getAGPRNum() - MaxVGPRs)),
152+
0);
153+
unsigned OtherExcessAGPR = std::max(
154+
static_cast<int>(ST.hasGFX90AInsts() ? (O.getAGPRNum() - MaxArchVGPRs)
155+
: (O.getAGPRNum() - MaxVGPRs)),
156+
0);
157+
158+
bool ExcessRP = ExcessSGPR || ExcessVGPR || ExcessArchVGPR || ExcessAGPR;
159+
bool OtherExcessRP = OtherExcessSGPR || OtherExcessVGPR ||
160+
OtherExcessArchVGPR || OtherExcessAGPR;
161+
162+
// Give second precedence to the reduced number of spills to hold the register
163+
// pressure.
164+
if (ExcessRP || OtherExcessRP) {
165+
// The difference in excess VGPR pressure, after including VGPRs used for
166+
// SGPR spills
167+
int VGPRDiff = ((OtherExcessVGPR + OtherExcessArchVGPR + OtherExcessAGPR) -
168+
(ExcessVGPR + ExcessArchVGPR + ExcessAGPR));
169+
170+
int SGPRDiff = OtherExcessSGPR - ExcessSGPR;
171+
172+
if (VGPRDiff != 0)
173+
return VGPRDiff > 0;
174+
if (SGPRDiff != 0) {
175+
unsigned PureExcessVGPR =
176+
std::max(static_cast<int>(getVGPRNum(ST.hasGFX90AInsts()) - MaxVGPRs),
177+
0) +
178+
std::max(static_cast<int>(getVGPRNum(false) - MaxArchVGPRs), 0);
179+
unsigned OtherPureExcessVGPR =
180+
std::max(
181+
static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts()) - MaxVGPRs),
182+
0) +
183+
std::max(static_cast<int>(O.getVGPRNum(false) - MaxArchVGPRs), 0);
184+
185+
// If we have a special case where there is a tie in excess VGPR, but one
186+
// of the pressures has VGPR usage from SGPR spills, prefer the pressure
187+
// with SGPR spills.
188+
if (PureExcessVGPR != OtherPureExcessVGPR)
189+
return SGPRDiff < 0;
190+
// If both pressures have the same excess pressure before and after
191+
// accounting for SGPR spills, prefer fewer SGPR spills.
192+
return SGPRDiff > 0;
193+
}
194+
}
195+
110196
bool SGPRImportant = SGPROcc < VGPROcc;
111197
const bool OtherSGPRImportant = OtherSGPROcc < OtherVGPROcc;
112198

113-
// if both pressures disagree on what is more important compare vgprs
199+
// If both pressures disagree on what is more important compare vgprs.
114200
if (SGPRImportant != OtherSGPRImportant) {
115201
SGPRImportant = false;
116202
}
117203

118-
// compare large regs pressure
204+
// Give third precedence to lower register tuple pressure.
119205
bool SGPRFirst = SGPRImportant;
120206
for (int I = 2; I > 0; --I, SGPRFirst = !SGPRFirst) {
121207
if (SGPRFirst) {
@@ -130,6 +216,8 @@ bool GCNRegPressure::less(const GCNSubtarget &ST,
130216
return VW < OtherVW;
131217
}
132218
}
219+
220+
// Give final precedence to lower general RP.
133221
return SGPRImportant ? (getSGPRNum() < O.getSGPRNum()):
134222
(getVGPRNum(ST.hasGFX90AInsts()) <
135223
O.getVGPRNum(ST.hasGFX90AInsts()));

llvm/lib/Target/AMDGPU/GCNRegPressure.h

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,20 @@ struct GCNRegPressure {
7474
return getOccupancy(ST) > O.getOccupancy(ST);
7575
}
7676

77-
bool less(const GCNSubtarget &ST, const GCNRegPressure& O,
78-
unsigned MaxOccupancy = std::numeric_limits<unsigned>::max()) const;
77+
/// Compares \p this GCNRegPressure to \p O, returning true if \p this is
78+
/// less. Since GCNRegPressure contains different types of pressures, and due
79+
/// to target-specific peculiarities (e.g. we care about occupancy rather than
80+
/// raw register usage), we determine if \p this GCNRegPressure is less than
81+
/// \p O based on the following tiered comparisons (in order of
82+
/// precedence):
83+
/// 1. Better occupancy
84+
/// 2. Less spilling (first preference to VGPR spills, then to SGPR spills)
85+
/// 3. Less tuple register pressure (first preference to VGPR tuples if we
86+
/// determine that SGPR pressure is not important)
87+
/// 4. Less raw register pressure (first preference to VGPRs if we
88+
/// determine that SGPR pressure is not important)
89+
bool less(const MachineFunction &MF, const GCNRegPressure &O,
90+
unsigned MaxOccupancy = std::numeric_limits<unsigned>::max()) const;
7991

8092
bool operator==(const GCNRegPressure &O) const {
8193
return std::equal(&Value[0], &Value[TOTAL_KINDS], O.Value);

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -977,6 +977,7 @@ void GCNSchedStage::checkScheduling() {
977977

978978
unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
979979
unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
980+
980981
if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
981982
PressureAfter.getAGPRNum() > MaxVGPRs ||
982983
PressureAfter.getSGPRNum() > MaxSGPRs) {
@@ -1199,9 +1200,8 @@ bool ILPInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
11991200
}
12001201

12011202
bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
1202-
if (WavesAfter <= MFI.getMinWavesPerEU() &&
1203-
!PressureAfter.less(ST, PressureBefore) &&
1204-
isRegionWithExcessRP()) {
1203+
if (WavesAfter <= MFI.getMinWavesPerEU() && isRegionWithExcessRP() &&
1204+
!PressureAfter.less(MF, PressureBefore)) {
12051205
LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
12061206
return true;
12071207
}

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1382,6 +1382,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
13821382
return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
13831383
}
13841384

1385+
/// \returns Addressable number of architectural VGPRs supported by the
1386+
/// subtarget.
1387+
unsigned getAddressableNumArchVGPRs() const {
1388+
return AMDGPU::IsaInfo::getAddressableNumArchVGPRs(this);
1389+
}
1390+
13851391
/// \returns Addressable number of VGPRs supported by the subtarget.
13861392
unsigned getAddressableNumVGPRs() const {
13871393
return AMDGPU::IsaInfo::getAddressableNumVGPRs(this);

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1107,10 +1107,12 @@ unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
11071107
return IsWave32 ? 1024 : 512;
11081108
}
11091109

1110+
// Returns the fixed value 256 for every subtarget; \p STI is not consulted.
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) { return 256; }
1111+
11101112
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
11111113
if (STI->getFeatureBits().test(FeatureGFX90AInsts))
11121114
return 512;
1113-
return 256;
1115+
return getAddressableNumArchVGPRs(STI);
11141116
}
11151117

11161118
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,10 @@ unsigned getVGPREncodingGranule(
295295
/// \returns Total number of VGPRs for given subtarget \p STI.
296296
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
297297

298+
/// \returns Addressable number of architectural VGPRs for a given subtarget \p
299+
/// STI.
300+
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI);
301+
298302
/// \returns Addressable number of VGPRs for given subtarget \p STI.
299303
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
300304

0 commit comments

Comments
 (0)