Skip to content

Commit 70b37f4

Browse files
committed
[MCA][InstrBuilder] Always check for implicit uses of resource units (PR50725).
When instructions are issued to the underlying pipeline resources, mca::ResourceManager should also check for the presence of extra uses induced by the explicit consumption of multiple partially overlapping group resources. Fixes PR50725.
1 parent a167898 commit 70b37f4

File tree

4 files changed

+75
-3
lines changed

4 files changed

+75
-3
lines changed

llvm/include/llvm/MCA/Instruction.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,9 @@ struct InstrDesc {
359359
// A bitmask of used processor resource units.
360360
uint64_t UsedProcResUnits;
361361

362+
// A bitmask of implicit uses of processor resource units.
363+
uint64_t ImplicitlyUsedProcResUnits;
364+
362365
// A bitmask of used processor resource groups.
363366
uint64_t UsedProcResGroups;
364367

llvm/lib/MCA/HardwareUnits/ResourceManager.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,8 @@ ResourceManager::ResourceManager(const MCSchedModel &SM)
114114
Resource2Groups(SM.getNumProcResourceKinds() - 1, 0),
115115
ProcResID2Mask(SM.getNumProcResourceKinds(), 0),
116116
ResIndex2ProcResID(SM.getNumProcResourceKinds() - 1, 0),
117-
ProcResUnitMask(0), ReservedResourceGroups(0),
118-
AvailableBuffers(~0ULL), ReservedBuffers(0) {
117+
ProcResUnitMask(0), ReservedResourceGroups(0), AvailableBuffers(~0ULL),
118+
ReservedBuffers(0) {
119119
computeProcResourceMasks(SM, ProcResID2Mask);
120120

121121
// initialize vector ResIndex2ProcResID.
@@ -288,6 +288,15 @@ uint64_t ResourceManager::checkAvailability(const InstrDesc &Desc) const {
288288
BusyResourceMask |= E.first;
289289
}
290290

291+
uint64_t ImplicitUses = Desc.ImplicitlyUsedProcResUnits;
292+
while (ImplicitUses) {
293+
uint64_t Use = ImplicitUses & -ImplicitUses;
294+
ImplicitUses ^= Use;
295+
unsigned Index = getResourceStateIndex(Use);
296+
if (!Resources[Index]->isReady(/* NumUnits */ 1))
297+
BusyResourceMask |= Index;
298+
}
299+
291300
BusyResourceMask &= ProcResUnitMask;
292301
if (BusyResourceMask)
293302
return BusyResourceMask;

llvm/lib/MCA/InstrBuilder.cpp

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ static void initializeUsedResources(InstrDesc &ID,
4343

4444
// Populate resources consumed.
4545
using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
46-
std::vector<ResourcePlusCycles> Worklist;
46+
SmallVector<ResourcePlusCycles, 4> Worklist;
4747

4848
// Track cycles contributed by resources that are in a "Super" relationship.
4949
// This is required if we want to correctly match the behavior of method
@@ -109,6 +109,11 @@ static void initializeUsedResources(InstrDesc &ID,
109109

110110
uint64_t UsedResourceUnits = 0;
111111
uint64_t UsedResourceGroups = 0;
112+
auto GroupIt = find_if(Worklist, [](const ResourcePlusCycles &Elt) {
113+
return countPopulation(Elt.first) > 1;
114+
});
115+
unsigned FirstGroupIdx = std::distance(Worklist.begin(), GroupIt);
116+
uint64_t ImpliedUsesOfResourceUnits = 0;
112117

113118
// Remove cycles contributed by smaller resources.
114119
for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
@@ -127,6 +132,15 @@ static void initializeUsedResources(InstrDesc &ID,
127132
// Remove the leading 1 from the resource group mask.
128133
NormalizedMask ^= PowerOf2Floor(NormalizedMask);
129134
UsedResourceGroups |= (A.first ^ NormalizedMask);
135+
136+
uint64_t AvailableMask = NormalizedMask & ~UsedResourceUnits;
137+
if ((NormalizedMask != AvailableMask) &&
138+
countPopulation(AvailableMask) == 1) {
139+
// At simulation time, this resource group use will decay into a simple
140+
// use of the resource unit identified by `AvailableMask`.
141+
ImpliedUsesOfResourceUnits |= AvailableMask;
142+
UsedResourceUnits |= AvailableMask;
143+
}
130144
}
131145

132146
for (unsigned J = I + 1; J < E; ++J) {
@@ -139,6 +153,31 @@ static void initializeUsedResources(InstrDesc &ID,
139153
}
140154
}
141155

156+
// Look for implicit uses of processor resource units. These are resource
157+
// units which are indirectly consumed by resource groups, and that must be
158+
// always available on instruction issue.
159+
while (ImpliedUsesOfResourceUnits) {
160+
ID.ImplicitlyUsedProcResUnits |= ImpliedUsesOfResourceUnits;
161+
ImpliedUsesOfResourceUnits = 0;
162+
for (unsigned I = FirstGroupIdx, E = Worklist.size(); I < E; ++I) {
163+
ResourcePlusCycles &A = Worklist[I];
164+
if (!A.second.size())
165+
continue;
166+
167+
uint64_t NormalizedMask = A.first;
168+
assert(countPopulation(NormalizedMask) > 1);
169+
// Remove the leading 1 from the resource group mask.
170+
NormalizedMask ^= PowerOf2Floor(NormalizedMask);
171+
uint64_t AvailableMask = NormalizedMask & ~UsedResourceUnits;
172+
if ((NormalizedMask != AvailableMask) &&
173+
countPopulation(AvailableMask) != 1)
174+
continue;
175+
176+
UsedResourceUnits |= AvailableMask;
177+
ImpliedUsesOfResourceUnits |= AvailableMask;
178+
}
179+
}
180+
142181
// A SchedWrite may specify a number of cycles in which a resource group
143182
// is reserved. For example (on target x86; cpu Haswell):
144183
//
@@ -198,6 +237,8 @@ static void initializeUsedResources(InstrDesc &ID,
198237
BufferIDs ^= Current;
199238
}
200239
dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
240+
dbgs() << "\t\tImplicitly Used Units="
241+
<< format_hex(ID.ImplicitlyUsedProcResUnits, 16) << '\n';
201242
dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
202243
<< '\n';
203244
});
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
2+
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -all-views=false -summary-view -iterations=1 < %s | FileCheck %s
3+
4+
# Do not crash when running this simulation.
5+
# It is not safe to issue FXRSTOR if SKLPort1 is not available.
6+
7+
bswap %eax
8+
bswap %eax
9+
fxrstor 64(%rsp)
10+
11+
# CHECK: Iterations: 1
12+
# CHECK-NEXT: Instructions: 3
13+
# CHECK-NEXT: Total Cycles: 68
14+
# CHECK-NEXT: Total uOps: 92
15+
16+
# CHECK: Dispatch Width: 6
17+
# CHECK-NEXT: uOps Per Cycle: 1.35
18+
# CHECK-NEXT: IPC: 0.04
19+
# CHECK-NEXT: Block RThroughput: 16.5

0 commit comments

Comments
 (0)