Skip to content

Commit 8fd0afd

Browse files
[clr-interp] Fix performance of compiling huge methods (#119514)
- Remove O(n) algorithm in InterpCompiler::EmitCodeIns which was debug only, replace with O(1) algorithm - The new conservative range computuation code also has an O(N^2) algorithm for ranges, and this is replaced with an O(n*logN) algorithm with better constant factors (Binary search in an array instead of linear scan of a linked list) - Adjust apis on TArray so that the RemoveAt and Remove functions don't re-order the array. Change the names of the existing apis to have a _ArrayIsBag suffix, and add more typical implementations of RemoveAt and InsertAt - Make TArray use an allocator, and implement one based on the MemPool and one based on malloc/free
1 parent d815d8b commit 8fd0afd

File tree

6 files changed

+265
-122
lines changed

6 files changed

+265
-122
lines changed

src/coreclr/interpreter/compiler.cpp

Lines changed: 135 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,23 @@ class InterpIAllocator : public IAllocator
8989
}
9090
};
9191

92+
93+
void* MemPoolAllocator::Alloc(size_t sz) const { return m_compiler->AllocMemPool(sz); }
94+
void MemPoolAllocator::Free(void* ptr) const { /* no-op */ }
95+
96+
void* MallocAllocator::Alloc(size_t sz) const
97+
{
98+
void* mem = malloc(sz);
99+
if (mem == nullptr)
100+
NOMEM();
101+
return mem;
102+
}
103+
void MallocAllocator::Free(void* ptr) const
104+
{
105+
free(ptr);
106+
}
107+
108+
92109
size_t GetGenericLookupOffset(const CORINFO_RUNTIME_LOOKUP *pLookup, uint32_t index)
93110
{
94111
if (pLookup->indirections == CORINFO_USEHELPER)
@@ -716,7 +733,7 @@ uint32_t InterpCompiler::ConvertOffset(int32_t offset)
716733
return offset * sizeof(int32_t) + sizeof(void*);
717734
}
718735

719-
int32_t* InterpCompiler::EmitCodeIns(int32_t *ip, InterpInst *ins, TArray<Reloc*> *relocs)
736+
int32_t* InterpCompiler::EmitCodeIns(int32_t *ip, InterpInst *ins, TArray<Reloc*, MemPoolAllocator> *relocs)
720737
{
721738
ins->nativeOffset = (int32_t)(ip - m_pMethodCode);
722739

@@ -838,10 +855,16 @@ int32_t* InterpCompiler::EmitCodeIns(int32_t *ip, InterpInst *ins, TArray<Reloc*
838855
// This code assumes that instructions for the same IL offset are emitted in a single run without
839856
// any other IL offsets in between and that they don't repeat again after the run ends.
840857
#ifdef DEBUG
841-
for (int i = 0; i < m_ILToNativeMapSize; i++)
858+
// Use a side table for fast checking of whether or not we've emitted this IL offset before.
859+
// Walking the m_pILToNativeMap can be slow for excessively large functions
860+
if (m_pNativeMapIndexToILOffset == NULL)
842861
{
843-
assert(m_pILToNativeMap[i].ilOffset != ilOffset);
862+
m_pNativeMapIndexToILOffset = new (this) int32_t[m_ILCodeSize];
863+
memset(m_pNativeMapIndexToILOffset, 0, sizeof(int32_t) * m_ILCodeSize);
844864
}
865+
866+
assert(m_pNativeMapIndexToILOffset[ilOffset] == 0);
867+
m_pNativeMapIndexToILOffset[ilOffset] = m_ILToNativeMapSize;
845868
#endif // DEBUG
846869

847870
// Since we can have at most one entry per IL offset,
@@ -858,7 +881,7 @@ int32_t* InterpCompiler::EmitCodeIns(int32_t *ip, InterpInst *ins, TArray<Reloc*
858881
return ip;
859882
}
860883

861-
void InterpCompiler::PatchRelocations(TArray<Reloc*> *relocs)
884+
void InterpCompiler::PatchRelocations(TArray<Reloc*, MemPoolAllocator> *relocs)
862885
{
863886
int32_t size = relocs->GetSize();
864887

@@ -883,7 +906,7 @@ void InterpCompiler::PatchRelocations(TArray<Reloc*> *relocs)
883906
}
884907
}
885908

886-
int32_t *InterpCompiler::EmitBBCode(int32_t *ip, InterpBasicBlock *bb, TArray<Reloc*> *relocs)
909+
int32_t *InterpCompiler::EmitBBCode(int32_t *ip, InterpBasicBlock *bb, TArray<Reloc*, MemPoolAllocator> *relocs)
887910
{
888911
m_pCBB = bb;
889912
m_pCBB->nativeOffset = (int32_t)(ip - m_pMethodCode);
@@ -906,12 +929,13 @@ int32_t *InterpCompiler::EmitBBCode(int32_t *ip, InterpBasicBlock *bb, TArray<Re
906929

907930
void InterpCompiler::EmitCode()
908931
{
909-
TArray<Reloc*> relocs;
932+
TArray<Reloc*, MemPoolAllocator> relocs(GetMemPoolAllocator());
910933
int32_t codeSize = ComputeCodeSize();
911934
m_pMethodCode = (int32_t*)AllocMethodData(codeSize * sizeof(int32_t));
912935

913936
// These will eventually be freed by the VM, and they use the delete [] operator for the deletion.
914937
m_pILToNativeMap = new ICorDebugInfo::OffsetMapping[m_ILCodeSize];
938+
915939
ICorDebugInfo::NativeVarInfo* eeVars = NULL;
916940
if (m_numILVars > 0)
917941
{
@@ -1001,6 +1025,13 @@ class InterpGcSlotAllocator
10011025
uint32_t startOffset;
10021026
uint32_t endOffset;
10031027

1028+
bool OverlapsOrAdjacentTo(uint32_t startOther, uint32_t endOther) const
1029+
{
1030+
assert (startOther < endOther);
1031+
1032+
return !(endOffset < startOther || startOffset > endOther);
1033+
}
1034+
10041035
void MergeWith(uint32_t startOther, uint32_t endOther)
10051036
{
10061037
// This can only be called with overlapping or adjacent ranges
@@ -1017,38 +1048,81 @@ class InterpGcSlotAllocator
10171048
}
10181049
};
10191050

1020-
ConservativeRange** m_conservativeRanges = nullptr;
1021-
1022-
ConservativeRange* FindAndRemoveOverlappingOrAdjacentRange(uint32_t offsetBytes, uint32_t start, uint32_t end)
1051+
struct ConservativeRanges
10231052
{
1024-
uint32_t slotIndex = offsetBytes / sizeof(void *);
1025-
ConservativeRange* prev = nullptr;
1026-
ConservativeRange* range = m_conservativeRanges[slotIndex];
1027-
while (range)
1053+
ConservativeRanges(InterpCompiler* pCompiler) : m_liveRanges(pCompiler->GetMemPoolAllocator())
10281054
{
1029-
if (!(end < range->startOffset || start > range->endOffset))
1055+
}
1056+
1057+
TArray<ConservativeRange, MemPoolAllocator> m_liveRanges;
1058+
1059+
void InsertRange(InterpCompiler* pCompiler, uint32_t start, uint32_t end)
1060+
{
1061+
#ifdef DEBUG
1062+
bool m_verbose = pCompiler->m_verbose;
1063+
#endif
1064+
1065+
ConservativeRange newRange(start, end);
1066+
1067+
if (m_liveRanges.GetSize() != 0)
10301068
{
1031-
// Overlapping or adjacent ranges
1032-
if (prev)
1033-
prev->next = range->next;
1034-
else
1035-
m_conservativeRanges[slotIndex] = range->next;
1036-
range->next = nullptr;
1037-
return range;
1069+
// Find the first range which has a start offset greater or equal to the new ranges start offset
1070+
1071+
int32_t hiBound = m_liveRanges.GetSize();
1072+
int32_t loBound = 0;
1073+
while (loBound < hiBound)
1074+
{
1075+
int32_t mid = (loBound + hiBound) / 2;
1076+
ConservativeRange existingRange = m_liveRanges.Get(mid);
1077+
if (existingRange.startOffset >= start)
1078+
{
1079+
hiBound = mid;
1080+
}
1081+
else
1082+
{
1083+
loBound = mid + 1;
1084+
}
1085+
}
1086+
int32_t iFirstRangeWithGreaterStartOffset = hiBound;
1087+
1088+
// The range before the first range which is greater, may overlap with the new range, so subtract 1 if possible
1089+
int32_t iFirstInterestingRange = iFirstRangeWithGreaterStartOffset > 0 ? iFirstRangeWithGreaterStartOffset - 1: 0;
1090+
1091+
for (int32_t i = iFirstInterestingRange; i < m_liveRanges.GetSize(); i++)
1092+
{
1093+
ConservativeRange existingRange = m_liveRanges.Get(i);
1094+
if (existingRange.OverlapsOrAdjacentTo(start, end))
1095+
{
1096+
ConservativeRange updatedRange = existingRange;
1097+
INTERP_DUMP("Merging with existing range [%u - %u]\n", existingRange.startOffset, existingRange.endOffset);
1098+
updatedRange.MergeWith(start, end);
1099+
while ((i + 1 < m_liveRanges.GetSize()) && m_liveRanges.Get(i + 1).OverlapsOrAdjacentTo(start, end))
1100+
{
1101+
ConservativeRange otherExistingRange = m_liveRanges.Get(i + 1);
1102+
INTERP_DUMP("Merging with existing range [%u - %u]\n", otherExistingRange.startOffset, otherExistingRange.endOffset);
1103+
updatedRange.MergeWith(otherExistingRange.startOffset, otherExistingRange.endOffset);
1104+
m_liveRanges.RemoveAt(i + 1);
1105+
}
1106+
INTERP_DUMP("Final merged range [%u - %u]\n", updatedRange.startOffset, updatedRange.endOffset);
1107+
m_liveRanges.Set(i, updatedRange);
1108+
return;
1109+
}
1110+
1111+
if (existingRange.startOffset > start)
1112+
{
1113+
// If we reach here, the new range is disjoint from all existing ranges, and we've found the right place to insert
1114+
m_liveRanges.InsertAt(i, newRange);
1115+
return;
1116+
}
1117+
}
1118+
10381119
}
1039-
prev = range;
1040-
range = range->next;
1120+
// If we reach here, the new range is disjoint from all existing ranges, and should be added at the end of the list
1121+
m_liveRanges.Add(newRange);
10411122
}
1042-
return nullptr;
1043-
}
1044-
1045-
void InsertConservativeRange(uint32_t offsetBytes, ConservativeRange* range)
1046-
{
1047-
uint32_t slotIndex = offsetBytes / sizeof(void *);
1123+
};
10481124

1049-
range->next = m_conservativeRanges[slotIndex];
1050-
m_conservativeRanges[slotIndex] = range;
1051-
}
1125+
TArray<ConservativeRanges*, MemPoolAllocator> m_conservativeRanges;
10521126

10531127
unsigned m_slotTableSize;
10541128

@@ -1068,6 +1142,7 @@ class InterpGcSlotAllocator
10681142
InterpGcSlotAllocator(InterpCompiler *compiler, InterpreterGcInfoEncoder *encoder)
10691143
: m_compiler(compiler)
10701144
, m_encoder(encoder)
1145+
, m_conservativeRanges(compiler->GetMemPoolAllocator())
10711146
, m_slotTableSize(compiler->m_totalVarsStackSize / sizeof(void *))
10721147
#ifdef DEBUG
10731148
, m_verbose(compiler->m_verbose)
@@ -1079,8 +1154,7 @@ class InterpGcSlotAllocator
10791154
// 0 is a valid slot id so default-initialize all the slots to 0xFFFFFFFF
10801155
memset(m_slotTables[i], 0xFF, sizeof(GcSlotId) * m_slotTableSize);
10811156
}
1082-
m_conservativeRanges = new (m_compiler) ConservativeRange *[m_slotTableSize];
1083-
memset(m_conservativeRanges, 0, sizeof(ConservativeRange *) * m_slotTableSize);
1157+
m_conservativeRanges.GrowBy(m_slotTableSize);
10841158
}
10851159

10861160
void AllocateOrReuseGcSlot(uint32_t offsetBytes, GcSlotFlags flags)
@@ -1131,51 +1205,41 @@ class InterpGcSlotAllocator
11311205
startOffset, endOffset
11321206
);
11331207

1134-
1135-
ConservativeRange* pExistingRange = FindAndRemoveOverlappingOrAdjacentRange(offsetBytes, startOffset, endOffset);
1136-
if (pExistingRange != nullptr)
1137-
{
1138-
INTERP_DUMP("Merging with existing range [%u - %u]\n", pExistingRange->startOffset, pExistingRange->endOffset);
1139-
pExistingRange->MergeWith(startOffset, endOffset);
1140-
ConservativeRange* pOtherExistingRange = FindAndRemoveOverlappingOrAdjacentRange(offsetBytes, pExistingRange->startOffset, pExistingRange->endOffset);
1141-
while (pOtherExistingRange != nullptr)
1142-
{
1143-
INTERP_DUMP("Merging with existing range [%u - %u]\n", pOtherExistingRange->startOffset, pOtherExistingRange->endOffset);
1144-
pExistingRange->MergeWith(pOtherExistingRange->startOffset, pOtherExistingRange->endOffset);
1145-
pOtherExistingRange = FindAndRemoveOverlappingOrAdjacentRange(offsetBytes, pExistingRange->startOffset, pExistingRange->endOffset);
1146-
};
1147-
InsertConservativeRange(offsetBytes, pExistingRange);
1148-
}
1149-
else
1208+
uint32_t slotIndex = offsetBytes / sizeof(void *);
1209+
if (m_conservativeRanges.Get(slotIndex) == nullptr)
11501210
{
1151-
ConservativeRange* pNewRange = new (m_compiler) ConservativeRange(startOffset, endOffset);
1152-
InsertConservativeRange(offsetBytes, pNewRange);
1211+
m_conservativeRanges.Set(slotIndex, new (m_compiler) ConservativeRanges(m_compiler));
11531212
}
1213+
m_conservativeRanges.Get(slotIndex)->InsertRange(m_compiler, startOffset, endOffset);
11541214
}
11551215

11561216
void ReportConservativeRangesToGCEncoder()
11571217
{
11581218
uint32_t maxEndOffset = m_compiler->ConvertOffset(m_compiler->m_methodCodeSize);
1159-
for (uint32_t i = 0; i < m_slotTableSize; i++)
1219+
for (uint32_t iSlot = 0; iSlot < m_slotTableSize; iSlot++)
11601220
{
1161-
ConservativeRange* range = m_conservativeRanges[i];
1221+
ConservativeRanges* ranges = m_conservativeRanges.Get(iSlot);
1222+
if (ranges == nullptr)
1223+
continue;
11621224

1163-
if (range == nullptr)
1225+
if (ranges->m_liveRanges.GetSize() == 0)
11641226
continue;
11651227

1166-
GcSlotId* pSlot = LocateGcSlotTableEntry(i * sizeof(void *), GC_SLOT_INTERIOR);
1228+
GcSlotId* pSlot = LocateGcSlotTableEntry(iSlot * sizeof(void *), GC_SLOT_INTERIOR);
11671229
assert(pSlot != nullptr && *pSlot != (GcSlotId)-1);
11681230

1169-
while (range)
1231+
for(int32_t iRange = 0; iRange < ranges->m_liveRanges.GetSize(); iRange++)
11701232
{
1233+
ConservativeRange range = ranges->m_liveRanges.Get(iRange);
1234+
11711235
INTERP_DUMP(
11721236
"Conservative range for slot %u at %u [%u - %u]\n",
11731237
*pSlot,
1174-
(unsigned)(i * sizeof(void *)),
1175-
range->startOffset,
1176-
range->endOffset + 1
1238+
(unsigned)(iSlot * sizeof(void *)),
1239+
range.startOffset,
1240+
range.endOffset + 1
11771241
);
1178-
m_encoder->SetSlotState(range->startOffset, *pSlot, GC_SLOT_LIVE);
1242+
m_encoder->SetSlotState(range.startOffset, *pSlot, GC_SLOT_LIVE);
11791243

11801244
// Conservatively assume that the lifetime of a slot extends to just past the end of what
11811245
// the lifetime analysis has determined. This is due to an inaccuracy of reporting in the
@@ -1190,16 +1254,15 @@ class InterpGcSlotAllocator
11901254
// in the caller's stack. This would likely reduce the conservative reporting being done by
11911255
// functions up the stack in the interpreter case, such that we don't have excessive conservative
11921256
// reporting causing substantial pinning overhead.
1193-
uint32_t endOffset = range->endOffset + 1;
1257+
uint32_t endOffset = range.endOffset + 1;
11941258

11951259
if (endOffset > maxEndOffset)
11961260
{
11971261
// The GcInfoEncoder should not report slots beyond the method's end
11981262
endOffset = maxEndOffset;
1199-
assert(endOffset == range->endOffset);
1263+
assert(endOffset == range.endOffset);
12001264
}
12011265
m_encoder->SetSlotState(endOffset, *pSlot, GC_SLOT_DEAD);
1202-
range = range->next;
12031266
}
12041267
}
12051268
}
@@ -1500,9 +1563,15 @@ static void FreeInterpreterStackMap(void *key, void *value, void *userdata)
15001563

15011564
InterpCompiler::InterpCompiler(COMP_HANDLE compHnd,
15021565
CORINFO_METHOD_INFO* methodInfo)
1503-
: m_stackmapsByClass(FreeInterpreterStackMap)
1566+
:
1567+
#ifdef DEBUG
1568+
m_methodName(GetMallocAllocator()),
1569+
#endif
1570+
m_stackmapsByClass(FreeInterpreterStackMap)
15041571
, m_pInitLocalsIns(nullptr)
15051572
, m_hiddenArgumentVar(-1)
1573+
, m_leavesTable(this)
1574+
, m_dataItems(this)
15061575
, m_globalVarsWithRefsStackTop(0)
15071576
{
15081577
m_genericLookupToDataItemIndex.Init(&m_dataItems, this);
@@ -6853,7 +6922,7 @@ void InterpCompiler::PrintMethodName(CORINFO_METHOD_HANDLE method)
68536922
CORINFO_SIG_INFO sig;
68546923
m_compHnd->getMethodSig(method, &sig, cls);
68556924

6856-
TArray<char> methodName = ::PrintMethodName(m_compHnd, cls, method, &sig,
6925+
TArray<char, MallocAllocator> methodName = ::PrintMethodName(m_compHnd, cls, method, &sig,
68576926
/* includeAssembly */ false,
68586927
/* includeClass */ true,
68596928
/* includeClassInstantiation */ true,

0 commit comments

Comments
 (0)