Skip to content

Commit 1728588

Browse files
committed
make concurrent threads help with building the index
1 parent a985d18 commit 1728588

File tree

1 file changed

+37
-8
lines changed

1 file changed

+37
-8
lines changed

src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp

Lines changed: 37 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -206,6 +206,7 @@ bool CoffNativeCodeManager::AllocFuncTableIndex()
206206
if (!index)
207207
return false;
208208

209+
memset(index, 0, indexSize * sizeof(uint32_t));
209210
m_indexCount++;
210211

211212
while (indexSize > INDEX_BRANCHING_FACTOR)
@@ -216,6 +217,7 @@ bool CoffNativeCodeManager::AllocFuncTableIndex()
216217
if (!index)
217218
return false;
218219

220+
memset(index, 0, indexSize * sizeof(uint32_t));
219221
m_indexCount++;
220222
}
221223

@@ -228,34 +230,61 @@ uint32_t** CoffNativeCodeManager::InitFuncTableIndex()
228230
// max offset is beyond the range of managed methods.
229231
int maxOffset = (int)((TADDR)m_pvManagedCodeStartRange + m_cbManagedCodeRange - m_moduleBase);
230232

233+
// It is possible to see several threads come here at once.
234+
// We can spin-wait for one thread to do the work or just let all threads do the initialization.
235+
// Either way it will take roughly the same time as for the first thread to complete the work.
236+
// Yet we can make this complete faster if threads help each other by working on different
237+
// parts of the index.
238+
uint32_t perThreadBias = (uint32_t)(((size_t)&perThreadBias * 11400714819323198485ul) >> 32);
239+
231240
// lets build the index for the runtime table. for every granule that has elements we will have an index entry
232241
uint32_t indexSize = (m_nRuntimeFunctionTable + FUNCTABLE_INDEX_GRANULARITY - 1) / FUNCTABLE_INDEX_GRANULARITY;
233242
uint32_t indexCount = 0;
234243
uint32_t* index = m_indices[indexCount++];
235244

236-
// in every index N we will put the lowest value from the granule N + 1
245+
// every index N will contain the lowest value from the granule N + 1
237246
// when we will scan the value N in the indices and see that it is higher than the target, we will know
238-
// that the granule N must be scanned for the entry as the next granule will have higher addresses.
239-
for (uint32_t i = 1; i < indexSize; i++)
247+
// that the granule N must be searched for the entry as the next granule will have higher addresses.
248+
uint32_t start = (perThreadBias % indexSize) | 1;
249+
for (uint32_t i = start; i < indexSize; i++)
240250
{
241-
_ASSERTE(i * FUNCTABLE_INDEX_GRANULARITY < m_nRuntimeFunctionTable);
242-
index[i - 1] = m_pRuntimeFunctionTable[i * FUNCTABLE_INDEX_GRANULARITY].BeginAddress;
251+
if (index[i - 1] == 0)
252+
{
253+
_ASSERTE(i * FUNCTABLE_INDEX_GRANULARITY < m_nRuntimeFunctionTable);
254+
index[i - 1] = m_pRuntimeFunctionTable[i * FUNCTABLE_INDEX_GRANULARITY].BeginAddress;
255+
}
256+
}
257+
258+
for (uint32_t i = 1; i < start; i++)
259+
{
260+
if (index[i - 1] == 0)
261+
{
262+
_ASSERTE(i * FUNCTABLE_INDEX_GRANULARITY < m_nRuntimeFunctionTable);
263+
index[i - 1] = m_pRuntimeFunctionTable[i * FUNCTABLE_INDEX_GRANULARITY].BeginAddress;
264+
}
243265
}
244266

245267
// we put the maxOffset at the end of the index.
246268
// there is no N + 1 granule to get the value from, so the last slot will contain the sentinel.
247269
index[indexSize - 1] = maxOffset;
248270

249-
// Now build the N-ary tree of indices.
250-
// At branching factor 16 a program with 32K methods will have 3 sub-index levels.
271+
// Now build an N-ary tree of indices.
272+
// Example: at branching factor 16 a program with 32K methods will have 3 sub-index levels.
251273
uint32_t* prevIdx = index;
252274
while (indexSize > INDEX_BRANCHING_FACTOR)
253275
{
254276
uint32_t prevSize = indexSize;
255277
indexSize = (indexSize + INDEX_BRANCHING_FACTOR - 1) / INDEX_BRANCHING_FACTOR;
256278
index = m_indices[indexCount++];
257279

258-
for (uint32_t i = 1; i < indexSize; i++)
280+
start = (perThreadBias % indexSize) | 1;
281+
for (uint32_t i = start; i < indexSize; i++)
282+
{
283+
_ASSERTE(i * INDEX_BRANCHING_FACTOR < prevSize);
284+
index[i - 1] = prevIdx[i * INDEX_BRANCHING_FACTOR];
285+
}
286+
287+
for (uint32_t i = 1; i < start; i++)
259288
{
260289
_ASSERTE(i * INDEX_BRANCHING_FACTOR < prevSize);
261290
index[i - 1] = prevIdx[i * INDEX_BRANCHING_FACTOR];

0 commit comments

Comments
 (0)