@@ -206,6 +206,7 @@ bool CoffNativeCodeManager::AllocFuncTableIndex()
206
206
if (!index)
207
207
return false ;
208
208
209
+ memset (index, 0 , indexSize * sizeof (uint32_t ));
209
210
m_indexCount++;
210
211
211
212
while (indexSize > INDEX_BRANCHING_FACTOR)
@@ -216,6 +217,7 @@ bool CoffNativeCodeManager::AllocFuncTableIndex()
216
217
if (!index)
217
218
return false ;
218
219
220
+ memset (index, 0 , indexSize * sizeof (uint32_t ));
219
221
m_indexCount++;
220
222
}
221
223
@@ -228,34 +230,61 @@ uint32_t** CoffNativeCodeManager::InitFuncTableIndex()
228
230
// max offset is beyond the range of managed methods.
229
231
int maxOffset = (int )((TADDR)m_pvManagedCodeStartRange + m_cbManagedCodeRange - m_moduleBase);
230
232
233
+ // It is possible to see several threads come here at once.
234
+ // We can spin-wait for one thread to do the work or just let all threads do the initialization.
235
+ // Either way it will take roughly the same time as for the first thread to complete the work.
236
+ // Yet we can make this complete faster if threads help each other by working on different
237
+ // parts of the index.
238
+ uint32_t perThreadBias = (uint32_t )(((size_t )&perThreadBias * 11400714819323198485ul ) >> 32 );
239
+
231
240
// let's build the index for the runtime table. For every granule that has elements we will have an index entry
232
241
uint32_t indexSize = (m_nRuntimeFunctionTable + FUNCTABLE_INDEX_GRANULARITY - 1 ) / FUNCTABLE_INDEX_GRANULARITY;
233
242
uint32_t indexCount = 0 ;
234
243
uint32_t * index = m_indices[indexCount++];
235
244
236
- // in every index N we will put the lowest value from the granule N + 1
245
+ // every index N will contain the lowest value from the granule N + 1
237
246
// when we scan the value N in the indices and see that it is higher than the target, we will know
238
- // that the granule N must be scanned for the entry as the next granule will have higher addresses.
239
- for (uint32_t i = 1 ; i < indexSize; i++)
247
+ // that the granule N must be searched for the entry as the next granule will have higher addresses.
248
+ uint32_t start = (perThreadBias % indexSize) | 1 ;
249
+ for (uint32_t i = start; i < indexSize; i++)
240
250
{
241
- _ASSERTE (i * FUNCTABLE_INDEX_GRANULARITY < m_nRuntimeFunctionTable);
242
- index[i - 1 ] = m_pRuntimeFunctionTable[i * FUNCTABLE_INDEX_GRANULARITY].BeginAddress ;
251
+ if (index[i - 1 ] == 0 )
252
+ {
253
+ _ASSERTE (i * FUNCTABLE_INDEX_GRANULARITY < m_nRuntimeFunctionTable);
254
+ index[i - 1 ] = m_pRuntimeFunctionTable[i * FUNCTABLE_INDEX_GRANULARITY].BeginAddress ;
255
+ }
256
+ }
257
+
258
+ for (uint32_t i = 1 ; i < start; i++)
259
+ {
260
+ if (index[i - 1 ] == 0 )
261
+ {
262
+ _ASSERTE (i * FUNCTABLE_INDEX_GRANULARITY < m_nRuntimeFunctionTable);
263
+ index[i - 1 ] = m_pRuntimeFunctionTable[i * FUNCTABLE_INDEX_GRANULARITY].BeginAddress ;
264
+ }
243
265
}
244
266
245
267
// we put the maxOffset at the end of the index.
246
268
// there is no N + 1 granule to get the value from, so the last slot will contain the sentinel.
247
269
index[indexSize - 1 ] = maxOffset;
248
270
249
- // Now build the N-ary tree of indices.
250
- // At branching factor 16 a program with 32K methods will have 3 sub-index levels.
271
+ // Now build an N-ary tree of indices.
272
+ // Example: at branching factor 16 a program with 32K methods will have 3 sub-index levels.
251
273
uint32_t * prevIdx = index;
252
274
while (indexSize > INDEX_BRANCHING_FACTOR)
253
275
{
254
276
uint32_t prevSize = indexSize;
255
277
indexSize = (indexSize + INDEX_BRANCHING_FACTOR - 1 ) / INDEX_BRANCHING_FACTOR;
256
278
index = m_indices[indexCount++];
257
279
258
- for (uint32_t i = 1 ; i < indexSize; i++)
280
+ start = (perThreadBias % indexSize) | 1 ;
281
+ for (uint32_t i = start; i < indexSize; i++)
282
+ {
283
+ _ASSERTE (i * INDEX_BRANCHING_FACTOR < prevSize);
284
+ index[i - 1 ] = prevIdx[i * INDEX_BRANCHING_FACTOR];
285
+ }
286
+
287
+ for (uint32_t i = 1 ; i < start; i++)
259
288
{
260
289
_ASSERTE (i * INDEX_BRANCHING_FACTOR < prevSize);
261
290
index[i - 1 ] = prevIdx[i * INDEX_BRANCHING_FACTOR];
0 commit comments