Skip to content

Commit da44466

Browse files
committed
Add ee_alloc_context
This change is preparatory refactoring for the randomized allocation sampling feature. We need to add more state to the allocation context, but we don't want to make a breaking change to the GC interface. The new state only needs to be visible to the EE, but we want it physically near the existing alloc context state for good cache locality. To accomplish this we created a new ee_alloc_context struct which contains an instance of gc_alloc_context within it. The new ee_alloc_context.combined_limit field should be used by fast allocation helpers to determine when to go down the slow path. Most of the time combined_limit has the same value as alloc_limit, but periodically we need to emit an allocation sampling event on an object that is somewhere in the middle of an AC (allocation context). Using combined_limit rather than alloc_limit as the slow path trigger allows us to keep all the sampling event logic in the slow path.
1 parent 42b2b19 commit da44466

23 files changed

+206
-81
lines changed

src/coreclr/debug/daccess/dacdbiimpl.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6551,10 +6551,11 @@ HRESULT DacHeapWalker::Init(CORDB_ADDRESS start, CORDB_ADDRESS end)
65516551
j++;
65526552
}
65536553
}
6554-
if ((&g_global_alloc_context)->alloc_ptr != nullptr)
6554+
gc_alloc_context globalCtx = ((ee_alloc_context)g_global_alloc_context).gc_allocation_context;
6555+
if (globalCtx.alloc_ptr != nullptr)
65556556
{
6556-
mAllocInfo[j].Ptr = (CORDB_ADDRESS)(&g_global_alloc_context)->alloc_ptr;
6557-
mAllocInfo[j].Limit = (CORDB_ADDRESS)(&g_global_alloc_context)->alloc_limit;
6557+
mAllocInfo[j].Ptr = (CORDB_ADDRESS)globalCtx.alloc_ptr;
6558+
mAllocInfo[j].Limit = (CORDB_ADDRESS)globalCtx.alloc_limit;
65586559
}
65596560

65606561
mThreadCount = j;

src/coreclr/debug/daccess/request.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5493,8 +5493,9 @@ HRESULT ClrDataAccess::GetGlobalAllocationContext(
54935493
}
54945494

54955495
SOSDacEnter();
5496-
*allocPtr = (CLRDATA_ADDRESS)((&g_global_alloc_context)->alloc_ptr);
5497-
*allocLimit = (CLRDATA_ADDRESS)((&g_global_alloc_context)->alloc_limit);
5496+
gc_alloc_context global_alloc_context = ((ee_alloc_context)g_global_alloc_context).gc_allocation_context;
5497+
*allocPtr = (CLRDATA_ADDRESS)global_alloc_context.alloc_ptr;
5498+
*allocLimit = (CLRDATA_ADDRESS)global_alloc_context.alloc_limit;
54985499
SOSDacLeave();
54995500
return hr;
55005501
}

src/coreclr/debug/runtimeinfo/datadescriptor.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,9 +132,14 @@ CDAC_TYPE_END(ThreadStore)
132132

133133
CDAC_TYPE_BEGIN(RuntimeThreadLocals)
134134
CDAC_TYPE_INDETERMINATE(RuntimeThreadLocals)
135-
CDAC_TYPE_FIELD(RuntimeThreadLocals, AllocContext, AllocContext, offsetof(RuntimeThreadLocals, alloc_context))
135+
CDAC_TYPE_FIELD(RuntimeThreadLocals, /*EEAllocContext*/, AllocContext, offsetof(RuntimeThreadLocals, alloc_context))
136136
CDAC_TYPE_END(RuntimeThreadLocals)
137137

138+
CDAC_TYPE_BEGIN(EEAllocContext)
139+
CDAC_TYPE_INDETERMINATE(EEAllocContext)
140+
CDAC_TYPE_FIELD(EEAllocContext, /*GCAllocContext*/, GCAllocationContext, offsetof(ee_alloc_context, gc_allocation_context))
141+
CDAC_TYPE_END(EEAllocContext)
142+
138143
CDAC_TYPE_BEGIN(GCAllocContext)
139144
CDAC_TYPE_INDETERMINATE(GCAllocContext)
140145
CDAC_TYPE_FIELD(GCAllocContext, /*pointer*/, Pointer, offsetof(gc_alloc_context, alloc_ptr))

src/coreclr/inc/dacvars.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ DEFINE_DACVAR(ProfControlBlock, dac__g_profControlBlock, ::g_profControlBlock)
140140
DEFINE_DACVAR(PTR_DWORD, dac__g_card_table, ::g_card_table)
141141
DEFINE_DACVAR(PTR_BYTE, dac__g_lowest_address, ::g_lowest_address)
142142
DEFINE_DACVAR(PTR_BYTE, dac__g_highest_address, ::g_highest_address)
143-
DEFINE_DACVAR(gc_alloc_context, dac__g_global_alloc_context, ::g_global_alloc_context)
143+
DEFINE_DACVAR(ee_alloc_context, dac__g_global_alloc_context, ::g_global_alloc_context)
144144

145145
DEFINE_DACVAR(IGCHeap, dac__g_pGCHeap, ::g_pGCHeap)
146146

src/coreclr/vm/amd64/JitHelpers_Slow.asm

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -180,15 +180,15 @@ LEAF_ENTRY JIT_TrialAllocSFastSP, _TEXT
180180
inc [g_global_alloc_lock]
181181
jnz JIT_NEW
182182

183-
mov rax, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr] ; alloc_ptr
184-
mov r10, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_limit] ; limit_ptr
183+
mov rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr
184+
mov r10, [g_global_alloc_context + OFFSETOF__ee_alloc_context__combined_limit] ; combined_limit
185185

186186
add r8, rax
187187

188188
cmp r8, r10
189189
ja AllocFailed
190190

191-
mov qword ptr [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr], r8 ; update the alloc ptr
191+
mov qword ptr [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr
192192
mov [rax], rcx
193193
mov [g_global_alloc_lock], -1
194194

@@ -208,8 +208,8 @@ NESTED_ENTRY JIT_BoxFastUP, _TEXT
208208
inc [g_global_alloc_lock]
209209
jnz JIT_Box
210210

211-
mov rax, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr] ; alloc_ptr
212-
mov r10, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_limit] ; limit_ptr
211+
mov rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr
212+
mov r10, [g_global_alloc_context + OFFSETOF__ee_alloc_context__combined_limit] ; combined_limit
213213

214214
add r8, rax
215215

@@ -219,7 +219,7 @@ NESTED_ENTRY JIT_BoxFastUP, _TEXT
219219
test rdx, rdx
220220
je NullRef
221221

222-
mov qword ptr [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr], r8 ; update the alloc ptr
222+
mov qword ptr [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr
223223
mov [rax], rcx
224224
mov [g_global_alloc_lock], -1
225225

@@ -287,15 +287,15 @@ LEAF_ENTRY AllocateStringFastUP, _TEXT
287287
inc [g_global_alloc_lock]
288288
jnz FramedAllocateString
289289

290-
mov rax, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr] ; alloc_ptr
291-
mov r10, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_limit] ; limit_ptr
290+
mov rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr
291+
mov r10, [g_global_alloc_context + OFFSETOF__ee_alloc_context__combined_limit] ; combined_limit
292292

293293
add r8, rax
294294

295295
cmp r8, r10
296296
ja AllocFailed
297297

298-
mov qword ptr [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr], r8 ; update the alloc ptr
298+
mov qword ptr [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr
299299
mov [rax], r11
300300
mov [g_global_alloc_lock], -1
301301

@@ -343,16 +343,16 @@ LEAF_ENTRY JIT_NewArr1VC_UP, _TEXT
343343
inc [g_global_alloc_lock]
344344
jnz JIT_NewArr1
345345

346-
mov rax, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr] ; alloc_ptr
347-
mov r10, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_limit] ; limit_ptr
346+
mov rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr
347+
mov r10, [g_global_alloc_context + OFFSETOF__ee_alloc_context__combined_limit] ; combined_limit
348348

349349
add r8, rax
350350
jc AllocFailed
351351

352352
cmp r8, r10
353353
ja AllocFailed
354354

355-
mov qword ptr [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr], r8 ; update the alloc ptr
355+
mov qword ptr [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr
356356
mov [rax], rcx
357357
mov [g_global_alloc_lock], -1
358358

@@ -396,15 +396,15 @@ LEAF_ENTRY JIT_NewArr1OBJ_UP, _TEXT
396396
inc [g_global_alloc_lock]
397397
jnz JIT_NewArr1
398398

399-
mov rax, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr] ; alloc_ptr
400-
mov r10, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_limit] ; limit_ptr
399+
mov rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr
400+
mov r10, [g_global_alloc_context + OFFSETOF__ee_alloc_context__combined_limit] ; combined_limit
401401

402402
add r8, rax
403403

404404
cmp r8, r10
405405
ja AllocFailed
406406

407-
mov qword ptr [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr], r8 ; update the alloc ptr
407+
mov qword ptr [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr
408408
mov [rax], rcx
409409
mov [g_global_alloc_lock], -1
410410

src/coreclr/vm/amd64/asmconstants.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -111,11 +111,12 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__Thread__m_pFrame
111111
#define Thread_m_pFrame OFFSETOF__Thread__m_pFrame
112112

113113

114-
#define OFFSETOF__gc_alloc_context__alloc_ptr 0x0
115-
ASMCONSTANT_OFFSETOF_ASSERT(gc_alloc_context, alloc_ptr);
114+
#define OFFSETOF__ee_alloc_context__alloc_ptr 0x8
115+
ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__alloc_ptr == offsetof(ee_alloc_context, gc_allocation_context) +
116+
offsetof(gc_alloc_context, alloc_ptr));
116117

117-
#define OFFSETOF__gc_alloc_context__alloc_limit 0x8
118-
ASMCONSTANT_OFFSETOF_ASSERT(gc_alloc_context, alloc_limit);
118+
#define OFFSETOF__ee_alloc_context__combined_limit 0x0
119+
ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__combined_limit == offsetof(ee_alloc_context, combined_limit));
119120

120121
#define OFFSETOF__ThreadExceptionState__m_pCurrentTracker 0x000
121122
ASMCONSTANTS_C_ASSERT(OFFSETOF__ThreadExceptionState__m_pCurrentTracker

src/coreclr/vm/comutilnative.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -848,7 +848,7 @@ FCIMPL0(INT64, GCInterface::GetAllocatedBytesForCurrentThread)
848848

849849
INT64 currentAllocated = 0;
850850
Thread *pThread = GetThread();
851-
gc_alloc_context* ac = &t_runtime_thread_locals.alloc_context;
851+
gc_alloc_context* ac = &t_runtime_thread_locals.alloc_context.gc_allocation_context;
852852
currentAllocated = ac->alloc_bytes + ac->alloc_bytes_uoh - (ac->alloc_limit - ac->alloc_ptr);
853853

854854
return currentAllocated;

src/coreclr/vm/gccover.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1834,7 +1834,7 @@ void DoGcStress (PCONTEXT regs, NativeCodeVersion nativeCodeVersion)
18341834
// BUG(github #10318) - when not using allocation contexts, the alloc lock
18351835
// must be acquired here. Until fixed, this assert prevents random heap corruption.
18361836
assert(GCHeapUtilities::UseThreadAllocationContexts());
1837-
GCHeapUtilities::GetGCHeap()->StressHeap(&t_runtime_thread_locals.alloc_context);
1837+
GCHeapUtilities::GetGCHeap()->StressHeap(&t_runtime_thread_locals.alloc_context.gc_allocation_context);
18381838

18391839
// StressHeap can exit early w/o forcing a SuspendEE to trigger the instruction update
18401840
// We can not rely on the return code to determine if the instruction update happened

src/coreclr/vm/gcenv.ee.cpp

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -443,7 +443,14 @@ gc_alloc_context * GCToEEInterface::GetAllocContext()
443443
return nullptr;
444444
}
445445

446-
return &t_runtime_thread_locals.alloc_context;
446+
return &t_runtime_thread_locals.alloc_context.gc_allocation_context;
447+
}
448+
449+
// Invokes the GC-supplied callback on the gc_alloc_context embedded in an ee_alloc_context and then
// recomputes the EE-only combined_limit for that context.
//
// pEEAllocContext - the EE allocation context whose embedded gc_allocation_context is handed to fn
// fn              - the callback to run; it receives a pointer to the embedded gc_alloc_context
// param           - opaque argument forwarded to fn unchanged
//
// fn gets a mutable pointer to the embedded context, so UpdateCombinedLimit() is called afterwards to
// keep combined_limit consistent with whatever alloc_limit holds once the callback returns.
void InvokeGCAllocCallback(ee_alloc_context* pEEAllocContext, enum_alloc_context_func* fn, void* param)
450+
{
451+
gc_alloc_context* pAllocContext = &pEEAllocContext->gc_allocation_context;
452+
fn(pAllocContext, param);
453+
pEEAllocContext->UpdateCombinedLimit();
447454
}
448455

449456
void GCToEEInterface::GcEnumAllocContexts(enum_alloc_context_func* fn, void* param)
@@ -460,16 +467,16 @@ void GCToEEInterface::GcEnumAllocContexts(enum_alloc_context_func* fn, void* par
460467
Thread * pThread = NULL;
461468
while ((pThread = ThreadStore::GetThreadList(pThread)) != NULL)
462469
{
463-
gc_alloc_context* palloc_context = pThread->GetAllocContext();
470+
ee_alloc_context* palloc_context = pThread->GetEEAllocContext();
464471
if (palloc_context != nullptr)
465472
{
466-
fn(palloc_context, param);
473+
InvokeGCAllocCallback(palloc_context, fn, param);
467474
}
468475
}
469476
}
470477
else
471478
{
472-
fn(&g_global_alloc_context, param);
479+
InvokeGCAllocCallback(&g_global_alloc_context, fn, param);
473480
}
474481
}
475482

src/coreclr/vm/gcheaputilities.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ bool g_sw_ww_enabled_for_gc_heap = false;
4141

4242
#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
4343

44-
GVAL_IMPL_INIT(gc_alloc_context, g_global_alloc_context, {});
44+
GVAL_IMPL_INIT(ee_alloc_context, g_global_alloc_context, {});
4545

4646
enum GC_LOAD_STATUS {
4747
GC_LOAD_STATUS_BEFORE_START,

src/coreclr/vm/gcheaputilities.h

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,69 @@ GPTR_DECL(IGCHeap, g_pGCHeap);
1212
#ifndef DACCESS_COMPILE
1313
extern "C" {
1414
#endif // !DACCESS_COMPILE
15+
16+
// This struct wraps the standard gc_alloc_context so that the EE can attach state that is only visible to
// the EE (currently combined_limit) right next to the GC's allocation context fields.
17+
struct ee_alloc_context
18+
{
19+
// Any allocation that would overlap combined_limit needs to be handled by the allocation slow path.
20+
// combined_limit is the minimum of:
21+
// - gc_allocation_context.alloc_limit (the end of the current AC, i.e. allocation context)
22+
// - the sampling_limit
23+
//
24+
// In the simple case that randomized sampling is disabled, combined_limit is always equal to alloc_limit.
25+
//
26+
// There are two different useful interpretations for the sampling_limit. One is to treat the sampling_limit
27+
// as an address: when we allocate an object that overlaps that address we should emit a sampling event.
28+
// The other is that we can treat (sampling_limit - alloc_ptr) as a budget of how many bytes we can allocate
29+
// before emitting a sampling event. If we always allocated objects contiguously in the AC and incremented
30+
// alloc_ptr by the size of the object, these two interpretations would be equivalent. However, when objects
31+
// don't fit in the AC we allocate them in some other address range. The budget interpretation is more
32+
// flexible to handle those cases.
33+
//
34+
// The sampling_limit isn't stored in any separate field explicitly; instead it is implied:
35+
// - if combined_limit == alloc_limit there is no sampled byte in the AC. In the budget interpretation
36+
// we can allocate (alloc_limit - alloc_ptr) unsampled bytes. We'll need a new random number after
37+
// that to determine whether future allocated bytes should be sampled.
38+
// This occurs either because the sampling feature is disabled, or because the randomized selection
39+
// of sampled bytes didn't select a byte in this AC.
40+
// - if combined_limit < alloc_limit there is a sampling limit in the AC: sampling_limit == combined_limit.
41+
uint8_t* combined_limit;
42+
// The GC-visible allocation context; its alloc_ptr and alloc_limit are the fields referred to above.
gc_alloc_context gc_allocation_context;
43+
44+
// Resets this context: sets combined_limit to 0 and calls init() on the embedded gc_alloc_context.
// NOTE(review): presumably gc_alloc_context::init() also zeroes alloc_limit, which would keep
// combined_limit == alloc_limit after this call - confirm.
void init()
45+
{
46+
LIMITED_METHOD_CONTRACT;
47+
combined_limit = 0;
48+
gc_allocation_context.init();
49+
}
50+
51+
// Returns the current value of combined_limit.
uint8_t* getCombinedLimit()
52+
{
53+
LIMITED_METHOD_CONTRACT;
54+
return combined_limit;
55+
}
56+
57+
// Returns the byte offset of gc_allocation_context.alloc_ptr measured from the start of ee_alloc_context.
static size_t getAllocPtrFieldOffset()
58+
{
59+
LIMITED_METHOD_CONTRACT;
60+
return offsetof(ee_alloc_context, gc_allocation_context) + offsetof(gc_alloc_context, alloc_ptr);
61+
}
62+
63+
// Returns the byte offset of combined_limit measured from the start of ee_alloc_context.
static size_t getCombinedLimitFieldOffset()
64+
{
65+
LIMITED_METHOD_CONTRACT;
66+
return offsetof(ee_alloc_context, combined_limit);
67+
}
68+
69+
// Regenerate the randomized sampling limit and update the combined_limit field.
70+
inline void UpdateCombinedLimit()
71+
{
72+
// The randomized sampling feature is being submitted in stages. At this point the sampling is never
73+
// activated so combined_limit is always equal to alloc_limit.
74+
combined_limit = gc_allocation_context.alloc_limit;
75+
}
76+
};
77+
1578
GPTR_DECL(uint8_t,g_lowest_address);
1679
GPTR_DECL(uint8_t,g_highest_address);
1780
GPTR_DECL(uint32_t,g_card_table);
@@ -21,7 +84,8 @@ GVAL_DECL(GCHeapType, g_heap_type);
2184
// for all allocations. In order to avoid extra indirections in assembly
2285
// allocation helpers, the EE owns the global allocation context and the
2386
// GC will update it when it needs to.
24-
GVAL_DECL(gc_alloc_context, g_global_alloc_context);
87+
GVAL_DECL(ee_alloc_context, g_global_alloc_context);
88+
2589
#ifndef DACCESS_COMPILE
2690
}
2791
#endif // !DACCESS_COMPILE

src/coreclr/vm/gchelpers.cpp

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
//
4141
//========================================================================
4242

43-
inline gc_alloc_context* GetThreadAllocContext()
43+
inline ee_alloc_context* GetThreadEEAllocContext()
4444
{
4545
WRAPPER_NO_CONTRACT;
4646

@@ -222,16 +222,19 @@ inline Object* Alloc(size_t size, GC_ALLOC_FLAGS flags)
222222

223223
if (GCHeapUtilities::UseThreadAllocationContexts())
224224
{
225-
gc_alloc_context *threadContext = GetThreadAllocContext();
226-
GCStress<gc_on_alloc>::MaybeTrigger(threadContext);
227-
retVal = GCHeapUtilities::GetGCHeap()->Alloc(threadContext, size, flags);
225+
ee_alloc_context *threadContext = GetThreadEEAllocContext();
226+
GCStress<gc_on_alloc>::MaybeTrigger(&threadContext->gc_allocation_context);
227+
retVal = GCHeapUtilities::GetGCHeap()->Alloc(&threadContext->gc_allocation_context, size, flags);
228+
threadContext->UpdateCombinedLimit();
229+
228230
}
229231
else
230232
{
231233
GlobalAllocLockHolder holder(&g_global_alloc_lock);
232-
gc_alloc_context *globalContext = &g_global_alloc_context;
233-
GCStress<gc_on_alloc>::MaybeTrigger(globalContext);
234-
retVal = GCHeapUtilities::GetGCHeap()->Alloc(globalContext, size, flags);
234+
ee_alloc_context *globalContext = &g_global_alloc_context;
235+
GCStress<gc_on_alloc>::MaybeTrigger(&globalContext->gc_allocation_context);
236+
retVal = GCHeapUtilities::GetGCHeap()->Alloc(&globalContext->gc_allocation_context, size, flags);
237+
globalContext->UpdateCombinedLimit();
235238
}
236239

237240

src/coreclr/vm/gcstress.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,7 @@ namespace _GCStress
298298
// BUG(github #10318) - when not using allocation contexts, the alloc lock
299299
// must be acquired here. Until fixed, this assert prevents random heap corruption.
300300
_ASSERTE(GCHeapUtilities::UseThreadAllocationContexts());
301-
GCHeapUtilities::GetGCHeap()->StressHeap(&t_runtime_thread_locals.alloc_context);
301+
GCHeapUtilities::GetGCHeap()->StressHeap(&t_runtime_thread_locals.alloc_context.gc_allocation_context);
302302
}
303303

304304
FORCEINLINE

0 commit comments

Comments
 (0)