Skip to content

Commit a207c9c

Browse files
committed
Use FlushInstructionCache to protect stub reads instead of using a barrier
1 parent 1a8c6b0 commit a207c9c

File tree

12 files changed

+91
-20
lines changed

12 files changed

+91
-20
lines changed

src/coreclr/clrdefinitions.cmake

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,10 @@ if (FEATURE_STUBPRECODE_DYNAMIC_HELPERS)
207207
add_definitions(-DFEATURE_STUBPRECODE_DYNAMIC_HELPERS)
208208
endif()
209209

210+
if (FEATURE_CORECLR_FLUSH_INSTRUCTION_CACHE_TO_PROTECT_STUB_READS)
211+
add_definitions(-DFEATURE_CORECLR_FLUSH_INSTRUCTION_CACHE_TO_PROTECT_STUB_READS)
212+
endif()
213+
210214
if (CLR_CMAKE_TARGET_APPLE)
211215
# Re-enable when dbgshim containing https://github.com/dotnet/diagnostics/pull/5487 is generally available
212216
# add_definitions(-DFEATURE_MAP_THUNKS_FROM_IMAGE)

src/coreclr/clrfeatures.cmake

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,12 @@ else()
8484
set(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH 1)
8585
endif()
8686

87+
# We use a flush instruction cache to protect reads from the StubPrecodeData/CallCountingStub structures in the stubs.
88+
# This is needed because the StubPrecodeData structure is initialized after the stub code is written, and we need to ensure that
89+
# the reads in the stub happen after the writes to the StubPrecodeData structure. We could do this with a barrier instruction in the stub,
90+
# but that would be more expensive.
91+
set(FEATURE_CORECLR_FLUSH_INSTRUCTION_CACHE_TO_PROTECT_STUB_READS 1)
92+
8793
if (CLR_CMAKE_HOST_UNIX AND CLR_CMAKE_HOST_ARCH_AMD64)
8894
# Allow 16 byte compare-exchange (cmpxchg16b)
8995
add_compile_options($<${FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH}:-mcx16>)

src/coreclr/vm/arm64/thunktemplates.S

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,11 @@
3030
IN_PAGE_INDEX = 0
3131
.rept STUB_PRECODE_NUM_THUNKS_PER_MAPPING
3232

33-
dmb ishld
3433
ldr x10, DATA_SLOT(StubPrecode, Target, STUB_PRECODE_CODESIZE, StubPrecodeCodeTemplate)
3534
ldr x12, DATA_SLOT(StubPrecode, SecretParam, STUB_PRECODE_CODESIZE, StubPrecodeCodeTemplate)
3635
br x10
3736

37+
brk 0xf000 // Stubs need to be 24-byte in size to allow for the data to be 2 pointers + 1 byte
3838
brk 0xf000 // Stubs need to be 24-byte in size to allow for the data to be 2 pointers + 1 byte
3939
brk 0xf000 // Stubs need to be 24-byte in size to allow for the data to be 2 pointers + 1 byte
4040

@@ -52,7 +52,7 @@ LEAF_END_MARKED StubPrecodeCodeTemplate, _TEXT
5252
// FixupPrecode
5353
// ----------
5454

55-
#define FIXUP_PRECODE_CODESIZE 0x18 // 5 instructions, 4 bytes each (and we also have 4 bytes of padding)
55+
#define FIXUP_PRECODE_CODESIZE 0x18 // 6 instructions, 4 bytes each
5656
#define FIXUP_PRECODE_DATASIZE 0x18 // 3 qwords
5757
.set FIXUP_PRECODE_NUM_THUNKS_PER_MAPPING,(THUNKS_MAP_SIZE / FIXUP_PRECODE_CODESIZE)
5858

@@ -89,7 +89,6 @@ LEAF_END_MARKED FixupPrecodeCodeTemplate, _TEXT
8989
IN_PAGE_INDEX = 0
9090
.rept CALLCOUNTING_NUM_THUNKS_PER_MAPPING
9191

92-
dmb ishld
9392
ldr x9, DATA_SLOT(CallCountingStub, RemainingCallCountCell, CALLCOUNTING_CODESIZE, CallCountingStubCodeTemplate)
9493
ldrh w10, [x9]
9594
subs w10, w10, #1
@@ -100,6 +99,7 @@ LEAF_END_MARKED FixupPrecodeCodeTemplate, _TEXT
10099
0:
101100
ldr x10, DATA_SLOT(CallCountingStub, TargetForThresholdReached, CALLCOUNTING_CODESIZE, CallCountingStubCodeTemplate)
102101
br x10
102+
brk 0xf000 // Stubs need to be aligned
103103

104104
IN_PAGE_INDEX = IN_PAGE_INDEX + 1
105105
.endr
@@ -120,12 +120,12 @@ LEAF_END_MARKED CallCountingStubCodeTemplate, _TEXT
120120
.irp STUB_PAGE_SIZE, 16384, 32768, 65536
121121

122122
LEAF_ENTRY StubPrecodeCode\STUB_PAGE_SIZE
123-
dmb ishld
124123
ldr x10, DATA_SLOT(StubPrecode, Target)
125124
ldr x12, DATA_SLOT(StubPrecode, SecretParam)
126125
br x10
127126
brk 0xf000 // Stubs need to be 24-byte in size to allow for the data to be 3 pointers
128127
brk 0xf000 // Stubs need to be 24-byte in size to allow for the data to be 3 pointers
128+
brk 0xf000 // Stubs need to be 24-byte in size to allow for the data to be 3 pointers
129129
LEAF_END_MARKED StubPrecodeCode\STUB_PAGE_SIZE
130130

131131
LEAF_ENTRY FixupPrecodeCode\STUB_PAGE_SIZE
@@ -150,6 +150,7 @@ LOCAL_LABEL(StubStart\STUB_PAGE_SIZE):
150150
LOCAL_LABEL(CountReachedZero\STUB_PAGE_SIZE):
151151
ldr x10, DATA_SLOT(CallCountingStub, TargetForThresholdReached)
152152
br x10
153+
brk 0xf000 // Stubs need to be aligned in total size
153154
LEAF_END_MARKED CallCountingStubCode\STUB_PAGE_SIZE
154155

155156
.endr

src/coreclr/vm/arm64/thunktemplates.asm

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
#define DATA_SLOT(stub, field) (stub##Code + STUB_PAGE_SIZE + stub##Data__##field)
1212

1313
LEAF_ENTRY StubPrecodeCode
14-
dmb ishld
1514
ldr x10, DATA_SLOT(StubPrecode, Target)
1615
ldr x12, DATA_SLOT(StubPrecode, SecretParam)
1716
br x10
@@ -26,7 +25,6 @@
2625
LEAF_END_MARKED FixupPrecodeCode
2726

2827
LEAF_ENTRY CallCountingStubCode
29-
dmb ishld
3028
ldr x9, DATA_SLOT(CallCountingStub, RemainingCallCountCell)
3129
ldrh w10, [x9]
3230
subs w10, w10, #1

src/coreclr/vm/callcounting.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,15 @@ const CallCountingStub *CallCountingManager::CallCountingStubAllocator::Allocate
263263
allocationAddressHolder.SuppressRelease();
264264
stub->Initialize(targetForMethod, remainingCallCountCell);
265265

266+
#ifdef FEATURE_CORECLR_FLUSH_INSTRUCTION_CACHE_TO_PROTECT_STUB_READS
267+
// While the allocation of a precode will use a memory mapping technique, and not actually write to the set of instructions,
268+
// the set of instructions in the stub has non-barrier protected reads from the StubPrecodeData structure. In order to protect those
269+
// reads we would either need barrier instructions in the stub, or we need to ensure that the precode is flushed in the instruction cache
270+
// which will have the side effect of ensuring that the reads within the stub will happen *after* the writes to the StubPrecodeData structure which
271+
// happened in the Init routine above.
272+
ClrFlushInstructionCache(stub, CallCountingStub::CodeSize);
273+
#endif // FEATURE_CORECLR_FLUSH_INSTRUCTION_CACHE_TO_PROTECT_STUB_READS
274+
266275
return stub;
267276
}
268277

src/coreclr/vm/callcounting.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ class CallCountingStub
9898
#elif defined(TARGET_ARM)
9999
static const SIZE_T CodeSize = 32;
100100
#elif defined(TARGET_LOONGARCH64)
101-
static const SIZE_T CodeSize = 48;
101+
static const SIZE_T CodeSize = 40;
102102
#elif defined(TARGET_RISCV64)
103103
static const SIZE_T CodeSize = 40;
104104
#elif defined(TARGET_WASM)

src/coreclr/vm/dllimportcallback.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,15 @@ void UMEntryThunkData::Terminate()
289289
// TheUMEntryPrestub includes diagnostic for collected delegates
290290
m_pUMEntryThunk->SetTargetUnconditional(TheUMThunkPreStub());
291291

292+
#ifdef FEATURE_CORECLR_FLUSH_INSTRUCTION_CACHE_TO_PROTECT_STUB_READS
293+
// While the allocation of a precode will use a memory mapping technique, and not actually write to the set of instructions,
294+
// the set of instructions in the stub has non-barrier protected reads from the StubPrecodeData structure. In order to protect those
295+
// reads we would either need barrier instructions in the stub, or we need to ensure that the precode is flushed in the instruction cache
296+
// which will have the side effect of ensuring that the reads within the stub will happen *after* the writes to the StubPrecodeData structure which
297+
// happened in the Init routine above.
298+
ClrFlushInstructionCache(m_pUMEntryThunk, sizeof(m_pUMEntryThunk), true);
299+
#endif // FEATURE_CORECLR_FLUSH_INSTRUCTION_CACHE_TO_PROTECT_STUB_READS
300+
292301
OBJECTHANDLE pObjectHandle = m_pObjectHandle;
293302

294303
// Set m_pObjectHandle indicate the collected state

src/coreclr/vm/dllimportcallback.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,15 @@ class UMEntryThunkData
208208
#ifdef _DEBUG
209209
m_state = kLoadTimeInited;
210210
#endif
211+
212+
#ifdef FEATURE_CORECLR_FLUSH_INSTRUCTION_CACHE_TO_PROTECT_STUB_READS
213+
// While the allocation of a precode will use a memory mapping technique, and not actually write to the set of instructions,
214+
// the set of instructions in the stub has non-barrier protected reads from the StubPrecodeData structure. In order to protect those
215+
// reads we would either need barrier instructions in the stub, or we need to ensure that the precode is flushed in the instruction cache
216+
// which will have the side effect of ensuring that the reads within the stub will happen *after* the writes to the StubPrecodeData structure which
217+
// happened in the Init routine above.
218+
ClrFlushInstructionCache(m_pUMEntryThunk, sizeof(m_pUMEntryThunk));
219+
#endif // FEATURE_CORECLR_FLUSH_INSTRUCTION_CACHE_TO_PROTECT_STUB_READS
211220
}
212221

213222
void Terminate();

src/coreclr/vm/loongarch64/thunktemplates.S

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,7 @@
77
// Note that the offsets specified in pcaddi must match the behavior of GetStubCodePageSize() on this architecture/os.
88

99
LEAF_ENTRY StubPrecodeCode
10-
dbar 0
11-
pcaddi $r21, 0xFFF
10+
pcaddi $r21, 0x1000
1211
ld.d $t2,$r21, StubPrecodeData__SecretParam
1312
ld.d $r21,$r21, StubPrecodeData__Target
1413
jirl $r0,$r21,0
@@ -28,8 +27,7 @@ LEAF_END_MARKED FixupPrecodeCode
2827
// NOTE: For LoongArch64 `CallCountingStubData__RemainingCallCountCell` must be zero !!!
2928
// Because the stub-identifying token is $t1 within the `OnCallCountThresholdReachedStub`.
3029
LEAF_ENTRY CallCountingStubCode
31-
dbar 0
32-
pcaddi $t2, 0xFFF
30+
pcaddi $t2, 0x1000
3331
ld.d $t1, $t2, CallCountingStubData__RemainingCallCountCell
3432
ld.h $r21, $t1, 0
3533
addi.w $r21, $r21, -1

src/coreclr/vm/precode.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,16 @@ InterpreterPrecode* Precode::AllocateInterpreterPrecode(PCODE byteCode,
229229

230230
InterpreterPrecode* pPrecode = (InterpreterPrecode*)pamTracker->Track(pLoaderAllocator->GetNewStubPrecodeHeap()->AllocStub());
231231
pPrecode->Init(pPrecode, byteCode);
232+
233+
#ifdef FEATURE_CORECLR_FLUSH_INSTRUCTION_CACHE_TO_PROTECT_STUB_READS
234+
// While the allocation of a precode will use a memory mapping technique, and not actually write to the set of instructions,
235+
// the set of instructions in the stub has non-barrier protected reads from the StubPrecodeData structure. In order to protect those
236+
// reads we would either need barrier instructions in the stub, or we need to ensure that the precode is flushed in the instruction cache
237+
// which will have the side effect of ensuring that the reads within the stub will happen *after* the writes to the StubPrecodeData structure which
238+
// happened in the Init routine above.
239+
ClrFlushInstructionCache(pPrecode->GetCode(), Precode::SizeOf(t));
240+
#endif // FEATURE_CORECLR_FLUSH_INSTRUCTION_CACHE_TO_PROTECT_STUB_READS
241+
232242
#ifdef FEATURE_PERFMAP
233243
PerfMap::LogStubs(__FUNCTION__, "UMEntryThunk", (PCODE)pPrecode, sizeof(InterpreterPrecode), PerfMapStubType::IndividualWithinBlock);
234244
#endif
@@ -265,6 +275,16 @@ Precode* Precode::Allocate(PrecodeType t, MethodDesc* pMD,
265275
ThisPtrRetBufPrecodeData *pData = (ThisPtrRetBufPrecodeData*)pamTracker->Track(pLoaderAllocator->GetLowFrequencyHeap()->AllocMem(S_SIZE_T(sizeof(ThisPtrRetBufPrecodeData))));
266276
pThisPtrRetBufPrecode->Init(pData, pMD, pLoaderAllocator);
267277
pPrecode = (Precode*)pThisPtrRetBufPrecode;
278+
279+
#ifdef FEATURE_CORECLR_FLUSH_INSTRUCTION_CACHE_TO_PROTECT_STUB_READS
280+
// While the allocation of a precode will use a memory mapping technique, and not actually write to the set of instructions,
281+
// the set of instructions in the stub has non-barrier protected reads from the StubPrecodeData structure. In order to protect those
282+
// reads we would either need barrier instructions in the stub, or we need to ensure that the precode is flushed in the instruction cache
283+
// which will have the side effect of ensuring that the reads within the stub will happen *after* the writes to the StubPrecodeData structure which
284+
// happened in the Init routine above.
285+
ClrFlushInstructionCache(pPrecode->GetCode(), Precode::SizeOf(t));
286+
#endif // FEATURE_CORECLR_FLUSH_INSTRUCTION_CACHE_TO_PROTECT_STUB_READS
287+
268288
#ifdef FEATURE_PERFMAP
269289
PerfMap::LogStubs(__FUNCTION__, "ThisPtrRetBuf", (PCODE)pPrecode, sizeof(ThisPtrRetBufPrecodeData), PerfMapStubType::IndividualWithinBlock);
270290
#endif
@@ -275,6 +295,16 @@ Precode* Precode::Allocate(PrecodeType t, MethodDesc* pMD,
275295
_ASSERTE(t == PRECODE_STUB || t == PRECODE_NDIRECT_IMPORT);
276296
pPrecode = (Precode*)pamTracker->Track(pLoaderAllocator->GetNewStubPrecodeHeap()->AllocStub());
277297
pPrecode->Init(pPrecode, t, pMD, pLoaderAllocator);
298+
299+
#ifdef FEATURE_CORECLR_FLUSH_INSTRUCTION_CACHE_TO_PROTECT_STUB_READS
300+
// While the allocation of a precode will use a memory mapping technique, and not actually write to the set of instructions,
301+
// the set of instructions in the stub has non-barrier protected reads from the StubPrecodeData structure. In order to protect those
302+
// reads we would either need barrier instructions in the stub, or we need to ensure that the precode is flushed in the instruction cache
303+
// which will have the side effect of ensuring that the reads within the stub will happen *after* the writes to the StubPrecodeData structure which
304+
// happened in the Init routine above.
305+
ClrFlushInstructionCache(pPrecode->GetCode(), Precode::SizeOf(t));
306+
#endif // FEATURE_CORECLR_FLUSH_INSTRUCTION_CACHE_TO_PROTECT_STUB_READS
307+
278308
#ifdef FEATURE_PERFMAP
279309
PerfMap::LogStubs(__FUNCTION__, t == PRECODE_STUB ? "StubPrecode" : "PInvokeImportPrecode", (PCODE)pPrecode, sizeof(StubPrecode), PerfMapStubType::IndividualWithinBlock);
280310
#endif

0 commit comments

Comments
 (0)