Skip to content

Commit 0666ad4

Browse files
Stub Precode variant for DynamicHelpers (#113402)
Run DynamicHelpers logic with StubPrecodes instead of dynamically generating the stubs. Performance measurements indicate that this path is about 1% faster than the current approach for startup of my powershell benchmark, and results seem similar between X64 and Arm64 which was a surprise to me. Throughput results look like roughly a wash. I'd like to merge this PR in with its current state of changing all Arm64 and X64 behavior to use the new path to get a full set of performance data, as it appears this might just be a better approach than generating stubs dynamically.
1 parent 48ace18 commit 0666ad4

18 files changed

+1636
-14
lines changed

src/coreclr/clrdefinitions.cmake

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,9 @@ if (CLR_CMAKE_TARGET_WIN32 AND (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_
213213
add_definitions(-DFEATURE_SPECIAL_USER_MODE_APC)
214214
endif()
215215

216+
# When the CMake variable FEATURE_STUBPRECODE_DYNAMIC_HELPERS is set, pass the
# preprocessor define of the same name to the compiler.
if (FEATURE_STUBPRECODE_DYNAMIC_HELPERS)
217+
add_definitions(-DFEATURE_STUBPRECODE_DYNAMIC_HELPERS)
218+
endif()
216219

217220
# Use this function to enable building with a specific target OS and architecture set of defines
218221
# This is known to work for the set of defines used by the JIT and gcinfo, it is not likely correct for

src/coreclr/clrfeatures.cmake

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,3 +70,7 @@ if (CLR_CMAKE_HOST_UNIX AND CLR_CMAKE_HOST_ARCH_AMD64)
7070
# Allow 16 byte compare-exchange (cmpxchg16b)
7171
add_compile_options($<${FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH}:-mcx16>)
7272
endif()
73+
74+
# Turn on FEATURE_STUBPRECODE_DYNAMIC_HELPERS only when the target architecture
# is AMD64 or ARM64; the variable is left untouched for every other target.
if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64)
75+
set(FEATURE_STUBPRECODE_DYNAMIC_HELPERS 1)
76+
endif()

src/coreclr/debug/daccess/request.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3674,6 +3674,9 @@ static const char *LoaderAllocatorLoaderHeapNames[] =
36743674
"ExecutableHeap",
36753675
"FixupPrecodeHeap",
36763676
"NewStubPrecodeHeap",
3677+
#if defined(FEATURE_READYTORUN) && defined(FEATURE_STUBPRECODE_DYNAMIC_HELPERS)
3678+
"DynamicHelpersStubHeap",
3679+
#endif // defined(FEATURE_READYTORUN) && defined(FEATURE_STUBPRECODE_DYNAMIC_HELPERS)
36773680
"IndcellHeap",
36783681
#ifdef FEATURE_VIRTUAL_STUB_DISPATCH
36793682
"CacheEntryHeap",
@@ -3711,7 +3714,9 @@ HRESULT ClrDataAccess::GetLoaderAllocatorHeaps(CLRDATA_ADDRESS loaderAllocatorAd
37113714
pLoaderHeaps[i++] = HOST_CDADDR(pLoaderAllocator->GetExecutableHeap());
37123715
pLoaderHeaps[i++] = HOST_CDADDR(pLoaderAllocator->GetFixupPrecodeHeap());
37133716
pLoaderHeaps[i++] = HOST_CDADDR(pLoaderAllocator->GetNewStubPrecodeHeap());
3714-
3717+
#if defined(FEATURE_READYTORUN) && defined(FEATURE_STUBPRECODE_DYNAMIC_HELPERS)
3718+
pLoaderHeaps[i++] = HOST_CDADDR(pLoaderAllocator->GetDynamicHelpersStubHeap());
3719+
#endif // defined(FEATURE_READYTORUN) && defined(FEATURE_STUBPRECODE_DYNAMIC_HELPERS)
37153720
VirtualCallStubManager *pVcsMgr = pLoaderAllocator->GetVirtualCallStubManager();
37163721
if (pVcsMgr == nullptr)
37173722
{

src/coreclr/vm/CMakeLists.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -641,6 +641,7 @@ if(CLR_CMAKE_TARGET_ARCH_AMD64)
641641
${ARCH_SOURCES_DIR}/ExternalMethodFixupThunk.asm
642642
${ARCH_SOURCES_DIR}/UMThunkStub.asm
643643
${ARCH_SOURCES_DIR}/VirtualCallStubAMD64.asm
644+
${ARCH_SOURCES_DIR}/StubPrecodeDynamicHelpers.asm
644645
)
645646

646647
set(VM_HEADERS_WKS_ARCH_ASM
@@ -668,7 +669,8 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM64)
668669
${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.asm
669670
${ARCH_SOURCES_DIR}/thunktemplates.asm
670671
${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.asm
671-
)
672+
${ARCH_SOURCES_DIR}/StubPrecodeDynamicHelpers.asm
673+
)
672674

673675
set(VM_HEADERS_WKS_ARCH_ASM
674676
${ARCH_SOURCES_DIR}/asmconstants.h
@@ -699,6 +701,7 @@ else(CLR_CMAKE_TARGET_WIN32)
699701
${ARCH_SOURCES_DIR}/unixasmhelpers.S
700702
${ARCH_SOURCES_DIR}/umthunkstub.S
701703
${ARCH_SOURCES_DIR}/virtualcallstubamd64.S
704+
${ARCH_SOURCES_DIR}/StubPrecodeDynamicHelpers.S
702705
)
703706
elseif(CLR_CMAKE_TARGET_ARCH_I386)
704707
set(VM_SOURCES_WKS_ARCH_ASM
@@ -727,6 +730,7 @@ else(CLR_CMAKE_TARGET_WIN32)
727730
${ARCH_SOURCES_DIR}/pinvokestubs.S
728731
${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.S
729732
${ARCH_SOURCES_DIR}/thunktemplates.S
733+
${ARCH_SOURCES_DIR}/StubPrecodeDynamicHelpers.S
730734
)
731735
elseif(CLR_CMAKE_TARGET_ARCH_LOONGARCH64)
732736
set(VM_SOURCES_WKS_ARCH_ASM
Lines changed: 264 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,264 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
.intel_syntax noprefix
5+
#include "unixasmmacros.inc"
6+
#include "asmconstants.h"
7+
8+
#ifdef FEATURE_STUBPRECODE_DYNAMIC_HELPERS
9+
10+
#define SecretArg_Reg r10
11+
#define FirstArg_Reg rdi
12+
#define SecondArg_Reg rsi
13+
#define SecondArg_DwordReg esi
14+
#define ThirdArg_Reg rdx
15+
#define ThirdArg_DwordReg edx
16+
#define FourthArg_Reg rcx
17+
18+
#define DATA_SLOT(field) r10 + OFFSETOF__DynamicHelperStubArgs__ ## field
19+
#define GENERIC_DICT_DATA_SLOT(field) r10 + OFFSETOF__GenericDictionaryDynamicHelperStubData__ ## field
20+
21+
// Loads the Constant1 field of the DynamicHelperStubArgs addressed by r10 into the
// first argument register (rdi), then jumps to the address stored in the Helper field.
LEAF_ENTRY DynamicHelper_CallHelper_1Arg, _TEXT
22+
mov FirstArg_Reg, QWORD PTR [DATA_SLOT(Constant1)]
23+
jmp QWORD PTR [DATA_SLOT(Helper)]
24+
LEAF_END DynamicHelper_CallHelper_1Arg, _TEXT
25+
26+
// Leaves the incoming first argument untouched, loads Constant1 (addressed via r10)
// into the second argument register (rsi), then jumps to the Helper field's target.
LEAF_ENTRY DynamicHelper_CallHelper_AddSecondArg, _TEXT
27+
mov SecondArg_Reg, QWORD PTR [DATA_SLOT(Constant1)]
28+
jmp QWORD PTR [DATA_SLOT(Helper)]
29+
LEAF_END DynamicHelper_CallHelper_AddSecondArg, _TEXT
30+
31+
// Loads Constant1 into the first argument register (rdi) and Constant2 into the
// second argument register (rsi), both read relative to r10, then jumps to Helper.
LEAF_ENTRY DynamicHelper_CallHelper_2Arg, _TEXT
32+
mov FirstArg_Reg, QWORD PTR [DATA_SLOT(Constant1)]
33+
mov SecondArg_Reg, QWORD PTR [DATA_SLOT(Constant2)]
34+
jmp QWORD PTR [DATA_SLOT(Helper)]
35+
LEAF_END DynamicHelper_CallHelper_2Arg, _TEXT
36+
37+
// Shifts the incoming first argument into the second argument register, places
// Constant1 (addressed via r10) in the first argument register, then jumps to Helper.
LEAF_ENTRY DynamicHelper_CallHelper_ArgMove, _TEXT
38+
// The copy must happen before rdi is overwritten with the constant below.
mov SecondArg_Reg, FirstArg_Reg
39+
mov FirstArg_Reg, QWORD PTR [DATA_SLOT(Constant1)]
40+
jmp QWORD PTR [DATA_SLOT(Helper)]
41+
LEAF_END DynamicHelper_CallHelper_ArgMove, _TEXT
42+
43+
// Returns to the caller immediately without modifying any register.
LEAF_ENTRY DynamicHelper_Return, _TEXT
44+
ret
45+
LEAF_END DynamicHelper_Return, _TEXT
46+
47+
// Returns the value held in the secret argument register (r10) itself, via rax.
LEAF_ENTRY DynamicHelper_ReturnConst, _TEXT
48+
mov rax, SecretArg_Reg
49+
ret
50+
LEAF_END DynamicHelper_ReturnConst, _TEXT
51+
52+
// Returns, in rax, the 8-byte value stored at the address held in the secret
// argument register (r10).
LEAF_ENTRY DynamicHelper_ReturnIndirConst, _TEXT
53+
mov rax, QWORD PTR [SecretArg_Reg]
54+
ret
55+
LEAF_END DynamicHelper_ReturnIndirConst, _TEXT
56+
57+
// Returns (*Constant1) + Constant2 in rax: Constant1 is treated as a pointer that is
// dereferenced once, and Constant2 is added to the loaded value.
LEAF_ENTRY DynamicHelper_ReturnIndirConstWithOffset, _TEXT
58+
// rax = Constant1 (a pointer read from the data addressed by r10)
mov rax, QWORD PTR [DATA_SLOT(Constant1)]
59+
// rax = qword stored at that pointer
mov rax, QWORD PTR [rax]
60+
// rax += Constant2
add rax, QWORD PTR [DATA_SLOT(Constant2)]
61+
ret
62+
LEAF_END DynamicHelper_ReturnIndirConstWithOffset, _TEXT
63+
64+
// Leaves the first two argument registers untouched, loads Constant1 (addressed via
// r10) into the third argument register (rdx), then jumps to the Helper field's target.
LEAF_ENTRY DynamicHelper_CallHelper_AddThirdArg, _TEXT
65+
mov ThirdArg_Reg, QWORD PTR [DATA_SLOT(Constant1)]
66+
jmp QWORD PTR [DATA_SLOT(Helper)]
67+
LEAF_END DynamicHelper_CallHelper_AddThirdArg, _TEXT
68+
69+
// Leaves the first two argument registers untouched, loads Constant1 into the third
// argument register (rdx) and Constant2 into the fourth (rcx), then jumps to Helper.
LEAF_ENTRY DynamicHelper_CallHelper_AddThirdAndFourthArg, _TEXT
70+
mov ThirdArg_Reg, QWORD PTR [DATA_SLOT(Constant1)]
71+
mov FourthArg_Reg, QWORD PTR [DATA_SLOT(Constant2)]
72+
jmp QWORD PTR [DATA_SLOT(Helper)]
73+
LEAF_END DynamicHelper_CallHelper_AddThirdAndFourthArg, _TEXT
74+
75+
// Generic dictionaries can have 2 or 3 indirections (5 indirs of 32bit size, and 2 8 byte quantities) = 40 bytes
76+
// If it has 2 it's for a Method, and the first indirection is always offsetof(InstantiatedMethodDesc, m_pPerInstInfo)
77+
// If it has 3 it's for a Class, and the first indirection is always MethodTable::GetOffsetOfPerInstInfo
78+
// It can also have 0, 0, to just return the class type
79+
// Test For Null Or Not (If not present, cannot have a size check)
80+
// SizeCheck or not (Only needed if size > Some number)
81+
//
82+
// Also special case where we just return the TypeHandle or MethodDesc itself
83+
// Should probably have special case for 1, 2, 3 generic arg of MethodDesc/MethodTable
84+
85+
// Three-indirection dictionary lookup starting from the value in the first argument
// register (read at OFFSETOF__MethodTable__m_pPerInstInfo), with a size check and a
// null check. On success the looked-up value is returned in rax. If either check
// fails, HandleArgs is loaded into the second argument register and control jumps
// through g_pClassWithSlotAndModule. Overwrites rsi and rdx as scratch.
LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_SizeCheck_TestForNull, _TEXT
86+
// First indirection
87+
mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__MethodTable__m_pPerInstInfo]
88+
// Standard Indirection: rax = [rax + SecondIndir] (SecondIndir is read as a 32-bit value)
89+
mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(SecondIndir)]
90+
mov rax, QWORD PTR [SecondArg_Reg+rax]
91+
// SizeCheck: take the helper path when the qword at [rax + SizeOffset] is <= SlotOffset (signed compare)
92+
mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(SizeOffset)]
93+
mov ThirdArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(SlotOffset)]
94+
cmp qword ptr[rax + SecondArg_Reg], ThirdArg_Reg
95+
jle LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Class_SizeCheck_TestForNull_HelperCall)
96+
// Standard Indirection: rax = [rax + LastIndir]
97+
mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(LastIndir)]
98+
mov rax, QWORD PTR [SecondArg_Reg+rax]
99+
// Null test: a zero result also takes the helper path
100+
test rax, rax
101+
je LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Class_SizeCheck_TestForNull_HelperCall)
102+
ret
103+
LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Class_SizeCheck_TestForNull_HelperCall):
104+
// Slow path: the first argument register is left as received; the second gets HandleArgs.
mov SecondArg_Reg, QWORD PTR [GENERIC_DICT_DATA_SLOT(HandleArgs)]
105+
// NOTE(review): PREPARE_EXTERNAL_VAR presumably places the address of the variable in rax - confirm in unixasmmacros.inc
PREPARE_EXTERNAL_VAR g_pClassWithSlotAndModule, rax
106+
jmp [rax]
107+
LEAF_END DynamicHelper_GenericDictionaryLookup_Class_SizeCheck_TestForNull, _TEXT
108+
109+
// Three-indirection dictionary lookup starting from the value in the first argument
// register (read at OFFSETOF__MethodTable__m_pPerInstInfo), with a null check but no
// size check. A non-zero result is returned in rax; a zero result loads HandleArgs
// into the second argument register and jumps through g_pClassWithSlotAndModule.
// Overwrites rsi as scratch.
LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_TestForNull, _TEXT
110+
// First indirection
111+
mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__MethodTable__m_pPerInstInfo]
112+
// Standard Indirection: rax = [rax + SecondIndir] (SecondIndir is read as a 32-bit value)
113+
mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(SecondIndir)]
114+
mov rax, QWORD PTR [SecondArg_Reg+rax]
115+
// Standard Indirection: rax = [rax + LastIndir]
116+
mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(LastIndir)]
117+
mov rax, QWORD PTR [SecondArg_Reg+rax]
118+
// Null test: a zero result takes the helper path
119+
test rax, rax
120+
je LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Class_TestForNull_HelperCall)
121+
ret
122+
LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Class_TestForNull_HelperCall):
123+
// Slow path: the first argument register is left as received; the second gets HandleArgs.
mov SecondArg_Reg, QWORD PTR [GENERIC_DICT_DATA_SLOT(HandleArgs)]
124+
// NOTE(review): PREPARE_EXTERNAL_VAR presumably places the address of the variable in rax - confirm in unixasmmacros.inc
PREPARE_EXTERNAL_VAR g_pClassWithSlotAndModule, rax
125+
jmp [rax]
126+
LEAF_END DynamicHelper_GenericDictionaryLookup_Class_TestForNull, _TEXT
127+
128+
// Three-indirection dictionary lookup starting from the value in the first argument
// register (read at OFFSETOF__MethodTable__m_pPerInstInfo), with no size or null
// check: the loaded value is returned in rax unconditionally. Overwrites rsi as scratch.
LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class, _TEXT
129+
// First indirection
130+
mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__MethodTable__m_pPerInstInfo]
131+
// Standard Indirection: rax = [rax + SecondIndir] (SecondIndir is read as a 32-bit value)
132+
mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(SecondIndir)]
133+
mov rax, QWORD PTR [SecondArg_Reg+rax]
134+
// Standard Indirection: rax = [rax + LastIndir]
135+
mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(LastIndir)]
136+
mov rax, QWORD PTR [SecondArg_Reg+rax]
137+
ret
138+
LEAF_END DynamicHelper_GenericDictionaryLookup_Class, _TEXT
139+
140+
// Two-indirection dictionary lookup starting from the value in the first argument
// register (read at OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo), with a size
// check and a null check. On success the looked-up value is returned in rax. If
// either check fails, HandleArgs is loaded into the second argument register and
// control jumps through g_pMethodWithSlotAndModule. Overwrites rsi and rdx as scratch.
LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_SizeCheck_TestForNull, _TEXT
141+
// First indirection
142+
mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo]
143+
// SizeCheck: take the helper path when the qword at [rax + SizeOffset] is <= SlotOffset (signed compare)
144+
mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(SizeOffset)]
145+
mov ThirdArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(SlotOffset)]
146+
cmp qword ptr[rax + SecondArg_Reg], ThirdArg_Reg
147+
jle LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Method_SizeCheck_TestForNull_HelperCall)
148+
// Standard Indirection: rax = [rax + LastIndir] (LastIndir is read as a 32-bit value)
149+
mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(LastIndir)]
150+
mov rax, QWORD PTR [SecondArg_Reg+rax]
151+
// Null test: a zero result also takes the helper path
152+
test rax, rax
153+
je LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Method_SizeCheck_TestForNull_HelperCall)
154+
ret
155+
LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Method_SizeCheck_TestForNull_HelperCall):
156+
// Slow path: the first argument register is left as received; the second gets HandleArgs.
mov SecondArg_Reg, QWORD PTR [GENERIC_DICT_DATA_SLOT(HandleArgs)]
157+
// NOTE(review): PREPARE_EXTERNAL_VAR presumably places the address of the variable in rax - confirm in unixasmmacros.inc
PREPARE_EXTERNAL_VAR g_pMethodWithSlotAndModule, rax
158+
jmp [rax]
159+
LEAF_END DynamicHelper_GenericDictionaryLookup_Method_SizeCheck_TestForNull, _TEXT
160+
161+
// Two-indirection dictionary lookup starting from the value in the first argument
// register (read at OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo), with a null
// check but no size check. A non-zero result is returned in rax; a zero result loads
// HandleArgs into the second argument register and jumps through
// g_pMethodWithSlotAndModule. Overwrites rsi as scratch.
LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_TestForNull, _TEXT
162+
// First indirection
163+
mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo]
164+
// Standard Indirection: rax = [rax + LastIndir] (LastIndir is read as a 32-bit value)
165+
mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(LastIndir)]
166+
mov rax, QWORD PTR [SecondArg_Reg+rax]
167+
// Null test: a zero result takes the helper path
168+
test rax, rax
169+
je LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Method_TestForNull_HelperCall)
170+
ret
171+
LOCAL_LABEL(DynamicHelper_GenericDictionaryLookup_Method_TestForNull_HelperCall):
172+
// Slow path: the first argument register is left as received; the second gets HandleArgs.
mov SecondArg_Reg, QWORD PTR [GENERIC_DICT_DATA_SLOT(HandleArgs)]
173+
// NOTE(review): PREPARE_EXTERNAL_VAR presumably places the address of the variable in rax - confirm in unixasmmacros.inc
PREPARE_EXTERNAL_VAR g_pMethodWithSlotAndModule, rax
174+
jmp [rax]
175+
LEAF_END DynamicHelper_GenericDictionaryLookup_Method_TestForNull, _TEXT
176+
177+
// Two-indirection dictionary lookup starting from the value in the first argument
// register (read at OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo), with no size
// or null check: the loaded value is returned in rax unconditionally. Overwrites rsi
// as scratch.
LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method, _TEXT
178+
// First indirection
179+
mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo]
180+
// Standard Indirection: rax = [rax + LastIndir] (LastIndir is read as a 32-bit value)
181+
mov SecondArg_DwordReg, DWORD PTR [GENERIC_DICT_DATA_SLOT(LastIndir)]
182+
mov rax, QWORD PTR [SecondArg_Reg+rax]
183+
ret
184+
LEAF_END DynamicHelper_GenericDictionaryLookup_Method, _TEXT
185+
186+
// Fixed-offset lookup that reads no stub data: follows m_pPerInstInfo from the first
// argument, dereferences its first entry, and returns the qword at offset 0 of that
// entry in rax. Only rax is written.
LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_0_0, _TEXT
187+
// First indirection
188+
mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__MethodTable__m_pPerInstInfo]
189+
// Standard Indirection (offset 0)
190+
mov rax, QWORD PTR [rax]
191+
// Standard Indirection (offset 0)
192+
mov rax, QWORD PTR [rax]
193+
ret
194+
LEAF_END DynamicHelper_GenericDictionaryLookup_Class_0_0, _TEXT
195+
196+
197+
// Fixed-offset lookup that reads no stub data: follows m_pPerInstInfo from the first
// argument, dereferences its first entry, and returns the qword at offset 0x8 of that
// entry in rax. Only rax is written.
LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_0_1, _TEXT
198+
// First indirection
199+
mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__MethodTable__m_pPerInstInfo]
200+
// Standard Indirection (offset 0)
201+
mov rax, QWORD PTR [rax]
202+
// Standard Indirection (offset 0x8)
203+
mov rax, QWORD PTR [rax + 0x8]
204+
ret
205+
LEAF_END DynamicHelper_GenericDictionaryLookup_Class_0_1, _TEXT
206+
207+
208+
// Fixed-offset lookup that reads no stub data: follows m_pPerInstInfo from the first
// argument, dereferences its first entry, and returns the qword at offset 0x10 of
// that entry in rax. Only rax is written.
LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_0_2, _TEXT
209+
// First indirection
210+
mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__MethodTable__m_pPerInstInfo]
211+
// Standard Indirection (offset 0)
212+
mov rax, QWORD PTR [rax]
213+
// Standard Indirection (offset 0x10)
214+
mov rax, QWORD PTR [rax + 0x10]
215+
ret
216+
LEAF_END DynamicHelper_GenericDictionaryLookup_Class_0_2, _TEXT
217+
218+
219+
// Fixed-offset lookup that reads no stub data: follows m_pPerInstInfo from the first
// argument, dereferences its first entry, and returns the qword at offset 0x18 of
// that entry in rax. Only rax is written.
LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Class_0_3, _TEXT
220+
// First indirection
221+
mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__MethodTable__m_pPerInstInfo]
222+
// Standard Indirection (offset 0)
223+
mov rax, QWORD PTR [rax]
224+
// Standard Indirection (offset 0x18)
225+
mov rax, QWORD PTR [rax + 0x18]
226+
ret
227+
LEAF_END DynamicHelper_GenericDictionaryLookup_Class_0_3, _TEXT
228+
229+
// Fixed-offset lookup that reads no stub data: follows m_pPerInstInfo (at the
// InstantiatedMethodDesc offset) from the first argument and returns the qword at
// offset 0 of it in rax. Only rax is written.
LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_0, _TEXT
230+
// First indirection
231+
mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo]
232+
// Standard Indirection (offset 0)
233+
mov rax, QWORD PTR [rax]
234+
ret
235+
LEAF_END DynamicHelper_GenericDictionaryLookup_Method_0, _TEXT
236+
237+
238+
// Fixed-offset lookup that reads no stub data: follows m_pPerInstInfo (at the
// InstantiatedMethodDesc offset) from the first argument and returns the qword at
// offset 0x8 of it in rax. Only rax is written.
LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_1, _TEXT
239+
// First indirection
240+
mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo]
241+
// Standard Indirection (offset 0x8)
242+
mov rax, QWORD PTR [rax + 0x8]
243+
ret
244+
LEAF_END DynamicHelper_GenericDictionaryLookup_Method_1, _TEXT
245+
246+
247+
// Fixed-offset lookup that reads no stub data: follows m_pPerInstInfo (at the
// InstantiatedMethodDesc offset) from the first argument and returns the qword at
// offset 0x10 of it in rax. Only rax is written.
LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_2, _TEXT
248+
// First indirection
249+
mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo]
250+
// Standard Indirection (offset 0x10)
251+
mov rax, QWORD PTR [rax + 0x10]
252+
ret
253+
LEAF_END DynamicHelper_GenericDictionaryLookup_Method_2, _TEXT
254+
255+
256+
// Fixed-offset lookup that reads no stub data: follows m_pPerInstInfo (at the
// InstantiatedMethodDesc offset) from the first argument and returns the qword at
// offset 0x18 of it in rax. Only rax is written.
LEAF_ENTRY DynamicHelper_GenericDictionaryLookup_Method_3, _TEXT
257+
// First indirection
258+
mov rax, QWORD PTR [FirstArg_Reg+OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo]
259+
// Standard Indirection (offset 0x18)
260+
mov rax, QWORD PTR [rax + 0x18]
261+
ret
262+
LEAF_END DynamicHelper_GenericDictionaryLookup_Method_3, _TEXT
263+
264+
#endif //// FEATURE_STUBPRECODE_DYNAMIC_HELPERS

0 commit comments

Comments
 (0)