Commit c12e876

Add a cache for Native offset to IL Offset mapping in the stack walking logic (#117218)
- This cache mitigates an issue where the locking around the `DebuggerJitInfo` makes it extremely inefficient to access in a multithreaded system, or when MANY different methods are on the stack
- Since we do not have extensive experience with this cache, its size is configurable
  - This allows us to discover scenarios in production where the cache is insufficiently big, and address problems as needed
- The initial size is 1024, which is expected to be adequate, at a cost of 16KB of space on 64-bit systems

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 4a06ac3 commit c12e876

File tree

2 files changed: 216 additions & 29 deletions

src/coreclr/inc/clrconfigvalues.h

Lines changed: 1 addition & 0 deletions
@@ -234,6 +234,7 @@ RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_legacyCorruptedStateExceptionsPolicy, W("le
 CONFIG_DWORD_INFO(INTERNAL_SuppressLostExceptionTypeAssert, W("SuppressLostExceptionTypeAssert"), 0, "")
 RETAIL_CONFIG_DWORD_INFO(INTERNAL_UseEntryPointFilter, W("UseEntryPointFilter"), 0, "")
 RETAIL_CONFIG_DWORD_INFO(INTERNAL_Corhost_Swallow_Uncaught_Exceptions, W("Corhost_Swallow_Uncaught_Exceptions"), 0, "")
+RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_NativeToILOffsetCacheSize, W("NativeToILOffsetCacheSize"), 1024, "")
 CONFIG_DWORD_INFO(INTERNAL_LogStackOverflowExit, W("LogStackOverflowExit"), 0, "Temporary flag to log stack overflow exit process")
 
 ///
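Note: since NativeToILOffsetCacheSize is declared as an UNSUPPORTED retail config value, it should be tunable through the standard CLRConfig environment-variable mapping, e.g. DOTNET_NativeToILOffsetCacheSize=4096; the exact prefix handling is the usual CLRConfig convention and is assumed here, not shown in this diff.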

src/coreclr/vm/debugdebugger.cpp

Lines changed: 215 additions & 29 deletions
@@ -1013,14 +1013,180 @@ void DebugStackTrace::Element::InitPass1(
 }
 
 #ifndef DACCESS_COMPILE
+// This is an implementation of a cache of the Native->IL offset mappings used by managed stack traces. It exists for the following reasons:
+// 1. When a large server experiences a large number of exceptions because some other system is failing, a tremendous number of stack traces can be generated if customers are attempting to log.
+// 2. The native->IL offset mapping is somewhat expensive to compute, and it is not necessary to compute it repeatedly for the same IP.
+// 3. Often when these mappings are needed, the system is under stress and throwing on MANY different threads with similar callstacks, so the cost of locking around the cache may be significant.
+//
+// The cache is implemented as a simple hash table, where the key is the IP + fAdjustOffset
+// flag, and the value is the IL offset. We use a version number to indicate when the cache
+// is being updated and to indicate that a found value is valid, and we use a simple linear
+// probing algorithm to find the entry in the cache.
+//
+// The replacement policy is randomized, and there are s_stackWalkCacheWalk (8) possible buckets to check before giving up.
+//
+// Since the cache entries are larger than a single pointer, we use a simple version locking scheme to protect readers.
+
+struct StackWalkNativeToILCacheEntry
+{
+    void* ip = NULL; // The IP of the native code
+    uint32_t ilOffset = 0; // The IL offset, with the adjust offset flag set if the native offset was adjusted by STACKWALK_CONTROLPC_ADJUST_OFFSET
+};
+
+static LONG s_stackWalkNativeToILCacheVersion = 0;
+static DWORD s_stackWalkCacheSize = 0; // Total number of entries in the cache (each entry is a pointer + 4 bytes, padded to 16 bytes on 64-bit platforms, so the default 1024 entries cost 16KB)
+const DWORD s_stackWalkCacheWalk = 8; // Walk up to this many entries in the cache before giving up
+const DWORD s_stackWalkCacheAdjustOffsetFlag = 0x80000000; // 2^31, stored in the IL offset portion of the cache entry to record whether the native offset was adjusted by STACKWALK_CONTROLPC_ADJUST_OFFSET
+static StackWalkNativeToILCacheEntry* s_stackWalkCache = NULL;
+
+bool CheckNativeToILCacheCore(void* ip, bool fAdjustOffset, uint32_t* pILOffset)
+{
+    // Check the cache for the IP
+    int hashCode = MixPointerIntoHash(ip);
+    StackWalkNativeToILCacheEntry* cacheTable = VolatileLoad(&s_stackWalkCache);
+
+    if (cacheTable == NULL)
+    {
+        // Cache is not initialized
+        return false;
+    }
+    DWORD cacheSize = VolatileLoadWithoutBarrier(&s_stackWalkCacheSize);
+    int index = hashCode % cacheSize;
+
+    DWORD count = 0;
+    do
+    {
+        if (VolatileLoadWithoutBarrier(&cacheTable[index].ip) == ip)
+        {
+            // Cache hit
+            uint32_t dwILOffset = VolatileLoad(&cacheTable[index].ilOffset); // It is IMPORTANT that this load have a barrier after it, so that the version check in the containing function is safe.
+            if (fAdjustOffset != ((dwILOffset & s_stackWalkCacheAdjustOffsetFlag) == s_stackWalkCacheAdjustOffsetFlag))
+            {
+                continue; // The cache entry did not match on the adjust offset flag, so move to the next entry.
+            }
+
+            dwILOffset &= ~s_stackWalkCacheAdjustOffsetFlag; // Clear the adjust offset flag
+            *pILOffset = dwILOffset;
+            return true;
+        }
+    } while (index = (index + 1) % cacheSize, count++ < s_stackWalkCacheWalk);
+
+    return false; // Not found in cache
+}
+
+bool CheckNativeToILCache(void* ip, bool fAdjustOffset, uint32_t* pILOffset)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    LONG versionStart = VolatileLoad(&s_stackWalkNativeToILCacheVersion);
+
+    if ((versionStart & 1) == 1)
+    {
+        // Cache is being updated, so we cannot use it
+        return false;
+    }
+
+    if (CheckNativeToILCacheCore(ip, fAdjustOffset, pILOffset))
+    {
+        // When looking in the cache, the last load from the cache is a VolatileLoad, which allows a load here to check the version in the cache
+        LONG versionEnd = VolatileLoadWithoutBarrier(&s_stackWalkNativeToILCacheVersion);
+        if (versionEnd == versionStart)
+        {
+            // Cache was not updated while we were checking it, so we can use it
+            return true;
+        }
+    }
+
+    return false;
+}
+
+void InsertIntoNativeToILCache(void* ip, bool fAdjustOffset, uint32_t dwILOffset)
+{
+    CONTRACTL
+    {
+        THROWS;
+    }
+    CONTRACTL_END;
+
+    uint32_t dwILOffsetCheck;
+    if (CheckNativeToILCache(ip, fAdjustOffset, &dwILOffsetCheck))
+    {
+        // The entry already exists, so we don't need to insert it again
+        _ASSERTE(dwILOffsetCheck == dwILOffset);
+        return;
+    }
+
+    // Insert the IP and IL offset into the cache
+
+    LONG versionStart = VolatileLoadWithoutBarrier(&s_stackWalkNativeToILCacheVersion);
+    if ((versionStart & 1) == 1)
+    {
+        // Cache is being updated by someone else, so we can't modify it
+        return;
+    }
+
+    if (versionStart != InterlockedCompareExchange(&s_stackWalkNativeToILCacheVersion, versionStart | 1, versionStart))
+    {
+        // Someone else updated the cache version while we were attempting to take the lock, so abort updating the cache
+        return;
+    }
+    // Now we have the lock, so we can safely update the cache
+
+    if (s_stackWalkCache == NULL)
+    {
+        // Initialize the cache if it is not already initialized
+        DWORD cacheSize = CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_NativeToILOffsetCacheSize);
+        if (cacheSize < 1)
+        {
+            cacheSize = 1; // Ensure cache size is at least 1 to prevent division-by-zero
+        }
+        VolatileStore(&s_stackWalkCacheSize, cacheSize);
+        VolatileStore(&s_stackWalkCache, new (nothrow) StackWalkNativeToILCacheEntry[cacheSize]);
+
+        if (s_stackWalkCache == NULL)
+        {
+            // Failed to allocate memory for the cache, so we cannot insert into it
+            // Abort the cache update
+            VolatileStore(&s_stackWalkNativeToILCacheVersion, versionStart);
+            return;
+        }
+    }
+
+    // First check to see if the cache already has an entry
+    uint32_t dwILOffsetFound;
+    if (CheckNativeToILCacheCore(ip, fAdjustOffset, &dwILOffsetFound))
+    {
+        // The entry already exists, so we don't need to insert it again
+        _ASSERTE(dwILOffsetFound == dwILOffset);
+
+        // Store back the original version to indicate that the cache has not been updated, and is ready for use.
+        VolatileStore(&s_stackWalkNativeToILCacheVersion, versionStart);
+    }
+    else
+    {
+        // Insert the IP and IL offset into the cache
+
+        int hashCode = MixPointerIntoHash(ip);
+
+        // Insert the entry into a pseudo-random location in the set of cache entries. The idea is to attempt to be somewhat collision resistant
+        int index = (hashCode + (MixOneValueIntoHash(versionStart) % s_stackWalkCacheWalk)) % s_stackWalkCacheSize;
+
+        s_stackWalkCache[index].ip = ip;
+        s_stackWalkCache[index].ilOffset = dwILOffset | (fAdjustOffset ? s_stackWalkCacheAdjustOffsetFlag : 0);
+
+        // Increment the version to indicate that the cache has been updated, and is ready for use.
+        VolatileStore(&s_stackWalkNativeToILCacheVersion, versionStart + 2);
+    }
+}
 
 // Initialization done outside the TSL.
 // This may need to call locking operations that aren't safe under the TSL.
 void DebugStackTrace::Element::InitPass2()
 {
     CONTRACTL
     {
-        MODE_ANY;
+        MODE_COOPERATIVE;
         GC_TRIGGERS;
         THROWS;
     }
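The version scheme used by the cache above is essentially a seqlock. Below is a minimal standalone sketch of the same reader/writer protocol, under stated assumptions: the names and the single-entry "table" are illustrative only, and std::atomic stands in for the runtime's VolatileLoad/VolatileStore/InterlockedCompareExchange primitives.

#include <atomic>
#include <cstdint>

static std::atomic<long>     g_version{0}; // odd = writer active, even = stable
static std::atomic<void*>    g_key{nullptr};
static std::atomic<uint32_t> g_value{0};

// Reader: succeeds only if the version was even before the reads and unchanged after them.
bool TryRead(void* key, uint32_t* out)
{
    long v1 = g_version.load(std::memory_order_acquire);
    if (v1 & 1)
        return false; // writer in progress; treat as a miss

    bool hit = (g_key.load(std::memory_order_relaxed) == key);
    uint32_t val = g_value.load(std::memory_order_acquire); // barrier so the version re-check below is ordered after the data reads

    long v2 = g_version.load(std::memory_order_relaxed);
    if (hit && v1 == v2)
    {
        *out = val; // version unchanged, so the snapshot is consistent
        return true;
    }
    return false;
}

// Writer: "locks" by flipping the version odd with a CAS; a failed CAS means
// another writer won, and the update is simply abandoned (as the cache does).
void TryWrite(void* key, uint32_t value)
{
    long v = g_version.load(std::memory_order_relaxed);
    if ((v & 1) || !g_version.compare_exchange_strong(v, v | 1))
        return;

    g_key.store(key, std::memory_order_relaxed);
    g_value.store(value, std::memory_order_relaxed);
    g_version.store(v + 2, std::memory_order_release); // publish; version stays even
}

As in the cache, writers never block readers: a reader that observes an odd version, or a version change across its reads, simply reports a miss and the caller falls back to the slow path.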
@@ -1030,37 +1196,57 @@ void DebugStackTrace::Element::InitPass2()
 
     bool bRes = false;
 
-#ifdef DEBUGGING_SUPPORTED
-    // Calculate the IL offset using the debugging services
-    if ((this->ip != (PCODE)NULL) && g_pDebugInterface)
+    bool fAdjustOffset = (this->flags & STEF_IP_ADJUSTED) == 0 && this->dwOffset > 0;
+    if (this->ip != (PCODE)NULL)
     {
-        // To get the source line number of the actual code that threw an exception, the dwOffset needs to be
-        // adjusted in certain cases when calculating the IL offset.
-        //
-        // The dwOffset of the stack frame points to either:
-        //
-        // 1) The instruction that caused a hardware exception (div by zero, null ref, etc).
-        // 2) The instruction after the call to an internal runtime function (FCALL like IL_Throw, IL_Rethrow,
-        //    JIT_OverFlow, etc.) that caused a software exception.
-        // 3) The instruction after the call to a managed function (non-leaf node).
-        //
-        // #2 and #3 are the cases that need to adjust dwOffset because they point after the call instruction
-        // and may point to the next (incorrect) IL instruction/source line. If STEF_IP_ADJUSTED is set,
-        // IP/dwOffset has already been decremented so don't decrement it again.
-        //
-        // When the dwOffset needs to be adjusted it is a lot simpler to decrement instead of trying to figure out
-        // the beginning of the instruction. It is enough for GetILOffsetFromNative to return the IL offset of the
-        // instruction throwing the exception.
-        bool fAdjustOffset = (this->flags & STEF_IP_ADJUSTED) == 0 && this->dwOffset > 0;
-        bRes = g_pDebugInterface->GetILOffsetFromNative(
-            pFunc,
-            (LPCBYTE)this->ip,
-            fAdjustOffset ? this->dwOffset - STACKWALK_CONTROLPC_ADJUST_OFFSET : this->dwOffset,
-            &this->dwILOffset);
+        // Check the cache!
+        uint32_t dwILOffsetFromCache;
+        if (CheckNativeToILCache((void*)this->ip, fAdjustOffset, &dwILOffsetFromCache))
+        {
+            this->dwILOffset = dwILOffsetFromCache;
+            bRes = true;
+        }
+#ifdef DEBUGGING_SUPPORTED
+        else if (g_pDebugInterface)
+        {
+            // To get the source line number of the actual code that threw an exception, the dwOffset needs to be
+            // adjusted in certain cases when calculating the IL offset.
+            //
+            // The dwOffset of the stack frame points to either:
+            //
+            // 1) The instruction that caused a hardware exception (div by zero, null ref, etc).
+            // 2) The instruction after the call to an internal runtime function (FCALL like IL_Throw, IL_Rethrow,
+            //    JIT_OverFlow, etc.) that caused a software exception.
+            // 3) The instruction after the call to a managed function (non-leaf node).
+            //
+            // #2 and #3 are the cases that need to adjust dwOffset because they point after the call instruction
+            // and may point to the next (incorrect) IL instruction/source line. If STEF_IP_ADJUSTED is set,
+            // IP/dwOffset has already been decremented so don't decrement it again.
+            //
+            // When the dwOffset needs to be adjusted it is a lot simpler to decrement instead of trying to figure out
+            // the beginning of the instruction. It is enough for GetILOffsetFromNative to return the IL offset of the
+            // instruction throwing the exception.
+            bRes = g_pDebugInterface->GetILOffsetFromNative(
+                pFunc,
+                (LPCBYTE)this->ip,
+                fAdjustOffset ? this->dwOffset - STACKWALK_CONTROLPC_ADJUST_OFFSET : this->dwOffset,
+                &this->dwILOffset);
+
+            if (bRes)
+            {
+                if (!pFunc->IsLCGMethod() && !pFunc->GetLoaderAllocator()->IsCollectible())
+                {
+                    // Only insert into the cache if the value found will not change throughout the lifetime of the process.
+                    InsertIntoNativeToILCache(
+                        (void*)this->ip,
+                        fAdjustOffset,
+                        this->dwILOffset);
+                }
+            }
+        }
+#endif
     }
 
-#endif // !DEBUGGING_SUPPORTED
-
     // If there was no mapping information, then set to an invalid value
     if (!bRes)
     {

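Why the insertion in InitPass2 is gated on !pFunc->IsLCGMethod() && !pFunc->GetLoaderAllocator()->IsCollectible(): the cache is keyed by raw native IP and is never invalidated for the life of the process, so only mappings that can never change are eligible. LCG (DynamicMethod) code and code in collectible LoaderAllocators can be unloaded and have their address ranges reused by unrelated code, so caching those IPs could later return stale IL offsets; such frames always take the g_pDebugInterface->GetILOffsetFromNative path instead.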