Commit c12e876

Add a cache for Native offset to IL Offset mapping in the stack walking logic (#117218)
- This cache mitigates an issue where the locking around the `DebuggerJitInfo` makes it extremely inefficient to access in a multithreaded system, or when MANY different methods are on the stack
- Since we do not have extensive experience with this cache, its size is configurable
  - This allows us to discover scenarios in production where the cache is insufficiently big, and address problems as needed
- The initial size is 1024, which is expected to be adequate, at a cost of 16KB of space on 64-bit systems

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 4a06ac3 commit c12e876

File tree

2 files changed: 216 additions & 29 deletions

src/coreclr/inc/clrconfigvalues.h

Lines changed: 1 addition & 0 deletions
@@ -234,6 +234,7 @@ RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_legacyCorruptedStateExceptionsPolicy, W("le
 CONFIG_DWORD_INFO(INTERNAL_SuppressLostExceptionTypeAssert, W("SuppressLostExceptionTypeAssert"), 0, "")
 RETAIL_CONFIG_DWORD_INFO(INTERNAL_UseEntryPointFilter, W("UseEntryPointFilter"), 0, "")
 RETAIL_CONFIG_DWORD_INFO(INTERNAL_Corhost_Swallow_Uncaught_Exceptions, W("Corhost_Swallow_Uncaught_Exceptions"), 0, "")
+RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_NativeToILOffsetCacheSize, W("NativeToILOffsetCacheSize"), 1024, "")
 CONFIG_DWORD_INFO(INTERNAL_LogStackOverflowExit, W("LogStackOverflowExit"), 0, "Temporary flag to log stack overflow exit process")
 
 ///
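Note: since NativeToILOffsetCacheSize is declared as an UNSUPPORTED retail config value, it should be tunable through the standard CLRConfig environment-variable mapping, e.g. DOTNET_NativeToILOffsetCacheSize=4096; the exact prefix handling is the usual CLRConfig convention and is assumed here, not shown in this diff.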

src/coreclr/vm/debugdebugger.cpp

Lines changed: 215 additions & 29 deletions
@@ -1013,14 +1013,180 @@ void DebugStackTrace::Element::InitPass1(
 }
 
 #ifndef DACCESS_COMPILE
+// This is an implementation of a cache of the Native->IL offset mappings used by managed stack traces. It exists for the following reasons:
+// 1. When a large server experiences a large number of exceptions because some other system is failing, a tremendous number of stack traces can be generated if customers are attempting to log.
+// 2. The native->IL offset mapping is somewhat expensive to compute, and it is not necessary to compute it repeatedly for the same IP.
+// 3. Often when these mappings are needed, the system is under stress and throwing on MANY different threads with similar callstacks, so the cost of locking around the cache may be significant.
+//
+// The cache is implemented as a simple hash table, where the key is the IP + fAdjustOffset
+// flag, and the value is the IL offset. We use a version number to indicate when the cache
+// is being updated and to indicate that a found value is valid, and we use a simple linear
+// probing algorithm to find the entry in the cache.
+//
+// The replacement policy is randomized, and there are s_stackWalkCacheWalk (8) possible buckets to check before giving up.
+//
+// Since the cache entries are larger than a single pointer, we use a simple version locking scheme to protect readers.
+
+struct StackWalkNativeToILCacheEntry
+{
+    void* ip = NULL; // The IP of the native code
+    uint32_t ilOffset = 0; // The IL offset, with the adjust offset flag set if the native offset was adjusted by STACKWALK_CONTROLPC_ADJUST_OFFSET
+};
+
+static LONG s_stackWalkNativeToILCacheVersion = 0;
+static DWORD s_stackWalkCacheSize = 0; // Total number of entries in the cache (each entry is a pointer + 4 bytes, padded to 16 bytes on 64-bit platforms, so the default 1024 entries cost 16KB)
+const DWORD s_stackWalkCacheWalk = 8; // Walk up to this many entries in the cache before giving up
+const DWORD s_stackWalkCacheAdjustOffsetFlag = 0x80000000; // 2^31, stored in the IL offset portion of the cache entry to record whether the native offset was adjusted by STACKWALK_CONTROLPC_ADJUST_OFFSET
+static StackWalkNativeToILCacheEntry* s_stackWalkCache = NULL;
+
+bool CheckNativeToILCacheCore(void* ip, bool fAdjustOffset, uint32_t* pILOffset)
+{
+    // Check the cache for the IP
+    int hashCode = MixPointerIntoHash(ip);
+    StackWalkNativeToILCacheEntry* cacheTable = VolatileLoad(&s_stackWalkCache);
+
+    if (cacheTable == NULL)
+    {
+        // Cache is not initialized
+        return false;
+    }
+    DWORD cacheSize = VolatileLoadWithoutBarrier(&s_stackWalkCacheSize);
+    int index = hashCode % cacheSize;
+
+    DWORD count = 0;
+    do
+    {
+        if (VolatileLoadWithoutBarrier(&cacheTable[index].ip) == ip)
+        {
+            // Cache hit
+            uint32_t dwILOffset = VolatileLoad(&cacheTable[index].ilOffset); // It is IMPORTANT that this load have a barrier after it, so that the version check in the containing function is safe.
+            if (fAdjustOffset != ((dwILOffset & s_stackWalkCacheAdjustOffsetFlag) == s_stackWalkCacheAdjustOffsetFlag))
+            {
+                continue; // The cache entry did not match on the adjust offset flag, so move to the next entry.
+            }
+
+            dwILOffset &= ~s_stackWalkCacheAdjustOffsetFlag; // Clear the adjust offset flag
+            *pILOffset = dwILOffset;
+            return true;
+        }
+    } while (index = (index + 1) % cacheSize, count++ < s_stackWalkCacheWalk);
+
+    return false; // Not found in cache
+}
+
+bool CheckNativeToILCache(void* ip, bool fAdjustOffset, uint32_t* pILOffset)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    LONG versionStart = VolatileLoad(&s_stackWalkNativeToILCacheVersion);
+
+    if ((versionStart & 1) == 1)
+    {
+        // Cache is being updated, so we cannot use it
+        return false;
+    }
+
+    if (CheckNativeToILCacheCore(ip, fAdjustOffset, pILOffset))
+    {
+        // When looking in the cache, the last load from the cache is a VolatileLoad, which allows a load here to check the version in the cache
+        LONG versionEnd = VolatileLoadWithoutBarrier(&s_stackWalkNativeToILCacheVersion);
+        if (versionEnd == versionStart)
+        {
+            // Cache was not updated while we were checking it, so we can use it
+            return true;
+        }
+    }
+
+    return false;
+}
+
+void InsertIntoNativeToILCache(void* ip, bool fAdjustOffset, uint32_t dwILOffset)
+{
+    CONTRACTL
+    {
+        THROWS;
+    }
+    CONTRACTL_END;
+
+    uint32_t dwILOffsetCheck;
+    if (CheckNativeToILCache(ip, fAdjustOffset, &dwILOffsetCheck))
+    {
+        // The entry already exists, so we don't need to insert it again
+        _ASSERTE(dwILOffsetCheck == dwILOffset);
+        return;
+    }
+
+    // Insert the IP and IL offset into the cache
+
+    LONG versionStart = VolatileLoadWithoutBarrier(&s_stackWalkNativeToILCacheVersion);
+    if ((versionStart & 1) == 1)
+    {
+        // Cache is being updated by someone else, so we can't modify it
+        return;
+    }
+
+    if (versionStart != InterlockedCompareExchange(&s_stackWalkNativeToILCacheVersion, versionStart | 1, versionStart))
+    {
+        // Someone else updated the cache version while we were attempting to take the lock, so abort updating the cache
+        return;
+    }
+    // Now we have the lock, so we can safely update the cache
+
+    if (s_stackWalkCache == NULL)
+    {
+        // Initialize the cache if it is not already initialized
+        DWORD cacheSize = CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_NativeToILOffsetCacheSize);
+        if (cacheSize < 1)
+        {
+            cacheSize = 1; // Ensure cache size is at least 1 to prevent division-by-zero
+        }
+        VolatileStore(&s_stackWalkCacheSize, cacheSize);
+        VolatileStore(&s_stackWalkCache, new (nothrow) StackWalkNativeToILCacheEntry[cacheSize]);
+
+        if (s_stackWalkCache == NULL)
+        {
+            // Failed to allocate memory for the cache, so we cannot insert into it
+            // Abort the cache update
+            VolatileStore(&s_stackWalkNativeToILCacheVersion, versionStart);
+            return;
+        }
+    }
+
+    // First check to see if the cache already has an entry
+    uint32_t dwILOffsetFound;
+    if (CheckNativeToILCacheCore(ip, fAdjustOffset, &dwILOffsetFound))
+    {
+        // The entry already exists, so we don't need to insert it again
+        _ASSERTE(dwILOffsetFound == dwILOffset);
+
+        // Store back the original version to indicate that the cache has not been updated, and is ready for use.
+        VolatileStore(&s_stackWalkNativeToILCacheVersion, versionStart);
+    }
+    else
+    {
+        // Insert the IP and IL offset into the cache
+
+        int hashCode = MixPointerIntoHash(ip);
+
+        // Insert the entry into a pseudo-random location in the set of cache entries. The idea is to attempt to be somewhat collision resistant
+        int index = (hashCode + (MixOneValueIntoHash(versionStart) % s_stackWalkCacheWalk)) % s_stackWalkCacheSize;
+
+        s_stackWalkCache[index].ip = ip;
+        s_stackWalkCache[index].ilOffset = dwILOffset | (fAdjustOffset ? s_stackWalkCacheAdjustOffsetFlag : 0);
+
+        // Increment the version to indicate that the cache has been updated, and is ready for use.
+        VolatileStore(&s_stackWalkNativeToILCacheVersion, versionStart + 2);
+    }
+}
 
 // Initialization done outside the TSL.
 // This may need to call locking operations that aren't safe under the TSL.
 void DebugStackTrace::Element::InitPass2()
 {
     CONTRACTL
     {
-        MODE_ANY;
+        MODE_COOPERATIVE;
         GC_TRIGGERS;
         THROWS;
     }
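The version scheme used by the cache above is essentially a seqlock. Below is a minimal standalone sketch of the same reader/writer protocol, under stated assumptions: the names and the single-entry "table" are illustrative only, and std::atomic stands in for the runtime's VolatileLoad/VolatileStore/InterlockedCompareExchange primitives.

#include <atomic>
#include <cstdint>

static std::atomic<long>     g_version{0}; // odd = writer active, even = stable
static std::atomic<void*>    g_key{nullptr};
static std::atomic<uint32_t> g_value{0};

// Reader: succeeds only if the version was even before the reads and unchanged after them.
bool TryRead(void* key, uint32_t* out)
{
    long v1 = g_version.load(std::memory_order_acquire);
    if (v1 & 1)
        return false; // writer in progress; treat as a miss

    bool hit = (g_key.load(std::memory_order_relaxed) == key);
    uint32_t val = g_value.load(std::memory_order_acquire); // barrier so the version re-check below is ordered after the data reads

    long v2 = g_version.load(std::memory_order_relaxed);
    if (hit && v1 == v2)
    {
        *out = val; // version unchanged, so the snapshot is consistent
        return true;
    }
    return false;
}

// Writer: "locks" by flipping the version odd with a CAS; a failed CAS means
// another writer won, and the update is simply abandoned (as the cache does).
void TryWrite(void* key, uint32_t value)
{
    long v = g_version.load(std::memory_order_relaxed);
    if ((v & 1) || !g_version.compare_exchange_strong(v, v | 1))
        return;

    g_key.store(key, std::memory_order_relaxed);
    g_value.store(value, std::memory_order_relaxed);
    g_version.store(v + 2, std::memory_order_release); // publish; version stays even
}

As in the cache, writers never block readers: a reader that observes an odd version, or a version change across its reads, simply reports a miss and the caller falls back to the slow path.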
@@ -1030,37 +1196,57 @@ void DebugStackTrace::Element::InitPass2()
 
     bool bRes = false;
 
-#ifdef DEBUGGING_SUPPORTED
-    // Calculate the IL offset using the debugging services
-    if ((this->ip != (PCODE)NULL) && g_pDebugInterface)
+    bool fAdjustOffset = (this->flags & STEF_IP_ADJUSTED) == 0 && this->dwOffset > 0;
+    if (this->ip != (PCODE)NULL)
     {
-        // To get the source line number of the actual code that threw an exception, the dwOffset needs to be
-        // adjusted in certain cases when calculating the IL offset.
-        //
-        // The dwOffset of the stack frame points to either:
-        //
-        // 1) The instruction that caused a hardware exception (div by zero, null ref, etc).
-        // 2) The instruction after the call to an internal runtime function (FCALL like IL_Throw, IL_Rethrow,
-        //    JIT_OverFlow, etc.) that caused a software exception.
-        // 3) The instruction after the call to a managed function (non-leaf node).
-        //
-        // #2 and #3 are the cases that need to adjust dwOffset because they point after the call instruction
-        // and may point to the next (incorrect) IL instruction/source line. If STEF_IP_ADJUSTED is set,
-        // IP/dwOffset has already been decremented so don't decrement it again.
-        //
-        // When the dwOffset needs to be adjusted it is a lot simpler to decrement instead of trying to figure out
-        // the beginning of the instruction. It is enough for GetILOffsetFromNative to return the IL offset of the
-        // instruction throwing the exception.
-        bool fAdjustOffset = (this->flags & STEF_IP_ADJUSTED) == 0 && this->dwOffset > 0;
-        bRes = g_pDebugInterface->GetILOffsetFromNative(
-            pFunc,
-            (LPCBYTE)this->ip,
-            fAdjustOffset ? this->dwOffset - STACKWALK_CONTROLPC_ADJUST_OFFSET : this->dwOffset,
-            &this->dwILOffset);
+        // Check the cache!
+        uint32_t dwILOffsetFromCache;
+        if (CheckNativeToILCache((void*)this->ip, fAdjustOffset, &dwILOffsetFromCache))
+        {
+            this->dwILOffset = dwILOffsetFromCache;
+            bRes = true;
+        }
+#ifdef DEBUGGING_SUPPORTED
+        else if (g_pDebugInterface)
+        {
+            // To get the source line number of the actual code that threw an exception, the dwOffset needs to be
+            // adjusted in certain cases when calculating the IL offset.
+            //
+            // The dwOffset of the stack frame points to either:
+            //
+            // 1) The instruction that caused a hardware exception (div by zero, null ref, etc).
+            // 2) The instruction after the call to an internal runtime function (FCALL like IL_Throw, IL_Rethrow,
+            //    JIT_OverFlow, etc.) that caused a software exception.
+            // 3) The instruction after the call to a managed function (non-leaf node).
+            //
+            // #2 and #3 are the cases that need to adjust dwOffset because they point after the call instruction
+            // and may point to the next (incorrect) IL instruction/source line. If STEF_IP_ADJUSTED is set,
+            // IP/dwOffset has already been decremented so don't decrement it again.
+            //
+            // When the dwOffset needs to be adjusted it is a lot simpler to decrement instead of trying to figure out
+            // the beginning of the instruction. It is enough for GetILOffsetFromNative to return the IL offset of the
+            // instruction throwing the exception.
+            bRes = g_pDebugInterface->GetILOffsetFromNative(
+                pFunc,
+                (LPCBYTE)this->ip,
+                fAdjustOffset ? this->dwOffset - STACKWALK_CONTROLPC_ADJUST_OFFSET : this->dwOffset,
+                &this->dwILOffset);
+
+            if (bRes)
+            {
+                if (!pFunc->IsLCGMethod() && !pFunc->GetLoaderAllocator()->IsCollectible())
+                {
+                    // Only insert into the cache if the value found will not change throughout the lifetime of the process.
+                    InsertIntoNativeToILCache(
+                        (void*)this->ip,
+                        fAdjustOffset,
+                        this->dwILOffset);
+                }
+            }
+        }
+#endif
     }
 
-#endif // !DEBUGGING_SUPPORTED
-
     // If there was no mapping information, then set to an invalid value
     if (!bRes)
     {

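Why the insertion in InitPass2 is gated on !pFunc->IsLCGMethod() && !pFunc->GetLoaderAllocator()->IsCollectible(): the cache is keyed by raw native IP and is never invalidated for the life of the process, so only mappings that can never change are eligible. LCG (DynamicMethod) code and code in collectible LoaderAllocators can be unloaded and have their address ranges reused by unrelated code, so caching those IPs could later return stale IL offsets; such frames always take the g_pDebugInterface->GetILOffsetFromNative path instead.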