Skip to content

Commit 9f23ddb

Browse files
authored
Fix an issue with sysconf returning the wrong last level cache values on Linux running on certain AMD Processors. (#109749)
* Fixed merge conflicts * Fix up build related issues
1 parent 4b94961 commit 9f23ddb

File tree

3 files changed

+94
-58
lines changed

3 files changed

+94
-58
lines changed

src/coreclr/gc/gcconfig.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,8 +142,8 @@ class GCConfigStringHolder
142142
INT_CONFIG (GCSpinCountUnit, "GCSpinCountUnit", NULL, 0, "Specifies the spin count unit used by the GC.") \
143143
INT_CONFIG (GCDynamicAdaptationMode, "GCDynamicAdaptationMode", "System.GC.DynamicAdaptationMode", 1, "Enable the GC to dynamically adapt to application sizes.") \
144144
INT_CONFIG (GCDTargetTCP, "GCDTargetTCP", "System.GC.DTargetTCP", 0, "Specifies the target tcp for DATAS") \
145-
BOOL_CONFIG (GCLogBGCThreadId, "GCLogBGCThreadId", NULL, false, "Specifies if BGC ThreadId should be logged")
146-
145+
BOOL_CONFIG (GCLogBGCThreadId, "GCLogBGCThreadId", NULL, false, "Specifies if BGC ThreadId should be logged") \
146+
BOOL_CONFIG (GCCacheSizeFromSysConf, "GCCacheSizeFromSysConf", NULL, false, "Specifies using sysconf to retrieve the last level cache size for Unix.")
147147

148148
// This class is responsible for retrieving configuration information
149149
// for how the GC should operate.

src/coreclr/gc/unix/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
set(CMAKE_INCLUDE_CURRENT_DIR ON)
22
include_directories("../env")
3+
include_directories("..")
34

45
include(configure.cmake)
56

src/coreclr/gc/unix/gcenv.unix.cpp

Lines changed: 91 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@
1818
#include "gcenv.structs.h"
1919
#include "gcenv.base.h"
2020
#include "gcenv.os.h"
21+
#include "gcenv.ee.h"
2122
#include "gcenv.unix.inl"
2223
#include "volatile.h"
24+
#include "gcconfig.h"
2325
#include "numasupport.h"
2426

2527
#if HAVE_SWAPCTL
@@ -862,10 +864,10 @@ bool ReadMemoryValueFromFile(const char* filename, uint64_t* val)
862864
return result;
863865
}
864866

865-
static size_t GetLogicalProcessorCacheSizeFromOS()
867+
static void GetLogicalProcessorCacheSizeFromSysConf(size_t* cacheLevel, size_t* cacheSize)
866868
{
867-
size_t cacheLevel = 0;
868-
size_t cacheSize = 0;
869+
assert (cacheLevel != nullptr);
870+
assert (cacheSize != nullptr);
869871

870872
#if defined(_SC_LEVEL1_DCACHE_SIZE) || defined(_SC_LEVEL2_CACHE_SIZE) || defined(_SC_LEVEL3_CACHE_SIZE) || defined(_SC_LEVEL4_CACHE_SIZE)
871873
const int cacheLevelNames[] =
@@ -881,47 +883,105 @@ static size_t GetLogicalProcessorCacheSizeFromOS()
881883
long size = sysconf(cacheLevelNames[i]);
882884
if (size > 0)
883885
{
884-
cacheSize = (size_t)size;
885-
cacheLevel = i + 1;
886+
*cacheSize = (size_t)size;
887+
*cacheLevel = i + 1;
886888
break;
887889
}
888890
}
889891
#endif
892+
}
893+
894+
static void GetLogicalProcessorCacheSizeFromSysFs(size_t* cacheLevel, size_t* cacheSize)
895+
{
896+
assert (cacheLevel != nullptr);
897+
assert (cacheSize != nullptr);
890898

891899
#if defined(TARGET_LINUX) && !defined(HOST_ARM) && !defined(HOST_X86)
892-
if (cacheSize == 0)
900+
//
901+
// Retrieve cachesize via sysfs by reading the file /sys/devices/system/cpu/cpu0/cache/index{LastLevelCache}/size
902+
// for the platform. Currently musl and arm64 should be the only cases to use
903+
// this method to determine cache size.
904+
//
905+
size_t level;
906+
char path_to_size_file[] = "/sys/devices/system/cpu/cpu0/cache/index-/size";
907+
char path_to_level_file[] = "/sys/devices/system/cpu/cpu0/cache/index-/level";
908+
int index = 40;
909+
assert(path_to_size_file[index] == '-');
910+
assert(path_to_level_file[index] == '-');
911+
912+
for (int i = 0; i < 5; i++)
893913
{
894-
//
895-
// Fallback to retrieve cachesize via /sys/.. if sysconf was not available
896-
// for the platform. Currently musl and arm64 should be only cases to use
897-
// this method to determine cache size.
898-
//
899-
size_t level;
900-
char path_to_size_file[] = "/sys/devices/system/cpu/cpu0/cache/index-/size";
901-
char path_to_level_file[] = "/sys/devices/system/cpu/cpu0/cache/index-/level";
902-
int index = 40;
903-
assert(path_to_size_file[index] == '-');
904-
assert(path_to_level_file[index] == '-');
905-
906-
for (int i = 0; i < 5; i++)
907-
{
908-
path_to_size_file[index] = (char)(48 + i);
914+
path_to_size_file[index] = (char)(48 + i);
909915

910-
uint64_t cache_size_from_sys_file = 0;
916+
uint64_t cache_size_from_sys_file = 0;
911917

912-
if (ReadMemoryValueFromFile(path_to_size_file, &cache_size_from_sys_file))
913-
{
914-
cacheSize = std::max(cacheSize, (size_t)cache_size_from_sys_file);
918+
if (ReadMemoryValueFromFile(path_to_size_file, &cache_size_from_sys_file))
919+
{
920+
*cacheSize = std::max(*cacheSize, (size_t)cache_size_from_sys_file);
915921

916-
path_to_level_file[index] = (char)(48 + i);
917-
if (ReadMemoryValueFromFile(path_to_level_file, &level))
918-
{
919-
cacheLevel = level;
920-
}
922+
path_to_level_file[index] = (char)(48 + i);
923+
if (ReadMemoryValueFromFile(path_to_level_file, &level))
924+
{
925+
*cacheLevel = level;
921926
}
922927
}
923928
}
929+
#endif
930+
}
931+
932+
static void GetLogicalProcessorCacheSizeFromHeuristic(size_t* cacheLevel, size_t* cacheSize)
933+
{
934+
assert (cacheLevel != nullptr);
935+
assert (cacheSize != nullptr);
936+
937+
#if (defined(TARGET_LINUX) && !defined(TARGET_APPLE))
938+
{
939+
// Use the following best-effort heuristic, based on the CPU count:
940+
// 1 ~ 4 : 4 MB
941+
// 5 ~ 16 : 8 MB
942+
// 17 ~ 64 : 16 MB
943+
// 65+ : 32 MB
944+
DWORD logicalCPUs = g_processAffinitySet.Count();
945+
if (logicalCPUs < 5)
946+
{
947+
*cacheSize = 4;
948+
}
949+
else if (logicalCPUs < 17)
950+
{
951+
*cacheSize = 8;
952+
}
953+
else if (logicalCPUs < 65)
954+
{
955+
*cacheSize = 16;
956+
}
957+
else
958+
{
959+
*cacheSize = 32;
960+
}
961+
962+
*cacheSize *= (1024 * 1024);
963+
}
924964
#endif
965+
}
966+
967+
static size_t GetLogicalProcessorCacheSizeFromOS()
968+
{
969+
size_t cacheLevel = 0;
970+
size_t cacheSize = 0;
971+
972+
if (GCConfig::GetGCCacheSizeFromSysConf())
973+
{
974+
GetLogicalProcessorCacheSizeFromSysConf(&cacheLevel, &cacheSize);
975+
}
976+
977+
if (cacheSize == 0)
978+
{
979+
GetLogicalProcessorCacheSizeFromSysFs(&cacheLevel, &cacheSize);
980+
if (cacheSize == 0)
981+
{
982+
GetLogicalProcessorCacheSizeFromHeuristic(&cacheLevel, &cacheSize);
983+
}
984+
}
925985

926986
#if HAVE_SYSCTLBYNAME
927987
if (cacheSize == 0)
@@ -948,32 +1008,7 @@ static size_t GetLogicalProcessorCacheSizeFromOS()
9481008
#if (defined(HOST_ARM64) || defined(HOST_LOONGARCH64)) && !defined(TARGET_APPLE)
9491009
if (cacheLevel != 3)
9501010
{
951-
// We expect to get the L3 cache size for Arm64 but currently expected to be missing that info
952-
// from most of the machines.
953-
// Hence, just use the following heuristics at best depending on the CPU count
954-
// 1 ~ 4 : 4 MB
955-
// 5 ~ 16 : 8 MB
956-
// 17 ~ 64 : 16 MB
957-
// 65+ : 32 MB
958-
DWORD logicalCPUs = g_processAffinitySet.Count();
959-
if (logicalCPUs < 5)
960-
{
961-
cacheSize = 4;
962-
}
963-
else if (logicalCPUs < 17)
964-
{
965-
cacheSize = 8;
966-
}
967-
else if (logicalCPUs < 65)
968-
{
969-
cacheSize = 16;
970-
}
971-
else
972-
{
973-
cacheSize = 32;
974-
}
975-
976-
cacheSize *= (1024 * 1024);
1011+
GetLogicalProcessorCacheSizeFromHeuristic(&cacheLevel, &cacheSize);
9771012
}
9781013
#endif
9791014

0 commit comments

Comments
 (0)