Skip to content

Enable TLS on linux/arm64 only for static resolver #106052

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Aug 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions src/coreclr/hosts/corerun/corerun.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,7 @@ static void display_usage()
W(" -p, --property - Property to pass to runtime during initialization.\n")
W(" If a property value contains spaces, quote the entire argument.\n")
W(" May be supplied multiple times. Format: <key>=<value>.\n")
W(" -l, --preload - path to shared library to load before loading the CLR.\n")
W(" -d, --debug - causes corerun to wait for a debugger to attach before executing.\n")
W(" -e, --env - path to a .env file with environment variables that corerun should set.\n")
W(" -?, -h, --help - show this help.\n")
Expand Down Expand Up @@ -569,6 +570,22 @@ static bool parse_args(
config.user_defined_keys.push_back(std::move(key));
config.user_defined_values.push_back(std::move(value));
}
else if (pal::strcmp(option, W("l")) == 0 || (pal::strcmp(option, W("preload")) == 0))
{
i++;
if (i >= argc)
{
pal::fprintf(stderr, W("Option %s: missing shared library path\n"), arg);
break;
}

string_t library = argv[i];
pal::mod_t hMod;
if (!pal::try_load_library(library, hMod))
{
break;
}
}
else if (pal::strcmp(option, W("d")) == 0 || (pal::strcmp(option, W("debug")) == 0))
{
config.wait_to_debug = true;
Expand Down
23 changes: 23 additions & 0 deletions src/coreclr/hosts/corerun/corerun.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,17 @@ namespace pal
return hMod != nullptr;
}

// Attempts to load the native library at `path` via LoadLibraryExW (no file handle, flags value 0).
//
// path - path of the library to load.
// hMod - receives the result of the load attempt; nullptr when loading failed.
//
// Returns true when a module handle was obtained. On failure, prints the path and the
// GetLastError() code to stderr and returns false.
inline bool try_load_library(const pal::string_t& path, pal::mod_t& hMod)
{
    hMod = (pal::mod_t)::LoadLibraryExW(path.c_str(), nullptr, 0);
    if (hMod != nullptr)
        return true;

    // Report the failure so the user can tell which library could not be loaded and why.
    pal::fprintf(stderr, W("Failed to load: '%s'. Error: 0x%08x\n"), path.c_str(), ::GetLastError());
    return false;
}

inline bool try_load_coreclr(const pal::string_t& core_root, pal::mod_t& hMod)
{
pal::string_t coreclr_path = core_root;
Expand Down Expand Up @@ -600,6 +611,18 @@ namespace pal
return hMod != nullptr;
}

// Attempts to load the shared library at `path` via dlopen(RTLD_NOW | RTLD_LOCAL).
//
// path - path of the library to load.
// hMod - receives the result of the load attempt; nullptr when loading failed.
//
// Returns true when a handle was obtained. On failure, prints the path and the
// dlerror() text to stderr and returns false.
inline bool try_load_library(const pal::string_t& path, pal::mod_t& hMod)
{
    hMod = (pal::mod_t)dlopen(path.c_str(), RTLD_NOW | RTLD_LOCAL);

    const bool loaded = (hMod != nullptr);
    if (!loaded)
    {
        // Report the failure so the user can tell which library could not be loaded and why.
        pal::fprintf(stderr, W("Failed to load: '%s'. Error: %s\n"), path.c_str(), dlerror());
    }

    return loaded;
}


inline bool try_load_coreclr(const pal::string_t& core_root, pal::mod_t& hMod)
{
pal::string_t coreclr_path = core_root;
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/inc/clrconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,8 @@ CONFIG_STRING_INFO(INTERNAL_TailCallMax, W("TailCallMax"), "")
RETAIL_CONFIG_STRING_INFO(EXTERNAL_TailCallOpt, W("TailCallOpt"), "")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TailCallLoopOpt, W("TailCallLoopOpt"), 1, "Convert recursive tail calls to loops")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_Jit_NetFx40PInvokeStackResilience, W("NetFx40_PInvokeStackResilience"), (DWORD)-1, "Makes P/Invoke resilient against mismatched signature and calling convention (significant perf penalty).")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_DisableOptimizedThreadStaticAccess, W("DisableOptimizedThreadStaticAccess"), (DWORD)0, "Disable the OptimizedThreadStaticAccess feature.")
CONFIG_DWORD_INFO(EXTERNAL_AssertNotStaticTlsResolver, W("AssertNotStaticTlsResolver"), (DWORD)0, "Assert if we attempt to use the static tls resolver path.")

// AltJitAssertOnNYI should be 0 on targets where JIT is under development or bring up stage, so as to facilitate fallback to main JIT on hitting a NYI.
#if defined(TARGET_X86)
Expand Down
14 changes: 14 additions & 0 deletions src/coreclr/vm/arm64/asmhelpers.S
Original file line number Diff line number Diff line change
Expand Up @@ -794,4 +794,18 @@ LEAF_ENTRY GetThreadStaticsVariableOffset, _TEXT
EPILOG_RETURN
LEAF_END GetThreadStaticsVariableOffset, _TEXT
// ------------------------------------------------------------------

// ------------------------------------------------------------------
// size_t GetTLSResolverAddress()

// Helper to get the TLS resolver address. The caller then uses this address to determine
// whether a static or a dynamic TLS resolver is in use.
//
// Takes no arguments. Returns in x0 the 64-bit value loaded from the TLS descriptor
// entry of t_ThreadStatics; x1 is used as a scratch register.
LEAF_ENTRY GetTLSResolverAddress, _TEXT
// Save fp/lr using the 32-byte indexed frame requested by the macro arguments.
PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -32
// x0 = page address of the TLS descriptor entry for t_ThreadStatics (:tlsdesc: relocation).
adrp x0, :tlsdesc:t_ThreadStatics
// x1 = 8-byte value at that descriptor entry (page address + low 12 bits).
// NOTE(review): in the AArch64 TLSDESC sequence this word is the resolver function pointer - confirm against the ABI.
ldr x1, [x0, #:tlsdesc_lo12:t_ThreadStatics]
// Move the loaded value into x0, the return register.
mov x0, x1
// Restore fp/lr and release the 32-byte frame.
EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32
EPILOG_RETURN
LEAF_END GetTLSResolverAddress, _TEXT
// ------------------------------------------------------------------
#endif // !TARGET_OSX
3 changes: 3 additions & 0 deletions src/coreclr/vm/eeconfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ HRESULT EEConfig::Init()
fJitFramed = false;
fJitMinOpts = false;
fJitEnableOptionalRelocs = false;
fDisableOptimizedThreadStaticAccess = false;
fPInvokeRestoreEsp = (DWORD)-1;

fStressLog = false;
Expand Down Expand Up @@ -503,6 +504,8 @@ HRESULT EEConfig::sync()
iJitOptimizeType = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_JitOptimizeType);
if (iJitOptimizeType > OPT_RANDOM) iJitOptimizeType = OPT_DEFAULT;

fDisableOptimizedThreadStaticAccess = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_DisableOptimizedThreadStaticAccess) != 0;

#ifdef TARGET_X86
fPInvokeRestoreEsp = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_Jit_NetFx40PInvokeStackResilience);
#endif
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/vm/eeconfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ class EEConfig
bool JitFramed(void) const {LIMITED_METHOD_CONTRACT; return fJitFramed; }
bool JitMinOpts(void) const {LIMITED_METHOD_CONTRACT; return fJitMinOpts; }
bool JitEnableOptionalRelocs(void) const {LIMITED_METHOD_CONTRACT; return fJitEnableOptionalRelocs; }
bool DisableOptimizedThreadStaticAccess(void) const {LIMITED_METHOD_CONTRACT; return fDisableOptimizedThreadStaticAccess; }

// Tiered Compilation config
#if defined(FEATURE_TIERED_COMPILATION)
Expand Down Expand Up @@ -459,6 +460,7 @@ class EEConfig
bool fJitFramed; // Enable/Disable EBP based frames
bool fJitMinOpts; // Enable MinOpts for all jitted methods
bool fJitEnableOptionalRelocs; // Allow optional relocs
bool fDisableOptimizedThreadStaticAccess; // Disable OptimizedThreadStatic access

unsigned iJitOptimizeType; // 0=Blended,1=SmallCode,2=FastCode, default is 0=Blended

Expand Down
40 changes: 40 additions & 0 deletions src/coreclr/vm/threadstatics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -784,6 +784,10 @@ void FreeTLSIndicesForLoaderAllocator(LoaderAllocator *pLoaderAllocator)

static void* GetTlsIndexObjectAddress();

#if !defined(TARGET_OSX) && defined(TARGET_UNIX) && defined(TARGET_ARM64)
extern "C" size_t GetTLSResolverAddress();
#endif // !TARGET_OSX && TARGET_UNIX && TARGET_ARM64

bool CanJITOptimizeTLSAccess()
{
LIMITED_METHOD_CONTRACT;
Expand All @@ -799,6 +803,36 @@ bool CanJITOptimizeTLSAccess()
// Optimization is disabled for FreeBSD/arm64
#elif defined(FEATURE_INTERPRETER)
// Optimization is disabled when interpreter may be used
#elif !defined(TARGET_OSX) && defined(TARGET_UNIX) && defined(TARGET_ARM64)
// Optimization is enabled for linux/arm64 only for static resolver.
// For static resolver, the TP offset is same for all threads.
// For dynamic resolver, TP offset returned is for the current thread and
// will be different for the other threads.
uint32_t* resolverAddress = reinterpret_cast<uint32_t*>(GetTLSResolverAddress());
int ip = 0;
if ((resolverAddress[ip] == 0xd503201f) || (resolverAddress[ip] == 0xd503241f))
{
// nop might not be present in older resolver, so skip it.

// nop or hint 32
ip++;
}

if (
// ldr x0, [x0, #8]
(resolverAddress[ip] == 0xf9400400) &&
// ret
(resolverAddress[ip + 1] == 0xd65f03c0)
)
{
optimizeThreadStaticAccess = true;
#ifdef _DEBUG
if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_AssertNotStaticTlsResolver) != 0)
{
_ASSERTE(!"Detected static resolver in use when not expected");
}
#endif
}
#else
optimizeThreadStaticAccess = true;
#if !defined(TARGET_OSX) && defined(TARGET_UNIX) && defined(TARGET_AMD64)
Expand All @@ -808,6 +842,12 @@ bool CanJITOptimizeTLSAccess()
optimizeThreadStaticAccess = GetTlsIndexObjectAddress() != nullptr;
#endif // !TARGET_OSX && TARGET_UNIX && TARGET_AMD64
#endif

if (g_pConfig->DisableOptimizedThreadStaticAccess())
{
optimizeThreadStaticAccess = false;
}

return optimizeThreadStaticAccess;
}

Expand Down
8 changes: 8 additions & 0 deletions src/tests/JIT/Directed/tls/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Licensed to the .NET Foundation under one or more agreements.
# The .NET Foundation licenses this file to you under the MIT license.

# Add the directory named by INC_PLATFORM_DIR to the include path
# (the variable is not set in this file; presumably provided by the enclosing test build - confirm).
include_directories(${INC_PLATFORM_DIR})

# Build testtls.cpp as the shared library target "usetls".
add_library(usetls SHARED testtls.cpp)

# Install the built library into the "bin" destination.
install (TARGETS usetls DESTINATION bin)
130 changes: 130 additions & 0 deletions src/tests/JIT/Directed/tls/TestTLSWithLoadedDlls.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.


// This test verifies that the runtime correctly handles the case where the TLS infrastructure in the runtime is forced
// to use a dynamic resolver. It relies on a private config variable to validate the behavior on Linux Arm64,
// together with a set of multithreaded tasks that has been known to crash the runtime when this is handled incorrectly.

using System;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Runtime.InteropServices;
using System.Runtime.Loader;
using System.Text;
using System.Threading;
using System.Threading.Tasks;

namespace TestTLSWithLoadedDlls
{
    /// <summary>
    /// Relaunches this test assembly under corerun with many copies of the native "usetls" library
    /// preloaded through corerun's "-l" option, and has the child process run bursts of
    /// multithreaded async work.
    /// </summary>
    static class TLSWithLoadedDlls
    {
        // Number of copies of the TLS-using library that the child corerun is asked to preload.
        private const int DefaultCountOfLibTlsToLoad = 60;

        // Windows has a comparatively short command-line length limit, and does not have a problem
        // with many TLS-using images being loaded, so fewer copies are used there.
        private const int WindowsCountOfLibTlsToLoad = 10;

        // Base name (without platform prefix/suffix) of the native library built next to this test.
        private const string UseTlsLibraryName = "usetls";

        // Command-line argument that switches Main into "child" mode.
        private const string RunLotsOfTasksArgument = "RunLotsOfTasks";

        // Exit code returned by the child process after it has completed its work.
        private const int ChildSuccessExitCode = 100;

        /// <summary>
        /// Runs <paramref name="loopCount"/> batches of 100 short tasks, awaiting each batch and
        /// pausing 20 ms between batches.
        /// </summary>
        /// <param name="loopCount">Number of batches to run.</param>
        static async Task DoLotsOfAsyncWork(int loopCount)
        {
            for (int i = 0; i < loopCount; i++)
            {
                Console.WriteLine("Starting a new batch of tasks...");
                var tasks = Enumerable.Range(1, 100).Select(_ => Task.Run(async () =>
                {
                    await Task.Delay(1);
                })).ToArray();

                await Task.WhenAll(tasks);

                Console.WriteLine("Batch of tasks completed. Main loop sleeping for 20 ms...");
                await Task.Delay(20);
            }
        }

        /// <summary>
        /// Test entry point. With the single argument "RunLotsOfTasks" it performs the async
        /// workload and returns 100. Otherwise it copies the usetls library into numbered
        /// sub-directories, launches corerun with one "-l" option per copy plus this assembly
        /// and "RunLotsOfTasks", and returns the child's exit code.
        /// </summary>
        /// <param name="args">Command-line arguments.</param>
        /// <returns>100 in child mode; otherwise the exit code of the launched corerun process.</returns>
        static int Main(string[] args)
        {
            if ((args.Length == 1) && (args[0] == RunLotsOfTasksArgument))
            {
                DoLotsOfAsyncWork(100).GetAwaiter().GetResult();
                return ChildSuccessExitCode;
            }

            int countOfLibTlsToLoad = OperatingSystem.IsWindows()
                ? WindowsCountOfLibTlsToLoad
                : DefaultCountOfLibTlsToLoad;

            string testAssemblyPath = Assembly.GetExecutingAssembly().Location;
            string testDirectory = Path.GetDirectoryName(testAssemblyPath);
            string useTlsFileName = GetSharedLibraryFileNameForCurrentPlatform(UseTlsLibraryName);
            string useTlsFilePath = Path.Combine(testDirectory, useTlsFileName);

            // Dispose the Process object (and its OS handle) once the child has exited.
            using Process process = new Process();
            process.StartInfo.FileName = GetCorerunPath();
            process.StartInfo.UseShellExecute = false;
            process.StartInfo.EnvironmentVariables["DOTNET_AssertNotStaticTlsResolver"] = "1";

            for (int i = 0; i < countOfLibTlsToLoad; i++)
            {
                // Each copy lives in its own numbered directory so that every "-l" path is distinct.
                string tlsNumberSpecificPath = Path.Combine(testDirectory, i.ToString());
                string finalUseTlsPath = Path.Combine(tlsNumberSpecificPath, useTlsFileName);

                Directory.CreateDirectory(tlsNumberSpecificPath);
                if (!File.Exists(finalUseTlsPath))
                {
                    File.Copy(
                        useTlsFilePath,
                        finalUseTlsPath);
                }

                // ArgumentList escapes each argument individually, so paths containing spaces
                // are passed to corerun intact.
                process.StartInfo.ArgumentList.Add("-l");
                process.StartInfo.ArgumentList.Add(finalUseTlsPath);
            }

            process.StartInfo.ArgumentList.Add(testAssemblyPath);
            process.StartInfo.ArgumentList.Add(RunLotsOfTasksArgument);

            string displayedArguments = string.Join(" ", process.StartInfo.ArgumentList);
            Console.WriteLine($"Launching {process.StartInfo.FileName} {displayedArguments}");

            process.Start();
            process.WaitForExit();
            return process.ExitCode;
        }

        /// <summary>
        /// Builds the path of the corerun executable inside the directory named by the
        /// CORE_ROOT environment variable.
        /// </summary>
        /// <exception cref="InvalidOperationException">CORE_ROOT is not set or is empty.</exception>
        private static string GetCorerunPath()
        {
            string coreRoot = Environment.GetEnvironmentVariable("CORE_ROOT");
            if (string.IsNullOrEmpty(coreRoot))
            {
                throw new InvalidOperationException("The CORE_ROOT environment variable must be set to locate corerun.");
            }

            string corerunName = OperatingSystem.IsWindows() ? "CoreRun.exe" : "corerun";
            return Path.Combine(coreRoot, corerunName);
        }

        /// <summary>
        /// Returns the shared-library file name prefix and suffix for the current platform:
        /// ("", ".dll") on Windows, ("lib", ".dylib") on macOS, and ("lib", ".so") otherwise.
        /// </summary>
        public static (string, string) GetSharedLibraryPrefixSuffix()
        {
            if (OperatingSystem.IsWindows())
                return (string.Empty, ".dll");

            if (OperatingSystem.IsMacOS())
                return ("lib", ".dylib");

            return ("lib", ".so");
        }

        /// <summary>
        /// Wraps <paramref name="libraryName"/> in the current platform's shared-library
        /// prefix and suffix.
        /// </summary>
        /// <param name="libraryName">Library base name, e.g. "usetls".</param>
        public static string GetSharedLibraryFileNameForCurrentPlatform(string libraryName)
        {
            (string prefix, string suffix) = GetSharedLibraryPrefixSuffix();
            return prefix + libraryName + suffix;
        }
    }
}
16 changes: 16 additions & 0 deletions src/tests/JIT/Directed/tls/TestTLSWithLoadedDlls.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Test project for TestTLSWithLoadedDlls.cs, which relaunches itself under corerun with the native usetls library preloaded. -->
<PropertyGroup>
<CLRTestPriority>0</CLRTestPriority>
<!-- NOTE(review): presumably required because the test starts its own child corerun process; confirm. -->
<RequiresProcessIsolation>true</RequiresProcessIsolation>
<ReferenceXUnitWrapperGenerator>false</ReferenceXUnitWrapperGenerator>
<!-- NOTE(review): presumably incompatible because the test depends on launching corerun; confirm. -->
<NativeAotIncompatible>true</NativeAotIncompatible>
</PropertyGroup>
<PropertyGroup>
<DebugType>PdbOnly</DebugType>
<Optimize>True</Optimize>
</PropertyGroup>
<ItemGroup>
<!-- Compile the C# source file that shares the project's name. -->
<Compile Include="$(MSBuildProjectName).cs" />
<!-- Reference the CMake project in this directory, which builds the native usetls library from testtls.cpp. -->
<CMakeProjectReference Include="CMakeLists.txt" />
</ItemGroup>
</Project>
46 changes: 46 additions & 0 deletions src/tests/JIT/Directed/tls/testtls.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

// Select the compiler-specific annotation that exports a symbol from the shared library.
#if defined(_MSC_VER)
#define DLLEXPORT __declspec(dllexport)
#else
#define DLLEXPORT __attribute__((visibility("default")))
#endif // _MSC_VER

// Seventeen independent thread-local integers owned by this library.
// NOTE(review): presumably they exist only so that every loaded copy of the library
// carries its own thread-local storage - confirm against the test that loads it.
thread_local int tls0;
thread_local int tls1;
thread_local int tls2;
thread_local int tls3;
thread_local int tls4;
thread_local int tls5;
thread_local int tls6;
thread_local int tls7;
thread_local int tls8;
thread_local int tls9;
thread_local int tls10;
thread_local int tls11;
thread_local int tls12;
thread_local int tls13;
thread_local int tls14;
thread_local int tls15;
thread_local int tls16;

// Exported with C linkage. Sets every thread-local variable above to zero
// for the calling thread.
extern "C" DLLEXPORT void initializeTLS() {
    int* const slots[] = {
        &tls0,  &tls1,  &tls2,  &tls3,  &tls4,  &tls5,
        &tls6,  &tls7,  &tls8,  &tls9,  &tls10, &tls11,
        &tls12, &tls13, &tls14, &tls15, &tls16,
    };

    for (int* slot : slots) {
        *slot = 0;
    }
}
Loading