Skip to content

Commit

Permalink
Configurable TLS support for HPC-GAP.
Browse files Browse the repository at this point in the history
  • Loading branch information
rbehrends authored and fingolfin committed Nov 14, 2019
1 parent 79118b8 commit 3111e67
Show file tree
Hide file tree
Showing 9 changed files with 381 additions and 29 deletions.
131 changes: 131 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ AX_GCC_FUNC_ATTRIBUTE([always_inline])
AX_GCC_FUNC_ATTRIBUTE([format])
AX_GCC_FUNC_ATTRIBUTE([noinline])
AX_GCC_FUNC_ATTRIBUTE([noreturn])
AX_GCC_FUNC_ATTRIBUTE([constructor])
AX_GCC_FUNC_ATTRIBUTE([pure])

dnl compiler builtins
AC_DEFUN([CHECK_COMPILER_BUILTIN],
Expand Down Expand Up @@ -202,6 +204,135 @@ AS_CASE([$with_gc],
)
AC_MSG_RESULT([$with_gc])

dnl
dnl User setting: native thread-local storage (off by default)
dnl See src/hpc/tls.h for more details on thread-local storage options.
dnl

dnl Declares the --enable-native-tls configure option. The outcome is stored
dnl in enable_native_tls: the value supplied with the option when it is
dnl given, and "no" when the option is absent.
AC_ARG_ENABLE([native-tls],
[AS_HELP_STRING([--enable-native-tls],
[use native thread-local storage implementation])],
[enable_native_tls=$enableval],
[enable_native_tls=no])
AC_MSG_CHECKING([whether to use native tls])
dnl
dnl Distinguish between cases where we support __thread declarations
dnl and situations where we use a pthread_getspecific() implementation.
dnl Right now, we only do the latter for 64-bit macOS. See src/hpc/tls.h
dnl for details.
dnl
enable_macos_tls_asm=default
dnl Decide which thread-local storage mechanism is used.
dnl   * option not enabled: report "no" and define nothing.
dnl   * x86_64 macOS host: define USE_PTHREAD_TLS, then run a small program
dnl     that checks whether pthread_getspecific() starts with one of two
dnl     known instruction sequences; if it does, USE_MACOS_PTHREAD_TLS_ASM
dnl     is defined as well.
dnl   * any other host: define USE_NATIVE_TLS.
dnl The string comparison uses "=" because "==" is not a portable operator
dnl of the test builtin.
AS_IF([[test "x$enable_native_tls" = "xyes"]], [
  case "$host" in
    x86_64-apple-darwin*)
      AC_DEFINE([USE_PTHREAD_TLS], [1], [define as 1 if using pthread_getspecific])
      dnl
      dnl Test if we can optimize pthread_getspecific() calls via
      dnl inline assembly on macOS.
      dnl
      AC_RUN_IFELSE(
        [AC_LANG_SOURCE([[
// The following code also occurs in src/hpc/thread.c and both need to be
// kept in sync.
#include <pthread.h>
#include <string.h>
#define OFFS 0x100
#define END (-1)
// Compare the bytes at code with the pattern in with, which is terminated
// by END. A pattern entry equal to OFFS matches any byte, and that byte
// becomes the return value on a full match (0 if the pattern contains no
// OFFS entry). Returns -1 at the first mismatch.
int cmpOpCode(unsigned char *code, int *with) {
  int result = 0;
  while (*with >= 0) {
    if (*with == OFFS) {
      result = *code;
    } else {
      if (*code != *with)
        return -1;
    }
    code++;
    with++;
  }
  return result;
}
// Exit status 0 means one of the known instruction sequences was found,
// exit status 1 means neither matched.
int main() {
  // This is an idea borrowed from Mono. We test if the implementation
  // of pthread_getspecific() uses the assembly code below. If that is
  // true, we can replace calls to pthread_getspecific() with the
  // matching inline assembly, allowing a significant performance boost.
#if defined(__APPLE__) && defined(__x86_64__)
  // There are two possible implementations.
  static int asm_code[] = {
    // movq %gs:[OFFS](,%rdi,8), %rax
    // retq
    0x65, 0x48, 0x8b, 0x04, 0xfd, OFFS, 0x00, 0x00, 0x00, 0xc3, END
  };
  static int asm_code2[] = {
    // pushq %rbp
    // movq %rsp, %rbp
    // movq %gs:[OFFS](,%rdi,8),%rax
    // popq %rbp
    // retq
    0x55, 0x48, 0x89, 0xe5, 0x65, 0x48, 0x8b, 0x04, 0xfd, OFFS,
    0x00, 0x00, 0x00, 0x5d, 0xc3, END
  };
  if (cmpOpCode((unsigned char *)pthread_getspecific, asm_code) >= 0) {
    return 0;
  }
  if (cmpOpCode((unsigned char *)pthread_getspecific, asm_code2) >= 0) {
    return 0;
  }
  return 1;
#else
#error FAIL
#endif
}
]])],
        dnl => Test succeeded
        [AC_DEFINE([USE_MACOS_PTHREAD_TLS_ASM], [1],
           [define as 1 if macOS assembly implementation supported])
         AC_MSG_RESULT([[yes (macOS assembly)]])
         enable_macos_tls_asm=yes
        ],
        dnl => Test failed
        [AC_MSG_RESULT([[yes (pthread-based)]])
         enable_macos_tls_asm=no
        ],
        dnl => Cross-compilation, test impossible
        [AC_MSG_RESULT([[yes (pthread-based)]])
         enable_macos_tls_asm=no
        ])
      ;;
    *)
      dnl => Any other OS, --enable-native-tls specified
      AC_DEFINE([USE_NATIVE_TLS], [1], [define as 1 if using native TLS])
      AC_MSG_RESULT([yes])
      ;;
  esac
],
[
  dnl => native TLS not requested (the default, or --disable-native-tls)
  AC_MSG_RESULT([no])
]
)
dnl
dnl Test if pthread_getspecific() can be overridden as
dnl __attribute__((pure)).
dnl
dnl Compile-only probe: after including pthread.h, redeclare
dnl pthread_getspecific() with the pure attribute. If the compiler accepts
dnl the redeclaration, ALLOW_PURE_PTHREAD_GETSPECIFIC is defined as 1.
AC_MSG_CHECKING([[whether pthread_getspecific() can be made pure]])
AC_COMPILE_IFELSE(
  [AC_LANG_SOURCE([[
#include <pthread.h>
__attribute__((pure))
void * pthread_getspecific(pthread_key_t key);
]])], [
  AC_MSG_RESULT([yes])
  AC_DEFINE([ALLOW_PURE_PTHREAD_GETSPECIFIC], [1],
    [define as 1 if pthread_getspecific() can be declared pure])
], [
  AC_MSG_RESULT([no])
])

dnl
dnl User setting: Debug mode (off by default)
dnl
Expand Down
1 change: 0 additions & 1 deletion src/hpc/guards.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ static ALWAYS_INLINE Bag ImpliedReadGuard(Bag bag)
return bag;
}


static ALWAYS_INLINE int CheckReadAccess(Bag bag)
{
Region *region;
Expand Down
119 changes: 106 additions & 13 deletions src/hpc/thread.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,95 @@ void UnlockThreadControl(void)
#define MAP_ANONYMOUS MAP_ANON
#endif

#ifndef HAVE_NATIVE_TLS
#ifndef USE_NATIVE_TLS

#ifdef USE_PTHREAD_TLS
/* Non-zero once CreateTLSKey() has run; GetTLSKey() and GetTLSOffset() test
 * it to decide whether the key still has to be created. */
static int InitTLSKey;
/* pthread key set up by CreateTLSKey(). AllocateTLS() passes it to
 * pthread_getspecific() / pthread_setspecific() to look up and store the
 * calling thread's state block. */
static pthread_key_t TLSKey;
#ifdef USE_MACOS_PTHREAD_TLS_ASM
static UInt TLSOffset;

// The following code also occurs in configure.ac, and both need to be
// kept in sync.

#define OFFS 0x100
#define END (-1)

// Match the machine code at `code` against the pattern `with`.
//
// The pattern is a sequence of byte values terminated by a negative entry
// (END). An entry equal to OFFS is a wildcard: it matches any byte, and the
// byte found at that position is what the function returns on a full match.
// A pattern without a wildcard yields 0 on a match. The first byte that
// differs from its pattern entry makes the function return -1.
int cmpOpCode(unsigned char *code, int *with) {
    int captured = 0;
    for (int pos = 0; with[pos] >= 0; pos++) {
        if (with[pos] == OFFS) {
            // Wildcard slot: remember the byte and keep going.
            captured = code[pos];
            continue;
        }
        if (code[pos] != with[pos])
            return -1;
    }
    return captured;
}

void FindTLSOffset() {
// This is an idea borrowed from Mono. We test if the implementation
// of pthread_getspecific() uses the assembly code below. If that is
// true, we can replace calls to pthread_getspecific() with the
// matching inline assembly, allowing a significant performance boost.
// There are two possible implementations.
static int asm_code[] = {
// movq %gs:[OFFS](,%rdi,8), %rax
// retq
0x65, 0x48, 0x8b, 0x04, 0xfd, OFFS, 0x00, 0x00, 0x00, 0xc3, END
};
static int asm_code2[] = {
// pushq %rbp
// movq %rsp, %rbp
// movq %gs:[OFFS](,%rdi,8),%rax
// popq %rbp
// retq
0x55, 0x48, 0x89, 0xe5, 0x65, 0x48, 0x8b, 0x04, 0xfd, OFFS,
0x00, 0x00, 0x00, 0x5d, 0xc3, END
};
TLSOffset = cmpOpCode((unsigned char *)pthread_getspecific, asm_code);
if (TLSOffset >= 0)
return;
TLSOffset = cmpOpCode((unsigned char *)pthread_getspecific, asm_code2);
if (TLSOffset >= 0)
return;
Panic("Unable to find macOS thread-local storage offset");
}
#endif

// Create the pthread key stored in TLSKey and, when the macOS assembly
// fast path is compiled in, look up the matching offset via
// FindTLSOffset(). Sets InitTLSKey to 1 once everything succeeded.
static void CreateTLSKey(void)
{
    // pthread_key_create() returns non-zero on failure. Continuing would
    // make every later pthread_getspecific() / pthread_setspecific() call
    // use a key that was never created, so treat failure as fatal.
    if (pthread_key_create(&TLSKey, NULL) != 0)
        Panic("Unable to create thread-local storage key");
#ifdef USE_MACOS_PTHREAD_TLS_ASM
    FindTLSOffset();
#endif
    InitTLSKey = 1;
}

#ifdef USE_MACOS_PTHREAD_TLS_ASM
UInt GetTLSOffset(void)
{
if (!InitTLSKey) {
CreateTLSKey();
}
return (UInt)TLSKey * sizeof(void *) + TLSOffset;
}
#endif
// Return the pthread key used for thread-local storage, creating it on the
// first call.
pthread_key_t GetTLSKey(void)
{
    if (InitTLSKey == 0)
        CreateTLSKey();
    return TLSKey;
}
#endif /* USE_PTHREAD_TLS */

void * AllocateTLS(void)
{
#ifndef USE_PTHREAD_TLS
void * addr;
void * result;
size_t pagesize = getpagesize();
Expand All @@ -126,6 +211,14 @@ void * AllocateTLS(void)
mprotect((char *)result + tlssize, pagesize, PROT_NONE);
#endif
return result;
#else
void * result = pthread_getspecific(GetTLSKey());
if (!result) {
result = malloc(sizeof(GAPState));
pthread_setspecific(GetTLSKey(), result);
}
return result;
#endif /* USE_PTHREAD_TLS */
}

void FreeTLS(void * address)
Expand All @@ -137,7 +230,7 @@ void FreeTLS(void * address)
#endif
}

#endif /* HAVE_NATIVE_TLS */
#endif /* USE_NATIVE_TLS */

#ifndef DISABLE_GC
void AddGCRoots(void)
Expand All @@ -157,7 +250,7 @@ static void RemoveGCRoots(void)
}
#endif /* DISABLE_GC */

#ifndef HAVE_NATIVE_TLS
#if !defined(USE_NATIVE_TLS) && !defined(USE_PTHREAD_TLS)

/* In order to safely use thread-local memory on the main stack, we have
* to work around an idiosyncrasy in some virtual memory systems. These
Expand Down Expand Up @@ -191,7 +284,7 @@ static NOINLINE void GrowStack(void)

static NOINLINE void SetupTLS(void)
{
#ifndef HAVE_NATIVE_TLS
#if !defined(USE_NATIVE_TLS) && !defined(USE_PTHREAD_TLS)
GrowStack();
#endif
InitializeTLS();
Expand All @@ -212,7 +305,7 @@ void RunThreadedMain(int (*mainFunction)(int, char **),
int argc,
char ** argv)
{
#ifndef HAVE_NATIVE_TLS
#ifndef USE_NATIVE_TLS
#ifdef STACK_GROWS_UP
#error Upward growing stack not yet supported
#else
Expand Down Expand Up @@ -355,9 +448,8 @@ static void * DispatchThread(void * arg)
Obj RunThread(void (*start)(void *), void * arg)
{
ThreadData * result;
#ifndef HAVE_NATIVE_TLS
#ifndef USE_NATIVE_TLS
void * tls;
size_t pagesize = getpagesize();
#endif
pthread_attr_t thread_attr;
LockThreadControl(1);
Expand All @@ -370,7 +462,7 @@ Obj RunThread(void (*start)(void *), void * arg)
}
result = thread_free_list;
thread_free_list = thread_free_list->next;
#ifndef HAVE_NATIVE_TLS
#ifndef USE_NATIVE_TLS
if (!result->tls)
result->tls = AllocateTLS();
tls = result->tls;
Expand All @@ -395,7 +487,8 @@ Obj RunThread(void (*start)(void *), void * arg)
result->thread_object = NewThreadObject(result - thread_data);
/* set up the thread attribute to support a custom stack in our TLS */
pthread_attr_init(&thread_attr);
#ifndef HAVE_NATIVE_TLS
#if !defined(USE_NATIVE_TLS) && !defined(USE_PTHREAD_TLS)
size_t pagesize = getpagesize();
pthread_attr_setstack(&thread_attr, (char *)tls + pagesize * 2,
TLS_SIZE - pagesize * 2);
#endif
Expand All @@ -411,7 +504,7 @@ Obj RunThread(void (*start)(void *), void * arg)
thread_free_list = result;
UnlockThreadControl();
pthread_attr_destroy(&thread_attr);
#ifndef HAVE_NATIVE_TLS
#ifndef USE_NATIVE_TLS
FreeTLS(tls);
#endif
return (Obj)0;
Expand All @@ -424,15 +517,15 @@ int JoinThread(int id)
{
pthread_t pthread_id;
void (*start)(void *);
#ifndef HAVE_NATIVE_TLS
#ifndef USE_NATIVE_TLS
void * tls;
#endif
if (id < 0 || id >= MAX_THREADS)
return 0;
LockThreadControl(1);
pthread_id = thread_data[id].pthread_id;
start = thread_data[id].start;
#ifndef HAVE_NATIVE_TLS
#ifndef USE_NATIVE_TLS
tls = thread_data[id].tls;
#endif
if (thread_data[id].joined || start == NULL) {
Expand All @@ -451,7 +544,7 @@ int JoinThread(int id)
*/
thread_data[id].start = NULL;
UnlockThreadControl();
#ifndef HAVE_NATIVE_TLS
#ifndef USE_NATIVE_TLS
FreeTLS(tls);
#endif
return 1;
Expand Down
2 changes: 1 addition & 1 deletion src/hpc/thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

extern int PreThreadCreation;

#ifndef HAVE_NATIVE_TLS
#ifndef USE_NATIVE_TLS
void *AllocateTLS(void);
void FreeTLS(void *address);
#endif
Expand Down
Loading

0 comments on commit 3111e67

Please sign in to comment.