Skip to content

Commit 557fcb7

Browse files
committed
fix jl_active_task_stack to return correct values
Seems potentially slightly costly to need to generate these extra writes for every safepoint transition to keep track of the current stack location, but that is potentially an unavoidable cost to doing conservative stack scanning for gc.
1 parent efc43cb commit 557fcb7

13 files changed

+176
-188
lines changed

src/gc-stock.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3335,6 +3335,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
33353335
}
33363336
jl_gc_debug_print();
33373337

3338+
ct->ctx.activefp = (char*)jl_get_frame_addr();
33383339
int8_t old_state = jl_atomic_load_relaxed(&ptls->gc_state);
33393340
jl_atomic_store_release(&ptls->gc_state, JL_GC_STATE_WAITING);
33403341
// `jl_safepoint_start_gc()` makes sure only one thread can run the GC.

src/jl_exported_funcs.inc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -450,7 +450,6 @@
450450
XX(jl_tagged_gensym) \
451451
XX(jl_take_buffer) \
452452
XX(jl_task_get_next) \
453-
XX(jl_task_stack_buffer) \
454453
XX(jl_termios_size) \
455454
XX(jl_test_cpu_feature) \
456455
XX(jl_threadid) \

src/julia.h

Lines changed: 8 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -73,21 +73,20 @@ typedef struct _jl_tls_states_t *jl_ptls_t;
7373
#endif
7474
#include "gc-interface.h"
7575
#include "julia_atomics.h"
76-
#include "julia_threads.h"
7776
#include "julia_assert.h"
7877

78+
// the common fields are hidden before the pointer, but the following macro is
79+
// used to indicate which types below are subtypes of jl_value_t
80+
#define JL_DATA_TYPE
81+
typedef struct _jl_value_t jl_value_t;
82+
#include "julia_threads.h"
83+
7984
#ifdef __cplusplus
8085
extern "C" {
8186
#endif
8287

8388
// core data types ------------------------------------------------------------
8489

85-
// the common fields are hidden before the pointer, but the following macro is
86-
// used to indicate which types below are subtypes of jl_value_t
87-
#define JL_DATA_TYPE
88-
89-
typedef struct _jl_value_t jl_value_t;
90-
9190
struct _jl_taggedvalue_bits {
9291
uintptr_t gc:2;
9392
uintptr_t in_image:1;
@@ -486,9 +485,6 @@ typedef struct _jl_code_instance_t {
486485
} specptr; // private data for `jlcall entry point
487486
} jl_code_instance_t;
488487

489-
// all values are callable as Functions
490-
typedef jl_value_t jl_function_t;
491-
492488
typedef struct {
493489
JL_DATA_TYPE
494490
jl_sym_t *name;
@@ -2255,12 +2251,8 @@ JL_DLLEXPORT void jl_sigatomic_end(void);
22552251

22562252
// tasks and exceptions -------------------------------------------------------
22572253

2258-
typedef struct _jl_timing_block_t jl_timing_block_t;
2259-
typedef struct _jl_timing_event_t jl_timing_event_t;
2260-
typedef struct _jl_excstack_t jl_excstack_t;
2261-
22622254
// info describing an exception handler
2263-
typedef struct _jl_handler_t {
2255+
struct _jl_handler_t {
22642256
jl_jmp_buf eh_ctx;
22652257
jl_gcframe_t *gcstack;
22662258
struct _jl_handler_t *prev;
@@ -2269,62 +2261,7 @@ typedef struct _jl_handler_t {
22692261
sig_atomic_t defer_signal;
22702262
jl_timing_block_t *timing_stack;
22712263
size_t world_age;
2272-
} jl_handler_t;
2273-
2274-
#define JL_RNG_SIZE 5 // xoshiro 4 + splitmix 1
2275-
2276-
typedef struct _jl_task_t {
2277-
JL_DATA_TYPE
2278-
jl_value_t *next; // invasive linked list for scheduler
2279-
jl_value_t *queue; // invasive linked list for scheduler
2280-
jl_value_t *tls;
2281-
jl_value_t *donenotify;
2282-
jl_value_t *result;
2283-
jl_value_t *scope;
2284-
jl_function_t *start;
2285-
// 4 byte padding on 32-bit systems
2286-
// uint32_t padding0;
2287-
uint64_t rngState[JL_RNG_SIZE];
2288-
_Atomic(uint8_t) _state;
2289-
uint8_t sticky; // record whether this Task can be migrated to a new thread
2290-
_Atomic(uint8_t) _isexception; // set if `result` is an exception to throw or that we exited with
2291-
// 1 byte padding
2292-
// uint8_t padding1;
2293-
// multiqueue priority
2294-
uint16_t priority;
2295-
2296-
// hidden state:
2297-
// cached floating point environment
2298-
// only updated at task switch
2299-
fenv_t fenv;
2300-
2301-
// id of owning thread - does not need to be defined until the task runs
2302-
_Atomic(int16_t) tid;
2303-
// threadpool id
2304-
int8_t threadpoolid;
2305-
// Reentrancy bits
2306-
// Bit 0: 1 if we are currently running inference/codegen
2307-
// Bit 1-2: 0-3 counter of how many times we've reentered inference
2308-
// Bit 3: 1 if we are writing the image and inference is illegal
2309-
uint8_t reentrant_timing;
2310-
// 2 bytes of padding on 32-bit, 6 bytes on 64-bit
2311-
// uint16_t padding2_32;
2312-
// uint48_t padding2_64;
2313-
// saved gc stack top for context switches
2314-
jl_gcframe_t *gcstack;
2315-
size_t world_age;
2316-
// quick lookup for current ptls
2317-
jl_ptls_t ptls; // == jl_all_tls_states[tid]
2318-
#ifdef USE_TRACY
2319-
const char *name;
2320-
#endif
2321-
// saved exception stack
2322-
jl_excstack_t *excstack;
2323-
// current exception handler
2324-
jl_handler_t *eh;
2325-
// saved thread state
2326-
jl_ucontext_t ctx; // pointer into stkbuf, if suspended
2327-
} jl_task_t;
2264+
};
23282265

23292266
#define JL_TASK_STATE_RUNNABLE 0
23302267
#define JL_TASK_STATE_DONE 1

src/julia_gcext.h

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -135,19 +135,13 @@ JL_DLLEXPORT int jl_gc_conservative_gc_support_enabled(void);
135135
// NOTE: Only valid to call from within a GC context.
136136
JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) JL_NOTSAFEPOINT;
137137

138-
// Return a non-null pointer to the start of the stack area if the task
139-
// has an associated stack buffer. In that case, *size will also contain
140-
// the size of that stack buffer upon return. Also, if task is a thread's
141-
// current task, that thread's id will be stored in *tid; otherwise,
142-
// *tid will be set to -1.
143-
//
144-
// DEPRECATED: use jl_active_task_stack() instead.
145-
JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *tid);
146-
147138
// Query the active and total stack range for the given task, and set
148139
// *active_start and *active_end respectively *total_start and *total_end
149140
// accordingly. The range for the active part is a best-effort approximation
150141
// and may not be tight.
142+
//
143+
// NOTE: Only valid to call from within a GC context. May return incorrect
144+
// values and segfault otherwise.
151145
JL_DLLEXPORT void jl_active_task_stack(jl_task_t *task,
152146
char **active_start, char **active_end,
153147
char **total_start, char **total_end) JL_NOTSAFEPOINT;

src/julia_internal.h

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1720,18 +1720,6 @@ JL_DLLEXPORT unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *fi
17201720
void register_eh_frames(uint8_t *Addr, size_t Size) JL_NOTSAFEPOINT;
17211721
void deregister_eh_frames(uint8_t *Addr, size_t Size) JL_NOTSAFEPOINT;
17221722

1723-
STATIC_INLINE void *jl_get_frame_addr(void) JL_NOTSAFEPOINT
1724-
{
1725-
#ifdef __GNUC__
1726-
return __builtin_frame_address(0);
1727-
#else
1728-
void *dummy = NULL;
1729-
// The mask is to suppress the compiler warning about returning
1730-
// address of local variable
1731-
return (void*)((uintptr_t)&dummy & ~(uintptr_t)15);
1732-
#endif
1733-
}
1734-
17351723
// Log `msg` to the current logger by calling CoreLogging.logmsg_shim() on the
17361724
// julia side. If any of module, group, id, file or line are NULL, these will
17371725
// be passed to the julia side as `nothing`. If `kwargs` is NULL an empty set

src/julia_threads.h

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ typedef struct {
9292
_jl_ucontext_t *ctx;
9393
jl_stack_context_t *copy_ctx;
9494
};
95+
void *activefp;
9596
void *stkbuf; // malloc'd memory (either copybuf or stack)
9697
size_t bufsz; // actual sizeof stkbuf
9798
unsigned int copy_stack:31; // sizeof stack for copybuf
@@ -214,6 +215,68 @@ typedef struct _jl_tls_states_t {
214215
#endif
215216
} jl_tls_states_t;
216217

218+
#define JL_RNG_SIZE 5 // xoshiro 4 + splitmix 1
219+
220+
// all values are callable as Functions
221+
typedef jl_value_t jl_function_t;
222+
typedef struct _jl_timing_block_t jl_timing_block_t;
223+
typedef struct _jl_timing_event_t jl_timing_event_t;
224+
typedef struct _jl_excstack_t jl_excstack_t;
225+
typedef struct _jl_handler_t jl_handler_t;
226+
227+
typedef struct _jl_task_t {
228+
JL_DATA_TYPE
229+
jl_value_t *next; // invasive linked list for scheduler
230+
jl_value_t *queue; // invasive linked list for scheduler
231+
jl_value_t *tls;
232+
jl_value_t *donenotify;
233+
jl_value_t *result;
234+
jl_value_t *scope;
235+
jl_function_t *start;
236+
// 4 byte padding on 32-bit systems
237+
// uint32_t padding0;
238+
uint64_t rngState[JL_RNG_SIZE];
239+
_Atomic(uint8_t) _state;
240+
uint8_t sticky; // record whether this Task can be migrated to a new thread
241+
_Atomic(uint8_t) _isexception; // set if `result` is an exception to throw or that we exited with
242+
// 1 byte padding
243+
// uint8_t padding1;
244+
// multiqueue priority
245+
uint16_t priority;
246+
247+
// hidden state:
248+
// cached floating point environment
249+
// only updated at task switch
250+
fenv_t fenv;
251+
252+
// id of owning thread - does not need to be defined until the task runs
253+
_Atomic(int16_t) tid;
254+
// threadpool id
255+
int8_t threadpoolid;
256+
// Reentrancy bits
257+
// Bit 0: 1 if we are currently running inference/codegen
258+
// Bit 1-2: 0-3 counter of how many times we've reentered inference
259+
// Bit 3: 1 if we are writing the image and inference is illegal
260+
uint8_t reentrant_timing;
261+
// 2 bytes of padding on 32-bit, 6 bytes on 64-bit
262+
// uint16_t padding2_32;
263+
// uint48_t padding2_64;
264+
// current gc stack top
265+
jl_gcframe_t *gcstack;
266+
size_t world_age;
267+
// quick lookup for current ptls
268+
jl_ptls_t ptls; // == jl_all_tls_states[tid]
269+
#ifdef USE_TRACY
270+
const char *name;
271+
#endif
272+
// saved exception stack
273+
jl_excstack_t *excstack;
274+
// current exception handler
275+
jl_handler_t *eh;
276+
// saved thread state
277+
jl_ucontext_t ctx; // pointer into stkbuf, if suspended
278+
} jl_task_t;
279+
217280
JL_DLLEXPORT void *jl_get_ptls_states(void);
218281

219282
// Update codegen version in `ccall.cpp` after changing either `pause` or `wake`
@@ -243,6 +306,18 @@ JL_DLLEXPORT void (jl_cpu_pause)(void);
243306
JL_DLLEXPORT void (jl_cpu_suspend)(void);
244307
JL_DLLEXPORT void (jl_cpu_wake)(void);
245308

309+
STATIC_INLINE void *jl_get_frame_addr(void) JL_NOTSAFEPOINT
310+
{
311+
#ifdef __GNUC__
312+
return __builtin_frame_address(0);
313+
#else
314+
void *dummy = NULL;
315+
// The mask is to suppress the compiler warning about returning
316+
// address of local variable
317+
return (void*)((uintptr_t)&dummy & ~(uintptr_t)15);
318+
#endif
319+
}
320+
246321
#ifdef __clang_gcanalyzer__
247322
// Note that the sigint safepoint can also trigger GC, albeit less likely
248323
void jl_gc_safepoint_(jl_ptls_t tls);
@@ -270,6 +345,9 @@ STATIC_INLINE int8_t jl_gc_state_set(jl_ptls_t ptls, int8_t state,
270345
{
271346
assert(old_state != JL_GC_PARALLEL_COLLECTOR_THREAD);
272347
assert(old_state != JL_GC_CONCURRENT_COLLECTOR_THREAD);
348+
if (__builtin_constant_p(state) ? state : // required if !old_state && state, otherwise optional
349+
__builtin_constant_p(old_state) ? !old_state : 1)
350+
jl_atomic_load_relaxed(&ptls->current_task)->ctx.activefp = (char*)jl_get_frame_addr();
273351
jl_atomic_store_release(&ptls->gc_state, state);
274352
if (state == JL_GC_STATE_UNSAFE || old_state == JL_GC_STATE_UNSAFE)
275353
jl_gc_safepoint_(ptls);

src/llvm-codegen-shared.h

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -256,16 +256,24 @@ static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Type *T_s
256256
emit_signal_fence(builder);
257257
}
258258

259-
static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::Value *state, llvm::Value *old_state, bool final)
259+
static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *task, llvm::MDNode *tbaa, llvm::Value *state, llvm::Value *old_state, bool final)
260260
{
261261
using namespace llvm;
262+
Value *ptls = get_current_ptls_from_task(builder, task, tbaa);
262263
Type *T_int8 = state->getType();
263264
unsigned offset = offsetof(jl_tls_states_t, gc_state);
264265
Value *gc_state = builder.CreateConstInBoundsGEP1_32(T_int8, ptls, offset, "gc_state");
265266
if (old_state == nullptr) {
266267
old_state = builder.CreateLoad(T_int8, gc_state);
267268
cast<LoadInst>(old_state)->setOrdering(AtomicOrdering::Monotonic);
268269
}
270+
if (isa<Constant>(state) ? state == builder.getInt8(0) :
271+
isa<Constant>(old_state) ? old_state == builder.getInt8(0) : true) {
272+
unsigned offset = offsetof(jl_task_t, ctx.activefp);
273+
Value *currentfp = builder.CreateConstInBoundsGEP1_32(T_int8, task, offset, "gc_state");
274+
Value *fp = builder.CreateIntrinsic(Intrinsic::frameaddress, {builder.getPtrTy()}, {builder.getInt32(0)});
275+
builder.CreateAlignedStore(fp, currentfp, Align(sizeof(void*)));
276+
}
269277
builder.CreateAlignedStore(state, gc_state, Align(sizeof(void*)))->setOrdering(AtomicOrdering::Release);
270278
if (auto *C = dyn_cast<ConstantInt>(old_state))
271279
if (C->isZero())
@@ -280,39 +288,38 @@ static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::T
280288
builder.CreateICmpEQ(state, zero8)),
281289
passBB, exitBB);
282290
builder.SetInsertPoint(passBB);
283-
MDNode *tbaa = get_tbaa_const(builder.getContext());
284-
emit_gc_safepoint(builder, T_size, ptls, tbaa, final);
291+
emit_gc_safepoint(builder, T_size, ptls, get_tbaa_const(builder.getContext()), final);
285292
builder.CreateBr(exitBB);
286293
builder.SetInsertPoint(exitBB);
287294
return old_state;
288295
}
289296

290-
static inline llvm::Value *emit_gc_unsafe_enter(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, bool final)
297+
static inline llvm::Value *emit_gc_unsafe_enter(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *task, llvm::MDNode *tbaa, bool final)
291298
{
292299
using namespace llvm;
293300
Value *state = builder.getInt8(0);
294-
return emit_gc_state_set(builder, T_size, ptls, state, nullptr, final);
301+
return emit_gc_state_set(builder, T_size, task, tbaa, state, nullptr, final);
295302
}
296303

297-
static inline llvm::Value *emit_gc_unsafe_leave(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::Value *state, bool final)
304+
static inline llvm::Value *emit_gc_unsafe_leave(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *task, llvm::MDNode *tbaa, llvm::Value *state, bool final)
298305
{
299306
using namespace llvm;
300307
Value *old_state = builder.getInt8(JL_GC_STATE_UNSAFE);
301-
return emit_gc_state_set(builder, T_size, ptls, state, old_state, final);
308+
return emit_gc_state_set(builder, T_size, task, tbaa, state, old_state, final);
302309
}
303310

304-
static inline llvm::Value *emit_gc_safe_enter(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, bool final)
311+
static inline llvm::Value *emit_gc_safe_enter(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *task, llvm::MDNode *tbaa, bool final)
305312
{
306313
using namespace llvm;
307314
Value *state = builder.getInt8(JL_GC_STATE_SAFE);
308-
return emit_gc_state_set(builder, T_size, ptls, state, nullptr, final);
315+
return emit_gc_state_set(builder, T_size, task, tbaa, state, nullptr, final);
309316
}
310317

311-
static inline llvm::Value *emit_gc_safe_leave(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::Value *state, bool final)
318+
static inline llvm::Value *emit_gc_safe_leave(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *task, llvm::MDNode *tbaa, llvm::Value *state, bool final)
312319
{
313320
using namespace llvm;
314321
Value *old_state = builder.getInt8(JL_GC_STATE_SAFE);
315-
return emit_gc_state_set(builder, T_size, ptls, state, old_state, final);
322+
return emit_gc_state_set(builder, T_size, task, tbaa, state, old_state, final);
316323
}
317324

318325
// Compatibility shims for LLVM attribute APIs that were renamed in LLVM 14.

src/llvm-ptls.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter,
193193
builder.SetInsertPoint(fastTerm->getParent());
194194
fastTerm->removeFromParent();
195195
MDNode *tbaa = tbaa_gcframe;
196-
Value *prior = emit_gc_unsafe_enter(builder, T_size, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, pgcstack), tbaa), true);
196+
Value *prior = emit_gc_unsafe_enter(builder, T_size, get_current_task_from_pgcstack(builder, pgcstack), tbaa, true);
197197
builder.Insert(fastTerm);
198198
phi->addIncoming(pgcstack, fastTerm->getParent());
199199
// emit pre-return cleanup
@@ -205,7 +205,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter,
205205
for (auto &BB : *pgcstack->getParent()->getParent()) {
206206
if (isa<ReturnInst>(BB.getTerminator())) {
207207
builder.SetInsertPoint(BB.getTerminator());
208-
emit_gc_unsafe_leave(builder, T_size, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, phi), tbaa), last_gc_state, true);
208+
emit_gc_unsafe_leave(builder, T_size, get_current_task_from_pgcstack(builder, phi), tbaa, last_gc_state, true);
209209
}
210210
}
211211
}

src/safepoint.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ void jl_set_gc_and_wait(void) // n.b. not used on _OS_DARWIN_
219219
// reading own gc state doesn't need atomic ops since no one else
220220
// should store to it.
221221
int8_t state = jl_atomic_load_relaxed(&ct->ptls->gc_state);
222+
ct->ctx.activefp = (char*)jl_get_frame_addr();
222223
jl_atomic_store_release(&ct->ptls->gc_state, JL_GC_STATE_WAITING);
223224
uv_mutex_lock(&safepoint_lock);
224225
uv_cond_broadcast(&safepoint_cond_begin);
@@ -258,6 +259,7 @@ void jl_safepoint_wait_thread_resume(void)
258259
// might have already observed our gc_state.
259260
// if (!jl_atomic_load_relaxed(&ct->ptls->suspend_count)) return;
260261
int8_t state = jl_atomic_load_relaxed(&ct->ptls->gc_state);
262+
ct->ctx.activefp = (char*)jl_get_frame_addr();
261263
jl_atomic_store_release(&ct->ptls->gc_state, JL_GC_STATE_WAITING);
262264
uv_mutex_lock(&ct->ptls->sleep_lock);
263265
if (jl_atomic_load_relaxed(&ct->ptls->suspend_count)) {

0 commit comments

Comments
 (0)