-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
WIP thread cpu time #175
WIP thread cpu time #175
Changes from all commits
d7391e2
4cbb97f
644fd2b
be78b81
14099bb
138385a
e0558bb
fb6597b
3484567
9b457b9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -124,6 +124,14 @@ typedef struct { | |
|
||
struct _jl_bt_element_t; | ||
|
||
// Per-thread timing counters. An instance is embedded in the thread-local
// state struct as the `timing_tls` member.
// All values are differences of jl_hrtime() readings.
// NOTE(review): presumably nanoseconds -- confirm against jl_hrtime().
typedef struct { | ||
// NOTE(review): not written anywhere in the visible changes; presumably
// meant to hold a per-thread start stamp -- confirm before relying on it.
uint64_t start_time; | ||
// Accumulated time spent in the sleep section of jl_task_get_next.
uint64_t sleep_time; | ||
// Accumulated time between entering jl_task_get_next and returning from it.
uint64_t scheduler_time; | ||
// Disabled together with the jl_record_lock_spin_time helper.
/* uint64_t lock_spin_time; */ | ||
// Time this thread was stopped in jl_safepoint_wait_gc while GC was running.
// NOTE(review): the visible writer assigns this field rather than adding to
// it, so it holds only the most recent wait -- confirm that is intended.
uint64_t gc_time; | ||
} jl_timing_tls_states_t; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. TODO: Should this be like GC_Num, with a corresponding struct on the Julia side, so that the Julia code works with the struct rather than with individual numbers? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Downside: a user-facing struct is impossible to expand later, so we probably just want to expose functions that return numbers. We could still pass the data from the C side to the Julia side as a struct, but I'm not sure that gains us much. |
||
|
||
// This includes all the thread local states we care about for a thread. | ||
// Changes to TLS field types must be reflected in codegen. | ||
#define JL_MAX_BT_SIZE 80000 | ||
|
@@ -155,6 +163,7 @@ typedef struct _jl_tls_states_t { | |
// Counter to disable finalizer **on the current thread** | ||
int finalizers_inhibited; | ||
jl_gc_tls_states_t gc_tls; // this is very large, and the offset of the first member is baked into codegen | ||
jl_timing_tls_states_t timing_tls; | ||
volatile sig_atomic_t defer_signal; | ||
_Atomic(struct _jl_task_t*) current_task; | ||
struct _jl_task_t *next_task; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -389,12 +389,14 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q, | |
{ | ||
jl_task_t *ct = jl_current_task; | ||
uint64_t start_cycles = 0; | ||
|
||
uint64_t t0 = jl_hrtime(); | ||
while (1) { | ||
jl_ptls_t ptls = ct->ptls; | ||
jl_task_t *task = get_next_task(trypoptask, q); | ||
if (task) | ||
if (task) { | ||
ptls->timing_tls.scheduler_time += jl_hrtime() - t0; | ||
return task; | ||
|
||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is the fast path for task switches (I think?), but I think we've concluded that this shouldn't add too much overhead (given |
||
// quick, race-y check to see if there seems to be any stuff in there | ||
jl_cpu_pause(); | ||
if (!check_empty(checkempty)) { | ||
|
@@ -403,7 +405,6 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q, | |
} | ||
|
||
jl_cpu_pause(); | ||
jl_ptls_t ptls = ct->ptls; | ||
if (sleep_check_after_threshold(&start_cycles) || (ptls->tid == 0 && (!jl_atomic_load_relaxed(&_threadedregion) || wait_empty))) { | ||
// acquire sleep-check lock | ||
jl_atomic_store_relaxed(&ptls->sleep_check_state, sleeping); | ||
|
@@ -425,6 +426,7 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q, | |
JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls); | ||
} | ||
if (task) | ||
ptls->timing_tls.scheduler_time += jl_hrtime() - t0; | ||
return task; | ||
continue; | ||
} | ||
|
@@ -433,6 +435,7 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q, | |
jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us | ||
JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls); | ||
} | ||
ptls->timing_tls.scheduler_time += jl_hrtime() - t0; | ||
return task; | ||
} | ||
|
||
|
@@ -507,6 +510,7 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q, | |
|
||
// the other threads will just wait for an individual wake signal to resume | ||
JULIA_DEBUG_SLEEPWAKE( ptls->sleep_enter = cycleclock() ); | ||
uint64_t tsleep0 = jl_hrtime(); | ||
int8_t gc_state = jl_gc_safe_enter(ptls); | ||
uv_mutex_lock(&ptls->sleep_lock); | ||
while (may_sleep(ptls)) { | ||
|
@@ -523,6 +527,7 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q, | |
assert(jl_atomic_load_relaxed(&ptls->sleep_check_state) == not_sleeping); | ||
uv_mutex_unlock(&ptls->sleep_lock); | ||
JULIA_DEBUG_SLEEPWAKE( ptls->sleep_leave = cycleclock() ); | ||
ptls->timing_tls.sleep_time += jl_hrtime() - tsleep0; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. currently |
||
jl_gc_safe_leave(ptls, gc_state); // contains jl_gc_safepoint | ||
start_cycles = 0; | ||
if (task) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -161,7 +161,10 @@ void jl_safepoint_wait_gc(void) | |
jl_task_t *ct = jl_current_task; (void)ct; | ||
JL_TIMING_SUSPEND_TASK(GC_SAFEPOINT, ct); | ||
// The thread should have set this is already | ||
assert(jl_atomic_load_relaxed(&ct->ptls->gc_state) != 0); | ||
jl_ptls_t ptls = ct->ptls; | ||
assert(jl_atomic_load_relaxed(&ptls->gc_state) != 0); | ||
// Time how long this thread is stopped while GC is running. | ||
uint64_t t0 = jl_hrtime(); | ||
// Use normal volatile load in the loop for speed until GC finishes. | ||
// Then use an acquire load to make sure the GC result is visible on this thread. | ||
while (jl_atomic_load_relaxed(&jl_gc_running) || jl_atomic_load_acquire(&jl_gc_running)) { | ||
|
@@ -173,6 +176,7 @@ void jl_safepoint_wait_gc(void) | |
uv_cond_wait(&safepoint_cond, &safepoint_lock); | ||
uv_mutex_unlock(&safepoint_lock); | ||
} | ||
ptls->timing_tls.gc_time = jl_hrtime() - t0; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is the GC time for the other threads, i.e. the threads that are stopped here while the GC is running. |
||
} | ||
|
||
void jl_safepoint_enable_sigint(void) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -49,6 +49,24 @@ JL_DLLEXPORT _Atomic(uint8_t) jl_measure_compile_time_enabled = 0; | |
JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_compile_time = 0; | ||
JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_recompile_time = 0; | ||
|
||
static uint64_t jl_thread_start_time; | ||
void jl_set_thread_start_time(void) | ||
{ | ||
jl_thread_start_time = jl_hrtime(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This start time is a single global shared by all threads, which technically isn't correct, since threads start at very slightly different times, but I think this is fine, at least for a first pass? |
||
} | ||
|
||
// TODO: not just current thread | ||
// Returns the jl_hrtime() delta between now and the process-wide start stamp
// stored in jl_thread_start_time. The stamp is shared by every thread, so the
// result is the same regardless of which thread asks.
JL_DLLEXPORT uint64_t jl_thread_up_time(void)
{
    uint64_t now = jl_hrtime();
    uint64_t elapsed = now - jl_thread_start_time;
    return elapsed;
}
// Estimates how long thread `tid` has spent running user work: the shared
// uptime minus the time that thread recorded as GC wait and scheduler time.
//
// tid: index into jl_all_tls_states. It is not range-checked here, so the
//      caller must pass the id of an existing thread.
//
// Returns the estimate in jl_hrtime() units, saturating at 0. The counters are
// plain (non-atomic) fields updated by the owning thread, and the uptime is
// measured from a single stamp shared by all threads, so the recorded time
// can exceed the uptime; an unsigned subtraction would then wrap to a huge
// bogus value instead of reporting "no user time yet".
JL_DLLEXPORT uint64_t jl_thread_user_time(uint8_t tid)
{
    jl_ptls_t ptls = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
    const jl_timing_tls_states_t *timing = &ptls->timing_tls;
    uint64_t up_time = jl_thread_up_time();
    uint64_t non_user_time = timing->gc_time + timing->scheduler_time;
    if (non_user_time >= up_time) {
        return 0;
    }
    return up_time - non_user_time;
}
|
||
JL_DLLEXPORT void *jl_get_ptls_states(void) | ||
{ | ||
// mostly deprecated: use current_task instead | ||
|
@@ -765,6 +783,7 @@ void jl_start_threads(void) | |
} | ||
uv_thread_detach(&uvtid); | ||
} | ||
jl_set_thread_start_time(); | ||
} | ||
|
||
_Atomic(unsigned) _threadedregion; // HACK: keep track of whether to prioritize IO or threading | ||
|
@@ -794,6 +813,13 @@ JL_DLLEXPORT void jl_exit_threaded_region(void) | |
|
||
// Profiling stubs | ||
|
||
/* JL_DLLEXPORT void jl_record_lock_spin_time(uint64_t time) JL_NOTSAFEPOINT */ | ||
/* { */ | ||
/* jl_task_t *ct = jl_current_task; */ | ||
/* jl_ptls_t ptls = ct->ptls; */ | ||
/* ptls->timing_tls.lock_spin_time += time; */ | ||
/* } */ | ||
|
||
void _jl_mutex_init(jl_mutex_t *lock, const char *name) JL_NOTSAFEPOINT | ||
{ | ||
jl_atomic_store_relaxed(&lock->owner, (jl_task_t*)NULL); | ||
|
@@ -816,10 +842,12 @@ void _jl_mutex_wait(jl_task_t *self, jl_mutex_t *lock, int safepoint) | |
return; | ||
} | ||
JL_TIMING(LOCK_SPIN, LOCK_SPIN); | ||
/* uint64_t t0 = jl_hrtime(); */ | ||
while (1) { | ||
if (owner == NULL && jl_atomic_cmpswap(&lock->owner, &owner, self)) { | ||
lock->count = 1; | ||
jl_profile_lock_acquired(lock); | ||
/* jl_record_lock_spin_time(jl_hrtime() - t0); */ | ||
return; | ||
} | ||
if (safepoint) { | ||
|
@@ -919,7 +947,6 @@ void _jl_mutex_unlock(jl_task_t *self, jl_mutex_t *lock) | |
} | ||
} | ||
|
||
|
||
// Make gc alignment available for threading | ||
// see threads.jl alignment | ||
JL_DLLEXPORT int jl_alignment(size_t sz) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
GC time for the thread coordinating the GC