-
-
Notifications
You must be signed in to change notification settings - Fork 5.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add per-Task cpu time metric #56320
base: master
Are you sure you want to change the base?
Add per-Task cpu time metric #56320
Changes from 21 commits
52b5080
1f769ce
46bc184
f7dfd0f
78849e5
70482c1
7f59ae4
f30d331
007777b
d8e829f
f23145c
4ff9e83
f545146
00fbb08
c037506
4ef3023
c51bf70
5a7fbfe
6af1a2a
2077fac
36a5f7b
242cc7f
4a678a3
adeb30f
0f60a27
2ad9bf6
1d139f0
635b3c7
337eda2
3ac24cb
631b7bc
89759e6
ffc7679
bcd95da
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -810,6 +810,12 @@ end | |
# runtime system hook called when a task finishes | ||
function task_done_hook(t::Task) | ||
# `finish_task` sets `sigatomic` before entering this function | ||
if t.is_timing_enabled | ||
# user_time -task-finished-> wait_time | ||
now = time_ns() | ||
record_cpu_time!(t, now) | ||
record_wall_time!(t, now) | ||
end | ||
err = istaskfailed(t) | ||
result = task_result(t) | ||
handled = false | ||
|
@@ -972,7 +978,13 @@ function enq_work(t::Task) | |
return t | ||
end | ||
|
||
schedule(t::Task) = enq_work(t) | ||
function schedule(t::Task) | ||
# user_time -task-(re)scheduled-> wait_time | ||
if t.is_timing_enabled && t.first_enqueued_at == 0 | ||
t.first_enqueued_at = time_ns() | ||
end | ||
nickrobinson251 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
enq_work(t) | ||
end | ||
|
||
""" | ||
schedule(t::Task, [val]; error=false) | ||
|
@@ -1018,6 +1030,10 @@ true | |
function schedule(t::Task, @nospecialize(arg); error=false) | ||
# schedule a task to be (re)started with the given value or exception | ||
t._state === task_state_runnable || Base.error("schedule: Task not runnable") | ||
# user_time -task-(re)scheduled-> wait_time | ||
if t.is_timing_enabled && t.first_enqueued_at == 0 | ||
t.first_enqueued_at = time_ns() | ||
end | ||
nickrobinson251 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if error | ||
q = t.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, t) | ||
setfield!(t, :result, arg) | ||
|
@@ -1039,6 +1055,7 @@ tasks. | |
""" | ||
function yield() | ||
ct = current_task() | ||
record_cpu_time!(ct) | ||
nickrobinson251 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
enq_work(ct) | ||
try | ||
wait() | ||
|
@@ -1060,6 +1077,7 @@ Throws a `ConcurrencyViolationError` if `t` is the currently running task. | |
""" | ||
function yield(t::Task, @nospecialize(x=nothing)) | ||
current = current_task() | ||
record_cpu_time!(current) | ||
nickrobinson251 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
t === current && throw(ConcurrencyViolationError("Cannot yield to currently running task!")) | ||
(t._state === task_state_runnable && t.queue === nothing) || throw(ConcurrencyViolationError("yield: Task not runnable")) | ||
t.result = x | ||
|
@@ -1084,6 +1102,9 @@ function yieldto(t::Task, @nospecialize(x=nothing)) | |
elseif t._state === task_state_failed | ||
throw(t.result) | ||
end | ||
if t.is_timing_enabled && t.first_enqueued_at == 0 | ||
t.first_enqueued_at = time_ns() | ||
end | ||
nickrobinson251 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
t.result = x | ||
set_next_task(t) | ||
return try_yieldto(identity) | ||
|
@@ -1097,6 +1118,12 @@ function try_yieldto(undo) | |
rethrow() | ||
end | ||
ct = current_task() | ||
# scheduler -task-started-> user | ||
# scheduler -task-resumed-> user | ||
if ct.is_timing_enabled | ||
# @assert ct.last_dequeued_at == 0 | ||
nickrobinson251 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
ct.last_dequeued_at = time_ns() | ||
end | ||
if ct._isexception | ||
exc = ct.result | ||
ct.result = nothing | ||
|
@@ -1110,6 +1137,9 @@ end | |
|
||
# yield to a task, throwing an exception in it | ||
function throwto(t::Task, @nospecialize exc) | ||
if t.is_timing_enabled && t.first_enqueued_at == 0 | ||
t.first_enqueued_at = time_ns() | ||
end | ||
nickrobinson251 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
t.result = exc | ||
t._isexception = true | ||
set_next_task(t) | ||
|
@@ -1176,3 +1206,20 @@ if Sys.iswindows() | |
else | ||
pause() = ccall(:pause, Cvoid, ()) | ||
end | ||
|
||
function record_cpu_time!(t::Task, stopped_at::UInt64=time_ns()) | ||
if t.is_timing_enabled | ||
@assert t.last_dequeued_at != 0 | ||
t.cpu_time_ns += stopped_at - t.last_dequeued_at | ||
t.last_dequeued_at = 0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you ever check/rely anywhere that this is reset to 0? It seems unnecessary to me (and we want to keep task-switches as cheap as we can, even though this will probably be very cheap since we'll already have the task resident in-cache, but still writes to main memory are slowish). And maybe someone would even want to look at the last-ran-time even for a sleeping task if they were trying to debug things? Not sure though. |
||
end | ||
return t | ||
end | ||
|
||
function record_wall_time!(t::Task, done_at::UInt64=time_ns()) | ||
if t.is_timing_enabled | ||
@assert t.first_enqueued_at != 0 | ||
t.wall_time_ns = done_at - t.first_enqueued_at | ||
end | ||
return t | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -86,6 +86,64 @@ end | |
# total time spend in garbage collection, in nanoseconds | ||
gc_time_ns() = ccall(:jl_gc_total_hrtime, UInt64, ()) | ||
|
||
""" | ||
task_cpu_time_ns(t::Task) -> UInt64 | ||
Return the total nanoseconds that the task `t` has spent running. | ||
This metric is only updated when the task yields or completes. | ||
See also [`task_wall_time_ns`](@ref). | ||
Will be `UInt64(0)` if task timings are not enabled. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe this method should throw if timings are not enabled for this task instead of returning 0, which is a valid return value. Thoughts? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. or There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IMO you should not use unsigned integers for representing positive values - you should use It's better in julia than it is in C, since julia's defaults are less dangerous, but using unsigned ints for positive numeric values is a consistent source of bugs in C programs, and i think should be still frowned upon in julia. (https://jacobegner.blogspot.com/2019/11/unsigned-integers-are-dangerous.html) IMO Unsigned ints should be used only for things like hashes, identifiers, addresses, and possibly for indexes but even for indexes i'd use signed ints. But anyway, another benefit is that this also gives |
||
See [`Base.task_timing`](@ref). | ||
!!! note "This metric is from the Julia scheduler" | ||
A task may be running on an OS thread that is descheduled by the OS | ||
scheduler, this time still counts towards the metric. | ||
!!! compat "Julia 1.12" | ||
This method was added in Julia 1.12. | ||
""" | ||
function task_cpu_time_ns(t::Task) | ||
return t.cpu_time_ns | ||
end | ||
|
||
""" | ||
task_wall_time_ns(t::Task) -> UInt64 | ||
Return the total nanoseconds that the task `t` was runnable. | ||
This is the time since the task entered the run queue until the time at which it completed, | ||
or until the current time if the task has not yet completed. | ||
See also [`task_cpu_time_ns`](@ref). | ||
Will be `UInt64(0)` if task timings are not enabled. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ditto for consistency, although in this case 0 is clearly not a valid return value. |
||
See [`Base.task_timing`](@ref). | ||
!!! compat "Julia 1.12" | ||
This method was added in Julia 1.12. | ||
""" | ||
function task_wall_time_ns(t::Task) | ||
# TODO: report up til current time if not done? too racy? | ||
# return istaskdone(t) ? t.wall_time_ns : time_ns() - t.first_enqueued_at | ||
return t.wall_time_ns | ||
end | ||
|
||
""" | ||
Base.task_timing(::Bool) | ||
Enable or disable the collection of per-task timing information. | ||
Task created when Base.task_timing(true) is in effect will have [`task_cpu_time_ns`](@ref) | ||
and [`task_wall_time_ns`](@ref) timing information available. | ||
""" | ||
function task_timing(b::Bool) | ||
if b | ||
ccall(:jl_task_timing_enable, Cvoid, ()) | ||
else | ||
# TODO: prevent decrementing the counter below zero | ||
ccall(:jl_task_timing_disable, Cvoid, ()) | ||
end | ||
return nothing | ||
end | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since you've put effort into making this flag reentrant, maybe you want to provide a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no, for this PR, i don't think so, because we don't have a use for it right now, and because (as of this PR) the only way to write such a function would be enabling the task-timings process-wide, whereas ideally i think such a function would only enable task-timings for task created by |
||
|
||
""" | ||
Base.gc_live_bytes() | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -809,6 +809,33 @@ JL_DLLEXPORT uint64_t jl_cumulative_recompile_time_ns(void) | |
return jl_atomic_load_relaxed(&jl_cumulative_recompile_time); | ||
} | ||
|
||
/** | ||
* @brief Enable per-task timing. | ||
*/ | ||
JL_DLLEXPORT void jl_task_timing_enable(void) | ||
{ | ||
// Increment the flag to allow reentrant callers. | ||
jl_atomic_fetch_add(&jl_task_timing_enabled, 1); | ||
} | ||
|
||
/** | ||
* @brief Disable per-task timing. | ||
*/ | ||
JL_DLLEXPORT void jl_task_timing_disable(void) | ||
{ | ||
jl_atomic_fetch_add(&jl_task_timing_enabled, -1); | ||
nickrobinson251 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
// TODO: remove this | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agree on the TODO comment, but do please remove all TODOs. |
||
JL_DLLEXPORT void jl_is_task_timing_enabled(void) | ||
{ | ||
jl_atomic_load_relaxed(&jl_task_timing_enabled); | ||
} | ||
JL_DLLEXPORT void jl_is_compile_timing_enabled(void) | ||
{ | ||
jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); | ||
} | ||
|
||
/** | ||
* @brief Retrieve floating-point environment constants. | ||
* | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3744,7 +3744,7 @@ void jl_init_types(void) JL_GC_DISABLED | |
NULL, | ||
jl_any_type, | ||
jl_emptysvec, | ||
jl_perm_symsvec(16, | ||
jl_perm_symsvec(21, | ||
"next", | ||
"queue", | ||
"storage", | ||
|
@@ -3760,8 +3760,13 @@ void jl_init_types(void) JL_GC_DISABLED | |
"_state", | ||
"sticky", | ||
"_isexception", | ||
"priority"), | ||
jl_svec(16, | ||
"priority", | ||
"is_timing_enabled", | ||
"first_enqueued_at", | ||
"last_dequeued_at", | ||
"cpu_time_ns", | ||
"wall_time_ns"), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should these go in a separate struct called There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It could be a separate struct inlined here, but I don't see much value to that. More valuable would probably be marking each of these as atomic though (in jltypes.c) so that observing them isn't UB. Most reads can probably be monotonic, but some writes are probably release and some reads might be acquires There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Okay on the separation concern. And yeah, I guess it is necessary for these to be atomic. Will discuss with @nickrobinson251. |
||
jl_svec(21, | ||
jl_any_type, | ||
jl_any_type, | ||
jl_any_type, | ||
|
@@ -3777,7 +3782,12 @@ void jl_init_types(void) JL_GC_DISABLED | |
jl_uint8_type, | ||
jl_bool_type, | ||
jl_bool_type, | ||
jl_uint16_type), | ||
jl_uint16_type, | ||
jl_bool_type, | ||
jl_uint64_type, | ||
jl_uint64_type, | ||
jl_uint64_type, | ||
jl_uint64_type), | ||
jl_emptysvec, | ||
0, 1, 6); | ||
XX(task); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2289,6 +2289,16 @@ typedef struct _jl_task_t { | |
// uint8_t padding1; | ||
// multiqueue priority | ||
uint16_t priority; | ||
// flag indicating whether or not this to measure the wall and cpu time of this task | ||
uint8_t is_timing_enabled; | ||
nickrobinson251 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// timestamp this task first entered the run queue (TODO: int32 of ms instead?) | ||
uint64_t first_enqueued_at; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you think it's valuable to also have created_at? It might be interesting in some cases to see the difference between the created-at and first-scheduled-at timestamps? Maybe someone would be surprised to see that gap being large? Not sure though. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, maybe, but i didn't particularly want to add things i wasn't adding a public API for... what do you think? worth adding? if so, worth adding a public API? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 that it should either have a public API or not exist at all. 🤔 My instinct was to say "just add it," but i know that we want to keep the struct smallish if possible for data-locality so I think you're right to leave it off unless we have a use for it. |
||
// timestamp this task was most recently scheduled to run | ||
uint64_t last_dequeued_at; | ||
nickrobinson251 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// time this task has spent running; updated when it yields | ||
uint64_t cpu_time_ns; | ||
nickrobinson251 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// time between first entering the run queue and being done/failed. | ||
uint64_t wall_time_ns; | ||
|
||
// hidden state: | ||
// cached floating point environment | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
At first I was going to suggest that it might actually be nice to have an ascii-art state diagram for the state transitions you've added here...
... But I guess for now there'd only be two states: user_time and wait_time, with all the like 15 different arrows you've added.
... so maybe that's actually not super useful right now and it makes more sense to just grep the code base for
->
.Which actually, maybe you can just make them more searchable, by adding a keyword? Like:
at the front of each arrow or something?