Skip to content

Commit

Permalink
Try to handle interrupting runtests better (#25348)
Browse files Browse the repository at this point in the history
I had a test that was hanging, but I did not know which one. Hitting
^C would kill everything without showing which tests had failed and which
tests had not completed. My next attempt was to enable InterruptException,
but that doesn't seem to play well with the Distributed stuff (a topic
for another time). Instead, this works by setting STDIN to raw mode and
monitoring it for ^C.
  • Loading branch information
Keno authored Jan 7, 2018
1 parent bbcd773 commit 743539b
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 52 deletions.
19 changes: 13 additions & 6 deletions stdlib/Test/src/Test.jl
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,10 @@ mutable struct Error <: Result
source::LineNumberNode
end
function Base.show(io::IO, t::Error)
if t.test_type == :test_interrupted
print_with_color(Base.error_color(), io, "Interrupted")
return
end
print_with_color(Base.error_color(), io, "Error During Test"; bold = true)
print(io, " at ")
print_with_color(:default, io, t.source.file, ":", t.source.line, "\n"; bold = true)
Expand Down Expand Up @@ -666,13 +670,16 @@ record(ts::DefaultTestSet, t::Pass) = (ts.n_passed += 1; t)
function record(ts::DefaultTestSet, t::Union{Fail, Error})
if myid() == 1
print_with_color(:white, ts.description, ": ")
print(t)
# don't print the backtrace for Errors because it gets printed in the show
# method
if !isa(t, Error)
Base.show_backtrace(STDOUT, scrub_backtrace(backtrace()))
# don't print for interrupted tests
if !(t isa Error) || t.test_type != :test_interrupted
print(t)
# don't print the backtrace for Errors because it gets printed in the show
# method
if !isa(t, Error)
Base.show_backtrace(STDOUT, scrub_backtrace(backtrace()))
end
println()
end
println()
end
push!(ts.results, t)
t, isa(t, Error) || backtrace()
Expand Down
141 changes: 95 additions & 46 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -92,62 +92,101 @@ cd(dirname(@__FILE__)) do
end
end

@sync begin
for p in workers()
@async begin
while length(tests) > 0
test = popfirst!(tests)
local resp
wrkr = p
try
resp = remotecall_fetch(runtests, wrkr, test, test_path(test); seed=seed)
catch e
resp = [e]
all_tests = [tests; node1_tests]

local stdin_monitor
all_tasks = Task[]
try
if isa(STDIN, Base.TTY)
t = current_task()
# Monitor STDIN and kill this task on ^C
stdin_monitor = @async begin
term = Base.Terminals.TTYTerminal("xterm", STDIN, STDOUT, STDERR)
try
Base.Terminals.raw!(term, true)
while true
if read(term, Char) == '\x3'
Base.throwto(t, InterruptException())
break
end
end
push!(results, (test, resp))
if resp[1] isa Exception
if exit_on_error
skipped = length(tests)
empty!(tests)
catch e
isa(e, InterruptException) || rethrow(e)
finally
Base.Terminals.raw!(term, false)
end
end
end
@sync begin
for p in workers()
@async begin
push!(all_tasks, current_task())
while length(tests) > 0
test = popfirst!(tests)
local resp
wrkr = p
try
resp = remotecall_fetch(runtests, wrkr, test, test_path(test); seed=seed)
catch e
isa(e, InterruptException) && return
resp = [e]
end
elseif resp[end] > max_worker_rss
if n > 1
rmprocs(wrkr, waitfor=30)
p = addprocs_with_testenv(1)[1]
remotecall_fetch(include, p, "testdefs.jl")
else # single process testing
error("Halting tests. Memory limit reached : $resp > $max_worker_rss")
push!(results, (test, resp))
if resp[1] isa Exception
if exit_on_error
skipped = length(tests)
empty!(tests)
end
elseif resp[end] > max_worker_rss
if n > 1
rmprocs(wrkr, waitfor=30)
p = addprocs_with_testenv(1)[1]
remotecall_fetch(include, p, "testdefs.jl")
else # single process testing
error("Halting tests. Memory limit reached : $resp > $max_worker_rss")
end
end
end

!isa(resp[1], Exception) && print_testworker_stats(test, wrkr, resp)
end
if p != 1
# Free up memory =)
rmprocs(p, waitfor=30)
!isa(resp[1], Exception) && print_testworker_stats(test, wrkr, resp)
end
if p != 1
# Free up memory =)
rmprocs(p, waitfor=30)
end
end
end
end
end

n > 1 && length(node1_tests) > 1 && print("\nExecuting tests that run on node 1 only:\n")
for t in node1_tests
# As above, try to run each test
# which must run on node 1. If
# the test fails, catch the error,
# and either way, append the results
# to the overall aggregator
isolate = true
t == "SharedArrays" && (isolate = false)
local resp
try
resp = eval(Expr(:call, () -> runtests(t, test_path(t), isolate, seed=seed))) # runtests is defined by the include above
print_testworker_stats(t, 1, resp)
catch e
resp = [e]
n > 1 && length(node1_tests) > 1 && print("\nExecuting tests that run on node 1 only:\n")
for t in node1_tests
# As above, try to run each test
# which must run on node 1. If
# the test fails, catch the error,
# and either way, append the results
# to the overall aggregator
isolate = true
t == "SharedArrays" && (isolate = false)
local resp
try
resp = eval(Expr(:call, () -> runtests(t, test_path(t), isolate, seed=seed))) # runtests is defined by the include above
print_testworker_stats(t, 1, resp)
catch e
resp = [e]
end
push!(results, (t, resp))
end
catch e
isa(e, InterruptException) || rethrow(e)
# If the test suite was merely interrupted, still print the
# summary, which can be useful to diagnose what's going on
foreach(task->try; schedule(task, InterruptException(); error=true); end, all_tasks)
foreach(wait, all_tasks)
finally
if isa(STDIN, Base.TTY)
schedule(stdin_monitor, InterruptException(); error=true)
end
push!(results, (t, resp))
end

#=
Construct a testset on the master node which will hold results from all the
test files run on workers and on node1. The loop goes through the results,
Expand All @@ -172,7 +211,9 @@ cd(dirname(@__FILE__)) do
=#
o_ts = Test.DefaultTestSet("Overall")
Test.push_testset(o_ts)
completed_tests = Set{String}()
for res in results
push!(completed_tests, res[1])
if isa(res[2][1], Test.DefaultTestSet)
Test.push_testset(res[2][1])
Test.record(o_ts, res[2][1])
Expand Down Expand Up @@ -218,6 +259,14 @@ cd(dirname(@__FILE__)) do
error(string("Unknown result type : ", typeof(res)))
end
end
# Make sure every scheduled test appears in the final summary: any test in
# `all_tests` that never contributed an entry to `results` (for example because
# the run was interrupted with ^C) is reported as a `:test_interrupted` Error.
for test in all_tests
# Names in `completed_tests` were collected from `results`, so they already
# have a real outcome recorded above.
(test in completed_tests) && continue
# Stand-in testset named after the test that did not finish.
fake = Test.DefaultTestSet(test)
# The `[]` and `LineNumberNode(1)` arguments are placeholders.
# NOTE(review): presumably these are the backtrace and source location of the
# Error - confirm against the `Test.Error` constructor.
Test.record(fake, Test.Error(:test_interrupted, test, InterruptException(), [], LineNumberNode(1)))
# Record the stand-in into the overall testset `o_ts` while it is on the
# testset stack, then pop it again.
Test.push_testset(fake)
Test.record(o_ts, fake)
Test.pop_testset()
end
println()
Test.print_test_results(o_ts,1)
if !o_ts.anynonpass
Expand Down

0 comments on commit 743539b

Please sign in to comment.