Test: try really harder to show tests correctly (#37809)

By serializing both tests and backtraces (as strings), and trying to ensure other parts are not going to mess them up along the way. Hopefully this should improve error message reporting reliability in our Base runtests.
JuliaLang · Oct 6, 2020 · 7d3dac4 · 7d3dac4 · nanosoldier · Oct 7, 2020
1 parent c402113
commit 7d3dac4
Show file tree

Hide file tree

Showing 6 changed files with 114 additions and 71 deletions.
diff --git a/stdlib/Test/Project.toml b/stdlib/Test/Project.toml
@@ -2,7 +2,7 @@ name = "Test"
 uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [deps]
-Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
diff --git a/stdlib/Test/src/Test.jl b/stdlib/Test/src/Test.jl
@@ -23,12 +23,11 @@ export detect_ambiguities, detect_unbound_args
 export GenericString, GenericSet, GenericDict, GenericArray, GenericOrder
 export TestSetException
 
-import Distributed: myid
-
 using Random
 using Random: AbstractRNG, default_rng
 using InteractiveUtils: gen_call_with_extracted_types
 using Base: typesplit
+using Serialization: Serialization
 
 const DISPLAY_FAILED = (
     :isequal,
@@ -85,15 +84,19 @@ struct Pass <: Result
     orig_expr
     data
     value
+    function Pass(test_type::Symbol, orig_expr, data, thrown)
+        return new(test_type, orig_expr, data, thrown isa String ? "String" : thrown)
+    end
 end
+
 function Base.show(io::IO, t::Pass)
     printstyled(io, "Test Passed"; bold = true, color=:green)
     if !(t.orig_expr === nothing)
         print(io, "\n  Expression: ", t.orig_expr)
     end
     if t.test_type === :test_throws
         # The correct type of exception was thrown
-        print(io, "\n      Thrown: ", typeof(t.value))
+        print(io, "\n      Thrown: ", t.value isa String ? t.value : typeof(t.value))
     elseif t.test_type === :test && t.data !== nothing
         # The test was an expression, so display the term-by-term
         # evaluated version as well
@@ -107,13 +110,21 @@ end
 The test condition was false, i.e. the expression evaluated to false or
 the correct exception was not thrown.
 """
-mutable struct Fail <: Result
+struct Fail <: Result
     test_type::Symbol
-    orig_expr
-    data
-    value
+    orig_expr::String
+    data::Union{Nothing, String}
+    value::String
     source::LineNumberNode
+    function Fail(test_type::Symbol, orig_expr, data, value, source::LineNumberNode)
+        return new(test_type,
+            string(orig_expr),
+            data === nothing ? nothing : string(data),
+            string(isa(data, Type) ? typeof(value) : value),
+            source)
+    end
 end
+
 function Base.show(io::IO, t::Fail)
     printstyled(io, "Test Failed"; bold=true, color=Base.error_color())
     print(io, " at ")
@@ -122,7 +133,7 @@ function Base.show(io::IO, t::Fail)
     if t.test_type === :test_throws_wrong
         # An exception was thrown, but it was of the wrong type
         print(io, "\n    Expected: ", t.data)
-        print(io, "\n      Thrown: ", isa(t.data, Type) ? typeof(t.value) : t.value)
+        print(io, "\n      Thrown: ", t.value)
     elseif t.test_type === :test_throws_nothing
         # An exception was expected, but no exception was thrown
         print(io, "\n    Expected: ", t.data)
@@ -142,10 +153,10 @@ it evaluated to something other than a [`Bool`](@ref).
 In the case of `@test_broken` it is used to indicate that an
 unexpected `Pass` `Result` occurred.
 """
-mutable struct Error <: Result
+struct Error <: Result
     test_type::Symbol
-    orig_expr
-    value
+    orig_expr::String
+    value::String
     backtrace::String
     source::LineNumberNode
 
@@ -158,13 +169,14 @@ mutable struct Error <: Result
         else
             bt_str = ""
         end
-        new(test_type,
-            orig_expr,
+        return new(test_type,
+            string(orig_expr),
             sprint(show, value, context = :limit => true),
             bt_str,
             source)
     end
 end
+
 function Base.show(io::IO, t::Error)
     if t.test_type === :test_interrupted
         printstyled(io, "Interrupted", color=Base.error_color())
@@ -201,10 +213,11 @@ end
 The test condition is the expected (failed) result of a broken test,
 or was explicitly skipped with `@test_skip`.
 """
-mutable struct Broken <: Result
+struct Broken <: Result
     test_type::Symbol
     orig_expr
 end
+
 function Base.show(io::IO, t::Broken)
     printstyled(io, "Test Broken\n"; bold=true, color=Base.warn_color())
     if t.test_type === :skipped && !(t.orig_expr === nothing)
@@ -214,6 +227,25 @@ function Base.show(io::IO, t::Broken)
     end
 end
 
+# Types that appear in TestSetException.errors_and_fails we convert eagerly into strings
+# other types we convert lazily
+function Serialization.serialize(s::Serialization.AbstractSerializer, t::Pass)
+    Serialization.serialize_type(s, typeof(t))
+    Serialization.serialize(s, t.test_type)
+    Serialization.serialize(s, t.orig_expr === nothing ? nothing : string(t.orig_expr))
+    Serialization.serialize(s, t.data === nothing ? nothing : string(t.data))
+    Serialization.serialize(s, string(t.value))
+    nothing
+end
+
+function Serialization.serialize(s::Serialization.AbstractSerializer, t::Broken)
+    Serialization.serialize_type(s, typeof(t))
+    Serialization.serialize(s, t.test_type)
+    Serialization.serialize(s, t.orig_expr === nothing ? nothing : string(t.orig_expr))
+    nothing
+end
+
+
 #-----------------------------------------------------------------------
 
 abstract type ExecutionResult end
@@ -730,8 +762,8 @@ end
 
 # Records nothing, and throws an error immediately whenever a Fail or
 # Error occurs. Takes no action in the event of a Pass or Broken result
-record(ts::FallbackTestSet, t::Union{Pass,Broken}) = t
-function record(ts::FallbackTestSet, t::Union{Fail,Error})
+record(ts::FallbackTestSet, t::Union{Pass, Broken}) = t
+function record(ts::FallbackTestSet, t::Union{Fail, Error})
     println(t)
     throw(FallbackTestSetException("There was an error during testing"))
 end
@@ -763,7 +795,7 @@ record(ts::DefaultTestSet, t::Pass) = (ts.n_passed += 1; t)
 # For the other result types, immediately print the error message
 # but do not terminate. Print a backtrace.
 function record(ts::DefaultTestSet, t::Union{Fail, Error})
-    if myid() == 1
+    if TESTSET_PRINT_ENABLE[]
         printstyled(ts.description, ": ", color=:white)
         # don't print for interrupted tests
         if !(t isa Error) || t.test_type !== :test_interrupted
@@ -775,7 +807,6 @@ function record(ts::DefaultTestSet, t::Union{Fail, Error})
         end
     end
     push!(ts.results, t)
-    isa(t, Error) || backtrace()
     return t
 end
 
@@ -788,9 +819,9 @@ record(ts::DefaultTestSet, t::AbstractTestSet) = push!(ts.results, t)
 
 function print_test_errors(ts::DefaultTestSet)
     for t in ts.results
-        if (isa(t, Error) || isa(t, Fail)) && myid() == 1
+        if isa(t, Error) || isa(t, Fail)
             println("Error in testset $(ts.description):")
-            Base.show(stdout,t)
+            show(t)
             println()
         elseif isa(t, DefaultTestSet)
             print_test_errors(t)
@@ -874,7 +905,7 @@ function finish(ts::DefaultTestSet)
     if total != total_pass + total_broken
         # Get all the error/failures and bring them along for the ride
         efs = filter_errors(ts)
-        throw(TestSetException(total_pass,total_fail,total_error, total_broken, efs))
+        throw(TestSetException(total_pass, total_fail, total_error, total_broken, efs))
     end
 
     # return the testset so it is returned from the @testset macro

diff --git a/stdlib/Test/src/logging.jl b/stdlib/Test/src/logging.jl
@@ -83,7 +83,7 @@ function record(::FallbackTestSet, t::LogTestFailure)
 end
 
 function record(ts::DefaultTestSet, t::LogTestFailure)
-    if myid() == 1
+    if TESTSET_PRINT_ENABLE[]
         printstyled(ts.description, ": ", color=:white)
         print(t)
         Base.show_backtrace(stdout, scrub_backtrace(backtrace()))

diff --git a/stdlib/Test/test/runtests.jl b/stdlib/Test/test/runtests.jl
@@ -1,7 +1,9 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-using Test, Distributed, Random
+using Test, Random
 using Test: guardseed
+using Serialization
+using Distributed: RemoteException
 
 import Logging: Debug, Info, Warn
 
@@ -394,7 +396,7 @@ end
         @test total_broken == 0
     end
     ts.anynonpass = false
-    deleteat!(Test.get_testset().results,1)
+    deleteat!(Test.get_testset().results, 1)
 end
 
 @test .1+.1+.1 ≈ .3
@@ -953,3 +955,17 @@ end
     end
     @test ok
 end
+
+let ex = :(something_complex + [1, 2, 3])
+    b = PipeBuffer()
+    let t = Test.Pass(:test, (ex, 1), (ex, 2), (ex, 3))
+        serialize(b, t)
+        @test string(t) == string(deserialize(b))
+        @test eof(b)
+    end
+    let t = Test.Broken(:test, ex)
+        serialize(b, t)
+        @test string(t) == string(deserialize(b))
+        @test eof(b)
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -147,12 +147,21 @@ cd(@__DIR__) do
         end
     end
 
-    function print_testworker_errored(name, wrkr)
+    function print_testworker_errored(name, wrkr, @nospecialize(e))
         lock(print_lock)
         try
             printstyled(name, color=:red)
             printstyled(lpad("($wrkr)", name_align - textwidth(name) + 1, " "), " |",
                 " "^elapsed_align, " failed at $(now())\n", color=:red)
+            if isa(e, Test.TestSetException)
+                for t in e.errors_and_fails
+                    show(t)
+                    println()
+                end
+            elseif e !== nothing
+                Base.showerror(stdout, e)
+            end
+            println()
         finally
             unlock(print_lock)
         end
@@ -200,18 +209,17 @@ cd(@__DIR__) do
                     while length(tests) > 0
                         test = popfirst!(tests)
                         running_tests[test] = now()
-                        local resp
                         wrkr = p
-                        try
-                            resp = remotecall_fetch(runtests, wrkr, test, test_path(test); seed=seed)
-                        catch e
-                            isa(e, InterruptException) && return
-                            resp = Any[e]
-                        end
+                        resp = try
+                                remotecall_fetch(runtests, wrkr, test, test_path(test); seed=seed)
+                            catch e
+                                isa(e, InterruptException) && return
+                                Any[CapturedException(e, catch_backtrace())]
+                            end
                         delete!(running_tests, test)
                         push!(results, (test, resp))
-                        if resp[1] isa Exception
-                            print_testworker_errored(test, wrkr)
+                        if resp[1] isa Exception && !(resp[1] isa TestSetException && isempty(resp[1].errors_and_fails))
+                            print_testworker_errored(test, wrkr, exit_on_error ? nothing : resp[1])
                             if exit_on_error
                                 skipped = length(tests)
                                 empty!(tests)
@@ -311,6 +319,7 @@ cd(@__DIR__) do
     Errored, and execution continues until the summary at the end of the test
     run, where the test file is printed out as the "failed expression".
     =#
+    Test.TESTSET_PRINT_ENABLE[] = false
     o_ts = Test.DefaultTestSet("Overall")
     Test.push_testset(o_ts)
     completed_tests = Set{String}()
@@ -320,29 +329,15 @@ cd(@__DIR__) do
             Test.push_testset(resp)
             Test.record(o_ts, resp)
             Test.pop_testset()
-        elseif isa(resp, Tuple{Int,Int})
-            fake = Test.DefaultTestSet(testname)
-            for i in 1:resp[1]
-                Test.record(fake, Test.Pass(:test, nothing, nothing, nothing))
-            end
-            for i in 1:resp[2]
-                Test.record(fake, Test.Broken(:test, nothing))
-            end
-            Test.push_testset(fake)
-            Test.record(o_ts, fake)
-            Test.pop_testset()
-        elseif isa(resp, RemoteException) && isa(resp.captured.ex, Test.TestSetException)
-            println("Worker $(resp.pid) failed running test $(testname):")
-            Base.showerror(stdout, resp.captured)
-            println()
+        elseif isa(resp, Test.TestSetException)
             fake = Test.DefaultTestSet(testname)
-            for i in 1:resp.captured.ex.pass
+            for i in 1:resp.pass
                 Test.record(fake, Test.Pass(:test, nothing, nothing, nothing))
             end
-            for i in 1:resp.captured.ex.broken
+            for i in 1:resp.broken
                 Test.record(fake, Test.Broken(:test, nothing))
             end
-            for t in resp.captured.ex.errors_and_fails
+            for t in resp.errors_and_fails
                 Test.record(fake, t)
             end
             Test.push_testset(fake)
@@ -371,6 +366,7 @@ cd(@__DIR__) do
         Test.record(o_ts, fake)
         Test.pop_testset()
     end
+    Test.TESTSET_PRINT_ENABLE[] = true
     println()
     Test.print_test_results(o_ts, 1)
     if !o_ts.anynonpass

diff --git a/test/testdefs.jl b/test/testdefs.jl
@@ -18,28 +18,28 @@ function runtests(name, path, isolate=true; seed=nothing)
         let id = myid()
             wait(@spawnat 1 print_testworker_started(name, id))
         end
-        ex = quote
-            @timed @testset $"$name" begin
-                # Random.seed!(nothing) will fail
-                $seed != nothing && Random.seed!($seed)
-                include($"$path.jl")
-            end
+        res_and_time_data = @timed @testset "$name" begin
+            # Random.seed!(nothing) will fail
+            seed != nothing && Random.seed!(seed)
+            Base.include(m, "$path.jl")
         end
-        res_and_time_data = Core.eval(m, ex)
         rss = Sys.maxrss()
         #res_and_time_data[1] is the testset
-        passes,fails,error,broken,c_passes,c_fails,c_errors,c_broken = Test.get_test_counts(res_and_time_data[1])
-        if res_and_time_data[1].anynonpass == false
-            res_and_time_data = (
-                                 (passes+c_passes,broken+c_broken),
-                                 res_and_time_data[2],
-                                 res_and_time_data[3],
-                                 res_and_time_data[4],
-                                 res_and_time_data[5])
-        end
-        vcat(collect(res_and_time_data), rss)
-    finally
+        ts = res_and_time_data[1]
+        passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken = Test.get_test_counts(ts)
+        # simplify our stored data to just contain the counts
+        res_and_time_data = (TestSetException(passes+c_passes, fails+c_fails, errors+c_errors, broken+c_broken, Test.filter_errors(ts)),
+                             res_and_time_data[2],
+                             res_and_time_data[3],
+                             res_and_time_data[4],
+                             res_and_time_data[5],
+                             rss)
+        return res_and_time_data
+    catch ex
         Test.TESTSET_PRINT_ENABLE[] = old_print_setting
+        ex isa TestSetException || (ex = CapturedException(ex, catch_backtrace()))
+        # return this as a value to avoid remotecall from mangling it and discarding half of it
+        return Any[ex]
     end
 end