Initial support for running test failures first via failures_first=true (#212)

nickrobinson251 · web-flow · commit c449b775e13e · 2025-09-15T14:54:16.000Z
* Hacky prototype for running test failures first

* Also sort unseen before passes

* Use id => UInt8 mapping

* Update option name, default it to true

* Add tests

* Default to `false`

* Another test

* Add to README

* Bump version

* Support nworkers>0
diff --git a/Project.toml b/Project.toml
@@ -1,6 +1,6 @@
 name = "ReTestItems"
 uuid = "817f1d60-ba6b-4fd5-9520-3cf149f6a823"
-version = "1.33.2"
+version = "1.34.0"
 
 [deps]
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
diff --git a/README.md b/README.md
@@ -130,6 +130,12 @@ You can set `runtests` to stop on the first test-item failure by passing `failfa
 
 If you want individual test-items to stop on their first test failure, but not stop the whole `runtests` early, you can instead pass just `testitem_failfast=true` to `runtests`.
 
+#### Running previous failures first
+
+You can set `runtests` to run first any test-items that failed the last time they were run by passing `failures_first=true`.
+When `failures_first=true` is set, test-items are ordered so that previously failing test-items run first, followed by previously unseen test-items, followed by previously passing test-items.
+
+This option can be combined with `failfast=true` to efficiently find the next failing test-item during development.
 
 ## Writing tests
 
diff --git a/src/ReTestItems.jl b/src/ReTestItems.jl
@@ -23,6 +23,20 @@ else
     const errmon = identity
 end
 
+# Status of a test item when it last ran; `failures_first` sorts failures before unseen before passes.
+@enum _TEST_STATUS::UInt8 begin
+    _FAILED = 0 # lowest value, so sorted first
+    _UNSEEN = 1 # fallback for ids with no cached status
+    _PASSED = 2 # highest value, so sorted last
+end
+const GLOBAL_TEST_STATUS = Dict{String,_TEST_STATUS}() # test item id => last cached status
+reset_test_status!() = (empty!(GLOBAL_TEST_STATUS); nothing) # clears the cache, returns `nothing`
+_status_when_last_seen(ti) = get(GLOBAL_TEST_STATUS, ti.id, _UNSEEN) # sort key; `_UNSEEN` if not cached
+function _cache_status!(ti) # store the latest outcome of `ti` under `ti.id`
+    status = ti.is_non_pass[] ? _FAILED : _PASSED # a non-pass is cached as a failure
+    GLOBAL_TEST_STATUS[ti.id] = status
+end
+
 # We use the Test.jl stdlib `failfast` mechanism to implement `testitem_failfast`, but that
 # feature was only added in Julia v1.9, so we define these shims so our code can be
 # compatible with earlier Julia versions, with `testitem_failfast` just having no effect.
@@ -245,6 +259,9 @@ will be run.
   Defaults to the value passed to the `failfast` keyword.
   If a `@testitem` sets its own `failfast` keyword, then that takes precedence.
   Note that the `testitem_failfast` keyword only takes effect in Julia v1.9+ and is ignored in earlier Julia versions.
+- `failures_first::Bool=false`: if `true`, first runs test items that failed the last time
+  they ran, followed by new test items, followed by test items that passed the last time they ran.
+  Can also be set using the `RETESTITEMS_FAILURES_FIRST` environment variable.
 """
 function runtests end
 
@@ -274,6 +291,7 @@ end
     timeout_profile_wait::Int
     memory_threshold::Float64
     gc_between_testitems::Bool
+    failures_first::Bool
 end
 
 
@@ -298,6 +316,7 @@ function runtests(
     gc_between_testitems::Bool=parse(Bool, get(ENV, "RETESTITEMS_GC_BETWEEN_TESTITEMS", string(nworkers > 1))),
     failfast::Bool=parse(Bool, get(ENV, "RETESTITEMS_FAILFAST", "false")),
     testitem_failfast::Bool=parse(Bool, get(ENV, "RETESTITEMS_TESTITEM_FAILFAST", string(failfast))),
+    failures_first::Bool=parse(Bool, get(ENV, "RETESTITEMS_FAILURES_FIRST", "false")),
 )
     nworker_threads = _validated_nworker_threads(nworker_threads)
     paths′ = _validated_paths(paths, validate_paths)
@@ -318,7 +337,7 @@ function runtests(
     (timeout_profile_wait > 0 && Sys.iswindows()) && @warn "CPU profiles on timeout is not supported on Windows, ignoring `timeout_profile_wait`"
     mkpath(RETESTITEMS_TEMP_FOLDER[]) # ensure our folder wasn't removed
     save_current_stdio()
-    cfg = _Config(; nworkers, nworker_threads, worker_init_expr, test_end_expr, testitem_timeout, testitem_failfast, failfast, retries, logs, report, verbose_results, timeout_profile_wait, memory_threshold, gc_between_testitems)
+    cfg = _Config(; nworkers, nworker_threads, worker_init_expr, test_end_expr, testitem_timeout, testitem_failfast, failfast, retries, logs, report, verbose_results, timeout_profile_wait, memory_threshold, gc_between_testitems, failures_first)
     debuglvl = Int(debug)
     if debuglvl > 0
         withdebug(debuglvl) do
@@ -400,6 +419,15 @@ function _runtests_in_current_env(
     @info "Scheduling $ntestitems tests on pid $(Libc.getpid())" *
         (nworkers == 0 ? "" : " with $nworkers worker processes and $nworker_threads threads per worker.")
     try
+        if cfg.failures_first && !isempty(GLOBAL_TEST_STATUS)
+            sort!(testitems.testitems; by=_status_when_last_seen)
+            foreach(enumerate(testitems.testitems)) do (i, ti)
+                ti.number[] = i # reset number to match new order
+            end
+            is_sorted_queue = true
+        else
+            is_sorted_queue = false
+        end
         if nworkers == 0
             length(cfg.worker_init_expr.args) > 0 && error("worker_init_expr is set, but will not run because number of workers is 0.")
             # This is where we disable printing for the serial executor case.
@@ -423,7 +451,7 @@ function _runtests_in_current_env(
                         @debugv 2 "Running GC"
                         GC.gc(true)
                     end
-                    is_non_pass = any_non_pass(ts)
+                    testitem.is_non_pass[] = is_non_pass = any_non_pass(ts)
                     if is_non_pass && run_number != max_runs
                         run_number += 1
                         @info "Retrying $(repr(testitem.name)). Run=$run_number."
@@ -458,7 +486,7 @@ function _runtests_in_current_env(
             end
             # Now all workers are started, we can begin processing test items.
             @info "Starting running test items"
-            starting = get_starting_testitems(testitems, nworkers)
+            starting = get_starting_testitems(testitems, nworkers; is_sorted=is_sorted_queue)
             @sync for (i, w) in enumerate(workers)
                 ti = starting[i]
                 @spawn begin
@@ -651,7 +679,7 @@ function manage_worker(
                     @debugv 2 "Running GC on $worker"
                     remote_fetch(worker, :(GC.gc(true)))
                 end
-                is_non_pass = any_non_pass(ts)
+                testitem.is_non_pass[] = is_non_pass = any_non_pass(ts)
                 if is_non_pass && run_number != max_runs
                     run_number += 1
                     @info "Retrying $(repr(testitem.name)) on $worker. Run=$run_number."
diff --git a/src/macros.jl b/src/macros.jl
@@ -129,6 +129,7 @@ struct TestItem
     testsetups::Vector{TestSetup} # populated by runtests coordinator
     workerid::Base.RefValue{Int} # populated when the test item is scheduled
     testsets::Vector{DefaultTestSet} # populated when the test item is finished running
+    is_non_pass::Base.RefValue{Bool} # populated when the test item is finished running
     eval_number::Base.RefValue{Int} # to keep track of how many items have been run so far
     stats::Vector{PerfStats} # populated when the test item is finished running
     scheduled_for_evaluation::ScheduledForEvaluation # to keep track of whether the test item has been scheduled for evaluation
@@ -140,6 +141,7 @@ function TestItem(number, name, id, tags, default_imports, setups, retries, time
         TestSetup[],
         Ref{Int}(0),
         DefaultTestSet[],
+        Ref{Bool}(),
         Ref{Int}(0),
         PerfStats[],
         ScheduledForEvaluation(),
diff --git a/src/testcontext.jl b/src/testcontext.jl
@@ -141,6 +141,7 @@ function record_results!(file::FileNode, ti::TestItem)
         # Always record last try as the final status, so a pass-on-retry is a pass.
         Test.record(file.testset, last(ti.testsets))
         junit_record!(file.junit, ti)
+        _cache_status!(ti)
     end
 end
 
@@ -151,11 +152,16 @@ junit_record!(_, ::Nothing) = nothing
 
 Test.finish(ti::TestItems) = Test.finish(ti.graph.testset)
 
-function get_starting_testitems(ti::TestItems, n)
-    # we want to select n evenly spaced test items from ti.testitems
+function get_starting_testitems(ti::TestItems, n::Int; is_sorted::Bool=false)
     len = length(ti.testitems)
-    step = max(1, len / n)
-    testitems = [ti.testitems[round(Int, i)] for i in 1:step:len]
+    if is_sorted
+        # select the first n test items
+        testitems = ti.testitems[1:min(n, len)]
+    else
+        # select n evenly spaced test items starting with the first one
+        step = max(1, len / n)
+        testitems = [ti.testitems[round(Int, i)] for i in 1:step:len]
+    end
     @debugv 2 "get_starting_testitems len=$len n=$n allunique=$(allunique(testitems))"
     @assert length(testitems) == min(n, len) && allunique(testitems)
     for (i, t) in enumerate(testitems)
diff --git a/test/integrationtests.jl b/test/integrationtests.jl
@@ -1514,4 +1514,76 @@ end
     @test contains(c.output, "3/3 test items were run.")
 end
 
+@testset "failures_first" verbose=true begin
+    using IOCapture
+    # We use logs to tell us the run order: names sorted by the number in "START (i/n)".
+    function testitems_runorder(logstr::String)
+        re = r"START \((?<num>\d)/\d\) test item \"(?<name>.*)\""
+        names = [String(m[:name]) for m in eachmatch(re, logstr)]
+        nums = [parse(Int, m[:num]) for m in eachmatch(re, logstr)]
+        return names[sortperm(nums)] # `names[nums]` is only a sort for self-inverse orders
+    end
+    file = joinpath(TEST_FILES_DIR, "_failures_first_tests.jl")
+    @testset for nworkers in (0, 1)
+        ReTestItems.reset_test_status!()
+        for run in (1, 2)
+            c = IOCapture.capture() do
+                encased_testset(()->runtests(file; failures_first=true, nworkers))
+            end
+            results = c.value
+            @test n_tests(results) == 4
+            @test n_passed(results) == 2
+            tis = testitems_runorder(c.output)
+            if run == 1
+                @test tis == ["a. pass", "b. fail", "c. pass", "d. fail"]
+            else
+                @test tis == ["b. fail", "d. fail", "a. pass", "c. pass"]
+            end
+        end
+        # run a subset of tests
+        name = r"^a|^d"
+        c = IOCapture.capture() do
+            encased_testset(()->runtests(file; failures_first=true, nworkers, name))
+        end
+        results = c.value
+        @test n_tests(results) == 2
+        @test n_passed(results) == 1
+        tis = testitems_runorder(c.output)
+        @test tis == ["d. fail", "a. pass"]
+        # run including new tests
+        file2 = joinpath(TEST_FILES_DIR, "_happy_tests.jl")
+        c = IOCapture.capture() do
+            encased_testset(()->runtests(file, file2; failures_first=true, nworkers))
+        end
+        results = c.value
+        @test n_tests(results) == 4 + 3
+        @test n_passed(results) == 2 + 3
+        tis = testitems_runorder(c.output)
+        new_tests = ["happy 1", "happy 2", "happy 3"]
+        @test tis == ["b. fail", "d. fail", new_tests..., "a. pass", "c. pass"]
+    end
+    @testset "nworkers=2" begin
+        nworkers = 2
+        ReTestItems.reset_test_status!()
+        for run in (1, 2)
+            c = IOCapture.capture() do
+                encased_testset(()->runtests(file; failures_first=true, nworkers))
+            end
+            results = c.value
+            @test n_tests(results) == 4
+            tis = testitems_runorder(c.output)
+            if run == 1
+                # The 2 workers grab evenly spaced out testitems, starting with the first
+                # one, hence a. and c.
+                @test Set(tis[1:2]) == Set(["a. pass", "c. pass"])
+                @test Set(tis[3:4]) == Set(["b. fail", "d. fail"])
+            else
+                # The 2 workers should get the failures first, hence b. and d.
+                @test Set(tis[1:2]) == Set(["b. fail", "d. fail"])
+                @test Set(tis[3:4]) == Set(["a. pass", "c. pass"])
+            end
+        end
+    end
+end
+
 end # integrationtests.jl testset
diff --git a/test/internals.jl b/test/internals.jl
@@ -11,14 +11,21 @@ using ReTestItems
     # let's test this exhaustively for 1-10 testitems across 1-10 workers.
     for nworkers in 1:10
         for nitems in 1:10
-            testitems = [@testitem("ti-$i", _run=false, begin end) for i in 1:nitems]
-            starts = get_starting_testitems(TestItems(graph, testitems), nworkers)
-            startitems = [x for x in starts if !isnothing(x)]
-            @test length(starts) == nworkers
-            @test length(startitems) == min(nworkers, nitems)
-            @test allunique(ti.name for ti in startitems)
+            for is_sorted in (true, false)
+                testitems = [@testitem("ti-$i", _run=false, begin end) for i in 1:nitems]
+                starts = get_starting_testitems(TestItems(graph, testitems), nworkers; is_sorted)
+                startitems = [x for x in starts if !isnothing(x)]
+                @test length(starts) == nworkers
+                @test length(startitems) == min(nworkers, nitems)
+                @test allunique(ti.name for ti in startitems)
+            end
         end
     end
+    # the `is_sorted` case just returns the first `n` items
+    n = 3
+    testitems = [@testitem("ti-$i", _run=false, begin end) for i in 1:(2n)]
+    starts = get_starting_testitems(TestItems(graph, testitems), n; is_sorted=true)
+    @test starts == testitems[1:n]
 end
 
 @testset "is_test_file" begin
diff --git a/test/testfiles/_failures_first_tests.jl b/test/testfiles/_failures_first_tests.jl
@@ -0,0 +1,13 @@
+# Used to test the order in which tests are run: alternating passing and failing test items.
+@testitem "a. pass" begin
+    @test 1 == 1
+end
+@testitem "b. fail" begin
+    @test 1 == 3 # deliberately false, so this test item fails
+end
+@testitem "c. pass" begin
+    @test 2 == 2
+end
+@testitem "d. fail" begin
+    @test 2 == 4 # deliberately false, so this test item fails
+end