Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add unit tests for some reproducibility infrastructure #3437

Merged
merged 1 commit into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion reproducibility_tests/compute_mse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ function reproducibility_test(;
paths = String[] # initialize for later handling

if haskey(ENV, "BUILDKITE_COMMIT")
paths = latest_comparable_paths(10)
paths = latest_comparable_paths(; n = 10)
isempty(paths) && return (reference_mse, paths)
@info "`ds_filename_computed`: `$ds_filename_computed`"
ds_filename_references =
Expand Down
121 changes: 58 additions & 63 deletions reproducibility_tests/latest_comparable_paths.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ are found.
"""
function sorted_dataset_folder(; dir = pwd())
matching_paths = filter(ispath, readdir(dir; join = true))
isempty(matching_paths) && return ""
isempty(matching_paths) && return String[]
# sort by timestamp
sorted_paths =
sort(matching_paths; by = f -> Dates.unix2datetime(stat(f).mtime))
Expand Down Expand Up @@ -40,85 +40,80 @@ function ref_counters_per_path(paths)
end

"""
latest_comparable_paths(n::Integer)
paths = latest_comparable_paths(;
n = 5,
root_path = "/central/scratch/esm/slurm-buildkite/climaatmos-main",
ref_counter_PR = read_ref_counter(joinpath(@__DIR__, "ref_counter.jl"))
)
Returns a vector of strings, containing the `n`
latest comparable paths based on
`reproducibility_tests/ref_counter.jl`.
"""
function latest_comparable_paths(n = 5)
if get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) != "climaatmos-ci"
@warn "Not using climaatmos-ci pipeline slug, assuming no comparable references"
@info "Please review output results before merging."
return String[]
end
latest comparable paths. The assumed folder structure
is:
```
root_path/some_folder_1/ref_counter.jl
root_path/some_folder_2/ref_counter.jl
root_path/some_folder_3/ref_counter.jl
```
# Note: cluster_data_prefix is also defined in move_output.jl
cluster_data_prefix = "/central/scratch/esm/slurm-buildkite/climaatmos-main"
If a subfolder does not contain a `ref_counter.jl` file
then it is filtered out as not-comparable. The `ref_counter.jl`
files are assumed to start with a single integer,
which is read. If that integer matches `ref_counter_PR`,
then that path is considered comparable.
`paths[1]` is the most recent comparable path, and
`paths[end]` is the oldest comparable path.
"""
function latest_comparable_paths(;
n = 5,
root_path = "/central/scratch/esm/slurm-buildkite/climaatmos-main",
ref_counter_PR = read_ref_counter(joinpath(@__DIR__, "ref_counter.jl")),
)
@info "---Finding the latest comparable paths"
# Note: root_path is also defined in move_output.jl
# Get (sorted) array of paths, `pop!(sorted_paths)`
# is the most recent merged folder.
sorted_paths = sorted_dataset_folder(; dir = cluster_data_prefix)
sorted_paths = sorted_dataset_folder(; dir = root_path)
if isempty(sorted_paths)
@warn "No paths on main found, assuming no comparable references"
@info "Please review output results before merging."
@warn "No paths found in $root_path"
return String[]
end
# Find oldest path in main with the same reference
# counter as the one in the PR. If none exists,
# then assume no comparable references.

ref_counter_file_PR = joinpath(@__DIR__, "ref_counter.jl")
@assert isfile(ref_counter_file_PR)
ref_counter_PR = read_ref_counter(ref_counter_file_PR)

ref_counters_main = ref_counters_per_path(sorted_paths)
i_comparable_references = findall(ref_counters_main) do ref_counter_main
ref_counter_main == ref_counter_PR
end
if isnothing(i_comparable_references)
@warn "`ref_counter.jl` not found on main, assuming no comparable references"
@info "Please review output results before merging."
# Short circuit if we don't find anything:
found_ref_counters =
filter(p -> isfile(joinpath(p, "ref_counter.jl")), sorted_paths)
if isempty(found_ref_counters)
@warn "No reference counters found in paths: $sorted_paths"
return String[]
end
@info "Found $(length(i_comparable_references)) comparable references:$i_comparable_references"
# Largest ref-counter reference path:
paths = map(i -> sorted_paths[i], i_comparable_references)
@info "$(length(paths)) paths found:"
for p in paths
@info " $p, $(Dates.unix2datetime(stat(p).mtime))"

# Find comparable paths
comparable_paths = String[]
@info "Reference counters found:"
for (i, path) in enumerate(sorted_paths)
ref_counter_file = joinpath(path, "ref_counter.jl")
!isfile(ref_counter_file) && continue
rc = read_ref_counter(ref_counter_file)
comparable = ref_counter_PR == rc
suffix = comparable ? ", comparable" : ""
@info " $path: $rc$suffix"
comparable && push!(comparable_paths, path)
end
ref_counter_files_main = map(p -> joinpath(p, "ref_counter.jl"), paths)
@info "$(length(ref_counter_files_main)) reference counter paths on central"
filter!(isfile, ref_counter_files_main)
@info "$(length(ref_counter_files_main)) reference counter paths on central after filtering isfile"

# for p in paths
# @info "Files in $p:" # for debugging
# for file_on_main in readdir(p)
# @info " File:`$file_on_main`"
# end
# end
@assert all(isfile, ref_counter_files_main)
ref_counters_main = map(read_ref_counter, ref_counter_files_main)
if all(rc -> ref_counter_PR == rc + 1, ref_counters_main) # new reference
@warn "`ref_counter.jl` incremented, assuming no comparable references"
@info "Ref counters main: $ref_counters_main."
@info "Please review output results before merging."

if isempty(comparable_paths)
@warn "No comparable paths found in any of the paths:$sorted_paths"
return String[]
elseif all(rc -> ref_counter_PR == rc, ref_counters_main) # unchanged reference
@info "Ref counters main: $ref_counters_main."
@info "Comparing results against main path:$paths"
else
error(
"Unexpected reference. Please open an issue pointing to this build.",
)
end

paths = reverse(paths)[1:min(n, length(paths))]
@info "Limiting comparable paths to $n:"
for p in paths
@info " $p, $(Dates.unix2datetime(stat(p).mtime))"
comparable_paths = reverse(comparable_paths) # reverse so that the most recent comparable path comes first

if length(comparable_paths) > n # limit to n comparable paths
comparable_paths = comparable_paths[1:min(n, length(comparable_paths))]
end
# Get the top 10 most recent paths to compare against:
return paths

return comparable_paths
end
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ using Test
@safetestset "Model getters" begin @time include("solver/model_getters.jl") end
@safetestset "Topography tests" begin @time include("topography.jl") end
@safetestset "Restarts" begin @time include("restart.jl") end
@safetestset "Reproducibility infra" begin @time include("unit_reproducibility_infra.jl") end

#! format: on

Expand Down
89 changes: 89 additions & 0 deletions test/unit_reproducibility_infra.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#=
using Revise; include("test/unit_reproducibility_infra.jl")
=#
using Test
using Dates

include(joinpath("..", "reproducibility_tests/latest_comparable_paths.jl"))

"""
    make_ref_file_counter(dir, pathname, i)

Create the folder `pathname` inside `dir`, write a `ref_counter.jl` file into
it whose content is `i` followed by a newline, and return the path of the new
folder (`joinpath(dir, pathname)`).

The folder is created under `dir` itself rather than under the current
working directory, so the returned path always refers to the folder that was
actually created.
"""
function make_ref_file_counter(dir, pathname, i)
    # `mkdir` returns the path it created, which is exactly what we hand back.
    folder = mkdir(joinpath(dir, pathname))
    open(io -> println(io, i), joinpath(folder, "ref_counter.jl"), "w")
    return folder
end

# Unit tests for `latest_comparable_paths`.
#
# Every scenario builds its own throw-away folder tree with `mktempdir` and
# runs inside `cd(path)` so that relative folder names resolve inside that
# temporary directory. Each scenario is its own nested `@testset`, so a
# failure is reported against the scenario that produced it.
@testset "Reproducibility infrastructure: latest_comparable_paths" begin
    @testset "no paths at all" begin
        mktempdir() do path
            cd(path) do
                paths = latest_comparable_paths(;
                    root_path = path,
                    ref_counter_PR = 2,
                )
                @test isempty(paths)
            end
        end
    end

    @testset "no paths with ref counters" begin
        mktempdir() do path
            cd(path) do
                # A folder without a `ref_counter.jl` file inside it.
                mkdir("d1")
                paths = latest_comparable_paths(;
                    root_path = path,
                    ref_counter_PR = 2,
                )
                @test isempty(paths)
            end
        end
    end

    @testset "no paths with matching ref counters" begin
        mktempdir() do path
            cd(path) do
                make_ref_file_counter(path, "d1", 1)
                paths = latest_comparable_paths(;
                    root_path = path,
                    ref_counter_PR = 2,
                )
                @test isempty(paths)
            end
        end
    end

    @testset "1 matching ref counter" begin
        mktempdir() do path
            cd(path) do
                make_ref_file_counter(path, "d1", 1)
                p2 = make_ref_file_counter(path, "d2", 2)
                make_ref_file_counter(path, "d3", 3)
                paths = latest_comparable_paths(;
                    root_path = path,
                    ref_counter_PR = 2,
                )
                @test paths == [p2]
            end
        end
    end

    @testset "multiple matching ref counters" begin
        mktempdir() do path
            cd(path) do
                make_ref_file_counter(path, "d1", 1)
                make_ref_file_counter(path, "d2", 2)
                p3 = make_ref_file_counter(path, "d3", 3)
                p4 = make_ref_file_counter(path, "d4", 3)
                p5 = make_ref_file_counter(path, "d5", 3)
                p6 = make_ref_file_counter(path, "d6", 3)
                paths = latest_comparable_paths(;
                    root_path = path,
                    ref_counter_PR = 3,
                )
                @test paths == [p6, p5, p4, p3] # p6 is most recent
            end
        end
    end

    @testset "matching ref counters that exceed n" begin
        mktempdir() do path
            cd(path) do
                make_ref_file_counter(path, "d1", 1)
                make_ref_file_counter(path, "d2", 2)
                make_ref_file_counter(path, "d3", 3)
                make_ref_file_counter(path, "d4", 3)
                p5 = make_ref_file_counter(path, "d5", 3)
                p6 = make_ref_file_counter(path, "d6", 3)
                paths = latest_comparable_paths(;
                    n = 2,
                    root_path = path,
                    ref_counter_PR = 3,
                )
                @test paths == [p6, p5] # p6 is most recent
            end
        end
    end
end
Loading