From 19f838f0ecc08fd1438485625af6b4dc9e799152 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Fri, 21 Jun 2024 12:58:48 -0400 Subject: [PATCH] finish implementation of upgradable stdlibs (#54739) This now allows the user to load any number of copies of a module, and uses the combination of the environment, explicitly loaded modules, and the requirements of the precompile caches to determine the meaning of a name and which files need to be loaded. Note, however, that package extensions continue to apply primarily to the explicitly loaded modules: although they may get loaded incidentally as the dependency of another package, they won't get defined for every pair of combinations of triggering modules. Fixes #53983 (cherry picked from commit a1a2ac63cffd11f0edc3fe730797023867d82527) --- base/Base.jl | 4 ++ base/loading.jl | 148 ++++++++++++++++++++++++++------------------- test/precompile.jl | 10 ++- 3 files changed, 96 insertions(+), 66 deletions(-) diff --git a/base/Base.jl b/base/Base.jl index e192fd3f48cf6..438d08e2e01d1 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -604,6 +604,10 @@ function __init__() init_active_project() append!(empty!(_sysimage_modules), keys(loaded_modules)) empty!(explicit_loaded_modules) + @assert isempty(loaded_precompiles) + for (mod, key) in module_keys + loaded_precompiles[key => module_build_id(mod)] = mod + end if haskey(ENV, "JULIA_MAX_NUM_PRECOMPILE_FILES") MAX_NUM_PRECOMPILE_FILES[] = parse(Int, ENV["JULIA_MAX_NUM_PRECOMPILE_FILES"]) end diff --git a/base/loading.jl b/base/loading.jl index 95f9100121414..0a51cd90eb265 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -1176,8 +1176,8 @@ function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{No dep = depmods[i] dep isa Module && continue _, depkey, depbuild_id = dep::Tuple{String, PkgId, UInt128} - @assert root_module_exists(depkey) - dep = root_module(depkey) + dep = loaded_precompiles[depkey => depbuild_id] + @assert PkgId(dep) == depkey && 
module_build_id(dep) === depbuild_id depmods[i] = dep end @@ -1234,7 +1234,8 @@ function register_restored_modules(sv::SimpleVector, pkg::PkgId, path::String) push!(Base.Docs.modules, M) end if parentmodule(M) === M - register_root_module(M) + push!(loaded_modules_order, M) + loaded_precompiles[pkg => module_build_id(M)] = M end end @@ -1654,7 +1655,7 @@ function compilecache_path(pkg::PkgId; if staledeps === true continue end - staledeps, _ = staledeps::Tuple{Vector{Any}, Union{Nothing, String}} + staledeps, _, _ = staledeps::Tuple{Vector{Any}, Union{Nothing, String}, UInt128} # finish checking staledeps module graph for i in 1:length(staledeps) dep = staledeps[i] @@ -1742,23 +1743,23 @@ end # search for a precompile cache file to load, after some various checks function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt128) assert_havelock(require_lock) - if root_module_exists(modkey) - loaded = root_module(modkey) - else + loaded = maybe_root_module(modkey) + if loaded === nothing loaded = start_loading(modkey) - if loaded === nothing - try - modpath = locate_package(modkey) - modpath === nothing && return nothing - set_pkgorigin_version_path(modkey, String(modpath)) - loaded = _require_search_from_serialized(modkey, String(modpath), build_id, true) - finally - end_loading(modkey, loaded) - end - if loaded isa Module - insert_extension_triggers(modkey) - run_package_callbacks(modkey) - end + end + if loaded === nothing + try + modpath = locate_package(modkey) + isnothing(modpath) && error("Cannot locate source for $(repr("text/plain", modkey))") + modpath = String(modpath)::String + set_pkgorigin_version_path(modkey, modpath) + loaded = _require_search_from_serialized(modkey, modpath, build_id, true) + finally + end_loading(modkey, loaded) + end + if loaded isa Module + insert_extension_triggers(modkey) + run_package_callbacks(modkey) end end if loaded isa Module && PkgId(loaded) == modkey && module_build_id(loaded) === build_id @@ -1831,10 +1832,12 @@ 
function _tryrequire_from_serialized(pkg::PkgId, path::String, ocachepath::Union depmods[i] = dep end # then load the file - return _include_from_serialized(pkg, path, ocachepath, depmods, ignore_native) + loaded = _include_from_serialized(pkg, path, ocachepath, depmods, ignore_native) + loaded isa Module && register_root_module(loaded) + return loaded end -# returns `nothing` if require found a precompile cache for this sourcepath, but couldn't load it +# returns `nothing` if require found a precompile cache for this sourcepath, but couldn't load it or it was stale # returns the set of modules restored if the cache load succeeded @constprop :none function _require_search_from_serialized(pkg::PkgId, sourcepath::String, build_id::UInt128, stalecheck::Bool; reasons=nothing, DEPOT_PATH::typeof(DEPOT_PATH)=DEPOT_PATH) assert_havelock(require_lock) @@ -1846,7 +1849,7 @@ end continue end try - staledeps, ocachefile = staledeps::Tuple{Vector{Any}, Union{Nothing, String}} + staledeps, ocachefile, build_id = staledeps::Tuple{Vector{Any}, Union{Nothing, String}, UInt128} # finish checking staledeps module graph for i in 1:length(staledeps) dep = staledeps[i] @@ -1858,7 +1861,7 @@ end if modstaledeps === true continue end - modstaledeps, modocachepath = modstaledeps::Tuple{Vector{Any}, Union{Nothing, String}} + modstaledeps, modocachepath, _ = modstaledeps::Tuple{Vector{Any}, Union{Nothing, String}, UInt128} staledeps[i] = (modpath, modkey, modbuild_id, modpath_to_try, modstaledeps, modocachepath) @goto check_next_dep end @@ -1866,6 +1869,11 @@ end @goto check_next_path @label check_next_dep end + M = get(loaded_precompiles, pkg => build_id, nothing) + if isa(M, Module) + stalecheck && register_root_module(M) + return M + end if stalecheck try touch(path_to_try) # update timestamp of precompilation file @@ -1878,19 +1886,17 @@ end dep = staledeps[i] dep isa Module && continue modpath, modkey, modbuild_id, modcachepath, modstaledeps, modocachepath = dep::Tuple{String, PkgId, 
UInt128, String, Vector{Any}, Union{Nothing, String}} - dep = nothing - if root_module_exists(modkey) - dep = root_module(modkey) + dep = get(loaded_precompiles, modkey => modbuild_id, nothing) + if dep === nothing + dep = maybe_root_module(modkey) end while true if dep isa Module if PkgId(dep) == modkey && module_build_id(dep) === modbuild_id break else - if stalecheck - @debug "Rejecting cache file $path_to_try because module $modkey is already loaded and incompatible." - @goto check_next_path - end + @debug "Rejecting cache file $path_to_try because module $modkey got loaded at a different version than expected." + @goto check_next_path end end dep = start_loading(modkey) @@ -1898,6 +1904,7 @@ end try set_pkgorigin_version_path(modkey, modpath) dep = _include_from_serialized(modkey, modcachepath, modocachepath, modstaledeps) + dep isa Module && stalecheck && register_root_module(dep) finally end_loading(modkey, dep) end @@ -1911,7 +1918,11 @@ end end staledeps[i] = dep end - restored = _include_from_serialized(pkg, path_to_try, ocachefile, staledeps) + restored = get(loaded_precompiles, pkg => build_id, nothing) + if !isa(restored, Module) + restored = _include_from_serialized(pkg, path_to_try, ocachefile, staledeps) + end + isa(restored, Module) && stalecheck && register_root_module(restored) isa(restored, Module) && return restored @debug "Deserialization checks failed while attempting to load cache from $path_to_try" exception=restored @label check_next_path @@ -2000,7 +2011,7 @@ const package_callbacks = Any[] const include_callbacks = Any[] # used to optionally track dependencies when requiring a module: -const _concrete_dependencies = Pair{PkgId,UInt128}[] # these dependency versions are "set in stone", and the process should try to avoid invalidating them +const _concrete_dependencies = Pair{PkgId,UInt128}[] # these dependency versions are "set in stone", because they are explicitly loaded, and the process should try to avoid invalidating them const 
_require_dependencies = Any[] # a list of (mod, abspath, fsize, hash, mtime) tuples that are the file dependencies of the module currently being precompiled const _track_dependencies = Ref(false) # set this to true to track the list of file dependencies function _include_dependency(mod::Module, _path::AbstractString; track_content=true, @@ -2251,15 +2262,20 @@ end PkgOrigin() = PkgOrigin(nothing, nothing, nothing) const pkgorigins = Dict{PkgId,PkgOrigin}() -const loaded_modules = Dict{PkgId,Module}() -# Emptied on Julia start -const explicit_loaded_modules = Dict{PkgId,Module}() +const explicit_loaded_modules = Dict{PkgId,Module}() # Emptied on Julia start +const loaded_modules = Dict{PkgId,Module}() # available to be explicitly loaded +const loaded_precompiles = Dict{Pair{PkgId,UInt128},Module}() # extended (complete) list of modules, available to be loaded const loaded_modules_order = Vector{Module}() -const module_keys = IdDict{Module,PkgId}() # the reverse +const module_keys = IdDict{Module,PkgId}() # the reverse of loaded_modules is_root_module(m::Module) = @lock require_lock haskey(module_keys, m) root_module_key(m::Module) = @lock require_lock module_keys[m] +function module_build_id(m::Module) + hi, lo = ccall(:jl_module_build_id, NTuple{2,UInt64}, (Any,), m) + return (UInt128(hi) << 64) | lo +end + @constprop :none function register_root_module(m::Module) # n.b. This is called from C after creating a new module in `Base.__toplevel__`, # instead of adding them to the binding table there. 
@@ -2275,7 +2291,7 @@ root_module_key(m::Module) = @lock require_lock module_keys[m] end end end - push!(loaded_modules_order, m) + haskey(loaded_precompiles, key => module_build_id(m)) || push!(loaded_modules_order, m) loaded_modules[key] = m explicit_loaded_modules[key] = m module_keys[m] = key @@ -2307,6 +2323,9 @@ root_module_exists(key::PkgId) = @lock require_lock haskey(loaded_modules, key) loaded_modules_array() = @lock require_lock copy(loaded_modules_order) function unreference_module(key::PkgId) + if haskey(explicit_loaded_modules, key) + m = pop!(explicit_loaded_modules, key) + end if haskey(loaded_modules, key) m = pop!(loaded_modules, key) # need to ensure all modules are GC rooted; will still be referenced @@ -2450,7 +2469,7 @@ function _require(pkg::PkgId, env=nothing) return loaded end -# load a serialized file directly +# load a serialized file directly, including dependencies (without checking staleness except for immediate conflicts) function _require_from_serialized(uuidkey::PkgId, path::String, ocachepath::Union{String, Nothing}, sourcepath::String) @lock require_lock begin set_pkgorigin_version_path(uuidkey, sourcepath) @@ -2884,13 +2903,15 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in cachepath = compilecache_dir(pkg) # build up the list of modules that we want the precompile process to preserve - concrete_deps = copy(_concrete_dependencies) if keep_loaded_modules - for mod in loaded_modules_array() - if !(mod === Main || mod === Core || mod === Base) - push!(concrete_deps, PkgId(mod) => module_build_id(mod)) + concrete_deps = copy(_concrete_dependencies) + for (pkgreq, modreq) in loaded_modules # TODO: convert all relevant staleness heuristics to use explicit_loaded_modules instead + if !(pkgreq === Main || pkgreq === Core || pkgreq === Base) + push!(concrete_deps, pkgreq => module_build_id(modreq)) end end + else + concrete_deps = empty(_concrete_dependencies) end # run the expression and cache the 
result verbosity = isinteractive() ? CoreLogging.Info : CoreLogging.Debug @@ -3013,11 +3034,6 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in end end -function module_build_id(m::Module) - hi, lo = ccall(:jl_module_build_id, NTuple{2,UInt64}, (Any,), m) - return (UInt128(hi) << 64) | lo -end - function isvalid_cache_header(f::IOStream) pkgimage = Ref{UInt8}() checksum = ccall(:jl_read_verify_header, UInt64, (Ptr{Cvoid}, Ptr{UInt8}, Ptr{Int64}, Ptr{Int64}), f.ios, pkgimage, Ref{Int64}(), Ref{Int64}()) # returns checksum id or zero @@ -3570,7 +3586,7 @@ end @constprop :none function stale_cachefile(modkey::PkgId, build_id::UInt128, modpath::String, cachefile::String; ignore_loaded::Bool=false, requested_flags::CacheFlags=CacheFlags(), reasons::Union{Dict{String,Int},Nothing}=nothing, stalecheck::Bool=true) - # XXX: this function appears to dl all of the file validation, not just those checks related to stale + # n.b.: this function does nearly all of the file validation, not just those checks related to stale, so the name is potentially unclear io = open(cachefile, "r") try checksum = isvalid_cache_header(io) @@ -3624,8 +3640,8 @@ end record_reason(reasons, "for different pkgid") return true end + id_build = (UInt128(checksum) << 64) | id.second if build_id != UInt128(0) - id_build = (UInt128(checksum) << 64) | id.second if id_build != build_id @debug "Ignoring cache file $cachefile for $modkey ($((UUID(id_build)))) since it does not provide desired build_id ($((UUID(build_id))))" record_reason(reasons, "for different buildid") @@ -3640,8 +3656,12 @@ end depmods = Vector{Any}(undef, ndeps) for i in 1:ndeps req_key, req_build_id = required_modules[i] - # Module is already loaded - if root_module_exists(req_key) + # Check if module is already loaded + if !stalecheck && haskey(loaded_precompiles, req_key => req_build_id) + M = loaded_precompiles[req_key => req_build_id] + @assert PkgId(M) == req_key && module_build_id(M) === 
req_build_id + depmods[i] = M + elseif root_module_exists(req_key) M = root_module(req_key) if PkgId(M) == req_key && module_build_id(M) === req_build_id depmods[i] = M @@ -3672,17 +3692,19 @@ end # check if this file is going to provide one of our concrete dependencies # or if it provides a version that conflicts with our concrete dependencies # or neither - for (req_key, req_build_id) in _concrete_dependencies - build_id = get(modules, req_key, UInt64(0)) - if build_id !== UInt64(0) - build_id |= UInt128(checksum) << 64 - if build_id === req_build_id - stalecheck = false - break + if stalecheck + for (req_key, req_build_id) in _concrete_dependencies + build_id = get(modules, req_key, UInt64(0)) + if build_id !== UInt64(0) + build_id |= UInt128(checksum) << 64 + if build_id === req_build_id + stalecheck = false + break + end + @debug "Rejecting cache file $cachefile because it provides the wrong build_id (got $((UUID(build_id)))) for $req_key (want $(UUID(req_build_id)))" + record_reason(reasons, "wrong dep buildid") + return true # cachefile doesn't provide the required version of the dependency end - @debug "Rejecting cache file $cachefile because it provides the wrong build_id (got $((UUID(build_id)))) for $req_key (want $(UUID(req_build_id)))" - record_reason(reasons, "wrong dep buildid") - return true # cachefile doesn't provide the required version of the dependency end end @@ -3770,7 +3792,7 @@ end return true end - return depmods, ocachefile # fresh cachefile + return depmods, ocachefile, id_build # fresh cachefile finally close(io) end diff --git a/test/precompile.jl b/test/precompile.jl index 2327462dac9d0..be894e5d26133 100644 --- a/test/precompile.jl +++ b/test/precompile.jl @@ -639,13 +639,13 @@ precompile_test_harness(false) do dir empty_prefs_hash = Base.get_preferences_hash(nothing, String[]) @test cachefile == Base.compilecache_path(Base.PkgId("FooBar"), empty_prefs_hash) @test isfile(joinpath(cachedir, "FooBar.ji")) - Tsc = 
Bool(Base.JLOptions().use_pkgimages) ? Tuple{<:Vector, String} : Tuple{<:Vector, Nothing} + Tsc = Bool(Base.JLOptions().use_pkgimages) ? Tuple{<:Vector, String, UInt128} : Tuple{<:Vector, Nothing, UInt128} @test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) isa Tsc @test !isdefined(Main, :FooBar) @test !isdefined(Main, :FooBar1) relFooBar_file = joinpath(dir, "subfolder", "..", "FooBar.jl") - @test Base.stale_cachefile(relFooBar_file, joinpath(cachedir, "FooBar.ji")) isa (Sys.iswindows() ? Tuple{<:Vector, String} : Bool) # `..` is not a symlink on Windows + @test Base.stale_cachefile(relFooBar_file, joinpath(cachedir, "FooBar.ji")) isa (Sys.iswindows() ? Tuple{<:Vector, String, UInt128} : Bool) # `..` is not a symlink on Windows mkdir(joinpath(dir, "subfolder")) @test Base.stale_cachefile(relFooBar_file, joinpath(cachedir, "FooBar.ji")) isa Tsc @@ -1546,6 +1546,7 @@ precompile_test_harness("Issue #26028") do load_path module Foo26028 module Bar26028 x = 0 + y = 0 end function __init__() include(joinpath(@__DIR__, "Baz26028.jl")) @@ -1555,7 +1556,10 @@ precompile_test_harness("Issue #26028") do load_path write(joinpath(load_path, "Baz26028.jl"), """ module Baz26028 - import Foo26028.Bar26028.x + using Test + @test_throws(ConcurrencyViolationError("deadlock detected in loading Foo26028 -> Foo26028"), + @eval import Foo26028.Bar26028.x) + import ..Foo26028.Bar26028.y end """) Base.compilecache(Base.PkgId("Foo26028"))