Skip to content

Commit

Permalink
Lower Pidfile stale_age multiplier. Add pidfile to cache log message. (
Browse files Browse the repository at this point in the history
…#51714)

(cherry picked from commit 6ec149f)
  • Loading branch information
IanButterworth authored and KristofferC committed Oct 23, 2023
1 parent 5f8f33b commit 516725d
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 27 deletions.
19 changes: 11 additions & 8 deletions base/loading.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2967,24 +2967,27 @@ global parse_pidfile_hook
# the same package cannot be precompiled from different projects and/or different preferences at the same time.
compilecache_pidfile_path(pkg::PkgId) = compilecache_path(pkg, UInt64(0); project="") * ".pidfile"

const compilecache_pidlock_stale_age = 10

# Allows processes to wait if another process is precompiling a given source already.
# The lock file mtime will be updated when held every `stale_age/2` seconds.
# The lock file mtime will be updated when held at most every `stale_age/2` seconds, with expected
# variance of 10 seconds or more being infrequent but not unusual.
# After `stale_age` seconds beyond the mtime of the lock file, the lock file is deleted and
# precompilation will proceed if
# - the locking process no longer exists
# - the lock is held by another host, since processes cannot be checked remotely
# or after `stale_age * 25` seconds if the process does still exist.
function maybe_cachefile_lock(f, pkg::PkgId, srcpath::String; stale_age=10)
# precompilation will proceed if the locking process no longer exists or after `stale_age * 5`
# seconds if the process does still exist.
# If the lock is held by another host, it will conservatively wait `stale_age * 5`
# seconds since processes cannot be checked remotely
function maybe_cachefile_lock(f, pkg::PkgId, srcpath::String; stale_age=compilecache_pidlock_stale_age)
if @isdefined(mkpidlock_hook) && @isdefined(trymkpidlock_hook) && @isdefined(parse_pidfile_hook)
pidfile = compilecache_pidfile_path(pkg)
cachefile = invokelatest(trymkpidlock_hook, f, pidfile; stale_age)
if cachefile === false
pid, hostname, age = invokelatest(parse_pidfile_hook, pidfile)
verbosity = isinteractive() ? CoreLogging.Info : CoreLogging.Debug
if isempty(hostname) || hostname == gethostname()
@logmsg verbosity "Waiting for another process (pid: $pid) to finish precompiling $pkg"
@logmsg verbosity "Waiting for another process (pid: $pid) to finish precompiling $pkg. Pidfile: $pidfile"
else
@logmsg verbosity "Waiting for another machine (hostname: $hostname, pid: $pid) to finish precompiling $pkg"
@logmsg verbosity "Waiting for another machine (hostname: $hostname, pid: $pid) to finish precompiling $pkg. Pidfile: $pidfile"
end
# wait until the lock is available, but don't actually acquire it
# returning nothing indicates a process waited for another
Expand Down
21 changes: 12 additions & 9 deletions stdlib/FileWatching/src/pidfile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ Optional keyword arguments:
- `mode`: file access mode (modified by the process umask). Defaults to world-readable.
- `poll_interval`: Specify the maximum time to between attempts (if `watch_file` doesn't work)
- `stale_age`: Delete an existing pidfile (ignoring the lock) if it is older than this many seconds, based on its mtime.
The file won't be deleted until 25x longer than this if the pid in the file appears that it may be valid.
The file won't be deleted until 5x longer than this if the pid in the file appears that it may be valid.
Or 25x longer if `refresh` is overridden to 0 to disable lock refreshing.
By default this is disabled (`stale_age` = 0), but a typical recommended value would be about 3-5x an
estimated normal completion time.
- `refresh`: Keeps a lock from becoming stale by updating the mtime every interval of time that passes.
Expand Down Expand Up @@ -63,7 +64,7 @@ mutable struct LockMonitor
atdir, atname = splitdir(at)
isempty(atdir) && (atdir = pwd())
at = realpath(atdir) * path_separator * atname
fd = open_exclusive(at; stale_age=stale_age, kwopts...)
fd = open_exclusive(at; stale_age, refresh, kwopts...)
update = nothing
try
write_pidfile(fd, pid)
Expand Down Expand Up @@ -184,15 +185,16 @@ function isvalidpid(hostname::AbstractString, pid::Cuint)
end

"""
stale_pidfile(path::String, stale_age::Real) :: Bool
stale_pidfile(path::String, stale_age::Real, refresh::Real) :: Bool
Helper function for `open_exclusive` for deciding if a pidfile is stale.
"""
function stale_pidfile(path::String, stale_age::Real)
function stale_pidfile(path::String, stale_age::Real, refresh::Real)
pid, hostname, age = parse_pidfile(path)
age < -stale_age && @warn "filesystem time skew detected" path=path
longer_factor = refresh == 0 ? 25 : 5
if age > stale_age
if (age > stale_age * 25) || !isvalidpid(hostname, pid)
if (age > stale_age * longer_factor) || !isvalidpid(hostname, pid)
return true
end
end
Expand All @@ -219,7 +221,7 @@ struct PidlockedError <: Exception
end

"""
open_exclusive(path::String; mode, poll_interval, wait, stale_age) :: File
open_exclusive(path::String; mode, poll_interval, wait, stale_age, refresh) :: File
Create a new a file for read-write advisory-exclusive access.
If `wait` is `false` then error out if the lock files exist
Expand All @@ -231,13 +233,14 @@ function open_exclusive(path::String;
mode::Integer = 0o444 #= read-only =#,
poll_interval::Real = 10 #= seconds =#,
wait::Bool = true #= return on failure if false =#,
stale_age::Real = 0 #= disabled =#)
stale_age::Real = 0 #= disabled =#,
refresh::Real = stale_age/2)
# fast-path: just try to open it
file = tryopen_exclusive(path, mode)
file === nothing || return file
if !wait
if file === nothing && stale_age > 0
if stale_age > 0 && stale_pidfile(path, stale_age)
if stale_age > 0 && stale_pidfile(path, stale_age, refresh)
@warn "attempting to remove probably stale pidfile" path=path
tryrmopenfile(path)
end
Expand All @@ -263,7 +266,7 @@ function open_exclusive(path::String;
file = tryopen_exclusive(path, mode)
file === nothing || return file
Base.wait(t) # sleep for a bit before trying again
if stale_age > 0 && stale_pidfile(path, stale_age)
if stale_age > 0 && stale_pidfile(path, stale_age, refresh)
# if the file seems stale, try to remove it before attempting again
# set stale_age to zero so we won't attempt again, even if the attempt fails
stale_age -= stale_age
Expand Down
35 changes: 25 additions & 10 deletions stdlib/FileWatching/test/pidfile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -203,18 +203,33 @@ end

@assert !ispath("pidfile")
@testset "open_exclusive: break lock" begin
# test for stale_age
t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=10)::File
try
write_pidfile(f, getpid())
finally
@testset "using stale_age without lock refreshing" begin
t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=10, refresh=0)::File
try
write_pidfile(f, getpid())
finally
close(f)
end
@test t < 2
t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=1, refresh=0)::File
close(f)
@test 20 < t < 50
rm("pidfile")
end

@testset "using stale_age with lock refreshing on (default)" begin
t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=10)::File
try
write_pidfile(f, getpid())
finally
close(f)
end
@test t < 2
t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=5)::File
close(f)
@test 20 < t < 50
rm("pidfile")
end
@test t < 2
t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=1)::File
close(f)
@test 20 < t < 50
rm("pidfile")

t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=10)::File
close(f)
Expand Down

0 comments on commit 516725d

Please sign in to comment.