Skip to content

Commit 6ec149f

Browse files
Lower Pidfile stale_age multiplier. Add pidfile to cache log message. (#51714)
1 parent 4d36da7 commit 6ec149f

File tree

3 files changed

+48
-27
lines changed

3 files changed

+48
-27
lines changed

base/loading.jl

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3053,24 +3053,27 @@ global parse_pidfile_hook
30533053
# the same package cannot be precompiled from different projects and/or different preferences at the same time.
30543054
compilecache_pidfile_path(pkg::PkgId) = compilecache_path(pkg, UInt64(0); project="") * ".pidfile"
30553055

3056+
const compilecache_pidlock_stale_age = 10
3057+
30563058
# Allows processes to wait if another process is precompiling a given source already.
3057-
# The lock file mtime will be updated when held every `stale_age/2` seconds.
3059+
# The lock file mtime will be updated when held at most every `stale_age/2` seconds, with expected
3060+
# variance of 10 seconds or more being infrequent but not unusual.
30583061
# After `stale_age` seconds beyond the mtime of the lock file, the lock file is deleted and
3059-
# precompilation will proceed if
3060-
# - the locking process no longer exists
3061-
# - the lock is held by another host, since processes cannot be checked remotely
3062-
# or after `stale_age * 25` seconds if the process does still exist.
3063-
function maybe_cachefile_lock(f, pkg::PkgId, srcpath::String; stale_age=10)
3062+
# precompilation will proceed if the locking process no longer exists or after `stale_age * 5`
3063+
# seconds if the process does still exist.
3064+
# If the lock is held by another host, it will conservatively wait `stale_age * 5`
3065+
# seconds, since processes cannot be checked remotely.
3066+
function maybe_cachefile_lock(f, pkg::PkgId, srcpath::String; stale_age=compilecache_pidlock_stale_age)
30643067
if @isdefined(mkpidlock_hook) && @isdefined(trymkpidlock_hook) && @isdefined(parse_pidfile_hook)
30653068
pidfile = compilecache_pidfile_path(pkg)
30663069
cachefile = invokelatest(trymkpidlock_hook, f, pidfile; stale_age)
30673070
if cachefile === false
30683071
pid, hostname, age = invokelatest(parse_pidfile_hook, pidfile)
30693072
verbosity = isinteractive() ? CoreLogging.Info : CoreLogging.Debug
30703073
if isempty(hostname) || hostname == gethostname()
3071-
@logmsg verbosity "Waiting for another process (pid: $pid) to finish precompiling $pkg"
3074+
@logmsg verbosity "Waiting for another process (pid: $pid) to finish precompiling $pkg. Pidfile: $pidfile"
30723075
else
3073-
@logmsg verbosity "Waiting for another machine (hostname: $hostname, pid: $pid) to finish precompiling $pkg"
3076+
@logmsg verbosity "Waiting for another machine (hostname: $hostname, pid: $pid) to finish precompiling $pkg. Pidfile: $pidfile"
30743077
end
30753078
# wait until the lock is available, but don't actually acquire it
30763079
# returning nothing indicates a process waited for another

stdlib/FileWatching/src/pidfile.jl

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ Optional keyword arguments:
3333
- `mode`: file access mode (modified by the process umask). Defaults to world-readable.
3434
- `poll_interval`: Specify the maximum time between attempts (if `watch_file` doesn't work)
3535
- `stale_age`: Delete an existing pidfile (ignoring the lock) if it is older than this many seconds, based on its mtime.
36-
The file won't be deleted until 25x longer than this if the pid in the file appears that it may be valid.
36+
The file won't be deleted until 5x longer than this if the pid in the file appears that it may be valid.
37+
Or 25x longer if `refresh` is overridden to 0 to disable lock refreshing.
3738
By default this is disabled (`stale_age` = 0), but a typical recommended value would be about 3-5x an
3839
estimated normal completion time.
3940
- `refresh`: Keeps a lock from becoming stale by updating the mtime every interval of time that passes.
@@ -64,7 +65,7 @@ mutable struct LockMonitor
6465
atdir, atname = splitdir(at)
6566
isempty(atdir) && (atdir = pwd())
6667
at = realpath(atdir) * path_separator * atname
67-
fd = open_exclusive(at; stale_age=stale_age, kwopts...)
68+
fd = open_exclusive(at; stale_age, refresh, kwopts...)
6869
update = nothing
6970
try
7071
write_pidfile(fd, pid)
@@ -185,15 +186,16 @@ function isvalidpid(hostname::AbstractString, pid::Cuint)
185186
end
186187

187188
"""
188-
stale_pidfile(path::String, stale_age::Real) :: Bool
189+
stale_pidfile(path::String, stale_age::Real, refresh::Real) :: Bool
189190
190191
Helper function for `open_exclusive` for deciding if a pidfile is stale.
191192
"""
192-
function stale_pidfile(path::String, stale_age::Real)
193+
function stale_pidfile(path::String, stale_age::Real, refresh::Real)
193194
pid, hostname, age = parse_pidfile(path)
194195
age < -stale_age && @warn "filesystem time skew detected" path=path
196+
longer_factor = refresh == 0 ? 25 : 5
195197
if age > stale_age
196-
if (age > stale_age * 25) || !isvalidpid(hostname, pid)
198+
if (age > stale_age * longer_factor) || !isvalidpid(hostname, pid)
197199
return true
198200
end
199201
end
@@ -220,7 +222,7 @@ struct PidlockedError <: Exception
220222
end
221223

222224
"""
223-
open_exclusive(path::String; mode, poll_interval, wait, stale_age) :: File
225+
open_exclusive(path::String; mode, poll_interval, wait, stale_age, refresh) :: File
224226
225227
Create a new file for read-write advisory-exclusive access.
226228
If `wait` is `false` then error out if the lock files exist
@@ -232,13 +234,14 @@ function open_exclusive(path::String;
232234
mode::Integer = 0o444 #= read-only =#,
233235
poll_interval::Real = 10 #= seconds =#,
234236
wait::Bool = true #= return on failure if false =#,
235-
stale_age::Real = 0 #= disabled =#)
237+
stale_age::Real = 0 #= disabled =#,
238+
refresh::Real = stale_age/2)
236239
# fast-path: just try to open it
237240
file = tryopen_exclusive(path, mode)
238241
file === nothing || return file
239242
if !wait
240243
if file === nothing && stale_age > 0
241-
if stale_age > 0 && stale_pidfile(path, stale_age)
244+
if stale_age > 0 && stale_pidfile(path, stale_age, refresh)
242245
@warn "attempting to remove probably stale pidfile" path=path
243246
tryrmopenfile(path)
244247
end
@@ -264,7 +267,7 @@ function open_exclusive(path::String;
264267
file = tryopen_exclusive(path, mode)
265268
file === nothing || return file
266269
Base.wait(t) # sleep for a bit before trying again
267-
if stale_age > 0 && stale_pidfile(path, stale_age)
270+
if stale_age > 0 && stale_pidfile(path, stale_age, refresh)
268271
# if the file seems stale, try to remove it before attempting again
269272
# set stale_age to zero so we won't attempt again, even if the attempt fails
270273
stale_age -= stale_age

stdlib/FileWatching/test/pidfile.jl

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -203,18 +203,33 @@ end
203203

204204
@assert !ispath("pidfile")
205205
@testset "open_exclusive: break lock" begin
206-
# test for stale_age
207-
t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=10)::File
208-
try
209-
write_pidfile(f, getpid())
210-
finally
206+
@testset "using stale_age without lock refreshing" begin
207+
t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=10, refresh=0)::File
208+
try
209+
write_pidfile(f, getpid())
210+
finally
211+
close(f)
212+
end
213+
@test t < 2
214+
t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=1, refresh=0)::File
211215
close(f)
216+
@test 20 < t < 50
217+
rm("pidfile")
218+
end
219+
220+
@testset "using stale_age with lock refreshing on (default)" begin
221+
t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=10)::File
222+
try
223+
write_pidfile(f, getpid())
224+
finally
225+
close(f)
226+
end
227+
@test t < 2
228+
t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=5)::File
229+
close(f)
230+
@test 20 < t < 50
231+
rm("pidfile")
212232
end
213-
@test t < 2
214-
t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=1)::File
215-
close(f)
216-
@test 20 < t < 50
217-
rm("pidfile")
218233

219234
t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=10)::File
220235
close(f)

0 commit comments

Comments
 (0)