Skip to content

Commit

Permalink
CA-364194: use timespans for script timeouts (#6200)
Browse files Browse the repository at this point in the history
This has two advantages:
1. Always non-negative: they represent absolute differences in time
2. Forces users to define the units of time, allowing to read the time
in minutes, when appropriate

Passed the toolstack's smoke and validation tests: Job 209833
  • Loading branch information
robhoes authored Jan 14, 2025
2 parents d8ac4d9 + 87013f1 commit f58d8d3
Show file tree
Hide file tree
Showing 14 changed files with 77 additions and 51 deletions.
2 changes: 2 additions & 0 deletions ocaml/forkexecd/lib/dune
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
(wrapped false)
(libraries
astring
clock
fd-send-recv
mtime
rpclib.core
rpclib.json
rpclib.xml
Expand Down
19 changes: 12 additions & 7 deletions ocaml/forkexecd/lib/forkhelpers.ml
Original file line number Diff line number Diff line change
Expand Up @@ -315,8 +315,8 @@ let safe_close_and_exec ?tracing ?env stdin stdout stderr
close_fds

let execute_command_get_output_inner ?tracing ?env ?stdin
?(syslog_stdout = NoSyslogging) ?(redirect_stderr_to_stdout = false)
?(timeout = -1.0) cmd args =
?(syslog_stdout = NoSyslogging) ?(redirect_stderr_to_stdout = false) timeout
cmd args =
let to_close = ref [] in
let close fd =
if List.mem fd !to_close then (
Expand Down Expand Up @@ -354,8 +354,13 @@ let execute_command_get_output_inner ?tracing ?env ?stdin
close wr
)
stdinandpipes ;
if timeout > 0. then
Unix.setsockopt_float sock Unix.SO_RCVTIMEO timeout ;
( match timeout with
| Some span ->
let timeout = Clock.Timer.span_to_s span in
Unix.setsockopt_float sock Unix.SO_RCVTIMEO timeout
| None ->
()
) ;
with_tracing ~tracing ~name:"Forkhelpers.waitpid" @@ fun _ ->
try waitpid (sock, pid)
with Unix.(Unix_error ((EAGAIN | EWOULDBLOCK), _, _)) ->
Expand All @@ -380,12 +385,12 @@ let execute_command_get_output_inner ?tracing ?env ?stdin
let execute_command_get_output ?tracing ?env ?(syslog_stdout = NoSyslogging)
?(redirect_stderr_to_stdout = false) ?timeout cmd args =
with_tracing ~tracing ~name:__FUNCTION__ @@ fun tracing ->
execute_command_get_output_inner ?tracing ?env ?stdin:None ?timeout
~syslog_stdout ~redirect_stderr_to_stdout cmd args
execute_command_get_output_inner ?tracing ?env ?stdin:None ~syslog_stdout
~redirect_stderr_to_stdout timeout cmd args

let execute_command_get_output_send_stdin ?tracing ?env
?(syslog_stdout = NoSyslogging) ?(redirect_stderr_to_stdout = false)
?timeout cmd args stdin =
with_tracing ~tracing ~name:__FUNCTION__ @@ fun tracing ->
execute_command_get_output_inner ?tracing ?env ~stdin ~syslog_stdout
~redirect_stderr_to_stdout ?timeout cmd args
~redirect_stderr_to_stdout timeout cmd args
4 changes: 2 additions & 2 deletions ocaml/forkexecd/lib/forkhelpers.mli
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ val execute_command_get_output :
-> ?env:string array
-> ?syslog_stdout:syslog_stdout
-> ?redirect_stderr_to_stdout:bool
-> ?timeout:float
-> ?timeout:Mtime.Span.t
-> string
-> string list
-> string * string
Expand All @@ -61,7 +61,7 @@ val execute_command_get_output_send_stdin :
-> ?env:string array
-> ?syslog_stdout:syslog_stdout
-> ?redirect_stderr_to_stdout:bool
-> ?timeout:float
-> ?timeout:Mtime.Span.t
-> string
-> string list
-> string
Expand Down
2 changes: 1 addition & 1 deletion ocaml/forkexecd/test/dune
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
(executable
(modes exe)
(name fe_test)
(libraries forkexec uuid xapi-stdext-unix fd-send-recv))
(libraries fmt forkexec mtime clock mtime.clock.os uuid xapi-stdext-unix fd-send-recv))

(rule
(alias runtest)
Expand Down
44 changes: 26 additions & 18 deletions ocaml/forkexecd/test/fe_test.ml
Original file line number Diff line number Diff line change
Expand Up @@ -115,12 +115,6 @@ let one fds x =
:: string_of_int (fds - (x.max_extra - number_of_extra))
:: shuffle cmdline_names
in
(* Printf.fprintf stderr "stdin = %s\n" (if x.stdin then "Some" else "None");
Printf.fprintf stderr "stdout = %s\n" (if x.stdout then "Some" else "None");
Printf.fprintf stderr "stderr = %s\n" (if x.stderr then "Some" else "None");
List.iter (fun (uuid, _) -> Printf.fprintf stderr "uuid %s -> stdin\n" uuid) table;
Printf.fprintf stderr "%s %s\n" exe (String.concat " " args);
*)
Forkhelpers.waitpid_fail_if_bad_exit
(Forkhelpers.safe_close_and_exec
(if x.stdin then Some fd else None)
Expand All @@ -129,26 +123,43 @@ let one fds x =
table exe args
)

type in_range = In_range | Longer | Shorter

let in_range ~e:leeway ~around span =
let upper = Mtime.Span.add around leeway in
if Clock.Timer.span_is_shorter ~than:around span then
Shorter
else if Clock.Timer.span_is_longer ~than:upper span then
Longer
else
In_range

let test_delay () =
let start = Unix.gettimeofday () in
let start = Mtime_clock.counter () in
let args = ["sleep"] in
(* Need to have fractional part because some internal usage split integer
and fractional and do computation.
Better to have a high fractional part (> 0.5) to more probably exceed
the unit.
*)
let timeout = 1.7 in
let timeout = Mtime.Span.(1700 * ms) in
try
Forkhelpers.execute_command_get_output ~timeout exe args |> ignore ;
fail "Failed to timeout"
with
| Forkhelpers.Subprocess_timeout ->
let elapsed = Unix.gettimeofday () -. start in
Printf.printf "Caught timeout exception after %f seconds\n%!" elapsed ;
if elapsed < timeout then
failwith "Process exited too soon" ;
if elapsed > timeout +. 0.2 then
failwith "Excessive time elapsed"
| Forkhelpers.Subprocess_timeout -> (
let elapsed = Mtime_clock.count start in
Printf.printf "Caught timeout exception after %s seconds\n%!"
Fmt.(to_to_string Mtime.Span.pp elapsed) ;

match in_range ~e:Mtime.Span.(200 * ms) ~around:timeout elapsed with
| In_range ->
()
| Shorter ->
failwith "Process exited too soon"
| Longer ->
failwith "Process took too long to exit"
)
| e ->
fail "Failed with unexpected exception: %s" (Printexc.to_string e)

Expand Down Expand Up @@ -289,9 +300,6 @@ let slave = function
)
fds ;
(* Check that we have the expected number *)
(*
Printf.fprintf stderr "%s %d\n" total_fds (List.length present - 1)
*)
if total_fds <> List.length filtered then
fail "Expected %d fds; /proc/self/fd has %d: %s" total_fds
(List.length filtered) ls
Expand Down
3 changes: 2 additions & 1 deletion ocaml/networkd/bin/dune
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,11 @@
(modes exe)
(libraries
astring

clock
forkexec
http_lib
integers
mtime
netlink
networklibs
rpclib.core
Expand Down
10 changes: 8 additions & 2 deletions ocaml/networkd/bin/networkd.ml
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,14 @@ let options =
, "Path to the Unix command dracut"
)
; ( "dracut-timeout"
, Arg.Set_float Network_utils.dracut_timeout
, (fun () -> string_of_float !Network_utils.dracut_timeout)
, Arg.Float
(fun x ->
let x = Float.to_int (x *. 1000.) in
Network_utils.dracut_timeout := Mtime.Span.(x * ms)
)
, (fun () ->
Float.to_string (Clock.Timer.span_to_s !Network_utils.dracut_timeout)
)
, "Default value for the dracut command timeout"
)
; ( "modinfo-cmd-path"
Expand Down
8 changes: 5 additions & 3 deletions ocaml/networkd/lib/network_utils.ml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ let dracut = ref "/sbin/dracut"

let modinfo = ref "/sbin/modinfo"

let dracut_timeout = ref 180.0
let dracut_timeout = ref Mtime.Span.(3 * min)

let fcoedriver = ref "/opt/xensource/libexec/fcoe_driver"

Expand Down Expand Up @@ -128,7 +128,8 @@ let check_n_run ?(on_error = default_error_handler) ?(log = true) run_func
| Forkhelpers.Spawn_internal_error (stderr, stdout, status) ->
on_error script args stdout stderr status

let call_script ?(timeout = Some 60.0) ?on_error ?log script args =
let call_script ?(timeout = Some Mtime.Span.(1 * min)) ?on_error ?log script
args =
let call_script_internal env script args =
let out, _err =
Forkhelpers.execute_command_get_output ~env ?timeout script args
Expand Down Expand Up @@ -1064,7 +1065,8 @@ end = struct
end

module Fcoe = struct
let call ?log args = call_script ?log ~timeout:(Some 10.0) !fcoedriver args
let call ?log args =
call_script ?log ~timeout:(Some Mtime.Span.(10 * s)) !fcoedriver args

let get_capabilities name =
match Sys.file_exists !fcoedriver with
Expand Down
2 changes: 1 addition & 1 deletion ocaml/xapi/helpers.ml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ let call_script ?(log_output = Always) ?env ?stdin ?timeout script args =
| None ->
"without a timeout"
| Some t ->
Printf.sprintf "with a timeout of %.3f seconds" t
Fmt.str "with a timeout of %a" Mtime.Span.pp t
in
debug "about to call script %s: %s %s" timeout_msg script
(String.concat " " (filter_args args)) ;
Expand Down
4 changes: 3 additions & 1 deletion ocaml/xapi/static_vdis.ml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ let permanent_vdi_attach ~__context ~vdi ~reason =
)
) ;
ignore
(Helpers.call_script ~timeout:60.0 !Xapi_globs.static_vdis
(Helpers.call_script
~timeout:Mtime.Span.(1 * min)
!Xapi_globs.static_vdis
["add"; Db.VDI.get_uuid ~__context ~self:vdi; reason]
) ;
(* VDI will be attached on next boot; attach it now too *)
Expand Down
15 changes: 8 additions & 7 deletions ocaml/xapi/xapi_globs.ml
Original file line number Diff line number Diff line change
Expand Up @@ -994,7 +994,7 @@ let winbind_allow_kerberos_auth_fallback = ref false

let winbind_keep_configuration = ref false

let winbind_ldap_query_subject_timeout = ref 20.
let winbind_ldap_query_subject_timeout = ref Mtime.Span.(20 * s)

let tdb_tool = ref "/usr/bin/tdbtool"

Expand Down Expand Up @@ -1146,7 +1146,13 @@ let xapi_globs_spec =
; ("local_yum_repo_port", Int local_yum_repo_port)
]

let xapi_globs_spec_with_descriptions = []
let xapi_globs_spec_with_descriptions =
[
( "winbind_ldap_query_subject_timeout"
, ShortDurationFromSeconds winbind_ldap_query_subject_timeout
, "Timeout to perform ldap query for subject information"
)
]

let option_of_xapi_globs_spec ?(description = None) (name, ty) =
let spec =
Expand Down Expand Up @@ -1467,11 +1473,6 @@ let other_options =
, "Whether to clear winbind configuration when join domain failed or leave \
domain"
)
; ( "winbind_ldap_query_subject_timeout"
, Arg.Set_float winbind_ldap_query_subject_timeout
, (fun () -> string_of_float !winbind_ldap_query_subject_timeout)
, "Timeout to perform ldap query for subject information"
)
; ( "hsts_max_age"
, Arg.Set_int hsts_max_age
, (fun () -> string_of_int !hsts_max_age)
Expand Down
1 change: 1 addition & 0 deletions ocaml/xenopsd/lib/dune
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
fd-send-recv
fmt
forkexec
mtime
re
gzip
zstd
Expand Down
2 changes: 1 addition & 1 deletion ocaml/xenopsd/lib/xenopsd.ml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ let vgpu_ready_timeout = ref 30.

let varstored_ready_timeout = ref 30.

let swtpm_ready_timeout = ref 60
let swtpm_ready_timeout = ref Mtime.Span.(1 * min)

let use_upstream_qemu = ref false

Expand Down
12 changes: 5 additions & 7 deletions ocaml/xenopsd/xc/service.ml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ type t = {
; exec_path: string
; pid_filename: string
; chroot: Chroot.t
; timeout_seconds: int
; timeout: Mtime.Span.t
; args: string list
; execute:
path:string -> args:string list -> domid:Xenctrl.domid -> unit -> string
Expand Down Expand Up @@ -180,7 +180,7 @@ let start_and_wait_for_readyness ~task ~service =

Xenops_task.check_cancelling task ;

let amount = Mtime.Span.(service.timeout_seconds * s) in
let amount = service.timeout in
(* wait for pidfile to appear *)
Result.iter_error raise_e (wait ~amount ~service_name:syslog_key) ;

Expand Down Expand Up @@ -797,16 +797,14 @@ module Swtpm = struct
swtpm-wrapper runs as a service and getting the exact error back is
difficult. *)
let needs_init = check_state_needs_init task vtpm_uuid in
let timeout_seconds = !Xenopsd.swtpm_ready_timeout in
let timeout = !Xenopsd.swtpm_ready_timeout in
if needs_init then (
debug "vTPM %s is empty, needs to be created" (Uuidm.to_string vtpm_uuid) ;
let key = Printf.sprintf "%s-%d" (Filename.basename exec_path) domid in
let _, _ =
Forkhelpers.execute_command_get_output
~syslog_stdout:(Forkhelpers.Syslog_WithKey key)
~redirect_stderr_to_stdout:true
~timeout:(float_of_int timeout_seconds)
exec_path (args true)
~redirect_stderr_to_stdout:true ~timeout exec_path (args true)
in
let state_file = Filename.concat tpm_root "tpm2-00.permall" in
let state = Unixext.string_of_file state_file |> Base64.encode_exn in
Expand All @@ -825,7 +823,7 @@ module Swtpm = struct
; chroot
; args= args false
; execute
; timeout_seconds
; timeout
}
in

Expand Down

0 comments on commit f58d8d3

Please sign in to comment.