Skip to content

Commit

Permalink
massively improved process handling in boot scripts
Browse files Browse the repository at this point in the history
- handles SIGINT separately and correctly
- outer handler only kills subshells now
- subshells have their own handlers for terminating their children
- killing subshells now brings things down
- no more lingering detached processes sometimes
  • Loading branch information
dzuelke committed Jan 29, 2015
1 parent f2179c9 commit 0d77240
Show file tree
Hide file tree
Showing 4 changed files with 241 additions and 97 deletions.
73 changes: 53 additions & 20 deletions bin/heroku-hhvm-apache2
Original file line number Diff line number Diff line change
Expand Up @@ -236,33 +236,66 @@ rm -f $wait_pipe
mkfifo $wait_pipe
exec 3<> $wait_pipe

# trap SIGINT/SIGQUIT (ctrl+c or ctrl+\ on the console), SIGTERM, and EXIT (upon failure of any command due to set -e, or because of the exit 1 at the very end), kill subshell child processes, then subshells
# 1) restore EXIT trap immediately, or the exit at the end of the line will trigger this trap again
# 2) kill childrens' child processes (the stuff running inside the sub-shells) using xargs because this is easier (-P expects a comma separated list); the || true prevents premature exit (set -e) if one of those doesn't have children anymore (it's likely that's why we're hitting this bit of code in the first place), and redirect all to /dev/null as usage help when no args given (because jobs -p was empty) is sometimes (Linux) printed to STDOUT
# 3) kill child processes (that's the sub-shells); it's likely that some of them have already disappeared, so xarg || true it too and suppress "no such process" complaints by sending them to /dev/null
# FIXME: this doesn't currently fire when the subshells themselves are terminated
# TODO: for extra brownie points, move to a function and curry for each given signal, passing the signal in as an arg, so we can use different exit codes or messages
trap 'trap - EXIT; echo "Going down, terminating child processes..." >&2; rm -f ${wait_pipe} || true; jobs -p | xargs -n1 pkill -TERM -P &> /dev/null || true; jobs -p | xargs -n1 kill -TERM 2> /dev/null || true; exit' SIGINT SIGQUIT SIGTERM EXIT

# launch processes. all run using || true to prevent premature exit of the subshell (from set -e) regardless of exit status
# after a subprocess terminates (because it was killed or because it crashed or because it quit voluntarily), we write the name to FD 3 (because programs could output something on FD 1 (STDOUT) or FD 2 (STDERR)) and send that to the shared pipe (mkfifo) above, and a read command further down waits for something to come in on the shared pipe

# redirect logs to STDERR; write "tail ..." to the shared pipe if it exits
pids=()

# trap SIGQUIT (ctrl+\ on the console), SIGTERM (when we get killed) and EXIT (upon failure of any command due to set -e, or because of the exit 1 at the very end); the handler then
# 1) resets the EXIT trap so it doesn't fire again in a loop due to the exit at the end (if we're handling SIGQUIT or SIGTERM)
# 2) removes our FIFO from above; the path is quoted so that a location containing whitespace or glob characters is still removed as a single file
# 3) kills all the subshells we've spawned - they in turn have their own traps to kill their respective subprocesses
# 3a) sends STDERR to /dev/null so we don't see "no such process" errors - after all, one of the subshells may be gone
# 3b) uses || true so that set -e doesn't cause a mess if the kill returns 1 on "no such process" cases (which is likely)
# 4) exits in case we're handling SIGQUIT or SIGTERM
# the handler is single-quoted on purpose: ${wait_pipe} and ${pids[@]} are expanded when the trap fires, not when it is installed
trap 'trap - EXIT; echo "Going down, terminating child processes..." >&2; rm -f "${wait_pipe}" || true; kill -TERM "${pids[@]}" 2> /dev/null || true; exit' QUIT TERM EXIT
# trap SIGINT (ctrl+c on the console)
# 1) reset the INT trap so it doesn't fire in a loop due to 2)
# 2) be nice to the caller and send SIGINT to ourselves (http://mywiki.wooledge.org/SignalTrap#Special_Note_On_SIGINT)
# 3) *do* exit after all to run the cleanup code from above (avoids duplication)
trap 'trap - INT; kill -INT $$; exit' INT

# we are now launching a subshell for each of the tasks (log tail, app server, web server)
# 1) each subshell has a trap on EXIT that echoes the command name to FD 3 (see the FIFO set up above)
# 1a) a 'read' at the end of the script will block on reading from that FD and then trigger the exit trap above, which does the cleanup
# 2) each subshell also has a trap on TERM that
# 2a) kills $! (the most recently started background process, i.e. the command launched in 3))
# 2b) ... which in turn will unblock the 'wait' in 4)
# 3) execute the command in the background
# 4) 'wait' on the command (wait is interrupted by an incoming TERM to the subshell, whereas running 3) in the foreground would wait for that 3) to finish before triggering the trap)
# 5) add the PID of the subshell to the array that the EXIT trap further above uses to clean everything up

[[ $verbose ]] && echo "Starting log redirection..." >&2
( touch "${logs[@]}"; tail -qF -n 0 "${logs[@]}" 1>&2 || true; echo 'tail "${logs[@]}"' >&3; ) &
# start HHVM; write "hhvm" to the shared pipe if it exits
# log redirection subshell: follows the files in "${logs[@]}" and copies newly appended lines to STDERR
(
# whenever this subshell exits (for whatever reason), report "tail" on FD 3
trap 'echo "tail" >&3;' EXIT
# on TERM, pass the signal on to the most recently started background process ($!), i.e. the tail launched below
trap 'kill -TERM $!' TERM
# create any log file that does not exist yet before tail starts following it
touch "${logs[@]}"
# -q: no file name headers, -F: follow by name and keep retrying, -n 0: only lines appended from now on; STDOUT is sent to STDERR
tail -qF -n 0 "${logs[@]}" 1>&2 &
# block until the background tail ends; a trapped TERM interrupts this wait so the handler above can run
wait
# the subshell itself runs in the background; its PID is appended to the pids array
) & pids+=($!)

echo "Starting hhvm..." >&2
( hhvm --mode server -vServer.Type=fastcgi -vServer.FileSocket=/tmp/heroku.fcgi.$PORT.sock -c "$php_config" || true; echo "hhvm" >&3; ) &
# wait a few seconds for HHVM to finish initializing; otherwise an early request might break Apache with the FastCGI pipe not being ready
# app server subshell: runs HHVM as a FastCGI server listening on a per-port UNIX socket
(
# whenever this subshell exits (for whatever reason), report "hhvm" on FD 3
trap 'echo "hhvm" >&3;' EXIT
# on TERM, pass the signal on to the most recently started background process ($!), i.e. the hhvm launched below
trap 'kill -TERM $!' TERM
# the socket path is quoted so that the value of PORT can never be word-split or glob-expanded into extra arguments
hhvm --mode server -vServer.Type=fastcgi -vServer.FileSocket="/tmp/heroku.fcgi.${PORT}.sock" -c "$php_config" &
# block until the background hhvm ends; a trapped TERM interrupts this wait so the handler above can run
wait
# the subshell itself runs in the background; its PID is appended to the pids array
) & pids+=($!)

# wait a few seconds for HHVM to finish initializing; otherwise an early request might break Apache with the FastCGI pipe not being ready
sleep 2
# start apache; write "httpd" to the shared pipe if it exits

echo "Starting httpd..." >&2
( httpd -D NO_DETACH -c "Include $httpd_config" || true; echo "httpd" >&3; ) &
# web server subshell: runs Apache httpd with the generated configuration
(
# whenever this subshell exits (for whatever reason), report "httpd" on FD 3
trap 'echo "httpd" >&3;' EXIT
# on TERM, pass the signal on to the most recently started background process ($!), i.e. the httpd launched below
trap 'kill -TERM $!' TERM
# -D NO_DETACH defines the NO_DETACH parameter (keeps the httpd parent from detaching); -c applies the Include directive for $httpd_config
httpd -D NO_DETACH -c "Include $httpd_config" &
# block until the background httpd ends; a trapped TERM interrupts this wait so the handler above can run
wait
# the subshell itself runs in the background; its PID is appended to the pids array
) & pids+=($!)

# wait for something to come from the shared pipe, which means that the given process was killed or has failed
# wait for something to come from the FIFO attached to FD 3, which means that the given process was killed or has failed
# this will be interrupted by a SIGTERM or SIGINT in the traps further up
# if the pipe unblocks and this executes, then we won't read it again, so if the traps further up kill the remaining subshells above, their writing to FD 3 will have no effect
# block until one of the subshells writes its name to FD 3; -r stops read from interpreting backslashes in that line
read -r exitproc <&3
# we'll only reach this if one of the processes above has terminated
echo "Process exited unexpectedly: $exitproc" >&2

# this will trigger the EXIT trap further up and kill all remaining children
exit 1
73 changes: 53 additions & 20 deletions bin/heroku-hhvm-nginx
Original file line number Diff line number Diff line change
Expand Up @@ -236,33 +236,66 @@ rm -f $wait_pipe
mkfifo $wait_pipe
exec 3<> $wait_pipe

# trap SIGINT/SIGQUIT (ctrl+c or ctrl+\ on the console), SIGTERM, and EXIT (upon failure of any command due to set -e, or because of the exit 1 at the very end), kill subshell child processes, then subshells
# 1) restore EXIT trap immediately, or the exit at the end of the line will trigger this trap again
# 2) kill childrens' child processes (the stuff running inside the sub-shells) using xargs because this is easier (-P expects a comma separated list); the || true prevents premature exit (set -e) if one of those doesn't have children anymore (it's likely that's why we're hitting this bit of code in the first place), and redirect all to /dev/null as usage help when no args given (because jobs -p was empty) is sometimes (Linux) printed to STDOUT
# 3) kill child processes (that's the sub-shells); it's likely that some of them have already disappeared, so xarg || true it too and suppress "no such process" complaints by sending them to /dev/null
# FIXME: this doesn't currently fire when the subshells themselves are terminated
# TODO: for extra brownie points, move to a function and curry for each given signal, passing the signal in as an arg, so we can use different exit codes or messages
trap 'trap - EXIT; echo "Going down, terminating child processes..." >&2; rm -f ${wait_pipe} || true; jobs -p | xargs -n1 pkill -TERM -P &> /dev/null || true; jobs -p | xargs -n1 kill -TERM 2> /dev/null || true; exit' SIGINT SIGQUIT SIGTERM EXIT

# launch processes. all run using || true to prevent premature exit of the subshell (from set -e) regardless of exit status
# after a subprocess terminates (because it was killed or because it crashed or because it quit voluntarily), we write the name to FD 3 (because programs could output something on FD 1 (STDOUT) or FD 2 (STDERR)) and send that to the shared pipe (mkfifo) above, and a read command further down waits for something to come in on the shared pipe

# redirect logs to STDERR; write "tail ..." to the shared pipe if it exits
pids=()

# trap SIGQUIT (ctrl+\ on the console), SIGTERM (when we get killed) and EXIT (upon failure of any command due to set -e, or because of the exit 1 at the very end); the handler then
# 1) resets the EXIT trap so it doesn't fire again in a loop due to the exit at the end (if we're handling SIGQUIT or SIGTERM)
# 2) removes our FIFO from above; the path is quoted so that a location containing whitespace or glob characters is still removed as a single file
# 3) kills all the subshells we've spawned - they in turn have their own traps to kill their respective subprocesses
# 3a) sends STDERR to /dev/null so we don't see "no such process" errors - after all, one of the subshells may be gone
# 3b) uses || true so that set -e doesn't cause a mess if the kill returns 1 on "no such process" cases (which is likely)
# 4) exits in case we're handling SIGQUIT or SIGTERM
# the handler is single-quoted on purpose: ${wait_pipe} and ${pids[@]} are expanded when the trap fires, not when it is installed
trap 'trap - EXIT; echo "Going down, terminating child processes..." >&2; rm -f "${wait_pipe}" || true; kill -TERM "${pids[@]}" 2> /dev/null || true; exit' QUIT TERM EXIT
# trap SIGINT (ctrl+c on the console)
# 1) reset the INT trap so it doesn't fire in a loop due to 2)
# 2) be nice to the caller and send SIGINT to ourselves (http://mywiki.wooledge.org/SignalTrap#Special_Note_On_SIGINT)
# 3) *do* exit after all to run the cleanup code from above (avoids duplication)
trap 'trap - INT; kill -INT $$; exit' INT

# we are now launching a subshell for each of the tasks (log tail, app server, web server)
# 1) each subshell has a trap on EXIT that echoes the command name to FD 3 (see the FIFO set up above)
# 1a) a 'read' at the end of the script will block on reading from that FD and then trigger the exit trap above, which does the cleanup
# 2) each subshell also has a trap on TERM that
# 2a) kills $! (the most recently started background process, i.e. the command launched in 3))
# 2b) ... which in turn will unblock the 'wait' in 4)
# 3) execute the command in the background
# 4) 'wait' on the command (wait is interrupted by an incoming TERM to the subshell, whereas running 3) in the foreground would wait for that 3) to finish before triggering the trap)
# 5) add the PID of the subshell to the array that the EXIT trap further above uses to clean everything up

[[ $verbose ]] && echo "Starting log redirection..." >&2
( touch "${logs[@]}"; tail -qF -n 0 "${logs[@]}" 1>&2 || true; echo 'tail "${logs[@]}"' >&3; ) &
# start HHVM; write "hhvm" to the shared pipe if it exits
# log redirection subshell: follows the files in "${logs[@]}" and copies newly appended lines to STDERR
(
# whenever this subshell exits (for whatever reason), report "tail" on FD 3
trap 'echo "tail" >&3;' EXIT
# on TERM, pass the signal on to the most recently started background process ($!), i.e. the tail launched below
trap 'kill -TERM $!' TERM
# create any log file that does not exist yet before tail starts following it
touch "${logs[@]}"
# -q: no file name headers, -F: follow by name and keep retrying, -n 0: only lines appended from now on; STDOUT is sent to STDERR
tail -qF -n 0 "${logs[@]}" 1>&2 &
# block until the background tail ends; a trapped TERM interrupts this wait so the handler above can run
wait
# the subshell itself runs in the background; its PID is appended to the pids array
) & pids+=($!)

echo "Starting hhvm..." >&2
( hhvm --mode server -vServer.Type=fastcgi -vServer.FileSocket=/tmp/heroku.fcgi.$PORT.sock -c "$php_config" || true; echo "hhvm" >&3; ) &
# wait a few seconds for HHVM to finish initializing; otherwise an early request might break nginx with the FastCGI pipe not being ready
# app server subshell: runs HHVM as a FastCGI server listening on a per-port UNIX socket
(
# whenever this subshell exits (for whatever reason), report "hhvm" on FD 3
trap 'echo "hhvm" >&3;' EXIT
# on TERM, pass the signal on to the most recently started background process ($!), i.e. the hhvm launched below
trap 'kill -TERM $!' TERM
# the socket path is quoted so that the value of PORT can never be word-split or glob-expanded into extra arguments
hhvm --mode server -vServer.Type=fastcgi -vServer.FileSocket="/tmp/heroku.fcgi.${PORT}.sock" -c "$php_config" &
# block until the background hhvm ends; a trapped TERM interrupts this wait so the handler above can run
wait
# the subshell itself runs in the background; its PID is appended to the pids array
) & pids+=($!)

# wait a few seconds for HHVM to finish initializing; otherwise an early request might break nginx with the FastCGI pipe not being ready
sleep 2
# start nginx; write "nginx" to the shared pipe if it exits

echo "Starting nginx..." >&2
( nginx -g "daemon off; include $nginx_config;" || true; echo "nginx" >&3; ) &
# web server subshell: runs nginx with the generated configuration
(
# whenever this subshell exits (for whatever reason), report "nginx" on FD 3
trap 'echo "nginx" >&3;' EXIT
# on TERM, pass the signal on to the most recently started background process ($!), i.e. the nginx launched below
trap 'kill -TERM $!' TERM
# -g passes global directives: "daemon off" keeps nginx from daemonizing, "include" pulls in $nginx_config
nginx -g "daemon off; include $nginx_config;" &
# block until the background nginx ends; a trapped TERM interrupts this wait so the handler above can run
wait
# the subshell itself runs in the background; its PID is appended to the pids array
) & pids+=($!)

# wait for something to come from the shared pipe, which means that the given process was killed or has failed
# wait for something to come from the FIFO attached to FD 3, which means that the given process was killed or has failed
# this will be interrupted by a SIGTERM or SIGINT in the traps further up
# if the pipe unblocks and this executes, then we won't read it again, so if the traps further up kill the remaining subshells above, their writing to FD 3 will have no effect
# block until one of the subshells writes its name to FD 3; -r stops read from interpreting backslashes in that line
read -r exitproc <&3
# we'll only reach this if one of the processes above has terminated
echo "Process exited unexpectedly: $exitproc" >&2

# this will trigger the EXIT trap further up and kill all remaining children
exit 1
Loading

0 comments on commit 0d77240

Please sign in to comment.