Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve GDB usage in CI #5429

Merged
merged 4 commits into from
Mar 1, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 29 additions & 11 deletions qa/common/util.sh
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,33 @@ function wait_for_model_stable() {
echo "=== Timeout $wait_time_secs secs. Not all models stable."
}

#######################################
# Collect best-effort GDB diagnostics for the server under test.
#
# Two independent situations are handled:
#   1. Hang:     the process SERVER_PID is still alive -> dump a backtrace of
#                every thread to gdb_bt.<pid>.log and generate a core dump.
#   2. Segfault: core.* files exist in the current directory -> load each one
#                together with the SERVER binary and dump a backtrace to
#                <corefile>.log.
#
# Globals:
#   SERVER_PID (read)    - pid of the server; empty or 0 means "no server".
#   SERVER     (read)    - server executable handed to gdb with each core file.
#   GDB_LOG    (written) - name of the most recently written backtrace log.
# Outputs:
#   Warning banners and gdb output on stdout; log/core files in ${PWD}.
# Returns:
#   0 always; every gdb failure is deliberately tolerated.
#######################################
function gdb_helper () {
  # Only the exit status matters here; do not leak gdb's path into the logs.
  if ! command -v gdb > /dev/null; then
    echo "=== WARNING: gdb not installed"
    return
  fi

  ### Server Hang ###
  # PID 0 must be rejected explicitly: "kill -0 0" probes the caller's own
  # process group and therefore always succeeds, which would be reported as a
  # hang. stderr is silenced because a dead server is an expected case here.
  if [[ -n "${SERVER_PID:-}" && "${SERVER_PID}" != "0" ]] \
      && kill -0 "${SERVER_PID}" 2> /dev/null; then
    # If server process is still alive, try to get backtrace and core dump from it
    GDB_LOG="gdb_bt.${SERVER_PID}.log"
    echo -e "=== WARNING: SERVER HANG DETECTED, DUMPING GDB BACKTRACE TO [${PWD}/${GDB_LOG}] ==="
    # Dump backtrace log for quick analysis. Allow these commands to fail.
    gdb -batch -ex "thread apply all bt" -p "${SERVER_PID}" 2>&1 | tee "${GDB_LOG}" || true

    # Generate core dump for deeper analysis. Default filename is "core.${PID}"
    gdb -batch -ex "gcore" -p "${SERVER_PID}" || true
  fi

  ### Server Segfaulted ###
  # If there are any core dumps locally from a segfault, load them and get a backtrace
  local corefile
  for corefile in core.*; do
    # An unmatched glob stays as the literal "core.*"; skip it (and anything
    # else that is not a regular file) instead of handing it to gdb.
    if [[ ! -f "${corefile}" ]]; then
      continue
    fi
    # "core.<pid>.log" is this function's own output from an earlier pass and
    # also matches the glob; it is a text log, not a core dump.
    if [[ "${corefile}" == *.log ]]; then
      continue
    fi

    GDB_LOG="${corefile}.log"
    echo -e "=== WARNING: SEGFAULT DETECTED, DUMPING GDB BACKTRACE TO [${PWD}/${GDB_LOG}] ==="
    # ${SERVER:+...} drops the argument entirely when SERVER is unset/empty
    # rather than passing gdb an empty executable name.
    gdb -batch ${SERVER:+"${SERVER}"} "${corefile}" -ex "thread apply all bt" | tee "${GDB_LOG}" || true
  done

  return 0
}

# Run inference server. Return once server's health endpoint shows
# ready or timeout expires. Sets SERVER_PID to pid of SERVER, or 0 if
# error (including expired timeout)
Expand Down Expand Up @@ -173,17 +200,8 @@ function run_server () {

wait_for_server_ready $SERVER_PID $SERVER_TIMEOUT
if [ "$WAIT_RET" != "0" ]; then
# If gdb is installed, collect a backtrace from the hanging process
if command -v gdb; then
GDB_LOG="gdb_bt.${SERVER_PID}.log"
echo -e "=== WARNING: SERVER FAILED TO START, DUMPING GDB BACKTRACE TO [${PWD}/${GDB_LOG}] ==="
# Dump backtrace log for quick analysis. Allow these commands to fail.
gdb -batch -ex "thread apply all bt" -p "${SERVER_PID}" 2>&1 >> "${GDB_LOG}" || true
# Generate core dump for deeper analysis. Default filename is "core.${PID}"
gdb -batch -ex "gcore" -p "${SERVER_PID}" || true
else
echo -e "=== ERROR: SERVER FAILED TO START, BUT GDB NOT FOUND ==="
fi
# Get further debug information about server startup failure
gdb_helper || true

# Cleanup
kill $SERVER_PID || true
Expand Down