forked from iovisor/bcc
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
tools: Added exitsnoop: Trace all process termination
- Loading branch information
Showing
3 changed files
with
528 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
.TH exitsnoop 8 "2019-05-28" "USER COMMANDS" | ||
.SH NAME | ||
exitsnoop \- Trace all process termination (exit, fatal signal). Uses Linux eBPF/bcc. | ||
.SH SYNOPSIS | ||
.B exitsnoop [\-h] [\-t] [\-u] [\-x] [\-p PID] [\-\-label LABEL] | ||
.SH DESCRIPTION | ||
exitsnoop traces process termination, showing the command name and reason for | ||
termination, either an exit or a fatal signal. | ||
|
||
It catches processes of all users, processes in containers, as well | ||
as processes that become zombie. | ||
|
||
This works by tracing the kernel sched_process_exit() function using dynamic tracing, | ||
and will need updating to match any changes to this function. | ||
|
||
Since this uses BPF, only the root user can use this tool. | ||
.SH REQUIREMENTS | ||
CONFIG_BPF and bcc. | ||
.SH OPTIONS | ||
.TP | ||
\-h | ||
Print usage message. | ||
.TP | ||
\-t | ||
Include a timestamp column. | ||
.TP | ||
\-u | ||
Include a timestamp column, use UTC timezone. | ||
.TP | ||
\-x | ||
Exclude successful exits, exit( 0 ) | ||
.TP | ||
\-p PID | ||
Trace this process ID only (filtered in-kernel). | ||
.TP | ||
\-\-label LABEL | ||
Label each line with LABEL (default 'exit') in first column (2nd if timestamp is present). | ||
.SH EXAMPLES | ||
.TP | ||
Trace all process termination | ||
# | ||
.B exitsnoop | ||
.TP | ||
Trace all process termination, and include timestamps: | ||
# | ||
.B exitsnoop \-t | ||
.TP | ||
Exclude successful exits, only include non-zero exit codes and fatal signals: | ||
# | ||
.B exitsnoop \-x | ||
.TP | ||
Trace PID 181 only: | ||
# | ||
.B exitsnoop \-p 181 | ||
.TP | ||
Label each output line with 'EXIT': | ||
# | ||
.B exitsnoop \-\-label EXIT | ||
.SH FIELDS | ||
.TP | ||
TIME-TZ | ||
Time of process termination HH:MM:SS.sss with milliseconds, where TZ is | ||
the local time zone, 'UTC' with \-u option. | ||
.TP | ||
LABEL | ||
The optional label if \-\-label option is used. This is useful with the | ||
\-t option for timestamps when the output of several tracing tools is | ||
sorted into one combined output. | ||
.TP | ||
PCOMM | ||
Process/command name. | ||
.TP | ||
PID | ||
Process ID | ||
.TP | ||
PPID | ||
The process ID of the process that will be notified of PID termination. | ||
.TP | ||
TID | ||
Thread ID. | ||
.TP | ||
EXIT_CODE | ||
The exit code for exit() or the signal number for a fatal signal. | ||
.SH OVERHEAD | ||
This traces the kernel sched_process_exit() function and prints output for each event. | ||
As the rate of this is generally expected to be low (< 1000/s), the overhead is also | ||
expected to be negligible. If you have an application that has a high rate of | ||
process termination, then test and understand overhead before use. | ||
.SH SOURCE | ||
This is from bcc. | ||
.IP | ||
https://github.com/iovisor/bcc | ||
.PP | ||
Also look in the bcc distribution for a companion _examples.txt file containing | ||
example usage, output, and commentary for this tool. | ||
.SH OS | ||
Linux | ||
.SH STABILITY | ||
Unstable - in development. | ||
.SH AUTHOR | ||
Arturo Martin-de-Nicolas | ||
.SH SEE ALSO | ||
execsnoop(8) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,277 @@ | ||
#!/usr/bin/python | ||
# @lint-avoid-python-3-compatibility-imports | ||
from __future__ import print_function | ||
|
||
import argparse | ||
import ctypes as ct | ||
import os | ||
import platform | ||
import re | ||
import signal | ||
import sys | ||
|
||
from bcc import BPF | ||
from datetime import datetime | ||
from time import strftime | ||
|
||
# | ||
# exitsnoop Trace all process termination (exit, fatal signal) | ||
# For Linux, uses BCC, eBPF. Embedded C. | ||
# | ||
# USAGE: exitsnoop [-h] [-x] [-t] [--utc] [--label[=LABEL]] [-p PID] | ||
# | ||
_examples = """examples: | ||
exitsnoop # trace all process termination | ||
exitsnoop -x # trace only fails, exclude exit(0) | ||
exitsnoop -t # include timestamps (local time) | ||
exitsnoop --utc # include timestamps (UTC) | ||
exitsnoop -p 181 # only trace PID 181 | ||
exitsnoop --label=exit # label each output line with 'exit' | ||
""" | ||
""" | ||
Exit status (from <include/sysexits.h>): | ||
0 EX_OK Success | ||
2 argparse error | ||
70 EX_SOFTWARE syntax error detected by compiler, or | ||
verifier error from kernel | ||
77 EX_NOPERM Need sudo (CAP_SYS_ADMIN) for BPF() system call | ||
The template for this script was Brendan Gregg's execsnoop | ||
https://github.com/iovisor/bcc/blob/master/tools/execsnoop.py | ||
More information about this script is in bcc/tools/exitsnoop_example.txt | ||
Copyright 2016 Netflix, Inc. | ||
Copyright 2019 Instana, Inc. | ||
Licensed under the Apache License, Version 2.0 (the "License") | ||
07-Feb-2016 Brendan Gregg (Netflix) Created execsnoop | ||
04-May-2019 Arturo Martin-de-Nicolas (Instana) Created exitsnoop | ||
13-May-2019 Jeroen Soeters (Instana) Refactor to import as module | ||
""" | ||
|
||
def _getParser(): | ||
parser = argparse.ArgumentParser( | ||
description="Trace all process termination (exit, fatal signal)", | ||
formatter_class=argparse.RawDescriptionHelpFormatter, | ||
epilog=_examples) | ||
a=parser.add_argument | ||
a("-t", "--timestamp", action="store_true", help="include timestamp (local time default)") | ||
a("--utc", action="store_true", help="include timestamp in UTC (-t implied)") | ||
a("-p", "--pid", help="trace this PID only") | ||
a("--label", help="label each line") | ||
a("-x", "--failed", action="store_true", help="trace only fails, exclude exit(0)") | ||
# print the embedded C program and exit, for debugging | ||
a("--ebpf", action="store_true", help=argparse.SUPPRESS) | ||
# RHEL 7.6 keeps task->start_time as struct timespec, convert to u64 nanoseconds | ||
a("--timespec", action="store_true", help=argparse.SUPPRESS) | ||
return parser.parse_args | ||
|
||
|
||
class Global(): | ||
parse_args = _getParser() | ||
args = None | ||
argv = None | ||
SIGNUM_TO_SIGNAME = dict((v, re.sub("^SIG", "", k)) | ||
for k,v in signal.__dict__.items() if re.match("^SIG[A-Z]+$", k)) | ||
|
||
|
||
class Data(ct.Structure): | ||
"""Event data matching struct data_t in _embedded_c().""" | ||
_TASK_COMM_LEN = 16 # linux/sched.h | ||
_pack_ = 1 | ||
_fields_ = [ | ||
("start_time", ct.c_ulonglong), # task->start_time, see --timespec arg | ||
("exit_time", ct.c_ulonglong), # bpf_ktime_get_ns() | ||
("pid", ct.c_uint), # task->tgid, thread group id == sys_getpid() | ||
("tid", ct.c_uint), # task->pid, thread id == sys_gettid() | ||
("ppid", ct.c_uint),# task->parent->tgid, notified of exit | ||
("exit_code", ct.c_int), | ||
("sig_info", ct.c_uint), | ||
("task", ct.c_char * _TASK_COMM_LEN) | ||
] | ||
|
||
def _embedded_c(args): | ||
"""Generate C program for sched_process_exit tracepoint in kernel/exit.c.""" | ||
c = """ | ||
EBPF_COMMENT | ||
#include <linux/sched.h> | ||
BPF_STATIC_ASSERT_DEF | ||
struct data_t { | ||
u64 start_time; | ||
u64 exit_time; | ||
u32 pid; | ||
u32 tid; | ||
u32 ppid; | ||
int exit_code; | ||
u32 sig_info; | ||
char task[TASK_COMM_LEN]; | ||
} __attribute__((packed)); | ||
BPF_STATIC_ASSERT(sizeof(struct data_t) == CTYPES_SIZEOF_DATA); | ||
BPF_PERF_OUTPUT(events); | ||
TRACEPOINT_PROBE(sched, sched_process_exit) | ||
{ | ||
struct task_struct *task = (typeof(task))bpf_get_current_task(); | ||
if (FILTER_PID || FILTER_EXIT_CODE) { return 0; } | ||
struct data_t data = { | ||
.start_time = PROCESS_START_TIME_NS, | ||
.exit_time = bpf_ktime_get_ns(), | ||
.pid = task->tgid, | ||
.tid = task->pid, | ||
.ppid = task->parent->tgid, | ||
.exit_code = task->exit_code >> 8, | ||
.sig_info = task->exit_code & 0xFF, | ||
}; | ||
bpf_get_current_comm(&data.task, sizeof(data.task)); | ||
events.perf_submit(args, &data, sizeof(data)); | ||
return 0; | ||
} | ||
""" | ||
# TODO: this macro belongs in bcc/src/cc/export/helpers.h | ||
bpf_static_assert_def = r""" | ||
#ifndef BPF_STATIC_ASSERT | ||
#define BPF_STATIC_ASSERT(condition) __attribute__((unused)) \ | ||
extern int bpf_static_assert[(condition) ? 1 : -1] | ||
#endif | ||
""" | ||
code_substitutions = [ | ||
('EBPF_COMMENT', '' if not Global.args.ebpf else _ebpf_comment()), | ||
("BPF_STATIC_ASSERT_DEF", bpf_static_assert_def), | ||
("CTYPES_SIZEOF_DATA", str(ct.sizeof(Data))), | ||
('FILTER_PID', '0' if not Global.args.pid else "task->tgid != %s" % Global.args.pid), | ||
('FILTER_EXIT_CODE', '0' if not Global.args.failed else 'task->exit_code == 0'), | ||
('PROCESS_START_TIME_NS', 'task->start_time' if not Global.args.timespec else | ||
'(task->start_time.tv_sec * 1000000000L) + task->start_time.tv_nsec'), | ||
] | ||
for old,new in code_substitutions: | ||
c = c.replace(old, new) | ||
return c | ||
|
||
def _ebpf_comment(): | ||
"""Return a C-style comment with information about the generated code.""" | ||
comment=('Created by %s at %s:\n\t%s' % | ||
(sys.argv[0], strftime("%Y-%m-%d %H:%M:%S %Z"), _embedded_c.__doc__)) | ||
args = str(vars(Global.args)).replace('{','{\n\t').replace(', ',',\n\t').replace('}',',\n }\n\n') | ||
return ("\n /*" + ("\n %s\n\n ARGV = %s\n\n ARGS = %s/" % | ||
(comment, ' '.join(Global.argv), args)) | ||
.replace('\n','\n\t*').replace('\t',' ')) | ||
|
||
def _print_header(): | ||
if Global.args.timestamp: | ||
title = 'TIME-' + ('UTC' if Global.args.utc else strftime("%Z")) | ||
print("%-13s" % title, end="") | ||
if Global.args.label is not None: | ||
print("%-6s" % "LABEL", end="") | ||
print("%-16s %-6s %-6s %-6s %-7s %-10s" % | ||
("PCOMM", "PID", "PPID", "TID", "AGE(s)", "EXIT_CODE")) | ||
|
||
def _print_event(cpu, data, size): # callback | ||
"""Print the exit event.""" | ||
e = ct.cast(data, ct.POINTER(Data)).contents | ||
if Global.args.timestamp: | ||
now = datetime.utcnow() if Global.args.utc else datetime.now() | ||
print("%-13s" % (now.strftime("%H:%M:%S.%f")[:-3]), end="") | ||
if Global.args.label is not None: | ||
label = Global.args.label if len(Global.args.label) else 'exit' | ||
print("%-6s" % label, end="") | ||
age = (e.exit_time - e.start_time) / 1e9 | ||
print("%-16s %-6d %-6d %-6d %-7.2f " % | ||
(e.task.decode(), e.pid, e.ppid, e.tid, age), end="") | ||
if e.sig_info == 0: | ||
print("0" if e.exit_code == 0 else "code %d" % e.exit_code) | ||
else: | ||
sig = e.sig_info & 0x7F | ||
if sig: | ||
print("signal %d (%s)" % (sig, signum_to_signame(sig)), end="") | ||
if e.sig_info & 0x80: | ||
print(", core dumped ", end="") | ||
print() | ||
|
||
# ============================= | ||
# Module: These functions are available for import | ||
# ============================= | ||
def initialize(arg_list = sys.argv[1:]): | ||
"""Trace all process termination. | ||
arg_list - list of args, if omitted then uses command line args | ||
arg_list is passed to argparse.ArgumentParser.parse_args() | ||
For example, if arg_list = [ '-x', '-t' ] | ||
args.failed == True | ||
args.timestamp == True | ||
Returns a tuple (return_code, result) | ||
0 = Ok, result is the return value from BPF() | ||
1 = args.ebpf is requested, result is the generated C code | ||
os.EX_NOPERM: need CAP_SYS_ADMIN, result is error message | ||
os.EX_SOFTWARE: internal software error, result is error message | ||
""" | ||
Global.argv = arg_list | ||
Global.args = Global.parse_args(arg_list) | ||
if Global.args.utc and not Global.args.timestamp: | ||
Global.args.timestamp = True | ||
if not Global.args.ebpf and os.geteuid() != 0: | ||
return (os.EX_NOPERM, "Need sudo (CAP_SYS_ADMIN) for BPF() system call") | ||
if re.match('^3\.10\..*el7.*$', platform.release()): # Centos/Red Hat | ||
Global.args.timespec = True | ||
for _ in range(2): | ||
c = _embedded_c(Global.args) | ||
if Global.args.ebpf: | ||
return (1, c) | ||
try: | ||
return (os.EX_OK, BPF(text=c)) | ||
except Exception as e: | ||
error = format(e) | ||
if (not Global.args.timespec | ||
and error.find('struct timespec') | ||
and error.find('start_time')): | ||
print('This kernel keeps task->start_time in a struct timespec.\n' + | ||
'Retrying with --timespec') | ||
Global.args.timespec = True | ||
continue | ||
return (os.EX_SOFTWARE, "BPF error: " + error) | ||
except: | ||
return (os.EX_SOFTWARE, "Unexpected error: {0}".format(sys.exc_info()[0])) | ||
|
||
def snoop(bpf, event_handler): | ||
"""Call event_handler for process termination events. | ||
bpf - result returned by successful initialize() | ||
event_handler - callback function to handle termination event | ||
args.pid - Return after event_handler is called, only monitoring this pid | ||
""" | ||
bpf["events"].open_perf_buffer(event_handler) | ||
while True: | ||
bpf.perf_buffer_poll() | ||
if Global.args.pid: | ||
return | ||
|
||
def signum_to_signame(signum): | ||
"""Return the name of the signal corresponding to signum.""" | ||
return Global.SIGNUM_TO_SIGNAME.get(signum, "unknown") | ||
|
||
# ============================= | ||
# Script: invoked as a script | ||
# ============================= | ||
def main(): | ||
try: | ||
rc, buffer = initialize() | ||
if rc: | ||
print(buffer) | ||
sys.exit(0 if Global.args.ebpf else rc) | ||
_print_header() | ||
snoop(buffer, _print_event) | ||
except KeyboardInterrupt: | ||
print() | ||
sys.exit() | ||
|
||
return 0 | ||
|
||
if __name__ == '__main__': | ||
main() |
Oops, something went wrong.