Skip to content

Commit b231f74

Browse files
committed
Initial functionality added
1 parent ca0babc commit b231f74

File tree

2 files changed

+363
-0
lines changed

2 files changed

+363
-0
lines changed

README.md

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# c-deadlock-detector
2+
Python based deadlock detector for C programs
3+
4+
This Python script can be used to detect if a program is in a deadlocked state and, furthermore, to
5+
gain more insight about potential threads that could be the culprits to the nasty deadlock.
6+
7+
Now I know that figuring out where a deadlock occurs is a lot of fun and I don't want to take the
8+
fun out of it trust me. So for the people that are new to C and have not had the joy of trying
9+
to figure out what threads are causing a deadlock using GDB please just go and do it manually,
10+
you'll learn a lot and it will help you greatly in your career.
11+
12+
Now for those that have done this once or twice, you probably know that it's a joyous time to
13+
troubleshoot a deadlock and use various gdb commands like 'info threads', 'info reg', etc.
14+
to figure out what threads are waiting for what locks and which threads are the owners of those
15+
locks. In the case that you want an automated script that does those commands for you then this
16+
is the script for you. Plus it's Python, so no need to recompile if it needs a minor change, just
17+
change and go.
18+
19+
# What do you need to run this program
20+
1. Python
21+
2. GDB
22+
3. A program that is in a deadlock
23+
24+
# Lets troubleshoot a deadlock
25+
26+
## By Process ID
27+
# Grab the process id
28+
ps aux | grep <PROCESS>
29+
30+
# Type the PID into the program
31+
./deadlock_detector.py /path/to/program/binary <PID>
32+
33+
## By Core File
34+
In some environments running batch commands in gdb does not work and the script
35+
by process id will fail. In those cases you will want to save the core file
36+
first from GDB.
37+
# Go into gdb and generate a core file
38+
gdb /path/to/program/binary PID
39+
(gdb) generate-core-file my-core
40+
41+
# Run the script with the core file
42+
./deadlock_detector.py /path/to/program/binary my-core

deadlock_detector.py

Lines changed: 321 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,321 @@
1+
#!/usr/bin/python
2+
3+
"""C deadlock detector
4+
5+
Designed to look at a program through GDB and see whether there is a
6+
deadlock as well as to pin point the threads that are likely causing
7+
the deadlock.
8+
"""
9+
10+
from __future__ import print_function
11+
import os, sys, argparse, re
12+
import subprocess, shlex
13+
14+
__author__ = "Nodar Nutsubidze"
15+
__licence__ = "GPL"
16+
17+
# Every feature of this script shells out to gdb, so stop right away when
# the `which` lookup reports that no gdb executable is on the PATH.
gdb_lookup_status = os.system("which gdb")
if gdb_lookup_status != 0:
    print("Could not find 'gdb' on the system. Exiting")
    sys.exit(1)
20+
21+
class GDB(object):
    """Run gdb in batch mode against a process or core file and model its threads.

    Attributes:
        binary (str): Path to the program binary handed to gdb.
        pid (str): PID or core file handed to gdb as its second argument.
        threads (list): Thread objects collected by ``parse_thread_state``.
        num_locked (int): Lock-wait counter. It is not updated in this class;
            ``Thread.add_frame`` increments it when it sees a frame that is
            waiting on a mutex.
    """

    def __init__(self, binary, pid):
        """Initialize the GDB object

        Args:
            binary (str): Path to the program binary
            pid (str): PID of the running process, or path to a core file
        """
        self.binary = binary
        self.pid = pid
        self.threads = []
        self.num_locked = 0

    @property
    def base(self):
        """Return the base gdb command to run"""
        return "gdb {} {}".format(self.binary, self.pid)

    def _build_argv(self, cmd):
        """Build the argument vector for one gdb batch invocation.

        Args:
            cmd (str or list): Command(s) to perform

        Returns(list):
            The argv list: the base command, ``--batch`` and one ``-ex``
            pair per gdb command.
        """
        commands = cmd if isinstance(cmd, list) else [cmd]
        argv = shlex.split(self.base) + ["--batch"]
        for item in commands:
            # Each gdb command is passed as its own argv entry so that quotes
            # or newlines inside it never go through shell-style re-parsing.
            argv.extend(["-ex", "{}".format(item)])
        return argv

    def get_output(self, cmd):
        """Perform a gdb batch command

        Args:
            cmd (str or list): Command(s) to perform

        Returns(str):
            GDB output of the commands that were performed, always as text

        Raises:
            subprocess.CalledProcessError: If gdb exits with a non-zero status
        """
        argv = self._build_argv(cmd)
        # gdb's stderr is discarded; the context manager closes the handle
        # even when check_output raises.
        with open(os.devnull, 'w') as dev_null:
            result = subprocess.check_output(argv, stderr=dev_null)
        # Python 3 returns bytes here while every caller splits on '\n', so
        # normalise to text. On Python 2 the result is already a str.
        if not isinstance(result, str):
            result = result.decode('utf-8', 'replace')
        return result

    def parse_thread_state(self):
        """Parse the state of the threads

        Reads the output of 'thread apply all bt': a line starting with
        'Thread ' opens a new thread, following lines starting with '#' are
        its frames, and an empty line closes the thread. Thread names are then
        taken from 'info threads', and finally ``set_locks`` is called.
        """
        header_re = re.compile(
            r'Thread ([0-9]*) \(Thread (0x[a-f0-9]*) \(LWP ([0-9]*)\)\)')
        name_re = re.compile(r'\(LWP ([0-9]*)\) "([a-zA-Z0-9_-]*)"')

        thread = None
        for line in self.get_output('thread apply all bt').split('\n'):
            if thread:
                if not line:
                    # End of the thread
                    thread = None
                    continue
                if not line.startswith('#'):
                    print("Skipping invalid frame: {}".format(line))
                    continue
                thread.add_frame(line)
            elif line.startswith('Thread '):
                # New thread
                match = header_re.match(line)
                if not match:
                    print("No match for thread {}".format(line))
                    continue
                index, addr, lwp = match.groups()
                thread = Thread(self, index, addr, lwp)
                self.threads.append(thread)

        for line in self.get_output('info threads').split('\n'):
            match = name_re.search(line)
            if match:
                self.set_thread_name(match.group(1), match.group(2))
        self.set_locks()

    def set_thread_name(self, lwp, name):
        """Set a thread name based on the LWP that is passed in

        Args:
            lwp (str): The LWP as seen in GDB for a given thread
            name (str): The name of the thread
        """
        th = self.thread_by_lwp(lwp)
        if th is not None:
            th.name = name
        else:
            print("No thread found with lwp: {}".format(lwp))
            print("Threads: {}".format([th.lwp for th in self.threads]))

    def set_locks(self):
        """What functions were called before the lock

        For every locked thread, ``lock_func`` becomes the function of the
        first stored frame whose index is above the locked frame's index and
        that has both a source location and a function name. It stays None
        when no such frame exists.
        """
        for th in self.threads:
            if not th.locked:
                continue
            th.lock_func = None
            for frame in th.frames:
                if (frame.index > th.locked_index
                        and frame.at_file and frame.in_func):
                    th.lock_func = frame.in_func
                    break

    def print_status(self, show_bt=False):
        """Print the status of the threads

        Args:
            show_bt (bool): Whether to show the back trace for the locked threads
        """
        if not self.num_locked:
            print("There are no locked threads")
            return
        for th in self.threads:
            if not th.locked:
                continue
            # The owner may be unknown: either no owner LWP was extracted or
            # that LWP is not among the parsed threads. Report it instead of
            # failing on a None owner.
            owner = None
            if th.lock_owner_lwp is not None:
                owner = self.thread_by_lwp(th.lock_owner_lwp)
            if owner is not None:
                owner_desc = owner.readable()
            else:
                owner_desc = "an unknown thread"

            # We want to print the back trace
            if show_bt:
                print("=" * 80)
            print("{} is waiting for a lock ({}) owned by {}"
                  .format(th.readable(), th.lock_func, owner_desc))
            if show_bt:
                th.print_backtrace()
                if owner is not None:
                    owner.print_backtrace()

    def thread_by_lwp(self, lwp):
        """Retrieve a thread object by LWP id

        Args:
            lwp (str): The LWP id

        Returns(Thread):
            The matching thread, or None (after printing a notice) when no
            stored thread has that LWP
        """
        for th in self.threads:
            if th.lwp == lwp:
                return th
        print("Did not find {} in {}".format(lwp, [th.lwp for th in self.threads]))
        return None
147+
148+
class Thread(object):
    """One thread of the inspected program together with its parsed frames."""

    def __init__(self, gdb, index, addr, lwp):
        """Initialize a new thread object

        Args:
            gdb (GDB): The GDB class object
            index (str): The thread index
            addr (str): Memory address of the thread
            lwp (str): The light weight process id
        """
        self.gdb = gdb
        self.name = None
        self.index = int(index, 10)
        self.addr = addr
        self.lwp = lwp
        self.frames = []
        self.locked = False
        self.locked_index = None
        self.lock_func = None
        self.lock_owner_lwp = None

    def __str__(self):
        return ("{} {} {} Locked: {}".format(
            self.index, self.addr, self.lwp, self.locked))

    def readable(self):
        """Returns a readable version of the thread name"""
        data = "Thread #{}".format(self.index)
        if self.name:
            data += " {}".format(self.name)
        return data

    def print_backtrace(self):
        """Show the back trace of a thread"""
        print('\n{} {} {}'.format('-' * 20, self.readable(), '-' * 20))
        for frame in self.frames:
            print(frame.raw)

    def _record_frame(self, frame):
        """Store an already-parsed frame and update the lock bookkeeping.

        Args:
            frame (Frame): Parsed frame; only its ``index``, ``locked`` and
                ``raw`` attributes are read here
        """
        if frame.index == -1:
            # The frame line could not be parsed. Actually skip it, as the
            # message says, rather than storing a frame with no usable fields.
            print("Skipping invalid frame: {}".format(frame.raw))
            return
        if frame.locked:
            # Count each waiting thread once, even when more than one of its
            # frames is flagged as locked.
            if not self.locked:
                self.gdb.num_locked += 1
            self.locked = True
            self.locked_index = frame.index
        self.frames.append(frame)

    def add_frame(self, line):
        """Add a frame to a thread

        Args:
            line (str): Frame line
        """
        self._record_frame(Frame(self, line))
200+
201+
class Frame(object):
    """A single backtrace line of a thread, parsed into its parts.

    After construction ``index`` is -1 when the line could not be parsed.
    ``locked`` is True when the frame's function name contains
    'pthread_mutex_lock'.
    """

    def __init__(self, thread, data):
        """Initialize a frame object

        Args:
            thread (Thread): The thread object for the frame
            data (str): The data line to parse
        """
        self.thread = thread
        self.gdb = self.thread.gdb
        self.raw = data
        self.index = -1
        self.addr = None
        self.in_func = None
        self.args = None
        self.from_file = None
        self.at_file = None
        self.locked = False
        self.parse()

    def __str__(self):
        return ("#{} {} in: {} from: {} at: {} Locked: {}".format(
            self.index,
            self.addr,
            self.in_func,
            self.from_file,
            self.at_file,
            self.locked))

    def parse(self):
        """Parse a frame line from a gdb thread

        The regular expression is assembled from the ' in ', ' from ' and
        ' at ' keywords that occur in the line, so the optional capture
        groups appear in exactly that order after the index and the address.
        """
        # At least one digit is required, so int() below can never be handed
        # an empty string.
        base_pattern = r'#([0-9]+) *([a-zA-Z0-9_-]*).*'
        in_pattern = r'in ([\?a-zA-Z0-9_-]*) \(.*\)'
        file_pattern = r'([,/.:_\-a-zA-Z0-9]*)'
        optional_parts = (
            ('in', in_pattern),
            ('from', 'from ' + file_pattern),
            ('at', 'at ' + file_pattern),
        )

        pattern = base_pattern
        found = []
        for keyword, sub_pattern in optional_parts:
            if ' {} '.format(keyword) in self.raw:
                found.append(keyword)
                pattern += ' ' + sub_pattern

        match = re.match(pattern, self.raw)
        if not match:
            print("{} did not match the pattern: {}".format(self.raw, pattern))
            return
        data = match.groups()
        self.index = int(data[0], 10)
        self.addr = data[1]
        # NOTE(review): when the line has no ' in ', the second group holds
        # whatever token follows the index (it may be a function name rather
        # than an address) -- confirm against real gdb output.

        # Pair each keyword that was present with its own capture group, so
        # every combination of in/from/at lands in the right attribute.
        captured = dict(zip(found, data[2:]))
        self.in_func = captured.get('in')
        self.from_file = captured.get('from')
        self.at_file = captured.get('at')

        if self.in_func and 'pthread_mutex_lock' in self.in_func:
            self.locked = True
            self.parse_locked_state()

    def parse_locked_state(self):
        """Figure out what thread is causing this thread to wait

        Selects this thread and frame in gdb, dumps the registers, and treats
        the value of the register listed right after the '0x80 / 128' marker
        register as the address of a pthread_mutex_t. The '__owner' field
        printed for that mutex is stored as the thread's ``lock_owner_lwp``.
        Nothing is stored when the marker or the owner field is not found.
        """
        sep = '======='
        lines = self.gdb.get_output([
            "thread {}".format(self.thread.index),
            "frame {}".format(self.index),
            "echo {}\n".format(sep),
            "info reg",
        ]).split('\n')
        found_sep = False
        found_special = False
        for line in lines:
            if not found_sep:
                # Everything before the echoed separator is output of the
                # thread/frame selection, not the register dump.
                if line.startswith(sep):
                    found_sep = True
                continue
            arr = line.split()
            if len(arr) != 3:
                continue
            _register, mem_addr, val = arr
            # Need to have some way to know what memory address to look
            # at. I know this is silly but at the moment the only pattern I've
            # found is that the memory address to look at is after mem_addr is
            # 0x80 and value is 128
            if mem_addr == '0x80' and val == '128':
                found_special = True
                continue
            if found_special:
                info = self.gdb.get_output(
                    "p *(pthread_mutex_t*){}".format(mem_addr))
                owner = re.search('__owner = ([0-9]*)', info)
                # gdb may print an error instead of the struct; leave the
                # owner unset in that case rather than raising on None.
                if owner:
                    self.thread.lock_owner_lwp = owner.group(1)
                break
302+
303+
if __name__ == "__main__":
    def ap_detector(args):
        """Inspect the target through gdb and print the lock status.

        Args:
            args (argparse.Namespace): Parsed command line carrying
                ``binary``, ``pid`` and ``back_trace``
        """
        gdb = GDB(args.binary, args.pid)
        try:
            gdb.parse_thread_state()
        except subprocess.CalledProcessError as err:
            # A failing gdb run is reported as a message and a non-zero exit
            # status instead of an unhandled traceback.
            print("gdb exited with status {} while running: {}".format(
                err.returncode, err.cmd), file=sys.stderr)
            sys.exit(1)
        gdb.print_status(show_bt=args.back_trace)

    parser = argparse.ArgumentParser(description='C deadlock detector')
    parser.add_argument('binary', help='Path to the binary')
    parser.add_argument('pid', help='PID or Core File of the process')
    parser.add_argument('-b', '--back-trace', action='store_true',
                        help='Show the back trace for locked threads')
    args = parser.parse_args()
    ap_detector(args)

0 commit comments

Comments
 (0)