Skip to content

Commit

Permalink
A tool for mapping runtime addresses to symbol names.
Browse files Browse the repository at this point in the history
BUG=123763
TEST=compare with pprof --symbols.


Review URL: https://chromiumcodereview.appspot.com/10795028

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@147784 0039d316-1c4b-4281-b951-d872f2087c98
  • Loading branch information
dmikurube@chromium.org committed Jul 21, 2012
1 parent ecb008c commit 5875d06
Show file tree
Hide file tree
Showing 6 changed files with 541 additions and 0 deletions.
24 changes: 24 additions & 0 deletions tools/find_runtime_symbols/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
This script maps runtime addresses to symbol names. It is robust against
Address Space Layout Randomization (ASLR) because it resolves runtime addresses
using the runtime mapping information (/proc/.../maps).
It works like 'pprof --symbols' in gperftools
<http://code.google.com/p/gperftools/>.


Step 1: Prepare symbol information.

It is required to collect symbol information before mapping runtime addresses
to symbol names.

./prepare_symbol_info.py /path/to/maps [/another/path/to/symbol_info_dir]

The required 'maps' file is /proc/.../maps of the process at runtime.


Step 2: Find symbols.

./find_runtime_symbols.py /path/to/symbol_info_dir < addresses.txt

'symbol_info_dir' is the directory produced by Step 1.
Standard input should be a list of hex addresses to map, one per line.

The results will be printed to stdout like 'pprof --symbols'.
134 changes: 134 additions & 0 deletions tools/find_runtime_symbols/find_runtime_symbols.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import json
import logging
import os
import re
import sys

from parse_proc_maps import parse_proc_maps
from procedure_boundaries import get_procedure_boundaries_from_nm_bsd
from util import executable_condition


def _determine_symbol_name(address, symbol):
  """Returns the text to report for |address|.

  Args:
    address: Integer address being reported.
    symbol: Resolved symbol object exposing a `name` attribute, or a falsy
        value when nothing was resolved.

  Returns:
    symbol.name when |symbol| is truthy; otherwise |address| rendered as a
    '0x'-prefixed, zero-padded 16-digit hexadecimal string.
  """
  if not symbol:
    return '0x%016x' % address
  return symbol.name


class _ListOutput(object):
  """Outputter that collects reported names into a caller-supplied list."""

  def __init__(self, result):
    # |result| is the list that output() appends to.
    self.result = result

  def output(self, address, symbol=None):
    """Appends the name for |address| (symbol name or hex fallback)."""
    name = _determine_symbol_name(address, symbol)
    self.result.append(name)


class _DictOutput(object):
  """Outputter that records reported names in a caller-supplied dict."""

  def __init__(self, result):
    # |result| is the dict that output() fills, keyed by address.
    self.result = result

  def output(self, address, symbol=None):
    """Stores the name for |address| (symbol name or hex fallback)."""
    name = _determine_symbol_name(address, symbol)
    self.result[address] = name


class _FileOutput(object):
  """Outputter that writes one line per reported address to a file object."""

  def __init__(self, result, with_address):
    # |result| only needs a write() method; |with_address| selects whether
    # each line is prefixed with the 16-digit hex address.
    self.result = result
    self.with_address = with_address

  def output(self, address, symbol=None):
    """Writes the name for |address|, optionally preceded by the address."""
    name = _determine_symbol_name(address, symbol)
    line = ('%016x %s\n' % (address, name) if self.with_address
            else '%s\n' % name)
    self.result.write(line)


def _find_runtime_symbols(
    prepared_data_dir, addresses, outputter, loglevel=logging.WARN):
  """Resolves each address to a symbol and reports it through |outputter|.

  Args:
    prepared_data_dir: Directory holding a 'maps' file and an 'nm.json' index,
        plus the nm dump files that the index refers to.
    addresses: Iterable of addresses. str items are parsed as hexadecimal;
        anything else is used as is.
    outputter: Object with an output(address, symbol=None) method. It is
        called exactly once per address, in input order.
    loglevel: Level applied to the 'find_runtime_symbols' logger.

  Returns:
    0 on success, 1 if |prepared_data_dir| does not exist or is not a
    directory.
  """
  log = logging.getLogger('find_runtime_symbols')
  log.setLevel(loglevel)
  # Attach the stream handler only once. Adding a new handler on every call
  # would print each message once per previous call. The handler itself has
  # no level, so the logger level set above is the only filter.
  if not log.handlers:
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter('%(message)s'))
    log.addHandler(handler)

  if not os.path.exists(prepared_data_dir):
    log.warning("Nothing found: %s" % prepared_data_dir)
    return 1
  if not os.path.isdir(prepared_data_dir):
    log.warning("Not a directory: %s" % prepared_data_dir)
    return 1

  with open(os.path.join(prepared_data_dir, 'maps'), mode='r') as f:
    maps = parse_proc_maps(f)

  with open(os.path.join(prepared_data_dir, 'nm.json'), mode='r') as f:
    nm_files = json.load(f)

  # The set of executable mappings does not change per address, so walk the
  # maps once instead of once per lookup.
  executable_entries = list(maps.iter(executable_condition))

  symbol_table = {}
  for entry in executable_entries:
    # A binary may back several mappings; its dump is loaded only once.
    if entry.name in symbol_table or entry.name not in nm_files:
      continue
    nm_info = nm_files[entry.name]
    if nm_info['format'] != 'bsd':
      continue
    with open(os.path.join(prepared_data_dir, nm_info['file']), mode='r') as f:
      symbol_table[entry.name] = get_procedure_boundaries_from_nm_bsd(
          f, nm_info['mangled'])

  for address in addresses:
    if isinstance(address, str):
      address = int(address, 16)
    symbol = None
    for entry in executable_entries:
      if entry.begin <= address < entry.end:
        if entry.name in symbol_table:
          # Translate the runtime address into the mapped file's address
          # space before looking it up.
          symbol = symbol_table[entry.name].find_procedure(
              address - (entry.begin - entry.offset))
        break
    # Unmapped addresses and mappings without symbols are reported with no
    # symbol.
    outputter.output(address, symbol)

  return 0


def find_runtime_symbols_list(prepared_data_dir, addresses):
  """Returns a list with one reported name per address, in input order.

  Each item is a symbol name, or the '0x'-prefixed hex address when no symbol
  was resolved. The list stays empty if |prepared_data_dir| is unusable.
  """
  names = []
  collector = _ListOutput(names)
  _find_runtime_symbols(prepared_data_dir, addresses, collector)
  return names


def find_runtime_symbols_dict(prepared_data_dir, addresses):
  """Returns a dict mapping each address to its reported name.

  Values are symbol names, or the '0x'-prefixed hex address when no symbol
  was resolved. The dict stays empty if |prepared_data_dir| is unusable.
  """
  names_by_address = {}
  collector = _DictOutput(names_by_address)
  _find_runtime_symbols(prepared_data_dir, addresses, collector)
  return names_by_address


def find_runtime_symbols_file(prepared_data_dir, addresses, f):
  """Writes one reported name per address to |f|, one per line.

  Args:
    prepared_data_dir: Directory with the prepared 'maps' and 'nm.json' data.
    addresses: Iterable of addresses (hex strings or integers).
    f: Writable file-like object that receives the output lines.

  Returns:
    The status of the lookup: 0 on success, 1 if |prepared_data_dir| is
    missing or not a directory. The status used to be dropped, so main()
    exited with 0 even on failure.
  """
  return _find_runtime_symbols(
      prepared_data_dir, addresses, _FileOutput(f, False))


def main():
  """Command-line entry point: resolves addresses read from stdin.

  Expects the prepared data directory as the first argument. Prints usage to
  stderr and returns 1 when it is missing; otherwise returns whatever
  find_runtime_symbols_file() returns.
  """
  # FIX: Accept only .pre data
  argv = sys.argv
  if len(argv) >= 2:
    return find_runtime_symbols_file(argv[1], sys.stdin, sys.stdout)

  sys.stderr.write("""Usage:
%s /path/to/prepared_data_dir/ < addresses.txt
""" % argv[0])
  return 1


# Run main() only when executed as a script; its return value is passed to
# sys.exit() as the process exit status.
if __name__ == '__main__':
  sys.exit(main())
104 changes: 104 additions & 0 deletions tools/find_runtime_symbols/parse_proc_maps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import re
import sys


# Matches one line of /proc/.../maps, for example:
#   00400000-0040b000 r-xp 00000000 08:01 1234   /bin/cat
# Groups: 1 begin, 2 end, 3-6 the four permission characters, 7 offset,
# 8 device major, 9 device minor, 10 inode, 11 name.
# The line must end with a whitespace-free name field to match.
# Raw strings keep '\s', '\d' and '\S' from being read as (invalid) string
# escapes; the compiled pattern is unchanged.
_MAPS_PATTERN = re.compile(
    r'^([a-f0-9]+)-([a-f0-9]+)\s+(.)(.)(.)(.)\s+([a-f0-9]+)\s+(\S+):(\S+)\s+'
    r'(\d+)\s+(\S+)$', re.IGNORECASE)


class ProcMapsEntry(object):
  """One mapping line from /proc/.../maps.

  Plain value holder: every constructor argument is stored unchanged under
  the attribute of the same name.
  """

  def __init__(
      self, begin, end, readable, writable, executable, private, offset,
      major, minor, inode, name):
    # Address range and file offset of the mapping.
    self.begin, self.end, self.offset = begin, end, offset
    # The four permission fields, in the order they appear on the line.
    self.readable, self.writable = readable, writable
    self.executable, self.private = executable, private
    # Device numbers, inode and name of the mapped object.
    self.major, self.minor = major, minor
    self.inode, self.name = inode, name


class ProcMaps(object):
  """A class representing contents in /proc/.../maps.

  Entries can be appended in any order and are always iterated in ascending
  order of their `begin` attribute. Entries sharing the same `begin` are all
  kept and come out in insertion order; previously the later one replaced the
  earlier one and was then yielded twice.
  """

  def __init__(self):
    self._entries = []
    # True while |_entries| is known to be ordered by begin address.
    self._sorted = True

  def append(self, entry):
    """Adds |entry|, which must expose a `begin` attribute."""
    if self._entries and self._entries[-1].begin > entry.begin:
      self._sorted = False
    self._entries.append(entry)

  def iter(self, condition=None):
    """Yields entries in ascending `begin` order.

    Args:
      condition: Optional one-argument predicate. When given and truthy, only
          entries for which it returns a truthy value are yielded.
    """
    self._ensure_sorted()
    for entry in self._entries:
      if not condition or condition(entry):
        yield entry

  def __iter__(self):
    """Yields every entry in ascending `begin` order."""
    return self.iter()

  def _ensure_sorted(self):
    """Sorts the entries by `begin` if an out-of-order append happened."""
    if not self._sorted:
      # list.sort is stable, so entries with equal begin keep their order.
      self._entries.sort(key=lambda entry: entry.begin)
      self._sorted = True


def parse_proc_maps(f):
  """Builds a ProcMaps table from the lines of a /proc/.../maps file.

  Args:
    f: Iterable of text lines (e.g. an open file).

  Returns:
    A ProcMaps holding one ProcMapsEntry per line that matches _MAPS_PATTERN.
    Lines that do not match are skipped. begin, end and offset are parsed as
    hexadecimal integers, inode as a decimal integer; the remaining fields
    are kept as the matched strings.
  """
  table = ProcMaps()
  for line in f:
    matched = _MAPS_PATTERN.match(line)
    if not matched:
      continue
    (begin, end, readable, writable, executable, private, offset,
     major, minor, inode, name) = matched.groups()
    table.append(ProcMapsEntry(
        int(begin, 16), int(end, 16),
        readable, writable, executable, private,
        int(offset, 16), major, minor, int(inode, 10), name))
  return table


def main():
  """Command-line entry point: prints the parsed entries of a maps file.

  Expects the path of the maps file as the first argument.

  Returns:
    0 after printing every parsed entry, 1 (with usage on stderr) when the
    path argument is missing.
  """
  if len(sys.argv) < 2:
    sys.stderr.write("""Usage:
%s /path/to/maps
""" % sys.argv[0])
    return 1

  with open(sys.argv[1], mode='r') as f:
    maps = parse_proc_maps(f)

  for entry in maps:
    # Calling print with a single pre-formatted string behaves the same as a
    # statement (Python 2) and as a function (Python 3).
    print("%016x-%016x +%06x %s" % (
        entry.begin, entry.end, entry.offset, entry.name))

  return 0


# Run main() only when executed as a script; its return value is passed to
# sys.exit() as the process exit status.
if __name__ == '__main__':
  sys.exit(main())
121 changes: 121 additions & 0 deletions tools/find_runtime_symbols/prepare_symbol_info.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import json
import logging
import os
import re
import shutil
import subprocess
import sys
import tempfile

from parse_proc_maps import parse_proc_maps
from util import executable_condition


def _default_output_dir(maps_path):
  """Derives an output directory name from |maps_path|.

  'NAME.maps' gives 'NAME.pre'; a path that resolves to '/proc/X/maps' gives
  'X.pre'; anything else gives the base name of |maps_path| plus '.pre'.
  """
  matched = re.match(r'^(.*)\.maps$', os.path.basename(maps_path))
  if matched:
    return matched.group(1) + '.pre'
  matched = re.match(r'^/proc/(.*)/maps$', os.path.realpath(maps_path))
  if matched:
    return matched.group(1) + '.pre'
  # TODO(dmikurube): Find another candidate for output_dir_path.
  return os.path.basename(maps_path) + '.pre'


def _dump_demangled_symbols(binary_path, output_file, log):
  """Writes the output of `nm -n --format bsd | c++filt` to |output_file|.

  Both tools are started without a shell so that |binary_path| is passed
  verbatim, even if it contains spaces or shell metacharacters. Whatever the
  tools print on stderr is forwarded to |log| at debug level.

  Returns:
    True only if both nm and c++filt started and exited with status 0.
  """
  try:
    p_nm = subprocess.Popen(
        ['nm', '-n', '--format', 'bsd', binary_path],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        universal_newlines=True)
  except OSError as e:
    log.debug('Failed to run nm: %s' % e)
    return False

  try:
    p_cppfilt = subprocess.Popen(
        ['c++filt'], stdin=p_nm.stdout, stdout=output_file,
        stderr=subprocess.PIPE, universal_newlines=True)
  except OSError as e:
    log.debug('Failed to run c++filt: %s' % e)
    # Nobody will read nm's output; close the pipe and reap the process.
    p_nm.stdout.close()
    p_nm.stderr.read()
    p_nm.stderr.close()
    p_nm.wait()
    return False

  # c++filt owns the read end now; closing our copy lets nm notice if
  # c++filt exits early.
  p_nm.stdout.close()

  # Drain each stderr pipe before waiting, so a chatty tool cannot block on a
  # full pipe while we block in wait().
  nm_errors = p_nm.stderr.read()
  p_nm.stderr.close()
  nm_succeeded = p_nm.wait() == 0
  cppfilt_errors = p_cppfilt.stderr.read()
  p_cppfilt.stderr.close()
  cppfilt_succeeded = p_cppfilt.wait() == 0

  for line in nm_errors.splitlines():
    log.debug(line.rstrip())
  for line in cppfilt_errors.splitlines():
    log.debug(line.rstrip())

  return nm_succeeded and cppfilt_succeeded


def prepare_symbol_info(maps_path, output_dir_path=None, loglevel=logging.WARN):
  """Collects the symbol information needed to resolve runtime addresses.

  Copies |maps_path| into the output directory as 'maps', dumps the demangled
  `nm` symbols of every executable mapping into that directory, and writes an
  'nm.json' index that maps each binary to its dump file.

  Args:
    maps_path: Path to a /proc/.../maps file (or a saved copy of one).
    output_dir_path: Directory to fill. It must not exist yet or must be an
        empty directory. When omitted, a name ending in '.pre' is derived
        from |maps_path|.
    loglevel: Level applied to the 'prepare_symbol_info' logger.

  Returns:
    0 on success, 1 if |output_dir_path| already exists and is a file or a
    non-empty directory.
  """
  log = logging.getLogger('prepare_symbol_info')
  log.setLevel(loglevel)
  # Attach the stream handler only once. Adding a new handler on every call
  # would print each message once per previous call. The handler itself has
  # no level, so the logger level set above is the only filter.
  if not log.handlers:
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter('%(message)s'))
    log.addHandler(handler)

  if not output_dir_path:
    # The last-resort name used to be stored in an unused variable, leaving
    # output_dir_path as None and crashing in os.path.exists() below.
    output_dir_path = _default_output_dir(maps_path)

  log.info('Data for profiling will be collected in "%s".' % output_dir_path)
  if not os.path.exists(output_dir_path):
    log.info('Creating a new directory at "%s".' % output_dir_path)
    os.mkdir(output_dir_path)
  elif os.path.isdir(output_dir_path) and not os.listdir(output_dir_path):
    log.warning(
        'Using an empty directory existing at "%s".' % output_dir_path)
  else:
    log.warning('A file or a directory exists at "%s".' % output_dir_path)
    return 1

  shutil.copyfile(maps_path, os.path.join(output_dir_path, 'maps'))

  with open(maps_path, mode='r') as f:
    maps = parse_proc_maps(f)

  log.debug('Listing up symbols.')
  nm_files = {}
  for entry in maps.iter(executable_condition):
    log.debug(' %016x-%016x +%06x %s' % (
        entry.begin, entry.end, entry.offset, entry.name))
    if entry.name in nm_files:
      # This binary was already dumped for an earlier mapping. Dumping it
      # again would only leave an unreferenced duplicate file behind.
      continue

    with tempfile.NamedTemporaryFile(
        prefix=os.path.basename(entry.name) + '.',
        suffix='.nm', delete=False, mode='w', dir=output_dir_path) as f:
      nm_filename = os.path.realpath(f.name)
      succeeded = _dump_demangled_symbols(entry.name, f, log)

    if succeeded:
      nm_files[entry.name] = {
          'file': os.path.basename(nm_filename),
          'format': 'bsd',
          'mangled': False}
    else:
      os.remove(nm_filename)

  with open(os.path.join(output_dir_path, 'nm.json'), 'w') as f:
    json.dump(nm_files, f, indent=2, sort_keys=True)

  log.info('Collected symbol information at "%s".' % output_dir_path)
  return 0


def main():
  """Command-line entry point for collecting symbol information.

  Usage: prepare_symbol_info.py /path/to/maps [/path/to/output_data_dir/]

  Returns:
    1 on a non-Linux platform or when the maps path is missing; otherwise the
    status returned by prepare_symbol_info(). The status is returned rather
    than passed to sys.exit() here, so the script guard stays the single exit
    point and no code after the call is unreachable.
  """
  if not sys.platform.startswith('linux'):
    sys.stderr.write('This script works only on Linux.\n')
    return 1

  if len(sys.argv) < 2:
    sys.stderr.write("""Usage:
%s /path/to/maps [/path/to/output_data_dir/]
""" % sys.argv[0])
    return 1

  if len(sys.argv) == 2:
    # No explicit output directory: the name is derived from the maps path
    # and logging is more verbose.
    return prepare_symbol_info(sys.argv[1], loglevel=logging.DEBUG)
  return prepare_symbol_info(sys.argv[1], sys.argv[2], loglevel=logging.INFO)


# Run main() only when executed as a script; its return value is passed to
# sys.exit() as the process exit status.
if __name__ == '__main__':
  sys.exit(main())
Loading

0 comments on commit 5875d06

Please sign in to comment.