diff --git a/tools/find_runtime_symbols/README b/tools/find_runtime_symbols/README new file mode 100644 index 00000000000000..ee5c2ac88ca5d0 --- /dev/null +++ b/tools/find_runtime_symbols/README @@ -0,0 +1,24 @@ +This script maps runtime addresses to symbol names. It is robust against +Address Space Layout Randomization (ASLR) since it uses runtime addresses with +runtime mapping information (/proc/.../maps). +It works like 'pprof --symbols' in gperftools. + + +Step 1: Prepare symbol information. + +It is required to collect symbol information before mapping runtime addresses +to symbol names. + +./prepare_symbol_info.py /path/to/maps [/another/path/to/symbol_info_dir] + +The required 'maps' file is /proc/.../maps of the process at runtime. + + +Step 2: Find symbols. + +./find_runtime_symbols.py /path/to/symbol_info_dir < addresses.txt + +'symbol_info_dir' is the directory produced by Step 1. +Standard input should be a list of hex addresses to map, one per line. + +The results will be printed to stdout like 'pprof --symbols'. diff --git a/tools/find_runtime_symbols/find_runtime_symbols.py b/tools/find_runtime_symbols/find_runtime_symbols.py new file mode 100755 index 00000000000000..1c96c7fddafa01 --- /dev/null +++ b/tools/find_runtime_symbols/find_runtime_symbols.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python +# Copyright (c) 2012 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file.
import json
import logging
import os
import re
import sys

from parse_proc_maps import parse_proc_maps
from procedure_boundaries import get_procedure_boundaries_from_nm_bsd
from util import executable_condition


def _determine_symbol_name(address, symbol):
  """Returns symbol.name, or the address as '0x%016x' if symbol is falsy."""
  if symbol:
    return symbol.name
  return '0x%016x' % address


class _ListOutput(object):
  """Outputter that appends every resolved name to a list, in call order."""

  def __init__(self, result):
    self.result = result

  def output(self, address, symbol=None):
    self.result.append(_determine_symbol_name(address, symbol))


class _DictOutput(object):
  """Outputter that stores every resolved name in a dict keyed by address."""

  def __init__(self, result):
    self.result = result

  def output(self, address, symbol=None):
    self.result[address] = _determine_symbol_name(address, symbol)


class _FileOutput(object):
  """Outputter that writes one resolved name per line to a file-like object.

  When with_address is true, every line is prefixed with the address
  formatted as '%016x' and a single space.
  """

  def __init__(self, result, with_address):
    self.result = result
    self.with_address = with_address

  def output(self, address, symbol=None):
    symbol_name = _determine_symbol_name(address, symbol)
    if self.with_address:
      self.result.write('%016x %s\n' % (address, symbol_name))
    else:
      self.result.write('%s\n' % symbol_name)


def _find_runtime_symbols(
    prepared_data_dir, addresses, outputter, loglevel=logging.WARN):
  """Resolves runtime addresses to symbols and reports them to outputter.

  Reads 'maps' and 'nm.json' from prepared_data_dir, loads the nm dump of
  every executable mapping listed in nm.json with format 'bsd', and then
  calls outputter.output(address, symbol) once per address. symbol is None
  when the address is in no executable mapping or the mapping has no table.

  Args:
      prepared_data_dir: Directory holding 'maps', 'nm.json' and the nm dumps.
      addresses: Iterable of ints or hexadecimal strings. Strings are
          stripped; strings that are empty after stripping are skipped.
      outputter: Object with an output(address, symbol=None) method.
      loglevel: Level applied to the 'find_runtime_symbols' logger.

  Returns:
      0 on success, 1 if prepared_data_dir is missing or not a directory.
  """
  log = logging.getLogger('find_runtime_symbols')
  log.setLevel(loglevel)
  # Attach the stream handler only once; adding one per call would print
  # every message once per previous invocation.
  if not log.handlers:
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter('%(message)s'))
    log.addHandler(handler)

  if not os.path.exists(prepared_data_dir):
    log.warning("Nothing found: %s" % prepared_data_dir)
    return 1
  if not os.path.isdir(prepared_data_dir):
    log.warning("Not a directory: %s" % prepared_data_dir)
    return 1

  with open(os.path.join(prepared_data_dir, 'maps'), mode='r') as f:
    maps = parse_proc_maps(f)

  with open(os.path.join(prepared_data_dir, 'nm.json'), mode='r') as f:
    nm_files = json.load(f)

  # Filter once; the filter is the same for every address below.
  executable_entries = list(maps.iter(executable_condition))

  symbol_table = {}
  for entry in executable_entries:
    if entry.name not in nm_files:
      continue
    nm_info = nm_files[entry.name]
    if nm_info['format'] == 'bsd':
      nm_path = os.path.join(prepared_data_dir, nm_info['file'])
      with open(nm_path, mode='r') as f:
        symbol_table[entry.name] = get_procedure_boundaries_from_nm_bsd(
            f, nm_info['mangled'])

  for address in addresses:
    if isinstance(address, str):
      address = address.strip()
      if not address:
        # E.g. a trailing blank line on stdin.
        continue
      address = int(address, 16)

    symbol = None
    for entry in executable_entries:
      if entry.begin <= address < entry.end:
        if entry.name in symbol_table:
          # Rebase the runtime address with the mapping's begin and offset
          # before looking it up in the table built from the nm dump.
          symbol = symbol_table[entry.name].find_procedure(
              address - (entry.begin - entry.offset))
        break
    outputter.output(address, symbol)

  return 0


def find_runtime_symbols_list(prepared_data_dir, addresses):
  """Returns a list of resolved names, one per processed address."""
  result = []
  _find_runtime_symbols(prepared_data_dir, addresses, _ListOutput(result))
  return result


def find_runtime_symbols_dict(prepared_data_dir, addresses):
  """Returns a dict mapping each processed (integer) address to its name."""
  result = {}
  _find_runtime_symbols(prepared_data_dir, addresses, _DictOutput(result))
  return result


def find_runtime_symbols_file(prepared_data_dir, addresses, f):
  """Writes one resolved name per line to f.

  Returns:
      The status code of _find_runtime_symbols (0 on success, 1 on failure)
      so that command-line callers can propagate it as the exit status.
  """
  return _find_runtime_symbols(
      prepared_data_dir, addresses, _FileOutput(f, False))


def main():
  # FIX: Accept only .pre data
  if len(sys.argv) < 2:
    sys.stderr.write("""Usage:
%s /path/to/prepared_data_dir/ < addresses.txt
""" % sys.argv[0])
    return 1

  return find_runtime_symbols_file(sys.argv[1], sys.stdin, sys.stdout)


if __name__ == '__main__':
  sys.exit(main())

# diff --git a/tools/find_runtime_symbols/parse_proc_maps.py b/tools/find_runtime_symbols/parse_proc_maps.py
# new file mode 100755
# index 00000000000000..13c15681cac223
# --- /dev/null
# +++ b/tools/find_runtime_symbols/parse_proc_maps.py
# @@ -0,0 +1,104 @@
#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import re
import sys


# Raw strings keep '\s', '\S' and '\d' as regex escapes instead of invalid
# Python string escapes.
_MAPS_PATTERN = re.compile(
    r'^([a-f0-9]+)-([a-f0-9]+)\s+(.)(.)(.)(.)\s+([a-f0-9]+)\s+(\S+):(\S+)\s+'
    r'(\d+)\s+(\S+)$', re.IGNORECASE)


class ProcMapsEntry(object):
  """A class representing one line in /proc/.../maps.

  parse_proc_maps() stores begin, end and offset as ints parsed from
  hexadecimal, inode as an int parsed from decimal, and the four permission
  columns (readable, writable, executable, private), major, minor and name
  as the matched strings.
  """

  def __init__(
      self, begin, end, readable, writable, executable, private, offset,
      major, minor, inode, name):
    self.begin = begin
    self.end = end
    self.readable = readable
    self.writable = writable
    self.executable = executable
    self.private = private
    self.offset = offset
    self.major = major
    self.minor = minor
    self.inode = inode
    self.name = name


class ProcMaps(object):
  """A class representing contents in /proc/.../maps.

  Entries are keyed by their begin address and iterated in ascending begin
  order. Sorting is deferred until iteration and only done if entries were
  appended out of order.
  """

  def __init__(self):
    self._sorted_indexes = []
    self._dictionary = {}
    self._sorted = True

  def append(self, entry):
    """Adds entry, remembering whether the begin addresses stay sorted."""
    if self._sorted_indexes and self._sorted_indexes[-1] > entry.begin:
      self._sorted = False
    self._sorted_indexes.append(entry.begin)
    self._dictionary[entry.begin] = entry

  def iter(self, condition=None):
    """Yields entries in begin order for which condition(entry) is truthy.

    A falsy condition (the default) yields every entry.
    """
    if not self._sorted:
      self._sorted_indexes.sort()
      self._sorted = True
    for index in self._sorted_indexes:
      entry = self._dictionary[index]
      if not condition or condition(entry):
        yield entry

  def __iter__(self):
    return self.iter(None)


def parse_proc_maps(f):
  """Parses an iterable of /proc/.../maps lines into a ProcMaps.

  Lines that do not match _MAPS_PATTERN are ignored. The pattern requires a
  trailing name without whitespace, so lines without one are skipped.
  """
  table = ProcMaps()
  for line in f:
    matched = _MAPS_PATTERN.match(line)
    if matched:
      table.append(ProcMapsEntry(
          int(matched.group(1), 16),  # begin
          int(matched.group(2), 16),  # end
          matched.group(3),           # readable
          matched.group(4),           # writable
          matched.group(5),           # executable
          matched.group(6),           # private
          int(matched.group(7), 16),  # offset
          matched.group(8),           # major
          matched.group(9),           # minor
          int(matched.group(10), 10), # inode
          matched.group(11)           # name
          ))

  return table


def main():
  if len(sys.argv) < 2:
    sys.stderr.write("""Usage:
%s /path/to/maps
""" % sys.argv[0])
    return 1

  with open(sys.argv[1], mode='r') as f:
    maps = parse_proc_maps(f)

  for entry in maps:
    # Parenthesized single argument: valid as both a Python 2 print
    # statement and a Python 3 print() call.
    print("%016x-%016x +%06x %s" % (
        entry.begin, entry.end, entry.offset, entry.name))

  return 0


if __name__ == '__main__':
  sys.exit(main())

# diff --git a/tools/find_runtime_symbols/prepare_symbol_info.py b/tools/find_runtime_symbols/prepare_symbol_info.py
# new file mode 100755
# index 00000000000000..57fcfbc3a83d04
# --- /dev/null
# +++ b/tools/find_runtime_symbols/prepare_symbol_info.py
# @@ -0,0 +1,121 @@
#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import json
import logging
import os
import re
import shutil
import subprocess
import sys
import tempfile

from parse_proc_maps import parse_proc_maps
from util import executable_condition


def _default_output_dir(maps_path):
  """Derives an output directory name from maps_path.

  'NAME.maps' gives 'NAME.pre'; a path whose realpath is '/proc/X/maps'
  gives 'X.pre'; anything else gives the basename of maps_path plus '.pre'.
  """
  matched = re.match(r'^(.*)\.maps$', os.path.basename(maps_path))
  if matched:
    return matched.group(1) + '.pre'
  matched = re.match(r'^/proc/(.*)/maps$', os.path.realpath(maps_path))
  if matched:
    return matched.group(1) + '.pre'
  # TODO(dmikurube): Find another candidate for output_dir_path.
  return os.path.basename(maps_path) + '.pre'


def _dump_demangled_symbols(binary_path, output_file, log):
  """Writes the output of 'nm -n --format bsd binary_path | c++filt'.

  Both commands are started without a shell, so binary_path is passed to nm
  verbatim. Their stderr goes to temporary files rather than pipes, so
  neither process can block on a full stderr pipe while it is being waited
  for. The captured stderr lines are logged at debug level.

  Returns:
      True only if both nm and c++filt could be started and exited with 0.
  """
  with tempfile.TemporaryFile() as nm_stderr:
    with tempfile.TemporaryFile() as cppfilt_stderr:
      p_nm = None
      try:
        p_nm = subprocess.Popen(
            ['nm', '-n', '--format', 'bsd', binary_path],
            stdout=subprocess.PIPE, stderr=nm_stderr)
        p_cppfilt = subprocess.Popen(
            ['c++filt'],
            stdin=p_nm.stdout, stdout=output_file, stderr=cppfilt_stderr)
      except OSError as e:
        log.debug('Failed to run nm or c++filt: %s' % e)
        if p_nm is not None:
          p_nm.stdout.close()
          p_nm.wait()
        return False

      # Drop the parent's copy of the pipe so that nm gets a broken pipe
      # instead of blocking forever if c++filt exits early.
      p_nm.stdout.close()
      nm_succeeded = p_nm.wait() == 0
      cppfilt_succeeded = p_cppfilt.wait() == 0

      for stderr_file in (nm_stderr, cppfilt_stderr):
        stderr_file.seek(0)
        for line in stderr_file:
          log.debug(line.rstrip())

  return nm_succeeded and cppfilt_succeeded


def prepare_symbol_info(maps_path, output_dir_path=None, loglevel=logging.WARN):
  """Collects symbol dumps for the executable mappings listed in maps_path.

  Copies maps_path to <output_dir_path>/maps, dumps the demangled nm output
  of every entry accepted by executable_condition into a '.nm' file in the
  output directory, and writes <output_dir_path>/nm.json mapping each entry
  name to {'file', 'format': 'bsd', 'mangled': False}.

  Args:
      maps_path: Path to a copy of /proc/.../maps.
      output_dir_path: Directory to fill. It must not exist yet or must be
          an empty directory. Derived from maps_path when omitted.
      loglevel: Level applied to the 'prepare_symbol_info' logger.

  Returns:
      0 on success, 1 if output_dir_path exists and is not an empty directory.
  """
  log = logging.getLogger('prepare_symbol_info')
  log.setLevel(loglevel)
  # Attach the stream handler only once; adding one per call would print
  # every message once per previous invocation.
  if not log.handlers:
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter('%(message)s'))
    log.addHandler(handler)

  if not output_dir_path:
    output_dir_path = _default_output_dir(maps_path)

  log.info('Data for profiling will be collected in "%s".' % output_dir_path)
  if os.path.exists(output_dir_path):
    if os.path.isdir(output_dir_path) and not os.listdir(output_dir_path):
      log.warning(
          'Using an empty directory existing at "%s".' % output_dir_path)
    else:
      log.warning('A file or a directory exists at "%s".' % output_dir_path)
      return 1
  else:
    log.info('Creating a new directory at "%s".' % output_dir_path)
    os.mkdir(output_dir_path)

  shutil.copyfile(maps_path, os.path.join(output_dir_path, 'maps'))

  with open(maps_path, mode='r') as f:
    maps = parse_proc_maps(f)

  log.debug('Listing up symbols.')
  nm_files = {}
  for entry in maps.iter(executable_condition):
    log.debug(' %016x-%016x +%06x %s' % (
        entry.begin, entry.end, entry.offset, entry.name))
    if entry.name in nm_files:
      # Already dumped for an earlier mapping of the same file; a second
      # dump would only leave an unreferenced '.nm' file behind.
      continue
    with tempfile.NamedTemporaryFile(
        prefix=os.path.basename(entry.name) + '.',
        suffix='.nm', delete=False, mode='w', dir=output_dir_path) as f:
      nm_filename = os.path.realpath(f.name)
      succeeded = _dump_demangled_symbols(entry.name, f, log)

    if succeeded:
      nm_files[entry.name] = {
          'file': os.path.basename(nm_filename),
          'format': 'bsd',
          'mangled': False}
    else:
      os.remove(nm_filename)

  with open(os.path.join(output_dir_path, 'nm.json'), 'w') as f:
    json.dump(nm_files, f, indent=2, sort_keys=True)

  log.info('Collected symbol information at "%s".' % output_dir_path)
  return 0


def main():
  if not sys.platform.startswith('linux'):
    sys.stderr.write('This script work only on Linux.')
    return 1

  if len(sys.argv) < 2:
    sys.stderr.write("""Usage:
%s /path/to/maps [/path/to/output_data_dir/]
""" % sys.argv[0])
    return 1
  elif len(sys.argv) == 2:
    return prepare_symbol_info(sys.argv[1], loglevel=logging.DEBUG)
  else:
    return prepare_symbol_info(sys.argv[1], sys.argv[2],
                               loglevel=logging.INFO)


if __name__ == '__main__':
  sys.exit(main())

# diff --git a/tools/find_runtime_symbols/procedure_boundaries.py b/tools/find_runtime_symbols/procedure_boundaries.py
# new file mode 100644
# index 00000000000000..be1d76cc3345ba
# --- /dev/null
# +++ b/tools/find_runtime_symbols/procedure_boundaries.py
# @@ -0,0 +1,148 @@
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import bisect
import os
import re
import sys


_ARGUMENT_TYPE_PATTERN = re.compile(r'\([^()]*\)(\s*const)?')
_TEMPLATE_ARGUMENT_PATTERN = re.compile(r'<[^<>]*>')
_LEADING_TYPE_PATTERN = re.compile(r'^.*\s+(\w+::)')


class ParsingException(Exception):
  """Raised when a line of nm output has an unexpected shape."""

  def __str__(self):
    return repr(self.args[0])


class ProcedureBoundary(object):
  """A class for a procedure symbol and an address range for the symbol."""

  def __init__(self, start, end, name):
    self.start = start
    self.end = end
    self.name = name


class ProcedureBoundaryTable(object):
  """A class of a set of ProcedureBoundary.

  Entries are keyed by their start address. Sorting of the start addresses
  is deferred until the first lookup after an out-of-order append.
  """

  def __init__(self):
    self.sorted_value_list = []
    self.dictionary = {}
    self.sorted = True

  def append(self, entry):
    """Adds entry unless its start equals the most recently appended start."""
    if self.sorted_value_list:
      if self.sorted_value_list[-1] > entry.start:
        self.sorted = False
      elif self.sorted_value_list[-1] == entry.start:
        return
    self.sorted_value_list.append(entry.start)
    self.dictionary[entry.start] = entry

  def find_procedure(self, address):
    """Returns the entry with the greatest start address <= address.

    Returns None if the table is empty or address is below every start.
    The end of the entry is not checked.
    """
    if not self.sorted:
      self.sorted_value_list.sort()
      self.sorted = True
    # bisect_right, not bisect_left: an address equal to a start address
    # belongs to the procedure starting there, not to the previous one.
    found_index = bisect.bisect_right(self.sorted_value_list, address)
    if found_index == 0:
      # Without this check, index -1 would silently pick the last entry.
      return None
    return self.dictionary[self.sorted_value_list[found_index - 1]]


def _get_short_function_name(function):
  """Strips argument lists, template arguments and a leading type.

  Parenthesized groups (with an optional trailing 'const') and '<...>'
  groups are removed repeatedly, innermost first, until none is left. Then
  everything up to the last whitespace that precedes a 'word::' qualifier
  is dropped.
  """
  while True:
    function, number = _ARGUMENT_TYPE_PATTERN.subn('', function)
    if not number:
      break
  while True:
    function, number = _TEMPLATE_ARGUMENT_PATTERN.subn('', function)
    if not number:
      break
  return _LEADING_TYPE_PATTERN.sub(r'\g<1>', function)


def get_procedure_boundaries_from_nm_bsd(f, mangled=False):
  """Gets procedure boundaries from a result of nm -n --format bsd.

  Args:
      f: A file object containing a result of nm.  It must be sorted and
          in BSD-style.  (Use "[eu-]nm -n --format bsd")
      mangled: If false (the default), every name is shortened with
          _get_short_function_name() before it is stored.

  Returns:
      A result ProcedureBoundaryTable object.

  Raises:
      ParsingException: A line does not split into two or three fields, or
          a two-field line starts with a field that is not 1, 8 or 16
          characters long.
  """
  symbol_table = ProcedureBoundaryTable()

  last_start = 0
  routine = ''

  for line in f:
    symbol_info = line.rstrip().split(None, 2)
    if len(symbol_info) == 3:
      if len(symbol_info[0]) == 1:
        # A one-character first field is the symbol type: the line has no
        # address and the name itself contains whitespace.
        symbol_info = line.split(None, 1)
        (sym_type, this_routine) = symbol_info
        sym_value = ''
      else:
        (sym_value, sym_type, this_routine) = symbol_info
    elif len(symbol_info) == 2:
      if len(symbol_info[0]) == 1:
        (sym_type, this_routine) = symbol_info
        sym_value = ''
      elif len(symbol_info[0]) == 8 or len(symbol_info[0]) == 16:
        (sym_value, this_routine) = symbol_info
        sym_type = ' '
      else:
        raise ParsingException('Invalid output 1 from (eu-)nm.')
    else:
      raise ParsingException('Invalid output 2 from (eu-)nm.')

    # Lines without an address cannot contribute a boundary.
    if sym_value == '':
      continue

    start_val = int(sym_value, 16)

    # It's possible for two symbols to share the same address, if
    # one is a zero-length variable (like __start_google_malloc) or
    # one symbol is a weak alias to another (like __libc_malloc).
    # In such cases, we want to ignore all values except for the
    # actual symbol, which in nm-speak has type "T".  The logic
    # below does this, though it's a bit tricky: what happens when
    # we have a series of lines with the same address, is the first
    # one gets queued up to be processed.  However, it won't
    # *actually* be processed until later, when we read a line with
    # a different address.  That means that as long as we're reading
    # lines with the same address, we have a chance to replace that
    # item in the queue, which we do whenever we see a 'T' entry --
    # that is, a line with type 'T'.  If we never see a 'T' entry,
    # we'll just go ahead and process the first entry (which never
    # got touched in the queue), and ignore the others.
    if start_val == last_start and (sym_type == 't' or sym_type == 'T'):
      # We are the 'T' symbol at this address, replace previous symbol.
      routine = this_routine
      continue
    elif start_val == last_start:
      # We're not the 'T' symbol at this address, so ignore us.
      continue

    # Tag this routine with the starting address in case the image
    # has multiple occurrences of this routine.  We use a syntax
    # that resembles template parameters that are automatically
    # stripped out by _get_short_function_name()
    this_routine += "<%016x>" % start_val

    # Flush the queued routine: it spans from its own start up to the
    # start of the symbol just read.
    if not mangled:
      routine = _get_short_function_name(routine)
    symbol_table.append(ProcedureBoundary(last_start, start_val, routine))

    last_start = start_val
    routine = this_routine

  # Flush the last queued routine; no following symbol bounds it, so its
  # end is recorded as its start.
  if not mangled:
    routine = _get_short_function_name(routine)
  symbol_table.append(ProcedureBoundary(last_start, last_start, routine))
  return symbol_table

# diff --git a/tools/find_runtime_symbols/util.py b/tools/find_runtime_symbols/util.py
# new file mode 100644
# index 00000000000000..87daf434946fba
# --- /dev/null
# +++ b/tools/find_runtime_symbols/util.py
# @@ -0,0 +1,10 @@
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import re

# Compiled once at import time.  A raw string keeps '\S', '\.', '\d' and
# '\w' as regex escapes instead of invalid Python string escapes.
_EXECUTABLE_NAME_PATTERN = re.compile(
    r'\S+(\.(so|dll|dylib|bundle)|chrome)((\.\d+)+\w*(\.\d+){0,3})?')


def executable_condition(entry):
  """Tells whether a maps entry is an executable mapping of a binary.

  Args:
      entry: An object with 'executable' and 'name' attributes.

  Returns:
      A truthy match object if entry.executable is 'x' and entry.name starts
      with non-space characters followed by '.so', '.dll', '.dylib',
      '.bundle' or 'chrome' (optionally followed by a version suffix).
      Otherwise False (not executable) or None (name does not match).
  """
  return (entry.executable == 'x' and
          _EXECUTABLE_NAME_PATTERN.match(entry.name))