Skip to content

Fix parsing of dwarfdump output to be more resilient #20777

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Dec 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -596,3 +596,4 @@ a license to everyone to use it as detailed in LICENSE.)
* 郑苏波 (Super Zheng) <superzheng@tencent.com>
* James Hu <jameshu2022@gmail.com>
* Jerry Zhuang <jerry.zhuang@jwzg.com>
* Taisei Kon <kinsei0916@gmail.com>
71 changes: 71 additions & 0 deletions test/other/wasm_sourcemap_extract_comp_dir_map/bar.wasm.dump
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
bar.wasm: file format WASM

.debug_info contents:
0x00000000: Compile Unit: length = 0x00000129, format = DWARF32, version = 0x0004, abbr_offset = 0x0000, addr_size = 0x04 (next unit at 0x0000012d)

0x0000000b: DW_TAG_compile_unit
DW_AT_stmt_list (0x00000000)
DW_AT_ranges (0x00000000
[0x00000002, 0x0000000a)
[0x0000000b, 0x00000015)
[0x00000016, 0x00000030)
[0x00000031, 0x00000039))
DW_AT_name ("system/lib/compiler-rt/stack_ops.S")
DW_AT_comp_dir ("/emsdk/emscripten")
DW_AT_producer ("clang version 18.0.0 (https://github.com/llvm/llvm-project 269685545e439ad050b67740533c59f965cae955)")
DW_AT_language (DW_LANG_Mips_Assembler)

.debug_line contents:
debug_line[0x00000000]
Line table prologue:
total_length: 0x00000091
format: DWARF32
version: 4
prologue_length: 0x00000043
min_inst_length: 1
max_ops_per_inst: 1
default_is_stmt: 1
line_base: -5
line_range: 14
opcode_base: 13
standard_opcode_lengths[DW_LNS_copy] = 0
standard_opcode_lengths[DW_LNS_advance_pc] = 1
standard_opcode_lengths[DW_LNS_advance_line] = 1
standard_opcode_lengths[DW_LNS_set_file] = 1
standard_opcode_lengths[DW_LNS_set_column] = 1
standard_opcode_lengths[DW_LNS_negate_stmt] = 0
standard_opcode_lengths[DW_LNS_set_basic_block] = 0
standard_opcode_lengths[DW_LNS_const_add_pc] = 0
standard_opcode_lengths[DW_LNS_fixed_advance_pc] = 1
standard_opcode_lengths[DW_LNS_set_prologue_end] = 0
standard_opcode_lengths[DW_LNS_set_epilogue_begin] = 0
standard_opcode_lengths[DW_LNS_set_isa] = 1
include_directories[ 1] = "system/lib/compiler-rt"
file_names[ 1]:
name: "stack_ops.S"
dir_index: 1
mod_time: 0x00000000
length: 0x00000000

Address Line Column File ISA Discriminator OpIndex Flags
------------------ ------ ------ ------ --- ------------- ------- -------------
0x0000000000000002 18 0 1 0 0 0 is_stmt
0x0000000000000009 19 0 1 0 0 0 is_stmt
0x000000000000000a 19 0 1 0 0 0 is_stmt end_sequence
0x000000000000000b 23 0 1 0 0 0 is_stmt
0x000000000000000e 24 0 1 0 0 0 is_stmt
0x0000000000000014 25 0 1 0 0 0 is_stmt
0x0000000000000015 25 0 1 0 0 0 is_stmt end_sequence
0x0000000000000019 30 0 1 0 0 0 is_stmt
0x000000000000001f 32 0 1 0 0 0 is_stmt
0x0000000000000021 34 0 1 0 0 0 is_stmt
0x0000000000000022 36 0 1 0 0 0 is_stmt
0x0000000000000024 37 0 1 0 0 0 is_stmt
0x0000000000000025 38 0 1 0 0 0 is_stmt
0x0000000000000027 39 0 1 0 0 0 is_stmt
0x000000000000002d 40 0 1 0 0 0 is_stmt
0x000000000000002f 41 0 1 0 0 0 is_stmt
0x0000000000000030 41 0 1 0 0 0 is_stmt end_sequence
0x0000000000000031 45 0 1 0 0 0 is_stmt
0x0000000000000038 46 0 1 0 0 0 is_stmt
0x0000000000000039 46 0 1 0 0 0 is_stmt end_sequence
51 changes: 51 additions & 0 deletions test/other/wasm_sourcemap_extract_comp_dir_map/foo.wasm.dump
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
foo.wasm: file format WASM

.debug_info contents:
0x00000000: Compile Unit: length = 0x0000003a, format = DWARF32, version = 0x0004, abbr_offset = 0x0000, addr_size = 0x04 (next unit at 0x0000003e)

0x0000000b: DW_TAG_compile_unit
DW_AT_producer ("clang version 18.0.0 (https://github.com/llvm/llvm-project 269685545e439ad050b67740533c59f965cae955)")
DW_AT_language (DW_LANG_C11)
DW_AT_name ("system/lib/compiler-rt/__trap.c")
DW_AT_stmt_list (0x00000000)
DW_AT_comp_dir ("/emsdk/emscripten")
DW_AT_low_pc (0x00000002)
DW_AT_high_pc (0x00000006)

.debug_line contents:
debug_line[0x00000000]
Line table prologue:
total_length: 0x00000059
format: DWARF32
version: 4
prologue_length: 0x00000040
min_inst_length: 1
max_ops_per_inst: 1
default_is_stmt: 1
line_base: -5
line_range: 14
opcode_base: 13
standard_opcode_lengths[DW_LNS_copy] = 0
standard_opcode_lengths[DW_LNS_advance_pc] = 1
standard_opcode_lengths[DW_LNS_advance_line] = 1
standard_opcode_lengths[DW_LNS_set_file] = 1
standard_opcode_lengths[DW_LNS_set_column] = 1
standard_opcode_lengths[DW_LNS_negate_stmt] = 0
standard_opcode_lengths[DW_LNS_set_basic_block] = 0
standard_opcode_lengths[DW_LNS_const_add_pc] = 0
standard_opcode_lengths[DW_LNS_fixed_advance_pc] = 1
standard_opcode_lengths[DW_LNS_set_prologue_end] = 0
standard_opcode_lengths[DW_LNS_set_epilogue_begin] = 0
standard_opcode_lengths[DW_LNS_set_isa] = 1
include_directories[ 1] = "system/lib/compiler-rt"
file_names[ 1]:
name: "__trap.c"
dir_index: 1
mod_time: 0x00000000
length: 0x00000000

Address Line Column File ISA Discriminator OpIndex Flags
------------------ ------ ------ ------ --- ------------- ------- -------------
0x0000000000000003 2 3 1 0 0 0 is_stmt prologue_end
0x0000000000000004 3 1 1 0 0 0 is_stmt
0x0000000000000006 3 1 1 0 0 0 is_stmt end_sequence
19 changes: 19 additions & 0 deletions test/test_other.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from functools import wraps
import glob
import gzip
import importlib
import itertools
import json
import os
Expand Down Expand Up @@ -9585,6 +9586,24 @@ def test(infile, source_map_added_dir=''):
ensure_dir('inner')
test('inner/a.cpp', 'inner')

def test_wasm_sourcemap_extract_comp_dir_map(self):
  """Check `extract_comp_dir_map` against two saved dwarfdump outputs.

  Both dumps describe a single compile unit whose `DW_AT_stmt_list` is
  `0x00000000` and whose `DW_AT_comp_dir` is `/emsdk/emscripten`, but they
  list the `DW_AT_*` attributes in a different order.
  """
  # The tool's module name contains a dash, so it cannot be named in a plain
  # `import` statement; load it through importlib instead.
  wasm_sourcemap = importlib.import_module('tools.wasm-sourcemap')
  expected = {'0x00000000': '/emsdk/emscripten'}

  # Make sure we can extract the compilation directories no matter what the
  # order of `DW_AT_*` attributes is.
  for dump_file in ('foo.wasm.dump', 'bar.wasm.dump'):
    dump_path = test_file(
        os.path.join('other/wasm_sourcemap_extract_comp_dir_map', dump_file))
    comp_dirs = wasm_sourcemap.extract_comp_dir_map(read_file(dump_path))
    self.assertEqual(comp_dirs, expected)

def test_emsymbolizer(self):
def check_dwarf_loc_info(address, funcs, locs):
out = self.run_process(
Expand Down
24 changes: 16 additions & 8 deletions tools/wasm-sourcemap.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,19 @@ def remove_dead_entries(entries):
block_start = cur_entry


def extract_comp_dir_map(text):
  """Map each compile unit's `DW_AT_stmt_list` value to its `DW_AT_comp_dir`.

  `text` is split at every `DW_TAG_compile_unit` header line; whatever comes
  before the first header is ignored. Within each resulting piece the two
  attributes are searched for independently, so their relative order (and any
  attributes printed between them) does not matter.

  Returns a dict keyed by the `DW_AT_stmt_list` value exactly as it appears in
  the text (e.g. `'0x00000000'`). A unit with no `DW_AT_stmt_list` is left out;
  a unit with no matching `DW_AT_comp_dir` maps to the empty string.
  """
  compile_units = re.split(r"0x[0-9a-f]*: DW_TAG_compile_unit", text)[1:]
  comp_dir_by_stmt_list = {}
  for unit in compile_units:
    stmt_list = re.search(r"DW_AT_stmt_list\s+\((0x[0-9a-f]*)\)", unit)
    if stmt_list is None:
      # Without a line-table offset there is no key to file this unit under.
      continue
    comp_dir = re.search(r"DW_AT_comp_dir\s+\(\"([^\"]+)\"\)", unit)
    comp_dir_by_stmt_list[stmt_list.group(1)] = (
        '' if comp_dir is None else comp_dir.group(1))
  return comp_dir_by_stmt_list


def read_dwarf_entries(wasm, options):
if options.dwarfdump_output:
output = Path(options.dwarfdump_output).read_bytes()
Expand All @@ -198,14 +211,9 @@ def read_dwarf_entries(wasm, options):

entries = []
debug_line_chunks = re.split(r"debug_line\[(0x[0-9a-f]*)\]", output.decode('utf-8'))
maybe_debug_info_content = debug_line_chunks[0]
for i in range(1, len(debug_line_chunks), 2):
stmt_list = debug_line_chunks[i]
comp_dir_match = re.search(r"DW_AT_stmt_list\s+\(" + stmt_list + r"\)\s+" +
r"DW_AT_comp_dir\s+\(\"([^\"]+)", maybe_debug_info_content)
comp_dir = comp_dir_match.group(1) if comp_dir_match is not None else ""

line_chunk = debug_line_chunks[i + 1]
map_stmt_list_to_comp_dir = extract_comp_dir_map(debug_line_chunks[0])
for stmt_list, line_chunk in zip(debug_line_chunks[1::2], debug_line_chunks[2::2]):
comp_dir = map_stmt_list_to_comp_dir.get(stmt_list, '')

# include_directories[ 1] = "/Users/yury/Work/junk/sqlite-playground/src"
# file_names[ 1]:
Expand Down