Files
wamr/test-tools/addr2line/addr2line.py
Enrico Loparco 0e4c4799b1 Get location info from function indexes in addr2line script (#3206)
Update the `addr2line` script so that:
- line info is printed in a more convenient format, e.g.
```
0: c
        at wasm-micro-runtime/test-tools/addr2line/trap.c:5:1
1: b
        at wasm-micro-runtime/test-tools/addr2line/trap.c:11:12
2: a
        at wasm-micro-runtime/test-tools/addr2line/trap.c:17:12
```
similar to how Rust prints stack traces when there's a panic. In an IDE, the user
can conveniently click on the printed path and be redirected to the file line.
- a new `--no-addr` argument can be provided to the script.

It can be used in fast interpreter mode (that is not supported by the script otherwise)
or with older wamr versions (where the stack trace only had the function index info
and not the function address). In that case, `wasm-objdump` is used to get the function
name from the index and `llvm-dwarfdump` to obtain the location info (where the line
refers to the start of the function).
2024-03-08 10:20:04 +08:00

297 lines
8.9 KiB
Python

#!/usr/bin/env python3
#
# Copyright (C) 2019 Intel Corporation. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
import argparse
import os
import re
import shlex
import subprocess
import sys
from pathlib import Path
from typing import Optional
"""
This is a tool to convert addresses, which are from a call-stack dump generated by iwasm, into line info for a wasm file.
When a wasm file is compiled with debug info, each address can be translated into line info.
For example, there is a call-stack dump:
```
#00: 0x0a04 - $f18
#01: 0x08e4 - $f11
#02: 0x096f - $f12
#03: 0x01aa - _start
```
- store the call-stack dump into a file, e.g. call_stack.txt
- run the following command to convert the address into line info:
```
$ cd test-tools/addr2line
$ python3 addr2line.py --wasi-sdk <wasi-sdk installation> --wabt <wabt installation> --wasm-file <wasm file path> call_stack.txt
```
The script uses *wasm-objdump* from wabt to find the start of the code section and turn each address into an offset, then uses *llvm-dwarfdump* to look up the line info for each address
in the call-stack dump.
- if addresses are not available in the stack trace (i.e. iwasm <= 1.3.2) or iwasm is used in fast interpreter mode,
run the following command to convert the function index into line info (passing the `--no-addr` option):
```
$ python3 addr2line.py --wasi-sdk <wasi-sdk installation> --wabt <wabt installation> --wasm-file <wasm file path> call_stack.txt --no-addr
```
The script uses *wasm-objdump* from wabt to get the function names corresponding to function indexes, then uses *llvm-dwarfdump* to look up the line info for each
function index in the call-stack dump.
"""
def get_code_section_start(wasm_objdump: Path, wasm_file: Path) -> int:
    """
    Find the start offset of the Code section in a wasm file.

    Runs `<wasm_objdump> -h <wasm_file>` and scans the section headers.
    For example, if the Code section header looks like:
        Code start=0x0000017c end=0x00004382 (size=0x00004206) count: 47
    the start offset is 0x0000017c.

    Returns the start offset, or -1 when the output mentions no
    `.debug_info` section (a message is printed in that case) or when no
    Code section header is found.

    Raises subprocess.CalledProcessError if wasm-objdump exits with a
    non-zero status.
    """
    # Pass the arguments as a list: building a string and re-splitting it
    # would break paths that contain spaces or quotes.
    p = subprocess.run(
        [str(wasm_objdump), "-h", str(wasm_file)],
        check=True,
        capture_output=True,
        text=True,
    )
    # In text mode newlines are already normalised, so splitting on
    # os.linesep would fail on Windows; splitlines() handles every variant.
    outputs = [line.strip() for line in p.stdout.splitlines()]

    # if there is no .debug section, return -1
    if not any(".debug_info" in line for line in outputs):
        print(f"No .debug_info section found {wasm_file}")
        return -1

    for line in outputs:
        fields = line.split()
        # Only accept the section header itself ("Code start=0x...").
        # Other lines may mention "Code" too, e.g. the banner line that
        # echoes the wasm file name.
        if len(fields) >= 2 and fields[0] == "Code" and fields[1].startswith("start="):
            return int(fields[1].split("=", 1)[1], 16)

    return -1
def get_line_info_from_function_addr(
    dwarf_dump: Path, wasm_file: Path, offset: int
) -> tuple[str, str, str, str]:
    """
    Find the location info of a given offset in a wasm file.

    Runs `<dwarf_dump> --lookup=<offset> <wasm_file>` and scans its output
    for `DW_AT_name`, `DW_AT_decl_file` and `Line info` lines. When a kind
    of line occurs more than once, the last occurrence wins.

    Returns a tuple (function name, file, line, column), all strings.
    Fields that cannot be found keep their placeholders:
    "<unknown>", "unknown", "?" and "?".
    """
    # Pass the arguments as a list so paths containing spaces stay intact.
    # check=False: a failing lookup simply yields the placeholder values.
    p = subprocess.run(
        [str(dwarf_dump), f"--lookup={offset}", str(wasm_file)],
        check=False,
        capture_output=True,
        text=True,
    )

    function_name, function_file = "<unknown>", "unknown"
    function_line, function_column = "?", "?"

    # splitlines() rather than split(os.linesep): text mode has already
    # normalised the newlines, so os.linesep is wrong on Windows.
    for line in p.stdout.splitlines():
        line = line.strip()

        if "DW_AT_name" in line:
            function_name = get_dwarf_tag_value("DW_AT_name", line)

        if "DW_AT_decl_file" in line:
            function_file = get_dwarf_tag_value("DW_AT_decl_file", line)

        if "Line info" in line:
            # The file reported here is ignored; DW_AT_decl_file is used.
            _, line_no, column_no = parse_line_info(line)
            # Normalise to str so the result matches the declared return
            # type and the "?" placeholders.
            function_line, function_column = str(line_no), str(column_no)

    return (function_name, function_file, function_line, function_column)
def get_dwarf_tag_value(tag: str, line: str) -> str:
    """
    Extract the value of a DWARF attribute from one line of dwarfdump output.

    Two value forms are recognised:
        <tag>  ("some string")  -> some string
        <tag>  (123)            -> "123"

    Leading whitespace before the tag is tolerated. Returns "<unknown>"
    when the line holds neither form, instead of failing on a missing match.
    """
    # Escape the tag so it is always matched literally.
    escaped_tag = re.escape(tag)

    # Try extracting value as string
    m = re.match(rf"\s*{escaped_tag}\s+\(\"(.*)\"\)", line)
    if m:
        return m.group(1)

    # Try extracting value as integer
    m = re.match(rf"\s*{escaped_tag}\s+\((\d+)\)", line)
    if m:
        return m.group(1)

    return "<unknown>"
def get_line_info_from_function_name(
    dwarf_dump: Path, wasm_file: Path, function_name: str
) -> tuple[str, str, str]:
    """
    Find the location info of a given function in a wasm file.

    Runs `<dwarf_dump> --name=<function_name> <wasm_file>` and scans its
    output for `DW_AT_name`, `DW_AT_decl_file` and `DW_AT_decl_line` lines.
    When an attribute occurs more than once, the last occurrence wins.

    Returns a tuple (function name, file, line). Fields that cannot be
    found keep their placeholders: "<unknown>", "unknown" and "?".
    """
    # Pass the arguments as a list so paths (and function names) containing
    # spaces or quotes stay intact.
    # check=False: a failing lookup simply yields the placeholder values.
    p = subprocess.run(
        [str(dwarf_dump), f"--name={function_name}", str(wasm_file)],
        check=False,
        capture_output=True,
        text=True,
    )

    # Separate locals, so the `function_name` argument is not overwritten.
    found_name, found_file = "<unknown>", "unknown"
    found_line = "?"

    # splitlines() rather than split(os.linesep): text mode has already
    # normalised the newlines, so os.linesep is wrong on Windows.
    for line in p.stdout.splitlines():
        line = line.strip()

        if "DW_AT_name" in line:
            found_name = get_dwarf_tag_value("DW_AT_name", line)

        if "DW_AT_decl_file" in line:
            found_file = get_dwarf_tag_value("DW_AT_decl_file", line)

        if "DW_AT_decl_line" in line:
            found_line = get_dwarf_tag_value("DW_AT_decl_line", line)

    return (found_name, found_file, found_line)
def parse_line_info(line_info: str) -> tuple[str, int, int]:
    """
    Parse a dwarfdump "Line info" line.

    line_info -> (file, line, column)

    For example:
        Line info: file 'trap.c', line 5, column 1
    yields ("trap.c", 5, 1). The line and column are returned as integers.

    Raises ValueError if `line_info` does not contain the expected pattern.
    """
    PATTERN = r"Line info: file \'(.+)\', line ([0-9]+), column ([0-9]+)"
    m = re.search(PATTERN, line_info)
    # Raise explicitly instead of using `assert`, which is stripped when
    # Python runs with -O.
    if m is None:
        raise ValueError(f"unrecognized line info: {line_info!r}")

    file, line, column = m.groups()
    return (file, int(line), int(column))
def parse_call_stack_line(line: str) -> Optional[tuple[str, Optional[str], str]]:
    """
    Split one call-stack line into (frame number, address, function).

    New format (WAMR > 1.3.2):
        #00: 0x0a04 - $f18 => (00, 0a04, $f18)
    Old format:
        #00 $f18 => (00, None, $f18)

    The address is returned without its "0x" prefix; hex digits of either
    case are accepted. Returns None when the line matches neither format.
    """
    # New format
    m = re.match(r"#([0-9]+): 0x([0-9a-fA-F]+) - (\S+)", line)
    if m is not None:
        return m.groups()

    # Old format: no address available
    m = re.match(r"#([0-9]+) (\S+)", line)
    if m is not None:
        frame, function = m.groups()
        return (frame, None, function)

    return None
def parse_module_functions(wasm_objdump: Path, wasm_file: Path) -> dict[str, str]:
    """
    Build a map from function index to function name for a wasm file.

    Runs `<wasm_objdump> -x <wasm_file> --section=function` and collects
    every output line of the form `... func[<index>] ... <<name>>`.
    Both keys (indexes) and values (names) are strings. Lines that carry a
    `func[...]` entry without a `<name>` are skipped.

    Raises subprocess.CalledProcessError if wasm-objdump exits with a
    non-zero status.
    """
    # Pass the arguments as a list so paths containing spaces stay intact.
    p = subprocess.run(
        [str(wasm_objdump), "-x", str(wasm_file), "--section=function"],
        check=True,
        capture_output=True,
        text=True,
    )

    # Compiled once, outside the loop.
    entry_pattern = re.compile(r".*func\[([0-9]+)\].*<(.*)>")

    function_index_to_name = {}
    # splitlines() rather than split(os.linesep): text mode has already
    # normalised the newlines, so os.linesep is wrong on Windows.
    for line in p.stdout.splitlines():
        m = entry_pattern.match(line)
        if m is None:
            # Not a function entry, or an entry without a name: nothing to
            # record, and not worth aborting the whole run for.
            continue
        index, name = m.groups()
        function_index_to_name[index] = name

    return function_index_to_name
def main():
    """
    Command-line entry point: convert a call-stack dump into location info.

    Each non-empty line of the call stack file is parsed and printed as
        <n>: <function name>
                at <file>:<line>[:<column>]
    where <n> is the zero-based line number within the call stack file.
    Frames whose function is not of the form `$f<index>` (e.g. `_start`)
    are printed by name only.

    With `--no-addr` the function index is resolved to a name through
    wasm-objdump and the location is the function's declaration; otherwise
    the frame address, made relative to the start of the Code section, is
    looked up with llvm-dwarfdump.

    Returns 0 on success, or -1 when a required tool or input file is
    missing or the Code section start cannot be determined.
    """
    parser = argparse.ArgumentParser(description="addr2line for wasm")
    # These three options are needed unconditionally below, so let argparse
    # report a missing one instead of failing later on a None value.
    parser.add_argument(
        "--wasi-sdk", type=Path, required=True, help="path to wasi-sdk"
    )
    parser.add_argument("--wabt", type=Path, required=True, help="path to wabt")
    parser.add_argument(
        "--wasm-file", type=Path, required=True, help="path to wasm file"
    )
    parser.add_argument("call_stack_file", type=Path, help="path to a call stack file")
    parser.add_argument(
        "--no-addr",
        action="store_true",
        help="use call stack without addresses or from fast interpreter mode",
    )
    args = parser.parse_args()

    wasm_objdump = args.wabt.joinpath("bin/wasm-objdump")
    llvm_dwarf_dump = args.wasi_sdk.joinpath("bin/llvm-dwarfdump")

    # Explicit checks rather than `assert`, which is stripped under -O and
    # gives the user no hint about which path is wrong.
    for needed in (wasm_objdump, llvm_dwarf_dump, args.wasm_file, args.call_stack_file):
        if not needed.exists():
            print(f"{needed} does not exist", file=sys.stderr)
            return -1

    code_section_start = get_code_section_start(wasm_objdump, args.wasm_file)
    if code_section_start == -1:
        return -1

    function_index_to_name = {}
    if args.no_addr:
        function_index_to_name = parse_module_functions(wasm_objdump, args.wasm_file)

    with open(args.call_stack_file, "rt", encoding="ascii") as f:
        for i, line in enumerate(f):
            line = line.strip()
            if not line:
                continue

            splitted = parse_call_stack_line(line)
            if splitted is None:
                # Not a call-stack frame: report it and carry on with the
                # remaining frames.
                print(f"skipping unrecognized line: {line}", file=sys.stderr)
                continue

            _, offset, index = splitted
            if not index.startswith("$f"):  # E.g. _start
                print(f"{i}: {index}")
                continue
            index = index[2:]

            if args.no_addr:
                if index not in function_index_to_name:
                    print(f"{i}: {line}")
                    continue

                function_name = function_index_to_name[index]
                line_info = get_line_info_from_function_name(
                    llvm_dwarf_dump, args.wasm_file, function_name
                )
                _, function_file, function_line = line_info

                print(f"{i}: {function_name}")
                print(f"\tat {function_file}:{function_line}")
            else:
                if offset is None:
                    # Old-format frame without an address: it cannot be
                    # looked up in this mode, so echo it unchanged.
                    print(f"{i}: {line}")
                    print(
                        "no address in this frame; try the --no-addr option",
                        file=sys.stderr,
                    )
                    continue

                # Frame addresses are made relative to the Code section
                # start before being handed to llvm-dwarfdump.
                offset = int(offset, 16) - code_section_start
                line_info = get_line_info_from_function_addr(
                    llvm_dwarf_dump, args.wasm_file, offset
                )
                function_name, function_file, function_line, function_column = line_info

                print(f"{i}: {function_name}")
                print(f"\tat {function_file}:{function_line}:{function_column}")

    return 0
# Run the tool only when executed as a script; the value returned by main()
# becomes the process exit status.
if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)