Get location info from function indexes in addr2line script (#3206)

Update the `addr2line` script so that:
- line info is printed in a more convenient format, e.g.
```
0: c
        at wasm-micro-runtime/test-tools/addr2line/trap.c:5:1
1: b
        at wasm-micro-runtime/test-tools/addr2line/trap.c:11:12
2: a
        at wasm-micro-runtime/test-tools/addr2line/trap.c:17:12
```
similar to how Rust prints stack traces when there's a panic. In an IDE, the user
can conveniently click on the printed path and be redirected to the file line.
- a new `--no-addr` argument can be provided to the script.

The new option can be used in fast interpreter mode (which the script does not otherwise
support) or with older WAMR versions (where the stack trace only contains the function
index and not the function address). In that case, `wasm-objdump` is used to get the
function name from the index and `llvm-dwarfdump` to obtain the location info (where the
line refers to the start of the function).
This commit is contained in:
Enrico Loparco
2024-03-08 03:20:04 +01:00
committed by GitHub
parent a43018ff72
commit 0e4c4799b1
9 changed files with 469 additions and 56 deletions

View File

@ -12,9 +12,7 @@ import subprocess
import sys
"""
it is a tool to transfer the address, which is from a call-stack dump generated by iwasm, to line info for a wasm file.
> in order to generate the call-stack dump, you can use the following command: `$ cmake -DWAMR_BUILD_DUMP_CALL_STACK=1 ...`
This is a tool to convert addresses, which are from a call-stack dump generated by iwasm, into line info for a wasm file.
When a wasm file is compiled with debug info, it is possible to convert the address into line info.
@ -28,21 +26,20 @@ For example, there is a call-stack dump:
```
- store the call-stack dump into a file, e.g. call_stack.txt
- run the following command to transfer the address to line info:
- run the following command to convert the address into line info:
```
$ cd test-tools/addr2line
$ python3 addr2line.py --wasi-sdk <wasi-sdk installation> --wabt <wabt installation> --wasm-file <wasm file path> call_stack.txt
```
- the script will use *wasm-objdump* in wabt to transform address, then use *llvm-dwarfdump* to lookup the line info for each address
The script will use *wasm-objdump* in wabt to transform the address, then use *llvm-dwarfdump* to look up the line info for each address
in the call-stack dump.
- the output will be:
- if addresses are not available in the stack trace (i.e. iwasm <= 1.3.2) or iwasm is used in fast interpreter mode,
run the following command to convert the function index into line info (passing the `--no-addr` option):
```
#00: 0x0a04 - $f18
#01: 0x08e4 - $f11 (FILE:quicksort.c LINE: 176 COLUMN: 11 FUNC:Quick)
#02: 0x096f - $f12 (FILE:quicksort.c LINE: 182 COLUMN: 3 FUNC:main)
#03: 0x01aa - _start
$ python3 addr2line.py --wasi-sdk <wasi-sdk installation> --wabt <wabt installation> --wasm-file <wasm file path> call_stack.txt --no-addr
```
The script will use *wasm-objdump* in wabt to get the function names corresponding to function indexes, then use *llvm-dwarfdump* to look up the line info for each
function index in the call-stack dump.
"""
@ -82,7 +79,9 @@ def get_code_section_start(wasm_objdump: Path, wasm_file: Path) -> int:
return -1
def get_line_info(dwarf_dump: Path, wasm_file: Path, offset: int) -> str:
def get_line_info_from_function_addr(
dwarf_dump: Path, wasm_file: Path, offset: int
) -> tuple[str, str, str, str]:
"""
Find the location info of a given offset in a wasm file.
"""
@ -96,29 +95,72 @@ def get_line_info(dwarf_dump: Path, wasm_file: Path, offset: int) -> str:
)
outputs = p.stdout.split(os.linesep)
capture_name = False
function_name, function_file = "<unknown>", "unknown"
function_line, function_column = "?", "?"
for line in outputs:
line = line.strip()
if "DW_TAG_subprogram" in line:
capture_name = True
continue
if "DW_AT_name" in line:
function_name = get_dwarf_tag_value("DW_AT_name", line)
if "DW_AT_name" in line and capture_name:
PATTERN = r"DW_AT_name\s+\(\"(\S+)\"\)"
m = re.match(PATTERN, line)
assert m is not None
if "DW_AT_decl_file" in line:
function_file = get_dwarf_tag_value("DW_AT_decl_file", line)
function_name = m.groups()[0]
if "Line info" in line:
_, function_line, function_column = parse_line_info(line)
if line.startswith("Line info"):
location = line
return (function_name, location)
return ()
return (function_name, function_file, function_line, function_column)
def parse_line_info(line_info: str) -> ():
def get_dwarf_tag_value(tag: str, line: str) -> str:
    """
    Extract the value of a DWARF attribute from one line of dwarfdump output.

    `line` must start with `tag`, followed by whitespace and the value in
    parentheses. Two value forms are recognised:
      - a quoted string, e.g. `DW_AT_name ("main")`      -> `main`
      - a decimal integer, e.g. `DW_AT_decl_line (17)`   -> `17`

    The value is always returned as a string (quotes removed).

    Raises ValueError if the line holds neither form, instead of failing
    with an AttributeError on a missing regex match.
    """
    # The tag is matched literally, so escape it before embedding it in a pattern.
    escaped_tag = re.escape(tag)

    # Try extracting value as string
    m = re.match(rf"{escaped_tag}\s+\(\"(.*)\"\)", line)
    if m:
        return m.group(1)

    # Try extracting value as integer
    m = re.match(rf"{escaped_tag}\s+\((\d+)\)", line)
    if m:
        return m.group(1)

    raise ValueError(f"cannot extract the value of {tag} from: {line!r}")
def get_line_info_from_function_name(
    dwarf_dump: Path, wasm_file: Path, function_name: str
) -> tuple[str, str, str]:
    """
    Find the location info of a given function in a wasm file.

    Runs `<dwarf_dump> --name=<function_name> <wasm_file>` and scans its
    standard output for the DW_AT_name, DW_AT_decl_file and DW_AT_decl_line
    attributes.

    Returns (name, file, line), all as strings. An attribute that never
    appears in the output keeps its placeholder ("<unknown>", "unknown",
    "?"); if an attribute appears several times, the last occurrence wins.
    The exit status of the tool is not checked.
    """
    # Hand the arguments over as a list instead of formatting a command
    # string and re-splitting it: paths containing spaces stay intact and
    # names containing quote characters no longer make shlex fail.
    cmd = [str(dwarf_dump), f"--name={function_name}", str(wasm_file)]
    p = subprocess.run(cmd, check=False, capture_output=True, text=True)

    # Separate locals so the `function_name` argument is not overwritten.
    found_name, found_file, found_line = "<unknown>", "unknown", "?"

    # In text mode line endings are already normalised to "\n", so split
    # with splitlines() rather than on the platform-specific os.linesep.
    for line in p.stdout.splitlines():
        line = line.strip()

        if "DW_AT_name" in line:
            found_name = get_dwarf_tag_value("DW_AT_name", line)

        if "DW_AT_decl_file" in line:
            found_file = get_dwarf_tag_value("DW_AT_decl_file", line)

        if "DW_AT_decl_line" in line:
            found_line = get_dwarf_tag_value("DW_AT_decl_line", line)

    return (found_name, found_file, found_line)
def parse_line_info(line_info: str) -> tuple[str, str, str]:
"""
line_info -> [file, line, column]
"""
@ -130,13 +172,55 @@ def parse_line_info(line_info: str) -> ():
return (file, int(line), int(column))
def parse_call_stack_line(line: str) -> tuple[str, str, str]:
    """
    Split one line of a call-stack dump into (frame number, address, function).

    New format (WAMR > 1.3.2):
    #00: 0x0a04 - $f18   => ("00", "0a04", "$f18")
    Old format:
    #00 $f18             => ("00", None, "$f18")

    The address is returned as lowercase hex digits without the "0x" prefix
    and is None for the old format, which carries no address.

    Returns None when the line matches neither format.
    """
    # New format
    m = re.match(r"#([0-9]+): 0x([0-9a-f]+) - (\S+)", line)
    if m is not None:
        return m.groups()

    # Old format; only tried after the new format failed, so a new-format
    # line is never mistaken for an old-format one.
    m = re.match(r"#([0-9]+) (\S+)", line)
    if m is not None:
        return (m.group(1), None, m.group(2))

    return None
def parse_module_functions(wasm_objdump: Path, wasm_file: Path) -> dict[str, str]:
    """
    Map function indexes to function names for a wasm file.

    Runs `<wasm_objdump> -x <wasm_file> --section=function` and collects
    every output line of the form `... func[<index>] ... <<name>>`.

    Returns a dict whose keys are the indexes as decimal strings (exactly
    as printed by the tool) and whose values are the names found between
    the angle brackets. Lines that do not carry both an index and a name
    are skipped, so a caller can treat a missing index as "name unknown".

    Raises subprocess.CalledProcessError if the tool exits with a non-zero
    status.
    """
    # Pass the arguments as a list so paths containing spaces stay intact.
    cmd = [str(wasm_objdump), "-x", str(wasm_file), "--section=function"]
    p = subprocess.run(cmd, check=True, capture_output=True, text=True)

    entry_pattern = re.compile(r".*func\[([0-9]+)\].*<(.*)>")
    function_index_to_name = {}

    # In text mode line endings are already normalised to "\n", so split
    # with splitlines() rather than on the platform-specific os.linesep.
    for line in p.stdout.splitlines():
        m = entry_pattern.match(line)
        if m is None:
            # Not a function entry, or an entry without a <name>: skip it
            # instead of aborting the whole script on an assertion.
            continue

        index, name = m.groups()
        function_index_to_name[index] = name

    return function_index_to_name
def main():
@ -145,6 +229,11 @@ def main():
parser.add_argument("--wabt", type=Path, help="path to wabt")
parser.add_argument("--wasm-file", type=Path, help="path to wasm file")
parser.add_argument("call_stack_file", type=Path, help="path to a call stack file")
parser.add_argument(
"--no-addr",
action="store_true",
help="use call stack without addresses or from fast interpreter mode",
)
args = parser.parse_args()
wasm_objdump = args.wabt.joinpath("bin/wasm-objdump")
@ -157,46 +246,51 @@ def main():
if code_section_start == -1:
return -1
if args.no_addr:
function_index_to_name = parse_module_functions(wasm_objdump, args.wasm_file)
assert args.call_stack_file.exists()
with open(args.call_stack_file, "rt", encoding="ascii") as f:
for line in f:
for i, line in enumerate(f):
line = line.strip()
if not line:
continue
splitted = parse_call_stack_line(line)
if splitted is None:
print(line)
assert splitted is not None
_, offset, index = splitted
if not index.startswith("$f"): # E.g. _start
print(f"{i}: {index}")
continue
index = index[2:]
_, offset, _ = splitted
if args.no_addr:
if index not in function_index_to_name:
print(f"{i}: {line}")
continue
offset = int(offset, 16)
offset = offset - code_section_start
line_info = get_line_info(llvm_dwarf_dump, args.wasm_file, offset)
if not line_info:
print(line)
continue
line_info = get_line_info_from_function_name(
llvm_dwarf_dump, args.wasm_file, function_index_to_name[index]
)
function_name, line_info = line_info
src_file, src_line, src_column = parse_line_info(line_info)
print(
f"{line} (FILE:{src_file} LINE:{src_line:5} COLUMN:{src_column:3} FUNC:{function_name})"
)
_, funciton_file, function_line = line_info
function_name = function_index_to_name[index]
print(f"{i}: {function_name}")
print(f"\tat {funciton_file}:{function_line}")
else:
offset = int(offset, 16)
offset = offset - code_section_start
line_info = get_line_info_from_function_addr(
llvm_dwarf_dump, args.wasm_file, offset
)
function_name, funciton_file, function_line, function_column = line_info
print(f"{i}: {function_name}")
print(f"\tat {funciton_file}:{function_line}:{function_column}")
return 0
if __name__ == "__main__":
print(
"**************************************************\n"
+ "Before running this script, please make sure:\n"
+ " - the wasm file is compiled with debug info. (like: clang -g) \n"
+ " - the call-stack dump is generated by iwasm\n"
+ " - iwasm is compiled with -DWAMR_BUILD_DUMP_CALL_STACK=1\n"
+ " - iwasm isn't running under fast-interp mode. -DWAMR_BUILD_FAST_INTERP=0\n"
+ " - if using .aot, the aot file is generated with `--enable-dump-call-stack`\n"
+ "**************************************************\n"
)
sys.exit(main())