wamr/test-tools/addr2line/addr2line.py

#!/usr/bin/env python3
#
# Copyright (C) 2019 Intel Corporation.  All rights reserved.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
import argparse
import os
from pathlib import Path
import re
import shlex
import subprocess
import sys
from typing import Optional, Union

"""
This is a tool to convert addresses, which are from a call-stack dump generated by iwasm, into line info for a wasm file.

When a wasm file is compiled with debug info, the addresses can be translated into source line info.

For example, there is a call-stack dump:

```
#00: 0x0a04 - $f18
#01: 0x08e4 - $f11
#02: 0x096f - $f12
#03: 0x01aa - _start
```

- store the call-stack dump into a file, e.g. call_stack.txt
- run the following command to convert the address into line info:
  ```
  $ cd test-tools/addr2line
  $ python3 addr2line.py --wasi-sdk <wasi-sdk installation> --wabt <wabt installation> --wasm-file <wasm file path> call_stack.txt
  ```
  The script will use *wasm-objdump* in wabt to locate the code section and transform the addresses, then use *llvm-dwarfdump* to look up the line info for each address
  in the call-stack dump.
- if addresses are not available in the stack trace (i.e. iwasm <= 1.3.2) or iwasm is used in fast interpreter mode,
  run the following command to convert the function index into line info (passing the `--no-addr` option):
  ```
  $ python3 addr2line.py --wasi-sdk <wasi-sdk installation> --wabt <wabt installation> --wasm-file <wasm file path> call_stack.txt --no-addr
  ```
  The script will use *wasm-objdump* in wabt to get the function names corresponding to function indexes, then use *llvm-dwarfdump* to look up the line info for each
  function index in the call-stack dump.
"""


def get_code_section_start(wasm_objdump: Path, wasm_file: Path) -> int:
    """
    Find the start offset of the Code section in a wasm file.

    Runs ``<wasm_objdump> -h <wasm_file>`` and parses the section headers.
    If the Code section header looks like:
      Code start=0x0000017c end=0x00004382 (size=0x00004206) count: 47

    the start offset is 0x0000017c.

    Returns -1 (after printing a message) when the output mentions no
    .debug_info section, and -1 when no Code section header is found.

    Raises subprocess.CalledProcessError when the tool exits with a non-zero
    status.
    """
    # Pass the arguments as a list so that paths containing spaces or quotes
    # reach the tool unchanged (no string building / shlex re-splitting).
    p = subprocess.run(
        [str(wasm_objdump), "-h", str(wasm_file)],
        check=True,
        capture_output=True,
        text=True,
    )
    # In text mode line endings are already normalised to "\n", so use
    # splitlines() instead of splitting on os.linesep ("\r\n" on Windows).
    outputs = [line.strip() for line in p.stdout.splitlines()]

    # Without debug info there is nothing to map addresses to.
    if not any(".debug_info" in line for line in outputs):
        print(f"No .debug_info section found {wasm_file}")
        return -1

    # Anchor on the section name at the start of the (stripped) line so that
    # unrelated lines which merely contain "Code" are not mistaken for the
    # Code section header.
    code_header = re.compile(r"Code\s+start=(0x[0-9a-fA-F]+)")
    for line in outputs:
        m = code_header.match(line)
        if m:
            return int(m.group(1), 16)

    return -1


def get_line_info_from_function_addr(
    dwarf_dump: Path, wasm_file: Path, offset: int
) -> tuple[str, str, Union[int, str], Union[int, str]]:
    """
    Find the location info of a given offset in a wasm file.

    Runs ``<dwarf_dump> --lookup=<offset> <wasm_file>`` and scans its output.

    Returns (function_name, function_file, function_line, function_column):
    - function_name comes from the last "DW_AT_name" line seen,
    - function_file comes from the last "DW_AT_decl_file" line seen,
    - function_line / function_column come from the "Line info" line and are
      ints when that line is present.
    Fields that are not found keep their placeholders: "<unknown>",
    "unknown", "?" and "?".

    The tool's exit status is deliberately not checked (check=False); a
    failed lookup simply yields the placeholders.
    """
    # Pass the arguments as a list so that paths containing spaces or quotes
    # reach the tool unchanged.
    p = subprocess.run(
        [str(dwarf_dump), f"--lookup={offset}", str(wasm_file)],
        check=False,
        capture_output=True,
        text=True,
    )

    function_name, function_file = "<unknown>", "unknown"
    function_line, function_column = "?", "?"

    # Text mode already normalises line endings to "\n"; splitlines() is
    # therefore correct on every platform, unlike splitting on os.linesep.
    for line in p.stdout.splitlines():
        line = line.strip()

        if "DW_AT_name" in line:
            function_name = get_dwarf_tag_value("DW_AT_name", line)

        if "DW_AT_decl_file" in line:
            function_file = get_dwarf_tag_value("DW_AT_decl_file", line)

        if "Line info" in line:
            # The file reported by "Line info" is ignored; DW_AT_decl_file is
            # used for the file name instead.
            _, function_line, function_column = parse_line_info(line)

    return (function_name, function_file, function_line, function_column)


def get_dwarf_tag_value(tag: str, line: str) -> str:
    """
    Extract the value of a DWARF attribute from one line of dwarfdump output.

    The line is expected to start (optionally after whitespace) with the
    attribute name followed by a parenthesised value, e.g.:
      DW_AT_name      ("main")   -> main
      DW_AT_decl_line (42)       -> 42

    Any other parenthesised value (e.g. a hex number) is returned verbatim
    with surrounding whitespace removed.

    Raises ValueError when the line does not contain ``tag`` followed by a
    parenthesised value (previously this surfaced as an AttributeError on
    None).
    """
    prefix = rf"\s*{re.escape(tag)}\s+"

    # Try extracting value as string
    m = re.match(prefix + r"\(\"(.*)\"\)", line)
    if m:
        return m.group(1)

    # Try extracting value as integer
    m = re.match(prefix + r"\((\d+)\)", line)
    if m:
        return m.group(1)

    # Fall back to whatever is inside the first pair of parentheses
    m = re.match(prefix + r"\(([^)]*)\)", line)
    if m:
        return m.group(1).strip()

    raise ValueError(f"cannot extract the value of {tag} from: {line!r}")


def get_line_info_from_function_name(
    dwarf_dump: Path, wasm_file: Path, function_name: str
) -> tuple[str, str, str]:
    """
    Find the location info of a given function in a wasm file.

    Runs ``<dwarf_dump> --name=<function_name> <wasm_file>`` and scans its
    output.

    Returns (name, file, line) taken from the last "DW_AT_name",
    "DW_AT_decl_file" and "DW_AT_decl_line" lines seen. Fields that are not
    found keep their placeholders: "<unknown>", "unknown" and "?".

    The tool's exit status is deliberately not checked (check=False); a
    failed lookup simply yields the placeholders.
    """
    # Pass the arguments as a list: function names and paths may contain
    # spaces or quotes, which string building + shlex splitting would mangle.
    p = subprocess.run(
        [str(dwarf_dump), f"--name={function_name}", str(wasm_file)],
        check=False,
        capture_output=True,
        text=True,
    )

    # Use separate locals for the results instead of overwriting the
    # function_name parameter.
    found_name, found_file = "<unknown>", "unknown"
    found_line = "?"

    # Text mode already normalises line endings to "\n"; splitlines() is
    # therefore correct on every platform, unlike splitting on os.linesep.
    for line in p.stdout.splitlines():
        line = line.strip()

        if "DW_AT_name" in line:
            found_name = get_dwarf_tag_value("DW_AT_name", line)

        if "DW_AT_decl_file" in line:
            found_file = get_dwarf_tag_value("DW_AT_decl_file", line)

        if "DW_AT_decl_line" in line:
            found_line = get_dwarf_tag_value("DW_AT_decl_line", line)

    return (found_name, found_file, found_line)


def parse_line_info(line_info: str) -> tuple[str, int, int]:
    """
    line_info -> (file, line, column)

    Parses a dwarfdump line such as:
      Line info: file 'main.c', line 12, column 3, start line 10
    into ("main.c", 12, 3). The line and column are returned as ints.

    Raises ValueError when the text does not contain a "Line info" record in
    that shape.
    """
    PATTERN = r"Line info: file \'(.+)\', line ([0-9]+), column ([0-9]+)"
    m = re.search(PATTERN, line_info)
    # Validate explicitly: an assert would be stripped when running with -O.
    if m is None:
        raise ValueError(f"unrecognized line info: {line_info!r}")

    file, line, column = m.groups()
    return (file, int(line), int(column))


def parse_call_stack_line(line: str) -> Optional[tuple[str, Optional[str], str]]:
    """
    Split one call-stack line into (frame number, address, function).

    New format (WAMR > 1.3.2):
    #00: 0x0a04 - $f18   => ("00", "0a04", "$f18")
    Old format:
    #00 $f18             => ("00", None, "$f18")

    The address is returned as hex digits without the "0x" prefix; both
    lowercase and uppercase hex digits are accepted. Returns None when the
    line matches neither format.
    """

    # New format
    m = re.match(r"#([0-9]+): 0x([0-9a-fA-F]+) - (\S+)", line)
    if m is not None:
        return m.groups()

    # Old format: there is no address, so that slot is None
    m = re.match(r"#([0-9]+) (\S+)", line)
    if m is not None:
        frame, function = m.groups()
        return (frame, None, function)

    return None


def parse_module_functions(wasm_objdump: Path, wasm_file: Path) -> dict[str, str]:
    """
    Build a map from function index to function name for a wasm file.

    Runs ``<wasm_objdump> -x <wasm_file> --section=function`` and collects
    every output line of the shape ``func[<index>] ... <<name>>``. Both keys
    and values are strings, e.g. {"0": "__wasm_call_ctors", "1": "_start"}.

    Lines that carry a function index but no ``<name>`` are skipped, so such
    functions are simply absent from the result.

    Raises subprocess.CalledProcessError when the tool exits with a non-zero
    status.
    """
    # Pass the arguments as a list so that paths containing spaces or quotes
    # reach the tool unchanged.
    p = subprocess.run(
        [str(wasm_objdump), "-x", str(wasm_file), "--section=function"],
        check=True,
        capture_output=True,
        text=True,
    )

    # "[^<]*<" stops at the FIRST "<" after the index, so names which
    # themselves contain "<" are captured whole.
    entry = re.compile(r"func\[([0-9]+)\][^<]*<(.*)>")

    function_index_to_name = {}
    # Text mode already normalises line endings to "\n"; splitlines() is
    # therefore correct on every platform, unlike splitting on os.linesep.
    for line in p.stdout.splitlines():
        m = entry.search(line)
        if m is None:
            # Not a function entry, or an entry without a name.
            continue

        index, name = m.groups()
        function_index_to_name[index] = name

    return function_index_to_name


def main():
    """
    Command-line entry point: print source locations for a call-stack dump.

    Reads the call-stack file line by line and, for every frame:
    - frames whose function is not of the form "$f<index>" (e.g. _start) are
      printed as "<i>: <function>",
    - with --no-addr, the function index is mapped to a name via
      wasm-objdump and the declaration file/line is looked up by name,
    - otherwise the frame address, made relative to the start of the Code
      section, is looked up with llvm-dwarfdump.
    <i> is the zero-based index of the line within the call-stack file.

    Lines that are not call-stack frames are echoed unchanged. Frames that
    carry no address while --no-addr is not given are echoed with a hint on
    stderr.

    Returns 0 on success and -1 when get_code_section_start() reports -1.
    Invalid or missing arguments terminate through argparse (exit status 2).
    """
    parser = argparse.ArgumentParser(description="addr2line for wasm")
    # All three options are needed unconditionally below, so let argparse
    # reject their absence instead of failing later on a None value.
    parser.add_argument(
        "--wasi-sdk", type=Path, required=True, help="path to wasi-sdk"
    )
    parser.add_argument("--wabt", type=Path, required=True, help="path to wabt")
    parser.add_argument(
        "--wasm-file", type=Path, required=True, help="path to wasm file"
    )
    parser.add_argument("call_stack_file", type=Path, help="path to a call stack file")
    parser.add_argument(
        "--no-addr",
        action="store_true",
        help="use call stack without addresses or from fast interpreter mode",
    )
    args = parser.parse_args()

    # Validate with explicit errors; asserts would vanish under "python -O".
    wasm_objdump = args.wabt.joinpath("bin/wasm-objdump")
    if not wasm_objdump.exists():
        parser.error(f"wasm-objdump not found: {wasm_objdump}")

    llvm_dwarf_dump = args.wasi_sdk.joinpath("bin/llvm-dwarfdump")
    if not llvm_dwarf_dump.exists():
        parser.error(f"llvm-dwarfdump not found: {llvm_dwarf_dump}")

    if not args.call_stack_file.exists():
        parser.error(f"call stack file not found: {args.call_stack_file}")

    code_section_start = get_code_section_start(wasm_objdump, args.wasm_file)
    if code_section_start == -1:
        return -1

    # Only needed (and only computed) for --no-addr; always bound so the name
    # is defined on every path.
    function_index_to_name = {}
    if args.no_addr:
        function_index_to_name = parse_module_functions(wasm_objdump, args.wasm_file)

    with open(args.call_stack_file, "rt", encoding="ascii") as f:
        for i, line in enumerate(f):
            line = line.strip()
            if not line:
                continue

            splitted = parse_call_stack_line(line)
            if splitted is None:
                # Not a call-stack frame (e.g. surrounding log output):
                # keep it in the output rather than aborting the whole run.
                print(line)
                continue

            _, offset, index = splitted
            if not index.startswith("$f"):  # E.g. _start
                print(f"{i}: {index}")
                continue
            index = index[2:]

            if args.no_addr:
                if index not in function_index_to_name:
                    print(f"{i}: {line}")
                    continue

                function_name = function_index_to_name[index]
                line_info = get_line_info_from_function_name(
                    llvm_dwarf_dump, args.wasm_file, function_name
                )

                _, function_file, function_line = line_info
                print(f"{i}: {function_name}")
                print(f"\tat {function_file}:{function_line}")
            else:
                if offset is None:
                    # Old-format frame: there is no address to look up.
                    print(f"{i}: {line}")
                    print(
                        f"frame {i} has no address, re-run with --no-addr",
                        file=sys.stderr,
                    )
                    continue

                # Frame addresses are made relative to the Code section start
                # before the lookup.
                offset = int(offset, 16) - code_section_start
                line_info = get_line_info_from_function_addr(
                    llvm_dwarf_dump, args.wasm_file, offset
                )

                function_name, function_file, function_line, function_column = line_info
                print(f"{i}: {function_name}")
                print(f"\tat {function_file}:{function_line}:{function_column}")

    return 0


if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())