addr2line.py: Support sourceMappingURL section produced by emcc (#3302)

And update the debug-tools sample.
This commit is contained in:
liang.he
2024-04-12 11:43:40 +08:00
committed by GitHub
parent 1a043b6eb5
commit fef26ead3e
10 changed files with 324 additions and 110 deletions

View File

@ -43,6 +43,28 @@ For example, there is a call-stack dump:
"""
def locate_sourceMappingURL_section(wasm_objdump: Path, wasm_file: Path) -> bool:
"""
Figure out if the wasm file has a sourceMappingURL section.
"""
cmd = f"{wasm_objdump} -h {wasm_file}"
p = subprocess.run(
shlex.split(cmd),
check=True,
capture_output=True,
text=True,
universal_newlines=True,
)
outputs = p.stdout.split(os.linesep)
for line in outputs:
line = line.strip()
if "sourceMappingURL" in line:
return True
return False
def get_code_section_start(wasm_objdump: Path, wasm_file: Path) -> int:
"""
Find the start offset of Code section in a wasm file.
@ -62,15 +84,6 @@ def get_code_section_start(wasm_objdump: Path, wasm_file: Path) -> int:
)
outputs = p.stdout.split(os.linesep)
# if there is no .debug section, return -1
for line in outputs:
line = line.strip()
if ".debug_info" in line:
break
else:
print(f"No .debug_info section found {wasm_file}")
return -1
for line in outputs:
line = line.strip()
if "Code" in line:
@ -79,7 +92,7 @@ def get_code_section_start(wasm_objdump: Path, wasm_file: Path) -> int:
return -1
def get_line_info_from_function_addr(
def get_line_info_from_function_addr_dwarf(
dwarf_dump: Path, wasm_file: Path, offset: int
) -> tuple[str, str, str, str]:
"""
@ -126,7 +139,7 @@ def get_dwarf_tag_value(tag: str, line: str) -> str:
return m.groups()[0]
def get_line_info_from_function_name(
def get_line_info_from_function_name_dwarf(
dwarf_dump: Path, wasm_file: Path, function_name: str
) -> tuple[str, str, str]:
"""
@ -160,6 +173,51 @@ def get_line_info_from_function_name(
return (function_name, function_file, function_line)
def get_line_info_from_function_addr_sourcemapping(
emsymbolizer: Path, wasm_file: Path, offset: int
) -> tuple[str, str, str, str]:
"""
Find the location info of a given offset in a wasm file which is compiled with emcc.
{emsymbolizer} {wasm_file} {offset of file}
there usually are two lines:
??
relative path to source file:line:column
"""
debug_info_source = wasm_file.with_name(f"{wasm_file.name}.map")
cmd = f"{emsymbolizer} -t code -f {debug_info_source} {wasm_file} {offset}"
p = subprocess.run(
shlex.split(cmd),
check=False,
capture_output=True,
text=True,
universal_newlines=True,
cwd=Path.cwd(),
)
outputs = p.stdout.split(os.linesep)
function_name, function_file = "<unknown>", "unknown"
function_line, function_column = "?", "?"
for line in outputs:
line = line.strip()
if not line:
continue
m = re.match("(.*):(\d+):(\d+)", line)
if m:
function_file, function_line, function_column = m.groups()
continue
else:
# it's always ??, not sure about that
if "??" != line:
function_name = line
return (function_name, function_file, function_line, function_column)
def parse_line_info(line_info: str) -> tuple[str, str, str]:
"""
line_info -> [file, line, column]
@ -250,6 +308,7 @@ def main():
action="store_true",
help="use call stack without addresses or from fast interpreter mode",
)
parser.add_argument("--emsdk", type=Path, help="path to emsdk")
args = parser.parse_args()
wasm_objdump = args.wabt.joinpath("bin/wasm-objdump")
@ -261,6 +320,15 @@ def main():
llvm_cxxfilt = args.wasi_sdk.joinpath("bin/llvm-cxxfilt")
assert llvm_cxxfilt.exists()
emcc_production = locate_sourceMappingURL_section(wasm_objdump, args.wasm_file)
if emcc_production:
if args.emsdk is None:
print("Please provide the path to emsdk via --emsdk")
return -1
emsymbolizer = args.emsdk.joinpath("upstream/emscripten/emsymbolizer")
assert emsymbolizer.exists()
code_section_start = get_code_section_start(wasm_objdump, args.wasm_file)
if code_section_start == -1:
return -1
@ -281,6 +349,7 @@ def main():
_, offset, index = splitted
if args.no_addr:
# FIXME: w/ emcc production
if not index.startswith("$f"): # E.g. _start or Text format
print(f"{i}: {index}")
continue
@ -290,22 +359,40 @@ def main():
print(f"{i}: {line}")
continue
line_info = get_line_info_from_function_name(
llvm_dwarf_dump, args.wasm_file, function_index_to_name[index]
)
if not emcc_production:
_, function_file, function_line = (
get_line_info_from_function_name_dwarf(
llvm_dwarf_dump,
args.wasm_file,
function_index_to_name[index],
)
)
else:
_, function_file, function_line = _, "unknown", "?"
_, function_file, function_line = line_info
function_name = demangle(llvm_cxxfilt, function_index_to_name[index])
print(f"{i}: {function_name}")
print(f"\tat {function_file}:{function_line}")
else:
offset = int(offset, 16)
# match the algorithm in wasm_interp_create_call_stack()
# either a *offset* to *code* section start
# or a *offset* in a file
assert offset > code_section_start
offset = offset - code_section_start
function_name, function_file, function_line, function_column = (
get_line_info_from_function_addr(
llvm_dwarf_dump, args.wasm_file, offset
if emcc_production:
function_name, function_file, function_line, function_column = (
get_line_info_from_function_addr_sourcemapping(
emsymbolizer, args.wasm_file, offset
)
)
else:
function_name, function_file, function_line, function_column = (
get_line_info_from_function_addr_dwarf(
llvm_dwarf_dump, args.wasm_file, offset
)
)
)
# if can't parse function_name, use name section or <index>
if function_name == "<unknown>":