Enhance wasm loading with LoadArgs and support module names (#3265)

- Add new API wasm_runtime_load_ex() in wasm_export.h
  and wasm_module_new_ex in wasm_c_api.h
- Move aot_create_perf_map() into a separate file, aot_perf_map.c
- In perf.map, function names include the user-specified module name
- Enhance the script to help with flamegraph generation
This commit is contained in:
liang.he
2024-04-07 15:04:35 +08:00
committed by GitHub
parent cee9b826a5
commit 4ef724bbff
28 changed files with 3008 additions and 346 deletions

View File

@ -0,0 +1,2 @@
*.*
!*.py

View File

@ -0,0 +1,325 @@
#!/usr/bin/env python3
#
# Copyright (C) 2019 Intel Corporation. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
"""
It is used to process *out.folded* file generated by [FlameGraph](https://github.com/brendangregg/FlameGraph).
- translate jitted function names, which are in a form like `aot_func#N` or `[module name]#aot_func#N`, into corresponding names in a name section in .wasm
- divide the translated functions into different modules if the module name is specified in the symbol
Usage:
After
``` bash
# collect profiling data in perf.data
$ perf script -i perf.data > out.perf
$ ./FlameGraph/stackcollapse-perf.pl out.perf > out.folded
```
Use this script to translate the function names in out.folded
```
$ python translate_wasm_function_name.py --wabt_home <wabt-installation> --wasm <.wasm> out.folded
# out.folded -> out.translated (plus one out.<module name>.translated per wasm file)
```
"""
import argparse
import os
from pathlib import Path
import re
import shlex
import subprocess
from typing import Dict, List
# parse arguments like "foo=bar,fiz=biz" into a dictionary {"foo": "bar", "fiz": "biz"}
class ParseKVArgs(argparse.Action):
    """
    argparse action that parses a comma separated list of "key=value" items.

    The parsed items are stored on the namespace, under the destination of the
    option, as a dict mapping every key to its value (both are strings).
    A malformed item is reported through parser.error(), i.e. as a regular
    command line usage error.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        parsed = {}
        for item in values.split(","):
            # split on the first "=" only, a value (a file path) may contain "=" itself
            key, separator, value = item.partition("=")
            if not separator or not key or not value:
                # parser.error() is used on purpose: it doesn't need the argparse
                # module itself to be reachable when the action runs
                parser.error(
                    f"argument {option_string}: expect module_name=wasm_file, but got {item!r}"
                )
            parsed[key] = value

        setattr(namespace, self.dest, parsed)
def calculate_import_function_count(
    wasm_objdump_bin: Path, module_names: Dict[str, Path]
) -> Dict[str, int]:
    """
    for every wasm file in <module_names>, calculate the number of functions in the import section.

    using "<wasm_objdump_bin> -j Import -x <wasm_file>"

    :param wasm_objdump_bin: path of the wasm-objdump executable
    :param module_names: module name -> path of the wasm file
    :return: module name -> number of imported functions. It is 0 if
             wasm-objdump writes anything to stderr for the wasm file
    :raises FileNotFoundError: if wasm-objdump or one of the wasm files doesn't exist
    """
    if not wasm_objdump_bin.exists():
        raise FileNotFoundError(f"{wasm_objdump_bin} doesn't exist")

    import_function_counts = {}
    for module_name, wasm_path in module_names.items():
        if not wasm_path.exists():
            raise FileNotFoundError(f"{wasm_path} doesn't exist")

        # pass the arguments as a list, so paths with spaces stay in one piece
        p = subprocess.run(
            [str(wasm_objdump_bin), "-j", "Import", "-x", str(wasm_path)],
            capture_output=True,
            check=False,
            text=True,
        )

        if p.stderr:
            print("No content in import section")
            import_function_counts[module_name] = 0
            continue

        # an imported function looks like
        #  - func[N] sig=M <name> <- module.field
        # in text mode, newlines are always "\n", so use splitlines() instead of os.linesep.
        # lines of other kinds (memory, table, global, ...) are skipped
        import_function_counts[module_name] = sum(
            1
            for line in p.stdout.splitlines()
            if re.match(r"-\s+func", line.strip())
        )

    return import_function_counts
def collect_name_section_content(
    wasm_objdump_bin: Path, module_names: Dict[str, Path]
) -> Dict[str, Dict[int, str]]:
    """
    for every wasm file in <module_names>, get the content of name section.

    execute "wasm_objdump_bin -j name -x wasm_file"

    :param wasm_objdump_bin: path of the wasm-objdump executable
    :param module_names: module name -> path of the wasm file
    :return: module name -> {function index: function name}. The inner dict is
             empty if wasm-objdump writes anything to stderr for the wasm file
    :raises FileNotFoundError: if wasm-objdump or one of the wasm files doesn't exist
    """
    if not wasm_objdump_bin.exists():
        raise FileNotFoundError(f"{wasm_objdump_bin} doesn't exist")

    # - func[N] <__imported_wasi_snapshot_preview1_fd_close>
    func_entry = re.compile(r"- func\[(\d+)\] <(.+)>")

    name_sections = {}
    for module_name, wasm_path in module_names.items():
        if not wasm_path.exists():
            raise FileNotFoundError(f"{wasm_path} doesn't exist")

        # pass the arguments as a list, so paths with spaces stay in one piece
        p = subprocess.run(
            [str(wasm_objdump_bin), "-j", "name", "-x", str(wasm_path)],
            capture_output=True,
            check=False,
            text=True,
        )

        if p.stderr:
            print("No content in name section")
            name_sections[module_name] = {}
            continue

        name_section = {}
        # in text mode, newlines are always "\n", so use splitlines() instead of os.linesep
        for line in p.stdout.splitlines():
            m = func_entry.match(line.strip())
            if not m:
                # not a function entry. skip it instead of aborting the whole run
                continue

            func_index, func_name = m.groups()
            name_section[int(func_index)] = func_name

        name_sections[module_name] = name_section

    return name_sections
def is_stack_check_mode(folded: Path) -> bool:
    """
    check if there is a function name looks like "aot_func_internal#N", it means that WAMR adds a stack check function before the original function.
    """
    with open(folded, "rt", encoding="utf-8") as f:
        return any("aot_func_internal" in line for line in f)


def replace_function_name(
    import_function_counts: Dict[str, int],
    name_sections: Dict[str, Dict[int, str]],
    folded_in: Path,
    module_names: Dict[str, Path],
) -> None:
    """
    read content in <folded_in>. every line contains symbols which are separated by ";".

    Usually, all jitted functions are in the form of "aot_func#N" or "[module name]#aot_func#N".
    N is its function index. After adding the number of imported functions of
    the module, the index is used to find the corresponding function name in the
    name section. A function without a name is translated into "func[index]".

    if there is a function name looks like "aot_func_internal#N", it means that WAMR adds a stack check function before the original function.
    In this case, "aot_func#N" should be translated with "_precheck" as a suffix and "aot_func_internal#N" should be treated as the original one

    A symbol without a module name is resolved against the only wasm file in
    <module_names>. It is an error if there are several wasm files.

    Outputs, next to <folded_in> (the last suffix of <folded_in> is replaced):
    - <stem>.translated, it contains every line
    - <stem>.<module name>.translated for every module, it contains the lines
      whose last jitted function carries that module name

    :raises FileNotFoundError: if <folded_in> doesn't exist
    :raises RuntimeError: if a line is malformed or a symbol can't be mapped to a wasm file
    """
    if not folded_in.exists():
        raise FileNotFoundError(f"{folded_in} doesn't exist")

    stack_check_mode = is_stack_check_mode(folded_in)

    # [module_name]#aot_func#N or aot_func#N, and their aot_func_internal variants.
    # match the whole symbol, so unrelated symbols which just contain "aot_func"
    # are kept as they are
    aot_symbol = re.compile(
        r"(?:\[(?P<module>.*)\]#)?(?P<func>aot_func(?:_internal)?)#(?P<index>\d+)"
    )

    folded_out_files = {}
    default_folded_out = None
    try:
        # every wasm has a translated out.folded, like out.<module_name>.translated
        for module_name in module_names:
            wasm_folded_out_path = folded_in.with_suffix(f".{module_name}.translated")
            print(f"-> write into {wasm_folded_out_path}")
            folded_out_files[module_name] = wasm_folded_out_path.open(
                "wt", encoding="utf-8"
            )

        # Plus a default translated out.folded
        default_folded_out_path = folded_in.with_suffix(".translated")
        print(f"-> write into {default_folded_out_path}")
        default_folded_out = default_folded_out_path.open("wt", encoding="utf-8")

        with folded_in.open("rt", encoding="utf-8") as f_in:
            for line in f_in:
                line = line.strip()
                if not line:
                    continue

                # "sym;sym;...;sym samples"
                m = re.fullmatch(r"(.*) (\d+)", line)
                if not m:
                    raise RuntimeError(f"unexpected line in {folded_in}: {line}")
                syms, samples = m.groups()

                new_line = []
                last_function_module_name = ""
                for sym in syms.split(";"):
                    m = aot_symbol.fullmatch(sym)
                    if not m:
                        new_line.append(sym)
                        continue

                    module_name = m.group("module") or ""

                    # find the wasm file the symbol belongs to
                    if module_name in module_names:
                        wasm_key = module_name
                    elif not module_name and len(module_names) == 1:
                        # no module name in the symbol, it has to be the only wasm file
                        wasm_key = next(iter(module_names))
                    elif not module_name and len(module_names) > 1:
                        raise RuntimeError(
                            f"{sym} doesn't have a module name, but there are multiple wasm files"
                        )
                    else:
                        raise RuntimeError(
                            f"❌ can't find corresponds wasm file for {module_name}"
                        )

                    last_function_module_name = module_name

                    # adjust index. N doesn't count imported functions
                    func_idx = int(m.group("index")) + import_function_counts[wasm_key]

                    func_name = name_sections[wasm_key].get(
                        func_idx, f"func[{func_idx}]"
                    )
                    if module_name:
                        wasm_func_name = f"[Wasm] [{module_name}] {func_name}"
                    else:
                        wasm_func_name = f"[Wasm] {func_name}"

                    # aot_func_internal -> xxx
                    # aot_func --> xxx_precheck
                    if stack_check_mode and m.group("func") == "aot_func":
                        wasm_func_name += "_precheck"

                    new_line.append(wasm_func_name)

                line = ";".join(new_line) + f" {samples}"

                # files are opened in text mode, "\n" is translated to the
                # platform line separator on write
                # always write into the default output
                default_folded_out.write(line + "\n")
                # based on the module name of last function, write into the corresponding output
                if last_function_module_name:
                    folded_out_files[last_function_module_name].write(line + "\n")
    finally:
        # close every output, even if the translation is aborted
        if default_folded_out is not None:
            default_folded_out.close()
        for f in folded_out_files.values():
            f.close()
def main(wabt_home: str, folded: str, module_names: Dict[str, Path]) -> None:
    """
    translate the jitted function names in <folded>.

    :param wabt_home: wabt installation directory, wasm-objdump is expected in its bin directory
    :param folded: an out.folded generated by stackcollapse-perf.pl
    :param module_names: module name -> path of the wasm file
    :raises FileNotFoundError: if <wabt_home>, <folded> or wasm-objdump doesn't exist
    """
    # explicit checks instead of assert, which is stripped under "python -O"
    wabt_home_path = Path(wabt_home)
    if not wabt_home_path.exists():
        raise FileNotFoundError(f"{wabt_home_path} doesn't exist")

    folded_path = Path(folded)
    if not folded_path.exists():
        raise FileNotFoundError(f"{folded_path} doesn't exist")

    wasm_objdump_bin = wabt_home_path.joinpath("bin", "wasm-objdump")
    if not wasm_objdump_bin.exists():
        raise FileNotFoundError(f"wasm-objdump not found in {wabt_home_path}")

    import_function_counts = calculate_import_function_count(
        wasm_objdump_bin, module_names
    )
    name_sections = collect_name_section_content(wasm_objdump_bin, module_names)
    replace_function_name(
        import_function_counts, name_sections, folded_path, module_names
    )
if __name__ == "__main__":
    # named "parser" so that the imported argparse module isn't shadowed
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--wabt_home", required=True, help="wabt home, like /opt/wabt-1.0.33"
    )
    parser.add_argument(
        "--wasm",
        action="append",
        default=[],
        help="wasm files for profiling before. like --wasm apple.wasm --wasm banana.wasm",
    )
    parser.add_argument(
        "--wasm_names",
        action=ParseKVArgs,
        default={},
        metavar="module_name=wasm_file, ...",
        help="multiple wasm files and their module names, like a=apple.wasm,b=banana.wasm,c=cake.wasm",
    )
    parser.add_argument(
        "folded_file",
        help="a out.folded generated by flamegraph/stackcollapse-perf.pl",
    )

    args = parser.parse_args()

    if not args.wasm and not args.wasm_names:
        print("Please specify wasm files with either --wasm or --wasm_names")
        # the exit() builtin is only installed by the site module
        raise SystemExit(1)

    # - only one wasm file. And there is no [module name] in out.folded
    # - multiple wasm files. via `--wasm X --wasm Y --wasm Z`. And there is [module name] in out.folded. use the basename of wasm as the module name
    # - multiple wasm files. via `--wasm_names X=x,Y=y,Z=z`. And there is [module name] in out.folded. use the specified module name
    # if both options are present, --wasm_names wins and --wasm is ignored
    module_names = {}
    if args.wasm_names:
        for name, wasm_path in args.wasm_names.items():
            module_names[name] = Path(wasm_path)
    else:
        # use the basename of wasm as the module name
        for wasm in args.wasm:
            wasm_path = Path(wasm)
            if wasm_path.stem in module_names:
                # two files with the same basename would silently overwrite each other
                parser.error(
                    f"duplicated module name {wasm_path.stem}, please name the modules with --wasm_names"
                )
            module_names[wasm_path.stem] = wasm_path

    main(args.wabt_home, args.folded_file, module_names)

View File

@ -1,213 +0,0 @@
#!/usr/bin/env python3
#
# Copyright (C) 2019 Intel Corporation. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
"""
It is used to translate the names of jitted functions (in out.folded) to the corresponding names in the name section of the .wasm file
Usage:
After
```
$ perf script -i perf.data > out.perf
# fold call stacks
$ ./FlameGraph/stackcollapse-perf.pl out.perf > out.folded
```
Add a step:
```
# translate jitted functions' names
$ python translate_wasm_function_name.py --wabt_home <wabt-installation> --folded out.folded <.wasm>
# out.folded -> out.folded.translated
$ ls out.folded.translated
```
Then
```
# generate flamegraph
$ ./FlameGraph/flamegraph.pl out.folded.translated > perf.wasm.svg
```
"""
import argparse
import os
from pathlib import Path
import re
import shlex
import subprocess
def preflight_check(wabt_home: Path) -> Path:
    """
    locate wasm-objdump in the bin directory of <wabt_home>.

    return its path, or raise a RuntimeError if it doesn't exist
    """
    candidate = wabt_home / "bin" / "wasm-objdump"
    if candidate.exists():
        return candidate

    raise RuntimeError(f"wasm-objdump not found in {wabt_home}")
def collect_import_section_content(wasm_objdump_bin: Path, wasm_file: Path) -> dict:
    """
    execute "wasm_objdump_bin -j Import -x <wasm_file>" and count the imported functions.

    return {"function": X}, X is the number of imported functions. It is 0 if
    the import section doesn't contain any function.
    return {} if wasm-objdump writes anything to stderr.

    raise a FileNotFoundError if wasm-objdump or the wasm file doesn't exist
    """
    if not wasm_objdump_bin.exists():
        raise FileNotFoundError(f"{wasm_objdump_bin} doesn't exist")
    if not wasm_file.exists():
        raise FileNotFoundError(f"{wasm_file} doesn't exist")

    # pass the arguments as a list, so paths with spaces stay in one piece
    p = subprocess.run(
        [str(wasm_objdump_bin), "-j", "Import", "-x", str(wasm_file)],
        capture_output=True,
        check=False,
        text=True,
    )

    if p.stderr:
        print("No content in import section")
        return {}

    # in text mode, newlines are always "\n", so use splitlines() instead of os.linesep.
    # only functions are counted, other kinds of imports are skipped
    function_count = sum(
        1 for line in p.stdout.splitlines() if re.search(r"^-\s+func", line.strip())
    )

    # an import section without any function is valid, don't treat it as a failure
    return {"function": function_count}
def collect_name_section_content(wasm_objdump_bin: Path, wasm_file: Path) -> dict:
    """
    execute "wasm_objdump_bin -j name -x wasm_file" and store the output in a dict
    {1: xxxx, 2: yyyy, 3: zzzz}

    the key is the function index, the value is the function name.

    raise a FileNotFoundError if wasm-objdump or the wasm file doesn't exist.
    raise a RuntimeError if wasm-objdump writes anything to stderr or if no
    function name is found
    """
    if not wasm_objdump_bin.exists():
        raise FileNotFoundError(f"{wasm_objdump_bin} doesn't exist")
    if not wasm_file.exists():
        raise FileNotFoundError(f"{wasm_file} doesn't exist")

    # pass the arguments as a list, so paths with spaces stay in one piece
    p = subprocess.run(
        [str(wasm_objdump_bin), "-j", "name", "-x", str(wasm_file)],
        capture_output=True,
        check=False,
        text=True,
    )

    if p.stderr:
        raise RuntimeError(f"not found name section in {wasm_file}")

    name_section = {}
    # in text mode, newlines are always "\n", so use splitlines() instead of os.linesep
    for line in p.stdout.splitlines():
        # - func[0] <__imported_wasi_snapshot_preview1_fd_close>
        m = re.match(r"- func\[(\d+)\] <(.+)>", line.strip())
        if not m:
            continue

        func_index, func_name = m.groups()
        name_section[int(func_index)] = func_name

    if not name_section:
        raise RuntimeError(f"not found name section in {wasm_file}")

    return name_section
def replace_function_name(
    import_section: dict, name_section: dict, folded_in: str, folded_out: str
) -> None:
    """
    read content in <folded_in> and write the translated content into <folded_out>.

    each line will be like:
    wasm_runtime_call_wasm;wasm_call_function;llvm_jit_call_func_bytecode;wasm_runtime_invoke_native;push_args_end;aot_func_internal#3302;aot_func_internal#3308;asm_sysvec_apic_timer_interrupt;rb_next 1110899

    symbol names are separated by ";", and the line ends with the number of samples.

    if there is a symbol named like "aot_func#XXX" or "aot_func_internal#XXX", it will be replaced with the function name in name section by index.
    The index is XXX plus the number of imported functions (import_section["function"]).
    A function without a name is translated into "function[index]".

    if any "aot_func_internal" is present in the file, "aot_func#XXX" is
    translated with "_precheck" as a suffix and "aot_func_internal#XXX" is
    treated as the original function.

    raise a FileNotFoundError if <folded_in> doesn't exist.
    raise a RuntimeError if a line isn't in the form of "symbols samples"
    """
    folded_in = Path(folded_in)
    if not folded_in.exists():
        raise FileNotFoundError(f"{folded_in} doesn't exist")
    folded_out = Path(folded_out)

    import_function_count = import_section.get("function", 0)

    with folded_in.open("rt", encoding="utf-8") as f_in, folded_out.open(
        "wt", encoding="utf-8"
    ) as f_out:
        # first pass. check if WAMR adds stack check functions
        precheck_mode = any("aot_func_internal" in line for line in f_in)

        # second pass. translate
        f_in.seek(0)
        for line in f_in:
            line = line.strip()
            if not line:
                continue

            m = re.match(r"(.*) (\d+)", line)
            if not m:
                raise RuntimeError(f"unexpected line in {folded_in}: {line}")
            syms, samples = m.groups()

            new_line = []
            for sym in syms.split(";"):
                m = re.match(r"aot_func(_internal)?#(\d+)", sym)
                if not m:
                    new_line.append(sym)
                    continue

                is_internal, index = m.groups()
                # the index in the symbol doesn't count imported functions
                func_idx = int(index) + import_function_count
                if func_idx in name_section:
                    wasm_func_name = f"[Wasm] {name_section[func_idx]}"
                else:
                    # func_idx is adjusted already, don't add the import count again
                    wasm_func_name = f"[Wasm] function[{func_idx}]"

                # aot_func_internal -> xxx
                # aot_func --> xxx_precheck
                if precheck_mode and not is_internal:
                    wasm_func_name += "_precheck"

                new_line.append(wasm_func_name)

            # the file is opened in text mode, "\n" is translated to the
            # platform line separator on write
            f_out.write(";".join(new_line) + f" {samples}" + "\n")

    print(f"⚙️ {folded_in} -> {folded_out}")
def main(wabt_home: str, wasm_file: str, folded: str) -> None:
    """
    translate the jitted function names in <folded>, the result is written into "<folded>.translated".

    the import section and the name section of <wasm_file> are read with the
    wasm-objdump found in <wabt_home>
    """
    wabt_root = Path(wabt_home)
    wasm_path = Path(wasm_file)
    objdump = preflight_check(wabt_root)

    imports = collect_import_section_content(objdump, wasm_path)
    names = collect_name_section_content(objdump, wasm_path)

    translated = folded + ".translated"
    replace_function_name(imports, names, folded, translated)
if __name__ == "__main__":
    # named "parser" so that the imported argparse module isn't shadowed
    parser = argparse.ArgumentParser()
    # both options are mandatory. without them, main() fails later with a TypeError
    parser.add_argument(
        "--folded",
        required=True,
        help="stackcollapse-perf.pl generated, like out.folded",
    )
    parser.add_argument("wasm_file", help="wasm file")
    parser.add_argument(
        "--wabt_home", required=True, help="wabt home, like /opt/wabt-1.0.33"
    )

    args = parser.parse_args()
    main(args.wabt_home, args.wasm_file, args.folded)