AOT call stack optimizations (#3773)

- Implement TINY / STANDARD frame modes - TINY mode can only keep track of the IP
  and function index, whereas STANDARD mode provides more capabilities (parameters, stack pointer, etc.).
- Implement FRAME_PER_FUNCTION / FRAME_PER_CALL modes - frame-per-function mode adds
  code at the beginning and at the end of each function to allocate / deallocate the stack frame,
  whereas in frame-per-call mode the frame is allocated before each call. The exception is a call to
  an imported function, where frame-per-function mode also allocates the stack frame before the
  `call` instruction (as it can't instrument the imported function).

At the moment, TINY + FRAME_PER_FUNCTION is automatically enabled when GC and perf
profiling are disabled and the `values` call stack feature is not requested. In all other cases,
STANDARD + FRAME_PER_CALL is used.

STANDARD + FRAME_PER_FUNCTION and TINY + FRAME_PER_CALL are currently not
implemented but possible, and might be enabled in the future.

ps. https://github.com/bytecodealliance/wasm-micro-runtime/issues/3758
This commit is contained in:
Marcin Kolny
2024-09-10 02:05:23 +01:00
committed by GitHub
parent 0599351262
commit cbc2078898
17 changed files with 590 additions and 85 deletions

View File

@ -307,6 +307,13 @@ finish:
return ret;
}
/**
 * Check whether the given compilation options are compatible with the
 * tiny stack frame mode.
 *
 * @param opt the AOT compilation options to inspect
 *
 * @return true only if the `values` call stack feature is not set and
 *         both GC and perf profiling are disabled, false otherwise
 */
static bool
can_enable_tiny_frame(const AOTCompOption *opt)
{
    /* Any one of these settings rules out the tiny frame. */
    if (opt->call_stack_features.values) {
        return false;
    }
    if (opt->enable_gc) {
        return false;
    }
    if (opt->enable_perf_profiling) {
        return false;
    }
    return true;
}
static uint32
resolve_segue_flags(char *str_flags)
{
@ -403,9 +410,7 @@ main(int argc, char *argv[])
option.enable_bulk_memory = true;
option.enable_ref_types = true;
option.enable_gc = false;
/* Set all the features to true by default */
memset(&option.call_stack_features, 1, sizeof(AOTCallStackFeatures));
aot_call_stack_features_init_default(&option.call_stack_features);
/* Process options */
for (argc--, argv++; argc > 0 && argv[0][0] == '-'; argc--, argv++) {
@ -519,7 +524,7 @@ main(int argc, char *argv[])
option.enable_aux_stack_check = false;
}
else if (!strcmp(argv[0], "--enable-dump-call-stack")) {
option.enable_aux_stack_frame = true;
option.aux_stack_frame_type = AOT_STACK_FRAME_TYPE_STANDARD;
}
else if (!strncmp(argv[0], "--call-stack-features=", 22)) {
/* Reset all the features, only enable the user-defined ones */
@ -535,7 +540,7 @@ main(int argc, char *argv[])
}
}
else if (!strcmp(argv[0], "--enable-perf-profiling")) {
option.enable_aux_stack_frame = true;
option.aux_stack_frame_type = AOT_STACK_FRAME_TYPE_STANDARD;
option.enable_perf_profiling = true;
}
else if (!strcmp(argv[0], "--enable-memory-profiling")) {
@ -550,7 +555,7 @@ main(int argc, char *argv[])
option.is_indirect_mode = true;
}
else if (!strcmp(argv[0], "--enable-gc")) {
option.enable_aux_stack_frame = true;
option.aux_stack_frame_type = AOT_STACK_FRAME_TYPE_STANDARD;
option.enable_gc = true;
}
else if (!strcmp(argv[0], "--disable-llvm-intrinsics")) {
@ -652,6 +657,14 @@ main(int argc, char *argv[])
if (!use_dummy_wasm && (argc == 0 || !out_file_name))
PRINT_HELP_AND_EXIT();
if (option.aux_stack_frame_type == AOT_STACK_FRAME_TYPE_STANDARD
&& can_enable_tiny_frame(&option)) {
LOG_VERBOSE("Use tiny frame mode for stack frames");
option.aux_stack_frame_type = AOT_STACK_FRAME_TYPE_TINY;
/* for now we only enable frame per function for a TINY frame mode */
option.call_stack_features.frame_per_function = true;
}
if (!size_level_set) {
/**
* Set opt level to 1 by default for Windows and MacOS as