AOT call stack optimizations (#3773)

- Implement TINY / STANDARD frame modes - TINY mode can only keep track of the IP
  and function index, while STANDARD mode provides more capabilities (parameters, stack pointer, etc.).
- Implement FRAME_PER_FUNCTION / FRAME_PER_CALL modes - frame-per-function mode adds
  code at the beginning and at the end of each function for allocating / deallocating the stack frame,
  whereas in frame-per-call mode the frame is allocated before each call. The exception is a call to
  an imported function, where frame-per-function mode also allocates the stack frame before the
  `call` instruction (as it can't instrument the imported function).

At the moment TINY + FRAME_PER_FUNCTION is automatically enabled when GC and perf
profiling are disabled and the `values` call stack feature is not requested. In all other cases
STANDARD + FRAME_PER_CALL is used.

STANDARD + FRAME_PER_FUNCTION and TINY + FRAME_PER_CALL are currently not
implemented but possible, and might be enabled in the future.

ps. https://github.com/bytecodealliance/wasm-micro-runtime/issues/3758
This commit is contained in:
Marcin Kolny
2024-09-10 02:05:23 +01:00
committed by GitHub
parent 0599351262
commit cbc2078898
17 changed files with 590 additions and 85 deletions

View File

@ -6,6 +6,7 @@
#include "aot_emit_control.h"
#include "aot_compiler.h"
#include "aot_emit_exception.h"
#include "aot_stack_frame_comp.h"
#if WASM_ENABLE_GC != 0
#include "aot_emit_gc.h"
#endif
@ -38,13 +39,24 @@ format_block_name(char *name, uint32 name_size, uint32 block_index,
snprintf(name, name_size, "%s", "func_end");
}
#define CREATE_BLOCK(new_llvm_block, name) \
do { \
if (!(new_llvm_block = LLVMAppendBasicBlockInContext( \
comp_ctx->context, func_ctx->func, name))) { \
aot_set_last_error("add LLVM basic block failed."); \
goto fail; \
} \
#define CREATE_BLOCK(new_llvm_block, name) \
do { \
if (!(new_llvm_block = LLVMAppendBasicBlockInContext( \
comp_ctx->context, func_ctx->func, name))) { \
aot_set_last_error("add LLVM basic block failed."); \
goto fail; \
} \
if (!strcmp(name, "func_end") && comp_ctx->aux_stack_frame_type \
&& comp_ctx->call_stack_features.frame_per_function) { \
LLVMBasicBlockRef cur_block = \
LLVMGetInsertBlock(comp_ctx->builder); \
SET_BUILDER_POS(new_llvm_block); \
if (!aot_free_frame_per_function_frame_for_aot_func(comp_ctx, \
func_ctx)) { \
goto fail; \
} \
SET_BUILDER_POS(cur_block); \
} \
} while (0)
#define CURR_BLOCK() LLVMGetInsertBlock(comp_ctx->builder)
@ -93,6 +105,11 @@ format_block_name(char *name, uint32 name_size, uint32 block_index,
goto fail; \
} \
SET_BUILDER_POS(block->llvm_end_block); \
LLVMValueRef first_instr = \
get_first_non_phi(block->llvm_end_block); \
if (first_instr) { \
LLVMPositionBuilderBefore(comp_ctx->builder, first_instr); \
} \
for (_i = 0; _i < block->result_count; _i++) { \
if (!(block->result_phis[_i] = LLVMBuildPhi( \
comp_ctx->builder, \
@ -158,6 +175,18 @@ get_target_block(AOTFuncContext *func_ctx, uint32 br_depth)
return block;
}
/*
 * Walk the instructions of `block` from the start and return the first one
 * that is not a PHI node. A null reference is returned when the walk runs
 * out of instructions without finding one.
 */
LLVMValueRef
get_first_non_phi(LLVMBasicBlockRef block)
{
    LLVMValueRef cursor;

    for (cursor = LLVMGetFirstInstruction(block); cursor;
         cursor = LLVMGetNextInstruction(cursor)) {
        /* Stop on the first instruction that is not a PHI node */
        if (!LLVMIsAPHINode(cursor)) {
            break;
        }
    }

    return cursor;
}
static void
clear_frame_locals(AOTCompFrame *aot_frame)
{
@ -1361,6 +1390,13 @@ aot_compile_op_return(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
(*p_frame_ip - 1) - comp_ctx->comp_data->wasm_module->buf_code);
#endif
if (comp_ctx->aux_stack_frame_type
&& comp_ctx->call_stack_features.frame_per_function
&& !aot_free_frame_per_function_frame_for_aot_func(comp_ctx,
func_ctx)) {
return false;
}
if (block_func->result_count) {
/* Store extra result values to function parameters */
for (i = 0; i < block_func->result_count - 1; i++) {