AOT call stack optimizations (#3773)

- Implement TINY / STANDARD frame modes - tiny mode is only able to keep track on the IP
  and func idx, STANDARD mode provides more capabilities (parameters, stack pointer etc.).
- Implement FRAME_PER_FUNCTION / FRAME_PER_CALL modes - frame per function adds
  code at the beginning and at the end of each function for allocating / deallocating stack frame,
  whereas in per-call mode the frame is allocated before each call. The exception is call to
  the imported function, where frame-per-function mode also allocates the stack before the
  `call` instruction (as it can't instrument the imported function).

At the moment TINY + FRAME_PER_FUNCTION is automatically enabled in case GC and perf
profiling are disabled and `values` call stack feature is not requested. In all the other cases
STANDARD + FRAME_PER_CALL is used.

STANDARD + FRAME_PER_FUNCTION and TINY + FRAME_PER_CALL are currently not
implemented but possible, and might be enabled in the future.

ps. https://github.com/bytecodealliance/wasm-micro-runtime/issues/3758
This commit is contained in:
Marcin Kolny
2024-09-10 02:05:23 +01:00
committed by GitHub
parent 0599351262
commit cbc2078898
17 changed files with 590 additions and 85 deletions

View File

@ -7,6 +7,7 @@
#include "aot_emit_exception.h"
#include "aot_emit_control.h"
#include "aot_emit_table.h"
#include "aot_stack_frame_comp.h"
#include "../aot/aot_runtime.h"
#if WASM_ENABLE_GC != 0
#include "aot_emit_gc.h"
@ -1403,6 +1404,7 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
LLVMValueRef *param_values = NULL, value_ret = NULL, func;
LLVMValueRef import_func_idx, res;
LLVMValueRef ext_ret, ext_ret_ptr, ext_ret_idx;
LLVMValueRef func_idx_ref;
int32 i, j = 0, param_count, result_count, ext_ret_count;
uint64 total_size;
uint8 wasm_ret_type;
@ -1447,12 +1449,28 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
return false;
}
if (comp_ctx->enable_aux_stack_frame) {
#if WASM_ENABLE_AOT_STACK_FRAME != 0
if (!alloc_frame_for_aot_func(comp_ctx, func_ctx, func_idx))
return false;
#endif
if (comp_ctx->aux_stack_frame_type) {
if (func_idx < import_func_count
&& comp_ctx->call_stack_features.frame_per_function) {
INT_CONST(func_idx_ref, func_idx, I32_TYPE, true);
if (!aot_alloc_frame_per_function_frame_for_aot_func(
comp_ctx, func_ctx, func_idx_ref)) {
return false;
}
}
else if (!comp_ctx->call_stack_features.frame_per_function) {
if (comp_ctx->aux_stack_frame_type
!= AOT_STACK_FRAME_TYPE_STANDARD) {
aot_set_last_error("unsupported mode");
return false;
}
if (!alloc_frame_for_aot_func(comp_ctx, func_ctx, func_idx)) {
return false;
}
}
}
#endif
/* Get param cell number */
param_cell_num = func_type->param_cell_num;
@ -1522,7 +1540,7 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
}
if (func_idx < import_func_count) {
if (comp_ctx->enable_aux_stack_frame
if (comp_ctx->aux_stack_frame_type == AOT_STACK_FRAME_TYPE_STANDARD
&& !commit_params_to_frame_of_import_func(
comp_ctx, func_ctx, func_type, param_values + 1)) {
goto fail;
@ -1813,12 +1831,26 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
}
}
if (comp_ctx->enable_aux_stack_frame) {
#if WASM_ENABLE_AOT_STACK_FRAME != 0
if (!free_frame_for_aot_func(comp_ctx, func_ctx))
goto fail;
#endif
if (comp_ctx->aux_stack_frame_type) {
if (func_idx < import_func_count
&& comp_ctx->call_stack_features.frame_per_function) {
if (!aot_free_frame_per_function_frame_for_aot_func(comp_ctx,
func_ctx)) {
goto fail;
}
}
else if (!comp_ctx->call_stack_features.frame_per_function) {
if (comp_ctx->aux_stack_frame_type
!= AOT_STACK_FRAME_TYPE_STANDARD) {
aot_set_last_error("unsupported mode");
}
if (!free_frame_for_aot_func(comp_ctx, func_ctx)) {
goto fail;
}
}
}
#endif
/* Insert suspend check point */
if (comp_ctx->enable_thread_mgr) {
@ -2439,7 +2471,8 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
goto fail;
}
if (comp_ctx->enable_aux_stack_frame) {
if (comp_ctx->aux_stack_frame_type
&& !comp_ctx->call_stack_features.frame_per_function) {
#if WASM_ENABLE_AOT_STACK_FRAME != 0
/* TODO: use current frame instead of allocating new frame
for WASM_OP_RETURN_CALL_INDIRECT */
@ -2508,7 +2541,13 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
/* Translate call import block */
LLVMPositionBuilderAtEnd(comp_ctx->builder, block_call_import);
if (comp_ctx->enable_aux_stack_frame
if (comp_ctx->aot_frame && comp_ctx->call_stack_features.frame_per_function
&& !aot_alloc_frame_per_function_frame_for_aot_func(comp_ctx, func_ctx,
func_idx)) {
goto fail;
}
if (comp_ctx->aux_stack_frame_type == AOT_STACK_FRAME_TYPE_STANDARD
&& !commit_params_to_frame_of_import_func(comp_ctx, func_ctx, func_type,
param_values + 1)) {
goto fail;
@ -2545,6 +2584,12 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
&& !check_call_return(comp_ctx, func_ctx, res))
goto fail;
if (comp_ctx->aot_frame && comp_ctx->call_stack_features.frame_per_function
&& !aot_free_frame_per_function_frame_for_aot_func(comp_ctx,
func_ctx)) {
goto fail;
}
block_curr = LLVMGetInsertBlock(comp_ctx->builder);
for (i = 0; i < func_result_count; i++) {
LLVMAddIncoming(result_phis[i], &value_rets[i], &block_curr, 1);
@ -2629,7 +2674,8 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
PUSH(result_phis[i], func_type->types[func_param_count + i]);
}
if (comp_ctx->enable_aux_stack_frame) {
if (comp_ctx->aux_stack_frame_type
&& !comp_ctx->call_stack_features.frame_per_function) {
#if WASM_ENABLE_AOT_STACK_FRAME != 0
if (!free_frame_for_aot_func(comp_ctx, func_ctx))
goto fail;
@ -2936,7 +2982,8 @@ aot_compile_op_call_ref(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
goto fail;
}
if (comp_ctx->enable_aux_stack_frame) {
if (comp_ctx->aux_stack_frame_type
&& !comp_ctx->call_stack_features.frame_per_function) {
#if WASM_ENABLE_AOT_STACK_FRAME != 0
/* TODO: use current frame instead of allocating new frame
for WASM_OP_RETURN_CALL_REF */
@ -3005,7 +3052,7 @@ aot_compile_op_call_ref(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
/* Translate call import block */
LLVMPositionBuilderAtEnd(comp_ctx->builder, block_call_import);
if (comp_ctx->enable_aux_stack_frame
if (comp_ctx->aux_stack_frame_type == AOT_STACK_FRAME_TYPE_STANDARD
&& !commit_params_to_frame_of_import_func(comp_ctx, func_ctx, func_type,
param_values + 1)) {
goto fail;
@ -3133,7 +3180,8 @@ aot_compile_op_call_ref(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
PUSH(result_phis[i], func_type->types[func_param_count + i]);
}
if (comp_ctx->enable_aux_stack_frame) {
if (comp_ctx->aux_stack_frame_type
&& !comp_ctx->call_stack_features.frame_per_function) {
#if WASM_ENABLE_AOT_STACK_FRAME != 0
if (!free_frame_for_aot_func(comp_ctx, func_ctx))
goto fail;