AOT call stack optimizations (#3773)

- Implement TINY / STANDARD frame modes - tiny mode is only able to keep track on the IP
  and func idx, STANDARD mode provides more capabilities (parameters, stack pointer etc.).
- Implement FRAME_PER_FUNCTION / FRAME_PER_CALL modes - frame per function adds
  code at the beginning and at the end of each function for allocating / deallocating stack frame,
  whereas in per-call mode the frame is allocated before each call. The exception is call to
  the imported function, where frame-per-function mode also allocates the stack before the
  `call` instruction (as it can't instrument the imported function).

At the moment TINY + FRAME_PER_FUNCTION is automatically enabled in case GC and perf
profiling are disabled and `values` call stack feature is not requested. In all the other cases
STANDARD + FRAME_PER_CALL is used.

STANDARD + FRAME_PER_FUNCTION and TINY + FRAME_PER_CALL are currently not
implemented but possible, and might be enabled in the future.

ps. https://github.com/bytecodealliance/wasm-micro-runtime/issues/3758
This commit is contained in:
Marcin Kolny
2024-09-10 02:05:23 +01:00
committed by GitHub
parent 0599351262
commit cbc2078898
17 changed files with 590 additions and 85 deletions

View File

@ -16,6 +16,7 @@
#include "aot_emit_parametric.h"
#include "aot_emit_table.h"
#include "aot_emit_gc.h"
#include "aot_stack_frame_comp.h"
#include "simd/simd_access_lanes.h"
#include "simd/simd_bitmask_extracts.h"
#include "simd/simd_bit_shifts.h"
@ -253,6 +254,13 @@ store_value(AOTCompContext *comp_ctx, LLVMValueRef value, uint8 value_type,
return true;
}
void
aot_call_stack_features_init_default(AOTCallStackFeatures *features)
{
memset(features, 1, sizeof(AOTCallStackFeatures));
features->frame_per_function = false;
}
bool
aot_frame_store_value(AOTCompContext *comp_ctx, LLVMValueRef value,
uint8 value_type, LLVMValueRef cur_frame, uint32 offset)
@ -573,9 +581,10 @@ aot_gen_commit_values(AOTCompFrame *frame)
return true;
}
bool
aot_gen_commit_ip(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
LLVMValueRef ip_value, bool is_64bit)
static bool
aot_standard_frame_gen_commit_ip(AOTCompContext *comp_ctx,
AOTFuncContext *func_ctx,
LLVMValueRef ip_value, bool is_64bit)
{
LLVMValueRef cur_frame = func_ctx->cur_frame;
LLVMValueRef value_offset, value_addr, value_ptr;
@ -613,6 +622,23 @@ aot_gen_commit_ip(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
return true;
}
bool
aot_gen_commit_ip(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
LLVMValueRef ip_value, bool is_64bit)
{
switch (comp_ctx->aux_stack_frame_type) {
case AOT_STACK_FRAME_TYPE_STANDARD:
return aot_standard_frame_gen_commit_ip(comp_ctx, func_ctx,
ip_value, is_64bit);
case AOT_STACK_FRAME_TYPE_TINY:
return aot_tiny_frame_gen_commit_ip(comp_ctx, func_ctx, ip_value);
default:
aot_set_last_error(
"unsupported mode when generating commit_ip code");
return false;
}
}
bool
aot_gen_commit_sp_ip(AOTCompFrame *frame, bool commit_sp, bool commit_ip)
{
@ -962,6 +988,7 @@ static bool
aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index)
{
AOTFuncContext *func_ctx = comp_ctx->func_ctxes[func_index];
LLVMValueRef func_index_ref;
uint8 *frame_ip = func_ctx->aot_func->code, opcode, *p_f32, *p_f64;
uint8 *frame_ip_end = frame_ip + func_ctx->aot_func->code_size;
uint8 *param_types = NULL;
@ -984,16 +1011,27 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index)
LLVMMetadataRef location;
#endif
if (comp_ctx->enable_aux_stack_frame) {
/* Start to translate the opcodes */
LLVMPositionBuilderAtEnd(
comp_ctx->builder,
func_ctx->block_stack.block_list_head->llvm_entry_block);
if (comp_ctx->aux_stack_frame_type
&& comp_ctx->call_stack_features.frame_per_function) {
INT_CONST(func_index_ref,
func_index + comp_ctx->comp_data->import_func_count, I32_TYPE,
true);
if (!aot_alloc_frame_per_function_frame_for_aot_func(comp_ctx, func_ctx,
func_index_ref)) {
return false;
}
}
if (comp_ctx->aux_stack_frame_type) {
if (!init_comp_frame(comp_ctx, func_ctx, func_index)) {
return false;
}
}
/* Start to translate the opcodes */
LLVMPositionBuilderAtEnd(
comp_ctx->builder,
func_ctx->block_stack.block_list_head->llvm_entry_block);
while (frame_ip < frame_ip_end) {
opcode = *frame_ip++;