diff --git a/core/iwasm/compilation/aot.h b/core/iwasm/compilation/aot.h index c67251a6..c1601bdb 100644 --- a/core/iwasm/compilation/aot.h +++ b/core/iwasm/compilation/aot.h @@ -19,6 +19,15 @@ extern "C" { #define AOT_FUNC_PREFIX "aot_func#" #endif +#ifndef AOT_FUNC_INTERNAL_PREFIX +#define AOT_FUNC_INTERNAL_PREFIX "aot_func_internal#" +#endif + +#ifndef AOT_STACK_SIZES_NAME +#define AOT_STACK_SIZES_NAME "aot_stack_sizes" +#endif +extern const char *aot_stack_sizes_name; + typedef InitializerExpression AOTInitExpr; typedef WASMType AOTFuncType; typedef WASMExport AOTExport; diff --git a/core/iwasm/compilation/aot_compiler.c b/core/iwasm/compilation/aot_compiler.c index 06235fe3..bbb31384 100644 --- a/core/iwasm/compilation/aot_compiler.c +++ b/core/iwasm/compilation/aot_compiler.c @@ -2761,6 +2761,16 @@ aot_compile_wasm(AOTCompContext *comp_ctx) aot_handle_llvm_errmsg("failed to addIRModule", err); return false; } + + if (comp_ctx->stack_sizes != NULL) { + LLVMOrcJITTargetAddress addr; + if ((err = LLVMOrcLLLazyJITLookup(comp_ctx->orc_jit, &addr, + aot_stack_sizes_name))) { + aot_handle_llvm_errmsg("failed to look up stack_sizes", err); + return false; + } + comp_ctx->jit_stack_sizes = (uint32 *)addr; + } } return true; @@ -2815,6 +2825,55 @@ aot_emit_llvm_file(AOTCompContext *comp_ctx, const char *file_name) return true; } +static bool +aot_move_file(const char *dest, const char *src) +{ + FILE *dfp = fopen(dest, "w"); + FILE *sfp = fopen(src, "r"); + size_t rsz; + char buf[128]; + bool success = false; + + if (dfp == NULL || sfp == NULL) { + LOG_DEBUG("open error %s %s", dest, src); + goto fail; + } + do { + rsz = fread(buf, 1, sizeof(buf), sfp); + if (rsz > 0) { + size_t wsz = fwrite(buf, 1, rsz, dfp); + if (wsz < rsz) { + LOG_DEBUG("write error"); + goto fail; + } + } + if (rsz < sizeof(buf)) { + if (ferror(sfp)) { + LOG_DEBUG("read error"); + goto fail; + } + } + } while (rsz > 0); + success = true; +fail: + if (dfp != NULL) { + if (fclose(dfp)) { + LOG_DEBUG("close error"); + success = false; + } + if (!success) { + (void)unlink(dest); + } + } + if (sfp != NULL) { + (void)fclose(sfp); + } + if (success) { + (void)unlink(src); + } + return success; +} + bool aot_emit_object_file(AOTCompContext *comp_ctx, char *file_name) { @@ -2830,7 +2889,25 @@ aot_emit_object_file(AOTCompContext *comp_ctx, char *file_name) int ret; if (comp_ctx->external_llc_compiler) { + const char *stack_usage_flag = ""; char bc_file_name[64]; + char su_file_name[65]; /* See the comment below */ + + if (comp_ctx->stack_usage_file != NULL) { + /* + * Note: we know the caller uses 64 byte buffer for + * file_name. It will get 1 byte longer because we + * replace ".o" with ".su". + */ + size_t len = strlen(file_name); + bh_assert(len + 1 <= sizeof(su_file_name)); + bh_assert(len > 3); + bh_assert(file_name[len - 2] == '.'); + bh_assert(file_name[len - 1] == 'o'); + snprintf(su_file_name, sizeof(su_file_name), "%.*s.su", + (int)(len - 2), file_name); + stack_usage_flag = " -fstack-usage"; + } if (!aot_generate_tempfile_name("wamrc-bc", "bc", bc_file_name, sizeof(bc_file_name))) { @@ -2842,8 +2919,8 @@ aot_emit_object_file(AOTCompContext *comp_ctx, char *file_name) return false; } - snprintf(cmd, sizeof(cmd), "%s %s -o %s %s", - comp_ctx->external_llc_compiler, + snprintf(cmd, sizeof(cmd), "%s%s %s -o %s %s", + comp_ctx->external_llc_compiler, stack_usage_flag, comp_ctx->llc_compiler_flags ? 
comp_ctx->llc_compiler_flags : "-O3 -c", file_name, bc_file_name); @@ -2858,6 +2935,22 @@ aot_emit_object_file(AOTCompContext *comp_ctx, char *file_name) "with external LLC compiler."); return false; } + if (comp_ctx->stack_usage_file != NULL) { + /* + * move the temporary .su file to the specified location. + * + * Note: the former is automatimally inferred from the output + * filename (file_name here) by clang. + * + * Note: the latter might be user-specified. + * (wamrc --stack-usage=) + */ + if (!aot_move_file(comp_ctx->stack_usage_file, su_file_name)) { + aot_set_last_error("failed to move su file."); + (void)unlink(su_file_name); + return false; + } + } } else if (comp_ctx->external_asm_compiler) { char asm_file_name[64]; diff --git a/core/iwasm/compilation/aot_emit_aot_file.c b/core/iwasm/compilation/aot_emit_aot_file.c index 893e3991..75c8cd99 100644 --- a/core/iwasm/compilation/aot_emit_aot_file.c +++ b/core/iwasm/compilation/aot_emit_aot_file.c @@ -140,6 +140,10 @@ typedef struct AOTObjectData { AOTSymbolList symbol_list; AOTRelocationGroup *relocation_groups; uint32 relocation_group_count; + + const char *stack_sizes_section_name; + uint32 stack_sizes_offset; + uint32 *stack_sizes; } AOTObjectData; #if 0 @@ -1634,7 +1638,31 @@ aot_emit_object_data_section_info(uint8 *buf, uint8 *buf_end, uint32 *p_offset, EMIT_STR(data_section->name); offset = align_uint(offset, 4); EMIT_U32(data_section->size); - EMIT_BUF(data_section->data, data_section->size); + if (obj_data->stack_sizes_section_name != NULL + && !strcmp(obj_data->stack_sizes_section_name, + data_section->name)) { + uint32 ss_offset = obj_data->stack_sizes_offset; + uint32 ss_size = + obj_data->func_count * sizeof(*obj_data->stack_sizes); + LOG_VERBOSE("Replacing stack_sizes in %s section, offset %" PRIu32 + ", size %" PRIu32, + obj_data->stack_sizes_section_name, ss_offset, ss_size); + bh_assert(ss_offset + ss_size <= data_section->size); + /* 0 .. ss_offset */ + if (ss_offset > 0) { + EMIT_BUF(data_section->data, ss_offset); + } + /* ss_offset .. ss_offset+ss_size */ + EMIT_BUF(obj_data->stack_sizes, ss_size); + /* ss_offset+ss_size .. data_section->size */ + if (data_section->size > ss_offset + ss_size) { + EMIT_BUF(data_section->data + ss_offset + ss_size, + data_section->size - (ss_offset + ss_size)); + } + } + else { + EMIT_BUF(data_section->data, data_section->size); + } } if (offset - *p_offset @@ -2418,6 +2446,293 @@ aot_resolve_object_data_sections(AOTObjectData *obj_data) return true; } +static bool +read_stack_usage_file(const AOTCompContext *comp_ctx, const char *filename, + uint32 *sizes, uint32 count) +{ + FILE *fp = NULL; + if (filename == NULL) { + aot_set_last_error("no stack usage file is specified."); + return false; + } + fp = fopen(filename, "r"); + if (fp == NULL) { + LOG_ERROR("failed to open stack usage file: %s", filename); + goto fail; + } + /* + * the file consists of lines like: + * + * WASM Module:aot_func#9 72 static + */ + const char *aot_func_prefix = AOT_FUNC_PREFIX; + const char *aot_func_internal_prefix = AOT_FUNC_INTERNAL_PREFIX; + uint32 precheck_found = 0; + uint32 precheck_stack_size_max = 0; + uint32 precheck_stack_size_min = UINT32_MAX; + uint32 found = 0; + while (true) { + const char *prefix; + char line[100]; + char *cp = fgets(line, sizeof(line), fp); + char *fn; + char *colon; + uintmax_t func_idx; + uintmax_t sz; + int ret; + + if (cp == NULL) { + break; + } + /* + * Note: strrchr (not strchr) because a module name can contain + * colons. 
+ */ + colon = strrchr(cp, ':'); + if (colon == NULL) { + goto fail; + } + fn = strstr(colon, aot_func_prefix); + if (fn != NULL) { + prefix = aot_func_prefix; + } + else { + fn = strstr(colon, aot_func_internal_prefix); + if (fn == NULL) { + LOG_ERROR("failed to parse stack usage line: %s", cp); + goto fail; + } + prefix = aot_func_internal_prefix; + } + ret = sscanf(fn + strlen(prefix), "%ju %ju static", &func_idx, &sz); + if (ret != 2) { + goto fail; + } + if (sz > UINT32_MAX) { + goto fail; + } + if (func_idx > UINT32_MAX) { + goto fail; + } + if (func_idx >= count) { + goto fail; + } + if (prefix == aot_func_prefix) { + if (sz < precheck_stack_size_min) { + precheck_stack_size_min = sz; + } + if (sz > precheck_stack_size_max) { + precheck_stack_size_max = sz; + } + precheck_found++; + continue; + } + sizes[func_idx] = sz; + found++; + } + fclose(fp); + if (precheck_found != count) { + LOG_ERROR("%" PRIu32 " precheck entries found while %" PRIu32 + " entries are expected", + precheck_found, count); + return false; + } + if (found != count) { + /* + * LLVM seems to eliminate calls to an empty function + * (and eliminate the function) even if it's marked noinline. + */ + LOG_VERBOSE("%" PRIu32 " entries found while %" PRIu32 + " entries are expected. Maybe LLVM optimization eliminated " + "some functions.", + found, count); + } + if (precheck_stack_size_min != precheck_stack_size_max) { + /* + * Note: this is too strict. + * + * actually, the stack consumption of the precheck functions + * can depend on the type of them. + * that is, depending on various factors including + * calling conventions and compilers, a function with many + * parameters can consume more stack, even if it merely does + * a tail-call to another function. + */ + bool musttail = aot_target_precheck_can_use_musttail(comp_ctx); + if (musttail) { + LOG_WARNING( + "precheck functions use variable amount of stack. (%" PRIu32 + " - %" PRIu32 ")", + precheck_stack_size_min, precheck_stack_size_max); + } + else { + LOG_VERBOSE("precheck functions use %" PRIu32 " - %" PRIu32 + " bytes of stack.", + precheck_stack_size_min, precheck_stack_size_max); + } + } + else { + LOG_VERBOSE("precheck functions use %" PRIu32 " bytes of stack.", + precheck_stack_size_max); + } + if (precheck_stack_size_max >= 1024) { + LOG_WARNING("precheck functions themselves consume relatively large " + "amount of stack (%" PRIu32 + "). 
Please ensure the runtime has large enough " + "WASM_STACK_GUARD_SIZE.", + precheck_stack_size_max); + } + return true; +fail: + if (fp != NULL) + fclose(fp); + aot_set_last_error("failed to read stack usage file."); + return false; +} + +static bool +aot_resolve_stack_sizes(AOTCompContext *comp_ctx, AOTObjectData *obj_data) +{ + LLVMSectionIteratorRef sec_itr = NULL; + LLVMSymbolIteratorRef sym_itr; + const char *name; + + if (!(sym_itr = LLVMObjectFileCopySymbolIterator(obj_data->binary))) { + aot_set_last_error("llvm get symbol iterator failed."); + return false; + } + + while (!LLVMObjectFileIsSymbolIteratorAtEnd(obj_data->binary, sym_itr)) { + if ((name = LLVMGetSymbolName(sym_itr)) + && !strcmp(name, aot_stack_sizes_name)) { + uint64 sz = LLVMGetSymbolSize(sym_itr); + if (sz != sizeof(uint32) * obj_data->func_count) { + aot_set_last_error("stack_sizes had unexpected size."); + goto fail; + } + uint64 addr = LLVMGetSymbolAddress(sym_itr); + if (!(sec_itr = + LLVMObjectFileCopySectionIterator(obj_data->binary))) { + aot_set_last_error("llvm get section iterator failed."); + goto fail; + } + LLVMMoveToContainingSection(sec_itr, sym_itr); + const char *sec_name = LLVMGetSectionName(sec_itr); + LOG_VERBOSE("stack_sizes found in section %s offset %" PRIu64 ".", + sec_name, addr); + /* + * Note: We can't always modify stack_sizes in-place. + * Eg. When WAMRC_LLC_COMPILER is used, LLVM sometimes uses + * read-only mmap of the temporary file to back + * LLVMGetSectionContents. + */ + const uint32 *ro_stack_sizes = + (const uint32 *)(LLVMGetSectionContents(sec_itr) + addr); + uint32 i; + for (i = 0; i < obj_data->func_count; i++) { + /* Note: -1 == AOT_NEG_ONE from aot_create_stack_sizes */ + if (ro_stack_sizes[i] != (uint32)-1) { + aot_set_last_error("unexpected data in stack_sizes."); + goto fail; + } + } + if (addr > UINT32_MAX) { + aot_set_last_error("too large stack_sizes offset."); + goto fail; + } + /* + * Record section/offset and construct a copy of stack_sizes. + * aot_emit_object_data_section_info will emit this copy. + */ + obj_data->stack_sizes_section_name = sec_name; + obj_data->stack_sizes_offset = addr; + obj_data->stack_sizes = wasm_runtime_malloc( + obj_data->func_count * sizeof(*obj_data->stack_sizes)); + if (obj_data->stack_sizes == NULL) { + aot_set_last_error("failed to allocate memory."); + goto fail; + } + uint32 *stack_sizes = obj_data->stack_sizes; + for (i = 0; i < obj_data->func_count; i++) { + stack_sizes[i] = (uint32)-1; + } + if (!read_stack_usage_file(comp_ctx, comp_ctx->stack_usage_file, + stack_sizes, obj_data->func_count)) { + goto fail; + } + for (i = 0; i < obj_data->func_count; i++) { + const AOTFuncContext *func_ctx = comp_ctx->func_ctxes[i]; + bool musttail = aot_target_precheck_can_use_musttail(comp_ctx); + unsigned int stack_consumption_to_call_wrapped_func = + musttail ? 0 + : aot_estimate_stack_usage_for_function_call( + comp_ctx, func_ctx->aot_func->func_type); + + /* + * LLVM seems to eliminate calls to an empty function + * (and eliminate the function) even if it's marked noinline. + * + * Note: -1 == AOT_NEG_ONE from aot_create_stack_sizes + */ + if (stack_sizes[i] == (uint32)-1) { + if (func_ctx->stack_consumption_for_func_call != 0) { + /* + * This happens if a function calling another + * function has been optimized out. 
+ * + * for example, + * + * (func $func + * (local i32) + * local.get 0 + * if + * call $another + * end + * ) + */ + LOG_VERBOSE("AOT func#%" PRIu32 + " had call(s) but eliminated?", + i); + } + else { + LOG_VERBOSE("AOT func#%" PRIu32 " eliminated?", i); + } + stack_sizes[i] = 0; + } + else { + LOG_VERBOSE("AOT func#%" PRIu32 " stack_size %u + %" PRIu32 + " + %u", + i, stack_consumption_to_call_wrapped_func, + stack_sizes[i], + func_ctx->stack_consumption_for_func_call); + if (UINT32_MAX - stack_sizes[i] + < func_ctx->stack_consumption_for_func_call) { + aot_set_last_error("stack size overflow."); + goto fail; + } + stack_sizes[i] += func_ctx->stack_consumption_for_func_call; + if (UINT32_MAX - stack_sizes[i] + < stack_consumption_to_call_wrapped_func) { + aot_set_last_error("stack size overflow."); + goto fail; + } + stack_sizes[i] += stack_consumption_to_call_wrapped_func; + } + } + LLVMDisposeSectionIterator(sec_itr); + LLVMDisposeSymbolIterator(sym_itr); + return true; + } + LLVMMoveToNextSymbol(sym_itr); + } + aot_set_last_error("stack_sizes not found."); +fail: + if (sec_itr) + LLVMDisposeSectionIterator(sec_itr); + LLVMDisposeSymbolIterator(sym_itr); + return false; +} + static bool aot_resolve_functions(AOTCompContext *comp_ctx, AOTObjectData *obj_data) { @@ -2429,6 +2744,10 @@ aot_resolve_functions(AOTCompContext *comp_ctx, AOTObjectData *obj_data) /* allocate memory for aot function */ obj_data->func_count = comp_ctx->comp_data->func_count; if (obj_data->func_count) { + if ((comp_ctx->enable_stack_bound_check + || comp_ctx->enable_stack_estimation) + && !aot_resolve_stack_sizes(comp_ctx, obj_data)) + return false; total_size = (uint32)sizeof(AOTObjectFunc) * obj_data->func_count; if (!(obj_data->funcs = wasm_runtime_malloc(total_size))) { aot_set_last_error("allocate memory for functions failed."); @@ -2922,6 +3241,8 @@ aot_obj_data_destroy(AOTObjectData *obj_data) obj_data->relocation_group_count); if (obj_data->symbol_list.len) destroy_relocation_symbol_list(&obj_data->symbol_list); + if (obj_data->stack_sizes) + wasm_runtime_free(obj_data->stack_sizes); wasm_runtime_free(obj_data); } diff --git a/core/iwasm/compilation/aot_emit_function.c b/core/iwasm/compilation/aot_emit_function.c index cce66429..aa837a3c 100644 --- a/core/iwasm/compilation/aot_emit_function.c +++ b/core/iwasm/compilation/aot_emit_function.c @@ -366,143 +366,6 @@ fail: #endif /* end of (WASM_ENABLE_DUMP_CALL_STACK != 0) \ || (WASM_ENABLE_PERF_PROFILING != 0) */ -static bool -record_stack_usage(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, - uint32 callee_cell_num) -{ - LLVMBasicBlockRef block_curr = LLVMGetInsertBlock(comp_ctx->builder); - LLVMBasicBlockRef block_update; - LLVMBasicBlockRef block_after_update; - LLVMValueRef callee_local_size, new_sp, cmp; - LLVMValueRef native_stack_top_min; - LLVMTypeRef ptrdiff_type; - if (comp_ctx->pointer_size == sizeof(uint64_t)) { - ptrdiff_type = I64_TYPE; - } - else { - ptrdiff_type = I32_TYPE; - } - - /* - * new_sp = last_alloca - callee_local_size; - * if (*native_stack_top_min_addr > new_sp) { - * *native_stack_top_min_addr = new_sp; - * } - */ - - if (!(callee_local_size = LLVMConstInt( - ptrdiff_type, -(int64_t)callee_cell_num * 4, true))) { - aot_set_last_error("llvm build const failed."); - return false; - } - if (!(new_sp = LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, - func_ctx->last_alloca, - &callee_local_size, 1, "new_sp"))) { - aot_set_last_error("llvm build gep failed"); - return false; - } - if (!(native_stack_top_min = LLVMBuildLoad2( 
- comp_ctx->builder, OPQ_PTR_TYPE, - func_ctx->native_stack_top_min_addr, "native_stack_top_min"))) { - aot_set_last_error("llvm build load failed"); - return false; - } - if (!(cmp = LLVMBuildICmp(comp_ctx->builder, LLVMIntULT, new_sp, - native_stack_top_min, "cmp"))) { - aot_set_last_error("llvm build icmp failed."); - return false; - } - - if (!(block_update = LLVMAppendBasicBlockInContext( - comp_ctx->context, func_ctx->func, "block_update"))) { - aot_set_last_error("llvm add basic block failed."); - return false; - } - if (!(block_after_update = LLVMAppendBasicBlockInContext( - comp_ctx->context, func_ctx->func, "block_after_update"))) { - aot_set_last_error("llvm add basic block failed."); - return false; - } - LLVMMoveBasicBlockAfter(block_update, block_curr); - LLVMMoveBasicBlockAfter(block_after_update, block_update); - - if (!LLVMBuildCondBr(comp_ctx->builder, cmp, block_update, - block_after_update)) { - aot_set_last_error("llvm build cond br failed."); - return false; - } - - LLVMPositionBuilderAtEnd(comp_ctx->builder, block_update); - if (!LLVMBuildStore(comp_ctx->builder, new_sp, - func_ctx->native_stack_top_min_addr)) { - aot_set_last_error("llvm build store failed"); - return false; - } - if (!LLVMBuildBr(comp_ctx->builder, block_after_update)) { - aot_set_last_error("llvm build br failed."); - return false; - } - - LLVMPositionBuilderAtEnd(comp_ctx->builder, block_after_update); - return true; -} - -static bool -check_stack_boundary(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, - uint32 callee_cell_num) -{ - LLVMBasicBlockRef block_curr = LLVMGetInsertBlock(comp_ctx->builder); - LLVMBasicBlockRef check_stack; - LLVMValueRef callee_local_size, stack_bound, cmp; - - if (!(callee_local_size = I32_CONST(callee_cell_num * 4))) { - aot_set_last_error("llvm build const failed."); - return false; - } - - if (!(stack_bound = LLVMBuildInBoundsGEP2( - comp_ctx->builder, INT8_TYPE, func_ctx->native_stack_bound, - &callee_local_size, 1, "stack_bound"))) { - aot_set_last_error("llvm build inbound gep failed."); - return false; - } - - if (!(check_stack = LLVMAppendBasicBlockInContext( - comp_ctx->context, func_ctx->func, "check_stack"))) { - aot_set_last_error("llvm add basic block failed."); - return false; - } - - LLVMMoveBasicBlockAfter(check_stack, block_curr); - - if (!(cmp = LLVMBuildICmp(comp_ctx->builder, LLVMIntULT, - func_ctx->last_alloca, stack_bound, "cmp"))) { - aot_set_last_error("llvm build icmp failed."); - return false; - } - - if (!aot_emit_exception(comp_ctx, func_ctx, EXCE_NATIVE_STACK_OVERFLOW, - true, cmp, check_stack)) { - return false; - } - - LLVMPositionBuilderAtEnd(comp_ctx->builder, check_stack); - return true; -} - -static bool -check_stack(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, - uint32 callee_cell_num) -{ - if (comp_ctx->enable_stack_estimation - && !record_stack_usage(comp_ctx, func_ctx, callee_cell_num)) - return false; - if (comp_ctx->enable_stack_bound_check - && !check_stack_boundary(comp_ctx, func_ctx, callee_cell_num)) - return false; - return true; -} - /** * Check whether the app address and its buffer are inside the linear memory, * if no, throw exception @@ -610,6 +473,30 @@ check_app_addr_and_convert(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, return true; } +static void +aot_estimate_and_record_stack_usage_for_function_call( + const AOTCompContext *comp_ctx, AOTFuncContext *caller_func_ctx, + const AOTFuncType *callee_func_type) +{ + unsigned int size; + + if (!(comp_ctx->enable_stack_bound_check + || 
comp_ctx->enable_stack_estimation)) { + return; + } + + size = + aot_estimate_stack_usage_for_function_call(comp_ctx, callee_func_type); + /* + * only record the max value, assuming that LLVM emits machine code + * which rewinds the stack before making the next call in the + * function. + */ + if (caller_func_ctx->stack_consumption_for_func_call < size) { + caller_func_ctx->stack_consumption_for_func_call = size; + } +} + bool aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 func_idx, bool tail_call) @@ -620,7 +507,6 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 ext_ret_cell_num = 0, cell_num = 0; AOTFuncContext **func_ctxes = comp_ctx->func_ctxes; AOTFuncType *func_type; - AOTFunc *aot_func; LLVMTypeRef *param_types = NULL, ret_type; LLVMTypeRef ext_ret_ptr_type; LLVMValueRef *param_values = NULL, value_ret = NULL, func; @@ -628,7 +514,6 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, LLVMValueRef ext_ret, ext_ret_ptr, ext_ret_idx; int32 i, j = 0, param_count, result_count, ext_ret_count; uint64 total_size; - uint32 callee_cell_num; uint8 wasm_ret_type; uint8 *ext_ret_types = NULL; const char *signature = NULL; @@ -658,6 +543,8 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, func_type = func_ctxes[func_idx - import_func_count]->aot_func->func_type; } + aot_estimate_and_record_stack_usage_for_function_call(comp_ctx, func_ctx, + func_type); /* Get param cell number */ param_cell_num = func_type->param_cell_num; @@ -885,15 +772,17 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, else { if (func_ctxes[func_idx - import_func_count] == func_ctx) { /* recursive call */ - func = func_ctx->func; + func = func_ctx->precheck_func; } else { if (!comp_ctx->is_jit_mode) { - func = func_ctxes[func_idx - import_func_count]->func; + func = + func_ctxes[func_idx - import_func_count]->precheck_func; } else { #if !(WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_LAZY_JIT != 0) - func = func_ctxes[func_idx - import_func_count]->func; + func = + func_ctxes[func_idx - import_func_count]->precheck_func; #else /* JIT tier-up, load func ptr from func_ptrs[func_idx] */ LLVMValueRef func_ptr, func_idx_const; @@ -938,13 +827,6 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, } } - aot_func = func_ctxes[func_idx - import_func_count]->aot_func; - callee_cell_num = - aot_func->param_cell_num + aot_func->local_cell_num + 1; - - if (!check_stack(comp_ctx, func_ctx, callee_cell_num)) - goto fail; - #if LLVM_VERSION_MAJOR >= 14 llvm_func_type = func_ctxes[func_idx - import_func_count]->func_type; #endif @@ -1213,6 +1095,8 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, CHECK_LLVM_CONST(ftype_idx_const); func_type = comp_ctx->comp_data->func_types[type_idx]; + aot_estimate_and_record_stack_usage_for_function_call(comp_ctx, func_ctx, + func_type); func_param_count = func_type->param_count; func_result_count = func_type->result_count; @@ -1564,13 +1448,6 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, /* Translate call non-import block */ LLVMPositionBuilderAtEnd(comp_ctx->builder, block_call_non_import); - if (!check_stack(comp_ctx, func_ctx, - param_cell_num + ext_cell_num - + 1 - /* Reserve some local variables */ - + 16)) - goto fail; - /* Load function pointer */ if (!(func_ptr = LLVMBuildInBoundsGEP2(comp_ctx->builder, OPQ_PTR_TYPE, func_ctx->func_ptrs, &func_idx, 1, diff --git 
a/core/iwasm/compilation/aot_emit_function.h b/core/iwasm/compilation/aot_emit_function.h index 26f09c66..798243e6 100644 --- a/core/iwasm/compilation/aot_emit_function.h +++ b/core/iwasm/compilation/aot_emit_function.h @@ -29,6 +29,7 @@ aot_compile_op_ref_is_null(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); bool aot_compile_op_ref_func(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 func_idx); + #ifdef __cplusplus } /* end of extern "C" */ #endif diff --git a/core/iwasm/compilation/aot_llvm.c b/core/iwasm/compilation/aot_llvm.c index 81b7e8c3..667a1977 100644 --- a/core/iwasm/compilation/aot_llvm.c +++ b/core/iwasm/compilation/aot_llvm.c @@ -14,6 +14,15 @@ #include "debug/dwarf_extractor.h" #endif +static bool +create_native_symbol(const AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); +static bool +create_native_stack_bound(const AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); +static bool +create_native_stack_top_min(const AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); + LLVMTypeRef wasm_type_to_llvm_type(const AOTLLVMTypes *llvm_types, uint8 wasm_type) { @@ -38,17 +47,472 @@ wasm_type_to_llvm_type(const AOTLLVMTypes *llvm_types, uint8 wasm_type) return NULL; } +static LLVMValueRef +aot_add_llvm_func1(const AOTCompContext *comp_ctx, LLVMModuleRef module, + uint32 func_index, uint32 param_count, LLVMTypeRef func_type, + const char *prefix) +{ + char func_name[48]; + LLVMValueRef func; + LLVMValueRef local_value; + uint32 i, j; + + /* Add LLVM function */ + snprintf(func_name, sizeof(func_name), "%s%d", prefix, func_index); + if (!(func = LLVMAddFunction(module, func_name, func_type))) { + aot_set_last_error("add LLVM function failed."); + return NULL; + } + + j = 0; + local_value = LLVMGetParam(func, j++); + LLVMSetValueName(local_value, "exec_env"); + + /* Set parameter names */ + for (i = 0; i < param_count; i++) { + local_value = LLVMGetParam(func, j++); + LLVMSetValueName(local_value, ""); + } + + return func; +} + +/* + * create a basic func_ctx enough to call aot_emit_exception. + * + * that is: + * - exec_env + * - aot_inst + * - native_symbol (if is_indirect_mode) + */ +static bool +create_basic_func_context(const AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + LLVMValueRef aot_inst_offset = I32_TWO, aot_inst_addr; + + /* Save the pameters for fast access */ + func_ctx->exec_env = LLVMGetParam(func_ctx->func, 0); + + /* Get aot inst address, the layout of exec_env is: + exec_env->next, exec_env->prev, exec_env->module_inst, and argv_buf */ + if (!(aot_inst_addr = LLVMBuildInBoundsGEP2( + comp_ctx->builder, OPQ_PTR_TYPE, func_ctx->exec_env, + &aot_inst_offset, 1, "aot_inst_addr"))) { + aot_set_last_error("llvm build in bounds gep failed"); + goto fail; + } + + /* Load aot inst */ + if (!(func_ctx->aot_inst = LLVMBuildLoad2(comp_ctx->builder, OPQ_PTR_TYPE, + aot_inst_addr, "aot_inst"))) { + aot_set_last_error("llvm build load failed"); + goto fail; + } + + if (comp_ctx->is_indirect_mode + && !create_native_symbol(comp_ctx, func_ctx)) { + goto fail; + } + + return true; +fail: + return false; +} + +/* + * return if the "precheck" wrapper function can use tail call optimization + */ +bool +aot_target_precheck_can_use_musttail(const AOTCompContext *comp_ctx) +{ + if (!strcmp(comp_ctx->target_arch, "xtensa")) { + /* + * xtensa windowed ABI doesn't have tail call optimization. + * + * Note: as of writing this, the xtensa version of LLVM + * simply ignores the musttail attribute. 
+ * https://github.com/espressif/llvm-project/pull/73 + */ + return false; + } + if (!strcmp(comp_ctx->target_arch, "riscv32") + || !strcmp(comp_ctx->target_arch, "riscv64")) { + /* + * REVISIT: actually, riscv can use tail call optimization + * in some cases. I (yamamoto) don't know the exact conditions + * though. + */ + return false; + } + /* + * x86-64/i386: true + * + * others: assume true for now + */ + return true; +} + +unsigned int +aot_estimate_stack_usage_for_function_call(const AOTCompContext *comp_ctx, + const AOTFuncType *callee_func_type) +{ + /* + * Estimate how much stack is necessary to make a function call. + * This does not include the stack consumption of the callee function. + * + * For precise estimation, ideally this function needs to be + * target-specific. + * However, this implementation aims to be target-independent, + * allowing a small overstimation, which is probably ok for our purpose. + * (overflow detection and memory profiling) + * On the other hand, an underestimation should be avoided as it + * can cause more serious problems like silent data corruptions. + * + * Assumptions: + * + * - the first result is returned via a register. + * + * - all parameters, including exec_env and pointers to non-first + * results, are passed via stack. + * (this is a bit pessimistic than many of real calling conventions, + * where some of parameters are passed via register.) + * + * - N-byte value needs N-byte alignment on stack. + * + * - a value smaller than a pointer is extended. + * (eg. 4 byte values are extended to 8 byte on x86-64.) + */ + + const unsigned int param_count = callee_func_type->param_count; + const unsigned int result_count = callee_func_type->result_count; + unsigned int size = 0; + unsigned int i; + unsigned int nb; + + if (!strcmp(comp_ctx->target_arch, "xtensa")) { + /* + * In the xtensa windowed ABI, outgoing arguments are already + * included in the callee's stack frame size, which equals to + * the operand of the ENTRY instruction and what LLVM + * MFI->getStackSize returns. + */ + return 0; + } + + /* exec_env */ + size = comp_ctx->pointer_size; + + /* parameters */ + for (i = 0; i < param_count; i++) { + nb = wasm_value_type_cell_num(callee_func_type->types[i]) * 4; + if (nb < comp_ctx->pointer_size) { + nb = comp_ctx->pointer_size; + } + size = align_uint(size, nb) + nb; + } + + /* pointers to results */ + nb = comp_ctx->pointer_size; + for (i = 1; i < result_count; i++) { + size = align_uint(size, nb) + nb; + } + + /* return address */ + nb = comp_ctx->pointer_size; + size = align_uint(size, nb) + nb; + + /* + * some extra for possible arch-dependent things like + * 16-byte alignment for x86_64. + */ + size += 16; + return size; +} + +/* + * a "precheck" function performs a few things before calling wrapped_func. 
+ * + * - update native_stack_top_min if necessary + * - stack overflow check (if it does, trap) + */ +static LLVMValueRef +aot_add_precheck_function(AOTCompContext *comp_ctx, LLVMModuleRef module, + uint32 func_index, uint32 orig_param_count, + LLVMTypeRef func_type, LLVMValueRef wrapped_func) +{ + LLVMValueRef precheck_func; + LLVMBasicBlockRef begin; + LLVMBasicBlockRef check_top_block; + LLVMBasicBlockRef update_top_block; + LLVMBasicBlockRef stack_bound_check_block; + LLVMBasicBlockRef call_wrapped_func_block; + LLVMValueRef *params = NULL; + + precheck_func = + aot_add_llvm_func1(comp_ctx, module, func_index, orig_param_count, + func_type, AOT_FUNC_PREFIX); + if (!precheck_func) { + goto fail; + } + begin = LLVMAppendBasicBlockInContext(comp_ctx->context, precheck_func, + "begin"); + check_top_block = LLVMAppendBasicBlockInContext( + comp_ctx->context, precheck_func, "check_top_block"); + if (comp_ctx->enable_stack_estimation) { + update_top_block = LLVMAppendBasicBlockInContext( + comp_ctx->context, precheck_func, "update_top_block"); + if (!update_top_block) { + goto fail; + } + } + stack_bound_check_block = LLVMAppendBasicBlockInContext( + comp_ctx->context, precheck_func, "stack_bound_check_block"); + call_wrapped_func_block = LLVMAppendBasicBlockInContext( + comp_ctx->context, precheck_func, "call_wrapped_func"); + if (!begin || !check_top_block || !stack_bound_check_block + || !call_wrapped_func_block) { + goto fail; + } + LLVMBuilderRef b = comp_ctx->builder; + LLVMPositionBuilderAtEnd(b, begin); + + /* create a temporary minimum func_ctx */ + AOTFuncContext tmp; + AOTFuncContext *func_ctx = &tmp; + memset(func_ctx, 0, sizeof(*func_ctx)); + func_ctx->func = precheck_func; + func_ctx->module = module; + func_ctx->aot_func = comp_ctx->comp_data->funcs[func_index]; +#if WASM_ENABLE_DEBUG_AOT != 0 + func_ctx->debug_func = NULL; +#endif + if (!create_basic_func_context(comp_ctx, func_ctx)) + goto fail; + if (comp_ctx->enable_stack_bound_check + && !create_native_stack_bound(comp_ctx, func_ctx)) + goto fail; + if (comp_ctx->enable_stack_estimation + && !create_native_stack_top_min(comp_ctx, func_ctx)) { + goto fail; + } + + unsigned int param_count = LLVMCountParams(precheck_func); + uint64 sz = param_count * sizeof(LLVMValueRef); + params = wasm_runtime_malloc(sz); + if (params == NULL) { + goto fail; + } + LLVMGetParams(precheck_func, params); + + const bool is_64bit = comp_ctx->pointer_size == sizeof(uint64); + LLVMTypeRef uintptr_type; + if (is_64bit) + uintptr_type = I64_TYPE; + else + uintptr_type = I32_TYPE; + + /* + * load the stack pointer + */ + LLVMValueRef sp_ptr = LLVMBuildAlloca(b, I32_TYPE, "sp_ptr"); + if (!sp_ptr) { + goto fail; + } + LLVMValueRef sp = LLVMBuildPtrToInt(b, sp_ptr, uintptr_type, "sp"); + if (!sp) { + goto fail; + } + + /* + * load the value for this wrapped function from the stack_sizes array + */ + LLVMValueRef func_index_const = I32_CONST(func_index); + LLVMValueRef sizes = + LLVMBuildBitCast(b, comp_ctx->stack_sizes, INT32_PTR_TYPE, "sizes"); + if (!sizes) { + goto fail; + } + LLVMValueRef sizep = LLVMBuildInBoundsGEP2(b, I32_TYPE, sizes, + &func_index_const, 1, "sizep"); + if (!sizep) { + goto fail; + } + LLVMValueRef size32 = LLVMBuildLoad2(b, I32_TYPE, sizep, "size32"); + if (!size32) { + goto fail; + } + LLVMValueRef size; + if (is_64bit) { + size = LLVMBuildZExt(b, size32, uintptr_type, "size"); + if (!size) { + goto fail; + } + } + else { + size = size32; + } + /* + * calculate new sp + */ + LLVMValueRef underflow = + LLVMBuildICmp(b, 
LLVMIntULT, sp, size, "underflow"); + if (!underflow) { + goto fail; + } + LLVMValueRef new_sp = LLVMBuildSub(b, sp, size, "new_sp"); + if (!new_sp) { + goto fail; + } + if (!LLVMBuildBr(b, check_top_block)) { + goto fail; + } + + LLVMPositionBuilderAtEnd(b, check_top_block); + if (comp_ctx->enable_stack_estimation) { + /* + * load native_stack_top_min from the exec_env + */ + LLVMValueRef top_min = + LLVMBuildLoad2(b, OPQ_PTR_TYPE, func_ctx->native_stack_top_min_addr, + "native_stack_top_min"); + if (!top_min) { + goto fail; + } + LLVMValueRef top_min_int = LLVMBuildPtrToInt( + b, top_min, uintptr_type, "native_stack_top_min_int"); + if (!top_min_int) { + goto fail; + } + + /* + * update native_stack_top_min if + * new_sp = sp - size < native_stack_top_min + * + * Note: unless the stack has already overflown in this exec_env, + * native_stack_bound <= native_stack_top_min + */ + LLVMValueRef cmp_top = + LLVMBuildICmp(b, LLVMIntULT, new_sp, top_min_int, "cmp_top"); + if (!cmp_top) { + goto fail; + } + cmp_top = LLVMBuildOr(b, underflow, cmp_top, "cmp_top2"); + if (!cmp_top) { + goto fail; + } + if (!LLVMBuildCondBr(b, cmp_top, update_top_block, + call_wrapped_func_block)) { + aot_set_last_error("llvm build cond br failed."); + goto fail; + } + + /* + * update native_stack_top_min + */ + LLVMPositionBuilderAtEnd(b, update_top_block); + LLVMValueRef new_sp_ptr = + LLVMBuildIntToPtr(b, new_sp, OPQ_PTR_TYPE, "new_sp_ptr"); + if (!new_sp_ptr) { + goto fail; + } + if (!LLVMBuildStore(b, new_sp_ptr, + func_ctx->native_stack_top_min_addr)) { + goto fail; + } + if (!LLVMBuildBr(b, stack_bound_check_block)) { + goto fail; + } + } + else { + if (!LLVMBuildBr(b, stack_bound_check_block)) { + goto fail; + } + } + + LLVMPositionBuilderAtEnd(b, stack_bound_check_block); + if (comp_ctx->enable_stack_bound_check) { + /* + * trap if new_sp < native_stack_bound + */ + LLVMValueRef bound_int = LLVMBuildPtrToInt( + b, func_ctx->native_stack_bound, uintptr_type, "bound_base_int"); + if (!bound_int) { + goto fail; + } + LLVMValueRef cmp = + LLVMBuildICmp(b, LLVMIntULT, new_sp, bound_int, "cmp"); + if (!cmp) { + goto fail; + } + cmp = LLVMBuildOr(b, underflow, cmp, "cmp2"); + if (!cmp) { + goto fail; + } + /* todo: @llvm.expect.i1(i1 %cmp, i1 0) */ + if (!aot_emit_exception(comp_ctx, func_ctx, EXCE_NATIVE_STACK_OVERFLOW, + true, cmp, call_wrapped_func_block)) + goto fail; + } + else { + if (!LLVMBuildBr(b, call_wrapped_func_block)) { + goto fail; + } + } + + /* + * call the wrapped function + * use a tail-call if possible + */ + LLVMPositionBuilderAtEnd(b, call_wrapped_func_block); + const char *name = "tail_call"; + LLVMTypeRef ret_type = LLVMGetReturnType(func_type); + if (ret_type == VOID_TYPE) { + name = ""; + } + LLVMValueRef retval = + LLVMBuildCall2(b, func_type, wrapped_func, params, param_count, name); + if (!retval) { + goto fail; + } + wasm_runtime_free(params); + params = NULL; + if (aot_target_precheck_can_use_musttail(comp_ctx)) { + LLVMSetTailCallKind(retval, LLVMTailCallKindMustTail); + } + else { + LLVMSetTailCallKind(retval, LLVMTailCallKindTail); + } + if (ret_type == VOID_TYPE) { + if (!LLVMBuildRetVoid(b)) { + goto fail; + } + } + else { + if (!LLVMBuildRet(b, retval)) { + goto fail; + } + } + + return precheck_func; +fail: + if (params != NULL) { + wasm_runtime_free(params); + } + aot_set_last_error("failed to build precheck wrapper function."); + return NULL; +} + /** * Add LLVM function */ static LLVMValueRef -aot_add_llvm_func(const AOTCompContext *comp_ctx, LLVMModuleRef module, 
+aot_add_llvm_func(AOTCompContext *comp_ctx, LLVMModuleRef module, const AOTFuncType *aot_func_type, uint32 func_index, - LLVMTypeRef *p_func_type) + LLVMTypeRef *p_func_type, LLVMValueRef *p_precheck_func) { LLVMValueRef func = NULL; LLVMTypeRef *param_types, ret_type, func_type; - LLVMValueRef local_value; LLVMTypeRef func_type_wrapper; LLVMValueRef func_wrapper; LLVMBasicBlockRef func_begin; @@ -101,21 +565,44 @@ aot_add_llvm_func(const AOTCompContext *comp_ctx, LLVMModuleRef module, goto fail; } - /* Add LLVM function */ - snprintf(func_name, sizeof(func_name), "%s%d", AOT_FUNC_PREFIX, func_index); - if (!(func = LLVMAddFunction(module, func_name, func_type))) { - aot_set_last_error("add LLVM function failed."); - goto fail; + bh_assert(func_index < comp_ctx->func_ctx_count); + bh_assert(LLVMGetReturnType(func_type) == ret_type); + const char *prefix = AOT_FUNC_PREFIX; + const bool need_precheck = + comp_ctx->enable_stack_bound_check || comp_ctx->enable_stack_estimation; + if (need_precheck) { + /* + * REVISIT: probably this breaks windows hw bound check + * (the RtlAddFunctionTable stuff) + */ + prefix = AOT_FUNC_INTERNAL_PREFIX; } + if (!(func = aot_add_llvm_func1(comp_ctx, module, func_index, + aot_func_type->param_count, func_type, + prefix))) + goto fail; - j = 0; - local_value = LLVMGetParam(func, j++); - LLVMSetValueName(local_value, "exec_env"); + if (need_precheck) { + if (!comp_ctx->is_jit_mode) + LLVMSetLinkage(func, LLVMInternalLinkage); + unsigned int kind = + LLVMGetEnumAttributeKindForName("noinline", strlen("noinline")); + LLVMAttributeRef attr_noinline = + LLVMCreateEnumAttribute(comp_ctx->context, kind, 0); + LLVMAddAttributeAtIndex(func, LLVMAttributeFunctionIndex, + attr_noinline); - /* Set parameter names */ - for (i = 0; i < aot_func_type->param_count; i++) { - local_value = LLVMGetParam(func, j++); - LLVMSetValueName(local_value, ""); + LLVMValueRef precheck_func = aot_add_precheck_function( + comp_ctx, module, func_index, aot_func_type->param_count, func_type, + func); + if (!precheck_func) + goto fail; + LLVMAddAttributeAtIndex(precheck_func, LLVMAttributeFunctionIndex, + attr_noinline); + *p_precheck_func = precheck_func; + } + else { + *p_precheck_func = func; } if (p_func_type) @@ -454,27 +941,6 @@ create_local_variables(const AOTCompData *comp_data, } } - if (comp_ctx->enable_stack_bound_check - || comp_ctx->enable_stack_estimation) { - if (aot_func_type->param_count + func->local_count > 0) { - func_ctx->last_alloca = func_ctx->locals[aot_func_type->param_count - + func->local_count - 1]; - if (!(func_ctx->last_alloca = - LLVMBuildBitCast(comp_ctx->builder, func_ctx->last_alloca, - INT8_PTR_TYPE, "stack_ptr"))) { - aot_set_last_error("llvm build bit cast failed."); - return false; - } - } - else { - if (!(func_ctx->last_alloca = LLVMBuildAlloca( - comp_ctx->builder, INT8_TYPE, "stack_ptr"))) { - aot_set_last_error("llvm build alloca failed."); - return false; - } - } - } - return true; } @@ -904,6 +1370,68 @@ create_func_ptrs(const AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) return true; } +const char *aot_stack_sizes_name = AOT_STACK_SIZES_NAME; + +static bool +aot_create_stack_sizes(const AOTCompData *comp_data, AOTCompContext *comp_ctx) +{ + const char *stack_sizes_name = "stack_sizes"; + LLVMTypeRef stack_sizes_type = + LLVMArrayType(I32_TYPE, comp_data->func_count); + if (!stack_sizes_type) { + aot_set_last_error("failed to create stack_sizes type."); + return false; + } + LLVMValueRef stack_sizes = + LLVMAddGlobal(comp_ctx->module, 
stack_sizes_type, stack_sizes_name); + if (!stack_sizes) { + aot_set_last_error("failed to create stack_sizes global."); + return false; + } + LLVMValueRef *values; + uint64 size = sizeof(LLVMValueRef) * comp_data->func_count; + if (size >= UINT32_MAX || !(values = wasm_runtime_malloc((uint32)size))) { + aot_set_last_error("allocate memory failed."); + return false; + } + uint32 i; + for (i = 0; i < comp_data->func_count; i++) { + /* + * This value is a placeholder, which will be replaced + * after the corresponding functions are compiled. + * + * Don't use zeros becasue LLVM can optimize them to + * zeroinitializer. + */ + values[i] = I32_NEG_ONE; + } + LLVMValueRef array = + LLVMConstArray(I32_TYPE, values, comp_data->func_count); + wasm_runtime_free(values); + if (!array) { + aot_set_last_error("failed to create stack_sizes initializer."); + return false; + } + LLVMSetInitializer(stack_sizes, array); + /* + * create an alias so that aot_resolve_stack_sizes can find it. + */ + LLVMValueRef alias = LLVMAddAlias2(comp_ctx->module, stack_sizes_type, 0, + stack_sizes, aot_stack_sizes_name); + if (!alias) { + aot_set_last_error("failed to create stack_sizes alias."); + return false; + } + /* + * make the original symbol internal. we mainly use this version to + * avoid creating extra relocations in the precheck functions. + */ + LLVMSetLinkage(stack_sizes, LLVMInternalLinkage); + comp_ctx->stack_sizes_type = stack_sizes_type; + comp_ctx->stack_sizes = stack_sizes; + return true; +} + /** * Create function compiler context */ @@ -917,7 +1445,6 @@ aot_create_func_context(const AOTCompData *comp_data, AOTCompContext *comp_ctx, WASMFunction *wasm_func = module->functions[func_index]; AOTBlock *aot_block; LLVMTypeRef int8_ptr_type; - LLVMValueRef aot_inst_offset = I32_TWO, aot_inst_addr; uint64 size; /* Allocate memory for the function context */ @@ -935,9 +1462,9 @@ aot_create_func_context(const AOTCompData *comp_data, AOTCompContext *comp_ctx, func_ctx->module = comp_ctx->module; /* Add LLVM function */ - if (!(func_ctx->func = - aot_add_llvm_func(comp_ctx, func_ctx->module, aot_func_type, - func_index, &func_ctx->func_type))) { + if (!(func_ctx->func = aot_add_llvm_func( + comp_ctx, func_ctx->module, aot_func_type, func_index, + &func_ctx->func_type, &func_ctx->precheck_func))) { goto fail; } @@ -956,22 +1483,7 @@ aot_create_func_context(const AOTCompData *comp_data, AOTCompContext *comp_ctx, /* Add local variables */ LLVMPositionBuilderAtEnd(comp_ctx->builder, aot_block->llvm_entry_block); - /* Save the pameters for fast access */ - func_ctx->exec_env = LLVMGetParam(func_ctx->func, 0); - - /* Get aot inst address, the layout of exec_env is: - exec_env->next, exec_env->prev, exec_env->module_inst, and argv_buf */ - if (!(aot_inst_addr = LLVMBuildInBoundsGEP2( - comp_ctx->builder, OPQ_PTR_TYPE, func_ctx->exec_env, - &aot_inst_offset, 1, "aot_inst_addr"))) { - aot_set_last_error("llvm build in bounds gep failed"); - goto fail; - } - - /* Load aot inst */ - if (!(func_ctx->aot_inst = LLVMBuildLoad2(comp_ctx->builder, OPQ_PTR_TYPE, - aot_inst_addr, "aot_inst"))) { - aot_set_last_error("llvm build load failed"); + if (!create_basic_func_context(comp_ctx, func_ctx)) { goto fail; } @@ -980,28 +1492,12 @@ aot_create_func_context(const AOTCompData *comp_data, AOTCompContext *comp_ctx, goto fail; } - /* Get native stack boundary address */ - if (comp_ctx->enable_stack_bound_check - && !create_native_stack_bound(comp_ctx, func_ctx)) { - goto fail; - } - if (comp_ctx->enable_stack_estimation - && 
!create_native_stack_top_min(comp_ctx, func_ctx)) { - goto fail; - } - /* Get auxiliary stack info */ if (wasm_func->has_op_set_global_aux_stack && !create_aux_stack_info(comp_ctx, func_ctx)) { goto fail; } - /* Get native symbol list */ - if (comp_ctx->is_indirect_mode - && !create_native_symbol(comp_ctx, func_ctx)) { - goto fail; - } - /* Create local variables */ if (!create_local_variables(comp_data, comp_ctx, func_ctx, func)) { goto fail; @@ -1070,6 +1566,11 @@ aot_create_func_contexts(const AOTCompData *comp_data, AOTCompContext *comp_ctx) uint64 size; uint32 i; + if ((comp_ctx->enable_stack_bound_check + || comp_ctx->enable_stack_estimation) + && !aot_create_stack_sizes(comp_data, comp_ctx)) + return NULL; + /* Allocate memory */ size = sizeof(AOTFuncContext *) * (uint64)comp_data->func_count; if (size >= UINT32_MAX @@ -1483,6 +1984,55 @@ fail: return ret; } +static void +jit_stack_size_callback(void *user_data, const char *name, size_t namelen, + size_t stack_size) +{ + AOTCompContext *comp_ctx = user_data; + /* + * Note: the longest name we care is + * something like "aot_func_internal#4294967295". + */ + char buf[64]; + uint32 func_idx; + const AOTFuncContext *func_ctx; + bool musttail; + unsigned int stack_consumption_to_call_wrapped_func; + unsigned int call_size; + int ret; + + bh_assert(comp_ctx != NULL); + bh_assert(comp_ctx->jit_stack_sizes != NULL); + + if (namelen >= sizeof(buf)) { + LOG_DEBUG("too long name: %.*s", (int)namelen, name); + return; + } + /* ensure NUL termination */ + bh_memcpy_s(buf, sizeof(buf), name, namelen); + buf[namelen] = 0; + + ret = sscanf(buf, AOT_FUNC_INTERNAL_PREFIX "%" SCNu32, &func_idx); + if (ret != 1) { + return; + } + + bh_assert(func_idx < comp_ctx->func_ctx_count); + func_ctx = comp_ctx->func_ctxes[func_idx]; + call_size = func_ctx->stack_consumption_for_func_call; + musttail = aot_target_precheck_can_use_musttail(comp_ctx); + stack_consumption_to_call_wrapped_func = + musttail ? 
0 + : aot_estimate_stack_usage_for_function_call( + comp_ctx, func_ctx->aot_func->func_type); + LOG_VERBOSE("func %.*s stack %u + %zu + %u", (int)namelen, name, + stack_consumption_to_call_wrapped_func, stack_size, call_size); + + /* Note: -1 == AOT_NEG_ONE from aot_create_stack_sizes */ + bh_assert(comp_ctx->jit_stack_sizes[func_idx] == (uint32)-1); + comp_ctx->jit_stack_sizes[func_idx] = stack_size + call_size; +} + static bool orc_jit_create(AOTCompContext *comp_ctx) { @@ -1498,6 +2048,10 @@ orc_jit_create(AOTCompContext *comp_ctx) goto fail; } + if (comp_ctx->enable_stack_bound_check || comp_ctx->enable_stack_estimation) + LLVMOrcLLJITBuilderSetCompileFuncitonCreatorWithStackSizesCallback( + builder, jit_stack_size_callback, comp_ctx); + err = LLVMOrcJITTargetMachineBuilderDetectHost(&jtmb); if (err != LLVMErrorSuccess) { aot_handle_llvm_errmsg( @@ -1688,14 +2242,6 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option) if (option->is_jit_mode) { comp_ctx->is_jit_mode = true; - /* Create TargetMachine */ - if (!create_target_machine_detect_host(comp_ctx)) - goto fail; - - /* Create LLJIT Instance */ - if (!orc_jit_create(comp_ctx)) - goto fail; - #ifndef OS_ENABLE_HW_BOUND_CHECK comp_ctx->enable_bound_check = true; /* Always enable stack boundary check if `bounds-checks` @@ -1715,6 +2261,14 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option) comp_ctx->enable_stack_bound_check = false; #endif #endif + + /* Create TargetMachine */ + if (!create_target_machine_detect_host(comp_ctx)) + goto fail; + + /* Create LLJIT Instance */ + if (!orc_jit_create(comp_ctx)) + goto fail; } else { /* Create LLVM target machine */ @@ -2037,6 +2591,19 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option) (option->stack_bounds_checks == 1) ? 
true : false; } + if ((comp_ctx->enable_stack_bound_check + || comp_ctx->enable_stack_estimation) + && option->stack_usage_file == NULL) { + if (!aot_generate_tempfile_name( + "wamrc-su", "su", comp_ctx->stack_usage_temp_file, + sizeof(comp_ctx->stack_usage_temp_file))) + goto fail; + comp_ctx->stack_usage_file = comp_ctx->stack_usage_temp_file; + } + else { + comp_ctx->stack_usage_file = option->stack_usage_file; + } + os_printf("Create AoT compiler with:\n"); os_printf(" target: %s\n", comp_ctx->target_arch); os_printf(" target cpu: %s\n", cpu); @@ -2095,7 +2662,7 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option) if (!(comp_ctx->target_machine = LLVMCreateTargetMachineWithOpts( target, triple_norm, cpu, features, opt_level, LLVMRelocStatic, code_model, false, - option->stack_usage_file))) { + comp_ctx->stack_usage_file))) { aot_set_last_error("create LLVM target machine failed."); goto fail; } @@ -2239,6 +2806,10 @@ aot_destroy_comp_context(AOTCompContext *comp_ctx) if (!comp_ctx) return; + if (comp_ctx->stack_usage_file == comp_ctx->stack_usage_temp_file) { + (void)unlink(comp_ctx->stack_usage_temp_file); + } + if (comp_ctx->target_machine) LLVMDisposeTargetMachine(comp_ctx->target_machine); @@ -2534,8 +3105,8 @@ aot_checked_addr_list_destroy(AOTFuncContext *func_ctx) } bool -aot_build_zero_function_ret(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, - AOTFuncType *func_type) +aot_build_zero_function_ret(const AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, AOTFuncType *func_type) { LLVMValueRef ret = NULL; @@ -2574,9 +3145,12 @@ aot_build_zero_function_ret(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, return false; } #if WASM_ENABLE_DEBUG_AOT != 0 - LLVMMetadataRef return_location = - dwarf_gen_func_ret_location(comp_ctx, func_ctx); - LLVMInstructionSetDebugLoc(ret, return_location); + /* debug_func is NULL for precheck function */ + if (func_ctx->debug_func != NULL) { + LLVMMetadataRef return_location = + dwarf_gen_func_ret_location(comp_ctx, func_ctx); + LLVMInstructionSetDebugLoc(ret, return_location); + } #endif return true; } diff --git a/core/iwasm/compilation/aot_llvm.h b/core/iwasm/compilation/aot_llvm.h index 76fedcc6..2f187b69 100644 --- a/core/iwasm/compilation/aot_llvm.h +++ b/core/iwasm/compilation/aot_llvm.h @@ -153,6 +153,7 @@ typedef struct AOTMemInfo { typedef struct AOTFuncContext { AOTFunc *aot_func; LLVMValueRef func; + LLVMValueRef precheck_func; LLVMTypeRef func_type; LLVMModuleRef module; AOTBlockStack block_stack; @@ -165,7 +166,6 @@ typedef struct AOTFuncContext { LLVMValueRef aux_stack_bound; LLVMValueRef aux_stack_bottom; LLVMValueRef native_symbol; - LLVMValueRef last_alloca; LLVMValueRef func_ptrs; AOTMemInfo *mem_info; @@ -182,6 +182,9 @@ typedef struct AOTFuncContext { #if WASM_ENABLE_DEBUG_AOT != 0 LLVMMetadataRef debug_func; #endif + + unsigned int stack_consumption_for_func_call; + LLVMValueRef locals[1]; } AOTFuncContext; @@ -378,6 +381,11 @@ typedef struct AOTCompContext { /* LLVM floating-point exception behavior metadata */ LLVMValueRef fp_exception_behavior; + /* a global array to store stack sizes */ + LLVMTypeRef stack_sizes_type; + LLVMValueRef stack_sizes; + uint32 *jit_stack_sizes; /* for JIT */ + /* LLVM data types */ AOTLLVMTypes basic_types; LLVMTypeRef exec_env_type; @@ -406,6 +414,9 @@ typedef struct AOTCompContext { * file for some architecture (such as arc) */ const char *external_asm_compiler; const char *asm_compiler_flags; + + const char *stack_usage_file; + char 
stack_usage_temp_file[64]; } AOTCompContext; enum { @@ -509,8 +520,8 @@ void aot_checked_addr_list_destroy(AOTFuncContext *func_ctx); bool -aot_build_zero_function_ret(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, - AOTFuncType *func_type); +aot_build_zero_function_ret(const AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, AOTFuncType *func_type); LLVMValueRef aot_call_llvm_intrinsic(const AOTCompContext *comp_ctx, @@ -554,6 +565,13 @@ bool aot_set_cond_br_weights(AOTCompContext *comp_ctx, LLVMValueRef cond_br, int32 weights_true, int32 weights_false); +bool +aot_target_precheck_can_use_musttail(const AOTCompContext *comp_ctx); + +unsigned int +aot_estimate_stack_usage_for_function_call(const AOTCompContext *comp_ctx, + const AOTFuncType *callee_func_type); + #ifdef __cplusplus } /* end of extern "C" */ #endif diff --git a/core/iwasm/compilation/aot_llvm_extra2.cpp b/core/iwasm/compilation/aot_llvm_extra2.cpp index 8c3f3a39..94eee858 100644 --- a/core/iwasm/compilation/aot_llvm_extra2.cpp +++ b/core/iwasm/compilation/aot_llvm_extra2.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #if LLVM_VERSION_MAJOR >= 14 #include #else @@ -112,3 +113,20 @@ LLVMCreateTargetMachineWithOpts(LLVMTargetRef ctarget, const char *triple, opts, rm, cm, ol, jit); return reinterpret_cast(targetmachine); } + +/* https://reviews.llvm.org/D153107 */ +#if LLVM_VERSION_MAJOR < 17 +using namespace llvm; + +LLVMTailCallKind +LLVMGetTailCallKind(LLVMValueRef Call) +{ + return (LLVMTailCallKind)unwrap(Call)->getTailCallKind(); +} + +void +LLVMSetTailCallKind(LLVMValueRef Call, LLVMTailCallKind kind) +{ + unwrap(Call)->setTailCallKind((CallInst::TailCallKind)kind); +} +#endif diff --git a/core/iwasm/compilation/aot_llvm_extra2.h b/core/iwasm/compilation/aot_llvm_extra2.h index ef99622a..f3f89799 100644 --- a/core/iwasm/compilation/aot_llvm_extra2.h +++ b/core/iwasm/compilation/aot_llvm_extra2.h @@ -3,6 +3,7 @@ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception */ +#include #include LLVM_C_EXTERN_C_BEGIN @@ -14,4 +15,20 @@ LLVMCreateTargetMachineWithOpts(LLVMTargetRef ctarget, const char *triple, LLVMCodeModel code_model, bool EmitStackSizeSection, const char *StackUsageOutput); + +/* https://reviews.llvm.org/D153107 */ +#if LLVM_VERSION_MAJOR < 17 +typedef enum { + LLVMTailCallKindNone = 0, + LLVMTailCallKindTail = 1, + LLVMTailCallKindMustTail = 2, + LLVMTailCallKindNoTail = 3, +} LLVMTailCallKind; + +LLVMTailCallKind +LLVMGetTailCallKind(LLVMValueRef CallInst); +void +LLVMSetTailCallKind(LLVMValueRef CallInst, LLVMTailCallKind kind); +#endif + LLVM_C_EXTERN_C_END diff --git a/core/iwasm/compilation/aot_orc_extra.cpp b/core/iwasm/compilation/aot_orc_extra.cpp index b778b634..9cfe331e 100644 --- a/core/iwasm/compilation/aot_orc_extra.cpp +++ b/core/iwasm/compilation/aot_orc_extra.cpp @@ -157,13 +157,29 @@ PartitionFunction(GlobalValueSet Requested) const char *wrapper; uint32 prefix_len = strlen(AOT_FUNC_PREFIX); + LOG_DEBUG("requested func %s", gvname); /* Convert "aot_func#n_wrapper" to "aot_func#n" */ - if (strstr(gvname, AOT_FUNC_PREFIX) - && (wrapper = strstr(gvname + prefix_len, "_wrapper"))) { + if (strstr(gvname, AOT_FUNC_PREFIX)) { char buf[16] = { 0 }; char func_name[64]; int group_stride, i, j; + int num; + /* + * if the jit wrapper (which has "_wrapper" suffix in + * the name) is requested, compile others in the group too. + * otherwise, only compile the requested one. + * (and possibly the correspondig wrapped function, + * which has AOT_FUNC_INTERNAL_PREFIX.) 
+ */ + wrapper = strstr(gvname + prefix_len, "_wrapper"); + if (wrapper != NULL) { + num = WASM_ORC_JIT_COMPILE_THREAD_NUM; + } + else { + num = 1; + wrapper = strchr(gvname + prefix_len, 0); + } bh_assert(wrapper - (gvname + prefix_len) > 0); /* Get AOT function index */ bh_memcpy_s(buf, (uint32)sizeof(buf), gvname + prefix_len, @@ -173,10 +189,18 @@ PartitionFunction(GlobalValueSet Requested) group_stride = WASM_ORC_JIT_BACKEND_THREAD_NUM; /* Compile some functions each time */ - for (j = 0; j < WASM_ORC_JIT_COMPILE_THREAD_NUM; j++) { + for (j = 0; j < num; j++) { + Function *F1; snprintf(func_name, sizeof(func_name), "%s%d", AOT_FUNC_PREFIX, i + j * group_stride); - Function *F1 = M->getFunction(func_name); + F1 = M->getFunction(func_name); + if (F1) { + LOG_DEBUG("compile func %s", func_name); + GVsToAdd.push_back(cast(F1)); + } + snprintf(func_name, sizeof(func_name), "%s%d", + AOT_FUNC_INTERNAL_PREFIX, i + j * group_stride); + F1 = M->getFunction(func_name); if (F1) { LOG_DEBUG("compile func %s", func_name); GVsToAdd.push_back(cast(F1)); diff --git a/core/iwasm/compilation/aot_orc_extra.h b/core/iwasm/compilation/aot_orc_extra.h index e152b877..44c2cd7a 100644 --- a/core/iwasm/compilation/aot_orc_extra.h +++ b/core/iwasm/compilation/aot_orc_extra.h @@ -71,5 +71,10 @@ LLVMOrcLLLazyJITGetIRTransformLayer(LLVMOrcLLLazyJITRef J); LLVMOrcObjectTransformLayerRef LLVMOrcLLLazyJITGetObjTransformLayer(LLVMOrcLLLazyJITRef J); +void +LLVMOrcLLJITBuilderSetCompileFuncitonCreatorWithStackSizesCallback( + LLVMOrcLLLazyJITBuilderRef Builder, + void (*cb)(void *, const char *, size_t, size_t), void *cb_data); + LLVM_C_EXTERN_C_END #endif diff --git a/core/iwasm/compilation/aot_orc_extra2.cpp b/core/iwasm/compilation/aot_orc_extra2.cpp new file mode 100644 index 00000000..3b028f15 --- /dev/null +++ b/core/iwasm/compilation/aot_orc_extra2.cpp @@ -0,0 +1,139 @@ +/* + * Copyright (C) 2023 Midokura Japan KK. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "llvm/ExecutionEngine/Orc/CompileUtils.h" +#include "llvm/ExecutionEngine/Orc/LLJIT.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/SmallVectorMemoryBuffer.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" + +#include "aot_orc_extra.h" +#include "bh_log.h" + +typedef void (*cb_t)(void *, const char *, size_t, size_t); + +class MyCompiler : public llvm::orc::IRCompileLayer::IRCompiler +{ + public: + MyCompiler(llvm::orc::JITTargetMachineBuilder JTMB, cb_t cb, void *cb_data); + llvm::Expected operator()( + llvm::Module &M) override; + + private: + llvm::orc::JITTargetMachineBuilder JTMB; + + cb_t cb; + void *cb_data; +}; + +MyCompiler::MyCompiler(llvm::orc::JITTargetMachineBuilder JTMB, cb_t cb, + void *cb_data) + : IRCompiler(llvm::orc::irManglingOptionsFromTargetOptions(JTMB.getOptions())) + , JTMB(std::move(JTMB)) + , cb(cb) + , cb_data(cb_data) +{} + +class PrintStackSizes : public llvm::MachineFunctionPass +{ + public: + PrintStackSizes(cb_t cb, void *cb_data); + bool runOnMachineFunction(llvm::MachineFunction &MF) override; + static char ID; + + private: + cb_t cb; + void *cb_data; +}; + +PrintStackSizes::PrintStackSizes(cb_t cb, void *cb_data) + : MachineFunctionPass(ID) + , cb(cb) + , cb_data(cb_data) +{} + +char PrintStackSizes::ID = 0; + +bool +PrintStackSizes::runOnMachineFunction(llvm::MachineFunction &MF) +{ + auto name = MF.getName(); + auto MFI = &MF.getFrameInfo(); + size_t sz = MFI->getStackSize(); + cb(cb_data, name.data(), name.size(), sz); + return false; +} + +class MyPassManager : public llvm::legacy::PassManager +{ + public: + void add(llvm::Pass *P) override; +}; + +void +MyPassManager::add(llvm::Pass *P) +{ + // a hack to avoid having a copy of the whole addPassesToEmitMC. + // we want to add PrintStackSizes before FreeMachineFunctionPass. + if (P->getPassName() == "Free MachineFunction") { + return; + } + llvm::legacy::PassManager::add(P); +} + +// a modified copy from llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp +llvm::Expected +MyCompiler::operator()(llvm::Module &M) +{ + auto TM = cantFail(JTMB.createTargetMachine()); + llvm::SmallVector ObjBufferSV; + + { + llvm::raw_svector_ostream ObjStream(ObjBufferSV); + + MyPassManager PM; + llvm::MCContext *Ctx; + if (TM->addPassesToEmitMC(PM, Ctx, ObjStream)) + return llvm::make_error( + "Target does not support MC emission", + llvm::inconvertibleErrorCode()); + PM.add(new PrintStackSizes(cb, cb_data)); + dynamic_cast(&PM)->add( + llvm::createFreeMachineFunctionPass()); + PM.run(M); + } + + auto ObjBuffer = std::make_unique( + std::move(ObjBufferSV), + M.getModuleIdentifier() + "-jitted-objectbuffer", + /*RequiresNullTerminator=*/false); + + return std::move(ObjBuffer); +} + +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::orc::LLLazyJITBuilder, + LLVMOrcLLLazyJITBuilderRef) + +void +LLVMOrcLLJITBuilderSetCompileFuncitonCreatorWithStackSizesCallback( + LLVMOrcLLLazyJITBuilderRef Builder, + void (*cb)(void *, const char *, size_t, size_t), void *cb_data) +{ + auto b = unwrap(Builder); + b->setCompileFunctionCreator( + [cb, cb_data](llvm::orc::JITTargetMachineBuilder JTMB) + -> llvm::Expected< + std::unique_ptr> { + return std::make_unique( + MyCompiler(std::move(JTMB), cb, cb_data)); + }); +}
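
Reviewer note on the core mechanism: the IR emitted by aot_add_precheck_function() for a function N behaves roughly like the hand-written C sketch below. This is a minimal sketch only; the symbol names (stack_sizes, N, aot_func_internal_N), the exec_env field names and the trap helper are illustrative, and the native stack pointer is approximated through an alloca exactly as in the generated IR.

    /* Illustrative sketch of the generated "aot_func#N" precheck wrapper. */
    static uint32
    aot_func_N(WASMExecEnv *exec_env, uint32 arg0)
    {
        char approx;                               /* alloca; its address approximates the native sp */
        uintptr_t sp = (uintptr_t)&approx;
        uintptr_t size = stack_sizes[N];           /* -1 placeholder, patched after compilation */
        bool underflow = sp < size;                /* guards the unsigned subtraction below */
        uintptr_t new_sp = sp - size;

        /* update_top_block: only emitted when stack estimation is enabled */
        if (underflow || new_sp < (uintptr_t)exec_env->native_stack_top_min)
            exec_env->native_stack_top_min = (uint8 *)new_sp;

        /* stack_bound_check_block: only emitted when stack bound check is enabled */
        if (underflow || new_sp < (uintptr_t)exec_env->native_stack_boundary) {
            aot_set_exception(exec_env->module_inst,
                              "native stack overflow"); /* illustrative trap path */
            return 0;
        }

        /* call_wrapped_func_block: a musttail call where the target allows it */
        return aot_func_internal_N(exec_env, arg0);
    }

One difference from the sketch: when stack estimation is enabled and new_sp is not below native_stack_top_min, the real IR branches straight to the call block, since native_stack_bound <= native_stack_top_min holds unless the stack has already overflown, which makes the bound check redundant on that path.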
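
On the stack-usage input that feeds this machinery: with the external LLC compiler path, the added -fstack-usage flag makes clang write a .su file next to the temporary object; with the in-process path, the same information is requested through the StackUsageOutput argument of LLVMCreateTargetMachineWithOpts. read_stack_usage_file() only parses the tail of each line after the last ':'. A few lines might look like this (the sizes are made-up example values):

    WASM Module:aot_func#0            48    static
    WASM Module:aot_func_internal#0   160   static
    WASM Module:aot_func#1            48    static
    WASM Module:aot_func_internal#1   304   static

The aot_func# entries describe the precheck wrappers and are only used for the min/max consistency check and the WASM_STACK_GUARD_SIZE warning, while the aot_func_internal# entries provide the per-function sizes; strrchr(line, ':') rather than strchr is used so that a module name containing ':' does not confuse the parser.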
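
aot_estimate_stack_usage_for_function_call() deliberately over-approximates the call overhead in a target-independent way so that it never under-estimates. As a worked example, assuming a 64-bit target (comp_ctx->pointer_size == 8), a callee of type (i32, i64) -> (i32, i32) is costed as:

    exec_env pointer                       8  ->  8
    param i32 (widened to pointer size)    8  -> 16
    param i64                              8  -> 24
    pointer to the 2nd result              8  -> 32
    return address                         8  -> 40
    arch slack (e.g. 16-byte alignment)   16  -> 56 bytes

On xtensa the function simply returns 0 because, as the comment in the patch notes, outgoing arguments are already included in the frame size reported for the callee (the ENTRY instruction operand).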
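
Putting aot_resolve_stack_sizes() together, the value that finally replaces the -1 placeholder for function i in the emitted stack_sizes array is, in summary:

    stack_sizes[i] = size_from_su_file(aot_func_internal#i)
                   + stack_consumption_for_func_call[i]          /* max estimate over calls made by i */
                   + (musttail ? 0 : estimate(func_type_of(i)))  /* cost of the precheck calling i */

Each addition is checked for uint32 overflow, and functions that LLVM eliminated entirely (their entry is still -1 after parsing the .su file) are recorded as 0. In JIT mode, jit_stack_size_callback() similarly records the machine frame size plus the per-function call estimate into the array located via LLVMOrcLLLazyJITLookup().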