From 55cb9c59c58c005d18e95711015d32ff911a6529 Mon Sep 17 00:00:00 2001 From: bianchui Date: Wed, 14 Aug 2024 08:42:01 +0800 Subject: [PATCH 01/24] Enable merged os_mmap for aot data sections (#3681) And enable merged os_mmap for aot data and text sections except on platform nuttx and esp-idf. Fix issue that aarch64 AOT module fails to load on android: https://github.com/bytecodealliance/wasm-micro-runtime/issues/2274 --- core/iwasm/aot/aot_loader.c | 158 +++++++++++++++++++++++++++++++---- core/iwasm/aot/aot_runtime.h | 7 ++ 2 files changed, 150 insertions(+), 15 deletions(-) diff --git a/core/iwasm/aot/aot_loader.c b/core/iwasm/aot/aot_loader.c index 645f68b1..11c7495f 100644 --- a/core/iwasm/aot/aot_loader.c +++ b/core/iwasm/aot/aot_loader.c @@ -2378,7 +2378,6 @@ destroy_object_data_sections(AOTObjectDataSection *data_sections, } } #endif - os_munmap(data_section->data, data_section->size); } wasm_runtime_free(data_sections); } @@ -2392,6 +2391,9 @@ load_object_data_sections(const uint8 **p_buf, const uint8 *buf_end, AOTObjectDataSection *data_sections; uint64 size; uint32 i; + uint64 total_size = 0; + uint32 page_size = os_getpagesize(); + uint8 *merged_sections = NULL; /* Allocate memory */ size = sizeof(AOTObjectDataSection) * (uint64)module->data_section_count; @@ -2400,8 +2402,22 @@ load_object_data_sections(const uint8 **p_buf, const uint8 *buf_end, return false; } - /* Create each data section */ + /* First iteration: read data from buf, and calculate total memory needed */ for (i = 0; i < module->data_section_count; i++) { + read_string(buf, buf_end, data_sections[i].name); + read_uint32(buf, buf_end, data_sections[i].size); + CHECK_BUF(buf, buf_end, data_sections[i].size); + /* temporary record data ptr for merge, will be replaced after mmaped */ + if (data_sections[i].size > 0) + data_sections[i].data = (uint8 *)buf; + buf += data_sections[i].size; + total_size += align_uint64((uint64)data_sections[i].size, page_size); + } + if (total_size > UINT32_MAX) 
{ + set_error_buf(error_buf, error_buf_size, "data sections too large"); + return false; + } + if (total_size > 0) { int map_prot = MMAP_PROT_READ | MMAP_PROT_WRITE; #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) \ || defined(BUILD_TARGET_RISCV64_LP64D) \ @@ -2412,29 +2428,33 @@ load_object_data_sections(const uint8 **p_buf, const uint8 *buf_end, #else int map_flags = MMAP_MAP_NONE; #endif - - read_string(buf, buf_end, data_sections[i].name); - read_uint32(buf, buf_end, data_sections[i].size); - /* Allocate memory for data */ - if (data_sections[i].size > 0 - && !(data_sections[i].data = - os_mmap(NULL, data_sections[i].size, map_prot, map_flags, - os_get_invalid_handle()))) { + merged_sections = module->merged_data_sections = + os_mmap(NULL, (uint32)total_size, map_prot, map_flags, + os_get_invalid_handle()); + if (!merged_sections) { set_error_buf(error_buf, error_buf_size, "allocate memory failed"); return false; } + module->merged_data_sections_size = (uint32)total_size; #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) #if !defined(BH_PLATFORM_LINUX_SGX) && !defined(BH_PLATFORM_WINDOWS) \ && !defined(BH_PLATFORM_DARWIN) /* address must be in the first 2 Gigabytes of the process address space */ - bh_assert((uintptr_t)data_sections[i].data < INT32_MAX); + bh_assert((uintptr_t)merged_sections < INT32_MAX); #endif #endif + } - read_byte_array(buf, buf_end, data_sections[i].data, - data_sections[i].size); + /* Second iteration: Create each data section */ + for (i = 0; i < module->data_section_count; i++) { + if (data_sections[i].size > 0) { + bh_memcpy_s(merged_sections, data_sections[i].size, + data_sections[i].data, data_sections[i].size); + data_sections[i].data = merged_sections; + merged_sections += align_uint(data_sections[i].size, page_size); + } } *p_buf = buf; @@ -2532,6 +2552,90 @@ fail: return false; } +#if !defined(BH_PLATFORM_NUTTX) && !defined(BH_PLATFORM_ESP_IDF) +static bool +try_merge_data_and_text(const uint8 
**buf, const uint8 **buf_end, + AOTModule *module, char *error_buf, + uint32 error_buf_size) +{ + uint8 *old_buf = (uint8 *)*buf; + uint8 *old_end = (uint8 *)*buf_end; + size_t code_size = (size_t)(old_end - old_buf); + uint32 page_size = os_getpagesize(); + uint64 total_size = 0; + uint32 i; + uint8 *sections; + + if (code_size == 0) { + return true; + } + + /* calc total memory needed */ + total_size += align_uint64((uint64)code_size, page_size); + for (i = 0; i < module->data_section_count; ++i) { + total_size += + align_uint64((uint64)module->data_sections[i].size, page_size); + } + /* distance between .data and .text should not greater than 4GB for some + * targets (eg. arm64 reloc need < 4G distance) */ + if (total_size > UINT32_MAX) { + return false; + } + + if (total_size != 0) { + int map_prot = MMAP_PROT_READ | MMAP_PROT_WRITE; + +#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) \ + || defined(BUILD_TARGET_RISCV64_LP64D) \ + || defined(BUILD_TARGET_RISCV64_LP64) + /* aot code and data in x86_64 must be in range 0 to 2G due + to relocation for R_X86_64_32/32S/PC32 */ + int map_flags = MMAP_MAP_32BIT; +#else + int map_flags = MMAP_MAP_NONE; +#endif + + sections = os_mmap(NULL, (uint32)total_size, map_prot, map_flags, + os_get_invalid_handle()); + if (!sections) { + /* merge failed but maybe not critical for some targes */ + return false; + } + if (os_mprotect(sections, code_size, map_prot | MMAP_PROT_EXEC) != 0) { + os_munmap(sections, (uint32)total_size); + return false; + } + + module->merged_data_text_sections = sections; + module->merged_data_text_sections_size = (uint32)total_size; + + /* order not essential just as compilers do: .text section first */ + *buf = sections; + *buf_end = sections + code_size; + bh_memcpy_s(sections, code_size, old_buf, code_size); + os_munmap(old_buf, code_size); + sections += align_uint((uint32)code_size, page_size); + + /* then .data sections */ + for (i = 0; i < module->data_section_count; ++i) { + 
AOTObjectDataSection *data_section = module->data_sections + i; + uint8 *old_data = data_section->data; + data_section->data = sections; + bh_memcpy_s(data_section->data, data_section->size, old_data, + data_section->size); + sections += align_uint(data_section->size, page_size); + } + if (module->merged_data_sections) { + os_munmap(module->merged_data_sections, + module->merged_data_sections_size); + module->merged_data_sections = NULL; + module->merged_data_sections_size = 0; + } + } + return true; +} +#endif //! defined(BH_PLATFORM_NUTTX) && !defined(BH_PLATFORM_ESP_IDF) + static bool load_text_section(const uint8 *buf, const uint8 *buf_end, AOTModule *module, char *error_buf, uint32 error_buf_size) @@ -3749,6 +3853,17 @@ load_from_sections(AOTModule *module, AOTSection *sections, return false; break; case AOT_SECTION_TYPE_TEXT: +#if !defined(BH_PLATFORM_NUTTX) && !defined(BH_PLATFORM_ESP_IDF) + /* try to merge .data and .text, with exceptions: + * 1. XIP mode + * 2. pre-mmapped module load from aot_load_from_sections() + * 3. nuttx & esp-idf: have separate region for MMAP_PROT_EXEC + */ + if (!module->is_indirect_mode && is_load_from_file_buf) + if (!try_merge_data_and_text(&buf, &buf_end, module, + error_buf, error_buf_size)) + LOG_WARNING("merge .data and .text sections failed"); +#endif //! defined(BH_PLATFORM_NUTTX) && !defined(BH_PLATFORM_ESP_IDF) if (!load_text_section(buf, buf_end, module, error_buf, error_buf_size)) return false; @@ -4179,7 +4294,11 @@ load(const uint8 *buf, uint32 size, AOTModule *module, if (!ret) { /* If load_from_sections() fails, then aot text is destroyed in destroy_sections() */ - destroy_sections(section_list, module->is_indirect_mode ? false : true); + destroy_sections(section_list, + module->is_indirect_mode + || module->merged_data_text_sections + ? 
false + : true); /* aot_unload() won't destroy aot text again */ module->code = NULL; } @@ -4329,7 +4448,8 @@ aot_unload(AOTModule *module) } #endif - if (module->code && !module->is_indirect_mode) { + if (module->code && !module->is_indirect_mode + && !module->merged_data_text_sections) { /* The layout is: literal size + literal + code (with plt table) */ uint8 *mmap_addr = module->literal - sizeof(uint32); uint32 total_size = @@ -4364,6 +4484,14 @@ aot_unload(AOTModule *module) destroy_object_data_sections(module->data_sections, module->data_section_count); + if (module->merged_data_sections) + os_munmap(module->merged_data_sections, + module->merged_data_sections_size); + + if (module->merged_data_text_sections) + os_munmap(module->merged_data_text_sections, + module->merged_data_text_sections_size); + #if WASM_ENABLE_DEBUG_AOT != 0 jit_code_entry_destroy(module->elf_hdr); #endif diff --git a/core/iwasm/aot/aot_runtime.h b/core/iwasm/aot/aot_runtime.h index e3704f82..05d66386 100644 --- a/core/iwasm/aot/aot_runtime.h +++ b/core/iwasm/aot/aot_runtime.h @@ -315,6 +315,13 @@ typedef struct AOTModule { /* Whether the underlying wasm binary buffer can be freed */ bool is_binary_freeable; + + /* `.data` sections merged into one mmaped to reduce the tlb cache miss */ + uint8 *merged_data_sections; + uint32 merged_data_sections_size; + /* `.data` and `.text` sections merged into one large mmaped section */ + uint8 *merged_data_text_sections; + uint32 merged_data_text_sections_size; } AOTModule; #define AOTMemoryInstance WASMMemoryInstance From 5f517e4335b169ddbb5762b1741a95a35ac4ffe0 Mon Sep 17 00:00:00 2001 From: Wenyong Huang Date: Fri, 16 Aug 2024 11:31:45 +0800 Subject: [PATCH 02/24] aot loader: Refine os_mmap related code (#3711) --- core/iwasm/aot/aot_loader.c | 205 ++++++++++++++++-------------------- 1 file changed, 88 insertions(+), 117 deletions(-) diff --git a/core/iwasm/aot/aot_loader.c b/core/iwasm/aot/aot_loader.c index 11c7495f..b96079d3 100644 --- 
a/core/iwasm/aot/aot_loader.c +++ b/core/iwasm/aot/aot_loader.c @@ -294,6 +294,39 @@ loader_malloc(uint64 size, char *error_buf, uint32 error_buf_size) return mem; } +static void * +loader_mmap(uint32 size, bool prot_exec, char *error_buf, uint32 error_buf_size) +{ + int map_prot = + MMAP_PROT_READ | MMAP_PROT_WRITE | (prot_exec ? MMAP_PROT_EXEC : 0); + int map_flags; + void *mem; + +#if UINTPTR_MAX == UINT64_MAX + /* The mmapped AOT data and code in 64-bit targets had better be in + range 0 to 2G, or aot loader may fail to apply some relocations, + e.g., R_X86_64_32/R_X86_64_32S/R_X86_64_PC32/R_RISCV_32. + We try to mmap with MMAP_MAP_32BIT flag first, and if fails, mmap + again without the flag. */ + map_flags = MMAP_MAP_32BIT; + if ((mem = os_mmap(NULL, size, map_prot, map_flags, + os_get_invalid_handle()))) { + /* The mmapped memory must be in the first 2 Gigabytes of the + process address space */ + bh_assert((uintptr_t)mem < INT32_MAX); + return mem; + } +#endif + + map_flags = MMAP_MAP_NONE; + if (!(mem = os_mmap(NULL, size, map_prot, map_flags, + os_get_invalid_handle()))) { + set_error_buf(error_buf, error_buf_size, "allocate memory failed"); + return NULL; + } + return mem; +} + static char * load_string(uint8 **p_buf, const uint8 *buf_end, AOTModule *module, bool is_load_from_file_buf, @@ -2407,7 +2440,8 @@ load_object_data_sections(const uint8 **p_buf, const uint8 *buf_end, read_string(buf, buf_end, data_sections[i].name); read_uint32(buf, buf_end, data_sections[i].size); CHECK_BUF(buf, buf_end, data_sections[i].size); - /* temporary record data ptr for merge, will be replaced after mmaped */ + /* Temporary record data ptr for merge, will be replaced after the + merged_data_sections is mmapped */ if (data_sections[i].size > 0) data_sections[i].data = (uint8 *)buf; buf += data_sections[i].size; @@ -2418,33 +2452,13 @@ load_object_data_sections(const uint8 **p_buf, const uint8 *buf_end, return false; } if (total_size > 0) { - int map_prot = MMAP_PROT_READ 
| MMAP_PROT_WRITE; -#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) \ - || defined(BUILD_TARGET_RISCV64_LP64D) \ - || defined(BUILD_TARGET_RISCV64_LP64) - /* aot code and data in x86_64 must be in range 0 to 2G due to - relocation for R_X86_64_32/32S/PC32 */ - int map_flags = MMAP_MAP_32BIT; -#else - int map_flags = MMAP_MAP_NONE; -#endif /* Allocate memory for data */ merged_sections = module->merged_data_sections = - os_mmap(NULL, (uint32)total_size, map_prot, map_flags, - os_get_invalid_handle()); + loader_mmap((uint32)total_size, false, error_buf, error_buf_size); if (!merged_sections) { - set_error_buf(error_buf, error_buf_size, "allocate memory failed"); return false; } module->merged_data_sections_size = (uint32)total_size; -#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) -#if !defined(BH_PLATFORM_LINUX_SGX) && !defined(BH_PLATFORM_WINDOWS) \ - && !defined(BH_PLATFORM_DARWIN) - /* address must be in the first 2 Gigabytes of - the process address space */ - bh_assert((uintptr_t)merged_sections < INT32_MAX); -#endif -#endif } /* Second iteration: Create each data section */ @@ -2570,71 +2584,63 @@ try_merge_data_and_text(const uint8 **buf, const uint8 **buf_end, return true; } - /* calc total memory needed */ + /* calculate the total memory needed */ total_size += align_uint64((uint64)code_size, page_size); for (i = 0; i < module->data_section_count; ++i) { total_size += align_uint64((uint64)module->data_sections[i].size, page_size); } - /* distance between .data and .text should not greater than 4GB for some - * targets (eg. arm64 reloc need < 4G distance) */ + /* distance between .data and .text should not be greater than 4GB + for some targets (e.g. 
arm64 reloc need < 4G distance) */ if (total_size > UINT32_MAX) { return false; } + /* code_size was checked and must be larger than 0 here */ + bh_assert(total_size > 0); - if (total_size != 0) { - int map_prot = MMAP_PROT_READ | MMAP_PROT_WRITE; - -#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) \ - || defined(BUILD_TARGET_RISCV64_LP64D) \ - || defined(BUILD_TARGET_RISCV64_LP64) - /* aot code and data in x86_64 must be in range 0 to 2G due - to relocation for R_X86_64_32/32S/PC32 */ - int map_flags = MMAP_MAP_32BIT; -#else - int map_flags = MMAP_MAP_NONE; -#endif - - sections = os_mmap(NULL, (uint32)total_size, map_prot, map_flags, - os_get_invalid_handle()); - if (!sections) { - /* merge failed but maybe not critical for some targes */ - return false; - } - if (os_mprotect(sections, code_size, map_prot | MMAP_PROT_EXEC) != 0) { - os_munmap(sections, (uint32)total_size); - return false; - } - - module->merged_data_text_sections = sections; - module->merged_data_text_sections_size = (uint32)total_size; - - /* order not essential just as compilers do: .text section first */ - *buf = sections; - *buf_end = sections + code_size; - bh_memcpy_s(sections, code_size, old_buf, code_size); - os_munmap(old_buf, code_size); - sections += align_uint((uint32)code_size, page_size); - - /* then .data sections */ - for (i = 0; i < module->data_section_count; ++i) { - AOTObjectDataSection *data_section = module->data_sections + i; - uint8 *old_data = data_section->data; - data_section->data = sections; - bh_memcpy_s(data_section->data, data_section->size, old_data, - data_section->size); - sections += align_uint(data_section->size, page_size); - } - if (module->merged_data_sections) { - os_munmap(module->merged_data_sections, - module->merged_data_sections_size); - module->merged_data_sections = NULL; - module->merged_data_sections_size = 0; - } + sections = loader_mmap((uint32)total_size, false, NULL, 0); + if (!sections) { + /* merge failed but may be not 
critical for some targets */ + return false; } + /* change the code part to be executable */ + if (os_mprotect(sections, code_size, + MMAP_PROT_READ | MMAP_PROT_WRITE | MMAP_PROT_EXEC) + != 0) { + os_munmap(sections, (uint32)total_size); + return false; + } + + module->merged_data_text_sections = sections; + module->merged_data_text_sections_size = (uint32)total_size; + + /* order not essential just as compiler does: .text section first */ + *buf = sections; + *buf_end = sections + code_size; + bh_memcpy_s(sections, code_size, old_buf, code_size); + os_munmap(old_buf, code_size); + sections += align_uint((uint32)code_size, page_size); + + /* then migrate .data sections */ + for (i = 0; i < module->data_section_count; ++i) { + AOTObjectDataSection *data_section = module->data_sections + i; + uint8 *old_data = data_section->data; + data_section->data = sections; + bh_memcpy_s(data_section->data, data_section->size, old_data, + data_section->size); + sections += align_uint(data_section->size, page_size); + } + /* free the original data sections */ + if (module->merged_data_sections) { + os_munmap(module->merged_data_sections, + module->merged_data_sections_size); + module->merged_data_sections = NULL; + module->merged_data_sections_size = 0; + } + return true; } -#endif //! defined(BH_PLATFORM_NUTTX) && !defined(BH_PLATFORM_ESP_IDF) +#endif /* ! 
defined(BH_PLATFORM_NUTTX) && !defined(BH_PLATFORM_ESP_IDF) */ static bool load_text_section(const uint8 *buf, const uint8 *buf_end, AOTModule *module, @@ -3495,16 +3501,9 @@ load_relocation_section(const uint8 *buf, const uint8 *buf_end, + sizeof(uint64) * module->real_plt_count + sizeof(uint32) * module->float_plt_count; if (size > 0) { - map_prot = MMAP_PROT_READ | MMAP_PROT_WRITE | MMAP_PROT_EXEC; - /* aot code and data in x86_64 must be in range 0 to 2G due to - relocation for R_X86_64_32/32S/PC32 */ - map_flags = MMAP_MAP_32BIT; - if (size > UINT32_MAX - || !(module->extra_plt_data = - os_mmap(NULL, (uint32)size, map_prot, map_flags, - os_get_invalid_handle()))) { - set_error_buf(error_buf, error_buf_size, "mmap memory failed"); + || !(module->extra_plt_data = loader_mmap( + (uint32)size, true, error_buf, error_buf_size))) { goto fail; } module->extra_plt_data_size = (uint32)size; @@ -3616,19 +3615,12 @@ load_relocation_section(const uint8 *buf, const uint8 *buf_end, GOTItem *got_item = module->got_item_list; uint32 got_item_idx = 0; - map_prot = MMAP_PROT_READ | MMAP_PROT_WRITE; - /* aot code and data in x86_64 must be in range 0 to 2G due to - relocation for R_X86_64_32/32S/PC32 */ - map_flags = MMAP_MAP_32BIT; - /* Create the GOT for func_ptrs, note that it is different from the .got section of a dynamic object file */ size = (uint64)sizeof(void *) * got_item_count; if (size > UINT32_MAX - || !(module->got_func_ptrs = - os_mmap(NULL, (uint32)size, map_prot, map_flags, - os_get_invalid_handle()))) { - set_error_buf(error_buf, error_buf_size, "mmap memory failed"); + || !(module->got_func_ptrs = loader_mmap( + (uint32)size, false, error_buf, error_buf_size))) { goto fail; } @@ -3863,7 +3855,7 @@ load_from_sections(AOTModule *module, AOTSection *sections, if (!try_merge_data_and_text(&buf, &buf_end, module, error_buf, error_buf_size)) LOG_WARNING("merge .data and .text sections failed"); -#endif //! 
defined(BH_PLATFORM_NUTTX) && !defined(BH_PLATFORM_ESP_IDF) +#endif /* ! defined(BH_PLATFORM_NUTTX) && !defined(BH_PLATFORM_ESP_IDF) */ if (!load_text_section(buf, buf_end, module, error_buf, error_buf_size)) return false; @@ -4180,37 +4172,16 @@ create_sections(AOTModule *module, const uint8 *buf, uint32 size, if (section_type == AOT_SECTION_TYPE_TEXT) { if ((section_size > 0) && !module->is_indirect_mode) { - int map_prot = - MMAP_PROT_READ | MMAP_PROT_WRITE | MMAP_PROT_EXEC; -#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) \ - || defined(BUILD_TARGET_RISCV64_LP64D) \ - || defined(BUILD_TARGET_RISCV64_LP64) - /* aot code and data in x86_64 must be in range 0 to 2G due - to relocation for R_X86_64_32/32S/PC32 */ - int map_flags = MMAP_MAP_32BIT; -#else - int map_flags = MMAP_MAP_NONE; -#endif total_size = (uint64)section_size + aot_get_plt_table_size(); total_size = (total_size + 3) & ~((uint64)3); if (total_size >= UINT32_MAX || !(aot_text = - os_mmap(NULL, (uint32)total_size, map_prot, - map_flags, os_get_invalid_handle()))) { + loader_mmap((uint32)total_size, true, + error_buf, error_buf_size))) { wasm_runtime_free(section); - set_error_buf(error_buf, error_buf_size, - "mmap memory failed"); goto fail; } -#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) -#if !defined(BH_PLATFORM_LINUX_SGX) && !defined(BH_PLATFORM_WINDOWS) \ - && !defined(BH_PLATFORM_DARWIN) - /* address must be in the first 2 Gigabytes of - the process address space */ - bh_assert((uintptr_t)aot_text < INT32_MAX); -#endif -#endif #if (WASM_MEM_DUAL_BUS_MIRROR != 0) mirrored_text = os_get_dbus_mirror(aot_text); From b00904b092ce478f7cfec71299db8a910a5a383a Mon Sep 17 00:00:00 2001 From: YAMAMOTO Takashi Date: Thu, 22 Aug 2024 13:35:25 +0900 Subject: [PATCH 03/24] Add a comment on AOT_SECTION_TYPE_SIGNATURE (#3746) cf. 
https://github.com/bytecodealliance/wasm-micro-runtime/issues/3744 --- core/iwasm/aot/aot_runtime.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/iwasm/aot/aot_runtime.h b/core/iwasm/aot/aot_runtime.h index 0eb64798..76c78451 100644 --- a/core/iwasm/aot/aot_runtime.h +++ b/core/iwasm/aot/aot_runtime.h @@ -39,6 +39,10 @@ typedef enum AOTSectionType { AOT_SECTION_TYPE_FUNCTION = 3, AOT_SECTION_TYPE_EXPORT = 4, AOT_SECTION_TYPE_RELOCATION = 5, + /* + * Note: We haven't had anything to use AOT_SECTION_TYPE_SIGNATURE. + * It's just reserved for possible module signing features. + */ AOT_SECTION_TYPE_SIGNATURE = 6, AOT_SECTION_TYPE_CUSTOM = 100, } AOTSectionType; From e8c2952bf959f6df81972fc469a4357f04ee629a Mon Sep 17 00:00:00 2001 From: Anders Bakken Date: Thu, 22 Aug 2024 18:49:06 -0700 Subject: [PATCH 04/24] Fix arm64 issues on mac (#3688) Make wamrc normalize "arm64" to "aarch64v8". Previously the only way to make the "arm64" target was to not specify a target on 64 bit arm-based mac builds. Now arm64 and aarch64v8 are treated as the same. Make aot_loader accept "aarch64v8" on arm-based apple (as well as accepting legacy "arm64" based aot targets). 
This also removes __APPLE__ and __MACH__ from the block that defaults size_level to 1 since it doesn't seem to be supported for aarch64: `LLVM ERROR: Only small, tiny and large code models are allowed on AArch64` --- core/iwasm/aot/aot_loader.c | 4 ++++ core/iwasm/aot/arch/aot_reloc_aarch64.c | 11 ++--------- core/iwasm/compilation/aot_llvm.c | 9 +++++++++ wamr-compiler/main.c | 4 ++-- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/core/iwasm/aot/aot_loader.c b/core/iwasm/aot/aot_loader.c index b96079d3..e62e8278 100644 --- a/core/iwasm/aot/aot_loader.c +++ b/core/iwasm/aot/aot_loader.c @@ -579,6 +579,10 @@ load_target_info_section(const uint8 *buf, const uint8 *buf_end, return false; } + /* for backwards compatibility with previous wamrc aot files */ + if (!strcmp(target_info.arch, "arm64")) + bh_strcpy_s(target_info.arch, sizeof(target_info.arch), "aarch64v8"); + /* Check machine info */ if (!check_machine_info(&target_info, error_buf, error_buf_size)) { return false; diff --git a/core/iwasm/aot/arch/aot_reloc_aarch64.c b/core/iwasm/aot/arch/aot_reloc_aarch64.c index b4bb6024..ec646b4e 100644 --- a/core/iwasm/aot/arch/aot_reloc_aarch64.c +++ b/core/iwasm/aot/arch/aot_reloc_aarch64.c @@ -53,12 +53,6 @@ get_target_symbol_map(uint32 *sym_num) return target_sym_map; } -#if (defined(__APPLE__) || defined(__MACH__)) && defined(__arm64__) -#define BUILD_TARGET_AARCH64_DEFAULT "arm64" -#else -#define BUILD_TARGET_AARCH64_DEFAULT "aarch64v8" -#endif - void get_current_target(char *target_buf, uint32 target_buf_size) { @@ -68,8 +62,8 @@ get_current_target(char *target_buf, uint32 target_buf_size) /* Set to "aarch64v8" by default if sub version isn't specified */ if (strcmp(s, "AARCH64") == 0) { - s = BUILD_TARGET_AARCH64_DEFAULT; - s_size = sizeof(BUILD_TARGET_AARCH64_DEFAULT); + s = "aarch64v8"; + s_size = 9; /* strlen("aarch64v8"); */ } if (target_buf_size < s_size) { s_size = target_buf_size; @@ -83,7 +77,6 @@ get_current_target(char *target_buf, uint32 
target_buf_size) /* Ensure the string is null byte ('\0') terminated */ *d = '\0'; } -#undef BUILD_TARGET_AARCH64_DEFAULT static uint32 get_plt_item_size() diff --git a/core/iwasm/compilation/aot_llvm.c b/core/iwasm/compilation/aot_llvm.c index d738cfc0..ab0b6ab0 100644 --- a/core/iwasm/compilation/aot_llvm.c +++ b/core/iwasm/compilation/aot_llvm.c @@ -2790,6 +2790,15 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option) bh_assert(vendor_sys); bh_memcpy_s(default_arch, sizeof(default_arch), default_triple, (uint32)(vendor_sys - default_triple)); + /** + * On Mac M[1-9]+ LLVM will report arm64 as the + * architecture, for the purposes of wamr this is the + * same as aarch64v8 so we'll normalize it here. + */ + if (!strcmp(default_arch, "arm64")) { + bh_strcpy_s(default_arch, sizeof(default_arch), + "aarch64v8"); + } arch1 = default_arch; LLVMDisposeMessage(default_triple); diff --git a/wamr-compiler/main.c b/wamr-compiler/main.c index b3e731e5..8eed1c99 100644 --- a/wamr-compiler/main.c +++ b/wamr-compiler/main.c @@ -601,8 +601,8 @@ main(int argc, char *argv[]) LOG_VERBOSE("Set size level to 1 for Windows AOT file"); option.size_level = 1; } -#if defined(_WIN32) || defined(_WIN32_) || defined(__APPLE__) \ - || defined(__MACH__) +#if defined(_WIN32) || defined(_WIN32_) \ + || ((defined(__APPLE__) || defined(__MACH__)) && !defined(__arm64__)) if (!option.target_arch && !option.target_abi) { LOG_VERBOSE("Set size level to 1 for Windows or MacOS AOT file"); option.size_level = 1; From cb3a69f778d42444bb2c35698069b2bc1af96baa Mon Sep 17 00:00:00 2001 From: Huang Qi Date: Wed, 28 Aug 2024 16:05:07 +0800 Subject: [PATCH 05/24] CI: Freeze version of bloaty for NuttX compilation (#3756) Fix the compilation error of this CI: https://github.com/bytecodealliance/wasm-micro-runtime/actions/runs/10575515238 ``` /__w/wasm-micro-runtime/wasm-micro-runtime/bloaty/third_party/abseil-cpp/absl/debugging/failure_signal_handler.cc:139:32: error: no matching 
function for call to 'max(long int, int)' 139 | size_t stack_size = (std::max(SIGSTKSZ, 65536) + page_mask) & ~page_mask; | ~~~~~~~~^~~~~~~~~~~~~~~~~ ``` --- .github/workflows/compilation_on_nuttx.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/compilation_on_nuttx.yml b/.github/workflows/compilation_on_nuttx.yml index 2f8014fa..627ada8e 100644 --- a/.github/workflows/compilation_on_nuttx.yml +++ b/.github/workflows/compilation_on_nuttx.yml @@ -124,6 +124,7 @@ jobs: repository: google/bloaty submodules: recursive path: bloaty + ref: 34f4a66559ad4938c1e629e9b5f54630b2b4d7b0 - name: Build Bloaty run: | From d1141f6f309b2ef4857df9cf5db3ab8bf52b096a Mon Sep 17 00:00:00 2001 From: TianlongLiang <111852609+TianlongLiang@users.noreply.github.com> Date: Thu, 29 Aug 2024 11:08:39 +0800 Subject: [PATCH 06/24] aot compiler: Allow to control stack boundary check when boundary check is enabled (#3754) In the AOT compiler, allow the user to control stack boundary check when the boundary check is enabled (e.g. `wamrc --bounds-checks=1`). Now the code logic is: 1. When `--stack-bounds-checks` is not set, it will be the same value as `--bounds-checks`. 2. When `--stack-bounds-checks` is set, it will be the option value no matter what the status of `--bounds-checks` is. --- core/iwasm/compilation/aot_llvm.c | 20 ++++++++++---------- core/iwasm/interpreter/wasm_interp_classic.c | 1 + wamr-compiler/main.c | 4 +--- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/core/iwasm/compilation/aot_llvm.c b/core/iwasm/compilation/aot_llvm.c index ab0b6ab0..39f64d81 100644 --- a/core/iwasm/compilation/aot_llvm.c +++ b/core/iwasm/compilation/aot_llvm.c @@ -2969,12 +2969,12 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option) sizeof(comp_ctx->target_arch)); if (option->bounds_checks == 1 || option->bounds_checks == 0) { - /* Set by user */ + /* Set by the user */ comp_ctx->enable_bound_check = (option->bounds_checks == 1) ? 
true : false; } else { - /* Unset by user, use default value */ + /* Unset by the user, use the default value */ if (strstr(comp_ctx->target_arch, "64") && !option->is_sgx_platform) { comp_ctx->enable_bound_check = false; @@ -2984,17 +2984,17 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option) } } - if (comp_ctx->enable_bound_check) { - /* Always enable stack boundary check if `bounds-checks` - is enabled */ - comp_ctx->enable_stack_bound_check = true; - } - else { - /* When `bounds-checks` is disabled, we set stack boundary - check status according to the input option */ + if (option->stack_bounds_checks == 1 + || option->stack_bounds_checks == 0) { + /* Set by the user */ comp_ctx->enable_stack_bound_check = (option->stack_bounds_checks == 1) ? true : false; } + else { + /* Unset by the user, use the default value, it will be the same + * value as the bound check */ + comp_ctx->enable_stack_bound_check = comp_ctx->enable_bound_check; + } if ((comp_ctx->enable_stack_bound_check || comp_ctx->enable_stack_estimation) diff --git a/core/iwasm/interpreter/wasm_interp_classic.c b/core/iwasm/interpreter/wasm_interp_classic.c index 4a8ba4e2..67f8c2d4 100644 --- a/core/iwasm/interpreter/wasm_interp_classic.c +++ b/core/iwasm/interpreter/wasm_interp_classic.c @@ -5739,6 +5739,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, /* use memmove when memory64 is enabled since len may be larger than UINT32_MAX */ memmove(mdst, msrc, len); + (void)dlen; #endif break; } diff --git a/wamr-compiler/main.c b/wamr-compiler/main.c index 8eed1c99..bd9e5435 100644 --- a/wamr-compiler/main.c +++ b/wamr-compiler/main.c @@ -142,9 +142,7 @@ print_help() printf(" with a runtime without the hardware bounds checks.\n"); printf(" --stack-bounds-checks=1/0 Enable or disable the bounds checks for native stack:\n"); printf(" if the option isn't set, the status is same as `--bounds-check`,\n"); - printf(" if the option is set:\n"); - printf(" (1) it is 
always enabled when `--bounds-checks` is enabled,\n"); - printf(" (2) else it is enabled/disabled according to the option value\n"); + printf(" if the option is set, the status is same as the option value\n"); printf(" --stack-usage= Generate a stack-usage file.\n"); printf(" Similarly to `clang -fstack-usage`.\n"); printf(" --format= Specifies the format of the output file\n"); From eab409a4df9010bce661c800ef0ddf60acae3b4c Mon Sep 17 00:00:00 2001 From: Anders Bakken Date: Thu, 29 Aug 2024 00:25:17 -0700 Subject: [PATCH 07/24] aot loader: Call os_mmap with MMAP_MAP_32BIT only when target is x86-64 or riscv64 (#3755) Mac on aarch64 uses posix_memmap.c os_mmap which doesn't do anything with the flag MMAP_MAP_32BIT for that build so this condition ends up asserting unless the mapping ends up in the first 4 gigs worth of addressable space. This PR changes to call os_mmap with MMAP_MAP_32BIT flag only when the target is x86-64 or riscv64, and the macro __APPLE__ isn't enabled. The behavior is similar to what the posix os_mmap does. --- core/iwasm/aot/aot_loader.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/core/iwasm/aot/aot_loader.c b/core/iwasm/aot/aot_loader.c index e62e8278..3a5b6fc5 100644 --- a/core/iwasm/aot/aot_loader.c +++ b/core/iwasm/aot/aot_loader.c @@ -302,7 +302,10 @@ loader_mmap(uint32 size, bool prot_exec, char *error_buf, uint32 error_buf_size) int map_flags; void *mem; -#if UINTPTR_MAX == UINT64_MAX +#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) \ + || defined(BUILD_TARGET_RISCV64_LP64D) \ + || defined(BUILD_TARGET_RISCV64_LP64) +#ifndef __APPLE__ /* The mmapped AOT data and code in 64-bit targets had better be in range 0 to 2G, or aot loader may fail to apply some relocations, e.g., R_X86_64_32/R_X86_64_32S/R_X86_64_PC32/R_RISCV_32.
@@ -316,6 +319,7 @@ loader_mmap(uint32 size, bool prot_exec, char *error_buf, uint32 error_buf_size) bh_assert((uintptr_t)mem < INT32_MAX); return mem; } +#endif #endif map_flags = MMAP_MAP_NONE; From 0b62cc89218030561189f65cc42585319e2ed38c Mon Sep 17 00:00:00 2001 From: Marcin Kolny Date: Tue, 3 Sep 2024 03:39:03 +0100 Subject: [PATCH 08/24] Update ref to the multi-memory tests (#3764) The specific commit has been deleted, I am pointing to the same commit in the main branch though. --- tests/wamr-test-suites/test_wamr.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/wamr-test-suites/test_wamr.sh b/tests/wamr-test-suites/test_wamr.sh index 8254cc71..87e15686 100755 --- a/tests/wamr-test-suites/test_wamr.sh +++ b/tests/wamr-test-suites/test_wamr.sh @@ -511,7 +511,7 @@ function spec_test() pushd spec # Reset to commit: "Merge pull request #48 from backes/specify-memcpy-immediate-order" - git reset --hard 48e69f394869c55b7bbe14ac963c09f4605490b6 + git reset --hard fbc99efd7a788db300aec3dd62a14577ec404f1b git checkout 044d0d2e77bdcbe891f7e0b9dd2ac01d56435f0b -- test/core/elem.wast git apply ../../spec-test-script/multi_memory_ignore_cases.patch || exit 1 if [[ ${RUNNING_MODE} == "aot" ]]; then From 5cc94e59eca97fcc30f7484a7d1abc354f7af3cd Mon Sep 17 00:00:00 2001 From: James Ring Date: Mon, 2 Sep 2024 20:03:24 -0700 Subject: [PATCH 09/24] Improve posix mmap retry logic (#3714) - Only retry on EAGAIN, ENOMEM or EINTR. - On EINTR, don't count it against the retry budget, just keep retrying. EINTR can happen in bursts. - Log the errno on failure, and don't conditionalize that logging on BH_ENABLE_TRACE_MMAP. In other parts of the code, error logging is not conditional on that define, while turning on that tracing define makes things overly verbose. 
--- .../shared/platform/common/posix/posix_memmap.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/core/shared/platform/common/posix/posix_memmap.c b/core/shared/platform/common/posix/posix_memmap.c index c76abf13..1d972f5f 100644 --- a/core/shared/platform/common/posix/posix_memmap.c +++ b/core/shared/platform/common/posix/posix_memmap.c @@ -138,18 +138,25 @@ os_mmap(void *hint, size_t size, int prot, int flags, os_file_handle file) /* memory hasn't been mapped or was mapped failed previously */ if (addr == MAP_FAILED) { - /* try 5 times */ - for (i = 0; i < 5; i++) { + /* try 5 times on EAGAIN or ENOMEM, and keep retrying on EINTR */ + i = 0; + while (i < 5) { addr = mmap(hint, request_size, map_prot, map_flags, file, 0); if (addr != MAP_FAILED) break; + if (errno == EINTR) + continue; + if (errno != EAGAIN && errno != ENOMEM) { + break; + } + i++; } } if (addr == MAP_FAILED) { -#if BH_ENABLE_TRACE_MMAP != 0 - os_printf("mmap failed\n"); -#endif + os_printf("mmap failed with errno: %d, hint: %p, size: %" PRIu64 + ", prot: %d, flags: %d", + errno, hint, request_size, map_prot, map_flags); return NULL; } From fed0fe953ca32894f51179c97d3dcc9175456a37 Mon Sep 17 00:00:00 2001 From: Huang Qi Date: Wed, 4 Sep 2024 12:08:25 +0800 Subject: [PATCH 10/24] compilation_on_nuttx.yml: Update checkout action to suppress warnings (#3765) --- .github/workflows/compilation_on_nuttx.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/compilation_on_nuttx.yml b/.github/workflows/compilation_on_nuttx.yml index 627ada8e..e10784fe 100644 --- a/.github/workflows/compilation_on_nuttx.yml +++ b/.github/workflows/compilation_on_nuttx.yml @@ -119,7 +119,7 @@ jobs: run: make -j$(nproc) EXTRAFLAGS=-Werror - name: Checkout Bloaty - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: repository: google/bloaty submodules: recursive From 65521b188d9efcdfb689314707f50513c6a509b1 Mon Sep 17 00:00:00 2001 From: Marcin 
Kolny Date: Thu, 5 Sep 2024 04:10:18 +0100 Subject: [PATCH 11/24] Remove unnecessary code duplication in aot runtime (#3767) --- core/iwasm/aot/aot_runtime.c | 31 +++---------------------------- 1 file changed, 3 insertions(+), 28 deletions(-) diff --git a/core/iwasm/aot/aot_runtime.c b/core/iwasm/aot/aot_runtime.c index 3ca26114..bdb4ca91 100644 --- a/core/iwasm/aot/aot_runtime.c +++ b/core/iwasm/aot/aot_runtime.c @@ -3668,33 +3668,6 @@ aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index) return true; } -static inline void -aot_free_frame_internal(WASMExecEnv *exec_env) -{ - AOTFrame *cur_frame = (AOTFrame *)exec_env->cur_frame; - AOTFrame *prev_frame = cur_frame->prev_frame; - -#if WASM_ENABLE_PERF_PROFILING != 0 - uint64 time_elapsed = - (uintptr_t)os_time_thread_cputime_us() - cur_frame->time_started; - - cur_frame->func_perf_prof_info->total_exec_time += time_elapsed; - cur_frame->func_perf_prof_info->total_exec_cnt++; - - /* parent function */ - if (prev_frame) - prev_frame->func_perf_prof_info->children_exec_time += time_elapsed; -#endif - - exec_env->cur_frame = (struct WASMInterpFrame *)prev_frame; -} - -void -aot_free_frame(WASMExecEnv *exec_env) -{ - aot_free_frame_internal(exec_env); -} - #else /* else of WASM_ENABLE_GC == 0 */ bool @@ -3752,6 +3725,7 @@ aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index) frame->func_index = func_index; return true; } +#endif /* end of WASM_ENABLE_GC == 0 */ static inline void aot_free_frame_internal(WASMExecEnv *exec_env) @@ -3771,7 +3745,9 @@ aot_free_frame_internal(WASMExecEnv *exec_env) prev_frame->func_perf_prof_info->children_exec_time += time_elapsed; #endif +#if WASM_ENABLE_GC != 0 wasm_exec_env_free_wasm_frame(exec_env, cur_frame); +#endif exec_env->cur_frame = (struct WASMInterpFrame *)prev_frame; } @@ -3781,7 +3757,6 @@ aot_free_frame(WASMExecEnv *exec_env) aot_free_frame_internal(exec_env); } -#endif /* end of WASM_ENABLE_GC == 0 */ void aot_frame_update_profile_info(WASMExecEnv *exec_env, bool 
alloc_frame) From b4380fb3b10e5c5b59a3f10a6df4c5e78fbc4065 Mon Sep 17 00:00:00 2001 From: Marcin Kolny Date: Thu, 5 Sep 2024 09:18:47 +0100 Subject: [PATCH 12/24] refactoring: Re-use commit IP functionality between exception handling and other cases (#3768) --- core/iwasm/compilation/aot_compiler.c | 66 +++++++++++++-------- core/iwasm/compilation/aot_compiler.h | 9 +++ core/iwasm/compilation/aot_emit_exception.c | 45 +------------- 3 files changed, 54 insertions(+), 66 deletions(-) diff --git a/core/iwasm/compilation/aot_compiler.c b/core/iwasm/compilation/aot_compiler.c index 96ed8fac..bb6cf100 100644 --- a/core/iwasm/compilation/aot_compiler.c +++ b/core/iwasm/compilation/aot_compiler.c @@ -569,6 +569,46 @@ aot_gen_commit_values(AOTCompFrame *frame) return true; } +bool +aot_gen_commit_ip(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, + LLVMValueRef ip_value, bool is_64bit) +{ + LLVMValueRef cur_frame = func_ctx->cur_frame; + LLVMValueRef value_offset, value_addr, value_ptr; + uint32 offset_ip; + + if (!comp_ctx->is_jit_mode) + offset_ip = comp_ctx->pointer_size * 4; + else + offset_ip = offsetof(WASMInterpFrame, ip); + + if (!(value_offset = I32_CONST(offset_ip))) { + aot_set_last_error("llvm build const failed"); + return false; + } + + if (!(value_addr = + LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, cur_frame, + &value_offset, 1, "ip_addr"))) { + aot_set_last_error("llvm build in bounds gep failed"); + return false; + } + + if (!(value_ptr = LLVMBuildBitCast( + comp_ctx->builder, value_addr, + is_64bit ? 
INT64_PTR_TYPE : INT32_PTR_TYPE, "ip_ptr"))) { + aot_set_last_error("llvm build bit cast failed"); + return false; + } + + if (!LLVMBuildStore(comp_ctx->builder, ip_value, value_ptr)) { + aot_set_last_error("llvm build store failed"); + return false; + } + + return true; +} + bool aot_gen_commit_sp_ip(AOTCompFrame *frame, bool commit_sp, bool commit_ip) { @@ -577,40 +617,19 @@ aot_gen_commit_sp_ip(AOTCompFrame *frame, bool commit_sp, bool commit_ip) LLVMValueRef cur_frame = func_ctx->cur_frame; LLVMValueRef value_offset, value_addr, value_ptr, value; LLVMTypeRef int8_ptr_ptr_type; - uint32 offset_ip, offset_sp, n; + uint32 offset_sp, n; bool is_64bit = (comp_ctx->pointer_size == sizeof(uint64)) ? true : false; const AOTValueSlot *sp = frame->sp; const uint8 *ip = frame->frame_ip; if (!comp_ctx->is_jit_mode) { - offset_ip = frame->comp_ctx->pointer_size * 4; offset_sp = frame->comp_ctx->pointer_size * 5; } else { - offset_ip = offsetof(WASMInterpFrame, ip); offset_sp = offsetof(WASMInterpFrame, sp); } if (commit_ip) { - if (!(value_offset = I32_CONST(offset_ip))) { - aot_set_last_error("llvm build const failed"); - return false; - } - - if (!(value_addr = - LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, cur_frame, - &value_offset, 1, "ip_addr"))) { - aot_set_last_error("llvm build in bounds gep failed"); - return false; - } - - if (!(value_ptr = LLVMBuildBitCast( - comp_ctx->builder, value_addr, - is_64bit ? 
INT64_PTR_TYPE : INT32_PTR_TYPE, "ip_ptr"))) { - aot_set_last_error("llvm build bit cast failed"); - return false; - } - if (!comp_ctx->is_jit_mode) { WASMModule *module = comp_ctx->comp_data->wasm_module; if (is_64bit) @@ -630,8 +649,7 @@ aot_gen_commit_sp_ip(AOTCompFrame *frame, bool commit_sp, bool commit_ip) return false; } - if (!LLVMBuildStore(comp_ctx->builder, value, value_ptr)) { - aot_set_last_error("llvm build store failed"); + if (!aot_gen_commit_ip(comp_ctx, func_ctx, value, is_64bit)) { return false; } } diff --git a/core/iwasm/compilation/aot_compiler.h b/core/iwasm/compilation/aot_compiler.h index ab74b7cb..d3d55b02 100644 --- a/core/iwasm/compilation/aot_compiler.h +++ b/core/iwasm/compilation/aot_compiler.h @@ -195,6 +195,15 @@ aot_gen_commit_values(AOTCompFrame *frame); bool aot_gen_commit_sp_ip(AOTCompFrame *frame, bool commit_sp, bool commit_ip); +/** + * Generate instructions to commit IP pointer to the frame. + * + * @param frame the frame information + */ +bool +aot_gen_commit_ip(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, + LLVMValueRef ip_value, bool is_64bit); + bool aot_frame_store_value(AOTCompContext *comp_ctx, LLVMValueRef value, uint8 value_type, LLVMValueRef cur_frame, uint32 offset); diff --git a/core/iwasm/compilation/aot_emit_exception.c b/core/iwasm/compilation/aot_emit_exception.c index d3dcf719..968ee78b 100644 --- a/core/iwasm/compilation/aot_emit_exception.c +++ b/core/iwasm/compilation/aot_emit_exception.c @@ -4,49 +4,10 @@ */ #include "aot_emit_exception.h" +#include "aot_compiler.h" #include "../interpreter/wasm_runtime.h" #include "../aot/aot_runtime.h" -static bool -commit_ip(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, - LLVMValueRef exce_ip, bool is_64bit) -{ - LLVMValueRef cur_frame = func_ctx->cur_frame; - LLVMValueRef value_offset, value_addr, value_ptr; - uint32 offset_ip; - - if (!comp_ctx->is_jit_mode) - offset_ip = comp_ctx->pointer_size * 4; - else - offset_ip = offsetof(WASMInterpFrame, ip); - 
- if (!(value_offset = I32_CONST(offset_ip))) { - aot_set_last_error("llvm build const failed"); - return false; - } - - if (!(value_addr = - LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, cur_frame, - &value_offset, 1, "ip_addr"))) { - aot_set_last_error("llvm build in bounds gep failed"); - return false; - } - - if (!(value_ptr = LLVMBuildBitCast( - comp_ctx->builder, value_addr, - is_64bit ? INT64_PTR_TYPE : INT32_PTR_TYPE, "ip_ptr"))) { - aot_set_last_error("llvm build bit cast failed"); - return false; - } - - if (!LLVMBuildStore(comp_ctx->builder, exce_ip, value_ptr)) { - aot_set_last_error("llvm build store failed"); - return false; - } - - return true; -} - bool aot_emit_exception(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, int32 exception_id, bool is_cond_br, LLVMValueRef cond_br_if, @@ -90,8 +51,8 @@ aot_emit_exception(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, } /* Commit ip to current frame */ - if (!commit_ip(comp_ctx, func_ctx, func_ctx->exception_ip_phi, - is_64bit)) { + if (!aot_gen_commit_ip(comp_ctx, func_ctx, + func_ctx->exception_ip_phi, is_64bit)) { return false; } } From 6f97822c181bc9f481b31e292b7f357321ae927c Mon Sep 17 00:00:00 2001 From: Marcin Kolny Date: Thu, 5 Sep 2024 14:44:06 +0100 Subject: [PATCH 13/24] Add wamrc parameter to configure stack frame features (#3763) Those parameters can be used to reduce the size of the AOT code. There's going to be more changes related to AOT code size reduction, this is just the initial step. p.s. 
https://github.com/bytecodealliance/wasm-micro-runtime/issues/3758 --- core/iwasm/compilation/aot_compiler.c | 8 ++- core/iwasm/compilation/aot_emit_exception.c | 4 +- core/iwasm/compilation/aot_emit_function.c | 39 +++++++----- core/iwasm/compilation/aot_llvm.c | 2 + core/iwasm/compilation/aot_llvm.h | 3 + core/iwasm/include/aot_comp_option.h | 18 ++++++ core/iwasm/interpreter/wasm_loader.c | 1 + core/iwasm/interpreter/wasm_mini_loader.c | 1 + wamr-compiler/main.c | 70 +++++++++++++++++++++ 9 files changed, 127 insertions(+), 19 deletions(-) diff --git a/core/iwasm/compilation/aot_compiler.c b/core/iwasm/compilation/aot_compiler.c index bb6cf100..78b7da88 100644 --- a/core/iwasm/compilation/aot_compiler.c +++ b/core/iwasm/compilation/aot_compiler.c @@ -337,6 +337,10 @@ aot_gen_commit_values(AOTCompFrame *frame) LLVMValueRef value; uint32 n; + if (!frame->comp_ctx->call_stack_features.values) { + return true; + } + /* First, commit reference flags * For LLVM JIT, iterate all local and stack ref flags * For AOT, ignore local(params + locals) ref flags */ @@ -629,7 +633,7 @@ aot_gen_commit_sp_ip(AOTCompFrame *frame, bool commit_sp, bool commit_ip) offset_sp = offsetof(WASMInterpFrame, sp); } - if (commit_ip) { + if (commit_ip && comp_ctx->call_stack_features.ip) { if (!comp_ctx->is_jit_mode) { WASMModule *module = comp_ctx->comp_data->wasm_module; if (is_64bit) @@ -654,7 +658,7 @@ aot_gen_commit_sp_ip(AOTCompFrame *frame, bool commit_sp, bool commit_ip) } } - if (commit_sp) { + if (commit_sp && comp_ctx->call_stack_features.values) { n = (uint32)(sp - frame->lp); value = I32_CONST(offset_of_local(comp_ctx, n)); if (!value) { diff --git a/core/iwasm/compilation/aot_emit_exception.c b/core/iwasm/compilation/aot_emit_exception.c index 968ee78b..1527e83e 100644 --- a/core/iwasm/compilation/aot_emit_exception.c +++ b/core/iwasm/compilation/aot_emit_exception.c @@ -41,7 +41,7 @@ aot_emit_exception(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, return false; } - if 
(comp_ctx->aot_frame) { + if (comp_ctx->aot_frame && comp_ctx->call_stack_features.trap_ip) { /* Create exception ip phi */ if (!(func_ctx->exception_ip_phi = LLVMBuildPhi( comp_ctx->builder, is_64bit ? I64_TYPE : I32_TYPE, @@ -134,7 +134,7 @@ aot_emit_exception(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, /* Add phi incoming value to got_exception block */ LLVMAddIncoming(func_ctx->exception_id_phi, &exce_id, &block_curr, 1); - if (comp_ctx->aot_frame) { + if (comp_ctx->aot_frame && comp_ctx->call_stack_features.trap_ip) { const uint8 *ip = comp_ctx->aot_frame->frame_ip; LLVMValueRef exce_ip = NULL; diff --git a/core/iwasm/compilation/aot_emit_function.c b/core/iwasm/compilation/aot_emit_function.c index 8f6e3e45..1d565b6c 100644 --- a/core/iwasm/compilation/aot_emit_function.c +++ b/core/iwasm/compilation/aot_emit_function.c @@ -682,24 +682,29 @@ alloc_frame_for_aot_func(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, new_frame = wasm_stack_top; - if (!(check_wasm_stack_succ = LLVMAppendBasicBlockInContext( - comp_ctx->context, func_ctx->func, "check_wasm_stack_succ"))) { - aot_set_last_error("llvm add basic block failed."); - return false; - } + if (comp_ctx->call_stack_features.bounds_checks) { + if (!(check_wasm_stack_succ = LLVMAppendBasicBlockInContext( + comp_ctx->context, func_ctx->func, + "check_wasm_stack_succ"))) { + aot_set_last_error("llvm add basic block failed."); + return false; + } - LLVMMoveBasicBlockAfter(check_wasm_stack_succ, - LLVMGetInsertBlock(comp_ctx->builder)); + LLVMMoveBasicBlockAfter(check_wasm_stack_succ, + LLVMGetInsertBlock(comp_ctx->builder)); - if (!(cmp = LLVMBuildICmp(comp_ctx->builder, LLVMIntUGT, wasm_stack_top_max, - wasm_stack_top_bound, "cmp"))) { - aot_set_last_error("llvm build icmp failed"); - return false; - } + if (!(cmp = LLVMBuildICmp(comp_ctx->builder, LLVMIntUGT, + wasm_stack_top_max, wasm_stack_top_bound, + "cmp"))) { + aot_set_last_error("llvm build icmp failed"); + return false; + } - if 
(!(aot_emit_exception(comp_ctx, func_ctx, EXCE_OPERAND_STACK_OVERFLOW, - true, cmp, check_wasm_stack_succ))) { - return false; + if (!(aot_emit_exception(comp_ctx, func_ctx, + EXCE_OPERAND_STACK_OVERFLOW, true, cmp, + check_wasm_stack_succ))) { + return false; + } } #if WASM_ENABLE_GC != 0 @@ -1285,6 +1290,10 @@ commit_params_to_frame_of_import_func(AOTCompContext *comp_ctx, { uint32 i, n; + if (!comp_ctx->call_stack_features.values) { + return true; + } + for (i = 0, n = 0; i < func_type->param_count; i++, n++) { switch (func_type->types[i]) { case VALUE_TYPE_I32: diff --git a/core/iwasm/compilation/aot_llvm.c b/core/iwasm/compilation/aot_llvm.c index 39f64d81..3346086a 100644 --- a/core/iwasm/compilation/aot_llvm.c +++ b/core/iwasm/compilation/aot_llvm.c @@ -2580,6 +2580,8 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option) if (option->enable_aux_stack_frame) comp_ctx->enable_aux_stack_frame = true; + comp_ctx->call_stack_features = option->call_stack_features; + if (option->enable_perf_profiling) comp_ctx->enable_perf_profiling = true; diff --git a/core/iwasm/compilation/aot_llvm.h b/core/iwasm/compilation/aot_llvm.h index 270e5ae4..65debbaa 100644 --- a/core/iwasm/compilation/aot_llvm.h +++ b/core/iwasm/compilation/aot_llvm.h @@ -412,6 +412,9 @@ typedef struct AOTCompContext { /* Generate auxiliary stack frame */ bool enable_aux_stack_frame; + /* Auxiliary call stack features */ + AOTCallStackFeatures call_stack_features; + /* Function performance profiling */ bool enable_perf_profiling; diff --git a/core/iwasm/include/aot_comp_option.h b/core/iwasm/include/aot_comp_option.h index 617b68f9..4ab2e6ab 100644 --- a/core/iwasm/include/aot_comp_option.h +++ b/core/iwasm/include/aot_comp_option.h @@ -6,6 +6,23 @@ #ifndef __AOT_COMP_OPTION_H__ #define __AOT_COMP_OPTION_H__ +typedef struct { + /* Enables or disables bounds checks for stack frames. 
When enabled, the AOT + * compiler generates code to check if the stack pointer is within the + * bounds of the current stack frame (and if not, traps). */ + bool bounds_checks; + + /* Enables or disables instruction pointer (IP) tracking.*/ + bool ip; + + /* Enables or disables tracking instruction pointer of a trap. Only takes + * effect when `ip` is enabled.*/ + bool trap_ip; + + /* Enables or disables parameters, locals and stack operands. */ + bool values; +} AOTCallStackFeatures; + typedef struct AOTCompOption { bool is_jit_mode; bool is_indirect_mode; @@ -22,6 +39,7 @@ typedef struct AOTCompOption { bool enable_gc; bool enable_aux_stack_check; bool enable_aux_stack_frame; + AOTCallStackFeatures call_stack_features; bool enable_perf_profiling; bool enable_memory_profiling; bool disable_llvm_intrinsics; diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index 13947ac8..092e0d15 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -5407,6 +5407,7 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf, #if WASM_ENABLE_PERF_PROFILING != 0 || WASM_ENABLE_DUMP_CALL_STACK != 0 \ || WASM_ENABLE_AOT_STACK_FRAME != 0 option.enable_aux_stack_frame = true; + memset(&option.call_stack_features, 1, sizeof(AOTCallStackFeatures)); #endif #if WASM_ENABLE_PERF_PROFILING != 0 option.enable_perf_profiling = true; diff --git a/core/iwasm/interpreter/wasm_mini_loader.c b/core/iwasm/interpreter/wasm_mini_loader.c index 8826f98d..a21f4490 100644 --- a/core/iwasm/interpreter/wasm_mini_loader.c +++ b/core/iwasm/interpreter/wasm_mini_loader.c @@ -2149,6 +2149,7 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf, #if WASM_ENABLE_PERF_PROFILING != 0 || WASM_ENABLE_DUMP_CALL_STACK != 0 \ || WASM_ENABLE_AOT_STACK_FRAME != 0 option.enable_aux_stack_frame = true; + memset(&option.call_stack_features, 1, sizeof(AOTCallStackFeatures)); #endif #if WASM_ENABLE_PERF_PROFILING != 0 
option.enable_perf_profiling = true; diff --git a/wamr-compiler/main.c b/wamr-compiler/main.c index bd9e5435..3c7ef1f4 100644 --- a/wamr-compiler/main.c +++ b/wamr-compiler/main.c @@ -162,6 +162,12 @@ print_help() printf(" GC is enabled\n"); printf(" --disable-aux-stack-check Disable auxiliary stack overflow/underflow check\n"); printf(" --enable-dump-call-stack Enable stack trace feature\n"); + printf(" --call-stack-features=\n"); + printf(" A comma-separated list of features when generating call stacks.\n"); + printf(" By default, all features are enabled. To disable all features,\n"); + printf(" provide an empty list (i.e. --call-stack-features=). This flag\n"); + printf(" only only takes effect when --enable-dump-call-stack is set.\n"); + printf(" Available features: bounds-checks, ip, trap-ip, values.\n"); printf(" --enable-perf-profiling Enable function performance profiling\n"); printf(" --enable-memory-profiling Enable memory usage profiling\n"); printf(" --xip A shorthand of --enable-indirect-mode --disable-llvm-intrinsics\n"); @@ -259,6 +265,48 @@ split_string(char *str, int *count, const char *delimer) return res; } +static bool +parse_call_stack_features(char *features_str, + AOTCallStackFeatures *out_features) +{ + int size = 0; + char **features; + bool ret = true; + + bh_assert(features_str); + bh_assert(out_features); + + /* non-empty feature list */ + features = split_string(features_str, &size, ","); + if (!features) { + return false; + } + + while (size--) { + if (!strcmp(features[size], "bounds-checks")) { + out_features->bounds_checks = true; + } + else if (!strcmp(features[size], "ip")) { + out_features->ip = true; + } + else if (!strcmp(features[size], "trap-ip")) { + out_features->trap_ip = true; + } + else if (!strcmp(features[size], "values")) { + out_features->values = true; + } + else { + ret = false; + printf("Unsupported feature %s\n", features[size]); + goto finish; + } + } + +finish: + free(features); + return ret; +} + static uint32 
resolve_segue_flags(char *str_flags) { @@ -356,6 +404,9 @@ main(int argc, char *argv[]) option.enable_ref_types = true; option.enable_gc = false; + /* Set all the features to true by default */ + memset(&option.call_stack_features, 1, sizeof(AOTCallStackFeatures)); + /* Process options */ for (argc--, argv++; argc > 0 && argv[0][0] == '-'; argc--, argv++) { if (!strcmp(argv[0], "-o")) { @@ -470,6 +521,19 @@ main(int argc, char *argv[]) else if (!strcmp(argv[0], "--enable-dump-call-stack")) { option.enable_aux_stack_frame = true; } + else if (!strncmp(argv[0], "--call-stack-features=", 22)) { + /* Reset all the features, only enable the user-defined ones */ + memset(&option.call_stack_features, 0, + sizeof(AOTCallStackFeatures)); + + if (argv[0][22] != '\0') { + if (!parse_call_stack_features(argv[0] + 22, + &option.call_stack_features)) { + printf("Failed to parse call-stack-features\n"); + PRINT_HELP_AND_EXIT(); + } + } + } else if (!strcmp(argv[0], "--enable-perf-profiling")) { option.enable_aux_stack_frame = true; option.enable_perf_profiling = true; @@ -608,6 +672,12 @@ main(int argc, char *argv[]) #endif } + if (option.enable_gc && !option.call_stack_features.values) { + LOG_WARNING("Call stack feature 'values' must be enabled for GC. The " + "feature will be enabled automatically."); + option.call_stack_features.values = true; + } + if (sgx_mode) { option.size_level = 1; option.is_sgx_platform = true; From b38a2e88a237f2abe54b73a5dacde7b552f39e54 Mon Sep 17 00:00:00 2001 From: Matt Gabrenya Date: Thu, 5 Sep 2024 21:01:54 -0600 Subject: [PATCH 14/24] Fix building iwasm_shared and iwasm_static libs on win32 (#3762) Fixes to enable building iwasm_shared and iwasm_static libraries on win32. 
--- CMakeLists.txt | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c7f76668..0531ec41 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -121,10 +121,14 @@ set (WAMR_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}) include (${WAMR_ROOT_DIR}/build-scripts/runtime_lib.cmake) -set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wformat -Wformat-security -Wshadow -Wno-unused-parameter -fvisibility=hidden") -# set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wconversion -Wsign-conversion") - -set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wformat -Wformat-security -Wno-unused") +if (NOT WIN32) + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wformat -Wformat-security \ + -ffunction-sections -fdata-sections \ + -Wno-unused-parameter -Wno-pedantic") + # Remove the extra spaces for better make log + string (REGEX REPLACE " *" " " CMAKE_C_FLAGS ${CMAKE_C_FLAGS}) + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wformat -Wformat-security -Wno-unused") +endif() if (WAMR_BUILD_TARGET MATCHES "X86_.*" OR WAMR_BUILD_TARGET STREQUAL "AMD_64") if (NOT (CMAKE_C_COMPILER MATCHES ".*clang.*" OR CMAKE_C_COMPILER_ID MATCHES ".*Clang")) @@ -145,6 +149,10 @@ include (${SHARED_DIR}/utils/uncommon/shared_uncommon.cmake) set (THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) +if (MSVC) + add_definitions(-DCOMPILING_WASM_RUNTIME_API=1) +endif () + # STATIC LIBRARY if (WAMR_BUILD_STATIC) add_library(iwasm_static STATIC ${WAMR_RUNTIME_LIB_SOURCE}) @@ -155,6 +163,14 @@ if (WAMR_BUILD_STATIC) target_link_libraries(iwasm_static INTERFACE boringssl_crypto) endif () + if (MINGW) + target_link_libraries (iwasm_static PRIVATE ws2_32) + endif () + + if (WIN32) + target_link_libraries(iwasm_static PRIVATE ntdll) + endif() + install (TARGETS iwasm_static ARCHIVE DESTINATION lib) endif () @@ -169,9 +185,14 @@ if (WAMR_BUILD_SHARED) endif () if (MINGW) - target_link_libraries (iwasm_shared INTERFACE 
-lWs2_32 -lwsock32) + target_link_libraries(iwasm_shared INTERFACE -lWs2_32 -lwsock32) + target_link_libraries(iwasm_shared PRIVATE ws2_32) endif () + if (WIN32) + target_link_libraries(iwasm_shared PRIVATE ntdll) + endif() + install (TARGETS iwasm_shared LIBRARY DESTINATION lib) endif () From cb71ca5822cbfe130920d2a2b89c0c35ca56ac3c Mon Sep 17 00:00:00 2001 From: Huang Qi Date: Mon, 9 Sep 2024 21:58:07 +0800 Subject: [PATCH 15/24] CI: Disable parallel test in spectest for NuttX (#3780) --- .github/workflows/spec_test_on_nuttx.yml | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/.github/workflows/spec_test_on_nuttx.yml b/.github/workflows/spec_test_on_nuttx.yml index f2e59ba6..2e940b0e 100644 --- a/.github/workflows/spec_test_on_nuttx.yml +++ b/.github/workflows/spec_test_on_nuttx.yml @@ -327,19 +327,6 @@ jobs: working-directory: apps/interpreters/wamr/wamr - name: Test - if: matrix.target_config.target != 'xtensa' - run: | - cd apps/interpreters/wamr/wamr/tests/wamr-test-suites - ./test_wamr.sh -s spec ${{ matrix.wamr_test_option.mode }} -m ${{ matrix.target_config.target }} -b -Q -P -F ${{ steps.build_firmware_path.outputs.firmware }} ${{ matrix.wamr_feature_option.mode}} - - # for xtensa, for some reasons, when running the tests - # with test_wamr.sh -P, nuttx occasionally hangs after - # "total segments stored 6" on the CI. - # i (yamamoto) couldn't reproduce it locally (macOS) even - # with the identical flash image. - # for now, run the tests without -P. 
- - name: Test - if: matrix.target_config.target == 'xtensa' run: | cd apps/interpreters/wamr/wamr/tests/wamr-test-suites ./test_wamr.sh -s spec ${{ matrix.wamr_test_option.mode }} -m ${{ matrix.target_config.target }} -b -Q -F ${{ steps.build_firmware_path.outputs.firmware }} ${{ matrix.wamr_feature_option.mode}} From 059935126256e317c1a8abcedf4d560f10f3bec7 Mon Sep 17 00:00:00 2001 From: "liang.he" Date: Tue, 10 Sep 2024 08:45:18 +0800 Subject: [PATCH 16/24] wasi-nn: Add a new target for llama.cpp as a wasi-nn backend (#3709) Minimum support: - [x] accept (WasmEdge) customized model parameters. metadata. - [x] Target [wasmedge-ggml examples](https://github.com/second-state/WasmEdge-WASINN-examples/tree/master/wasmedge-ggml) - [x] basic - [x] chatml - [x] gemma - [x] llama - [x] qwen --- In the future, to support if required: - [ ] Target [wasmedge-ggml examples](https://github.com/second-state/WasmEdge-WASINN-examples/tree/master/wasmedge-ggml) - [ ] command-r. (>70G memory requirement) - [ ] embedding. (embedding mode) - [ ] grammar. (use the grammar option to constrain the model to generate the JSON output) - [ ] llama-stream. (new APIS `compute_single`, `get_output_single`, `fini_single`) - [ ] llava. (image representation) - [ ] llava-base64-stream. (image representation) - [ ] multimodel. 
(image representation) - [ ] Target [llamaedge](https://github.com/LlamaEdge/LlamaEdge) --- build-scripts/config_common.cmake | 8 +- core/iwasm/libraries/wasi-nn/README.md | 40 +- .../libraries/wasi-nn/cmake/Findcjson.cmake | 17 + .../wasi-nn/cmake/Findllamacpp.cmake | 18 + .../wasi-nn/cmake/Findtensorflow_lite.cmake | 58 +- .../libraries/wasi-nn/cmake/wasi_nn.cmake | 80 ++- .../libraries/wasi-nn/include/wasi_nn_types.h | 9 + core/iwasm/libraries/wasi-nn/src/wasi_nn.c | 137 +++- .../libraries/wasi-nn/src/wasi_nn_llamacpp.c | 601 ++++++++++++++++++ .../wasi-nn/test/Dockerfile.wasi-nn-smoke | 43 +- .../libraries/wasi-nn/test/run_smoke_test.py | 60 ++ 11 files changed, 949 insertions(+), 122 deletions(-) create mode 100644 core/iwasm/libraries/wasi-nn/cmake/Findcjson.cmake create mode 100644 core/iwasm/libraries/wasi-nn/cmake/Findllamacpp.cmake create mode 100644 core/iwasm/libraries/wasi-nn/src/wasi_nn_llamacpp.c diff --git a/build-scripts/config_common.cmake b/build-scripts/config_common.cmake index 252ba3a8..12fc06bd 100644 --- a/build-scripts/config_common.cmake +++ b/build-scripts/config_common.cmake @@ -442,7 +442,9 @@ if (WAMR_BUILD_WASI_NN EQUAL 1) message (" WASI-NN enabled") add_definitions (-DWASM_ENABLE_WASI_NN=1) # Variant backends - if (NOT WAMR_BUILD_WASI_NN_TFLITE EQUAL 1 AND NOT WAMR_BUILD_WASI_NN_OPENVINO EQUAL 1) + if (NOT WAMR_BUILD_WASI_NN_TFLITE EQUAL 1 AND + NOT WAMR_BUILD_WASI_NN_OPENVINO EQUAL 1 AND + NOT WAMR_BUILD_WASI_NN_LLAMACPP EQUAL 1) message (FATAL_ERROR " Need to select a backend for WASI-NN") endif () @@ -454,6 +456,10 @@ if (WAMR_BUILD_WASI_NN EQUAL 1) message (" WASI-NN: backend openvino enabled") add_definitions (-DWASM_ENABLE_WASI_NN_OPENVINO) endif () + if (WAMR_BUILD_WASI_NN_LLAMACPP EQUAL 1) + message (" WASI-NN: backend llamacpp enabled") + add_definitions (-DWASM_ENABLE_WASI_NN_LLAMACPP) + endif () # Variant devices if (WAMR_BUILD_WASI_NN_ENABLE_GPU EQUAL 1) message (" WASI-NN: GPU enabled") diff --git 
a/core/iwasm/libraries/wasi-nn/README.md b/core/iwasm/libraries/wasi-nn/README.md index e0d3a25c..5536f6d5 100644 --- a/core/iwasm/libraries/wasi-nn/README.md +++ b/core/iwasm/libraries/wasi-nn/README.md @@ -4,7 +4,7 @@ ### Host -Enable WASI-NN in the WAMR by spefiying it in the cmake building configuration as follows, +Enable WASI-NN in the WAMR by specifying it in the cmake building configuration as follows, ```cmake set (WAMR_BUILD_WASI_NN 1) @@ -17,14 +17,15 @@ $ cmake -DWAMR_BUILD_WASI_NN=1 ... ``` > ![Caution] -> If enable `WAMR_BUID_WASI_NN`, iwasm will link a shared WAMR library instead of a static one. Wasi-nn backends will be loaded dynamically at runtime. Users shall specify the path of the backend library and register it to the iwasm runtime with `--native-lib=`. All shared libraries should be placed in the `LD_LIBRARY_PATH`. +> Enabling WAMR_BUILD_WASI_NN will cause the IWASM to link to a shared WAMR library instead of a static one. The WASI-NN backends will then be loaded dynamically when the program is run. You must ensure that all shared libraries are included in the `LD_LIBRARY_PATH`. #### Compilation options -- `WAMR_BUILD_WASI_NN`. enable wasi-nn support. can't work alone. need to identify a backend. Match legacy wasi-nn spec naming convention. use `wasi_nn` as import module names. -- `WAMR_BUILD_WASI_EPHEMERAL_NN`. Match latest wasi-nn spec naming convention. use `wasi_ephemeral_nn` as import module names. -- `WAMR_BUILD_WASI_NN_TFLITE`. identify the backend as TensorFlow Lite. -- `WAMR_BUILD_WASI_NN_OPENVINO`. identify the backend as OpenVINO. +- `WAMR_BUILD_WASI_NN`. This option enables support for WASI-NN. It cannot function independently and requires specifying a backend. It follows the original WASI-NN specification for naming conventions and uses wasi_nn for import module names. +- `WAMR_BUILD_WASI_EPHEMERAL_NN`. 
This option adheres to the most recent WASI-NN specification for naming conventions and uses wasi_ephemeral_nn for import module names. +- `WAMR_BUILD_WASI_NN_TFLITE`. This option designates TensorFlow Lite as the backend. +- `WAMR_BUILD_WASI_NN_OPENVINO`. This option designates OpenVINO as the backend. +- `WAMR_BUILD_WASI_NN_LLAMACPP`. This option designates Llama.cpp as the backend. ### Wasm @@ -44,7 +45,7 @@ typedef enum { fp16 = 0, fp32, up8, ip32 } tensor_type; It is required to recompile the Wasm application if you want to switch between the two sets of functions. -#### Openvino +#### Openvino installation If you're planning to use OpenVINO backends, the first step is to install OpenVINO on your computer. To do this correctly, please follow the official installation guide which you can find at this link: https://docs.openvino.ai/2024/get-started/install-openvino/install-openvino-archive-linux.html. @@ -162,17 +163,9 @@ Supported: ### Testing with WasmEdge-WASINN Examples -To ensure everything is set up correctly, use the examples from [WasmEdge-WASINN-examples](https://github.com/second-state/WasmEdge-WASINN-examples/tree/master). These examples help verify that WASI-NN support in WAMR is functioning as expected. +To make sure everything is configured properly, refer to the examples provided at [WasmEdge-WASINN-examples](https://github.com/second-state/WasmEdge-WASINN-examples/tree/master). These examples are useful for confirming that the WASI-NN support in WAMR is working correctly. -> Note: The repository contains two types of examples. Some use the [standard wasi-nn](https://github.com/WebAssembly/wasi-nn), while others use [WasmEdge's version of wasi-nn](https://github.com/second-state/wasmedge-wasi-nn), which is enhanced to meet specific customer needs. 
- The examples test the following machine learning backends: - -- OpenVINO -- PyTorch -- TensorFlow Lite - -Due to the different requirements of each backend, we'll use a Docker container for a hassle-free testing environment. +Because each backend has its own set of requirements, we recommend using a Docker container to create a straightforward testing environment without complications. #### Prepare the execution environment @@ -186,9 +179,20 @@ $ docker build -t wasi-nn-smoke:v1.0 -f ./core/iwasm/libraries/wasi-nn/test/Dock #### Execute ```bash +$ pwd +/workspaces/wasm-micro-runtime/ $ docker run --rm wasi-nn-smoke:v1.0 ``` -### Testing with bytecodealliance wasi-nn +Note that the qwen example is used as the default test for the Llama.cpp backend because it uses a small model and is easy to run. + +```bash +- openvino_mobile_image. PASS +- openvino_mobile_raw. PASS +- openvino_road_segmentation_adas. PASS +- wasmedge_ggml_qwen. PASS +``` + +### Testing with bytecodealliance WASI-NN For another example, check out [classification-example](https://github.com/bytecodealliance/wasi-nn/tree/main/rust/examples/classification-example), which focuses on OpenVINO. You can run it using the same Docker container mentioned above. diff --git a/core/iwasm/libraries/wasi-nn/cmake/Findcjson.cmake b/core/iwasm/libraries/wasi-nn/cmake/Findcjson.cmake new file mode 100644 index 00000000..1136f41a --- /dev/null +++ b/core/iwasm/libraries/wasi-nn/cmake/Findcjson.cmake @@ -0,0 +1,17 @@ +# Copyright (C) 2019 Intel Corporation. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +include(FetchContent) + +set(CJSON_SOURCE_DIR "${WAMR_ROOT_DIR}/core/deps/cjson") + +FetchContent_Declare( + cjson + GIT_REPOSITORY https://github.com/DaveGamble/cJSON.git + GIT_TAG v1.7.18 + SOURCE_DIR ${CJSON_SOURCE_DIR} +) + +set(ENABLE_CJSON_TEST OFF CACHE INTERNAL "Turn off tests") +set(ENABLE_CJSON_UNINSTALL OFF CACHE INTERNAL "Turn off uninstall to avoid targets conflict") +FetchContent_MakeAvailable(cjson) diff --git a/core/iwasm/libraries/wasi-nn/cmake/Findllamacpp.cmake b/core/iwasm/libraries/wasi-nn/cmake/Findllamacpp.cmake new file mode 100644 index 00000000..431e15db --- /dev/null +++ b/core/iwasm/libraries/wasi-nn/cmake/Findllamacpp.cmake @@ -0,0 +1,18 @@ +# Copyright (C) 2019 Intel Corporation. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +include(FetchContent) + +set(LLAMA_SOURCE_DIR "${WAMR_ROOT_DIR}/core/deps/llama.cpp") + +FetchContent_Declare( + llamacpp + GIT_REPOSITORY https://github.com/ggerganov/llama.cpp.git + GIT_TAG b3573 + SOURCE_DIR ${LLAMA_SOURCE_DIR} +) + +set(LLAMA_BUILD_TESTS OFF) +set(LLAMA_BUILD_EXAMPLES OFF) +set(LLAMA_BUILD_SERVER OFF) +FetchContent_MakeAvailable(llamacpp) diff --git a/core/iwasm/libraries/wasi-nn/cmake/Findtensorflow_lite.cmake b/core/iwasm/libraries/wasi-nn/cmake/Findtensorflow_lite.cmake index 052dd980..39480741 100644 --- a/core/iwasm/libraries/wasi-nn/cmake/Findtensorflow_lite.cmake +++ b/core/iwasm/libraries/wasi-nn/cmake/Findtensorflow_lite.cmake @@ -1,47 +1,25 @@ # Copyright (C) 2019 Intel Corporation. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -find_library(TENSORFLOW_LITE - NAMES tensorflow-lite - HINTS ${CMAKE_CURRENT_BINARY_DIR}/tensorflow-lite - NO_DEFAULT_PATHS +include(FetchContent) + +set(TFLITE_SOURCE_DIR "${WAMR_ROOT_DIR}/core/deps/tensorflow-src") + +FetchContent_Declare( + tensorflow_lite + GIT_REPOSITORY https://github.com/tensorflow/tensorflow.git + GIT_TAG v2.12.0 + GIT_SHALLOW ON + GIT_PROGRESS ON + SOURCE_DIR ${TFLITE_SOURCE_DIR} + SOURCE_SUBDIR tensorflow/lite ) -if(NOT TENSORFLOW_LITE) - if(NOT EXISTS "${WAMR_ROOT_DIR}/core/deps/tensorflow-src") - execute_process( - COMMAND "${WAMR_ROOT_DIR}/core/deps/install_tensorflow.sh" - RESULT_VARIABLE TENSORFLOW_RESULT - ) - else() - message("Tensorflow is already downloaded.") - endif() - - set(TENSORFLOW_SOURCE_DIR "${WAMR_ROOT_DIR}/core/deps/tensorflow-src") - - if(WAMR_BUILD_WASI_NN_ENABLE_GPU EQUAL 1) - # Tensorflow specific: - # * https://www.tensorflow.org/lite/guide/build_cmake#available_options_to_build_tensorflow_lite - set (TFLITE_ENABLE_GPU ON) - endif() - - if (CMAKE_SIZEOF_VOID_P EQUAL 4) - set (TFLITE_ENABLE_XNNPACK OFF) - endif() - - add_subdirectory( - "${TENSORFLOW_SOURCE_DIR}/tensorflow/lite" - "${CMAKE_CURRENT_BINARY_DIR}/tensorflow-lite" - EXCLUDE_FROM_ALL - ) -else () - message(STATUS "TensorFlow Lite library found: ${TENSORFLOW_LITE}") - set(TENSORFLOW_SOURCE_DIR "${WAMR_ROOT_DIR}/core/deps/tensorflow-src") +if(WAMR_BUILD_WASI_NN_ENABLE_GPU EQUAL 1) + set(TFLITE_ENABLE_GPU ON) +endif() +if (CMAKE_SIZEOF_VOID_P EQUAL 4) + set(TFLITE_ENABLE_XNNPACK OFF) endif() -set(TENSORFLOW_LITE_INCLUDE_DIR "${TENSORFLOW_SOURCE_DIR}/tensorflow/lite") -set(FLATBUFFER_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/flatbuffers/include") - -include_directories(${TENSORFLOW_SOURCE_DIR}) -include_directories(${FLATBUFFER_INCLUDE_DIR}) -link_directories(${CMAKE_CURRENT_BINARY_DIR}/tensorflow-lite) +FetchContent_MakeAvailable(tensorflow_lite) diff --git 
a/core/iwasm/libraries/wasi-nn/cmake/wasi_nn.cmake b/core/iwasm/libraries/wasi-nn/cmake/wasi_nn.cmake index e2ad257e..a903f0af 100644 --- a/core/iwasm/libraries/wasi-nn/cmake/wasi_nn.cmake +++ b/core/iwasm/libraries/wasi-nn/cmake/wasi_nn.cmake @@ -3,27 +3,6 @@ list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}) -if(WAMR_BUILD_WASI_NN_TFLITE EQUAL 1) - # Find tensorflow-lite - find_package(tensorflow_lite REQUIRED) -endif() - -if(WAMR_BUILD_WASI_NN_OPENVINO EQUAL 1) - if(NOT DEFINED ENV{OpenVINO_DIR}) - message(FATAL_ERROR - "OpenVINO_DIR is not defined. " - "Please follow https://docs.openvino.ai/2024/get-started/install-openvino.html," - "install openvino, and set environment variable OpenVINO_DIR." - "Like OpenVINO_DIR=/usr/lib/openvino-2023.2/ cmake ..." - "Or OpenVINO_DIR=/opt/intel/openvino/ cmake ..." - ) - endif() - - list(APPEND CMAKE_MODULE_PATH $ENV{OpenVINO_DIR}) - # Find OpenVINO - find_package(OpenVINO REQUIRED COMPONENTS Runtime) -endif() - # # wasi-nn general set(WASI_NN_ROOT ${CMAKE_CURRENT_LIST_DIR}/..) @@ -42,22 +21,46 @@ add_compile_definitions( # # - tflite if(WAMR_BUILD_WASI_NN_TFLITE EQUAL 1) + find_package(tensorflow_lite REQUIRED) + add_library( wasi_nn_tflite SHARED ${WASI_NN_ROOT}/src/wasi_nn_tensorflowlite.cpp ) + target_include_directories( + wasi_nn_tflite + PUBLIC + ${tensorflow_lite_SOURCE_DIR} + ) + target_link_libraries( wasi_nn_tflite PUBLIC libiwasm tensorflow-lite ) + + install(TARGETS wasi_nn_tflite DESTINATION lib) endif() # - openvino if(WAMR_BUILD_WASI_NN_OPENVINO EQUAL 1) + if(NOT DEFINED ENV{OpenVINO_DIR}) + message(FATAL_ERROR + "OpenVINO_DIR is not defined. " + "Please follow https://docs.openvino.ai/2024/get-started/install-openvino.html," + "install openvino, and set environment variable OpenVINO_DIR." + "Like OpenVINO_DIR=/usr/lib/openvino-2023.2/ cmake ..." + "Or OpenVINO_DIR=/opt/intel/openvino/ cmake ..." 
+ ) + endif() + + list(APPEND CMAKE_MODULE_PATH $ENV{OpenVINO_DIR}) + # Find OpenVINO + find_package(OpenVINO REQUIRED COMPONENTS Runtime) + add_library( wasi_nn_openvino SHARED @@ -71,4 +74,37 @@ if(WAMR_BUILD_WASI_NN_OPENVINO EQUAL 1) openvino::runtime openvino::runtime::c ) -endif() \ No newline at end of file + + install(TARGETS wasi_nn_openvino DESTINATION lib) +endif() + +# - llamacpp + +if(WAMR_BUILD_WASI_NN_LLAMACPP EQUAL 1) + find_package(cjson REQUIRED) + find_package(llamacpp REQUIRED) + + add_library( + wasi_nn_llamacpp + SHARED + ${WASI_NN_ROOT}/src/wasi_nn_llamacpp.c + ) + + target_include_directories( + wasi_nn_llamacpp + PUBLIC + ${cjson_SOURCE_DIR} + ) + + target_link_libraries( + wasi_nn_llamacpp + PUBLIC + libiwasm + cjson + common + ggml + llama + ) + + install(TARGETS wasi_nn_llamacpp DESTINATION lib) +endif() diff --git a/core/iwasm/libraries/wasi-nn/include/wasi_nn_types.h b/core/iwasm/libraries/wasi-nn/include/wasi_nn_types.h index d36f5977..3ac694fc 100644 --- a/core/iwasm/libraries/wasi-nn/include/wasi_nn_types.h +++ b/core/iwasm/libraries/wasi-nn/include/wasi_nn_types.h @@ -43,6 +43,11 @@ typedef enum { security, // The operation failed for an unspecified reason. unknown, + // for WasmEdge-wasi-nn + end_of_sequence = 100, // End of Sequence Found. + context_full = 101, // Context Full. + prompt_tool_long = 102, // Prompt Too Long. + model_not_found = 103, // Model Not Found. 
} wasi_nn_error; /** @@ -140,6 +145,9 @@ typedef uint32_t graph_execution_context; typedef wasi_nn_error (*LOAD)(void *, graph_builder_array *, graph_encoding, execution_target, graph *); typedef wasi_nn_error (*LOAD_BY_NAME)(void *, const char *, uint32_t, graph *); +typedef wasi_nn_error (*LOAD_BY_NAME_WITH_CONFIG)(void *, const char *, + uint32_t, void *, uint32_t, + graph *); typedef wasi_nn_error (*INIT_EXECUTION_CONTEXT)(void *, graph, graph_execution_context *); typedef wasi_nn_error (*SET_INPUT)(void *, graph_execution_context, uint32_t, @@ -154,6 +162,7 @@ typedef wasi_nn_error (*BACKEND_DEINITIALIZE)(void *); typedef struct { LOAD load; LOAD_BY_NAME load_by_name; + LOAD_BY_NAME_WITH_CONFIG load_by_name_with_config; INIT_EXECUTION_CONTEXT init_execution_context; SET_INPUT set_input; COMPUTE compute; diff --git a/core/iwasm/libraries/wasi-nn/src/wasi_nn.c b/core/iwasm/libraries/wasi-nn/src/wasi_nn.c index 0d56981f..4697e931 100644 --- a/core/iwasm/libraries/wasi-nn/src/wasi_nn.c +++ b/core/iwasm/libraries/wasi-nn/src/wasi_nn.c @@ -29,7 +29,7 @@ struct backends_api_functions { void *backend_handle; api_function functions; -} lookup[autodetect] = { 0 }; +} lookup[autodetect + 1] = { 0 }; #define call_wasi_nn_func(backend_encoding, func, wasi_error, ...) 
\ do { \ @@ -168,14 +168,7 @@ wasi_nn_destroy() lookup[i].backend_handle = NULL; } - lookup[i].functions.init = NULL; - lookup[i].functions.deinit = NULL; - lookup[i].functions.load = NULL; - lookup[i].functions.load_by_name = NULL; - lookup[i].functions.init_execution_context = NULL; - lookup[i].functions.set_input = NULL; - lookup[i].functions.compute = NULL; - lookup[i].functions.get_output = NULL; + memset(&lookup[i].functions, 0, sizeof(api_function)); } } @@ -208,6 +201,10 @@ choose_a_backend() return ggml; } +#ifndef NDEBUG + NN_WARN_PRINTF("%s", dlerror()); +#endif + handle = dlopen(OPENVINO_BACKEND_LIB, RTLD_LAZY); if (handle) { NN_INFO_PRINTF("Using openvino backend"); @@ -215,6 +212,10 @@ choose_a_backend() return openvino; } +#ifndef NDEBUG + NN_WARN_PRINTF("%s", dlerror()); +#endif + handle = dlopen(TFLITE_BACKEND_LIB, RTLD_LAZY); if (handle) { NN_INFO_PRINTF("Using tflite backend"); @@ -222,6 +223,11 @@ choose_a_backend() return tensorflowlite; } +#ifndef NDEBUG + NN_WARN_PRINTF("%s", dlerror()); +#endif + + NN_WARN_PRINTF("No backend found"); return unknown_backend; } @@ -257,6 +263,14 @@ register_backend(void *handle, api_function *functions) } functions->load_by_name = load_by_name; + LOAD_BY_NAME_WITH_CONFIG load_by_name_with_config = + (LOAD_BY_NAME_WITH_CONFIG)dlsym(handle, "load_by_name_with_config"); + if (!load_by_name_with_config) { + NN_WARN_PRINTF("load_by_name_with_config() not found"); + // since only llama.cpp backend need to support this function + } + functions->load_by_name_with_config = load_by_name_with_config; + INIT_EXECUTION_CONTEXT init_execution_context = (INIT_EXECUTION_CONTEXT)dlsym(handle, "init_execution_context"); if (!init_execution_context) { @@ -329,21 +343,23 @@ graph_encoding_to_backend_lib_name(graph_encoding encoding) static bool detect_and_load_backend(graph_encoding backend_hint, struct backends_api_functions *backends, - graph_encoding *loaded_backed) + graph_encoding *loaded_backend) { - if (backend_hint >= 
autodetect) + if (backend_hint > autodetect) return false; if (backend_hint == autodetect) backend_hint = choose_a_backend(); - /* if already loaded */ - if (lookup[backend_hint].backend_handle) { - *loaded_backed = backend_hint; - return true; - } + if (backend_hint == unknown_backend) + return false; + + *loaded_backend = backend_hint; + + /* if already loaded */ + if (lookup[backend_hint].backend_handle) + return true; - *loaded_backed = backend_hint; const char *backend_lib_name = graph_encoding_to_backend_lib_name(backend_hint); if (!backend_lib_name) @@ -353,6 +369,7 @@ detect_and_load_backend(graph_encoding backend_hint, } /* WASI-NN implementation */ + #if WASM_ENABLE_WASI_EPHEMERAL_NN != 0 wasi_nn_error wasi_nn_load(wasm_exec_env_t exec_env, graph_builder_wasm *builder, @@ -392,15 +409,15 @@ wasi_nn_load(wasm_exec_env_t exec_env, graph_builder_array_wasm *builder, goto fail; } - graph_encoding loaded_backed = autodetect; - if (!detect_and_load_backend(encoding, lookup, &loaded_backed)) { + graph_encoding loaded_backend = autodetect; + if (!detect_and_load_backend(encoding, lookup, &loaded_backend)) { res = invalid_encoding; NN_ERR_PRINTF("load backend failed"); goto fail; } WASINNContext *wasi_nn_ctx = wasm_runtime_get_wasi_nn_ctx(instance); - wasi_nn_ctx->backend = loaded_backed; + wasi_nn_ctx->backend = loaded_backend; /* init() the backend */ call_wasi_nn_func(wasi_nn_ctx->backend, init, res, @@ -413,7 +430,6 @@ wasi_nn_load(wasm_exec_env_t exec_env, graph_builder_array_wasm *builder, if (res != success) goto fail; - wasi_nn_ctx->backend = loaded_backed; wasi_nn_ctx->is_model_loaded = true; fail: @@ -428,8 +444,6 @@ wasi_nn_error wasi_nn_load_by_name(wasm_exec_env_t exec_env, char *name, uint32_t name_len, graph *g) { - NN_DBG_PRINTF("[WASI NN] LOAD_BY_NAME %s...", name); - wasm_module_inst_t instance = wasm_runtime_get_module_inst(exec_env); if (!instance) { return runtime_error; @@ -446,15 +460,23 @@ wasi_nn_load_by_name(wasm_exec_env_t exec_env, char 
*name, uint32_t name_len, return invalid_argument; } - graph_encoding loaded_backed = autodetect; - if (detect_and_load_backend(autodetect, lookup, &loaded_backed)) { + if (name_len == 0 || name[name_len] != '\0') { + NN_ERR_PRINTF("Invalid filename"); + return invalid_argument; + } + + NN_DBG_PRINTF("[WASI NN] LOAD_BY_NAME %s...", name); + + graph_encoding loaded_backend = autodetect; + if (!detect_and_load_backend(autodetect, lookup, &loaded_backend)) { NN_ERR_PRINTF("load backend failed"); return invalid_encoding; } WASINNContext *wasi_nn_ctx = wasm_runtime_get_wasi_nn_ctx(instance); - wasi_nn_error res; + wasi_nn_ctx->backend = loaded_backend; + wasi_nn_error res; /* init() the backend */ call_wasi_nn_func(wasi_nn_ctx->backend, init, res, &wasi_nn_ctx->backend_ctx); @@ -466,7 +488,67 @@ wasi_nn_load_by_name(wasm_exec_env_t exec_env, char *name, uint32_t name_len, if (res != success) return res; - wasi_nn_ctx->backend = loaded_backed; + wasi_nn_ctx->backend = loaded_backend; + wasi_nn_ctx->is_model_loaded = true; + return success; +} + +wasi_nn_error +wasi_nn_load_by_name_with_config(wasm_exec_env_t exec_env, char *name, + int32_t name_len, char *config, + int32_t config_len, graph *g) +{ + wasm_module_inst_t instance = wasm_runtime_get_module_inst(exec_env); + if (!instance) { + return runtime_error; + } + + if (!wasm_runtime_validate_native_addr(instance, name, name_len)) { + NN_ERR_PRINTF("name is invalid"); + return invalid_argument; + } + + if (!wasm_runtime_validate_native_addr(instance, g, + (uint64)sizeof(graph))) { + NN_ERR_PRINTF("graph is invalid"); + return invalid_argument; + } + + if (name_len == 0 || name[name_len] != '\0') { + NN_ERR_PRINTF("Invalid filename"); + return invalid_argument; + } + + if (!config || config_len == 0 || config[config_len] != '\0') { + NN_ERR_PRINTF("Invalid config"); + return invalid_argument; + } + + NN_DBG_PRINTF("[WASI NN] LOAD_BY_NAME_WITH_CONFIG %s %s...", name, config); + + graph_encoding loaded_backend = 
autodetect; + if (!detect_and_load_backend(autodetect, lookup, &loaded_backend)) { + NN_ERR_PRINTF("load backend failed"); + return invalid_encoding; + } + + WASINNContext *wasi_nn_ctx = wasm_runtime_get_wasi_nn_ctx(instance); + wasi_nn_ctx->backend = loaded_backend; + + wasi_nn_error res; + /* init() the backend */ + call_wasi_nn_func(wasi_nn_ctx->backend, init, res, + &wasi_nn_ctx->backend_ctx); + if (res != success) + return res; + + call_wasi_nn_func(wasi_nn_ctx->backend, load_by_name_with_config, res, + wasi_nn_ctx->backend_ctx, name, name_len, config, + config_len, g); + if (res != success) + return res; + + wasi_nn_ctx->backend = loaded_backend; wasi_nn_ctx->is_model_loaded = true; return success; } @@ -608,6 +690,7 @@ static NativeSymbol native_symbols_wasi_nn[] = { #if WASM_ENABLE_WASI_EPHEMERAL_NN != 0 REG_NATIVE_FUNC(load, "(*iii*)i"), REG_NATIVE_FUNC(load_by_name, "(*i*)i"), + REG_NATIVE_FUNC(load_by_name_with_config, "(*i*i*)i"), REG_NATIVE_FUNC(init_execution_context, "(i*)i"), REG_NATIVE_FUNC(set_input, "(ii*)i"), REG_NATIVE_FUNC(compute, "(i)i"), diff --git a/core/iwasm/libraries/wasi-nn/src/wasi_nn_llamacpp.c b/core/iwasm/libraries/wasi-nn/src/wasi_nn_llamacpp.c new file mode 100644 index 00000000..58d29163 --- /dev/null +++ b/core/iwasm/libraries/wasi-nn/src/wasi_nn_llamacpp.c @@ -0,0 +1,601 @@ +/* + * Copyright (C) 2019 Intel Corporation. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ +#include "wasi_nn_types.h" +#include "utils/logger.h" +#include "llama.h" +#include "ggml.h" +#include "cJSON.h" + +// build info +extern int LLAMA_BUILD_NUMBER; +extern char const *LLAMA_COMMIT; +extern char const *LLAMA_COMPILER; +extern char const *LLAMA_BUILD_TARGET; + +// compatible with WasmEdge +// https://github.com/second-state/WasmEdge-WASINN-examples/blob/master/wasmedge-ggml/README.md#parameters +// https://github.com/WasmEdge/WasmEdge/blob/master/plugins/wasi_nn/ggml.cpp +struct wasi_nn_llama_config { + // Backend(plugin in WasmEdge) parameters: + bool enable_log; + bool enable_debug_log; + bool stream_stdout; + // embedding mode + bool embedding; + // TODO: can it be -1? + // can't be bigger than ctx_size + int32_t n_predict; + char *reverse_prompt; + + // Used by LLaVA + // multi-model project file + char *mmproj; + char *image; + + // Model parameters (need to reload the model if updated): + // align to definition of struct llama_model_params + int32_t n_gpu_layers; + int32_t main_gpu; + // limited size: llama_max_devices() + float *tensor_split; + bool use_mmap; + + // Context parameters (used by the llama context): + uint32_t ctx_size; + uint32_t batch_size; + uint32_t ubatch_size; + uint32_t threads; + + // Sampling parameters (used by the llama sampling context). 
+ float temp; + float topP; + float repeat_penalty; + float presence_penalty; + float frequency_penalty; +}; + +struct LlamaContext { + struct llama_context *ctx; + struct llama_model *model; + llama_token *prompt; + size_t prompt_len; + llama_token *generation; + size_t generation_len; + struct wasi_nn_llama_config config; +}; + +static void +wasm_edge_llama_default_configuration(struct wasi_nn_llama_config *output) +{ + output->enable_log = false; + output->enable_debug_log = false; + output->stream_stdout = false; + output->embedding = false; + output->n_predict = 512; + output->reverse_prompt = NULL; + + output->mmproj = NULL; + output->image = NULL; + + output->main_gpu = 0; + output->n_gpu_layers = 0; + output->tensor_split = NULL; + output->use_mmap = true; + + // 0 = from model + output->ctx_size = 0; + output->batch_size = 512; + output->ubatch_size = output->batch_size; + output->threads = 1; + + output->temp = 0.80; + output->topP = 0.95; + output->repeat_penalty = 1.10; + output->presence_penalty = 0.0; + output->frequency_penalty = 0.0; +} + +static void +wasm_edge_llama_apply_configuration(const char *config_json, + struct wasi_nn_llama_config *output) +{ + cJSON *root = cJSON_Parse(config_json); + if (root == NULL) { + const char *error_ptr = cJSON_GetErrorPtr(); + if (error_ptr != NULL) { + NN_WARN_PRINTF("Error before: %s\n", error_ptr); + } + else { + NN_WARN_PRINTF("Failed to parse JSON"); + } + return; + } + + cJSON *item = NULL; + + item = cJSON_GetObjectItem(root, "enable-log"); + if (item != NULL) { + output->enable_log = cJSON_IsTrue(item); + NN_DBG_PRINTF("apply enable-log %d", output->enable_log); + } + + item = cJSON_GetObjectItem(root, "enable-debug-log"); + if (item != NULL) { + output->enable_debug_log = cJSON_IsTrue(item); + NN_DBG_PRINTF("apply enable-debug-log %d", output->enable_debug_log); + } + + item = cJSON_GetObjectItem(root, "stream-stdout"); + if (item != NULL) { + output->stream_stdout = cJSON_IsTrue(item); + 
NN_DBG_PRINTF("apply stream-stdout %d", output->stream_stdout); + } + + item = cJSON_GetObjectItem(root, "embedding"); + if (item != NULL) { + output->embedding = cJSON_IsTrue(item); + NN_DBG_PRINTF("apply embedding %d", output->embedding); + } + + item = cJSON_GetObjectItem(root, "n-predict"); + if (item != NULL) { + output->n_predict = (int32_t)cJSON_GetNumberValue(item); + NN_DBG_PRINTF("apply n-predict %d", output->n_predict); + } + + item = cJSON_GetObjectItem(root, "n-gpu-layers"); + if (item != NULL) { + output->n_gpu_layers = (int32_t)cJSON_GetNumberValue(item); + NN_DBG_PRINTF("apply n_gpu_layers %d", output->n_gpu_layers); + } + + item = cJSON_GetObjectItem(root, "ctx-size"); + if (item != NULL) { + output->ctx_size = (uint32_t)cJSON_GetNumberValue(item); + NN_DBG_PRINTF("apply ctx-size %d", output->ctx_size); + } + + // more ... + + cJSON_Delete(root); +} + +static struct llama_model_params +llama_model_params_from_wasi_nn_llama_config( + struct wasi_nn_llama_config *config) +{ + struct llama_model_params result = llama_model_default_params(); + + // TODO: support more + result.main_gpu = config->main_gpu; + result.n_gpu_layers = config->n_gpu_layers; + result.use_mmap = config->use_mmap; + + return result; +} + +static struct llama_context_params +llama_context_params_from_wasi_nn_llama_config( + struct wasi_nn_llama_config *config) +{ + struct llama_context_params result = llama_context_default_params(); + + // TODO: support more + result.n_ctx = config->ctx_size; + // result.embeddings = config->embedding; + + return result; +} + +static void +llama_batch_clear(struct llama_batch *batch) +{ + batch->n_tokens = 0; +} + +static void +llama_batch_add(struct llama_batch *batch, llama_token id, llama_pos pos, + llama_seq_id *seq_ids, size_t seq_ids_len, bool logits) +{ + batch->token[batch->n_tokens] = id; + batch->pos[batch->n_tokens] = pos; + batch->n_seq_id[batch->n_tokens] = seq_ids_len; + for (size_t i = 0; i < seq_ids_len; ++i) { + 
batch->seq_id[batch->n_tokens][i] = seq_ids[i]; + } + batch->logits[batch->n_tokens] = logits; + + batch->n_tokens++; +} + +// always output ERROR and WARN +// INFO needs enable_log +// DEBUG needs enable_debug_log +static void +llama_log_callback_local(enum ggml_log_level level, const char *text, + void *user_data) +{ + struct LlamaContext *backend_ctx = (struct LlamaContext *)user_data; + + if (level == GGML_LOG_LEVEL_DEBUG && !backend_ctx->config.enable_debug_log) + return; + + if (level == GGML_LOG_LEVEL_INFO && !backend_ctx->config.enable_log) + return; + + printf("%s", text); +} + +static void +llama_build_output_metadata(const struct LlamaContext *backend_ctx, + char *output_buf, size_t output_buf_size) +{ + snprintf(output_buf, output_buf_size, + "{\"input_tokens\":%ld, \"output_tokens\":%ld, " + "\"llama_build_number\":%d," + "\"llama_commit\":\"%s\"}", + backend_ctx->prompt_len, backend_ctx->generation_len, + LLAMA_BUILD_NUMBER, LLAMA_COMMIT); +} + +__attribute__((visibility("default"))) wasi_nn_error +init_backend(void **ctx) +{ + struct LlamaContext *backend_ctx = calloc(1, sizeof(struct LlamaContext)); + if (!backend_ctx) { + NN_ERR_PRINTF("Allocate for OpenVINOContext failed"); + return runtime_error; + } + + llama_backend_init(); + // llama_numa_init(); + llama_log_set(llama_log_callback_local, backend_ctx); + +#ifndef NDEBUG + NN_INFO_PRINTF("llama_build_number: % d, llama_commit: %s, llama_compiler: " + "%s, llama_build_target: %s", + LLAMA_BUILD_NUMBER, LLAMA_COMMIT, LLAMA_COMPILER, + LLAMA_BUILD_TARGET); +#endif + + *ctx = (void *)backend_ctx; + return success; +} + +__attribute__((visibility("default"))) wasi_nn_error +deinit_backend(void *ctx) +{ + struct LlamaContext *backend_ctx = (struct LlamaContext *)ctx; + + if (!backend_ctx) + return invalid_argument; + + if (backend_ctx->generation) + free(backend_ctx->generation); + + if (backend_ctx->prompt) + free(backend_ctx->prompt); + + if (backend_ctx->ctx) + llama_free(backend_ctx->ctx); + + if 
(backend_ctx->model) + llama_free_model(backend_ctx->model); + + llama_backend_free(); + + os_free(backend_ctx); + return success; +} + +__attribute__((visibility("default"))) wasi_nn_error +load(void *ctx, graph_builder_array *builder, graph_encoding encoding, + execution_target target, graph *g) +{ + return unsupported_operation; +} + +static wasi_nn_error +__load_by_name_with_configuration(void *ctx, const char *filename, graph *g) +{ + struct LlamaContext *backend_ctx = (struct LlamaContext *)ctx; + + // make sure backend_ctx->config is initialized + + struct llama_model_params model_params = + llama_model_params_from_wasi_nn_llama_config(&backend_ctx->config); + struct llama_model *model = + llama_load_model_from_file(filename, model_params); + if (model == NULL) { + NN_ERR_PRINTF("Failed to load model from file %s", filename); + return runtime_error; + } + +#ifndef NDEBUG + char buf[128] = { 0 }; + llama_model_desc(model, buf, 127); + NN_INFO_PRINTF("Model desc %s", buf); +#endif + + backend_ctx->model = model; + + return success; +} + +__attribute__((visibility("default"))) wasi_nn_error +load_by_name(void *ctx, const char *filename, uint32_t filename_len, graph *g) +{ + struct LlamaContext *backend_ctx = (struct LlamaContext *)ctx; + + // use default params + wasm_edge_llama_default_configuration(&backend_ctx->config); + return __load_by_name_with_configuration(ctx, filename, g); +} + +__attribute__((visibility("default"))) wasi_nn_error +load_by_name_with_config(void *ctx, const char *filename, uint32_t filename_len, + const char *config, uint32_t config_len, graph *g) +{ + struct LlamaContext *backend_ctx = (struct LlamaContext *)ctx; + + wasm_edge_llama_default_configuration(&backend_ctx->config); + + if (config != NULL) { + // parse wasmedge config + wasm_edge_llama_apply_configuration(config, &backend_ctx->config); + } + else { + NN_INFO_PRINTF("No configuration provided, use default"); + } + + return __load_by_name_with_configuration(ctx, filename, 
g); +} + +// It is assumed that model params shouldn't be changed in Config stage. +// We only load the model once in the Load stage. +__attribute__((visibility("default"))) wasi_nn_error +init_execution_context(void *ctx, graph g, graph_execution_context *exec_ctx) +{ + struct LlamaContext *backend_ctx = (struct LlamaContext *)ctx; + + struct llama_context_params ctx_params = + llama_context_params_from_wasi_nn_llama_config(&backend_ctx->config); + struct llama_context *llama_ctx = + llama_new_context_with_model(backend_ctx->model, ctx_params); + if (llama_ctx == NULL) { + NN_ERR_PRINTF("Failed to create context for model"); + return runtime_error; + } + + backend_ctx->ctx = llama_ctx; + + NN_INFO_PRINTF("n_predict = %d, n_ctx = %d", backend_ctx->config.n_predict, + llama_n_ctx(backend_ctx->ctx)); + return success; +} + +__attribute__((visibility("default"))) wasi_nn_error +set_input(void *ctx, graph_execution_context exec_ctx, uint32_t index, + tensor *wasi_nn_tensor) +{ + struct LlamaContext *backend_ctx = (struct LlamaContext *)ctx; + // tensor->data is the prompt string. 
ends with \0 + char *prompt_text = (char *)wasi_nn_tensor->data; + +#ifndef NDEBUG + NN_DBG_PRINTF("--------------------------------------------------"); + NN_DBG_PRINTF("prompt_text: %s", prompt_text); + NN_DBG_PRINTF("--------------------------------------------------"); +#endif + + // tokenize the prompt + uint32_t n_token_max = llama_n_ctx(backend_ctx->ctx); + uint32_t prompt_text_len = strlen(prompt_text); + + if (backend_ctx->prompt == NULL) { + backend_ctx->prompt = calloc(n_token_max, sizeof(llama_token)); + if (backend_ctx->prompt == NULL) { + NN_ERR_PRINTF("Failed to allocate tokens_list"); + return runtime_error; + } + } + + int32_t n_tokens = + llama_tokenize(backend_ctx->model, prompt_text, prompt_text_len, + backend_ctx->prompt, n_token_max, true, false); + if (n_tokens < 0) { + NN_ERR_PRINTF("Failed to tokenize prompt text"); + return runtime_error; + } + + backend_ctx->prompt_len = n_tokens; + + // make sure the KV cache is big enough to hold all the prompt and generated + // tokens + int n_kv_req = n_tokens + (backend_ctx->config.n_predict - n_tokens); + if (n_kv_req < 0 || (uint32_t)n_kv_req > n_token_max) { + NN_ERR_PRINTF("the required KV cache size is not big enough, either " + "reduce n_predict or increase n_ctx"); + return runtime_error; + } + + return success; +} + +__attribute__((visibility("default"))) wasi_nn_error +compute(void *ctx, graph_execution_context exec_ctx) +{ + struct LlamaContext *backend_ctx = (struct LlamaContext *)ctx; + wasi_nn_error ret = runtime_error; + + // reset the generation buffer + if (backend_ctx->generation == NULL) { + backend_ctx->generation = + calloc(backend_ctx->config.n_predict, sizeof(llama_token)); + if (backend_ctx->generation == NULL) { + NN_ERR_PRINTF("Failed to allocate generation"); + return runtime_error; + } + } + + backend_ctx->generation_len = 0; + + // check KV cache + uint32_t n_ctx = llama_n_ctx(backend_ctx->ctx); + if (n_ctx <= backend_ctx->generation_len) { + NN_ERR_PRINTF( + "ctx_size(%u) 
is not big enough(<%ld), please increase it", n_ctx, + backend_ctx->generation_len); + return context_full; + } + + // prepare the batch + struct llama_batch batch = + llama_batch_init(backend_ctx->config.batch_size, 0, 1); + + // evaluate the initial prompt + llama_seq_id seq_ids[1] = { 0 }; + for (size_t i = 0; i < backend_ctx->prompt_len; i++) { + llama_batch_add(&batch, backend_ctx->prompt[i], i, seq_ids, + sizeof(seq_ids) / sizeof(seq_ids[0]), false); + } + + batch.logits[batch.n_tokens - 1] = true; + + if (batch.n_tokens > backend_ctx->config.n_predict) { + NN_DBG_PRINTF("n_predict(%d) is not big enough(%d), please increase it", + backend_ctx->config.n_predict, batch.n_tokens); + return prompt_tool_long; + } + + if (llama_decode(backend_ctx->ctx, batch) != 0) { + NN_ERR_PRINTF("First decode failed"); + return runtime_error; + } + + // main loop + int32_t n_cur = batch.n_tokens; + int n_decode = 0; + int32_t n_vocab = llama_n_vocab(backend_ctx->model); + llama_token_data *candidates = NULL; + + candidates = calloc(n_vocab, sizeof(llama_token_data)); + if (candidates == NULL) { + NN_ERR_PRINTF("Failed to allocate candidates"); + goto fail; + } + + while (n_cur <= backend_ctx->config.n_predict) { + // sample the next token + float *logits = + llama_get_logits_ith(backend_ctx->ctx, batch.n_tokens - 1); + + memset(candidates, 0, sizeof(llama_token_data) * n_vocab); + for (llama_token token_id = 0; token_id < n_vocab; token_id++) { + candidates[token_id].id = token_id; + candidates[token_id].logit = logits[token_id]; + candidates[token_id].p = 0.0f; + } + + llama_token_data_array candidates_p = { candidates, n_vocab, false }; + + // sample the most likely token + llama_token new_token_id = + llama_sample_token_greedy(backend_ctx->ctx, &candidates_p); + + backend_ctx->generation[backend_ctx->generation_len++] = new_token_id; + +#ifndef NDEBUG + { + char buf[128] = { 0 }; + llama_token_to_piece(backend_ctx->model, new_token_id, buf, 120, 0, + true); + 
printf("%d(%s),", new_token_id, buf); + } +#endif + + // is it an end of generation? + if (llama_token_is_eog(backend_ctx->model, new_token_id)) { + printf("\n"); + NN_INFO_PRINTF("reach the end of generation"); + break; + } + + // prepare the next batch + llama_batch_clear(&batch); + // push this new token for next evaluation + llama_batch_add(&batch, new_token_id, n_cur, seq_ids, + sizeof(seq_ids) / sizeof(seq_ids[0]), true); + n_decode++; + n_cur++; + + if (llama_decode(backend_ctx->ctx, batch) != 0) { + NN_ERR_PRINTF("Secondary decode failed"); + goto fail; + } + } + + printf("\n"); + ret = success; +fail: + llama_batch_free(batch); + if (candidates != NULL) { + free(candidates); + } + return ret; +} + +__attribute__((visibility("default"))) wasi_nn_error +get_output(void *ctx, graph_execution_context exec_ctx, uint32_t index, + tensor_data output_tensor, uint32_t *output_tensor_size) +{ + struct LlamaContext *backend_ctx = (struct LlamaContext *)ctx; + + // Compatibility with WasmEdge + if (index > 1) { + NN_ERR_PRINTF("Invalid output index %d", index); + return invalid_argument; + } + + // Index 1 is for the metadata of the outputs. 
+ if (index == 1) { + char output_metadata[128] = { 0 }; + llama_build_output_metadata(backend_ctx, output_metadata, 127); + + if (backend_ctx->config.stream_stdout) { + printf("%s\n", output_metadata); + } + + memcpy(output_tensor, output_metadata, strlen(output_metadata)); + *output_tensor_size = strlen(output_metadata); + return success; + } + + // token -> piece -> output_tensor + if (backend_ctx->config.stream_stdout) { + printf("\n"); + } + + size_t end_pos = 0; + for (size_t i = 0; i < backend_ctx->generation_len; i++) { + char buf[128] = { 0 }; + llama_token_to_piece(backend_ctx->model, backend_ctx->generation[i], + buf, 120, 0, true); + + if (backend_ctx->config.stream_stdout) { + printf("%s", buf); + } + + memcpy(output_tensor + end_pos, buf, strlen(buf)); + end_pos += strlen(buf); + } + + if (backend_ctx->config.stream_stdout) { + printf("\n"); + } + + *output_tensor_size = end_pos; + return success; +} diff --git a/core/iwasm/libraries/wasi-nn/test/Dockerfile.wasi-nn-smoke b/core/iwasm/libraries/wasi-nn/test/Dockerfile.wasi-nn-smoke index 261c7726..fe3a8c51 100644 --- a/core/iwasm/libraries/wasi-nn/test/Dockerfile.wasi-nn-smoke +++ b/core/iwasm/libraries/wasi-nn/test/Dockerfile.wasi-nn-smoke @@ -63,21 +63,35 @@ WORKDIR /workspaces/wasmedge-wasinn-examples RUN git clone --depth 1 https://github.com/second-state/WasmEdge-WASINN-examples.git . COPY core/iwasm/libraries/wasi-nn/test/bump_wasi_nn_to_0_6_0.patch . 
RUN git apply ./bump_wasi_nn_to_0_6_0.patch -# recompile with wasi-nn 0.6.0 -RUN cd openvino-mobilenet-image/rust && cargo build --target=wasm32-wasi -RUN cd openvino-mobilenet-raw/rust && cargo build --target=wasm32-wasi -RUN cd openvino-road-segmentation-adas/openvino-road-seg-adas && cargo build --target=wasm32-wasi -RUN cd tflite-birds_v1-image/rust && cargo build --target=wasm32-wasi -# preparation -RUN cd openvino-mobilenet-image \ +# recompile with wasi-nn 0.6.0 +WORKDIR /workspaces/wasmedge-wasinn-examples/openvino-mobilenet-image/ +RUN pushd rust \ + && cargo build --target=wasm32-wasi \ + && popd \ && ./download_mobilenet.sh . \ && ls -l mobilenet.xml mobilenet.bin -RUN cd openvino-mobilenet-raw \ +WORKDIR /workspaces/wasmedge-wasinn-examples/openvino-mobilenet-raw/ +RUN pushd rust \ + && cargo build --target=wasm32-wasi \ + && popd \ && ./download_mobilenet.sh . \ && ls -l mobilenet.xml mobilenet.bin tensor-1x224x224x3-f32.bgr +WORKDIR /workspaces/wasmedge-wasinn-examples/openvino-road-segmentation-adas/ +RUN pushd openvino-road-seg-adas \ + && cargo build --target=wasm32-wasi + +WORKDIR /workspaces/wasmedge-wasinn-examples/tflite-birds_v1-image/ +RUN pushd rust \ + && cargo build --target=wasm32-wasi + +# mount models when running +WORKDIR /workspaces/wasmedge-wasinn-examples/wasmedge-ggml/qwen +RUN wget --progress=dot:giga https://www.modelscope.cn/models/qwen/Qwen1.5-0.5B-Chat-GGUF/resolve/master/qwen1_5-0_5b-chat-q2_k.gguf +RUN cargo build --target=wasm32-wasi + # # iwasm. build from source WORKDIR /workspaces/wamr @@ -88,15 +102,16 @@ WORKDIR /workspaces/wamr/product-mini/platforms/linux RUN OpenVINO_DIR=/usr/lib/openvino-2023.2.0 \ cmake -S . 
-B build \ -DWAMR_BUILD_WASI_NN=1 -DWAMR_BUILD_WASI_EPHEMERAL_NN=1 \ - -DWAMR_BUILD_WASI_NN_OPENVINO=1 -DWAMR_BUILD_WASI_NN_TFLITE=1 \ - && cmake --build build - -ENV PATH=/workspaces/wamr/product-mini/platforms/linux/build:${PATH} -ENV LD_LIBRARY_PATH=/workspaces/wamr/product-mini/platforms/linux/build + -DWAMR_BUILD_WASI_NN_OPENVINO=1 \ + -DWAMR_BUILD_WASI_NN_TFLITE=1 \ + -DWAMR_BUILD_WASI_NN_LLAMACPP=1 \ + && cmake --build build \ + && cmake --install build + +ENV LD_LIBRARY_PATH=/usr/local/lib # add smoke test script COPY core/iwasm/libraries/wasi-nn/test/run_smoke_test.py / -# WORKDIR /workspaces/wasmedge-wasinn-examples CMD ["python3", "/run_smoke_test.py"] diff --git a/core/iwasm/libraries/wasi-nn/test/run_smoke_test.py b/core/iwasm/libraries/wasi-nn/test/run_smoke_test.py index a62d9cb7..304b0c97 100644 --- a/core/iwasm/libraries/wasi-nn/test/run_smoke_test.py +++ b/core/iwasm/libraries/wasi-nn/test/run_smoke_test.py @@ -260,6 +260,63 @@ def execute_openvino_road_segmentation_adas( print("------------------------------------------------------------") +def execute_wasmedge_ggml_qwen(iwasm_bin: str, wasmedge_bin: str, cwd: Path): + iwasm_args = ["--dir=."] + wasm_file = ["./target/wasm32-wasi/debug/wasmedge-ggml-qwen.wasm"] + wasm_args = ["./qwen1_5-0_5b-chat-q2_k.gguf"] + + cmd = [iwasm_bin] + cmd.extend(iwasm_args) + cmd.extend(wasm_file) + cmd.extend(wasm_args) + + # print(f'Execute: {" ".join(cmd)}') + + prompt = "what is the capital of Pakistan" + + with subprocess.Popen( + cmd, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + cwd=cwd, + ) as p: + # USER + p.stdout.readline() + + p.stdin.write(b"hi\n") + p.stdin.flush() + # ASSITANT + p.stdout.readline() + # xxx + p.stdout.readline() + # USER + p.stdout.readline() + + p.stdin.write(prompt.encode()) + p.stdin.write(b"\n") + p.stdin.flush() + # ASSITANT + p.stdout.readline() + # xxx + answer = p.stdout.readline().decode("utf-8") + # USER + p.stdout.readline() + + p.terminate() 
+ + if "Karachi" in answer: + print(f"- wasmedge_ggml_qwen. PASS") + return + + print(f"- wasmedge_ggml_qwen. FAILED") + print("------------------------------------------------------------") + pprint(answer) + print("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<") + pprint("Karachi") + print("------------------------------------------------------------") + + def execute_wasmedge_wasinn_examples(iwasm_bin: str, wasmedge_bin: str): assert Path.cwd().name == "wasmedge-wasinn-examples" assert shutil.which(iwasm_bin) @@ -282,6 +339,9 @@ def execute_wasmedge_wasinn_examples(iwasm_bin: str, wasmedge_bin: str): iwasm_bin, wasmedge_bin, openvino_road_segmentation_adas_dir ) + wasmedge_ggml_qwem_dir = Path.cwd().joinpath("./wasmedge-ggml/qwen") + execute_wasmedge_ggml_qwen(iwasm_bin, wasmedge_bin, wasmedge_ggml_qwem_dir) + if __name__ == "__main__": execute_wasmedge_wasinn_examples("iwasm", "wasmedge") From cbc20788986155391eaa20a01bd947f0f0910b16 Mon Sep 17 00:00:00 2001 From: Marcin Kolny Date: Tue, 10 Sep 2024 02:05:23 +0100 Subject: [PATCH 17/24] AOT call stack optimizations (#3773) - Implement TINY / STANDARD frame modes - tiny mode is only able to keep track on the IP and func idx, STANDARD mode provides more capabilities (parameters, stack pointer etc.). - Implement FRAME_PER_FUNCTION / FRAME_PER_CALL modes - frame per function adds code at the beginning and at the end of each function for allocating / deallocating stack frame, whereas in per-call mode the frame is allocated before each call. The exception is call to the imported function, where frame-per-function mode also allocates the stack before the `call` instruction (as it can't instrument the imported function). At the moment TINY + FRAME_PER_FUNCTION is automatically enabled in case GC and perf profiling are disabled and `values` call stack feature is not requested. In all the other cases STANDARD + FRAME_PER_CALL is used. 
STANDARD + FRAME_PER_FUNCTION and TINY + FRAME_PER_CALL are currently not implemented but possible, and might be enabled in the future. ps. https://github.com/bytecodealliance/wasm-micro-runtime/issues/3758 --- core/iwasm/aot/aot_loader.c | 4 + core/iwasm/aot/aot_runtime.c | 198 ++++++++++++++---- core/iwasm/aot/aot_runtime.h | 9 +- core/iwasm/compilation/aot_compiler.c | 54 ++++- core/iwasm/compilation/aot_compiler.h | 9 + core/iwasm/compilation/aot_emit_aot_file.c | 6 + core/iwasm/compilation/aot_emit_control.c | 50 ++++- core/iwasm/compilation/aot_emit_function.c | 78 +++++-- core/iwasm/compilation/aot_llvm.c | 6 +- core/iwasm/compilation/aot_llvm.h | 2 +- core/iwasm/compilation/aot_stack_frame.h | 27 +++ core/iwasm/compilation/aot_stack_frame_comp.c | 148 +++++++++++++ core/iwasm/compilation/aot_stack_frame_comp.h | 33 +++ core/iwasm/include/aot_comp_option.h | 18 +- core/iwasm/interpreter/wasm_loader.c | 4 +- core/iwasm/interpreter/wasm_mini_loader.c | 4 +- wamr-compiler/main.c | 25 ++- 17 files changed, 590 insertions(+), 85 deletions(-) create mode 100644 core/iwasm/compilation/aot_stack_frame.h create mode 100644 core/iwasm/compilation/aot_stack_frame_comp.c create mode 100644 core/iwasm/compilation/aot_stack_frame_comp.h diff --git a/core/iwasm/aot/aot_loader.c b/core/iwasm/aot/aot_loader.c index 3a5b6fc5..0abafd9d 100644 --- a/core/iwasm/aot/aot_loader.c +++ b/core/iwasm/aot/aot_loader.c @@ -597,6 +597,10 @@ load_target_info_section(const uint8 *buf, const uint8 *buf_end, return false; } +#if WASM_ENABLE_DUMP_CALL_STACK != 0 + module->feature_flags = target_info.feature_flags; +#endif + /* Finally, check feature flags */ return check_feature_flags(error_buf, error_buf_size, target_info.feature_flags); diff --git a/core/iwasm/aot/aot_runtime.c b/core/iwasm/aot/aot_runtime.c index bdb4ca91..013c761a 100644 --- a/core/iwasm/aot/aot_runtime.c +++ b/core/iwasm/aot/aot_runtime.c @@ -4,6 +4,7 @@ */ #include "aot_runtime.h" +#include 
"../compilation/aot_stack_frame.h" #include "bh_log.h" #include "mem_alloc.h" #include "../common/wasm_runtime_common.h" @@ -72,6 +73,10 @@ bh_static_assert(offsetof(AOTFrame, sp) == sizeof(uintptr_t) * 5); bh_static_assert(offsetof(AOTFrame, frame_ref) == sizeof(uintptr_t) * 6); bh_static_assert(offsetof(AOTFrame, lp) == sizeof(uintptr_t) * 7); +bh_static_assert(offsetof(AOTTinyFrame, func_index) == sizeof(uint32) * 0); +bh_static_assert(offsetof(AOTTinyFrame, ip_offset) == sizeof(uint32) * 1); +bh_static_assert(sizeof(AOTTinyFrame) == sizeof(uint32) * 2); + static void set_error_buf(char *error_buf, uint32 error_buf_size, const char *string) { @@ -110,6 +115,55 @@ runtime_malloc(uint64 size, char *error_buf, uint32 error_buf_size) return mem; } +#if WASM_ENABLE_AOT_STACK_FRAME != 0 +static bool +is_tiny_frame(WASMExecEnv *exec_env) +{ + AOTModule *module = + (AOTModule *)((AOTModuleInstance *)exec_env->module_inst)->module; + + return module->feature_flags & WASM_FEATURE_TINY_STACK_FRAME; +} + +static bool +is_frame_per_function(WASMExecEnv *exec_env) +{ + AOTModule *module = + (AOTModule *)((AOTModuleInstance *)exec_env->module_inst)->module; + + return module->feature_flags & WASM_FEATURE_FRAME_PER_FUNCTION; +} + +static void * +get_top_frame(WASMExecEnv *exec_env) +{ + if (is_tiny_frame(exec_env)) { + return exec_env->wasm_stack.top > exec_env->wasm_stack.bottom + ? 
exec_env->wasm_stack.top - sizeof(AOTTinyFrame) + : NULL; + } + else { + return exec_env->cur_frame; + } +} + +static void * +get_prev_frame(WASMExecEnv *exec_env, void *cur_frame) +{ + bh_assert(cur_frame); + + if (is_tiny_frame(exec_env)) { + if ((uint8 *)cur_frame == exec_env->wasm_stack.bottom) { + return NULL; + } + return ((AOTTinyFrame *)cur_frame) - 1; + } + else { + return ((AOTFrame *)cur_frame)->prev_frame; + } +} +#endif + static bool check_global_init_expr(const AOTModule *module, uint32 global_index, char *error_buf, uint32 error_buf_size) @@ -2265,7 +2319,7 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function, uint32 ext_ret_cell = wasm_get_cell_num(ext_ret_types, ext_ret_count); uint64 size; #if WASM_ENABLE_AOT_STACK_FRAME != 0 - struct WASMInterpFrame *prev_frame = exec_env->cur_frame; + void *prev_frame = get_top_frame(exec_env); #endif /* Allocate memory all arguments */ @@ -2296,7 +2350,8 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function, } #if WASM_ENABLE_AOT_STACK_FRAME != 0 - if (!aot_alloc_frame(exec_env, function->func_index)) { + if (!is_frame_per_function(exec_env) + && !aot_alloc_frame(exec_env, function->func_index)) { if (argv1 != argv1_buf) wasm_runtime_free(argv1); return false; @@ -2324,7 +2379,7 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function, /* Free all frames allocated, note that some frames may be allocated in AOT code and haven't been freed if exception occurred */ - while (exec_env->cur_frame != prev_frame) + while (get_top_frame(exec_env) != prev_frame) aot_free_frame(exec_env); #endif if (!ret) { @@ -2367,9 +2422,12 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function, } else { #if WASM_ENABLE_AOT_STACK_FRAME != 0 - struct WASMInterpFrame *prev_frame = exec_env->cur_frame; - - if (!aot_alloc_frame(exec_env, function->func_index)) { + void *prev_frame = get_top_frame(exec_env); + /* Only allocate frame for frame-per-call mode; in the 
+ frame-per-function mode the frame is allocated at the + beginning of the function. */ + if (!is_frame_per_function(exec_env) + && !aot_alloc_frame(exec_env, function->func_index)) { return false; } #endif @@ -2394,7 +2452,7 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function, /* Free all frames allocated, note that some frames may be allocated in AOT code and haven't been freed if exception occurred */ - while (exec_env->cur_frame != prev_frame) + while (get_top_frame(exec_env) != prev_frame) aot_free_frame(exec_env); #endif @@ -2880,7 +2938,7 @@ aot_invoke_native(WASMExecEnv *exec_env, uint32 func_idx, uint32 argc, goto fail; } #if WASM_ENABLE_AOT_STACK_FRAME != 0 - struct WASMInterpFrame *prev_frame = exec_env->cur_frame; + void *prev_frame = get_top_frame(exec_env); if (!aot_alloc_frame(exec_env, func_idx)) { goto fail; @@ -2894,7 +2952,7 @@ aot_invoke_native(WASMExecEnv *exec_env, uint32 func_idx, uint32 argc, /* Free all frames allocated, note that some frames may be allocated in AOT code and haven't been freed if exception occurred */ - while (exec_env->cur_frame != prev_frame) + while (get_top_frame(exec_env) != prev_frame) aot_free_frame(exec_env); #endif } @@ -3622,8 +3680,8 @@ get_func_name_from_index(const AOTModuleInstance *module_inst, WASM_ENABLE_PERF_PROFILING != 0 */ #if WASM_ENABLE_GC == 0 -bool -aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index) +static bool +aot_alloc_standard_frame(WASMExecEnv *exec_env, uint32 func_index) { AOTModuleInstance *module_inst = (AOTModuleInstance *)exec_env->module_inst; #if WASM_ENABLE_PERF_PROFILING != 0 @@ -3670,8 +3728,8 @@ aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index) #else /* else of WASM_ENABLE_GC == 0 */ -bool -aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index) +static bool +aot_alloc_standard_frame(WASMExecEnv *exec_env, uint32 func_index) { AOTModuleInstance *module_inst = (AOTModuleInstance *)exec_env->module_inst; AOTModule *module = (AOTModule 
*)module_inst->module; @@ -3727,11 +3785,48 @@ aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index) } #endif /* end of WASM_ENABLE_GC == 0 */ +static bool +aot_alloc_tiny_frame(WASMExecEnv *exec_env, uint32 func_index) +{ + AOTTinyFrame *new_frame = (AOTTinyFrame *)exec_env->wasm_stack.top; + + if ((uint8 *)new_frame > exec_env->wasm_stack.top_boundary) { + aot_set_exception((WASMModuleInstance *)exec_env->module_inst, + "wasm operand stack overflow"); + return false; + } + + new_frame->func_index = func_index; + exec_env->wasm_stack.top += sizeof(AOTTinyFrame); + return true; +} + +bool +aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index) +{ + AOTModule *module = + (AOTModule *)((AOTModuleInstance *)exec_env->module_inst)->module; + + if (is_frame_per_function(exec_env) + && func_index >= module->import_func_count) { + /* in frame per function mode the frame is allocated at + the beginning of each frame, so we only need to allocate + the frame for imported functions */ + return true; + } + if (is_tiny_frame(exec_env)) { + return aot_alloc_tiny_frame(exec_env, func_index); + } + else { + return aot_alloc_standard_frame(exec_env, func_index); + } +} + static inline void -aot_free_frame_internal(WASMExecEnv *exec_env) +aot_free_standard_frame(WASMExecEnv *exec_env) { AOTFrame *cur_frame = (AOTFrame *)exec_env->cur_frame; - AOTFrame *prev_frame = cur_frame->prev_frame; + AOTFrame *prev_frame = (AOTFrame *)cur_frame->prev_frame; #if WASM_ENABLE_PERF_PROFILING != 0 uint64 time_elapsed = @@ -3751,13 +3846,24 @@ aot_free_frame_internal(WASMExecEnv *exec_env) exec_env->cur_frame = (struct WASMInterpFrame *)prev_frame; } +static inline void +aot_free_tiny_frame(WASMExecEnv *exec_env) +{ + exec_env->wasm_stack.top = + get_prev_frame(exec_env, exec_env->wasm_stack.top); +} + void aot_free_frame(WASMExecEnv *exec_env) { - aot_free_frame_internal(exec_env); + if (is_tiny_frame(exec_env)) { + aot_free_tiny_frame(exec_env); + } + else { + 
aot_free_standard_frame(exec_env); + } } - void aot_frame_update_profile_info(WASMExecEnv *exec_env, bool alloc_frame) { @@ -3806,14 +3912,13 @@ aot_frame_update_profile_info(WASMExecEnv *exec_env, bool alloc_frame) bool aot_create_call_stack(struct WASMExecEnv *exec_env) { - AOTFrame *cur_frame = (AOTFrame *)exec_env->cur_frame, - *first_frame = cur_frame; AOTModuleInstance *module_inst = (AOTModuleInstance *)exec_env->module_inst; AOTModule *module = (AOTModule *)module_inst->module; uint32 n = 0; - while (cur_frame) { - cur_frame = cur_frame->prev_frame; + void *top_frame = get_top_frame(exec_env); + while (top_frame) { + top_frame = get_prev_frame(exec_env, top_frame); n++; } @@ -3823,28 +3928,46 @@ aot_create_call_stack(struct WASMExecEnv *exec_env) return false; } - cur_frame = first_frame; - while (cur_frame) { + top_frame = get_top_frame(exec_env); + while (n-- > 0) { + uint32 func_index, ip_offset; + uint32 *lp = NULL; +#if WASM_ENABLE_GC != 0 + uint32 *sp = NULL; + uint8 *frame_ref = NULL; +#endif + if (is_tiny_frame(exec_env)) { + AOTTinyFrame *frame = (AOTTinyFrame *)top_frame; + func_index = (uint32)frame->func_index; + ip_offset = (uint32)frame->ip_offset; + } + else { + AOTFrame *frame = (AOTFrame *)top_frame; + func_index = (uint32)frame->func_index; + ip_offset = (uint32)frame->ip_offset; + lp = frame->lp; +#if WASM_ENABLE_GC != 0 + sp = frame->sp; + frame_ref = frame->frame_ref; +#endif + } WASMCApiFrame frame = { 0 }; uint32 max_local_cell_num, max_stack_cell_num; uint32 all_cell_num, lp_size; frame.instance = module_inst; frame.module_offset = 0; - frame.func_index = (uint32)cur_frame->func_index; - frame.func_offset = (uint32)cur_frame->ip_offset; - frame.func_name_wp = get_func_name_from_index( - module_inst, (uint32)cur_frame->func_index); + frame.func_index = func_index; + frame.func_offset = ip_offset; + frame.func_name_wp = get_func_name_from_index(module_inst, func_index); - if (cur_frame->func_index >= module->import_func_count) { - 
uint32 aot_func_idx = - (uint32)(cur_frame->func_index - module->import_func_count); + if (func_index >= module->import_func_count) { + uint32 aot_func_idx = func_index - module->import_func_count; max_local_cell_num = module->max_local_cell_nums[aot_func_idx]; max_stack_cell_num = module->max_stack_cell_nums[aot_func_idx]; } else { - AOTFuncType *func_type = - module->import_funcs[cur_frame->func_index].func_type; + AOTFuncType *func_type = module->import_funcs[func_index].func_type; max_local_cell_num = func_type->param_cell_num > 2 ? func_type->param_cell_num : 2; max_stack_cell_num = 0; @@ -3856,12 +3979,12 @@ aot_create_call_stack(struct WASMExecEnv *exec_env) #else lp_size = align_uint(all_cell_num * 5, 4); #endif - if (lp_size > 0) { + if (lp_size > 0 && !is_tiny_frame(exec_env)) { if (!(frame.lp = wasm_runtime_malloc(lp_size))) { destroy_c_api_frames(module_inst->frames); return false; } - bh_memcpy_s(frame.lp, lp_size, cur_frame->lp, lp_size); + bh_memcpy_s(frame.lp, lp_size, lp, lp_size); #if WASM_ENABLE_GC != 0 uint32 local_ref_flags_cell_num = @@ -3869,9 +3992,8 @@ aot_create_call_stack(struct WASMExecEnv *exec_env) .local_ref_flag_cell_num; uint8 *local_ref_flags = module->func_local_ref_flags[frame.func_index].local_ref_flags; - frame.sp = frame.lp + (cur_frame->sp - cur_frame->lp); - frame.frame_ref = (uint8 *)frame.lp - + (cur_frame->frame_ref - (uint8 *)cur_frame->lp); + frame.sp = frame.lp + (sp - lp); + frame.frame_ref = (uint8 *)frame.lp + (frame_ref - (uint8 *)lp); /* copy local ref flags from AOT module */ bh_memcpy_s(frame.frame_ref, local_ref_flags_cell_num, local_ref_flags, lp_size); @@ -3885,7 +4007,7 @@ aot_create_call_stack(struct WASMExecEnv *exec_env) return false; } - cur_frame = cur_frame->prev_frame; + top_frame = get_prev_frame(exec_env, top_frame); } return true; diff --git a/core/iwasm/aot/aot_runtime.h b/core/iwasm/aot/aot_runtime.h index 76c78451..f6bff00b 100644 --- a/core/iwasm/aot/aot_runtime.h +++ 
b/core/iwasm/aot/aot_runtime.h @@ -25,12 +25,15 @@ extern "C" { #define WASM_FEATURE_REF_TYPES (1 << 3) #define WASM_FEATURE_GARBAGE_COLLECTION (1 << 4) #define WASM_FEATURE_EXCEPTION_HANDLING (1 << 5) -#define WASM_FEATURE_MEMORY64 (1 << 6) +#define WASM_FEATURE_TINY_STACK_FRAME (1 << 6) #define WASM_FEATURE_MULTI_MEMORY (1 << 7) #define WASM_FEATURE_DYNAMIC_LINKING (1 << 8) #define WASM_FEATURE_COMPONENT_MODEL (1 << 9) #define WASM_FEATURE_RELAXED_SIMD (1 << 10) #define WASM_FEATURE_FLEXIBLE_VECTORS (1 << 11) +/* Stack frame is created at the beginning of the function, + * and not at the beginning of each function call */ +#define WASM_FEATURE_FRAME_PER_FUNCTION (1 << 12) typedef enum AOTSectionType { AOT_SECTION_TYPE_TARGET_INFO = 0, @@ -326,6 +329,10 @@ typedef struct AOTModule { /* `.data` and `.text` sections merged into one large mmaped section */ uint8 *merged_data_text_sections; uint32 merged_data_text_sections_size; + +#if WASM_ENABLE_AOT_STACK_FRAME != 0 + uint32 feature_flags; +#endif } AOTModule; #define AOTMemoryInstance WASMMemoryInstance diff --git a/core/iwasm/compilation/aot_compiler.c b/core/iwasm/compilation/aot_compiler.c index 78b7da88..e5600497 100644 --- a/core/iwasm/compilation/aot_compiler.c +++ b/core/iwasm/compilation/aot_compiler.c @@ -16,6 +16,7 @@ #include "aot_emit_parametric.h" #include "aot_emit_table.h" #include "aot_emit_gc.h" +#include "aot_stack_frame_comp.h" #include "simd/simd_access_lanes.h" #include "simd/simd_bitmask_extracts.h" #include "simd/simd_bit_shifts.h" @@ -253,6 +254,13 @@ store_value(AOTCompContext *comp_ctx, LLVMValueRef value, uint8 value_type, return true; } +void +aot_call_stack_features_init_default(AOTCallStackFeatures *features) +{ + memset(features, 1, sizeof(AOTCallStackFeatures)); + features->frame_per_function = false; +} + bool aot_frame_store_value(AOTCompContext *comp_ctx, LLVMValueRef value, uint8 value_type, LLVMValueRef cur_frame, uint32 offset) @@ -573,9 +581,10 @@ 
aot_gen_commit_values(AOTCompFrame *frame) return true; } -bool -aot_gen_commit_ip(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, - LLVMValueRef ip_value, bool is_64bit) +static bool +aot_standard_frame_gen_commit_ip(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + LLVMValueRef ip_value, bool is_64bit) { LLVMValueRef cur_frame = func_ctx->cur_frame; LLVMValueRef value_offset, value_addr, value_ptr; @@ -613,6 +622,23 @@ aot_gen_commit_ip(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, return true; } +bool +aot_gen_commit_ip(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, + LLVMValueRef ip_value, bool is_64bit) +{ + switch (comp_ctx->aux_stack_frame_type) { + case AOT_STACK_FRAME_TYPE_STANDARD: + return aot_standard_frame_gen_commit_ip(comp_ctx, func_ctx, + ip_value, is_64bit); + case AOT_STACK_FRAME_TYPE_TINY: + return aot_tiny_frame_gen_commit_ip(comp_ctx, func_ctx, ip_value); + default: + aot_set_last_error( + "unsupported mode when generating commit_ip code"); + return false; + } +} + bool aot_gen_commit_sp_ip(AOTCompFrame *frame, bool commit_sp, bool commit_ip) { @@ -962,6 +988,7 @@ static bool aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index) { AOTFuncContext *func_ctx = comp_ctx->func_ctxes[func_index]; + LLVMValueRef func_index_ref; uint8 *frame_ip = func_ctx->aot_func->code, opcode, *p_f32, *p_f64; uint8 *frame_ip_end = frame_ip + func_ctx->aot_func->code_size; uint8 *param_types = NULL; @@ -984,16 +1011,27 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index) LLVMMetadataRef location; #endif - if (comp_ctx->enable_aux_stack_frame) { + /* Start to translate the opcodes */ + LLVMPositionBuilderAtEnd( + comp_ctx->builder, + func_ctx->block_stack.block_list_head->llvm_entry_block); + + if (comp_ctx->aux_stack_frame_type + && comp_ctx->call_stack_features.frame_per_function) { + INT_CONST(func_index_ref, + func_index + comp_ctx->comp_data->import_func_count, I32_TYPE, + true); + if 
(!aot_alloc_frame_per_function_frame_for_aot_func(comp_ctx, func_ctx, + func_index_ref)) { + return false; + } + } + if (comp_ctx->aux_stack_frame_type) { if (!init_comp_frame(comp_ctx, func_ctx, func_index)) { return false; } } - /* Start to translate the opcodes */ - LLVMPositionBuilderAtEnd( - comp_ctx->builder, - func_ctx->block_stack.block_list_head->llvm_entry_block); while (frame_ip < frame_ip_end) { opcode = *frame_ip++; diff --git a/core/iwasm/compilation/aot_compiler.h b/core/iwasm/compilation/aot_compiler.h index d3d55b02..895d2416 100644 --- a/core/iwasm/compilation/aot_compiler.h +++ b/core/iwasm/compilation/aot_compiler.h @@ -661,6 +661,15 @@ set_local_gc_ref(AOTCompFrame *frame, int n, LLVMValueRef value, uint8 ref_type) #define F64_CONST(v) LLVMConstReal(F64_TYPE, v) #define I8_CONST(v) LLVMConstInt(INT8_TYPE, v, true) +#define INT_CONST(variable, value, type, is_signed) \ + do { \ + variable = LLVMConstInt(type, value, is_signed); \ + if (!variable) { \ + aot_set_last_error("llvm build const failed"); \ + return false; \ + } \ + } while (0) + #define LLVM_CONST(name) (comp_ctx->llvm_consts.name) #define I1_ZERO LLVM_CONST(i1_zero) #define I1_ONE LLVM_CONST(i1_one) diff --git a/core/iwasm/compilation/aot_emit_aot_file.c b/core/iwasm/compilation/aot_emit_aot_file.c index e05f83b0..20f29057 100644 --- a/core/iwasm/compilation/aot_emit_aot_file.c +++ b/core/iwasm/compilation/aot_emit_aot_file.c @@ -4433,6 +4433,12 @@ aot_obj_data_create(AOTCompContext *comp_ctx) if (comp_ctx->enable_gc) { obj_data->target_info.feature_flags |= WASM_FEATURE_GARBAGE_COLLECTION; } + if (comp_ctx->aux_stack_frame_type == AOT_STACK_FRAME_TYPE_TINY) { + obj_data->target_info.feature_flags |= WASM_FEATURE_TINY_STACK_FRAME; + } + if (comp_ctx->call_stack_features.frame_per_function) { + obj_data->target_info.feature_flags |= WASM_FEATURE_FRAME_PER_FUNCTION; + } bh_print_time("Begin to resolve object file info"); diff --git a/core/iwasm/compilation/aot_emit_control.c 
b/core/iwasm/compilation/aot_emit_control.c index 7d73d8d9..945f6395 100644 --- a/core/iwasm/compilation/aot_emit_control.c +++ b/core/iwasm/compilation/aot_emit_control.c @@ -6,6 +6,7 @@ #include "aot_emit_control.h" #include "aot_compiler.h" #include "aot_emit_exception.h" +#include "aot_stack_frame_comp.h" #if WASM_ENABLE_GC != 0 #include "aot_emit_gc.h" #endif @@ -38,13 +39,24 @@ format_block_name(char *name, uint32 name_size, uint32 block_index, snprintf(name, name_size, "%s", "func_end"); } -#define CREATE_BLOCK(new_llvm_block, name) \ - do { \ - if (!(new_llvm_block = LLVMAppendBasicBlockInContext( \ - comp_ctx->context, func_ctx->func, name))) { \ - aot_set_last_error("add LLVM basic block failed."); \ - goto fail; \ - } \ +#define CREATE_BLOCK(new_llvm_block, name) \ + do { \ + if (!(new_llvm_block = LLVMAppendBasicBlockInContext( \ + comp_ctx->context, func_ctx->func, name))) { \ + aot_set_last_error("add LLVM basic block failed."); \ + goto fail; \ + } \ + if (!strcmp(name, "func_end") && comp_ctx->aux_stack_frame_type \ + && comp_ctx->call_stack_features.frame_per_function) { \ + LLVMBasicBlockRef cur_block = \ + LLVMGetInsertBlock(comp_ctx->builder); \ + SET_BUILDER_POS(new_llvm_block); \ + if (!aot_free_frame_per_function_frame_for_aot_func(comp_ctx, \ + func_ctx)) { \ + goto fail; \ + } \ + SET_BUILDER_POS(cur_block); \ + } \ } while (0) #define CURR_BLOCK() LLVMGetInsertBlock(comp_ctx->builder) @@ -93,6 +105,11 @@ format_block_name(char *name, uint32 name_size, uint32 block_index, goto fail; \ } \ SET_BUILDER_POS(block->llvm_end_block); \ + LLVMValueRef first_instr = \ + get_first_non_phi(block->llvm_end_block); \ + if (first_instr) { \ + LLVMPositionBuilderBefore(comp_ctx->builder, first_instr); \ + } \ for (_i = 0; _i < block->result_count; _i++) { \ if (!(block->result_phis[_i] = LLVMBuildPhi( \ comp_ctx->builder, \ @@ -158,6 +175,18 @@ get_target_block(AOTFuncContext *func_ctx, uint32 br_depth) return block; } +LLVMValueRef 
+get_first_non_phi(LLVMBasicBlockRef block) +{ + LLVMValueRef instr = LLVMGetFirstInstruction(block); + + while (instr && LLVMIsAPHINode(instr)) { + instr = LLVMGetNextInstruction(instr); + } + + return instr; +} + static void clear_frame_locals(AOTCompFrame *aot_frame) { @@ -1361,6 +1390,13 @@ aot_compile_op_return(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, (*p_frame_ip - 1) - comp_ctx->comp_data->wasm_module->buf_code); #endif + if (comp_ctx->aux_stack_frame_type + && comp_ctx->call_stack_features.frame_per_function + && !aot_free_frame_per_function_frame_for_aot_func(comp_ctx, + func_ctx)) { + return false; + } + if (block_func->result_count) { /* Store extra result values to function parameters */ for (i = 0; i < block_func->result_count - 1; i++) { diff --git a/core/iwasm/compilation/aot_emit_function.c b/core/iwasm/compilation/aot_emit_function.c index 1d565b6c..fbef02e2 100644 --- a/core/iwasm/compilation/aot_emit_function.c +++ b/core/iwasm/compilation/aot_emit_function.c @@ -7,6 +7,7 @@ #include "aot_emit_exception.h" #include "aot_emit_control.h" #include "aot_emit_table.h" +#include "aot_stack_frame_comp.h" #include "../aot/aot_runtime.h" #if WASM_ENABLE_GC != 0 #include "aot_emit_gc.h" @@ -1403,6 +1404,7 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, LLVMValueRef *param_values = NULL, value_ret = NULL, func; LLVMValueRef import_func_idx, res; LLVMValueRef ext_ret, ext_ret_ptr, ext_ret_idx; + LLVMValueRef func_idx_ref; int32 i, j = 0, param_count, result_count, ext_ret_count; uint64 total_size; uint8 wasm_ret_type; @@ -1447,12 +1449,28 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, return false; } - if (comp_ctx->enable_aux_stack_frame) { #if WASM_ENABLE_AOT_STACK_FRAME != 0 - if (!alloc_frame_for_aot_func(comp_ctx, func_ctx, func_idx)) - return false; -#endif + if (comp_ctx->aux_stack_frame_type) { + if (func_idx < import_func_count + && comp_ctx->call_stack_features.frame_per_function) { 
+ INT_CONST(func_idx_ref, func_idx, I32_TYPE, true); + if (!aot_alloc_frame_per_function_frame_for_aot_func( + comp_ctx, func_ctx, func_idx_ref)) { + return false; + } + } + else if (!comp_ctx->call_stack_features.frame_per_function) { + if (comp_ctx->aux_stack_frame_type + != AOT_STACK_FRAME_TYPE_STANDARD) { + aot_set_last_error("unsupported mode"); + return false; + } + if (!alloc_frame_for_aot_func(comp_ctx, func_ctx, func_idx)) { + return false; + } + } } +#endif /* Get param cell number */ param_cell_num = func_type->param_cell_num; @@ -1522,7 +1540,7 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, } if (func_idx < import_func_count) { - if (comp_ctx->enable_aux_stack_frame + if (comp_ctx->aux_stack_frame_type == AOT_STACK_FRAME_TYPE_STANDARD && !commit_params_to_frame_of_import_func( comp_ctx, func_ctx, func_type, param_values + 1)) { goto fail; @@ -1813,12 +1831,26 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, } } - if (comp_ctx->enable_aux_stack_frame) { #if WASM_ENABLE_AOT_STACK_FRAME != 0 - if (!free_frame_for_aot_func(comp_ctx, func_ctx)) - goto fail; -#endif + if (comp_ctx->aux_stack_frame_type) { + if (func_idx < import_func_count + && comp_ctx->call_stack_features.frame_per_function) { + if (!aot_free_frame_per_function_frame_for_aot_func(comp_ctx, + func_ctx)) { + goto fail; + } + } + else if (!comp_ctx->call_stack_features.frame_per_function) { + if (comp_ctx->aux_stack_frame_type + != AOT_STACK_FRAME_TYPE_STANDARD) { + aot_set_last_error("unsupported mode"); + } + if (!free_frame_for_aot_func(comp_ctx, func_ctx)) { + goto fail; + } + } } +#endif /* Insert suspend check point */ if (comp_ctx->enable_thread_mgr) { @@ -2439,7 +2471,8 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, goto fail; } - if (comp_ctx->enable_aux_stack_frame) { + if (comp_ctx->aux_stack_frame_type + && !comp_ctx->call_stack_features.frame_per_function) { #if WASM_ENABLE_AOT_STACK_FRAME 
!= 0 /* TODO: use current frame instead of allocating new frame for WASM_OP_RETURN_CALL_INDIRECT */ @@ -2508,7 +2541,13 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, /* Translate call import block */ LLVMPositionBuilderAtEnd(comp_ctx->builder, block_call_import); - if (comp_ctx->enable_aux_stack_frame + if (comp_ctx->aot_frame && comp_ctx->call_stack_features.frame_per_function + && !aot_alloc_frame_per_function_frame_for_aot_func(comp_ctx, func_ctx, + func_idx)) { + goto fail; + } + + if (comp_ctx->aux_stack_frame_type == AOT_STACK_FRAME_TYPE_STANDARD && !commit_params_to_frame_of_import_func(comp_ctx, func_ctx, func_type, param_values + 1)) { goto fail; @@ -2545,6 +2584,12 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, && !check_call_return(comp_ctx, func_ctx, res)) goto fail; + if (comp_ctx->aot_frame && comp_ctx->call_stack_features.frame_per_function + && !aot_free_frame_per_function_frame_for_aot_func(comp_ctx, + func_ctx)) { + goto fail; + } + block_curr = LLVMGetInsertBlock(comp_ctx->builder); for (i = 0; i < func_result_count; i++) { LLVMAddIncoming(result_phis[i], &value_rets[i], &block_curr, 1); @@ -2629,7 +2674,8 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, PUSH(result_phis[i], func_type->types[func_param_count + i]); } - if (comp_ctx->enable_aux_stack_frame) { + if (comp_ctx->aux_stack_frame_type + && !comp_ctx->call_stack_features.frame_per_function) { #if WASM_ENABLE_AOT_STACK_FRAME != 0 if (!free_frame_for_aot_func(comp_ctx, func_ctx)) goto fail; @@ -2936,7 +2982,8 @@ aot_compile_op_call_ref(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, goto fail; } - if (comp_ctx->enable_aux_stack_frame) { + if (comp_ctx->aux_stack_frame_type + && !comp_ctx->call_stack_features.frame_per_function) { #if WASM_ENABLE_AOT_STACK_FRAME != 0 /* TODO: use current frame instead of allocating new frame for WASM_OP_RETURN_CALL_REF */ @@ -3005,7 +3052,7 @@ 
aot_compile_op_call_ref(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, /* Translate call import block */ LLVMPositionBuilderAtEnd(comp_ctx->builder, block_call_import); - if (comp_ctx->enable_aux_stack_frame + if (comp_ctx->aux_stack_frame_type == AOT_STACK_FRAME_TYPE_STANDARD && !commit_params_to_frame_of_import_func(comp_ctx, func_ctx, func_type, param_values + 1)) { goto fail; @@ -3133,7 +3180,8 @@ aot_compile_op_call_ref(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, PUSH(result_phis[i], func_type->types[func_param_count + i]); } - if (comp_ctx->enable_aux_stack_frame) { + if (comp_ctx->aux_stack_frame_type + && !comp_ctx->call_stack_features.frame_per_function) { #if WASM_ENABLE_AOT_STACK_FRAME != 0 if (!free_frame_for_aot_func(comp_ctx, func_ctx)) goto fail; diff --git a/core/iwasm/compilation/aot_llvm.c b/core/iwasm/compilation/aot_llvm.c index 3346086a..820a55e9 100644 --- a/core/iwasm/compilation/aot_llvm.c +++ b/core/iwasm/compilation/aot_llvm.c @@ -1771,7 +1771,7 @@ aot_create_func_context(const AOTCompData *comp_data, AOTCompContext *comp_ctx, goto fail; } - if (comp_ctx->enable_aux_stack_frame + if (comp_ctx->aux_stack_frame_type && !create_aux_stack_frame(comp_ctx, func_ctx)) { goto fail; } @@ -2577,9 +2577,7 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option) if (option->enable_ref_types) comp_ctx->enable_ref_types = true; - if (option->enable_aux_stack_frame) - comp_ctx->enable_aux_stack_frame = true; - + comp_ctx->aux_stack_frame_type = option->aux_stack_frame_type; comp_ctx->call_stack_features = option->call_stack_features; if (option->enable_perf_profiling) diff --git a/core/iwasm/compilation/aot_llvm.h b/core/iwasm/compilation/aot_llvm.h index 65debbaa..43212e50 100644 --- a/core/iwasm/compilation/aot_llvm.h +++ b/core/iwasm/compilation/aot_llvm.h @@ -410,7 +410,7 @@ typedef struct AOTCompContext { bool enable_aux_stack_check; /* Generate auxiliary stack frame */ - bool enable_aux_stack_frame; + 
AOTStackFrameType aux_stack_frame_type; /* Auxiliary call stack features */ AOTCallStackFeatures call_stack_features; diff --git a/core/iwasm/compilation/aot_stack_frame.h b/core/iwasm/compilation/aot_stack_frame.h new file mode 100644 index 00000000..6155ee6e --- /dev/null +++ b/core/iwasm/compilation/aot_stack_frame.h @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2024 Amazon Inc. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#ifndef _AOT_STACK_FRAME_H_ +#define _AOT_STACK_FRAME_H_ + +#include "platform_common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + /* The non-imported function index of current function */ + uint32 func_index; + + /* Instruction pointer: offset to the bytecode array */ + uint32 ip_offset; +} AOTTinyFrame; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/core/iwasm/compilation/aot_stack_frame_comp.c b/core/iwasm/compilation/aot_stack_frame_comp.c new file mode 100644 index 00000000..342dfe80 --- /dev/null +++ b/core/iwasm/compilation/aot_stack_frame_comp.c @@ -0,0 +1,148 @@ +/* + * Copyright (C) 2024 Amazon Inc. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ +#include "aot_stack_frame_comp.h" +#include "aot_emit_exception.h" + +#define ADD_IN_BOUNDS_GEP(variable, type, pointer, indices, num_indices) \ + do { \ + if (!(variable = \ + LLVMBuildInBoundsGEP2(comp_ctx->builder, type, pointer, \ + indices, num_indices, #variable))) { \ + aot_set_last_error("llvm build in bounds gep failed"); \ + return false; \ + } \ + } while (0) + +#define ADD_STORE(value, pointer) \ + do { \ + if (!LLVMBuildStore(comp_ctx->builder, value, pointer)) { \ + aot_set_last_error("llvm build store failed"); \ + return false; \ + } \ + } while (0) + +#define ADD_LOAD(value, type, pointer) \ + do { \ + if (!(value = \ + LLVMBuildLoad2(comp_ctx->builder, type, pointer, #value))) { \ + aot_set_last_error("llvm build load failed"); \ + return false; \ + } \ + } while (0) + +static bool +aot_alloc_tiny_frame_for_aot_func(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + LLVMValueRef func_index) +{ + LLVMValueRef wasm_stack_top_ptr = func_ctx->wasm_stack_top_ptr, + wasm_stack_top_bound = func_ctx->wasm_stack_top_bound, + wasm_stack_top, cmp; + LLVMBasicBlockRef check_wasm_stack_succ; + LLVMValueRef offset; + + ADD_LOAD(wasm_stack_top, INT8_PTR_TYPE, wasm_stack_top_ptr); + + if (comp_ctx->call_stack_features.bounds_checks) { + if (!(check_wasm_stack_succ = LLVMAppendBasicBlockInContext( + comp_ctx->context, func_ctx->func, + "check_wasm_stack_succ"))) { + aot_set_last_error("llvm add basic block failed."); + return false; + } + + LLVMMoveBasicBlockAfter(check_wasm_stack_succ, + LLVMGetInsertBlock(comp_ctx->builder)); + + if (!(cmp = LLVMBuildICmp(comp_ctx->builder, LLVMIntUGE, wasm_stack_top, + wasm_stack_top_bound, "cmp"))) { + aot_set_last_error("llvm build icmp failed"); + return false; + } + + if (!(aot_emit_exception(comp_ctx, func_ctx, + EXCE_OPERAND_STACK_OVERFLOW, true, cmp, + check_wasm_stack_succ))) { + return false; + } + } + + /* Save the func_idx on the top of the stack 
*/ + ADD_STORE(func_index, wasm_stack_top); + + /* increment the stack pointer */ + INT_CONST(offset, sizeof(AOTTinyFrame), I32_TYPE, true); + ADD_IN_BOUNDS_GEP(wasm_stack_top, INT8_TYPE, wasm_stack_top, &offset, 1); + ADD_STORE(wasm_stack_top, wasm_stack_top_ptr); + + return true; +} + +static bool +aot_free_tiny_frame_for_aot_func(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + LLVMValueRef wasm_stack_top_ptr = func_ctx->wasm_stack_top_ptr, + wasm_stack_top; + LLVMValueRef offset; + + ADD_LOAD(wasm_stack_top, INT8_PTR_TYPE, wasm_stack_top_ptr); + + INT_CONST(offset, -sizeof(AOTTinyFrame), + comp_ctx->pointer_size == 8 ? I64_TYPE : I32_TYPE, true); + ADD_IN_BOUNDS_GEP(wasm_stack_top, INT8_TYPE, wasm_stack_top, &offset, 1); + ADD_STORE(wasm_stack_top, wasm_stack_top_ptr); + + return true; +} + +bool +aot_tiny_frame_gen_commit_ip(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, + LLVMValueRef ip_value) +{ + LLVMValueRef wasm_stack_top_ptr = func_ctx->wasm_stack_top_ptr, + wasm_stack_top; + LLVMValueRef offset, ip_addr; + + bh_assert(ip_value); + + ADD_LOAD(wasm_stack_top, INT8_PTR_TYPE, wasm_stack_top_ptr); + + INT_CONST(offset, -4, comp_ctx->pointer_size == 8 ? 
I64_TYPE : I32_TYPE, + true); + ADD_IN_BOUNDS_GEP(ip_addr, INT8_TYPE, wasm_stack_top, &offset, 1); + + ADD_STORE(ip_value, ip_addr); + + return true; +} + +bool +aot_alloc_frame_per_function_frame_for_aot_func(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + LLVMValueRef func_index) +{ + switch (comp_ctx->aux_stack_frame_type) { + case AOT_STACK_FRAME_TYPE_TINY: + return aot_alloc_tiny_frame_for_aot_func(comp_ctx, func_ctx, + func_index); + default: + aot_set_last_error("unsupported mode"); + return false; + } +} + +bool +aot_free_frame_per_function_frame_for_aot_func(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + switch (comp_ctx->aux_stack_frame_type) { + case AOT_STACK_FRAME_TYPE_TINY: + return aot_free_tiny_frame_for_aot_func(comp_ctx, func_ctx); + default: + aot_set_last_error("unsupported mode"); + return false; + } +} diff --git a/core/iwasm/compilation/aot_stack_frame_comp.h b/core/iwasm/compilation/aot_stack_frame_comp.h new file mode 100644 index 00000000..7980b8c0 --- /dev/null +++ b/core/iwasm/compilation/aot_stack_frame_comp.h @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2024 Amazon Inc. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#ifndef _AOT_STACK_FRAME_COMP_H_ +#define _AOT_STACK_FRAME_COMP_H_ + +#include "aot_stack_frame.h" +#include "aot_compiler.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool +aot_alloc_frame_per_function_frame_for_aot_func(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + LLVMValueRef func_index); + +bool +aot_free_frame_per_function_frame_for_aot_func(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); + +bool +aot_tiny_frame_gen_commit_ip(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, + LLVMValueRef ip_value); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/core/iwasm/include/aot_comp_option.h b/core/iwasm/include/aot_comp_option.h index 4ab2e6ab..67ec81cd 100644 --- a/core/iwasm/include/aot_comp_option.h +++ b/core/iwasm/include/aot_comp_option.h @@ -21,8 +21,24 @@ typedef struct { /* Enables or disables parameters, locals and stack operands. */ bool values; + + /* If enabled, stack frame is generated at the beginning of each + * function (frame-per-function mode). Otherwise, stack frame is + * generated before each call of a function (frame-per-call mode). 
*/ + bool frame_per_function; } AOTCallStackFeatures; +void +aot_call_stack_features_init_default(AOTCallStackFeatures *features); + +typedef enum { + AOT_STACK_FRAME_OFF = 0, + /* Use a small stack frame data structure (AOTTinyFrame) */ + AOT_STACK_FRAME_TYPE_TINY, + /* Use a regular stack frame data structure (AOTFrame) */ + AOT_STACK_FRAME_TYPE_STANDARD, +} AOTStackFrameType; + typedef struct AOTCompOption { bool is_jit_mode; bool is_indirect_mode; @@ -38,7 +54,7 @@ typedef struct AOTCompOption { bool enable_ref_types; bool enable_gc; bool enable_aux_stack_check; - bool enable_aux_stack_frame; + AOTStackFrameType aux_stack_frame_type; AOTCallStackFeatures call_stack_features; bool enable_perf_profiling; bool enable_memory_profiling; diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index 092e0d15..ed85bb78 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -5406,8 +5406,8 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf, option.enable_aux_stack_check = true; #if WASM_ENABLE_PERF_PROFILING != 0 || WASM_ENABLE_DUMP_CALL_STACK != 0 \ || WASM_ENABLE_AOT_STACK_FRAME != 0 - option.enable_aux_stack_frame = true; - memset(&option.call_stack_features, 1, sizeof(AOTCallStackFeatures)); + option.aux_stack_frame_type = AOT_STACK_FRAME_TYPE_STANDARD; + aot_call_stack_features_init_default(&option.call_stack_features); #endif #if WASM_ENABLE_PERF_PROFILING != 0 option.enable_perf_profiling = true; diff --git a/core/iwasm/interpreter/wasm_mini_loader.c b/core/iwasm/interpreter/wasm_mini_loader.c index a21f4490..34f4a183 100644 --- a/core/iwasm/interpreter/wasm_mini_loader.c +++ b/core/iwasm/interpreter/wasm_mini_loader.c @@ -2148,8 +2148,8 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf, option.enable_aux_stack_check = true; #if WASM_ENABLE_PERF_PROFILING != 0 || WASM_ENABLE_DUMP_CALL_STACK != 0 \ || WASM_ENABLE_AOT_STACK_FRAME != 0 - 
option.enable_aux_stack_frame = true; - memset(&option.call_stack_features, 1, sizeof(AOTCallStackFeatures)); + option.aux_stack_frame_type = AOT_STACK_FRAME_TYPE_STANDARD; + aot_call_stack_features_init_default(&option.call_stack_features); #endif #if WASM_ENABLE_PERF_PROFILING != 0 option.enable_perf_profiling = true; diff --git a/wamr-compiler/main.c b/wamr-compiler/main.c index 3c7ef1f4..53c75c84 100644 --- a/wamr-compiler/main.c +++ b/wamr-compiler/main.c @@ -307,6 +307,13 @@ finish: return ret; } +static bool +can_enable_tiny_frame(const AOTCompOption *opt) +{ + return !opt->call_stack_features.values && !opt->enable_gc + && !opt->enable_perf_profiling; +} + static uint32 resolve_segue_flags(char *str_flags) { @@ -403,9 +410,7 @@ main(int argc, char *argv[]) option.enable_bulk_memory = true; option.enable_ref_types = true; option.enable_gc = false; - - /* Set all the features to true by default */ - memset(&option.call_stack_features, 1, sizeof(AOTCallStackFeatures)); + aot_call_stack_features_init_default(&option.call_stack_features); /* Process options */ for (argc--, argv++; argc > 0 && argv[0][0] == '-'; argc--, argv++) { @@ -519,7 +524,7 @@ main(int argc, char *argv[]) option.enable_aux_stack_check = false; } else if (!strcmp(argv[0], "--enable-dump-call-stack")) { - option.enable_aux_stack_frame = true; + option.aux_stack_frame_type = AOT_STACK_FRAME_TYPE_STANDARD; } else if (!strncmp(argv[0], "--call-stack-features=", 22)) { /* Reset all the features, only enable the user-defined ones */ @@ -535,7 +540,7 @@ main(int argc, char *argv[]) } } else if (!strcmp(argv[0], "--enable-perf-profiling")) { - option.enable_aux_stack_frame = true; + option.aux_stack_frame_type = AOT_STACK_FRAME_TYPE_STANDARD; option.enable_perf_profiling = true; } else if (!strcmp(argv[0], "--enable-memory-profiling")) { @@ -550,7 +555,7 @@ main(int argc, char *argv[]) option.is_indirect_mode = true; } else if (!strcmp(argv[0], "--enable-gc")) { - option.enable_aux_stack_frame = 
true; + option.aux_stack_frame_type = AOT_STACK_FRAME_TYPE_STANDARD; option.enable_gc = true; } else if (!strcmp(argv[0], "--disable-llvm-intrinsics")) { @@ -652,6 +657,14 @@ main(int argc, char *argv[]) if (!use_dummy_wasm && (argc == 0 || !out_file_name)) PRINT_HELP_AND_EXIT(); + if (option.aux_stack_frame_type == AOT_STACK_FRAME_TYPE_STANDARD + && can_enable_tiny_frame(&option)) { + LOG_VERBOSE("Use tiny frame mode for stack frames"); + option.aux_stack_frame_type = AOT_STACK_FRAME_TYPE_TINY; + /* for now we only enable frame per function for a TINY frame mode */ + option.call_stack_features.frame_per_function = true; + } + if (!size_level_set) { /** * Set opt level to 1 by default for Windows and MacOS as From f453d9d5ce2cb58080154ecbbb4642409beba056 Mon Sep 17 00:00:00 2001 From: YAMAMOTO Takashi Date: Tue, 10 Sep 2024 10:42:23 +0900 Subject: [PATCH 18/24] Appease GCC strict prototypes warning (#3775) --- core/iwasm/aot/aot_reloc.h | 2 +- core/iwasm/aot/aot_runtime.h | 2 +- core/iwasm/aot/arch/aot_reloc_aarch64.c | 2 +- core/iwasm/aot/arch/aot_reloc_arm.c | 194 +++++++++--------- core/iwasm/aot/arch/aot_reloc_mips.c | 2 +- core/iwasm/aot/arch/aot_reloc_riscv.c | 106 +++++----- core/iwasm/aot/arch/aot_reloc_thumb.c | 194 +++++++++--------- core/iwasm/aot/arch/aot_reloc_x86_64.c | 2 +- core/iwasm/aot/arch/aot_reloc_xtensa.c | 68 +++--- core/iwasm/aot/debug/jit_debug.c | 10 +- core/iwasm/aot/debug/jit_debug.h | 4 +- core/iwasm/common/wasm_memory.c | 4 +- core/iwasm/common/wasm_memory.h | 4 +- core/iwasm/common/wasm_native.c | 4 +- core/iwasm/common/wasm_native.h | 4 +- core/iwasm/common/wasm_runtime_common.c | 18 +- core/iwasm/common/wasm_runtime_common.h | 8 +- core/iwasm/common/wasm_shared_memory.h | 4 +- core/iwasm/compilation/aot.h | 2 +- core/iwasm/interpreter/wasm_interp_fast.c | 2 +- core/iwasm/interpreter/wasm_loader.c | 2 +- core/iwasm/interpreter/wasm_mini_loader.c | 2 +- .../lib_wasi_threads_wrapper.c | 2 +- .../libraries/thread-mgr/thread_manager.h | 
8 +- .../platform/include/platform_api_extension.h | 10 +- .../shared/platform/nuttx/platform_internal.h | 2 +- product-mini/platforms/common/libc_wasi.c | 2 +- product-mini/platforms/nuttx/CMakeLists.txt | 2 +- product-mini/platforms/nuttx/wamr.mk | 2 +- product-mini/platforms/posix/main.c | 2 +- 30 files changed, 335 insertions(+), 335 deletions(-) diff --git a/core/iwasm/aot/aot_reloc.h b/core/iwasm/aot/aot_reloc.h index 8ead3cd9..f7ada4d8 100644 --- a/core/iwasm/aot/aot_reloc.h +++ b/core/iwasm/aot/aot_reloc.h @@ -226,7 +226,7 @@ SymbolMap * get_target_symbol_map(uint32 *sym_num); uint32 -get_plt_table_size(); +get_plt_table_size(void); void init_plt_table(uint8 *plt); diff --git a/core/iwasm/aot/aot_runtime.h b/core/iwasm/aot/aot_runtime.h index f6bff00b..3ff0e0e3 100644 --- a/core/iwasm/aot/aot_runtime.h +++ b/core/iwasm/aot/aot_runtime.h @@ -648,7 +648,7 @@ aot_check_app_addr_and_convert(AOTModuleInstance *module_inst, bool is_str, void **p_native_addr); uint32 -aot_get_plt_table_size(); +aot_get_plt_table_size(void); void * aot_memmove(void *dest, const void *src, size_t n); diff --git a/core/iwasm/aot/arch/aot_reloc_aarch64.c b/core/iwasm/aot/arch/aot_reloc_aarch64.c index ec646b4e..26815334 100644 --- a/core/iwasm/aot/arch/aot_reloc_aarch64.c +++ b/core/iwasm/aot/arch/aot_reloc_aarch64.c @@ -79,7 +79,7 @@ get_current_target(char *target_buf, uint32 target_buf_size) } static uint32 -get_plt_item_size() +get_plt_item_size(void) { /* 6*4 bytes instructions and 8 bytes symbol address */ return 32; diff --git a/core/iwasm/aot/arch/aot_reloc_arm.c b/core/iwasm/aot/arch/aot_reloc_arm.c index bb492bfe..0be17ef4 100644 --- a/core/iwasm/aot/arch/aot_reloc_arm.c +++ b/core/iwasm/aot/arch/aot_reloc_arm.c @@ -12,102 +12,102 @@ #define R_ARM_MOVT_ABS 44 /* clang-format off */ -void __adddf3(); -void __addsf3(); -void __aeabi_d2f(); -void __aeabi_d2iz(); -void __aeabi_d2lz(); -void __aeabi_d2uiz(); -void __aeabi_d2ulz(); -void __aeabi_dadd(); -void __aeabi_dcmpeq(); 
-void __aeabi_dcmpge(); -void __aeabi_dcmpgt(); -void __aeabi_dcmple(); -void __aeabi_dcmplt(); -void __aeabi_dcmpun(); -void __aeabi_ddiv(); -void __aeabi_dmul(); -void __aeabi_dsub(); -void __aeabi_f2d(); -void __aeabi_f2iz(); -void __aeabi_f2lz(); -void __aeabi_f2ulz(); -void __aeabi_fadd(); -void __aeabi_fcmpeq(); -void __aeabi_fcmpge(); -void __aeabi_fcmpgt(); -void __aeabi_fcmple(); -void __aeabi_fcmplt(); -void __aeabi_fcmpun(); -void __aeabi_fdiv(); -void __aeabi_fmul(); -void __aeabi_fsub(); -void __aeabi_i2d(); -void __aeabi_i2f(); -void __aeabi_idiv(); -void __aeabi_idivmod(); -void __aeabi_l2d(); -void __aeabi_l2f(); -void __aeabi_ldivmod(); -void __aeabi_memclr(); -void __aeabi_memcpy(); -void __aeabi_memmove(); -void __aeabi_memset(); -void __aeabi_ui2d(); -void __aeabi_ui2f(); -void __aeabi_uidiv(); -void __aeabi_uidivmod(); -void __aeabi_ul2d(); -void __aeabi_ul2f(); -void __aeabi_uldivmod(); -void __clzsi2(); -void __divdf3(); -void __divdi3(); -void __divsf3(); -void __divsi3(); -void __eqdf2(); -void __eqsf2(); -void __extendsfdf2(); -void __fixdfdi(); -void __fixdfsi(); -void __fixsfdi(); -void __fixsfsi(); -void __fixunsdfdi(); -void __fixunsdfsi(); -void __fixunssfdi(); -void __floatdidf(); -void __floatdisf(); -void __floatsidf(); -void __floatsisf(); -void __floatundidf(); -void __floatundisf(); -void __floatunsidf(); -void __floatunsisf(); -void __gedf2(); -void __gesf2(); -void __gtdf2(); -void __gtsf2(); -void __ledf2(); -void __lesf2(); -void __ltdf2(); -void __ltsf2(); -void __moddi3(); -void __modsi3(); -void __muldf3(); -void __mulsf3(); -void __nedf2(); -void __nesf2(); -void __subdf3(); -void __subsf3(); -void __truncdfsf2(); -void __udivdi3(); -void __udivmoddi4(); -void __udivsi3(); -void __umoddi3(); -void __umodsi3(); -void __unorddf2(); -void __unordsf2(); +void __adddf3(void); +void __addsf3(void); +void __aeabi_d2f(void); +void __aeabi_d2iz(void); +void __aeabi_d2lz(void); +void __aeabi_d2uiz(void); +void __aeabi_d2ulz(void); 
+void __aeabi_dadd(void); +void __aeabi_dcmpeq(void); +void __aeabi_dcmpge(void); +void __aeabi_dcmpgt(void); +void __aeabi_dcmple(void); +void __aeabi_dcmplt(void); +void __aeabi_dcmpun(void); +void __aeabi_ddiv(void); +void __aeabi_dmul(void); +void __aeabi_dsub(void); +void __aeabi_f2d(void); +void __aeabi_f2iz(void); +void __aeabi_f2lz(void); +void __aeabi_f2ulz(void); +void __aeabi_fadd(void); +void __aeabi_fcmpeq(void); +void __aeabi_fcmpge(void); +void __aeabi_fcmpgt(void); +void __aeabi_fcmple(void); +void __aeabi_fcmplt(void); +void __aeabi_fcmpun(void); +void __aeabi_fdiv(void); +void __aeabi_fmul(void); +void __aeabi_fsub(void); +void __aeabi_i2d(void); +void __aeabi_i2f(void); +void __aeabi_idiv(void); +void __aeabi_idivmod(void); +void __aeabi_l2d(void); +void __aeabi_l2f(void); +void __aeabi_ldivmod(void); +void __aeabi_memclr(void); +void __aeabi_memcpy(void); +void __aeabi_memmove(void); +void __aeabi_memset(void); +void __aeabi_ui2d(void); +void __aeabi_ui2f(void); +void __aeabi_uidiv(void); +void __aeabi_uidivmod(void); +void __aeabi_ul2d(void); +void __aeabi_ul2f(void); +void __aeabi_uldivmod(void); +void __clzsi2(void); +void __divdf3(void); +void __divdi3(void); +void __divsf3(void); +void __divsi3(void); +void __eqdf2(void); +void __eqsf2(void); +void __extendsfdf2(void); +void __fixdfdi(void); +void __fixdfsi(void); +void __fixsfdi(void); +void __fixsfsi(void); +void __fixunsdfdi(void); +void __fixunsdfsi(void); +void __fixunssfdi(void); +void __floatdidf(void); +void __floatdisf(void); +void __floatsidf(void); +void __floatsisf(void); +void __floatundidf(void); +void __floatundisf(void); +void __floatunsidf(void); +void __floatunsisf(void); +void __gedf2(void); +void __gesf2(void); +void __gtdf2(void); +void __gtsf2(void); +void __ledf2(void); +void __lesf2(void); +void __ltdf2(void); +void __ltsf2(void); +void __moddi3(void); +void __modsi3(void); +void __muldf3(void); +void __mulsf3(void); +void __nedf2(void); +void __nesf2(void); +void 
__subdf3(void); +void __subsf3(void); +void __truncdfsf2(void); +void __udivdi3(void); +void __udivmoddi4(void); +void __udivsi3(void); +void __umoddi3(void); +void __umodsi3(void); +void __unorddf2(void); +void __unordsf2(void); /* clang-format on */ static SymbolMap target_sym_map[] = { @@ -255,7 +255,7 @@ get_current_target(char *target_buf, uint32 target_buf_size) #undef BUILD_TARGET_ARM_DEFAULT uint32 -get_plt_item_size() +get_plt_item_size(void) { /* 8 bytes instructions and 4 bytes symbol address */ return 12; diff --git a/core/iwasm/aot/arch/aot_reloc_mips.c b/core/iwasm/aot/arch/aot_reloc_mips.c index f9f06a05..4b856119 100644 --- a/core/iwasm/aot/arch/aot_reloc_mips.c +++ b/core/iwasm/aot/arch/aot_reloc_mips.c @@ -28,7 +28,7 @@ get_current_target(char *target_buf, uint32 target_buf_size) } static uint32 -get_plt_item_size() +get_plt_item_size(void) { return 0; } diff --git a/core/iwasm/aot/arch/aot_reloc_riscv.c b/core/iwasm/aot/arch/aot_reloc_riscv.c index b87bb200..058ad0e1 100644 --- a/core/iwasm/aot/arch/aot_reloc_riscv.c +++ b/core/iwasm/aot/arch/aot_reloc_riscv.c @@ -49,58 +49,58 @@ #endif /* clang-format off */ -void __adddf3(); -void __addsf3(); -void __divdf3(); -void __divdi3(); -void __divsf3(); -void __divsi3(); -void __eqdf2(); -void __eqsf2(); -void __extendsfdf2(); -void __fixdfdi(); -void __fixdfsi(); -void __fixsfdi(); -void __fixsfsi(); -void __fixunsdfdi(); -void __fixunsdfsi(); -void __fixunssfdi(); -void __fixunssfsi(); -void __floatdidf(); -void __floatdisf(); -void __floatsidf(); -void __floatsisf(); -void __floatundidf(); -void __floatundisf(); -void __floatunsidf(); -void __floatunsisf(); -void __gedf2(); -void __gesf2(); -void __gtdf2(); -void __gtsf2(); -void __ledf2(); -void __lesf2(); -void __ltdf2(); -void __ltsf2(); -void __moddi3(); -void __modsi3(); -void __muldf3(); -void __muldi3(); -void __mulsf3(); -void __mulsi3(); -void __nedf2(); -void __negdf2(); -void __negsf2(); -void __nesf2(); -void __subdf3(); -void 
__subsf3(); -void __truncdfsf2(); -void __udivdi3(); -void __udivsi3(); -void __umoddi3(); -void __umodsi3(); -void __unorddf2(); -void __unordsf2(); +void __adddf3(void); +void __addsf3(void); +void __divdf3(void); +void __divdi3(void); +void __divsf3(void); +void __divsi3(void); +void __eqdf2(void); +void __eqsf2(void); +void __extendsfdf2(void); +void __fixdfdi(void); +void __fixdfsi(void); +void __fixsfdi(void); +void __fixsfsi(void); +void __fixunsdfdi(void); +void __fixunsdfsi(void); +void __fixunssfdi(void); +void __fixunssfsi(void); +void __floatdidf(void); +void __floatdisf(void); +void __floatsidf(void); +void __floatsisf(void); +void __floatundidf(void); +void __floatundisf(void); +void __floatunsidf(void); +void __floatunsisf(void); +void __gedf2(void); +void __gesf2(void); +void __gtdf2(void); +void __gtsf2(void); +void __ledf2(void); +void __lesf2(void); +void __ltdf2(void); +void __ltsf2(void); +void __moddi3(void); +void __modsi3(void); +void __muldf3(void); +void __muldi3(void); +void __mulsf3(void); +void __mulsi3(void); +void __nedf2(void); +void __negdf2(void); +void __negsf2(void); +void __nesf2(void); +void __subdf3(void); +void __subsf3(void); +void __truncdfsf2(void); +void __udivdi3(void); +void __udivsi3(void); +void __umoddi3(void); +void __umodsi3(void); +void __unorddf2(void); +void __unordsf2(void); /* clang-format on */ static SymbolMap target_sym_map[] = { @@ -193,7 +193,7 @@ get_current_target(char *target_buf, uint32 target_buf_size) } uint32 -get_plt_item_size() +get_plt_item_size(void) { #if __riscv_xlen == 64 /* auipc + ld + jalr + nop + addr */ diff --git a/core/iwasm/aot/arch/aot_reloc_thumb.c b/core/iwasm/aot/arch/aot_reloc_thumb.c index f90507de..c0957a42 100644 --- a/core/iwasm/aot/arch/aot_reloc_thumb.c +++ b/core/iwasm/aot/arch/aot_reloc_thumb.c @@ -14,102 +14,102 @@ #define R_ARM_THM_MOVT_PREL 50 /* clang-format off */ -void __adddf3(); -void __addsf3(); -void __aeabi_d2f(); -void __aeabi_d2iz(); -void __aeabi_d2lz(); 
-void __aeabi_d2uiz(); -void __aeabi_d2ulz(); -void __aeabi_dadd(); -void __aeabi_dcmpeq(); -void __aeabi_dcmpge(); -void __aeabi_dcmpgt(); -void __aeabi_dcmple(); -void __aeabi_dcmplt(); -void __aeabi_dcmpun(); -void __aeabi_ddiv(); -void __aeabi_dmul(); -void __aeabi_dsub(); -void __aeabi_f2d(); -void __aeabi_f2iz(); -void __aeabi_f2lz(); -void __aeabi_f2ulz(); -void __aeabi_fadd(); -void __aeabi_fcmpeq(); -void __aeabi_fcmpge(); -void __aeabi_fcmpgt(); -void __aeabi_fcmple(); -void __aeabi_fcmplt(); -void __aeabi_fcmpun(); -void __aeabi_fdiv(); -void __aeabi_fmul(); -void __aeabi_fsub(); -void __aeabi_i2d(); -void __aeabi_i2f(); -void __aeabi_idiv(); -void __aeabi_idivmod(); -void __aeabi_l2d(); -void __aeabi_l2f(); -void __aeabi_ldivmod(); -void __aeabi_llsl(); -void __aeabi_llsr(); -void __aeabi_lmul(); -void __aeabi_ui2d(); -void __aeabi_ui2f(); -void __aeabi_uidiv(); -void __aeabi_uidivmod(); -void __aeabi_ul2d(); -void __aeabi_ul2f(); -void __aeabi_uldivmod(); -void __ashldi3(); -void __clzsi2(); -void __divdf3(); -void __divdi3(); -void __divsi3(); -void __eqdf2(); -void __eqsf2(); -void __extendsfdf2(); -void __fixdfdi(); -void __fixdfsi(); -void __fixsfdi(); -void __fixunsdfdi(); -void __fixunsdfsi(); -void __fixunssfdi(); -void __floatdidf(); -void __floatdisf(); -void __floatsidf(); -void __floatsisf(); -void __floatundidf(); -void __floatundisf(); -void __floatunsidf(); -void __floatunsisf(); -void __gedf2(); -void __gesf2(); -void __gtdf2(); -void __gtsf2(); -void __ledf2(); -void __lesf2(); -void __lshrdi3(); -void __ltdf2(); -void __ltsf2(); -void __moddi3(); -void __modsi3(); -void __muldf3(); -void __muldi3(); -void __mulsf3(); -void __nedf2(); -void __nesf2(); -void __subdf3(); -void __subsf3(); -void __truncdfsf2(); -void __udivdi3(); -void __udivmoddi4(); -void __udivsi3(); -void __umoddi3(); -void __umodsi3(); -void __unorddf2(); -void __unordsf2(); +void __adddf3(void); +void __addsf3(void); +void __aeabi_d2f(void); +void __aeabi_d2iz(void); 
+void __aeabi_d2lz(void); +void __aeabi_d2uiz(void); +void __aeabi_d2ulz(void); +void __aeabi_dadd(void); +void __aeabi_dcmpeq(void); +void __aeabi_dcmpge(void); +void __aeabi_dcmpgt(void); +void __aeabi_dcmple(void); +void __aeabi_dcmplt(void); +void __aeabi_dcmpun(void); +void __aeabi_ddiv(void); +void __aeabi_dmul(void); +void __aeabi_dsub(void); +void __aeabi_f2d(void); +void __aeabi_f2iz(void); +void __aeabi_f2lz(void); +void __aeabi_f2ulz(void); +void __aeabi_fadd(void); +void __aeabi_fcmpeq(void); +void __aeabi_fcmpge(void); +void __aeabi_fcmpgt(void); +void __aeabi_fcmple(void); +void __aeabi_fcmplt(void); +void __aeabi_fcmpun(void); +void __aeabi_fdiv(void); +void __aeabi_fmul(void); +void __aeabi_fsub(void); +void __aeabi_i2d(void); +void __aeabi_i2f(void); +void __aeabi_idiv(void); +void __aeabi_idivmod(void); +void __aeabi_l2d(void); +void __aeabi_l2f(void); +void __aeabi_ldivmod(void); +void __aeabi_llsl(void); +void __aeabi_llsr(void); +void __aeabi_lmul(void); +void __aeabi_ui2d(void); +void __aeabi_ui2f(void); +void __aeabi_uidiv(void); +void __aeabi_uidivmod(void); +void __aeabi_ul2d(void); +void __aeabi_ul2f(void); +void __aeabi_uldivmod(void); +void __ashldi3(void); +void __clzsi2(void); +void __divdf3(void); +void __divdi3(void); +void __divsi3(void); +void __eqdf2(void); +void __eqsf2(void); +void __extendsfdf2(void); +void __fixdfdi(void); +void __fixdfsi(void); +void __fixsfdi(void); +void __fixunsdfdi(void); +void __fixunsdfsi(void); +void __fixunssfdi(void); +void __floatdidf(void); +void __floatdisf(void); +void __floatsidf(void); +void __floatsisf(void); +void __floatundidf(void); +void __floatundisf(void); +void __floatunsidf(void); +void __floatunsisf(void); +void __gedf2(void); +void __gesf2(void); +void __gtdf2(void); +void __gtsf2(void); +void __ledf2(void); +void __lesf2(void); +void __lshrdi3(void); +void __ltdf2(void); +void __ltsf2(void); +void __moddi3(void); +void __modsi3(void); +void __muldf3(void); +void __muldi3(void); 
+void __mulsf3(void); +void __nedf2(void); +void __nesf2(void); +void __subdf3(void); +void __subsf3(void); +void __truncdfsf2(void); +void __udivdi3(void); +void __udivmoddi4(void); +void __udivsi3(void); +void __umoddi3(void); +void __umodsi3(void); +void __unorddf2(void); +void __unordsf2(void); /* clang-format on */ static SymbolMap target_sym_map[] = { @@ -259,7 +259,7 @@ get_current_target(char *target_buf, uint32 target_buf_size) #undef BUILD_TARGET_THUMB_V4T uint32 -get_plt_item_size() +get_plt_item_size(void) { /* 16 bytes instructions and 4 bytes symbol address */ return 20; diff --git a/core/iwasm/aot/arch/aot_reloc_x86_64.c b/core/iwasm/aot/arch/aot_reloc_x86_64.c index d1f5cb5a..fe18d79c 100644 --- a/core/iwasm/aot/arch/aot_reloc_x86_64.c +++ b/core/iwasm/aot/arch/aot_reloc_x86_64.c @@ -58,7 +58,7 @@ get_current_target(char *target_buf, uint32 target_buf_size) } static uint32 -get_plt_item_size() +get_plt_item_size(void) { /* size of mov instruction and jmp instruction */ return 12; diff --git a/core/iwasm/aot/arch/aot_reloc_xtensa.c b/core/iwasm/aot/arch/aot_reloc_xtensa.c index a29c9f2b..fca1b80d 100644 --- a/core/iwasm/aot/arch/aot_reloc_xtensa.c +++ b/core/iwasm/aot/arch/aot_reloc_xtensa.c @@ -10,44 +10,44 @@ /* clang-format off */ /* for soft-float */ -void __floatsidf(); -void __divdf3(); -void __ltdf2(); +void __floatsidf(void); +void __divdf3(void); +void __ltdf2(void); /* for mul32 */ -void __mulsi3(); -void __muldi3(); +void __mulsi3(void); +void __muldi3(void); -void __modsi3(); +void __modsi3(void); -void __divdi3(); +void __divdi3(void); -void __udivdi3(); -void __unorddf2(); -void __adddf3(); -void __eqdf2(); -void __muldf3(); -void __gedf2(); -void __ledf2(); -void __fixunsdfsi(); -void __floatunsidf(); -void __subdf3(); -void __nedf2(); -void __fixdfsi(); -void __moddi3(); -void __extendsfdf2(); -void __truncdfsf2(); -void __gtdf2(); -void __umoddi3(); -void __floatdidf(); -void __divsf3(); -void __fixdfdi(); -void __floatundidf(); 
-void __fixsfdi(); -void __fixunssfdi(); -void __fixunsdfdi(); -void __floatdisf(); -void __floatundisf(); +void __udivdi3(void); +void __unorddf2(void); +void __adddf3(void); +void __eqdf2(void); +void __muldf3(void); +void __gedf2(void); +void __ledf2(void); +void __fixunsdfsi(void); +void __floatunsidf(void); +void __subdf3(void); +void __nedf2(void); +void __fixdfsi(void); +void __moddi3(void); +void __extendsfdf2(void); +void __truncdfsf2(void); +void __gtdf2(void); +void __umoddi3(void); +void __floatdidf(void); +void __divsf3(void); +void __fixdfdi(void); +void __floatundidf(void); +void __fixsfdi(void); +void __fixunssfdi(void); +void __fixunsdfdi(void); +void __floatdisf(void); +void __floatundisf(void); static SymbolMap target_sym_map[] = { @@ -119,7 +119,7 @@ get_current_target(char *target_buf, uint32 target_buf_size) } static uint32 -get_plt_item_size() +get_plt_item_size(void) { return 0; } diff --git a/core/iwasm/aot/debug/jit_debug.c b/core/iwasm/aot/debug/jit_debug.c index 261c2054..9f92dd39 100644 --- a/core/iwasm/aot/debug/jit_debug.c +++ b/core/iwasm/aot/debug/jit_debug.c @@ -69,10 +69,10 @@ typedef struct JITDescriptor { * and inline assembler statement inside. */ void attribute_noinline -__jit_debug_register_code(); +__jit_debug_register_code(void); void attribute_noinline -__jit_debug_register_code() +__jit_debug_register_code(void) { int x; *(char *)&x = '\0'; @@ -96,7 +96,7 @@ extern JITDescriptor __jit_debug_descriptor; * This gives the debugger an easy way to inject custom code to * handle the events. 
*/ -void (*__jit_debug_register_code_ptr)() = __jit_debug_register_code; +void (*__jit_debug_register_code_ptr)(void) = __jit_debug_register_code; #ifdef __cplusplus } @@ -171,7 +171,7 @@ DestroyJITCodeEntryInternal(JITCodeEntry *entry) } bool -jit_debug_engine_init() +jit_debug_engine_init(void) { if (jit_debug_engine) { return true; @@ -194,7 +194,7 @@ jit_debug_engine_init() } void -jit_debug_engine_destroy() +jit_debug_engine_destroy(void) { if (jit_debug_engine) { WASMJITEntryNode *node, *node_next; diff --git a/core/iwasm/aot/debug/jit_debug.h b/core/iwasm/aot/debug/jit_debug.h index 5e3e3651..813c8b78 100644 --- a/core/iwasm/aot/debug/jit_debug.h +++ b/core/iwasm/aot/debug/jit_debug.h @@ -11,10 +11,10 @@ extern "C" { #endif bool -jit_debug_engine_init(); +jit_debug_engine_init(void); void -jit_debug_engine_destroy(); +jit_debug_engine_destroy(void); bool jit_code_entry_create(const uint8 *symfile_addr, uint64 symfile_size); diff --git a/core/iwasm/common/wasm_memory.c b/core/iwasm/common/wasm_memory.c index 71d33754..82eebbf3 100644 --- a/core/iwasm/common/wasm_memory.c +++ b/core/iwasm/common/wasm_memory.c @@ -159,7 +159,7 @@ wasm_runtime_memory_init(mem_alloc_type_t mem_alloc_type, } void -wasm_runtime_memory_destroy() +wasm_runtime_memory_destroy(void) { if (memory_mode == MEMORY_MODE_POOL) { #if BH_ENABLE_GC_VERIFY == 0 @@ -176,7 +176,7 @@ wasm_runtime_memory_destroy() } unsigned -wasm_runtime_memory_pool_size() +wasm_runtime_memory_pool_size(void) { if (memory_mode == MEMORY_MODE_POOL) return global_pool_size; diff --git a/core/iwasm/common/wasm_memory.h b/core/iwasm/common/wasm_memory.h index a5dfefae..2f20d3f6 100644 --- a/core/iwasm/common/wasm_memory.h +++ b/core/iwasm/common/wasm_memory.h @@ -46,10 +46,10 @@ wasm_runtime_memory_init(mem_alloc_type_t mem_alloc_type, const MemAllocOption *alloc_option); void -wasm_runtime_memory_destroy(); +wasm_runtime_memory_destroy(void); unsigned -wasm_runtime_memory_pool_size(); 
+wasm_runtime_memory_pool_size(void); void wasm_runtime_set_mem_bound_check_bytes(WASMMemoryInstance *memory, diff --git a/core/iwasm/common/wasm_native.c b/core/iwasm/common/wasm_native.c index 9e8764a2..0ff3053f 100644 --- a/core/iwasm/common/wasm_native.c +++ b/core/iwasm/common/wasm_native.c @@ -469,7 +469,7 @@ wasi_context_dtor(WASMModuleInstanceCommon *inst, void *ctx) #if WASM_ENABLE_QUICK_AOT_ENTRY != 0 static bool -quick_aot_entry_init(); +quick_aot_entry_init(void); #endif bool @@ -1461,7 +1461,7 @@ quick_aot_entry_cmp(const void *quick_aot_entry1, const void *quick_aot_entry2) } static bool -quick_aot_entry_init() +quick_aot_entry_init(void) { qsort(quick_aot_entries, sizeof(quick_aot_entries) / sizeof(QuickAOTEntry), sizeof(QuickAOTEntry), quick_aot_entry_cmp); diff --git a/core/iwasm/common/wasm_native.h b/core/iwasm/common/wasm_native.h index 5cb78bf9..9a6afee1 100644 --- a/core/iwasm/common/wasm_native.h +++ b/core/iwasm/common/wasm_native.h @@ -100,10 +100,10 @@ wasm_native_inherit_contexts(struct WASMModuleInstanceCommon *child, #endif /* WASM_ENABLE_MODULE_INST_CONTEXT */ bool -wasm_native_init(); +wasm_native_init(void); void -wasm_native_destroy(); +wasm_native_destroy(void); #if WASM_ENABLE_QUICK_AOT_ENTRY != 0 void * diff --git a/core/iwasm/common/wasm_runtime_common.c b/core/iwasm/common/wasm_runtime_common.c index 5dd2957d..314dc7dd 100644 --- a/core/iwasm/common/wasm_runtime_common.c +++ b/core/iwasm/common/wasm_runtime_common.c @@ -86,7 +86,7 @@ static bh_list registered_module_list_head; static bh_list *const registered_module_list = ®istered_module_list_head; static korp_mutex registered_module_list_lock; static void -wasm_runtime_destroy_registered_module_list(); +wasm_runtime_destroy_registered_module_list(void); #endif /* WASM_ENABLE_MULTI_MODULE */ #define E_TYPE_XIP 4 @@ -97,11 +97,11 @@ val_type_to_val_kind(uint8 value_type); #if WASM_ENABLE_GC == 0 && WASM_ENABLE_REF_TYPES != 0 /* Initialize externref hashmap */ static bool 
-wasm_externref_map_init(); +wasm_externref_map_init(void); /* Destroy externref hashmap */ static void -wasm_externref_map_destroy(); +wasm_externref_map_destroy(void); #endif /* end of WASM_ENABLE_GC == 0 && WASM_ENABLE_REF_TYPES != 0 */ static void @@ -438,7 +438,7 @@ wasm_runtime_get_exec_env_tls() #endif /* end of OS_ENABLE_HW_BOUND_CHECK */ static bool -wasm_runtime_env_init() +wasm_runtime_env_init(void) { if (bh_platform_init() != 0) return false; @@ -584,7 +584,7 @@ static korp_mutex runtime_lock = OS_THREAD_MUTEX_INITIALIZER; static int32 runtime_ref_count = 0; static bool -wasm_runtime_init_internal() +wasm_runtime_init_internal(void) { if (!wasm_runtime_memory_init(Alloc_With_System_Allocator, NULL)) return false; @@ -622,7 +622,7 @@ wasm_runtime_init() } static void -wasm_runtime_destroy_internal() +wasm_runtime_destroy_internal(void) { #if WASM_ENABLE_GC == 0 && WASM_ENABLE_REF_TYPES != 0 wasm_externref_map_destroy(); @@ -4747,7 +4747,7 @@ fail: || defined(BUILD_TARGET_RISCV32_ILP32D) \ || defined(BUILD_TARGET_RISCV32_ILP32F) \ || defined(BUILD_TARGET_RISCV32_ILP32) || defined(BUILD_TARGET_ARC) -typedef void (*GenericFunctionPointer)(); +typedef void (*GenericFunctionPointer)(void); void invokeNative(GenericFunctionPointer f, uint32 *args, uint32 n_stacks); @@ -5312,7 +5312,7 @@ fail: #if defined(BUILD_TARGET_X86_32) || defined(BUILD_TARGET_ARM) \ || defined(BUILD_TARGET_THUMB) || defined(BUILD_TARGET_MIPS) \ || defined(BUILD_TARGET_XTENSA) -typedef void (*GenericFunctionPointer)(); +typedef void (*GenericFunctionPointer)(void); void invokeNative(GenericFunctionPointer f, uint32 *args, uint32 sz); @@ -5597,7 +5597,7 @@ typedef uint32x4_t __m128i; #endif /* end of WASM_ENABLE_SIMD != 0 */ -typedef void (*GenericFunctionPointer)(); +typedef void (*GenericFunctionPointer)(void); void invokeNative(GenericFunctionPointer f, uint64 *args, uint64 n_stacks); diff --git a/core/iwasm/common/wasm_runtime_common.h b/core/iwasm/common/wasm_runtime_common.h index 
71264ca7..fb2c7940 100644 --- a/core/iwasm/common/wasm_runtime_common.h +++ b/core/iwasm/common/wasm_runtime_common.h @@ -852,10 +852,10 @@ wasm_runtime_set_module_reader(const module_reader reader, const module_destroyer destroyer); module_reader -wasm_runtime_get_module_reader(); +wasm_runtime_get_module_reader(void); module_destroyer -wasm_runtime_get_module_destroyer(); +wasm_runtime_get_module_destroyer(void); bool wasm_runtime_register_module_internal(const char *module_name, @@ -881,7 +881,7 @@ bool wasm_runtime_is_loading_module(const char *module_name); void -wasm_runtime_destroy_loading_module_list(); +wasm_runtime_destroy_loading_module_list(void); WASMModuleCommon * wasm_runtime_search_sub_module(const WASMModuleCommon *parent_module, @@ -1168,7 +1168,7 @@ wasm_runtime_quick_invoke_c_api_native(WASMModuleInstanceCommon *module_inst, uint32 result_count); void -wasm_runtime_show_app_heap_corrupted_prompt(); +wasm_runtime_show_app_heap_corrupted_prompt(void); #if WASM_ENABLE_LOAD_CUSTOM_SECTION != 0 void diff --git a/core/iwasm/common/wasm_shared_memory.h b/core/iwasm/common/wasm_shared_memory.h index 8bbc4a80..e1c5154a 100644 --- a/core/iwasm/common/wasm_shared_memory.h +++ b/core/iwasm/common/wasm_shared_memory.h @@ -17,10 +17,10 @@ extern "C" { extern korp_mutex g_shared_memory_lock; bool -wasm_shared_memory_init(); +wasm_shared_memory_init(void); void -wasm_shared_memory_destroy(); +wasm_shared_memory_destroy(void); uint16 shared_memory_inc_reference(WASMMemoryInstance *memory); diff --git a/core/iwasm/compilation/aot.h b/core/iwasm/compilation/aot.h index dcf9bbe1..98d2cc6c 100644 --- a/core/iwasm/compilation/aot.h +++ b/core/iwasm/compilation/aot.h @@ -312,7 +312,7 @@ void aot_destroy_comp_data(AOTCompData *comp_data); char * -aot_get_last_error(); +aot_get_last_error(void); void aot_set_last_error(const char *error); diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 51963759..1d7ca8f9 100644 
--- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -6030,7 +6030,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, #if WASM_ENABLE_LABELS_AS_VALUES != 0 void ** -wasm_interp_get_handle_table() +wasm_interp_get_handle_table(void) { WASMModuleInstance module; memset(&module, 0, sizeof(WASMModuleInstance)); diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index ed85bb78..7b99c60a 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -5751,7 +5751,7 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, #if WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_LABELS_AS_VALUES != 0 void ** -wasm_interp_get_handle_table(); +wasm_interp_get_handle_table(void); static void **handle_table; #endif diff --git a/core/iwasm/interpreter/wasm_mini_loader.c b/core/iwasm/interpreter/wasm_mini_loader.c index 34f4a183..968eaf00 100644 --- a/core/iwasm/interpreter/wasm_mini_loader.c +++ b/core/iwasm/interpreter/wasm_mini_loader.c @@ -2532,7 +2532,7 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, #if WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_LABELS_AS_VALUES != 0 void ** -wasm_interp_get_handle_table(); +wasm_interp_get_handle_table(void); static void **handle_table; #endif diff --git a/core/iwasm/libraries/lib-wasi-threads/lib_wasi_threads_wrapper.c b/core/iwasm/libraries/lib-wasi-threads/lib_wasi_threads_wrapper.c index aeaafced..c9512fb4 100644 --- a/core/iwasm/libraries/lib-wasi-threads/lib_wasi_threads_wrapper.c +++ b/core/iwasm/libraries/lib-wasi-threads/lib_wasi_threads_wrapper.c @@ -29,7 +29,7 @@ typedef struct { } ThreadStartArg; static int32 -allocate_thread_id() +allocate_thread_id(void) { os_mutex_lock(&thread_id_lock); int32 id = tid_allocator_get_tid(&tid_allocator); diff --git a/core/iwasm/libraries/thread-mgr/thread_manager.h b/core/iwasm/libraries/thread-mgr/thread_manager.h index 
f5ca1eae..7ad6c772 100644 --- a/core/iwasm/libraries/thread-mgr/thread_manager.h +++ b/core/iwasm/libraries/thread-mgr/thread_manager.h @@ -64,10 +64,10 @@ void wasm_cluster_set_max_thread_num(uint32 num); bool -thread_manager_init(); +thread_manager_init(void); void -thread_manager_destroy(); +thread_manager_destroy(void); /* Create cluster */ WASMCluster * @@ -109,7 +109,7 @@ bool wasm_cluster_register_destroy_callback(void (*callback)(WASMCluster *)); void -wasm_cluster_cancel_all_callbacks(); +wasm_cluster_cancel_all_callbacks(void); void wasm_cluster_suspend_all(WASMCluster *cluster); @@ -190,7 +190,7 @@ struct WASMCurrentEnvStatus { }; WASMCurrentEnvStatus * -wasm_cluster_create_exenv_status(); +wasm_cluster_create_exenv_status(void); void wasm_cluster_destroy_exenv_status(WASMCurrentEnvStatus *status); diff --git a/core/shared/platform/include/platform_api_extension.h b/core/shared/platform/include/platform_api_extension.h index b1c3b4f4..37b8399b 100644 --- a/core/shared/platform/include/platform_api_extension.h +++ b/core/shared/platform/include/platform_api_extension.h @@ -379,19 +379,19 @@ os_sem_unlink(const char *name); * Initialize process-global state for os_wakeup_blocking_op. */ int -os_blocking_op_init(); +os_blocking_op_init(void); /** * Start accepting os_wakeup_blocking_op requests for the calling thread. */ void -os_begin_blocking_op(); +os_begin_blocking_op(void); /** * Stop accepting os_wakeup_blocking_op requests for the calling thread. */ void -os_end_blocking_op(); +os_end_blocking_op(void); /** * Wake up the specified thread. @@ -1586,7 +1586,7 @@ os_closedir(os_dir_stream dir_stream); * @return the invalid directory stream */ os_dir_stream -os_get_invalid_dir_stream(); +os_get_invalid_dir_stream(void); /** * Checks whether the given directory stream is valid. 
An invalid directory @@ -1605,7 +1605,7 @@ os_is_dir_stream_valid(os_dir_stream *dir_stream); * @return the invalid handle */ os_file_handle -os_get_invalid_handle(); +os_get_invalid_handle(void); /** * Checks whether the given file handle is valid. An invalid handle is diff --git a/core/shared/platform/nuttx/platform_internal.h b/core/shared/platform/nuttx/platform_internal.h index 0b54d85a..fef2122d 100644 --- a/core/shared/platform/nuttx/platform_internal.h +++ b/core/shared/platform/nuttx/platform_internal.h @@ -137,7 +137,7 @@ typedef DIR *os_dir_stream; typedef int os_raw_file_handle; static inline os_file_handle -os_get_invalid_handle() +os_get_invalid_handle(void) { return -1; } diff --git a/product-mini/platforms/common/libc_wasi.c b/product-mini/platforms/common/libc_wasi.c index 84e133bc..2f0b3512 100644 --- a/product-mini/platforms/common/libc_wasi.c +++ b/product-mini/platforms/common/libc_wasi.c @@ -28,7 +28,7 @@ typedef enum { } libc_wasi_parse_result_t; static void -libc_wasi_print_help() +libc_wasi_print_help(void) { printf(" --env= Pass wasi environment variables with " "\"key=value\"\n"); diff --git a/product-mini/platforms/nuttx/CMakeLists.txt b/product-mini/platforms/nuttx/CMakeLists.txt index ac6c47b9..ca07a5d5 100644 --- a/product-mini/platforms/nuttx/CMakeLists.txt +++ b/product-mini/platforms/nuttx/CMakeLists.txt @@ -197,7 +197,7 @@ include(${WAMR_ROOT_DIR}/build-scripts/runtime_lib.cmake) # NuttX wamr lib complie required: `WAMR_SOURCES` `WAMR_CFLAGS` `WAMR_INCDIRS` # `WAMR_DEFINITIONS` set(WAMR_SOURCES ${WAMR_RUNTIME_LIB_SOURCE}) -set(WAMR_CFLAGS -Wno-strict-prototypes -Wno-shadow -Wno-unused-variable +set(WAMR_CFLAGS -Wno-shadow -Wno-unused-variable -Wno-int-conversion -Wno-implicit-function-declaration) get_directory_property(WAMR_INCDIRS INCLUDE_DIRECTORIES) get_directory_property(WAMR_DEFINITIONS COMPILE_DEFINITIONS) diff --git a/product-mini/platforms/nuttx/wamr.mk b/product-mini/platforms/nuttx/wamr.mk index 38553e86..0ee76c7d 
100644 --- a/product-mini/platforms/nuttx/wamr.mk +++ b/product-mini/platforms/nuttx/wamr.mk @@ -400,7 +400,7 @@ CFLAGS += -DWASM_ENABLE_EXCE_HANDLING=0 CFLAGS += -DWASM_ENABLE_TAGS=0 endif -CFLAGS += -Wno-strict-prototypes -Wno-shadow -Wno-unused-variable +CFLAGS += -Wno-shadow -Wno-unused-variable CFLAGS += -Wno-int-conversion -Wno-implicit-function-declaration CFLAGS += -I${CORE_ROOT} \ diff --git a/product-mini/platforms/posix/main.c b/product-mini/platforms/posix/main.c index c1ba169d..14dc01f6 100644 --- a/product-mini/platforms/posix/main.c +++ b/product-mini/platforms/posix/main.c @@ -27,7 +27,7 @@ static char **app_argv; /* clang-format off */ static int -print_help() +print_help(void) { printf("Usage: iwasm [-options] wasm_file [args...]\n"); printf("options:\n"); From c276aca2023bf7d737491abbb93df5c7f634afc9 Mon Sep 17 00:00:00 2001 From: Benbuck Nason Date: Mon, 9 Sep 2024 18:55:15 -0700 Subject: [PATCH 19/24] Fix compile error when multi-module and tags are enabled (#3781) --- core/iwasm/interpreter/wasm_loader.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index 7b99c60a..3a21b1fc 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -2474,7 +2474,8 @@ wasm_loader_resolve_tag(const char *module_name, const char *tag_name, } /* check function type */ - if (!wasm_type_equal(expected_tag_type, tag->tag_type)) { + if (!wasm_type_equal(expected_tag_type, tag->tag_type, module->types, + module->type_count)) { LOG_DEBUG("%s.%s failed the type check", module_name, tag_name); set_error_buf(error_buf, error_buf_size, "incompatible import type"); return NULL; From cd47438af92fc75231339197b03ca0a7b0512af3 Mon Sep 17 00:00:00 2001 From: YAMAMOTO Takashi Date: Tue, 10 Sep 2024 11:58:22 +0900 Subject: [PATCH 20/24] spec_test_on_nuttx.yml: Disable riscv32_ilp32f for now (#3777) It seems failing too frequently. cf. 
https://github.com/bytecodealliance/wasm-micro-runtime/issues/3776 --- .github/workflows/spec_test_on_nuttx.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/spec_test_on_nuttx.yml b/.github/workflows/spec_test_on_nuttx.yml index 2e940b0e..712bd06b 100644 --- a/.github/workflows/spec_test_on_nuttx.yml +++ b/.github/workflows/spec_test_on_nuttx.yml @@ -74,11 +74,11 @@ jobs: target: "riscv32", fpu_type: "none" }, - { - config: "boards/risc-v/qemu-rv/rv-virt/configs/nsh", - target: "riscv32_ilp32f", - fpu_type: "fp" - }, + #{ + # config: "boards/risc-v/qemu-rv/rv-virt/configs/nsh", + # target: "riscv32_ilp32f", + # fpu_type: "fp" + #}, # { # config: "boards/risc-v/qemu-rv/rv-virt/configs/nsh", # target: "riscv32_ilp32d", From 1a61cb75e1d8ced8a2aed2b02f8793676664aa37 Mon Sep 17 00:00:00 2001 From: YAMAMOTO Takashi Date: Tue, 10 Sep 2024 15:53:11 +0900 Subject: [PATCH 21/24] Appease GCC -Wformat (#3783) I'm not sure we want to use C99 %tu here. While C99 %zu is more widely used in WAMR, %tu is rare (if any) and I'm not sure if it's ubiquitously implemented in platforms we support. 
--- core/shared/mem-alloc/ems/ems_alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/shared/mem-alloc/ems/ems_alloc.c b/core/shared/mem-alloc/ems/ems_alloc.c index 4863527d..e272b301 100644 --- a/core/shared/mem-alloc/ems/ems_alloc.c +++ b/core/shared/mem-alloc/ems/ems_alloc.c @@ -992,8 +992,8 @@ gci_dump(gc_heap_t *heap) os_printf("#%d %08" PRIx32 " %" PRIx32 " %d %d" " %c %" PRId32 "\n", - i, (int32)((char *)cur - (char *)heap->base_addr), (int32)ut, - p, mark, inuse, (int32)hmu_obj_size(size)); + i, (uint32)((char *)cur - (char *)heap->base_addr), + (uint32)ut, p, mark, inuse, (int32)hmu_obj_size(size)); #if BH_ENABLE_GC_VERIFY != 0 if (inuse == 'V') { gc_object_prefix_t *prefix = (gc_object_prefix_t *)(cur + 1); From b882017674bf76ea50b7e1a1dc5af8e64d4b2e78 Mon Sep 17 00:00:00 2001 From: Marcin Kolny Date: Wed, 11 Sep 2024 01:59:16 +0100 Subject: [PATCH 22/24] Fix compiler warnings (#3784) The definition of os_get_invalid_handle in platform_internal.h did not match the declaration in platform_api_extension.h. 
--- core/shared/platform/alios/platform_internal.h | 2 +- core/shared/platform/android/platform_internal.h | 2 +- core/shared/platform/cosmopolitan/platform_internal.h | 2 +- core/shared/platform/darwin/platform_internal.h | 2 +- core/shared/platform/esp-idf/platform_internal.h | 2 +- core/shared/platform/freebsd/platform_internal.h | 2 +- core/shared/platform/linux-sgx/platform_internal.h | 2 +- core/shared/platform/linux/platform_internal.h | 2 +- core/shared/platform/riot/platform_internal.h | 2 +- core/shared/platform/rt-thread/platform_internal.h | 2 +- core/shared/platform/vxworks/platform_internal.h | 2 +- core/shared/platform/windows/platform_internal.h | 2 +- core/shared/platform/zephyr/platform_internal.h | 2 +- 13 files changed, 13 insertions(+), 13 deletions(-) diff --git a/core/shared/platform/alios/platform_internal.h b/core/shared/platform/alios/platform_internal.h index d2897a6b..bdf3d073 100644 --- a/core/shared/platform/alios/platform_internal.h +++ b/core/shared/platform/alios/platform_internal.h @@ -77,7 +77,7 @@ typedef void *os_dir_stream; typedef int os_raw_file_handle; static inline os_file_handle -os_get_invalid_handle() +os_get_invalid_handle(void) { return -1; } diff --git a/core/shared/platform/android/platform_internal.h b/core/shared/platform/android/platform_internal.h index 4449f21e..7abf863f 100644 --- a/core/shared/platform/android/platform_internal.h +++ b/core/shared/platform/android/platform_internal.h @@ -151,7 +151,7 @@ typedef DIR *os_dir_stream; typedef int os_raw_file_handle; static inline os_file_handle -os_get_invalid_handle() +os_get_invalid_handle(void) { return -1; } diff --git a/core/shared/platform/cosmopolitan/platform_internal.h b/core/shared/platform/cosmopolitan/platform_internal.h index 72602116..5c73ed5a 100644 --- a/core/shared/platform/cosmopolitan/platform_internal.h +++ b/core/shared/platform/cosmopolitan/platform_internal.h @@ -69,7 +69,7 @@ typedef DIR *os_dir_stream; typedef int os_raw_file_handle; 
static inline os_file_handle -os_get_invalid_handle() +os_get_invalid_handle(void) { return -1; } diff --git a/core/shared/platform/darwin/platform_internal.h b/core/shared/platform/darwin/platform_internal.h index 1cbecdc2..928aad72 100644 --- a/core/shared/platform/darwin/platform_internal.h +++ b/core/shared/platform/darwin/platform_internal.h @@ -114,7 +114,7 @@ typedef DIR *os_dir_stream; typedef int os_raw_file_handle; static inline os_file_handle -os_get_invalid_handle() +os_get_invalid_handle(void) { return -1; } diff --git a/core/shared/platform/esp-idf/platform_internal.h b/core/shared/platform/esp-idf/platform_internal.h index e0091bee..580a06d9 100644 --- a/core/shared/platform/esp-idf/platform_internal.h +++ b/core/shared/platform/esp-idf/platform_internal.h @@ -145,7 +145,7 @@ typedef DIR *os_dir_stream; typedef int os_raw_file_handle; static inline os_file_handle -os_get_invalid_handle() +os_get_invalid_handle(void) { return -1; } diff --git a/core/shared/platform/freebsd/platform_internal.h b/core/shared/platform/freebsd/platform_internal.h index bfdfe149..01a6e824 100644 --- a/core/shared/platform/freebsd/platform_internal.h +++ b/core/shared/platform/freebsd/platform_internal.h @@ -115,7 +115,7 @@ os_set_signal_number_for_blocking_op(int signo); typedef int os_file_handle; static inline os_file_handle -os_get_invalid_handle() +os_get_invalid_handle(void) { return -1; } diff --git a/core/shared/platform/linux-sgx/platform_internal.h b/core/shared/platform/linux-sgx/platform_internal.h index 2cc34dfc..66960ad2 100644 --- a/core/shared/platform/linux-sgx/platform_internal.h +++ b/core/shared/platform/linux-sgx/platform_internal.h @@ -74,7 +74,7 @@ typedef DIR *os_dir_stream; typedef int os_raw_file_handle; static inline os_file_handle -os_get_invalid_handle() +os_get_invalid_handle(void) { return -1; } diff --git a/core/shared/platform/linux/platform_internal.h b/core/shared/platform/linux/platform_internal.h index aeddc4cc..86518027 100644 --- 
a/core/shared/platform/linux/platform_internal.h +++ b/core/shared/platform/linux/platform_internal.h @@ -127,7 +127,7 @@ typedef DIR *os_dir_stream; typedef int os_raw_file_handle; static inline os_file_handle -os_get_invalid_handle() +os_get_invalid_handle(void) { return -1; } diff --git a/core/shared/platform/riot/platform_internal.h b/core/shared/platform/riot/platform_internal.h index 24a1d6c8..11f2ba0a 100644 --- a/core/shared/platform/riot/platform_internal.h +++ b/core/shared/platform/riot/platform_internal.h @@ -89,7 +89,7 @@ int isnan(double x); #endif static inline os_file_handle -os_get_invalid_handle() +os_get_invalid_handle(void) { return -1; } diff --git a/core/shared/platform/rt-thread/platform_internal.h b/core/shared/platform/rt-thread/platform_internal.h index 69d6d558..b9b8c78c 100644 --- a/core/shared/platform/rt-thread/platform_internal.h +++ b/core/shared/platform/rt-thread/platform_internal.h @@ -124,7 +124,7 @@ typedef void *os_dir_stream; typedef int os_raw_file_handle; static inline os_file_handle -os_get_invalid_handle() +os_get_invalid_handle(void) { return -1; } diff --git a/core/shared/platform/vxworks/platform_internal.h b/core/shared/platform/vxworks/platform_internal.h index 1b870c70..6a6b00f4 100644 --- a/core/shared/platform/vxworks/platform_internal.h +++ b/core/shared/platform/vxworks/platform_internal.h @@ -101,7 +101,7 @@ os_sigreturn(); #define os_getpagesize getpagesize static inline os_file_handle -os_get_invalid_handle() +os_get_invalid_handle(void) { return -1; } diff --git a/core/shared/platform/windows/platform_internal.h b/core/shared/platform/windows/platform_internal.h index 8bb77e7c..ed021a9a 100644 --- a/core/shared/platform/windows/platform_internal.h +++ b/core/shared/platform/windows/platform_internal.h @@ -188,7 +188,7 @@ typedef uint32_t os_raw_file_handle; #endif static inline os_file_handle -os_get_invalid_handle() +os_get_invalid_handle(void) { return NULL; } diff --git 
a/core/shared/platform/zephyr/platform_internal.h b/core/shared/platform/zephyr/platform_internal.h index fa8fbd86..5bc9ca98 100644 --- a/core/shared/platform/zephyr/platform_internal.h +++ b/core/shared/platform/zephyr/platform_internal.h @@ -209,7 +209,7 @@ typedef void *os_dir_stream; typedef int os_raw_file_handle; static inline os_file_handle -os_get_invalid_handle() +os_get_invalid_handle(void) { return -1; } From 9c2083a27f7cba141b4957db4b9907890f0fe096 Mon Sep 17 00:00:00 2001 From: Marcin Kolny Date: Wed, 11 Sep 2024 09:08:37 +0100 Subject: [PATCH 23/24] Implement option for skipping function index in the callstack (#3785) Also add a script that converts instruction pointers to function indexes (or function names). https://github.com/bytecodealliance/wasm-micro-runtime/issues/3758 --- core/iwasm/aot/aot_runtime.c | 35 ++-- core/iwasm/aot/aot_runtime.h | 1 + core/iwasm/compilation/aot_emit_aot_file.c | 3 + core/iwasm/compilation/aot_emit_function.c | 41 ++--- core/iwasm/compilation/aot_stack_frame_comp.c | 4 +- core/iwasm/include/aot_comp_option.h | 12 +- test-tools/ip2function/ip2function.py | 156 ++++++++++++++++++ wamr-compiler/main.c | 11 +- 8 files changed, 229 insertions(+), 34 deletions(-) create mode 100644 test-tools/ip2function/ip2function.py diff --git a/core/iwasm/aot/aot_runtime.c b/core/iwasm/aot/aot_runtime.c index 013c761a..4b6d25b8 100644 --- a/core/iwasm/aot/aot_runtime.c +++ b/core/iwasm/aot/aot_runtime.c @@ -134,6 +134,15 @@ is_frame_per_function(WASMExecEnv *exec_env) return module->feature_flags & WASM_FEATURE_FRAME_PER_FUNCTION; } +static bool +is_frame_func_idx_disabled(WASMExecEnv *exec_env) +{ + AOTModule *module = + (AOTModule *)((AOTModuleInstance *)exec_env->module_inst)->module; + + return module->feature_flags & WASM_FEATURE_FRAME_NO_FUNC_IDX; +} + static void * get_top_frame(WASMExecEnv *exec_env) { @@ -3952,7 +3961,7 @@ aot_create_call_stack(struct WASMExecEnv *exec_env) #endif } WASMCApiFrame frame = { 0 }; - uint32 
max_local_cell_num, max_stack_cell_num; + uint32 max_local_cell_num = 0, max_stack_cell_num = 0; uint32 all_cell_num, lp_size; frame.instance = module_inst; @@ -3961,16 +3970,20 @@ aot_create_call_stack(struct WASMExecEnv *exec_env) frame.func_offset = ip_offset; frame.func_name_wp = get_func_name_from_index(module_inst, func_index); - if (func_index >= module->import_func_count) { - uint32 aot_func_idx = func_index - module->import_func_count; - max_local_cell_num = module->max_local_cell_nums[aot_func_idx]; - max_stack_cell_num = module->max_stack_cell_nums[aot_func_idx]; - } - else { - AOTFuncType *func_type = module->import_funcs[func_index].func_type; - max_local_cell_num = - func_type->param_cell_num > 2 ? func_type->param_cell_num : 2; - max_stack_cell_num = 0; + if (!is_frame_func_idx_disabled(exec_env)) { + if (func_index >= module->import_func_count) { + uint32 aot_func_idx = func_index - module->import_func_count; + max_local_cell_num = module->max_local_cell_nums[aot_func_idx]; + max_stack_cell_num = module->max_stack_cell_nums[aot_func_idx]; + } + else { + AOTFuncType *func_type = + module->import_funcs[func_index].func_type; + max_local_cell_num = func_type->param_cell_num > 2 + ? 
func_type->param_cell_num + : 2; + max_stack_cell_num = 0; + } } all_cell_num = max_local_cell_num + max_stack_cell_num; diff --git a/core/iwasm/aot/aot_runtime.h b/core/iwasm/aot/aot_runtime.h index 3ff0e0e3..56d11a22 100644 --- a/core/iwasm/aot/aot_runtime.h +++ b/core/iwasm/aot/aot_runtime.h @@ -34,6 +34,7 @@ extern "C" { /* Stack frame is created at the beginning of the function, * and not at the beginning of each function call */ #define WASM_FEATURE_FRAME_PER_FUNCTION (1 << 12) +#define WASM_FEATURE_FRAME_NO_FUNC_IDX (1 << 13) typedef enum AOTSectionType { AOT_SECTION_TYPE_TARGET_INFO = 0, diff --git a/core/iwasm/compilation/aot_emit_aot_file.c b/core/iwasm/compilation/aot_emit_aot_file.c index 20f29057..8fa20530 100644 --- a/core/iwasm/compilation/aot_emit_aot_file.c +++ b/core/iwasm/compilation/aot_emit_aot_file.c @@ -4439,6 +4439,9 @@ aot_obj_data_create(AOTCompContext *comp_ctx) if (comp_ctx->call_stack_features.frame_per_function) { obj_data->target_info.feature_flags |= WASM_FEATURE_FRAME_PER_FUNCTION; } + if (!comp_ctx->call_stack_features.func_idx) { + obj_data->target_info.feature_flags |= WASM_FEATURE_FRAME_NO_FUNC_IDX; + } bh_print_time("Begin to resolve object file info"); diff --git a/core/iwasm/compilation/aot_emit_function.c b/core/iwasm/compilation/aot_emit_function.c index fbef02e2..11129ac9 100644 --- a/core/iwasm/compilation/aot_emit_function.c +++ b/core/iwasm/compilation/aot_emit_function.c @@ -885,25 +885,28 @@ alloc_frame_for_aot_func(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, } if (!comp_ctx->is_jit_mode) { - /* aot mode: new_frame->func_idx = func_idx */ - func_idx_val = comp_ctx->pointer_size == sizeof(uint64) - ? 
I64_CONST(func_idx) - : I32_CONST(func_idx); - offset = I32_CONST(comp_ctx->pointer_size); - CHECK_LLVM_CONST(func_idx_val); - CHECK_LLVM_CONST(offset); - if (!(func_idx_ptr = - LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, new_frame, - &offset, 1, "func_idx_addr")) - || !(func_idx_ptr = - LLVMBuildBitCast(comp_ctx->builder, func_idx_ptr, - INTPTR_T_PTR_TYPE, "func_idx_ptr"))) { - aot_set_last_error("llvm get func_idx_ptr failed"); - return false; - } - if (!LLVMBuildStore(comp_ctx->builder, func_idx_val, func_idx_ptr)) { - aot_set_last_error("llvm build store failed"); - return false; + if (comp_ctx->call_stack_features.func_idx) { + /* aot mode: new_frame->func_idx = func_idx */ + func_idx_val = comp_ctx->pointer_size == sizeof(uint64) + ? I64_CONST(func_idx) + : I32_CONST(func_idx); + offset = I32_CONST(comp_ctx->pointer_size); + CHECK_LLVM_CONST(func_idx_val); + CHECK_LLVM_CONST(offset); + if (!(func_idx_ptr = LLVMBuildInBoundsGEP2( + comp_ctx->builder, INT8_TYPE, new_frame, &offset, 1, + "func_idx_addr")) + || !(func_idx_ptr = + LLVMBuildBitCast(comp_ctx->builder, func_idx_ptr, + INTPTR_T_PTR_TYPE, "func_idx_ptr"))) { + aot_set_last_error("llvm get func_idx_ptr failed"); + return false; + } + if (!LLVMBuildStore(comp_ctx->builder, func_idx_val, + func_idx_ptr)) { + aot_set_last_error("llvm build store failed"); + return false; + } } } else { diff --git a/core/iwasm/compilation/aot_stack_frame_comp.c b/core/iwasm/compilation/aot_stack_frame_comp.c index 342dfe80..fb540e64 100644 --- a/core/iwasm/compilation/aot_stack_frame_comp.c +++ b/core/iwasm/compilation/aot_stack_frame_comp.c @@ -70,7 +70,9 @@ aot_alloc_tiny_frame_for_aot_func(AOTCompContext *comp_ctx, } /* Save the func_idx on the top of the stack */ - ADD_STORE(func_index, wasm_stack_top); + if (comp_ctx->call_stack_features.func_idx) { + ADD_STORE(func_index, wasm_stack_top); + } /* increment the stack pointer */ INT_CONST(offset, sizeof(AOTTinyFrame), I32_TYPE, true); diff --git 
a/core/iwasm/include/aot_comp_option.h b/core/iwasm/include/aot_comp_option.h index 67ec81cd..98f33a16 100644 --- a/core/iwasm/include/aot_comp_option.h +++ b/core/iwasm/include/aot_comp_option.h @@ -12,11 +12,19 @@ typedef struct { * bounds of the current stack frame (and if not, traps). */ bool bounds_checks; - /* Enables or disables instruction pointer (IP) tracking.*/ + /* Enables or disables instruction pointer (IP) tracking. */ bool ip; + /* Enables or disables function index in the stack trace. Please note that + * function index can be recovered from the instruction pointer using + * ip2function.py script, so enabling this feature along with `ip` might + * often be redundant. + * This option will automatically be enabled for GC and Perf Profiling mode. + */ + bool func_idx; + /* Enables or disables tracking instruction pointer of a trap. Only takes - * effect when `ip` is enabled.*/ + * effect when `ip` is enabled. */ bool trap_ip; /* Enables or disables parameters, locals and stack operands. */ diff --git a/test-tools/ip2function/ip2function.py b/test-tools/ip2function/ip2function.py new file mode 100644 index 00000000..fb8ecd17 --- /dev/null +++ b/test-tools/ip2function/ip2function.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2024 Amazon Inc. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# + +""" +This tool corrects function names in call stacks based on the +instruction pointers. + +When the AOT file is generated with excluded func-idx in the +`--call-stack-features` parameter, the function indexes are +incorrect (likely they're zero). This script uses instruction +pointers and the original WASM file to generate a call stack +file with the correct function indexes (or function names, +when available). 
+ +Example input (call_stack.txt) - note that `__imported_wasi_snapshot_preview1_fd_close` +had index 0, therefore it appears as a name in every line: +``` +#00: 0x0505 - __imported_wasi_snapshot_preview1_fd_close +#01: 0x0309 - __imported_wasi_snapshot_preview1_fd_close +#02: 0x037c - __imported_wasi_snapshot_preview1_fd_close +#03: 0x03b2 - __imported_wasi_snapshot_preview1_fd_close +#04: 0x03e4 - __imported_wasi_snapshot_preview1_fd_close +#05: 0x02e6 - __imported_wasi_snapshot_preview1_fd_close +``` + +Conversion command: +``` +python3 test-tools/ip2function/ip2function.py \ + --wasm-file opt-samp/tiny.wasm \ + call_stack.txt +``` + +Output: +``` +#0: 0x0505 - abort +#1: 0x0309 - baz +#2: 0x037c - bar +#3: 0x03b2 - foo +#4: 0x03e4 - __original_main +#5: 0x02e6 - _start +``` +""" + +import argparse +import bisect +import os +import re +import subprocess +import sys + +from typing import NamedTuple, Optional +from typing import TextIO +from pathlib import Path +import shutil + + +class FunctionInfo(NamedTuple): + start_address: int + idx: int + name: Optional[str] + + def __str__(self) -> str: + return self.name if self.name else f"$f{self.idx}" + + +def load_functions(wasm_objdump: Path, wasm_file: Path) -> list[FunctionInfo]: + objdump_function_pattern = re.compile( + r"^([0-9a-f]+)\sfunc\[(\d+)\](?:\s\<(.+)\>)?\:$" + ) + + def parse_objdump_function_line( + line: str, + ) -> Optional[FunctionInfo]: + match = objdump_function_pattern.match(line.strip()) + return ( + FunctionInfo(int(match[1], 16), int(match[2]), match[3]) if match else None + ) + + p = subprocess.run( + [wasm_objdump, "--disassemble", wasm_file], + check=True, + capture_output=True, + text=True, + universal_newlines=True, + ) + + return list( + filter( + None, + ( + parse_objdump_function_line(line.strip()) + for line in p.stdout.split(os.linesep) + ), + ) + ) + + +def parse_call_stack_file( + functions: list[FunctionInfo], call_stack_file: TextIO, output_file: TextIO +) -> None: + 
call_stack_line_pattern = re.compile(r"^(#\d+): (0x[0-9a-f]+) \- (\S+)$") + for line in call_stack_file: + match = call_stack_line_pattern.match(line.strip()) + if not match: + output_file.write(line) + continue + index = match[1] + address = match[2] + + func_pos = bisect.bisect_right( + functions, int(address, 16), key=lambda x: x.start_address + ) + if func_pos <= 0: + raise ValueError(f"Cannot find function for address {address}") + output_file.write(f"{index}: {address} - {functions[func_pos -1]}\n") + + +def main() -> int: + parser = argparse.ArgumentParser(description="addr2line for wasm") + parser.add_argument( + "--wasm-objdump", type=Path, default="wasm-objdump", help="path to wasm objdump" + ) + parser.add_argument( + "--wasm-file", required=True, type=Path, help="path to wasm file" + ) + parser.add_argument( + "call_stack_file", type=argparse.FileType("r"), help="path to a call stack file" + ) + parser.add_argument( + "-o", + "--output", + type=argparse.FileType("w"), + default=sys.stdout, + help="Output file path (default is stdout)", + ) + + args = parser.parse_args() + + wasm_objdump: Path = shutil.which(args.wasm_objdump) + assert wasm_objdump is not None + + wasm_file: Path = args.wasm_file + assert wasm_file.exists() + + parse_call_stack_file( + load_functions(wasm_objdump, wasm_file), args.call_stack_file, args.output + ) + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/wamr-compiler/main.c b/wamr-compiler/main.c index 53c75c84..8dca712c 100644 --- a/wamr-compiler/main.c +++ b/wamr-compiler/main.c @@ -167,7 +167,7 @@ print_help() printf(" By default, all features are enabled. To disable all features,\n"); printf(" provide an empty list (i.e. --call-stack-features=). 
This flag\n"); printf(" only only takes effect when --enable-dump-call-stack is set.\n"); - printf(" Available features: bounds-checks, ip, trap-ip, values.\n"); + printf(" Available features: bounds-checks, ip, func-idx, trap-ip, values.\n"); printf(" --enable-perf-profiling Enable function performance profiling\n"); printf(" --enable-memory-profiling Enable memory usage profiling\n"); printf(" --xip A shorthand of --enable-indirect-mode --disable-llvm-intrinsics\n"); @@ -295,6 +295,9 @@ parse_call_stack_features(char *features_str, else if (!strcmp(features[size], "values")) { out_features->values = true; } + else if (!strcmp(features[size], "func-idx")) { + out_features->func_idx = true; + } else { ret = false; printf("Unsupported feature %s\n", features[size]); @@ -664,6 +667,12 @@ main(int argc, char *argv[]) /* for now we only enable frame per function for a TINY frame mode */ option.call_stack_features.frame_per_function = true; } + if (!option.call_stack_features.func_idx + && (option.enable_gc || option.enable_perf_profiling)) { + LOG_WARNING("'func-idx' call stack feature will be automatically " + "enabled for GC and perf profiling mode"); + option.call_stack_features.func_idx = true; + } if (!size_level_set) { /** From 9aadbfee292dba0a3e7ee4940e5031b49cc3d5bd Mon Sep 17 00:00:00 2001 From: Huang Qi Date: Fri, 13 Sep 2024 09:43:44 +0800 Subject: [PATCH 24/24] Ignore temporary file from aider (#3787) Aider is AI pair programming in your terminal: https://aider.chat --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 355d391f..baf11c89 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ .clangd .DS_Store *.o +.aider* core/deps/** core/shared/mem-alloc/tlsf