Enable merged os_mmap for aot data sections (#3681)

Also enable a merged os_mmap for the AOT data and text sections, except on
the NuttX and ESP-IDF platforms.

This also fixes the issue that aarch64 AOT modules fail to load on Android:
https://github.com/bytecodealliance/wasm-micro-runtime/issues/2274
Author: bianchui
Committed: 2024-08-14 08:42:01 +08:00 (committed by GitHub)
Commit: 55cb9c59c5 (parent: 1362a305f4)
2 changed files with 150 additions and 15 deletions
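
Conceptually, instead of calling os_mmap() once per data section (and os_munmap()
once per section on unload), the loader now sums the page-aligned section sizes up
front, maps a single region, and copies every section into it; a single mapping also
keeps the sections close together, which the relocation-distance limits in the diff
depend on. Below is a minimal sketch of that idea, written against plain POSIX mmap
rather than WAMR's os_mmap/os_getpagesize/align_uint64 helpers (SectionDesc, align_up
and merge_sections are illustrative names, not part of the patch):

#include <stdint.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

typedef struct {
    const uint8_t *src; /* section payload inside the AOT file buffer */
    uint32_t size;
    uint8_t *data;      /* filled in: position inside the merged mapping */
} SectionDesc;

static uint64_t
align_up(uint64_t v, uint64_t align)
{
    return (v + align - 1) & ~(align - 1);
}

/* Map one anonymous region large enough for all sections, each section
   starting on its own page boundary, then copy the payloads into it. */
static uint8_t *
merge_sections(SectionDesc *secs, uint32_t count, uint64_t *out_size)
{
    uint64_t page_size = (uint64_t)sysconf(_SC_PAGESIZE);
    uint64_t total = 0, offset = 0;
    uint8_t *base;
    uint32_t i;

    for (i = 0; i < count; i++)
        total += align_up(secs[i].size, page_size);
    if (total == 0 || total > UINT32_MAX)
        return NULL; /* the patch rejects over-large totals the same way */

    base = mmap(NULL, (size_t)total, PROT_READ | PROT_WRITE,
                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (base == MAP_FAILED)
        return NULL;

    for (i = 0; i < count; i++) {
        secs[i].data = base + offset;
        memcpy(secs[i].data, secs[i].src, secs[i].size);
        offset += align_up(secs[i].size, page_size);
    }

    *out_size = total;
    return base; /* unload calls munmap(base, total) exactly once */
}

In the actual diff the size calculation and the copy are split into the two loops of
load_object_data_sections(), but the allocation and copy strategy is the same.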

@@ -2378,7 +2378,6 @@ destroy_object_data_sections(AOTObjectDataSection *data_sections,
}
}
#endif
os_munmap(data_section->data, data_section->size);
}
wasm_runtime_free(data_sections);
}
@@ -2392,6 +2391,9 @@ load_object_data_sections(const uint8 **p_buf, const uint8 *buf_end,
AOTObjectDataSection *data_sections;
uint64 size;
uint32 i;
uint64 total_size = 0;
uint32 page_size = os_getpagesize();
uint8 *merged_sections = NULL;
/* Allocate memory */
size = sizeof(AOTObjectDataSection) * (uint64)module->data_section_count;
@@ -2400,8 +2402,22 @@ load_object_data_sections(const uint8 **p_buf, const uint8 *buf_end,
return false;
}
/* Create each data section */
/* First iteration: read data from buf, and calculate total memory needed */
for (i = 0; i < module->data_section_count; i++) {
read_string(buf, buf_end, data_sections[i].name);
read_uint32(buf, buf_end, data_sections[i].size);
CHECK_BUF(buf, buf_end, data_sections[i].size);
/* temporarily record the data ptr for merging; it is replaced once the merged region is mmapped */
if (data_sections[i].size > 0)
data_sections[i].data = (uint8 *)buf;
buf += data_sections[i].size;
total_size += align_uint64((uint64)data_sections[i].size, page_size);
}
if (total_size > UINT32_MAX) {
set_error_buf(error_buf, error_buf_size, "data sections too large");
return false;
}
if (total_size > 0) {
int map_prot = MMAP_PROT_READ | MMAP_PROT_WRITE;
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) \
|| defined(BUILD_TARGET_RISCV64_LP64D) \
@@ -2412,29 +2428,33 @@ load_object_data_sections(const uint8 **p_buf, const uint8 *buf_end,
#else
int map_flags = MMAP_MAP_NONE;
#endif
read_string(buf, buf_end, data_sections[i].name);
read_uint32(buf, buf_end, data_sections[i].size);
/* Allocate memory for data */
if (data_sections[i].size > 0
&& !(data_sections[i].data =
os_mmap(NULL, data_sections[i].size, map_prot, map_flags,
os_get_invalid_handle()))) {
merged_sections = module->merged_data_sections =
os_mmap(NULL, (uint32)total_size, map_prot, map_flags,
os_get_invalid_handle());
if (!merged_sections) {
set_error_buf(error_buf, error_buf_size, "allocate memory failed");
return false;
}
module->merged_data_sections_size = (uint32)total_size;
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
#if !defined(BH_PLATFORM_LINUX_SGX) && !defined(BH_PLATFORM_WINDOWS) \
&& !defined(BH_PLATFORM_DARWIN)
/* address must be in the first 2 Gigabytes of
the process address space */
bh_assert((uintptr_t)data_sections[i].data < INT32_MAX);
bh_assert((uintptr_t)merged_sections < INT32_MAX);
#endif
#endif
}
read_byte_array(buf, buf_end, data_sections[i].data,
data_sections[i].size);
/* Second iteration: Create each data section */
for (i = 0; i < module->data_section_count; i++) {
if (data_sections[i].size > 0) {
bh_memcpy_s(merged_sections, data_sections[i].size,
data_sections[i].data, data_sections[i].size);
data_sections[i].data = merged_sections;
merged_sections += align_uint(data_sections[i].size, page_size);
}
}
*p_buf = buf;
@@ -2532,6 +2552,90 @@ fail:
return false;
}
#if !defined(BH_PLATFORM_NUTTX) && !defined(BH_PLATFORM_ESP_IDF)
static bool
try_merge_data_and_text(const uint8 **buf, const uint8 **buf_end,
AOTModule *module, char *error_buf,
uint32 error_buf_size)
{
uint8 *old_buf = (uint8 *)*buf;
uint8 *old_end = (uint8 *)*buf_end;
size_t code_size = (size_t)(old_end - old_buf);
uint32 page_size = os_getpagesize();
uint64 total_size = 0;
uint32 i;
uint8 *sections;
if (code_size == 0) {
return true;
}
/* calc total memory needed */
total_size += align_uint64((uint64)code_size, page_size);
for (i = 0; i < module->data_section_count; ++i) {
total_size +=
align_uint64((uint64)module->data_sections[i].size, page_size);
}
/* The distance between .data and .text should not be greater than 4 GB for
 * some targets (e.g. arm64 relocations require a < 4 GB distance) */
if (total_size > UINT32_MAX) {
return false;
}
if (total_size != 0) {
int map_prot = MMAP_PROT_READ | MMAP_PROT_WRITE;
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) \
|| defined(BUILD_TARGET_RISCV64_LP64D) \
|| defined(BUILD_TARGET_RISCV64_LP64)
/* aot code and data in x86_64 must be in range 0 to 2G due
to relocation for R_X86_64_32/32S/PC32 */
int map_flags = MMAP_MAP_32BIT;
#else
int map_flags = MMAP_MAP_NONE;
#endif
sections = os_mmap(NULL, (uint32)total_size, map_prot, map_flags,
os_get_invalid_handle());
if (!sections) {
/* merging failed, but that may not be critical for some targets */
return false;
}
if (os_mprotect(sections, code_size, map_prot | MMAP_PROT_EXEC) != 0) {
os_munmap(sections, (uint32)total_size);
return false;
}
module->merged_data_text_sections = sections;
module->merged_data_text_sections_size = (uint32)total_size;
/* The order is not essential; do what compilers do and put the .text section first */
*buf = sections;
*buf_end = sections + code_size;
bh_memcpy_s(sections, code_size, old_buf, code_size);
os_munmap(old_buf, code_size);
sections += align_uint((uint32)code_size, page_size);
/* then .data sections */
for (i = 0; i < module->data_section_count; ++i) {
AOTObjectDataSection *data_section = module->data_sections + i;
uint8 *old_data = data_section->data;
data_section->data = sections;
bh_memcpy_s(data_section->data, data_section->size, old_data,
data_section->size);
sections += align_uint(data_section->size, page_size);
}
if (module->merged_data_sections) {
os_munmap(module->merged_data_sections,
module->merged_data_sections_size);
module->merged_data_sections = NULL;
module->merged_data_sections_size = 0;
}
}
return true;
}
#endif //! defined(BH_PLATFORM_NUTTX) && !defined(BH_PLATFORM_ESP_IDF)
static bool
load_text_section(const uint8 *buf, const uint8 *buf_end, AOTModule *module,
char *error_buf, uint32 error_buf_size)
@@ -3749,6 +3853,17 @@ load_from_sections(AOTModule *module, AOTSection *sections,
return false;
break;
case AOT_SECTION_TYPE_TEXT:
#if !defined(BH_PLATFORM_NUTTX) && !defined(BH_PLATFORM_ESP_IDF)
/* Try to merge .data and .text, with these exceptions:
 * 1. XIP mode
 * 2. pre-mmapped modules loaded from aot_load_from_sections()
 * 3. NuttX & ESP-IDF: they have a separate region for MMAP_PROT_EXEC
 */
if (!module->is_indirect_mode && is_load_from_file_buf)
if (!try_merge_data_and_text(&buf, &buf_end, module,
error_buf, error_buf_size))
LOG_WARNING("merge .data and .text sections failed");
#endif //! defined(BH_PLATFORM_NUTTX) && !defined(BH_PLATFORM_ESP_IDF)
if (!load_text_section(buf, buf_end, module, error_buf,
error_buf_size))
return false;
@@ -4179,7 +4294,11 @@ load(const uint8 *buf, uint32 size, AOTModule *module,
if (!ret) {
/* If load_from_sections() fails, then aot text is destroyed
in destroy_sections() */
destroy_sections(section_list, module->is_indirect_mode ? false : true);
destroy_sections(section_list,
module->is_indirect_mode
|| module->merged_data_text_sections
? false
: true);
/* aot_unload() won't destroy aot text again */
module->code = NULL;
}
@ -4329,7 +4448,8 @@ aot_unload(AOTModule *module)
}
#endif
if (module->code && !module->is_indirect_mode) {
if (module->code && !module->is_indirect_mode
&& !module->merged_data_text_sections) {
/* The layout is: literal size + literal + code (with plt table) */
uint8 *mmap_addr = module->literal - sizeof(uint32);
uint32 total_size =
@@ -4364,6 +4484,14 @@ aot_unload(AOTModule *module)
destroy_object_data_sections(module->data_sections,
module->data_section_count);
if (module->merged_data_sections)
os_munmap(module->merged_data_sections,
module->merged_data_sections_size);
if (module->merged_data_text_sections)
os_munmap(module->merged_data_text_sections,
module->merged_data_text_sections_size);
#if WASM_ENABLE_DEBUG_AOT != 0
jit_code_entry_destroy(module->elf_hdr);
#endif
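
try_merge_data_and_text() above applies the same single-mapping strategy to .text plus
all .data sections, with one extra step: the leading, code-carrying part of the
read-write mapping must also be made executable before the copied code can run. Below
is a hedged sketch of just that protection step, using POSIX mprotect instead of the
os_mprotect wrapper (make_text_executable is a hypothetical helper, not from the patch):

#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>

/* Flip the leading code_size bytes of the merged RW mapping to RWX.
   merged_base is page-aligned because it came straight from mmap; the
   length is rounded up so the change ends on a page boundary and never
   touches the .data pages that follow. */
static int
make_text_executable(uint8_t *merged_base, size_t code_size)
{
    size_t page_size = (size_t)sysconf(_SC_PAGESIZE);
    size_t prot_len = (code_size + page_size - 1) & ~(page_size - 1);

    return mprotect(merged_base, prot_len,
                    PROT_READ | PROT_WRITE | PROT_EXEC);
}

Once the merge succeeds, the per-module merged_data_sections mapping becomes redundant
and is unmapped, and aot_unload() later releases the whole merged_data_text_sections
region with a single os_munmap() call, as the hunks above show.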

@@ -315,6 +315,13 @@ typedef struct AOTModule {
/* Whether the underlying wasm binary buffer can be freed */
bool is_binary_freeable;
/* `.data` sections merged into one mmapped region to reduce TLB cache misses */
uint8 *merged_data_sections;
uint32 merged_data_sections_size;
/* `.data` and `.text` sections merged into one large mmapped section */
uint8 *merged_data_text_sections;
uint32 merged_data_text_sections_size;
} AOTModule;
#define AOTMemoryInstance WASMMemoryInstance