Refine interp/aot string storage and emitting (#820)

Currently the string in the wasm/aot file will be duplicated and stored
into const string list/set in interpreter/aot loader, which leads to extra
unnecessary memory consumption if the file buffer can be referred to
after loading. We refine the string storage by:
- if the file buffer can be referred to after loading and it is writable, we
  reuse the file buffer to store the string but not store it into the const
  string set: move string backward and append '\0'
- emit string with '\0' only for XIP mode in which the AOT file is readonly
- if the file buffer cannot be referred to, e.g. in app manager, keep the
  same behavior as before

Signed-off-by: Wenyong Huang <wenyong.huang@intel.com>
This commit is contained in:
Wenyong Huang
2021-11-08 11:27:26 +08:00
committed by GitHub
parent 9424dad56a
commit 403a7d3f4f
4 changed files with 345 additions and 214 deletions

View File

@ -153,16 +153,12 @@ GET_U64_FROM_ADDR(uint32 *addr)
p += len; \
} while (0)
#define read_string(p, p_end, str) \
do { \
uint16 str_len; \
read_uint16(p, p_end, str_len); \
CHECK_BUF(p, p_end, str_len); \
if (!(str = const_str_set_insert(p, str_len, module, error_buf, \
error_buf_size))) { \
goto fail; \
} \
p += str_len; \
#define read_string(p, p_end, str) \
do { \
if (!(str = load_string((uint8 **)&p, p_end, module, \
is_load_from_file_buf, error_buf, \
error_buf_size))) \
goto fail; \
} while (0)
/* Legal values for bin_type */
@ -218,6 +214,17 @@ const_str_set_insert(const uint8 *str, int32 len, AOTModule *module,
HashMap *set = module->const_str_set;
char *c_str, *value;
/* Create const string set if it isn't created */
if (!set
&& !(set = module->const_str_set = bh_hash_map_create(
32, false, (HashFunc)wasm_string_hash,
(KeyEqualFunc)wasm_string_equal, NULL, wasm_runtime_free))) {
set_error_buf(error_buf, error_buf_size,
"create const string set failed");
return NULL;
}
/* Lookup const string set, use the string if found */
if (!(c_str = loader_malloc((uint32)len + 1, error_buf, error_buf_size))) {
return NULL;
}
@ -240,6 +247,64 @@ const_str_set_insert(const uint8 *str, int32 len, AOTModule *module,
return c_str;
}
static char *
load_string(uint8 **p_buf, const uint8 *buf_end, AOTModule *module,
bool is_load_from_file_buf, char *error_buf, uint32 error_buf_size)
{
uint8 *p = *p_buf;
const uint8 *p_end = buf_end;
char *str;
uint16 str_len;
CHECK_BUF(p, p_end, 1);
if (*p & 0x80) {
/* The string has been adjusted */
str = (char *)++p;
/* Ensure the whole string is in range */
do {
CHECK_BUF(p, p_end, 1);
} while (*p++ != '\0');
}
else {
/* The string hasn't been adjusted */
read_uint16(p, p_end, str_len);
CHECK_BUF(p, p_end, str_len);
if (str_len == 0) {
str = "";
}
else if (p[str_len - 1] == '\0') {
/* The string is terminated with '\0', use it directly */
str = (char *)p;
}
else if (is_load_from_file_buf) {
/* As the file buffer can be referred to after loading,
we use the 2 bytes of size to adjust the string:
mark the flag with the highest bit of size[0],
move string 1 byte backward and then append '\0' */
*(p - 2) |= 0x80;
bh_memmove_s(p - 1, (uint32)(str_len + 1), p, (uint32)str_len);
p[str_len - 1] = '\0';
str = (char *)(p - 1);
}
else {
/* Load from sections, the file buffer cannot be reffered to
after loading, we must create another string and insert it
into const string set */
if (!(str = const_str_set_insert((uint8 *)p, str_len, module,
error_buf, error_buf_size))) {
goto fail;
}
}
p += str_len;
}
*p_buf = p;
return str;
fail:
return NULL;
}
static bool
get_aot_file_target(AOTTargetInfo *target_info, char *target_buf,
uint32 target_buf_size, char *error_buf,
@ -398,8 +463,8 @@ get_native_symbol_by_name(const char *name)
static bool
load_native_symbol_section(const uint8 *buf, const uint8 *buf_end,
AOTModule *module, char *error_buf,
uint32 error_buf_size)
AOTModule *module, bool is_load_from_file_buf,
char *error_buf, uint32 error_buf_size)
{
const uint8 *p = buf, *p_end = buf_end;
uint32 cnt;
@ -436,7 +501,8 @@ fail:
static bool
load_name_section(const uint8 *buf, const uint8 *buf_end, AOTModule *module,
char *error_buf, uint32 error_buf_size)
bool is_load_from_file_buf, char *error_buf,
uint32 error_buf_size)
{
#if WASM_ENABLE_CUSTOM_NAME_SECTION != 0
const uint8 *p = buf, *p_end = buf_end;
@ -563,7 +629,8 @@ fail:
static bool
load_custom_section(const uint8 *buf, const uint8 *buf_end, AOTModule *module,
char *error_buf, uint32 error_buf_size)
bool is_load_from_file_buf, char *error_buf,
uint32 error_buf_size)
{
const uint8 *p = buf, *p_end = buf_end;
uint32 sub_section_type;
@ -573,13 +640,14 @@ load_custom_section(const uint8 *buf, const uint8 *buf_end, AOTModule *module,
switch (sub_section_type) {
case AOT_CUSTOM_SECTION_NATIVE_SYMBOL:
if (!load_native_symbol_section(buf, buf_end, module, error_buf,
if (!load_native_symbol_section(buf, buf_end, module,
is_load_from_file_buf, error_buf,
error_buf_size))
goto fail;
break;
case AOT_CUSTOM_SECTION_NAME:
if (!load_name_section(buf, buf_end, module, error_buf,
error_buf_size))
if (!load_name_section(buf, buf_end, module, is_load_from_file_buf,
error_buf, error_buf_size))
goto fail;
break;
default:
@ -977,7 +1045,8 @@ destroy_import_globals(AOTImportGlobal *import_globals, bool is_jit_mode)
static bool
load_import_globals(const uint8 **p_buf, const uint8 *buf_end,
AOTModule *module, char *error_buf, uint32 error_buf_size)
AOTModule *module, bool is_load_from_file_buf,
char *error_buf, uint32 error_buf_size)
{
const uint8 *buf = *p_buf;
AOTImportGlobal *import_globals;
@ -1031,8 +1100,8 @@ fail:
static bool
load_import_global_info(const uint8 **p_buf, const uint8 *buf_end,
AOTModule *module, char *error_buf,
uint32 error_buf_size)
AOTModule *module, bool is_load_from_file_buf,
char *error_buf, uint32 error_buf_size)
{
const uint8 *buf = *p_buf;
@ -1040,8 +1109,8 @@ load_import_global_info(const uint8 **p_buf, const uint8 *buf_end,
/* load import globals */
if (module->import_global_count > 0
&& !load_import_globals(&buf, buf_end, module, error_buf,
error_buf_size))
&& !load_import_globals(&buf, buf_end, module, is_load_from_file_buf,
error_buf, error_buf_size))
return false;
*p_buf = buf;
@ -1141,7 +1210,8 @@ destroy_import_funcs(AOTImportFunc *import_funcs, bool is_jit_mode)
static bool
load_import_funcs(const uint8 **p_buf, const uint8 *buf_end, AOTModule *module,
char *error_buf, uint32 error_buf_size)
bool is_load_from_file_buf, char *error_buf,
uint32 error_buf_size)
{
const char *module_name, *field_name;
const uint8 *buf = *p_buf;
@ -1190,7 +1260,8 @@ fail:
static bool
load_import_func_info(const uint8 **p_buf, const uint8 *buf_end,
AOTModule *module, char *error_buf, uint32 error_buf_size)
AOTModule *module, bool is_load_from_file_buf,
char *error_buf, uint32 error_buf_size)
{
const uint8 *buf = *p_buf;
@ -1198,7 +1269,8 @@ load_import_func_info(const uint8 **p_buf, const uint8 *buf_end,
/* load import funcs */
if (module->import_func_count > 0
&& !load_import_funcs(&buf, buf_end, module, error_buf, error_buf_size))
&& !load_import_funcs(&buf, buf_end, module, is_load_from_file_buf,
error_buf, error_buf_size))
return false;
*p_buf = buf;
@ -1221,8 +1293,8 @@ destroy_object_data_sections(AOTObjectDataSection *data_sections,
static bool
load_object_data_sections(const uint8 **p_buf, const uint8 *buf_end,
AOTModule *module, char *error_buf,
uint32 error_buf_size)
AOTModule *module, bool is_load_from_file_buf,
char *error_buf, uint32 error_buf_size)
{
const uint8 *buf = *p_buf;
AOTObjectDataSection *data_sections;
@ -1280,8 +1352,8 @@ fail:
static bool
load_object_data_sections_info(const uint8 **p_buf, const uint8 *buf_end,
AOTModule *module, char *error_buf,
uint32 error_buf_size)
AOTModule *module, bool is_load_from_file_buf,
char *error_buf, uint32 error_buf_size)
{
const uint8 *buf = *p_buf;
@ -1289,7 +1361,8 @@ load_object_data_sections_info(const uint8 **p_buf, const uint8 *buf_end,
/* load object data sections */
if (module->data_section_count > 0
&& !load_object_data_sections(&buf, buf_end, module, error_buf,
&& !load_object_data_sections(&buf, buf_end, module,
is_load_from_file_buf, error_buf,
error_buf_size))
return false;
@ -1301,18 +1374,19 @@ fail:
static bool
load_init_data_section(const uint8 *buf, const uint8 *buf_end,
AOTModule *module, char *error_buf,
uint32 error_buf_size)
AOTModule *module, bool is_load_from_file_buf,
char *error_buf, uint32 error_buf_size)
{
const uint8 *p = buf, *p_end = buf_end;
if (!load_memory_info(&p, p_end, module, error_buf, error_buf_size)
|| !load_table_info(&p, p_end, module, error_buf, error_buf_size)
|| !load_func_type_info(&p, p_end, module, error_buf, error_buf_size)
|| !load_import_global_info(&p, p_end, module, error_buf,
error_buf_size)
|| !load_import_global_info(&p, p_end, module, is_load_from_file_buf,
error_buf, error_buf_size)
|| !load_global_info(&p, p_end, module, error_buf, error_buf_size)
|| !load_import_func_info(&p, p_end, module, error_buf, error_buf_size))
|| !load_import_func_info(&p, p_end, module, is_load_from_file_buf,
error_buf, error_buf_size))
return false;
/* load function count and start function index */
@ -1336,7 +1410,8 @@ load_init_data_section(const uint8 *buf, const uint8 *buf_end,
read_uint32(p, p_end, module->aux_stack_bottom);
read_uint32(p, p_end, module->aux_stack_size);
if (!load_object_data_sections_info(&p, p_end, module, error_buf,
if (!load_object_data_sections_info(&p, p_end, module,
is_load_from_file_buf, error_buf,
error_buf_size))
return false;
@ -1526,7 +1601,7 @@ destroy_exports(AOTExport *exports, bool is_jit_mode)
static bool
load_exports(const uint8 **p_buf, const uint8 *buf_end, AOTModule *module,
char *error_buf, uint32 error_buf_size)
bool is_load_from_file_buf, char *error_buf, uint32 error_buf_size)
{
const uint8 *buf = *p_buf;
AOTExport *exports;
@ -1563,14 +1638,16 @@ fail:
static bool
load_export_section(const uint8 *buf, const uint8 *buf_end, AOTModule *module,
char *error_buf, uint32 error_buf_size)
bool is_load_from_file_buf, char *error_buf,
uint32 error_buf_size)
{
const uint8 *p = buf, *p_end = buf_end;
/* load export functions */
read_uint32(p, p_end, module->export_count);
if (module->export_count > 0
&& !load_exports(&p, p_end, module, error_buf, error_buf_size))
&& !load_exports(&p, p_end, module, is_load_from_file_buf, error_buf,
error_buf_size))
return false;
if (p != p_end) {
@ -1903,8 +1980,8 @@ fail:
static bool
load_relocation_section(const uint8 *buf, const uint8 *buf_end,
AOTModule *module, char *error_buf,
uint32 error_buf_size)
AOTModule *module, bool is_load_from_file_buf,
char *error_buf, uint32 error_buf_size)
{
AOTRelocationGroup *groups = NULL, *group;
uint32 symbol_count = 0;
@ -2048,7 +2125,6 @@ load_relocation_section(const uint8 *buf, const uint8 *buf_end,
for (i = 0, group = groups; i < group_count; i++, group++) {
AOTRelocation *relocation;
uint32 name_index;
uint16 str_len;
uint8 *name_addr;
/* section name address is 4 bytes aligned. */
@ -2062,13 +2138,7 @@ load_relocation_section(const uint8 *buf, const uint8 *buf_end,
}
name_addr = symbol_buf + symbol_offsets[name_index];
str_len = *(uint16 *)name_addr;
if (!(group->section_name = const_str_set_insert(
name_addr + sizeof(uint16), (int32)str_len, module, error_buf,
error_buf_size))) {
goto fail;
}
read_string(name_addr, buf_end, group->section_name);
read_uint32(buf, buf_end, group->relocation_count);
@ -2083,7 +2153,6 @@ load_relocation_section(const uint8 *buf, const uint8 *buf_end,
/* Load each relocation */
for (j = 0; j < group->relocation_count; j++, relocation++) {
uint32 symbol_index;
uint16 str_len;
uint8 *symbol_addr;
if (sizeof(void *) == 8) {
@ -2107,13 +2176,7 @@ load_relocation_section(const uint8 *buf, const uint8 *buf_end,
}
symbol_addr = symbol_buf + symbol_offsets[symbol_index];
str_len = *(uint16 *)symbol_addr;
if (!(relocation->symbol_name = const_str_set_insert(
symbol_addr + sizeof(uint16), (int32)str_len, module,
error_buf, error_buf_size))) {
goto fail;
}
read_string(symbol_addr, buf_end, relocation->symbol_name);
}
if (!strcmp(group->section_name, ".rel.text")
@ -2185,7 +2248,8 @@ fail:
}
static bool
load_from_sections(AOTModule *module, AOTSection *sections, char *error_buf,
load_from_sections(AOTModule *module, AOTSection *sections,
bool is_load_from_file_buf, char *error_buf,
uint32 error_buf_size)
{
AOTSection *section = sections;
@ -2216,7 +2280,8 @@ load_from_sections(AOTModule *module, AOTSection *sections, char *error_buf,
return false;
break;
case AOT_SECTION_TYPE_INIT_DATA:
if (!load_init_data_section(buf, buf_end, module, error_buf,
if (!load_init_data_section(buf, buf_end, module,
is_load_from_file_buf, error_buf,
error_buf_size))
return false;
break;
@ -2231,17 +2296,20 @@ load_from_sections(AOTModule *module, AOTSection *sections, char *error_buf,
return false;
break;
case AOT_SECTION_TYPE_EXPORT:
if (!load_export_section(buf, buf_end, module, error_buf,
if (!load_export_section(buf, buf_end, module,
is_load_from_file_buf, error_buf,
error_buf_size))
return false;
break;
case AOT_SECTION_TYPE_RELOCATION:
if (!load_relocation_section(buf, buf_end, module, error_buf,
if (!load_relocation_section(buf, buf_end, module,
is_load_from_file_buf, error_buf,
error_buf_size))
return false;
break;
case AOT_SECTION_TYPE_CUSTOM:
if (!load_custom_section(buf, buf_end, module, error_buf,
if (!load_custom_section(buf, buf_end, module,
is_load_from_file_buf, error_buf,
error_buf_size))
return false;
break;
@ -2379,15 +2447,6 @@ create_module(char *error_buf, uint32 error_buf_size)
module->module_type = Wasm_Module_AoT;
if (!(module->const_str_set = bh_hash_map_create(
32, false, (HashFunc)wasm_string_hash,
(KeyEqualFunc)wasm_string_equal, NULL, wasm_runtime_free))) {
set_error_buf(error_buf, error_buf_size,
"create const string set failed");
wasm_runtime_free(module);
return NULL;
}
return module;
}
@ -2400,7 +2459,8 @@ aot_load_from_sections(AOTSection *section_list, char *error_buf,
if (!module)
return NULL;
if (!load_from_sections(module, section_list, error_buf, error_buf_size)) {
if (!load_from_sections(module, section_list, false, error_buf,
error_buf_size)) {
aot_unload(module);
return NULL;
}
@ -2599,7 +2659,8 @@ load(const uint8 *buf, uint32 size, AOTModule *module, char *error_buf,
error_buf_size))
return false;
ret = load_from_sections(module, section_list, error_buf, error_buf_size);
ret = load_from_sections(module, section_list, true, error_buf,
error_buf_size);
if (!ret) {
/* If load_from_sections() fails, then aot text is destroyed
in destroy_sections() */