Implement AOT static PGO (#2243)
LLVM PGO (Profile-Guided Optimization) lets the compiler optimize code based on
how it actually runs. This PR implements AOT static PGO, which has been tested on
Linux x86-64 and x86-32. The basic steps are:
1. Use `wamrc --enable-llvm-pgo -o <aot_file_of_pgo> <wasm_file>`
to generate an instrumented aot file.
2. Compile iwasm with `cmake -DWAMR_BUILD_STATIC_PGO=1` and run
`iwasm --gen-prof-file=<raw_profile_file> <aot_file_of_pgo>`
to generate the raw profile file.
3. Run `llvm-profdata merge -output=<profile_file> <raw_profile_file>`
to merge the raw profile file into the profile file.
4. Run `wamrc --use-prof-file=<profile_file> -o <aot_file> <wasm_file>`
to generate the optimized aot file.
5. Run the optimized aot_file: `iwasm <aot_file>`.
Test scripts are also added for each benchmark; run `test_pgo.sh` under
each benchmark's folder to test AOT static PGO.
This commit is contained in:
@ -445,4 +445,8 @@
|
||||
#define WASM_ENABLE_WASM_CACHE 0
|
||||
#endif
|
||||
|
||||
/* Static PGO support switch: falls back to 0 when the build has not
   already defined it. */
#if !defined(WASM_ENABLE_STATIC_PGO)
#define WASM_ENABLE_STATIC_PGO 0
#endif
|
||||
|
||||
#endif /* end of _CONFIG_H_ */
|
||||
|
||||
@ -1430,8 +1430,28 @@ destroy_object_data_sections(AOTObjectDataSection *data_sections,
|
||||
uint32 i;
|
||||
AOTObjectDataSection *data_section = data_sections;
|
||||
for (i = 0; i < data_section_count; i++, data_section++)
|
||||
if (data_section->data)
|
||||
if (data_section->data) {
|
||||
#if WASM_ENABLE_STATIC_PGO != 0
|
||||
if (!strncmp(data_section->name, "__llvm_prf_data", 15)) {
|
||||
LLVMProfileData *data = (LLVMProfileData *)data_section->data;
|
||||
if (data->values) {
|
||||
uint32 num_value_sites =
|
||||
data->num_value_sites[0] + data->num_value_sites[1];
|
||||
uint32 j;
|
||||
for (j = 0; j < num_value_sites; j++) {
|
||||
ValueProfNode *node = data->values[j], *node_next;
|
||||
while (node) {
|
||||
node_next = node->next;
|
||||
wasm_runtime_free(node);
|
||||
node = node_next;
|
||||
}
|
||||
}
|
||||
wasm_runtime_free(data->values);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
os_munmap(data_section->data, data_section->size);
|
||||
}
|
||||
wasm_runtime_free(data_sections);
|
||||
}
|
||||
|
||||
@ -1900,6 +1920,8 @@ str2uint64(const char *buf, uint64 *p_res)
|
||||
return true;
|
||||
}
|
||||
|
||||
#define R_X86_64_GOTPCREL 9 /* 32 bit signed PC relative offset to GOT */
|
||||
|
||||
static bool
|
||||
do_text_relocation(AOTModule *module, AOTRelocationGroup *group,
|
||||
char *error_buf, uint32 error_buf_size)
|
||||
@ -1937,6 +1959,14 @@ do_text_relocation(AOTModule *module, AOTRelocationGroup *group,
|
||||
bh_memcpy_s(symbol, symbol_len, relocation->symbol_name, symbol_len);
|
||||
symbol[symbol_len] = '\0';
|
||||
|
||||
#if WASM_ENABLE_STATIC_PGO != 0
|
||||
if (!strcmp(symbol, "__llvm_profile_runtime")
|
||||
|| !strcmp(symbol, "__llvm_profile_register_function")
|
||||
|| !strcmp(symbol, "__llvm_profile_register_names_function")) {
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!strncmp(symbol, AOT_FUNC_PREFIX, strlen(AOT_FUNC_PREFIX))) {
|
||||
p = symbol + strlen(AOT_FUNC_PREFIX);
|
||||
if (*p == '\0'
|
||||
@ -1945,7 +1975,26 @@ do_text_relocation(AOTModule *module, AOTRelocationGroup *group,
|
||||
"invalid import symbol %s", symbol);
|
||||
goto check_symbol_fail;
|
||||
}
|
||||
#if (defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)) \
|
||||
&& !defined(BH_PLATFORM_WINDOWS)
|
||||
if (relocation->relocation_type == R_X86_64_GOTPCREL) {
|
||||
GOTItem *got_item = module->got_item_list;
|
||||
uint32 got_item_idx = 0;
|
||||
|
||||
while (got_item) {
|
||||
if (got_item->func_idx == func_index)
|
||||
break;
|
||||
got_item_idx++;
|
||||
got_item = got_item->next;
|
||||
}
|
||||
/* Calculate `GOT + G` */
|
||||
symbol_addr = module->got_func_ptrs + got_item_idx;
|
||||
}
|
||||
else
|
||||
symbol_addr = module->func_ptrs[func_index];
|
||||
#else
|
||||
symbol_addr = module->func_ptrs[func_index];
|
||||
#endif
|
||||
}
|
||||
else if (!strcmp(symbol, ".text")) {
|
||||
symbol_addr = module->code;
|
||||
@ -1956,7 +2005,13 @@ do_text_relocation(AOTModule *module, AOTRelocationGroup *group,
|
||||
/* ".rodata.cst4/8/16/.." */
|
||||
|| !strncmp(symbol, ".rodata.cst", strlen(".rodata.cst"))
|
||||
/* ".rodata.strn.m" */
|
||||
|| !strncmp(symbol, ".rodata.str", strlen(".rodata.str"))) {
|
||||
|| !strncmp(symbol, ".rodata.str", strlen(".rodata.str"))
|
||||
#if WASM_ENABLE_STATIC_PGO != 0
|
||||
|| !strncmp(symbol, "__llvm_prf_cnts", 15)
|
||||
|| !strncmp(symbol, "__llvm_prf_data", 15)
|
||||
|| !strncmp(symbol, "__llvm_prf_names", 16)
|
||||
#endif
|
||||
) {
|
||||
symbol_addr = get_data_section_addr(module, symbol, NULL);
|
||||
if (!symbol_addr) {
|
||||
set_error_buf_v(error_buf, error_buf_size,
|
||||
@ -2088,6 +2143,14 @@ do_data_relocation(AOTModule *module, AOTRelocationGroup *group,
|
||||
else if (!strcmp(group->section_name, ".rdata")) {
|
||||
data_section_name = group->section_name;
|
||||
}
|
||||
#if WASM_ENABLE_STATIC_PGO != 0
|
||||
else if (!strncmp(group->section_name, ".rel__llvm_prf_data", 19)) {
|
||||
data_section_name = group->section_name + strlen(".rel");
|
||||
}
|
||||
else if (!strncmp(group->section_name, ".rela__llvm_prf_data", 20)) {
|
||||
data_section_name = group->section_name + strlen(".rela");
|
||||
}
|
||||
#endif
|
||||
else {
|
||||
set_error_buf(error_buf, error_buf_size,
|
||||
"invalid data relocation section name");
|
||||
@ -2107,6 +2170,49 @@ do_data_relocation(AOTModule *module, AOTRelocationGroup *group,
|
||||
if (!strcmp(symbol, ".text")) {
|
||||
symbol_addr = module->code;
|
||||
}
|
||||
#if WASM_ENABLE_STATIC_PGO != 0
|
||||
else if (!strncmp(symbol, AOT_FUNC_PREFIX, strlen(AOT_FUNC_PREFIX))) {
|
||||
char *p = symbol + strlen(AOT_FUNC_PREFIX);
|
||||
uint32 func_index;
|
||||
if (*p == '\0'
|
||||
|| (func_index = (uint32)atoi(p)) > module->func_count) {
|
||||
set_error_buf_v(error_buf, error_buf_size,
|
||||
"invalid relocation symbol %s", symbol);
|
||||
return false;
|
||||
}
|
||||
symbol_addr = module->func_ptrs[func_index];
|
||||
}
|
||||
else if (!strcmp(symbol, "__llvm_prf_cnts")) {
|
||||
uint32 j;
|
||||
for (j = 0; j < module->data_section_count; j++) {
|
||||
if (!strncmp(module->data_sections[j].name, symbol, 15)) {
|
||||
bh_assert(relocation->relocation_addend + sizeof(uint64)
|
||||
<= module->data_sections[j].size);
|
||||
symbol_addr = module->data_sections[j].data;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (j == module->data_section_count) {
|
||||
set_error_buf_v(error_buf, error_buf_size,
|
||||
"invalid relocation symbol %s", symbol);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else if (!strncmp(symbol, "__llvm_prf_cnts", 15)) {
|
||||
uint32 j;
|
||||
for (j = 0; j < module->data_section_count; j++) {
|
||||
if (!strcmp(module->data_sections[j].name, symbol)) {
|
||||
symbol_addr = module->data_sections[j].data;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (j == module->data_section_count) {
|
||||
set_error_buf_v(error_buf, error_buf_size,
|
||||
"invalid relocation symbol %s", symbol);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
#endif /* end of WASM_ENABLE_STATIC_PGO != 0 */
|
||||
else {
|
||||
set_error_buf_v(error_buf, error_buf_size,
|
||||
"invalid relocation symbol %s", symbol);
|
||||
@ -2154,7 +2260,7 @@ load_relocation_section(const uint8 *buf, const uint8 *buf_end,
|
||||
{
|
||||
AOTRelocationGroup *groups = NULL, *group;
|
||||
uint32 symbol_count = 0;
|
||||
uint32 group_count = 0, i, j;
|
||||
uint32 group_count = 0, i, j, got_item_count = 0;
|
||||
uint64 size;
|
||||
uint32 *symbol_offsets, total_string_len;
|
||||
uint8 *symbol_buf, *symbol_buf_end;
|
||||
@ -2216,6 +2322,8 @@ load_relocation_section(const uint8 *buf, const uint8 *buf_end,
|
||||
|
||||
for (j = 0; j < relocation_count; j++) {
|
||||
AOTRelocation relocation = { 0 };
|
||||
char group_name_buf[128] = { 0 };
|
||||
char symbol_name_buf[128] = { 0 };
|
||||
uint32 symbol_index, offset32;
|
||||
int32 addend32;
|
||||
uint16 symbol_name_len;
|
||||
@ -2244,10 +2352,10 @@ load_relocation_section(const uint8 *buf, const uint8 *buf_end,
|
||||
symbol_name_len = *(uint16 *)symbol_name;
|
||||
symbol_name += sizeof(uint16);
|
||||
|
||||
char group_name_buf[128] = { 0 };
|
||||
char symbol_name_buf[128] = { 0 };
|
||||
memcpy(group_name_buf, group_name, group_name_len);
|
||||
memcpy(symbol_name_buf, symbol_name, symbol_name_len);
|
||||
bh_memcpy_s(group_name_buf, (uint32)sizeof(group_name_buf),
|
||||
group_name, group_name_len);
|
||||
bh_memcpy_s(symbol_name_buf, (uint32)sizeof(symbol_name_buf),
|
||||
symbol_name, symbol_name_len);
|
||||
|
||||
if ((group_name_len == strlen(".text")
|
||||
|| (module->is_indirect_mode
|
||||
@ -2309,6 +2417,139 @@ load_relocation_section(const uint8 *buf, const uint8 *buf_end,
|
||||
}
|
||||
#endif /* end of defined(BH_PLATFORM_WINDOWS) */
|
||||
|
||||
#if (defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)) \
|
||||
&& !defined(BH_PLATFORM_WINDOWS)
|
||||
buf = symbol_buf_end;
|
||||
read_uint32(buf, buf_end, group_count);
|
||||
|
||||
/* Resolve the relocations of type R_X86_64_GOTPCREL */
|
||||
for (i = 0; i < group_count; i++) {
|
||||
uint32 name_index, relocation_count;
|
||||
uint16 group_name_len;
|
||||
uint8 *group_name;
|
||||
|
||||
/* section name address is 4 bytes aligned. */
|
||||
buf = (uint8 *)align_ptr(buf, sizeof(uint32));
|
||||
read_uint32(buf, buf_end, name_index);
|
||||
|
||||
if (name_index >= symbol_count) {
|
||||
set_error_buf(error_buf, error_buf_size,
|
||||
"symbol index out of range");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
group_name = symbol_buf + symbol_offsets[name_index];
|
||||
group_name_len = *(uint16 *)group_name;
|
||||
group_name += sizeof(uint16);
|
||||
|
||||
read_uint32(buf, buf_end, relocation_count);
|
||||
|
||||
for (j = 0; j < relocation_count; j++) {
|
||||
AOTRelocation relocation = { 0 };
|
||||
char group_name_buf[128] = { 0 };
|
||||
char symbol_name_buf[128] = { 0 };
|
||||
uint32 symbol_index;
|
||||
uint16 symbol_name_len;
|
||||
uint8 *symbol_name;
|
||||
|
||||
/* relocation offset and addend */
|
||||
buf += sizeof(void *) * 2;
|
||||
|
||||
read_uint32(buf, buf_end, relocation.relocation_type);
|
||||
read_uint32(buf, buf_end, symbol_index);
|
||||
|
||||
if (symbol_index >= symbol_count) {
|
||||
set_error_buf(error_buf, error_buf_size,
|
||||
"symbol index out of range");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
symbol_name = symbol_buf + symbol_offsets[symbol_index];
|
||||
symbol_name_len = *(uint16 *)symbol_name;
|
||||
symbol_name += sizeof(uint16);
|
||||
|
||||
bh_memcpy_s(group_name_buf, (uint32)sizeof(group_name_buf),
|
||||
group_name, group_name_len);
|
||||
bh_memcpy_s(symbol_name_buf, (uint32)sizeof(symbol_name_buf),
|
||||
symbol_name, symbol_name_len);
|
||||
|
||||
if (relocation.relocation_type == R_X86_64_GOTPCREL
|
||||
&& !strncmp(symbol_name_buf, AOT_FUNC_PREFIX,
|
||||
strlen(AOT_FUNC_PREFIX))) {
|
||||
uint32 func_idx =
|
||||
atoi(symbol_name_buf + strlen(AOT_FUNC_PREFIX));
|
||||
GOTItem *got_item = module->got_item_list;
|
||||
|
||||
if (func_idx >= module->func_count) {
|
||||
set_error_buf(error_buf, error_buf_size,
|
||||
"func index out of range");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
while (got_item) {
|
||||
if (got_item->func_idx == func_idx)
|
||||
break;
|
||||
got_item = got_item->next;
|
||||
}
|
||||
|
||||
if (!got_item) {
|
||||
/* Create the got item and append to the list */
|
||||
got_item = wasm_runtime_malloc(sizeof(GOTItem));
|
||||
if (!got_item) {
|
||||
set_error_buf(error_buf, error_buf_size,
|
||||
"allocate memory failed");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
got_item->func_idx = func_idx;
|
||||
got_item->next = NULL;
|
||||
if (!module->got_item_list) {
|
||||
module->got_item_list = module->got_item_list_end =
|
||||
got_item;
|
||||
}
|
||||
else {
|
||||
module->got_item_list_end->next = got_item;
|
||||
module->got_item_list_end = got_item;
|
||||
}
|
||||
|
||||
got_item_count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (got_item_count) {
|
||||
GOTItem *got_item = module->got_item_list;
|
||||
uint32 got_item_idx = 0;
|
||||
|
||||
map_prot = MMAP_PROT_READ | MMAP_PROT_WRITE;
|
||||
/* aot code and data in x86_64 must be in range 0 to 2G due to
|
||||
relocation for R_X86_64_32/32S/PC32 */
|
||||
map_flags = MMAP_MAP_32BIT;
|
||||
|
||||
/* Create the GOT for func_ptrs, note that it is different from
|
||||
the .got section of a dynamic object file */
|
||||
size = (uint64)sizeof(void *) * got_item_count;
|
||||
if (size > UINT32_MAX
|
||||
|| !(module->got_func_ptrs =
|
||||
os_mmap(NULL, (uint32)size, map_prot, map_flags))) {
|
||||
set_error_buf(error_buf, error_buf_size, "mmap memory failed");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
while (got_item) {
|
||||
module->got_func_ptrs[got_item_idx++] =
|
||||
module->func_ptrs[got_item->func_idx];
|
||||
got_item = got_item->next;
|
||||
}
|
||||
|
||||
module->got_item_count = got_item_count;
|
||||
}
|
||||
#else
|
||||
(void)got_item_count;
|
||||
#endif /* (defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)) && \
|
||||
!defined(BH_PLATFORM_WINDOWS) */
|
||||
|
||||
buf = symbol_buf_end;
|
||||
read_uint32(buf, buf_end, group_count);
|
||||
|
||||
@ -2994,9 +3235,27 @@ aot_unload(AOTModule *module)
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)) \
|
||||
&& !defined(BH_PLATFORM_WINDOWS)
|
||||
{
|
||||
GOTItem *got_item = module->got_item_list, *got_item_next;
|
||||
|
||||
if (module->got_func_ptrs) {
|
||||
os_munmap(module->got_func_ptrs,
|
||||
sizeof(void *) * module->got_item_count);
|
||||
}
|
||||
while (got_item) {
|
||||
got_item_next = got_item->next;
|
||||
wasm_runtime_free(got_item);
|
||||
got_item = got_item_next;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (module->data_sections)
|
||||
destroy_object_data_sections(module->data_sections,
|
||||
module->data_section_count);
|
||||
|
||||
#if WASM_ENABLE_DEBUG_AOT != 0
|
||||
jit_code_entry_destroy(module->elf_hdr);
|
||||
#endif
|
||||
@ -3043,3 +3302,23 @@ aot_get_custom_section(const AOTModule *module, const char *name, uint32 *len)
|
||||
return NULL;
|
||||
}
|
||||
#endif /* end of WASM_ENABLE_LOAD_CUSTOM_SECTION */
|
||||
|
||||
#if WASM_ENABLE_STATIC_PGO != 0
|
||||
void
|
||||
aot_exchange_uint16(uint8 *p_data)
|
||||
{
|
||||
return exchange_uint16(p_data);
|
||||
}
|
||||
|
||||
void
|
||||
aot_exchange_uint32(uint8 *p_data)
|
||||
{
|
||||
return exchange_uint32(p_data);
|
||||
}
|
||||
|
||||
void
|
||||
aot_exchange_uint64(uint8 *p_data)
|
||||
{
|
||||
return exchange_uint64(p_data);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -121,6 +121,14 @@ typedef struct {
|
||||
REG_SYM(aot_intrinsic_i32_rem_s), \
|
||||
REG_SYM(aot_intrinsic_i32_rem_u), \
|
||||
|
||||
/* Symbol-table entries for the value-profiling callbacks referenced by
   instrumented AOT code. Expands to nothing when static PGO is disabled. */
#if WASM_ENABLE_STATIC_PGO == 0
#define REG_LLVM_PGO_SYM()
#else
#define REG_LLVM_PGO_SYM()                                                  \
    { "__llvm_profile_instrument_target", llvm_profile_instrument_target }, \
    { "__llvm_profile_instrument_memop", llvm_profile_instrument_memop },
#endif
|
||||
|
||||
#define REG_COMMON_SYMBOLS \
|
||||
REG_SYM(aot_set_exception_with_id), \
|
||||
REG_SYM(aot_invoke_native), \
|
||||
@ -150,6 +158,7 @@ typedef struct {
|
||||
REG_REF_TYPES_SYM() \
|
||||
REG_AOT_TRACE_SYM() \
|
||||
REG_INTRINSIC_SYM() \
|
||||
REG_LLVM_PGO_SYM() \
|
||||
|
||||
#define CHECK_RELOC_OFFSET(data_size) do { \
|
||||
if (!check_reloc_offset(target_section_size, \
|
||||
|
||||
@ -2852,3 +2852,520 @@ aot_dump_perf_profiling(const AOTModuleInstance *module_inst)
|
||||
}
|
||||
}
|
||||
#endif /* end of WASM_ENABLE_PERF_PROFILING */
|
||||
|
||||
#if WASM_ENABLE_STATIC_PGO != 0
|
||||
|
||||
/* indirect call target */
|
||||
#define IPVK_IndirectCallTarget 0
|
||||
/* memory intrinsic functions size */
|
||||
#define IPVK_MemOPSize 1
|
||||
#define IPVK_First IPVK_IndirectCallTarget
|
||||
#define IPVK_Last IPVK_MemOPSize
|
||||
|
||||
#define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 24
|
||||
#define INSTR_PROF_MAX_NUM_VAL_PER_SITE 255
|
||||
|
||||
static int hasNonDefaultValsPerSite = 0;
|
||||
static uint32 VPMaxNumValsPerSite = INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE;
|
||||
|
||||
/**
 * Compare-and-swap on a pointer slot.
 *
 * Uses os_atomic_cmpxchg() when the platform layer defines it; otherwise
 * falls back to a plain, non-atomic compare followed by a store.
 *
 * @param ptr     slot to update
 * @param old_val value the slot is expected to hold
 * @param new_val value to store when the expectation holds
 *
 * @return true if the slot held old_val and now holds new_val,
 *         false if the slot was left untouched
 */
static bool
cmpxchg_ptr(void **ptr, void *old_val, void *new_val)
{
#if !defined(os_atomic_cmpxchg)
    /* TODO: add lock when thread-manager is enabled */
    if (*ptr != old_val)
        return false;

    *ptr = new_val;
    return true;
#else
    return os_atomic_cmpxchg(ptr, &old_val, new_val);
#endif
}
|
||||
|
||||
static int
|
||||
allocateValueProfileCounters(LLVMProfileData *Data)
|
||||
{
|
||||
ValueProfNode **Mem;
|
||||
uint64 NumVSites = 0, total_size;
|
||||
uint32 VKI;
|
||||
|
||||
/* When dynamic allocation is enabled, allow tracking the max number of
|
||||
values allowed. */
|
||||
if (!hasNonDefaultValsPerSite)
|
||||
VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
|
||||
|
||||
for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI)
|
||||
NumVSites += Data->num_value_sites[VKI];
|
||||
|
||||
/* If NumVSites = 0, calloc is allowed to return a non-null pointer. */
|
||||
bh_assert(NumVSites > 0 && "NumVSites can't be zero");
|
||||
|
||||
total_size = (uint64)sizeof(ValueProfNode *) * NumVSites;
|
||||
if (total_size > UINT32_MAX
|
||||
|| !(Mem = (ValueProfNode **)wasm_runtime_malloc((uint32)total_size))) {
|
||||
return 0;
|
||||
}
|
||||
memset(Mem, 0, (uint32)total_size);
|
||||
|
||||
if (!cmpxchg_ptr((void **)&Data->values, NULL, Mem)) {
|
||||
wasm_runtime_free(Mem);
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static ValueProfNode *
|
||||
allocateOneNode(void)
|
||||
{
|
||||
ValueProfNode *Node;
|
||||
|
||||
Node = wasm_runtime_malloc((uint32)sizeof(ValueProfNode));
|
||||
if (Node)
|
||||
memset(Node, 0, sizeof(ValueProfNode));
|
||||
return Node;
|
||||
}
|
||||
|
||||
static void
|
||||
instrumentTargetValueImpl(uint64 TargetValue, void *Data, uint32 CounterIndex,
|
||||
uint64 CountValue)
|
||||
{
|
||||
ValueProfNode **ValueCounters;
|
||||
ValueProfNode *PrevVNode = NULL, *MinCountVNode = NULL, *CurVNode;
|
||||
LLVMProfileData *PData = (LLVMProfileData *)Data;
|
||||
uint64 MinCount = UINT64_MAX;
|
||||
uint8 VDataCount = 0;
|
||||
bool success = false;
|
||||
|
||||
if (!PData)
|
||||
return;
|
||||
if (!CountValue)
|
||||
return;
|
||||
if (!PData->values) {
|
||||
if (!allocateValueProfileCounters(PData))
|
||||
return;
|
||||
}
|
||||
|
||||
ValueCounters = (ValueProfNode **)PData->values;
|
||||
CurVNode = ValueCounters[CounterIndex];
|
||||
|
||||
while (CurVNode) {
|
||||
if (TargetValue == CurVNode->value) {
|
||||
CurVNode->count += CountValue;
|
||||
return;
|
||||
}
|
||||
if (CurVNode->count < MinCount) {
|
||||
MinCount = CurVNode->count;
|
||||
MinCountVNode = CurVNode;
|
||||
}
|
||||
PrevVNode = CurVNode;
|
||||
CurVNode = CurVNode->next;
|
||||
++VDataCount;
|
||||
}
|
||||
|
||||
if (VDataCount >= VPMaxNumValsPerSite) {
|
||||
if (MinCountVNode->count <= CountValue) {
|
||||
CurVNode = MinCountVNode;
|
||||
CurVNode->value = TargetValue;
|
||||
CurVNode->count = CountValue;
|
||||
}
|
||||
else
|
||||
MinCountVNode->count -= CountValue;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
CurVNode = allocateOneNode();
|
||||
if (!CurVNode)
|
||||
return;
|
||||
CurVNode->value = TargetValue;
|
||||
CurVNode->count += CountValue;
|
||||
|
||||
if (!ValueCounters[CounterIndex]) {
|
||||
success =
|
||||
cmpxchg_ptr((void **)&ValueCounters[CounterIndex], NULL, CurVNode);
|
||||
}
|
||||
else if (PrevVNode && !PrevVNode->next) {
|
||||
success = cmpxchg_ptr((void **)&PrevVNode->next, 0, CurVNode);
|
||||
}
|
||||
|
||||
if (!success) {
|
||||
wasm_runtime_free(CurVNode);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
llvm_profile_instrument_target(uint64 target_value, void *data,
|
||||
uint32 counter_idx)
|
||||
{
|
||||
instrumentTargetValueImpl(target_value, data, counter_idx, 1);
|
||||
}
|
||||
|
||||
static inline uint32
|
||||
popcount64(uint64 u)
|
||||
{
|
||||
uint32 ret = 0;
|
||||
while (u) {
|
||||
u = (u & (u - 1));
|
||||
ret++;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline uint32
|
||||
clz64(uint64 type)
|
||||
{
|
||||
uint32 num = 0;
|
||||
if (type == 0)
|
||||
return 64;
|
||||
while (!(type & 0x8000000000000000LL)) {
|
||||
num++;
|
||||
type <<= 1;
|
||||
}
|
||||
return num;
|
||||
}
|
||||
|
||||
/* Map an (observed) memop size value to the representative value of its range.
|
||||
For example, 5 -> 5, 22 -> 17, 99 -> 65, 256 -> 256, 1001 -> 513. */
|
||||
static uint64
|
||||
InstrProfGetRangeRepValue(uint64 Value)
|
||||
{
|
||||
if (Value <= 8)
|
||||
/* The first ranges are individually tracked. Use the value as is. */
|
||||
return Value;
|
||||
else if (Value >= 513)
|
||||
/* The last range is mapped to its lowest value. */
|
||||
return 513;
|
||||
else if (popcount64(Value) == 1)
|
||||
/* If it's a power of two, use it as is. */
|
||||
return Value;
|
||||
else
|
||||
/* Otherwise, take to the previous power of two + 1. */
|
||||
return (((uint64)1) << (64 - clz64(Value) - 1)) + 1;
|
||||
}
|
||||
|
||||
void
|
||||
llvm_profile_instrument_memop(uint64 target_value, void *data,
|
||||
uint32 counter_idx)
|
||||
{
|
||||
uint64 rep_value = InstrProfGetRangeRepValue(target_value);
|
||||
instrumentTargetValueImpl(rep_value, data, counter_idx, 1);
|
||||
}
|
||||
|
||||
static uint32
|
||||
get_pgo_prof_data_size(AOTModuleInstance *module_inst, uint32 *p_num_prof_data,
|
||||
uint32 *p_num_prof_counters, uint32 *p_padding_size,
|
||||
uint32 *p_prof_counters_size, uint32 *p_prof_names_size,
|
||||
uint32 *p_value_counters_size, uint8 **p_prof_names)
|
||||
{
|
||||
AOTModule *module = (AOTModule *)module_inst->module;
|
||||
LLVMProfileData *prof_data;
|
||||
uint8 *prof_names = NULL;
|
||||
uint32 num_prof_data = 0, num_prof_counters = 0, padding_size, i;
|
||||
uint32 prof_counters_size = 0, prof_names_size = 0;
|
||||
uint32 total_size, total_size_wo_value_counters;
|
||||
|
||||
for (i = 0; i < module->data_section_count; i++) {
|
||||
if (!strncmp(module->data_sections[i].name, "__llvm_prf_data", 15)) {
|
||||
bh_assert(module->data_sections[i].size == sizeof(LLVMProfileData));
|
||||
num_prof_data++;
|
||||
prof_data = (LLVMProfileData *)module->data_sections[i].data;
|
||||
num_prof_counters += prof_data->num_counters;
|
||||
}
|
||||
else if (!strncmp(module->data_sections[i].name, "__llvm_prf_cnts",
|
||||
15)) {
|
||||
prof_counters_size += module->data_sections[i].size;
|
||||
}
|
||||
else if (!strncmp(module->data_sections[i].name, "__llvm_prf_names",
|
||||
16)) {
|
||||
prof_names_size = module->data_sections[i].size;
|
||||
prof_names = module->data_sections[i].data;
|
||||
}
|
||||
}
|
||||
|
||||
if (prof_counters_size != num_prof_counters * sizeof(uint64))
|
||||
return 0;
|
||||
|
||||
total_size = sizeof(LLVMProfileRawHeader)
|
||||
+ num_prof_data * sizeof(LLVMProfileData_64)
|
||||
+ prof_counters_size + prof_names_size;
|
||||
padding_size = sizeof(uint64) - (prof_names_size % sizeof(uint64));
|
||||
if (padding_size != sizeof(uint64))
|
||||
total_size += padding_size;
|
||||
|
||||
/* Total size excluding value counters */
|
||||
total_size_wo_value_counters = total_size;
|
||||
|
||||
for (i = 0; i < module->data_section_count; i++) {
|
||||
if (!strncmp(module->data_sections[i].name, "__llvm_prf_data", 15)) {
|
||||
uint32 j, k, num_value_sites, num_value_nodes;
|
||||
ValueProfNode **values, *value_node;
|
||||
|
||||
prof_data = (LLVMProfileData *)module->data_sections[i].data;
|
||||
values = prof_data->values;
|
||||
|
||||
if (prof_data->num_value_sites[0] > 0
|
||||
|| prof_data->num_value_sites[1] > 0) {
|
||||
/* TotalSize (uint32) and NumValueKinds (uint32) */
|
||||
total_size += 8;
|
||||
for (j = 0; j < 2; j++) {
|
||||
if ((num_value_sites = prof_data->num_value_sites[j]) > 0) {
|
||||
/* ValueKind (uint32) and NumValueSites (uint32) */
|
||||
total_size += 8;
|
||||
/* (Value + Counter) group counts of each value site,
|
||||
each count is one byte */
|
||||
total_size += align_uint(num_value_sites, 8);
|
||||
|
||||
if (values) {
|
||||
for (k = 0; k < num_value_sites; k++) {
|
||||
num_value_nodes = 0;
|
||||
value_node = *values;
|
||||
while (value_node) {
|
||||
num_value_nodes++;
|
||||
value_node = value_node->next;
|
||||
}
|
||||
if (num_value_nodes) {
|
||||
/* (Value + Counter) groups */
|
||||
total_size += num_value_nodes * 8 * 2;
|
||||
}
|
||||
values++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (p_num_prof_data)
|
||||
*p_num_prof_data = num_prof_data;
|
||||
if (p_num_prof_counters)
|
||||
*p_num_prof_counters = num_prof_counters;
|
||||
if (p_padding_size)
|
||||
*p_padding_size = padding_size;
|
||||
if (p_prof_counters_size)
|
||||
*p_prof_counters_size = prof_counters_size;
|
||||
if (p_prof_names_size)
|
||||
*p_prof_names_size = prof_names_size;
|
||||
if (p_value_counters_size)
|
||||
*p_value_counters_size = total_size - total_size_wo_value_counters;
|
||||
if (p_prof_names)
|
||||
*p_prof_names = prof_names;
|
||||
|
||||
return total_size;
|
||||
}
|
||||
|
||||
uint32
|
||||
aot_get_pgo_prof_data_size(AOTModuleInstance *module_inst)
|
||||
{
|
||||
return get_pgo_prof_data_size(module_inst, NULL, NULL, NULL, NULL, NULL,
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
/* Endianness probe: `a` is initialized to 1, so the first byte `b` reads
   back as 1 only on a little-endian host. The object name avoids a leading
   double underscore, which is reserved for the implementation
   (C11 7.1.3). */
static union {
    int a;
    char b;
} pgo_endian_probe = { .a = 1 };

/* Evaluates to 1 on a little-endian host, 0 otherwise */
#define is_little_endian() (pgo_endian_probe.b == 1)
|
||||
|
||||
uint32
|
||||
aot_dump_pgo_prof_data_to_buf(AOTModuleInstance *module_inst, char *buf,
|
||||
uint32 len)
|
||||
{
|
||||
AOTModule *module = (AOTModule *)module_inst->module;
|
||||
LLVMProfileRawHeader prof_header = { 0 };
|
||||
LLVMProfileData *prof_data;
|
||||
uint8 *prof_names = NULL;
|
||||
uint32 num_prof_data = 0, num_prof_counters = 0, padding_size, i;
|
||||
uint32 prof_counters_size = 0, prof_names_size = 0;
|
||||
uint32 value_counters_size = 0, value_counters_size_backup = 0;
|
||||
uint32 total_size, size;
|
||||
int64 counters_delta, offset_counters;
|
||||
|
||||
total_size = get_pgo_prof_data_size(module_inst, &num_prof_data,
|
||||
&num_prof_counters, &padding_size,
|
||||
&prof_counters_size, &prof_names_size,
|
||||
&value_counters_size, &prof_names);
|
||||
if (len < total_size)
|
||||
return 0;
|
||||
|
||||
value_counters_size_backup = value_counters_size;
|
||||
value_counters_size = 0;
|
||||
|
||||
prof_header.counters_delta = counters_delta =
|
||||
sizeof(LLVMProfileData_64) * num_prof_data;
|
||||
offset_counters = 0;
|
||||
for (i = 0; i < module->data_section_count; i++) {
|
||||
if (!strncmp(module->data_sections[i].name, "__llvm_prf_data", 15)) {
|
||||
prof_data = (LLVMProfileData *)module->data_sections[i].data;
|
||||
prof_data->offset_counters = counters_delta + offset_counters;
|
||||
offset_counters += prof_data->num_counters * sizeof(uint64);
|
||||
counters_delta -= sizeof(LLVMProfileData_64);
|
||||
}
|
||||
}
|
||||
|
||||
prof_header.magic = 0xFF6C70726F667281LL;
|
||||
/* Version 8 */
|
||||
prof_header.version = 0x0000000000000008LL;
|
||||
/* with VARIANT_MASK_IR_PROF (IR Instrumentation) */
|
||||
prof_header.version |= 0x1ULL << 56;
|
||||
/* with VARIANT_MASK_MEMPROF (Memory Profile) */
|
||||
prof_header.version |= 0x1ULL << 62;
|
||||
prof_header.num_prof_data = num_prof_data;
|
||||
prof_header.num_prof_counters = num_prof_counters;
|
||||
prof_header.names_size = prof_names_size;
|
||||
prof_header.value_kind_last = 1;
|
||||
|
||||
if (!is_little_endian()) {
|
||||
aot_exchange_uint64((uint8 *)&prof_header.magic);
|
||||
aot_exchange_uint64((uint8 *)&prof_header.version);
|
||||
aot_exchange_uint64((uint8 *)&prof_header.num_prof_data);
|
||||
aot_exchange_uint64((uint8 *)&prof_header.num_prof_counters);
|
||||
aot_exchange_uint64((uint8 *)&prof_header.names_size);
|
||||
aot_exchange_uint64((uint8 *)&prof_header.counters_delta);
|
||||
aot_exchange_uint64((uint8 *)&prof_header.value_kind_last);
|
||||
}
|
||||
|
||||
size = sizeof(LLVMProfileRawHeader);
|
||||
bh_memcpy_s(buf, size, &prof_header, size);
|
||||
buf += size;
|
||||
|
||||
for (i = 0; i < module->data_section_count; i++) {
|
||||
if (!strncmp(module->data_sections[i].name, "__llvm_prf_data", 15)) {
|
||||
LLVMProfileData_64 *prof_data_64 = (LLVMProfileData_64 *)buf;
|
||||
|
||||
/* Convert LLVMProfileData to LLVMProfileData_64, the pointer width
|
||||
in the output file is always 8 bytes */
|
||||
prof_data = (LLVMProfileData *)module->data_sections[i].data;
|
||||
prof_data_64->func_md5 = prof_data->func_md5;
|
||||
prof_data_64->func_hash = prof_data->func_hash;
|
||||
prof_data_64->offset_counters = prof_data->offset_counters;
|
||||
prof_data_64->func_ptr = prof_data->func_ptr;
|
||||
prof_data_64->values = (uint64)(uintptr_t)prof_data->values;
|
||||
prof_data_64->num_counters = prof_data->num_counters;
|
||||
prof_data_64->num_value_sites[0] = prof_data->num_value_sites[0];
|
||||
prof_data_64->num_value_sites[1] = prof_data->num_value_sites[1];
|
||||
|
||||
if (!is_little_endian()) {
|
||||
aot_exchange_uint64((uint8 *)&prof_data_64->func_hash);
|
||||
aot_exchange_uint64((uint8 *)&prof_data_64->offset_counters);
|
||||
aot_exchange_uint64((uint8 *)&prof_data_64->offset_counters);
|
||||
aot_exchange_uint64((uint8 *)&prof_data_64->func_ptr);
|
||||
aot_exchange_uint64((uint8 *)&prof_data_64->values);
|
||||
aot_exchange_uint32((uint8 *)&prof_data_64->num_counters);
|
||||
aot_exchange_uint16((uint8 *)&prof_data_64->num_value_sites[0]);
|
||||
aot_exchange_uint16((uint8 *)&prof_data_64->num_value_sites[1]);
|
||||
}
|
||||
buf += sizeof(LLVMProfileData_64);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < module->data_section_count; i++) {
|
||||
if (!strncmp(module->data_sections[i].name, "__llvm_prf_cnts", 15)) {
|
||||
size = module->data_sections[i].size;
|
||||
bh_memcpy_s(buf, size, module->data_sections[i].data, size);
|
||||
buf += size;
|
||||
}
|
||||
}
|
||||
|
||||
if (prof_names && prof_names_size > 0) {
|
||||
size = prof_names_size;
|
||||
bh_memcpy_s(buf, size, prof_names, size);
|
||||
buf += size;
|
||||
padding_size = sizeof(uint64) - (prof_names_size % sizeof(uint64));
|
||||
if (padding_size != sizeof(uint64)) {
|
||||
char padding_buf[8] = { 0 };
|
||||
bh_memcpy_s(buf, padding_size, padding_buf, padding_size);
|
||||
buf += padding_size;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < module->data_section_count; i++) {
|
||||
if (!strncmp(module->data_sections[i].name, "__llvm_prf_data", 15)) {
|
||||
uint32 j, k, num_value_sites, num_value_nodes;
|
||||
ValueProfNode **values, **values_tmp, *value_node;
|
||||
|
||||
prof_data = (LLVMProfileData *)module->data_sections[i].data;
|
||||
values = values_tmp = prof_data->values;
|
||||
|
||||
if (prof_data->num_value_sites[0] > 0
|
||||
|| prof_data->num_value_sites[1] > 0) {
|
||||
uint32 *buf_total_size = (uint32 *)buf;
|
||||
|
||||
buf += 4; /* emit TotalSize later */
|
||||
*(uint32 *)buf = (prof_data->num_value_sites[0] > 0
|
||||
&& prof_data->num_value_sites[1] > 0)
|
||||
? 2
|
||||
: 1;
|
||||
if (!is_little_endian())
|
||||
aot_exchange_uint32((uint8 *)buf);
|
||||
buf += 4;
|
||||
|
||||
for (j = 0; j < 2; j++) {
|
||||
if ((num_value_sites = prof_data->num_value_sites[j]) > 0) {
|
||||
/* ValueKind */
|
||||
*(uint32 *)buf = j;
|
||||
if (!is_little_endian())
|
||||
aot_exchange_uint32((uint8 *)buf);
|
||||
buf += 4;
|
||||
/* NumValueSites */
|
||||
*(uint32 *)buf = num_value_sites;
|
||||
if (!is_little_endian())
|
||||
aot_exchange_uint32((uint8 *)buf);
|
||||
buf += 4;
|
||||
|
||||
for (k = 0; k < num_value_sites; k++) {
|
||||
num_value_nodes = 0;
|
||||
if (values_tmp) {
|
||||
value_node = *values_tmp;
|
||||
while (value_node) {
|
||||
num_value_nodes++;
|
||||
value_node = value_node->next;
|
||||
}
|
||||
values_tmp++;
|
||||
}
|
||||
bh_assert(num_value_nodes < 255);
|
||||
*(uint8 *)buf++ = (uint8)num_value_nodes;
|
||||
}
|
||||
if (num_value_sites % 8) {
|
||||
buf += 8 - (num_value_sites % 8);
|
||||
}
|
||||
|
||||
for (k = 0; k < num_value_sites; k++) {
|
||||
if (values) {
|
||||
value_node = *values;
|
||||
while (value_node) {
|
||||
*(uint64 *)buf = value_node->value;
|
||||
if (!is_little_endian())
|
||||
aot_exchange_uint64((uint8 *)buf);
|
||||
buf += 8;
|
||||
*(uint64 *)buf = value_node->count;
|
||||
if (!is_little_endian())
|
||||
aot_exchange_uint64((uint8 *)buf);
|
||||
buf += 8;
|
||||
value_node = value_node->next;
|
||||
}
|
||||
values++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* TotalSize */
|
||||
*(uint32 *)buf_total_size =
|
||||
(uint8 *)buf - (uint8 *)buf_total_size;
|
||||
if (!is_little_endian())
|
||||
aot_exchange_uint64((uint8 *)buf_total_size);
|
||||
value_counters_size += (uint8 *)buf - (uint8 *)buf_total_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bh_assert(value_counters_size == value_counters_size_backup);
|
||||
(void)value_counters_size_backup;
|
||||
|
||||
return total_size;
|
||||
}
|
||||
#endif /* end of WASM_ENABLE_STATIC_PGO != 0 */
|
||||
|
||||
@ -41,6 +41,10 @@ typedef struct AOTObjectDataSection {
|
||||
char *name;
|
||||
uint8 *data;
|
||||
uint32 size;
|
||||
#if WASM_ENABLE_WAMR_COMPILER != 0 || WASM_ENABLE_JIT != 0
|
||||
bool is_name_allocated;
|
||||
bool is_data_allocated;
|
||||
#endif
|
||||
} AOTObjectDataSection;
|
||||
|
||||
/* Relocation info */
|
||||
@ -51,6 +55,9 @@ typedef struct AOTRelocation {
|
||||
char *symbol_name;
|
||||
/* index in the symbol offset field */
|
||||
uint32 symbol_index;
|
||||
#if WASM_ENABLE_WAMR_COMPILER != 0 || WASM_ENABLE_JIT != 0
|
||||
bool is_symbol_name_allocated;
|
||||
#endif
|
||||
} AOTRelocation;
|
||||
|
||||
/* Relocation Group */
|
||||
@ -60,6 +67,9 @@ typedef struct AOTRelocationGroup {
|
||||
uint32 name_index;
|
||||
uint32 relocation_count;
|
||||
AOTRelocation *relocations;
|
||||
#if WASM_ENABLE_WAMR_COMPILER != 0 || WASM_ENABLE_JIT != 0
|
||||
bool is_section_name_allocated;
|
||||
#endif
|
||||
} AOTRelocationGroup;
|
||||
|
||||
/* AOT function instance */
|
||||
@ -108,6 +118,13 @@ typedef struct AOTUnwindInfo {
|
||||
#define PLT_ITEM_SIZE 12
|
||||
#endif
|
||||
|
||||
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
|
||||
/* Node of a singly linked list in which every node carries one function
   index; GOTItemList is the pointer type used for list head/tail.
   NOTE(review): the name and the got_* fields of AOTModule suggest these
   are GOT entries backing R_X86_64_GOTPCREL relocations - confirm */
typedef struct GOTItem {
|
||||
/* index of the function this item refers to */
uint32 func_idx;
|
||||
/* next item in the list */
struct GOTItem *next;
|
||||
} GOTItem, *GOTItemList;
|
||||
#endif
|
||||
|
||||
typedef struct AOTModule {
|
||||
uint32 module_type;
|
||||
|
||||
@ -204,6 +221,13 @@ typedef struct AOTModule {
|
||||
bool rtl_func_table_registered;
|
||||
#endif
|
||||
|
||||
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
|
||||
uint32 got_item_count;
|
||||
GOTItemList got_item_list;
|
||||
GOTItemList got_item_list_end;
|
||||
void **got_func_ptrs;
|
||||
#endif
|
||||
|
||||
/* data sections in AOT object file, including .data, .rodata
|
||||
and .rodata.cstN. */
|
||||
AOTObjectDataSection *data_sections;
|
||||
@ -294,6 +318,54 @@ typedef struct AOTFrame {
|
||||
#endif
|
||||
} AOTFrame;
|
||||
|
||||
#if WASM_ENABLE_STATIC_PGO != 0
|
||||
/* Header record of the PGO profile data; every field is a 64-bit
   unsigned integer, so the record layout does not depend on the
   pointer width of the target.
   NOTE(review): the field names suggest this mirrors the header of
   LLVM's raw profile format - confirm the field order against the
   LLVM version in use before changing anything here */
typedef struct LLVMProfileRawHeader {
|
||||
uint64 magic;
|
||||
uint64 version;
|
||||
uint64 binary_ids_size;
|
||||
uint64 num_prof_data;
|
||||
uint64 padding_bytes_before_counters;
|
||||
uint64 num_prof_counters;
|
||||
uint64 padding_bytes_after_counters;
|
||||
uint64 names_size;
|
||||
uint64 counters_delta;
|
||||
uint64 names_delta;
|
||||
uint64 value_kind_last;
|
||||
} LLVMProfileRawHeader;
|
||||
|
||||
/* One record of a value-profiling site. The records of a site are
   chained through `next` into a singly linked list; when the profile
   is dumped, `value` and then `count` of every node are written out
   as two 64-bit integers */
typedef struct ValueProfNode {
|
||||
/* the profiled value */
uint64 value;
|
||||
/* counter stored with the value (presumably its hit count - confirm) */
uint64 count;
|
||||
/* next record of the same site, NULL at the end of the list */
struct ValueProfNode *next;
|
||||
} ValueProfNode;
|
||||
|
||||
/* The profiling data of data sections created by aot compiler and
|
||||
used when profiling, the width of pointer can be 8 bytes (64-bit)
|
||||
or 4 bytes (32-bit) */
|
||||
typedef struct LLVMProfileData {
|
||||
/* NOTE(review): presumably the MD5 of the function name - confirm */
uint64 func_md5;
|
||||
uint64 func_hash;
|
||||
uint64 offset_counters;
|
||||
/* pointer-sized, so 4 or 8 bytes depending on the target */
uintptr_t func_ptr;
|
||||
/* array of list heads, one per value site (the sites of kind 0
   followed by the sites of kind 1); may be NULL */
ValueProfNode **values;
|
||||
uint32 num_counters;
|
||||
/* number of value sites, indexed by value kind */
uint16 num_value_sites[2];
|
||||
} LLVMProfileData;
|
||||
|
||||
/* The profiling data for writing to the output file, the width of
|
||||
pointer is 8 bytes, assuming we always use wamrc and llvm-profdata
|
||||
with 64-bit mode */
|
||||
/* Same fields, in the same order, as LLVMProfileData, but func_ptr and
   values are fixed at 64 bits so the record layout is independent of
   the pointer width of the running target */
typedef struct LLVMProfileData_64 {
|
||||
uint64 func_md5;
|
||||
uint64 func_hash;
|
||||
uint64 offset_counters;
|
||||
/* LLVMProfileData.func_ptr widened to 64 bits */
uint64 func_ptr;
|
||||
/* LLVMProfileData.values converted to an integer and widened to
   64 bits */
uint64 values;
|
||||
uint32 num_counters;
|
||||
/* number of value sites, indexed by value kind */
uint16 num_value_sites[2];
|
||||
} LLVMProfileData_64;
|
||||
#endif /* end of WASM_ENABLE_STATIC_PGO != 0 */
|
||||
|
||||
/**
|
||||
* Load a AOT module from aot file buffer
|
||||
* @param buf the byte buffer which contains the AOT file data
|
||||
@ -564,6 +636,32 @@ aot_dump_perf_profiling(const AOTModuleInstance *module_inst);
|
||||
const uint8 *
|
||||
aot_get_custom_section(const AOTModule *module, const char *name, uint32 *len);
|
||||
|
||||
#if WASM_ENABLE_STATIC_PGO != 0
|
||||
void
|
||||
llvm_profile_instrument_target(uint64 target_value, void *data,
|
||||
uint32 counter_idx);
|
||||
|
||||
void
|
||||
llvm_profile_instrument_memop(uint64 target_value, void *data,
|
||||
uint32 counter_idx);
|
||||
|
||||
uint32
|
||||
aot_get_pgo_prof_data_size(AOTModuleInstance *module_inst);
|
||||
|
||||
uint32
|
||||
aot_dump_pgo_prof_data_to_buf(AOTModuleInstance *module_inst, char *buf,
|
||||
uint32 len);
|
||||
|
||||
void
|
||||
aot_exchange_uint16(uint8 *p_data);
|
||||
|
||||
void
|
||||
aot_exchange_uint32(uint8 *p_data);
|
||||
|
||||
void
|
||||
aot_exchange_uint64(uint8 *p_data);
|
||||
#endif /* end of WASM_ENABLE_STATIC_PGO != 0 */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* end of extern "C" */
|
||||
#endif
|
||||
|
||||
@ -8,6 +8,9 @@
|
||||
#define R_386_32 1 /* Direct 32 bit */
|
||||
#define R_386_PC32 2 /* PC relative 32 bit */
|
||||
#define R_386_PLT32 4 /* 32-bit address ProcedureLinkageTable */
|
||||
#define R_386_TLS_GD_32 \
|
||||
24 /* Direct 32 bit for general dynamic \
|
||||
thread local data */
|
||||
|
||||
#if !defined(_WIN32) && !defined(_WIN32_)
|
||||
/* clang-format off */
|
||||
@ -110,6 +113,9 @@ apply_relocation(AOTModule *module, uint8 *target_section_addr,
|
||||
{
|
||||
switch (reloc_type) {
|
||||
case R_386_32:
|
||||
#if WASM_ENABLE_STATIC_PGO != 0
|
||||
case R_386_TLS_GD_32:
|
||||
#endif
|
||||
{
|
||||
intptr_t value;
|
||||
|
||||
|
||||
@ -6,11 +6,13 @@
|
||||
#include "aot_reloc.h"
|
||||
|
||||
#if !defined(BH_PLATFORM_WINDOWS)
|
||||
#define R_X86_64_64 1 /* Direct 64 bit */
|
||||
#define R_X86_64_PC32 2 /* PC relative 32 bit signed */
|
||||
#define R_X86_64_PLT32 4 /* 32 bit PLT address */
|
||||
#define R_X86_64_32 10 /* Direct 32 bit zero extended */
|
||||
#define R_X86_64_32S 11 /* Direct 32 bit sign extended */
|
||||
#define R_X86_64_64 1 /* Direct 64 bit */
|
||||
#define R_X86_64_PC32 2 /* PC relative 32 bit signed */
|
||||
#define R_X86_64_PLT32 4 /* 32 bit PLT address */
|
||||
#define R_X86_64_GOTPCREL 9 /* 32 bit signed PC relative offset to GOT */
|
||||
#define R_X86_64_32 10 /* Direct 32 bit zero extended */
|
||||
#define R_X86_64_32S 11 /* Direct 32 bit sign extended */
|
||||
#define R_X86_64_PC64 24 /* PC relative 64 bit */
|
||||
#else
|
||||
#ifndef IMAGE_REL_AMD64_ADDR64
|
||||
#define IMAGE_REL_AMD64_ADDR64 1 /* The 64-bit VA of the relocation target */
|
||||
@ -164,6 +166,7 @@ apply_relocation(AOTModule *module, uint8 *target_section_addr,
|
||||
#endif
|
||||
#if !defined(BH_PLATFORM_WINDOWS)
|
||||
case R_X86_64_PC32:
|
||||
case R_X86_64_GOTPCREL: /* GOT + G has been calculated as symbol_addr */
|
||||
{
|
||||
intptr_t target_addr = (intptr_t) /* S + A - P */
|
||||
((uintptr_t)symbol_addr + reloc_addend
|
||||
@ -182,6 +185,16 @@ apply_relocation(AOTModule *module, uint8 *target_section_addr,
|
||||
*(int32 *)(target_section_addr + reloc_offset) = (int32)target_addr;
|
||||
break;
|
||||
}
|
||||
case R_X86_64_PC64:
|
||||
{
|
||||
intptr_t target_addr = (intptr_t) /* S + A - P */
|
||||
((uintptr_t)symbol_addr + reloc_addend
|
||||
- (uintptr_t)(target_section_addr + reloc_offset));
|
||||
|
||||
CHECK_RELOC_OFFSET(sizeof(int64));
|
||||
*(int64 *)(target_section_addr + reloc_offset) = (int64)target_addr;
|
||||
break;
|
||||
}
|
||||
case R_X86_64_32:
|
||||
case R_X86_64_32S:
|
||||
{
|
||||
|
||||
@ -5033,6 +5033,33 @@ wasm_runtime_dump_call_stack_to_buf(wasm_exec_env_t exec_env, char *buf,
|
||||
}
|
||||
#endif /* end of WASM_ENABLE_DUMP_CALL_STACK */
|
||||
|
||||
#if WASM_ENABLE_STATIC_PGO != 0
|
||||
uint32
|
||||
wasm_runtime_get_pgo_prof_data_size(WASMModuleInstanceCommon *module_inst)
|
||||
{
|
||||
#if WASM_ENABLE_AOT != 0
|
||||
if (module_inst->module_type == Wasm_Module_AoT) {
|
||||
AOTModuleInstance *aot_inst = (AOTModuleInstance *)module_inst;
|
||||
return aot_get_pgo_prof_data_size(aot_inst);
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32
|
||||
wasm_runtime_dump_pgo_prof_data_to_buf(WASMModuleInstanceCommon *module_inst,
|
||||
char *buf, uint32 len)
|
||||
{
|
||||
#if WASM_ENABLE_AOT != 0
|
||||
if (module_inst->module_type == Wasm_Module_AoT) {
|
||||
AOTModuleInstance *aot_inst = (AOTModuleInstance *)module_inst;
|
||||
return aot_dump_pgo_prof_data_to_buf(aot_inst, buf, len);
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
#endif /* end of WASM_ENABLE_STATIC_PGO != 0 */
|
||||
|
||||
bool
|
||||
wasm_runtime_get_table_elem_type(const WASMModuleCommon *module_comm,
|
||||
uint32 table_idx, uint8 *out_elem_type,
|
||||
|
||||
@ -111,6 +111,8 @@ typedef struct AOTSymbolList {
|
||||
|
||||
/* AOT object data */
|
||||
typedef struct AOTObjectData {
|
||||
AOTCompContext *comp_ctx;
|
||||
|
||||
LLVMMemoryBufferRef mem_buf;
|
||||
LLVMBinaryRef binary;
|
||||
|
||||
@ -119,6 +121,12 @@ typedef struct AOTObjectData {
|
||||
void *text;
|
||||
uint32 text_size;
|
||||
|
||||
void *text_unlikely;
|
||||
uint32 text_unlikely_size;
|
||||
|
||||
void *text_hot;
|
||||
uint32 text_hot_size;
|
||||
|
||||
/* literal data and size */
|
||||
void *literal;
|
||||
uint32 literal_size;
|
||||
@ -558,8 +566,10 @@ get_init_data_section_size(AOTCompContext *comp_ctx, AOTCompData *comp_data,
|
||||
static uint32
|
||||
get_text_section_size(AOTObjectData *obj_data)
|
||||
{
|
||||
return (sizeof(uint32) + obj_data->literal_size + obj_data->text_size + 3)
|
||||
& ~3;
|
||||
return sizeof(uint32) + align_uint(obj_data->literal_size, 4)
|
||||
+ align_uint(obj_data->text_size, 4)
|
||||
+ align_uint(obj_data->text_unlikely_size, 4)
|
||||
+ align_uint(obj_data->text_hot_size, 4);
|
||||
}
|
||||
|
||||
static uint32
|
||||
@ -1702,12 +1712,28 @@ aot_emit_text_section(uint8 *buf, uint8 *buf_end, uint32 *p_offset,
|
||||
EMIT_U32(AOT_SECTION_TYPE_TEXT);
|
||||
EMIT_U32(section_size);
|
||||
EMIT_U32(obj_data->literal_size);
|
||||
if (obj_data->literal_size > 0)
|
||||
EMIT_BUF(obj_data->literal, obj_data->literal_size);
|
||||
EMIT_BUF(obj_data->text, obj_data->text_size);
|
||||
|
||||
while (offset & 3)
|
||||
EMIT_BUF(&placeholder, 1);
|
||||
if (obj_data->literal_size > 0) {
|
||||
EMIT_BUF(obj_data->literal, obj_data->literal_size);
|
||||
while (offset & 3)
|
||||
EMIT_BUF(&placeholder, 1);
|
||||
}
|
||||
|
||||
if (obj_data->text_size > 0) {
|
||||
EMIT_BUF(obj_data->text, obj_data->text_size);
|
||||
while (offset & 3)
|
||||
EMIT_BUF(&placeholder, 1);
|
||||
}
|
||||
if (obj_data->text_unlikely_size > 0) {
|
||||
EMIT_BUF(obj_data->text_unlikely, obj_data->text_unlikely_size);
|
||||
while (offset & 3)
|
||||
EMIT_BUF(&placeholder, 1);
|
||||
}
|
||||
if (obj_data->text_hot_size > 0) {
|
||||
EMIT_BUF(obj_data->text_hot, obj_data->text_hot_size);
|
||||
while (offset & 3)
|
||||
EMIT_BUF(&placeholder, 1);
|
||||
}
|
||||
|
||||
if (offset - *p_offset != section_size + sizeof(uint32) * 2) {
|
||||
aot_set_last_error("emit text section failed.");
|
||||
@ -2211,11 +2237,23 @@ aot_resolve_text(AOTObjectData *obj_data)
|
||||
}
|
||||
while (
|
||||
!LLVMObjectFileIsSectionIteratorAtEnd(obj_data->binary, sec_itr)) {
|
||||
if ((name = (char *)LLVMGetSectionName(sec_itr))
|
||||
&& !strcmp(name, ".text")) {
|
||||
obj_data->text = (char *)LLVMGetSectionContents(sec_itr);
|
||||
obj_data->text_size = (uint32)LLVMGetSectionSize(sec_itr);
|
||||
break;
|
||||
if ((name = (char *)LLVMGetSectionName(sec_itr))) {
|
||||
if (!strcmp(name, ".text")) {
|
||||
obj_data->text = (char *)LLVMGetSectionContents(sec_itr);
|
||||
obj_data->text_size = (uint32)LLVMGetSectionSize(sec_itr);
|
||||
}
|
||||
else if (!strcmp(name, ".text.unlikely.")) {
|
||||
obj_data->text_unlikely =
|
||||
(char *)LLVMGetSectionContents(sec_itr);
|
||||
obj_data->text_unlikely_size =
|
||||
(uint32)LLVMGetSectionSize(sec_itr);
|
||||
}
|
||||
else if (!strcmp(name, ".text.hot.")) {
|
||||
obj_data->text_hot =
|
||||
(char *)LLVMGetSectionContents(sec_itr);
|
||||
obj_data->text_hot_size =
|
||||
(uint32)LLVMGetSectionSize(sec_itr);
|
||||
}
|
||||
}
|
||||
LLVMMoveToNextSection(sec_itr);
|
||||
}
|
||||
@ -2253,7 +2291,8 @@ static bool
|
||||
get_relocations_count(LLVMSectionIteratorRef sec_itr, uint32 *p_count);
|
||||
|
||||
static bool
|
||||
is_data_section(LLVMSectionIteratorRef sec_itr, char *section_name)
|
||||
is_data_section(AOTObjectData *obj_data, LLVMSectionIteratorRef sec_itr,
|
||||
char *section_name)
|
||||
{
|
||||
uint32 relocation_count = 0;
|
||||
|
||||
@ -2265,7 +2304,11 @@ is_data_section(LLVMSectionIteratorRef sec_itr, char *section_name)
|
||||
|| !strncmp(section_name, ".rodata.str", strlen(".rodata.str"))
|
||||
|| (!strcmp(section_name, ".rdata")
|
||||
&& get_relocations_count(sec_itr, &relocation_count)
|
||||
&& relocation_count > 0));
|
||||
&& relocation_count > 0)
|
||||
|| (obj_data->comp_ctx->enable_llvm_pgo
|
||||
&& (!strncmp(section_name, "__llvm_prf_cnts", 15)
|
||||
|| !strncmp(section_name, "__llvm_prf_data", 15)
|
||||
|| !strncmp(section_name, "__llvm_prf_names", 16))));
|
||||
}
|
||||
|
||||
static bool
|
||||
@ -2281,7 +2324,7 @@ get_object_data_sections_count(AOTObjectData *obj_data, uint32 *p_count)
|
||||
}
|
||||
while (!LLVMObjectFileIsSectionIteratorAtEnd(obj_data->binary, sec_itr)) {
|
||||
if ((name = (char *)LLVMGetSectionName(sec_itr))
|
||||
&& (is_data_section(sec_itr, name))) {
|
||||
&& (is_data_section(obj_data, sec_itr, name))) {
|
||||
count++;
|
||||
}
|
||||
LLVMMoveToNextSection(sec_itr);
|
||||
@ -2306,6 +2349,9 @@ aot_resolve_object_data_sections(AOTObjectData *obj_data)
|
||||
}
|
||||
|
||||
if (sections_count > 0) {
|
||||
uint32 llvm_prf_cnts_idx = 0, llvm_prf_data_idx = 0;
|
||||
char buf[32];
|
||||
|
||||
size = (uint32)sizeof(AOTObjectDataSection) * sections_count;
|
||||
if (!(data_section = obj_data->data_sections =
|
||||
wasm_runtime_malloc(size))) {
|
||||
@ -2322,10 +2368,46 @@ aot_resolve_object_data_sections(AOTObjectData *obj_data)
|
||||
while (
|
||||
!LLVMObjectFileIsSectionIteratorAtEnd(obj_data->binary, sec_itr)) {
|
||||
if ((name = (char *)LLVMGetSectionName(sec_itr))
|
||||
&& (is_data_section(sec_itr, name))) {
|
||||
&& (is_data_section(obj_data, sec_itr, name))) {
|
||||
data_section->name = name;
|
||||
data_section->data = (uint8 *)LLVMGetSectionContents(sec_itr);
|
||||
data_section->size = (uint32)LLVMGetSectionSize(sec_itr);
|
||||
if (obj_data->comp_ctx->enable_llvm_pgo
|
||||
&& !strcmp(name, "__llvm_prf_cnts")) {
|
||||
snprintf(buf, sizeof(buf), "%s%u", name,
|
||||
llvm_prf_cnts_idx++);
|
||||
size = strlen(buf) + 1;
|
||||
if (!(data_section->name = wasm_runtime_malloc(size))) {
|
||||
aot_set_last_error(
|
||||
"allocate memory for data section name failed.");
|
||||
return false;
|
||||
}
|
||||
bh_memcpy_s(data_section->name, size, buf, size);
|
||||
data_section->is_name_allocated = true;
|
||||
}
|
||||
else if (obj_data->comp_ctx->enable_llvm_pgo
|
||||
&& !strcmp(name, "__llvm_prf_data")) {
|
||||
snprintf(buf, sizeof(buf), "%s%u", name,
|
||||
llvm_prf_data_idx++);
|
||||
size = strlen(buf) + 1;
|
||||
if (!(data_section->name = wasm_runtime_malloc(size))) {
|
||||
aot_set_last_error(
|
||||
"allocate memory for data section name failed.");
|
||||
return false;
|
||||
}
|
||||
bh_memcpy_s(data_section->name, size, buf, size);
|
||||
data_section->is_name_allocated = true;
|
||||
}
|
||||
|
||||
if (obj_data->comp_ctx->enable_llvm_pgo
|
||||
&& !strcmp(name, "__llvm_prf_names")) {
|
||||
data_section->data = (uint8 *)aot_compress_aot_func_names(
|
||||
obj_data->comp_ctx, &data_section->size);
|
||||
data_section->is_data_allocated = true;
|
||||
}
|
||||
else {
|
||||
data_section->data =
|
||||
(uint8 *)LLVMGetSectionContents(sec_itr);
|
||||
data_section->size = (uint32)LLVMGetSectionSize(sec_itr);
|
||||
}
|
||||
data_section++;
|
||||
}
|
||||
LLVMMoveToNextSection(sec_itr);
|
||||
@ -2365,9 +2447,36 @@ aot_resolve_functions(AOTCompContext *comp_ctx, AOTObjectData *obj_data)
|
||||
&& str_starts_with(name, prefix)) {
|
||||
func_index = (uint32)atoi(name + strlen(prefix));
|
||||
if (func_index < obj_data->func_count) {
|
||||
LLVMSectionIteratorRef contain_section;
|
||||
char *contain_section_name;
|
||||
|
||||
func = obj_data->funcs + func_index;
|
||||
func->func_name = name;
|
||||
func->text_offset = LLVMGetSymbolAddress(sym_itr);
|
||||
|
||||
if (!(contain_section = LLVMObjectFileCopySectionIterator(
|
||||
obj_data->binary))) {
|
||||
aot_set_last_error("llvm get section iterator failed.");
|
||||
LLVMDisposeSymbolIterator(sym_itr);
|
||||
return false;
|
||||
}
|
||||
LLVMMoveToContainingSection(contain_section, sym_itr);
|
||||
contain_section_name =
|
||||
(char *)LLVMGetSectionName(contain_section);
|
||||
LLVMDisposeSectionIterator(contain_section);
|
||||
|
||||
if (!strcmp(contain_section_name, ".text.unlikely.")) {
|
||||
func->text_offset = align_uint(obj_data->text_size, 4)
|
||||
+ LLVMGetSymbolAddress(sym_itr);
|
||||
}
|
||||
else if (!strcmp(contain_section_name, ".text.hot.")) {
|
||||
func->text_offset =
|
||||
align_uint(obj_data->text_size, 4)
|
||||
+ align_uint(obj_data->text_unlikely_size, 4)
|
||||
+ LLVMGetSymbolAddress(sym_itr);
|
||||
}
|
||||
else {
|
||||
func->text_offset = LLVMGetSymbolAddress(sym_itr);
|
||||
}
|
||||
}
|
||||
}
|
||||
LLVMMoveToNextSymbol(sym_itr);
|
||||
@ -2478,9 +2587,86 @@ aot_resolve_object_relocation_group(AOTObjectData *obj_data,
|
||||
}
|
||||
|
||||
/* set relocation fields */
|
||||
relocation->relocation_offset = offset;
|
||||
relocation->relocation_type = (uint32)type;
|
||||
relocation->symbol_name = (char *)LLVMGetSymbolName(rel_sym);
|
||||
relocation->relocation_offset = offset;
|
||||
if (!strcmp(group->section_name, ".rela.text.unlikely.")
|
||||
|| !strcmp(group->section_name, ".rel.text.unlikely.")) {
|
||||
relocation->relocation_offset += align_uint(obj_data->text_size, 4);
|
||||
}
|
||||
else if (!strcmp(group->section_name, ".rela.text.hot.")
|
||||
|| !strcmp(group->section_name, ".rel.text.hot.")) {
|
||||
relocation->relocation_offset +=
|
||||
align_uint(obj_data->text_size, 4)
|
||||
+ align_uint(obj_data->text_unlikely_size, 4);
|
||||
}
|
||||
if (!strcmp(relocation->symbol_name, ".text.unlikely.")) {
|
||||
relocation->symbol_name = ".text";
|
||||
relocation->relocation_addend += align_uint(obj_data->text_size, 4);
|
||||
}
|
||||
if (!strcmp(relocation->symbol_name, ".text.hot.")) {
|
||||
relocation->symbol_name = ".text";
|
||||
relocation->relocation_addend +=
|
||||
align_uint(obj_data->text_size, 4)
|
||||
+ align_uint(obj_data->text_unlikely_size, 4);
|
||||
}
|
||||
|
||||
if (obj_data->comp_ctx->enable_llvm_pgo
|
||||
&& (!strcmp(relocation->symbol_name, "__llvm_prf_cnts")
|
||||
|| !strcmp(relocation->symbol_name, "__llvm_prf_data"))) {
|
||||
LLVMSectionIteratorRef sec_itr;
|
||||
char buf[32], *section_name;
|
||||
uint32 prof_section_idx = 0;
|
||||
|
||||
if (!(sec_itr =
|
||||
LLVMObjectFileCopySectionIterator(obj_data->binary))) {
|
||||
aot_set_last_error("llvm get section iterator failed.");
|
||||
LLVMDisposeSymbolIterator(rel_sym);
|
||||
goto fail;
|
||||
}
|
||||
while (!LLVMObjectFileIsSectionIteratorAtEnd(obj_data->binary,
|
||||
sec_itr)) {
|
||||
section_name = (char *)LLVMGetSectionName(sec_itr);
|
||||
if (section_name
|
||||
&& !strcmp(section_name, relocation->symbol_name)) {
|
||||
if (LLVMGetSectionContainsSymbol(sec_itr, rel_sym))
|
||||
break;
|
||||
prof_section_idx++;
|
||||
}
|
||||
LLVMMoveToNextSection(sec_itr);
|
||||
}
|
||||
LLVMDisposeSectionIterator(sec_itr);
|
||||
|
||||
if (!strcmp(group->section_name, ".rela.text")
|
||||
|| !strcmp(group->section_name, ".rel.text")) {
|
||||
snprintf(buf, sizeof(buf), "%s%u", relocation->symbol_name,
|
||||
prof_section_idx);
|
||||
size = strlen(buf) + 1;
|
||||
if (!(relocation->symbol_name = wasm_runtime_malloc(size))) {
|
||||
aot_set_last_error(
|
||||
"allocate memory for relocation symbol name failed.");
|
||||
LLVMDisposeSymbolIterator(rel_sym);
|
||||
goto fail;
|
||||
}
|
||||
bh_memcpy_s(relocation->symbol_name, size, buf, size);
|
||||
relocation->is_symbol_name_allocated = true;
|
||||
}
|
||||
else if (!strncmp(group->section_name, ".rela__llvm_prf_data", 20)
|
||||
|| !strncmp(group->section_name, ".rel__llvm_prf_data",
|
||||
19)) {
|
||||
snprintf(buf, sizeof(buf), "%s%u", relocation->symbol_name,
|
||||
prof_section_idx);
|
||||
size = strlen(buf) + 1;
|
||||
if (!(relocation->symbol_name = wasm_runtime_malloc(size))) {
|
||||
aot_set_last_error(
|
||||
"allocate memory for relocation symbol name failed.");
|
||||
LLVMDisposeSymbolIterator(rel_sym);
|
||||
goto fail;
|
||||
}
|
||||
bh_memcpy_s(relocation->symbol_name, size, buf, size);
|
||||
relocation->is_symbol_name_allocated = true;
|
||||
}
|
||||
}
|
||||
|
||||
/* for ".LCPIxxx", ".LJTIxxx", ".LBBxxx" and switch lookup table
|
||||
* relocation, transform the symbol name to real section name and set
|
||||
@ -2525,10 +2711,14 @@ fail:
|
||||
}
|
||||
|
||||
static bool
|
||||
is_relocation_section_name(char *section_name)
|
||||
is_relocation_section_name(AOTObjectData *obj_data, char *section_name)
|
||||
{
|
||||
return (!strcmp(section_name, ".rela.text")
|
||||
|| !strcmp(section_name, ".rel.text")
|
||||
|| !strcmp(section_name, ".rela.text.unlikely.")
|
||||
|| !strcmp(section_name, ".rel.text.unlikely.")
|
||||
|| !strcmp(section_name, ".rela.text.hot.")
|
||||
|| !strcmp(section_name, ".rel.text.hot.")
|
||||
|| !strcmp(section_name, ".rela.literal")
|
||||
|| !strcmp(section_name, ".rela.data")
|
||||
|| !strcmp(section_name, ".rel.data")
|
||||
@ -2536,6 +2726,9 @@ is_relocation_section_name(char *section_name)
|
||||
|| !strcmp(section_name, ".rel.sdata")
|
||||
|| !strcmp(section_name, ".rela.rodata")
|
||||
|| !strcmp(section_name, ".rel.rodata")
|
||||
|| (obj_data->comp_ctx->enable_llvm_pgo
|
||||
&& (!strcmp(section_name, ".rela__llvm_prf_data")
|
||||
|| !strcmp(section_name, ".rel__llvm_prf_data")))
|
||||
/* ".rela.rodata.cst4/8/16/.." */
|
||||
|| !strncmp(section_name, ".rela.rodata.cst",
|
||||
strlen(".rela.rodata.cst"))
|
||||
@ -2545,14 +2738,15 @@ is_relocation_section_name(char *section_name)
|
||||
}
|
||||
|
||||
static bool
|
||||
is_relocation_section(LLVMSectionIteratorRef sec_itr)
|
||||
is_relocation_section(AOTObjectData *obj_data, LLVMSectionIteratorRef sec_itr)
|
||||
{
|
||||
uint32 count = 0;
|
||||
char *name = (char *)LLVMGetSectionName(sec_itr);
|
||||
if (name) {
|
||||
if (is_relocation_section_name(name))
|
||||
if (is_relocation_section_name(obj_data, name))
|
||||
return true;
|
||||
else if ((!strcmp(name, ".text") || !strcmp(name, ".rdata"))
|
||||
else if ((!strcmp(name, ".text") || !strcmp(name, ".text.unlikely.")
|
||||
|| !strcmp(name, ".text.hot.") || !strcmp(name, ".rdata"))
|
||||
&& get_relocations_count(sec_itr, &count) && count > 0)
|
||||
return true;
|
||||
}
|
||||
@ -2570,7 +2764,7 @@ get_relocation_groups_count(AOTObjectData *obj_data, uint32 *p_count)
|
||||
return false;
|
||||
}
|
||||
while (!LLVMObjectFileIsSectionIteratorAtEnd(obj_data->binary, sec_itr)) {
|
||||
if (is_relocation_section(sec_itr)) {
|
||||
if (is_relocation_section(obj_data, sec_itr)) {
|
||||
count++;
|
||||
}
|
||||
LLVMMoveToNextSection(sec_itr);
|
||||
@ -2586,7 +2780,7 @@ aot_resolve_object_relocation_groups(AOTObjectData *obj_data)
|
||||
{
|
||||
LLVMSectionIteratorRef sec_itr;
|
||||
AOTRelocationGroup *relocation_group;
|
||||
uint32 group_count;
|
||||
uint32 group_count, llvm_prf_data_idx = 0;
|
||||
char *name;
|
||||
uint32 size;
|
||||
|
||||
@ -2612,14 +2806,50 @@ aot_resolve_object_relocation_groups(AOTObjectData *obj_data)
|
||||
return false;
|
||||
}
|
||||
while (!LLVMObjectFileIsSectionIteratorAtEnd(obj_data->binary, sec_itr)) {
|
||||
if (is_relocation_section(sec_itr)) {
|
||||
if (is_relocation_section(obj_data, sec_itr)) {
|
||||
name = (char *)LLVMGetSectionName(sec_itr);
|
||||
relocation_group->section_name = name;
|
||||
|
||||
if (obj_data->comp_ctx->enable_llvm_pgo
|
||||
&& (!strcmp(name, ".rela__llvm_prf_data")
|
||||
|| !strcmp(name, ".rel__llvm_prf_data"))) {
|
||||
char buf[32];
|
||||
snprintf(buf, sizeof(buf), "%s%u", name, llvm_prf_data_idx);
|
||||
size = strlen(buf) + 1;
|
||||
if (!(relocation_group->section_name =
|
||||
wasm_runtime_malloc(size))) {
|
||||
aot_set_last_error(
|
||||
"allocate memory for section name failed.");
|
||||
LLVMDisposeSectionIterator(sec_itr);
|
||||
return false;
|
||||
}
|
||||
bh_memcpy_s(relocation_group->section_name, size, buf, size);
|
||||
relocation_group->is_section_name_allocated = true;
|
||||
}
|
||||
|
||||
if (!aot_resolve_object_relocation_group(obj_data, relocation_group,
|
||||
sec_itr)) {
|
||||
LLVMDisposeSectionIterator(sec_itr);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (obj_data->comp_ctx->enable_llvm_pgo
|
||||
&& (!strcmp(name, ".rela__llvm_prf_data")
|
||||
|| !strcmp(name, ".rel__llvm_prf_data"))) {
|
||||
llvm_prf_data_idx++;
|
||||
}
|
||||
|
||||
if (!strcmp(relocation_group->section_name, ".rela.text.unlikely.")
|
||||
|| !strcmp(relocation_group->section_name, ".rela.text.hot.")) {
|
||||
relocation_group->section_name = ".rela.text";
|
||||
}
|
||||
else if (!strcmp(relocation_group->section_name,
|
||||
".rel.text.unlikely.")
|
||||
|| !strcmp(relocation_group->section_name,
|
||||
".rel.text.hot.")) {
|
||||
relocation_group->section_name = ".rel.text";
|
||||
}
|
||||
|
||||
relocation_group++;
|
||||
}
|
||||
LLVMMoveToNextSection(sec_itr);
|
||||
@ -2633,12 +2863,21 @@ static void
|
||||
destroy_relocation_groups(AOTRelocationGroup *relocation_groups,
|
||||
uint32 relocation_group_count)
|
||||
{
|
||||
uint32 i;
|
||||
uint32 i, j;
|
||||
AOTRelocationGroup *relocation_group = relocation_groups;
|
||||
|
||||
for (i = 0; i < relocation_group_count; i++, relocation_group++)
|
||||
if (relocation_group->relocations)
|
||||
for (i = 0; i < relocation_group_count; i++, relocation_group++) {
|
||||
if (relocation_group->relocations) {
|
||||
for (j = 0; j < relocation_group->relocation_count; j++) {
|
||||
if (relocation_group->relocations[j].is_symbol_name_allocated)
|
||||
wasm_runtime_free(
|
||||
relocation_group->relocations[j].symbol_name);
|
||||
}
|
||||
wasm_runtime_free(relocation_group->relocations);
|
||||
}
|
||||
if (relocation_group->is_section_name_allocated)
|
||||
wasm_runtime_free(relocation_group->section_name);
|
||||
}
|
||||
wasm_runtime_free(relocation_groups);
|
||||
}
|
||||
|
||||
@ -2664,8 +2903,20 @@ aot_obj_data_destroy(AOTObjectData *obj_data)
|
||||
LLVMDisposeMemoryBuffer(obj_data->mem_buf);
|
||||
if (obj_data->funcs)
|
||||
wasm_runtime_free(obj_data->funcs);
|
||||
if (obj_data->data_sections)
|
||||
if (obj_data->data_sections) {
|
||||
uint32 i;
|
||||
for (i = 0; i < obj_data->data_sections_count; i++) {
|
||||
if (obj_data->data_sections[i].name
|
||||
&& obj_data->data_sections[i].is_name_allocated) {
|
||||
wasm_runtime_free(obj_data->data_sections[i].name);
|
||||
}
|
||||
if (obj_data->data_sections[i].data
|
||||
&& obj_data->data_sections[i].is_data_allocated) {
|
||||
wasm_runtime_free(obj_data->data_sections[i].data);
|
||||
}
|
||||
}
|
||||
wasm_runtime_free(obj_data->data_sections);
|
||||
}
|
||||
if (obj_data->relocation_groups)
|
||||
destroy_relocation_groups(obj_data->relocation_groups,
|
||||
obj_data->relocation_group_count);
|
||||
@ -2688,6 +2939,7 @@ aot_obj_data_create(AOTCompContext *comp_ctx)
|
||||
return false;
|
||||
}
|
||||
memset(obj_data, 0, sizeof(AOTObjectData));
|
||||
obj_data->comp_ctx = comp_ctx;
|
||||
|
||||
bh_print_time("Begin to emit object file");
|
||||
if (comp_ctx->external_llc_compiler || comp_ctx->external_asm_compiler) {
|
||||
@ -2821,8 +3073,8 @@ aot_obj_data_create(AOTCompContext *comp_ctx)
|
||||
if (!aot_resolve_target_info(comp_ctx, obj_data)
|
||||
|| !aot_resolve_text(obj_data) || !aot_resolve_literal(obj_data)
|
||||
|| !aot_resolve_object_data_sections(obj_data)
|
||||
|| !aot_resolve_object_relocation_groups(obj_data)
|
||||
|| !aot_resolve_functions(comp_ctx, obj_data))
|
||||
|| !aot_resolve_functions(comp_ctx, obj_data)
|
||||
|| !aot_resolve_object_relocation_groups(obj_data))
|
||||
goto fail;
|
||||
|
||||
return obj_data;
|
||||
|
||||
@ -1670,6 +1670,12 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
|
||||
if (option->disable_llvm_lto)
|
||||
comp_ctx->disable_llvm_lto = true;
|
||||
|
||||
if (option->enable_llvm_pgo)
|
||||
comp_ctx->enable_llvm_pgo = true;
|
||||
|
||||
if (option->use_prof_file)
|
||||
comp_ctx->use_prof_file = option->use_prof_file;
|
||||
|
||||
if (option->enable_stack_estimation)
|
||||
comp_ctx->enable_stack_estimation = true;
|
||||
|
||||
@ -2829,3 +2835,23 @@ aot_load_const_from_table(AOTCompContext *comp_ctx, LLVMValueRef base,
|
||||
(void)const_type;
|
||||
return const_value;
|
||||
}
|
||||
|
||||
bool
|
||||
aot_set_cond_br_weights(AOTCompContext *comp_ctx, LLVMValueRef cond_br,
|
||||
int32 weights_true, int32 weights_false)
|
||||
{
|
||||
LLVMMetadataRef md_nodes[3], meta_data;
|
||||
LLVMValueRef meta_data_as_value;
|
||||
|
||||
md_nodes[0] = LLVMMDStringInContext2(comp_ctx->context, "branch_weights",
|
||||
strlen("branch_weights"));
|
||||
md_nodes[1] = LLVMValueAsMetadata(I32_CONST(weights_true));
|
||||
md_nodes[2] = LLVMValueAsMetadata(I32_CONST(weights_false));
|
||||
|
||||
meta_data = LLVMMDNodeInContext2(comp_ctx->context, md_nodes, 3);
|
||||
meta_data_as_value = LLVMMetadataAsValue(comp_ctx->context, meta_data);
|
||||
|
||||
LLVMSetMetadata(cond_br, 2, meta_data_as_value);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -349,6 +349,12 @@ typedef struct AOTCompContext {
|
||||
/* Disable LLVM link time optimization */
|
||||
bool disable_llvm_lto;
|
||||
|
||||
/* Enable LLVM PGO (Profile-Guided Optimization) */
|
||||
bool enable_llvm_pgo;
|
||||
|
||||
/* Use profile file collected by LLVM PGO */
|
||||
char *use_prof_file;
|
||||
|
||||
/* Enable to use segment register as the base addr
|
||||
of linear memory for load/store operations */
|
||||
bool enable_segue_i32_load;
|
||||
@ -428,7 +434,9 @@ typedef struct AOTCompOption {
|
||||
bool enable_aux_stack_frame;
|
||||
bool disable_llvm_intrinsics;
|
||||
bool disable_llvm_lto;
|
||||
bool enable_llvm_pgo;
|
||||
bool enable_stack_estimation;
|
||||
char *use_prof_file;
|
||||
uint32 opt_level;
|
||||
uint32 size_level;
|
||||
uint32 output_format;
|
||||
@ -541,6 +549,13 @@ aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module);
|
||||
void
|
||||
aot_handle_llvm_errmsg(const char *string, LLVMErrorRef err);
|
||||
|
||||
char *
|
||||
aot_compress_aot_func_names(AOTCompContext *comp_ctx, uint32 *p_size);
|
||||
|
||||
bool
|
||||
aot_set_cond_br_weights(AOTCompContext *comp_ctx, LLVMValueRef cond_br,
|
||||
int32 weights_true, int32 weights_false);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* end of extern "C" */
|
||||
#endif
|
||||
|
||||
@ -44,6 +44,7 @@
|
||||
#if LLVM_VERSION_MAJOR >= 12
|
||||
#include <llvm/Analysis/AliasAnalysis.h>
|
||||
#endif
|
||||
#include <llvm/ProfileData/InstrProf.h>
|
||||
|
||||
#include <cstring>
|
||||
#include "../aot/aot_runtime.h"
|
||||
@ -232,14 +233,26 @@ aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module)
|
||||
PTO.SLPVectorization = true;
|
||||
PTO.LoopUnrolling = true;
|
||||
|
||||
Optional<PGOOptions> PGO = None;
|
||||
if (comp_ctx->enable_llvm_pgo) {
|
||||
/* Disable static counter allocation for value profiler,
|
||||
it will be allocated by runtime */
|
||||
const char *argv[] = { "", "-vp-static-alloc=false" };
|
||||
cl::ParseCommandLineOptions(2, argv);
|
||||
PGO = PGOOptions("", "", "", PGOOptions::IRInstr);
|
||||
}
|
||||
else if (comp_ctx->use_prof_file) {
|
||||
PGO = PGOOptions(comp_ctx->use_prof_file, "", "", PGOOptions::IRUse);
|
||||
}
|
||||
|
||||
#ifdef DEBUG_PASS
|
||||
PassInstrumentationCallbacks PIC;
|
||||
PassBuilder PB(TM, PTO, None, &PIC);
|
||||
PassBuilder PB(TM, PTO, PGO, &PIC);
|
||||
#else
|
||||
#if LLVM_VERSION_MAJOR == 12
|
||||
PassBuilder PB(false, TM, PTO);
|
||||
PassBuilder PB(false, TM, PTO, PGO);
|
||||
#else
|
||||
PassBuilder PB(TM, PTO);
|
||||
PassBuilder PB(TM, PTO, PGO);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -334,8 +347,16 @@ aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module)
|
||||
FPM.addPass(SLPVectorizerPass());
|
||||
FPM.addPass(LoadStoreVectorizerPass());
|
||||
|
||||
if (comp_ctx->enable_llvm_pgo || comp_ctx->use_prof_file) {
|
||||
LICMOptions licm_opt;
|
||||
/* LICM pass: loop invariant code motion, attempting to remove
|
||||
as much code from the body of a loop as possible. Experiments
|
||||
show it is good to enable it when pgo is enabled. */
|
||||
FPM.addPass(
|
||||
createFunctionToLoopPassAdaptor(LICMPass(licm_opt), true));
|
||||
}
|
||||
|
||||
/*
|
||||
FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass()));
|
||||
FPM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass()));
|
||||
FPM.addPass(createFunctionToLoopPassAdaptor(SimpleLoopUnswitchPass()));
|
||||
*/
|
||||
@ -344,9 +365,10 @@ aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module)
|
||||
|
||||
if (!disable_llvm_lto) {
|
||||
/* Apply LTO for AOT mode */
|
||||
if (comp_ctx->comp_data->func_count >= 10)
|
||||
/* Adds the pre-link optimizations if the func count
|
||||
is large enough */
|
||||
if (comp_ctx->comp_data->func_count >= 10
|
||||
|| comp_ctx->enable_llvm_pgo || comp_ctx->use_prof_file)
|
||||
/* Add the pre-link optimizations if the func count
|
||||
is large enough or PGO is enabled */
|
||||
MPM.addPass(PB.buildLTOPreLinkDefaultPipeline(OL));
|
||||
else
|
||||
MPM.addPass(PB.buildLTODefaultPipeline(OL, NULL));
|
||||
@ -358,3 +380,34 @@ aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module)
|
||||
|
||||
MPM.run(*M, MAM);
|
||||
}
|
||||
|
||||
char *
|
||||
aot_compress_aot_func_names(AOTCompContext *comp_ctx, uint32 *p_size)
|
||||
{
|
||||
std::vector<std::string> NameStrs;
|
||||
std::string Result;
|
||||
char buf[32], *compressed_str;
|
||||
uint32 compressed_str_len, i;
|
||||
|
||||
for (i = 0; i < comp_ctx->func_ctx_count; i++) {
|
||||
snprintf(buf, sizeof(buf), "%s%d", AOT_FUNC_PREFIX, i);
|
||||
std::string str(buf);
|
||||
NameStrs.push_back(str);
|
||||
}
|
||||
|
||||
if (collectPGOFuncNameStrings(NameStrs, true, Result)) {
|
||||
aot_set_last_error("collect pgo func name strings failed");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
compressed_str_len = Result.size();
|
||||
if (!(compressed_str = (char *)wasm_runtime_malloc(compressed_str_len))) {
|
||||
aot_set_last_error("allocate memory failed");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bh_memcpy_s(compressed_str, compressed_str_len, Result.c_str(),
|
||||
compressed_str_len);
|
||||
*p_size = compressed_str_len;
|
||||
return compressed_str;
|
||||
}
|
||||
|
||||
@ -55,7 +55,9 @@ typedef struct AOTCompOption {
|
||||
bool enable_aux_stack_frame;
|
||||
bool disable_llvm_intrinsics;
|
||||
bool disable_llvm_lto;
|
||||
bool enable_llvm_pgo;
|
||||
bool enable_stack_estimation;
|
||||
char *use_prof_file;
|
||||
uint32_t opt_level;
|
||||
uint32_t size_level;
|
||||
uint32_t output_format;
|
||||
|
||||
@ -1331,6 +1331,30 @@ WASM_RUNTIME_API_EXTERN uint32_t
|
||||
wasm_runtime_dump_call_stack_to_buf(wasm_exec_env_t exec_env, char *buf,
|
||||
uint32_t len);
|
||||
|
||||
/**
|
||||
* Get the size required to store the LLVM PGO profile data
|
||||
*
|
||||
* @param module_inst the WASM module instance
|
||||
*
|
||||
* @return size required to store the contents, 0 means error
|
||||
*/
|
||||
WASM_RUNTIME_API_EXTERN uint32_t
|
||||
wasm_runtime_get_pgo_prof_data_size(wasm_module_inst_t module_inst);
|
||||
|
||||
/**
|
||||
* Dump the LLVM PGO profile data to buffer
|
||||
*
|
||||
* @param module_inst the WASM module instance
|
||||
* @param buf buffer to store the dumped content
|
||||
* @param len length of the buffer
|
||||
*
|
||||
* @return bytes dumped to the buffer, 0 means error and data in buf
|
||||
* may be invalid
|
||||
*/
|
||||
WASM_RUNTIME_API_EXTERN uint32_t
|
||||
wasm_runtime_dump_pgo_prof_data_to_buf(wasm_module_inst_t module_inst,
|
||||
char *buf, uint32_t len);
|
||||
|
||||
/**
|
||||
* Get a custom section by name
|
||||
*
|
||||
|
||||
@ -130,6 +130,7 @@ os_thread_exit(void *retval);
|
||||
#define os_memory_order_release memory_order_release
|
||||
#define os_memory_order_seq_cst memory_order_seq_cst
|
||||
#define os_atomic_thread_fence atomic_thread_fence
|
||||
#define os_atomic_cmpxchg atomic_compare_exchange_strong
|
||||
#endif
|
||||
|
||||
#endif /* end of os_atomic_thread_fence */
|
||||
|
||||
Reference in New Issue
Block a user