Implement AOT static PGO (#2243)

LLVM PGO (Profile-Guided Optimization) allows the compiler to better optimize code
for how it actually runs. This PR implements the AOT static PGO, and is tested on
Linux x86-64 and x86-32. The basic steps are:

1. Use `wamrc --enable-llvm-pgo -o <aot_file_of_pgo> <wasm_file>`
   to generate an instrumented aot file.
2. Compile iwasm with `cmake -DWAMR_BUILD_STATIC_PGO=1` and run
      `iwasm --gen-prof-file=<raw_profile_file> <aot_file_of_pgo>`
    to generate the raw profile file.
3. Run `llvm-profdata merge -output=<profile_file> <raw_profile_file>`
    to merge the raw profile file into the profile file.
4. Run `wamrc --use-prof-file=<profile_file> -o <aot_file> <wasm_file>`
    to generate the optimized aot file.
5. Run the optimized aot_file: `iwasm <aot_file>`.

The test scripts are also added for each benchmark, run `test_pgo.sh` under
each benchmark's folder to test the AOT static pgo.
This commit is contained in:
Wenyong Huang
2023-06-05 09:17:39 +08:00
committed by GitHub
parent f1e9029ebc
commit 8d88471c46
29 changed files with 2000 additions and 53 deletions

View File

@ -111,6 +111,8 @@ typedef struct AOTSymbolList {
/* AOT object data */
typedef struct AOTObjectData {
AOTCompContext *comp_ctx;
LLVMMemoryBufferRef mem_buf;
LLVMBinaryRef binary;
@ -119,6 +121,12 @@ typedef struct AOTObjectData {
void *text;
uint32 text_size;
void *text_unlikely;
uint32 text_unlikely_size;
void *text_hot;
uint32 text_hot_size;
/* literal data and size */
void *literal;
uint32 literal_size;
@ -558,8 +566,10 @@ get_init_data_section_size(AOTCompContext *comp_ctx, AOTCompData *comp_data,
static uint32
get_text_section_size(AOTObjectData *obj_data)
{
return (sizeof(uint32) + obj_data->literal_size + obj_data->text_size + 3)
& ~3;
return sizeof(uint32) + align_uint(obj_data->literal_size, 4)
+ align_uint(obj_data->text_size, 4)
+ align_uint(obj_data->text_unlikely_size, 4)
+ align_uint(obj_data->text_hot_size, 4);
}
static uint32
@ -1702,12 +1712,28 @@ aot_emit_text_section(uint8 *buf, uint8 *buf_end, uint32 *p_offset,
EMIT_U32(AOT_SECTION_TYPE_TEXT);
EMIT_U32(section_size);
EMIT_U32(obj_data->literal_size);
if (obj_data->literal_size > 0)
EMIT_BUF(obj_data->literal, obj_data->literal_size);
EMIT_BUF(obj_data->text, obj_data->text_size);
while (offset & 3)
EMIT_BUF(&placeholder, 1);
if (obj_data->literal_size > 0) {
EMIT_BUF(obj_data->literal, obj_data->literal_size);
while (offset & 3)
EMIT_BUF(&placeholder, 1);
}
if (obj_data->text_size > 0) {
EMIT_BUF(obj_data->text, obj_data->text_size);
while (offset & 3)
EMIT_BUF(&placeholder, 1);
}
if (obj_data->text_unlikely_size > 0) {
EMIT_BUF(obj_data->text_unlikely, obj_data->text_unlikely_size);
while (offset & 3)
EMIT_BUF(&placeholder, 1);
}
if (obj_data->text_hot_size > 0) {
EMIT_BUF(obj_data->text_hot, obj_data->text_hot_size);
while (offset & 3)
EMIT_BUF(&placeholder, 1);
}
if (offset - *p_offset != section_size + sizeof(uint32) * 2) {
aot_set_last_error("emit text section failed.");
@ -2211,11 +2237,23 @@ aot_resolve_text(AOTObjectData *obj_data)
}
while (
!LLVMObjectFileIsSectionIteratorAtEnd(obj_data->binary, sec_itr)) {
if ((name = (char *)LLVMGetSectionName(sec_itr))
&& !strcmp(name, ".text")) {
obj_data->text = (char *)LLVMGetSectionContents(sec_itr);
obj_data->text_size = (uint32)LLVMGetSectionSize(sec_itr);
break;
if ((name = (char *)LLVMGetSectionName(sec_itr))) {
if (!strcmp(name, ".text")) {
obj_data->text = (char *)LLVMGetSectionContents(sec_itr);
obj_data->text_size = (uint32)LLVMGetSectionSize(sec_itr);
}
else if (!strcmp(name, ".text.unlikely.")) {
obj_data->text_unlikely =
(char *)LLVMGetSectionContents(sec_itr);
obj_data->text_unlikely_size =
(uint32)LLVMGetSectionSize(sec_itr);
}
else if (!strcmp(name, ".text.hot.")) {
obj_data->text_hot =
(char *)LLVMGetSectionContents(sec_itr);
obj_data->text_hot_size =
(uint32)LLVMGetSectionSize(sec_itr);
}
}
LLVMMoveToNextSection(sec_itr);
}
@ -2253,7 +2291,8 @@ static bool
get_relocations_count(LLVMSectionIteratorRef sec_itr, uint32 *p_count);
static bool
is_data_section(LLVMSectionIteratorRef sec_itr, char *section_name)
is_data_section(AOTObjectData *obj_data, LLVMSectionIteratorRef sec_itr,
char *section_name)
{
uint32 relocation_count = 0;
@ -2265,7 +2304,11 @@ is_data_section(LLVMSectionIteratorRef sec_itr, char *section_name)
|| !strncmp(section_name, ".rodata.str", strlen(".rodata.str"))
|| (!strcmp(section_name, ".rdata")
&& get_relocations_count(sec_itr, &relocation_count)
&& relocation_count > 0));
&& relocation_count > 0)
|| (obj_data->comp_ctx->enable_llvm_pgo
&& (!strncmp(section_name, "__llvm_prf_cnts", 15)
|| !strncmp(section_name, "__llvm_prf_data", 15)
|| !strncmp(section_name, "__llvm_prf_names", 16))));
}
static bool
@ -2281,7 +2324,7 @@ get_object_data_sections_count(AOTObjectData *obj_data, uint32 *p_count)
}
while (!LLVMObjectFileIsSectionIteratorAtEnd(obj_data->binary, sec_itr)) {
if ((name = (char *)LLVMGetSectionName(sec_itr))
&& (is_data_section(sec_itr, name))) {
&& (is_data_section(obj_data, sec_itr, name))) {
count++;
}
LLVMMoveToNextSection(sec_itr);
@ -2306,6 +2349,9 @@ aot_resolve_object_data_sections(AOTObjectData *obj_data)
}
if (sections_count > 0) {
uint32 llvm_prf_cnts_idx = 0, llvm_prf_data_idx = 0;
char buf[32];
size = (uint32)sizeof(AOTObjectDataSection) * sections_count;
if (!(data_section = obj_data->data_sections =
wasm_runtime_malloc(size))) {
@ -2322,10 +2368,46 @@ aot_resolve_object_data_sections(AOTObjectData *obj_data)
while (
!LLVMObjectFileIsSectionIteratorAtEnd(obj_data->binary, sec_itr)) {
if ((name = (char *)LLVMGetSectionName(sec_itr))
&& (is_data_section(sec_itr, name))) {
&& (is_data_section(obj_data, sec_itr, name))) {
data_section->name = name;
data_section->data = (uint8 *)LLVMGetSectionContents(sec_itr);
data_section->size = (uint32)LLVMGetSectionSize(sec_itr);
if (obj_data->comp_ctx->enable_llvm_pgo
&& !strcmp(name, "__llvm_prf_cnts")) {
snprintf(buf, sizeof(buf), "%s%u", name,
llvm_prf_cnts_idx++);
size = strlen(buf) + 1;
if (!(data_section->name = wasm_runtime_malloc(size))) {
aot_set_last_error(
"allocate memory for data section name failed.");
return false;
}
bh_memcpy_s(data_section->name, size, buf, size);
data_section->is_name_allocated = true;
}
else if (obj_data->comp_ctx->enable_llvm_pgo
&& !strcmp(name, "__llvm_prf_data")) {
snprintf(buf, sizeof(buf), "%s%u", name,
llvm_prf_data_idx++);
size = strlen(buf) + 1;
if (!(data_section->name = wasm_runtime_malloc(size))) {
aot_set_last_error(
"allocate memory for data section name failed.");
return false;
}
bh_memcpy_s(data_section->name, size, buf, size);
data_section->is_name_allocated = true;
}
if (obj_data->comp_ctx->enable_llvm_pgo
&& !strcmp(name, "__llvm_prf_names")) {
data_section->data = (uint8 *)aot_compress_aot_func_names(
obj_data->comp_ctx, &data_section->size);
data_section->is_data_allocated = true;
}
else {
data_section->data =
(uint8 *)LLVMGetSectionContents(sec_itr);
data_section->size = (uint32)LLVMGetSectionSize(sec_itr);
}
data_section++;
}
LLVMMoveToNextSection(sec_itr);
@ -2365,9 +2447,36 @@ aot_resolve_functions(AOTCompContext *comp_ctx, AOTObjectData *obj_data)
&& str_starts_with(name, prefix)) {
func_index = (uint32)atoi(name + strlen(prefix));
if (func_index < obj_data->func_count) {
LLVMSectionIteratorRef contain_section;
char *contain_section_name;
func = obj_data->funcs + func_index;
func->func_name = name;
func->text_offset = LLVMGetSymbolAddress(sym_itr);
if (!(contain_section = LLVMObjectFileCopySectionIterator(
obj_data->binary))) {
aot_set_last_error("llvm get section iterator failed.");
LLVMDisposeSymbolIterator(sym_itr);
return false;
}
LLVMMoveToContainingSection(contain_section, sym_itr);
contain_section_name =
(char *)LLVMGetSectionName(contain_section);
LLVMDisposeSectionIterator(contain_section);
if (!strcmp(contain_section_name, ".text.unlikely.")) {
func->text_offset = align_uint(obj_data->text_size, 4)
+ LLVMGetSymbolAddress(sym_itr);
}
else if (!strcmp(contain_section_name, ".text.hot.")) {
func->text_offset =
align_uint(obj_data->text_size, 4)
+ align_uint(obj_data->text_unlikely_size, 4)
+ LLVMGetSymbolAddress(sym_itr);
}
else {
func->text_offset = LLVMGetSymbolAddress(sym_itr);
}
}
}
LLVMMoveToNextSymbol(sym_itr);
@ -2478,9 +2587,86 @@ aot_resolve_object_relocation_group(AOTObjectData *obj_data,
}
/* set relocation fields */
relocation->relocation_offset = offset;
relocation->relocation_type = (uint32)type;
relocation->symbol_name = (char *)LLVMGetSymbolName(rel_sym);
relocation->relocation_offset = offset;
if (!strcmp(group->section_name, ".rela.text.unlikely.")
|| !strcmp(group->section_name, ".rel.text.unlikely.")) {
relocation->relocation_offset += align_uint(obj_data->text_size, 4);
}
else if (!strcmp(group->section_name, ".rela.text.hot.")
|| !strcmp(group->section_name, ".rel.text.hot.")) {
relocation->relocation_offset +=
align_uint(obj_data->text_size, 4)
+ align_uint(obj_data->text_unlikely_size, 4);
}
if (!strcmp(relocation->symbol_name, ".text.unlikely.")) {
relocation->symbol_name = ".text";
relocation->relocation_addend += align_uint(obj_data->text_size, 4);
}
if (!strcmp(relocation->symbol_name, ".text.hot.")) {
relocation->symbol_name = ".text";
relocation->relocation_addend +=
align_uint(obj_data->text_size, 4)
+ align_uint(obj_data->text_unlikely_size, 4);
}
if (obj_data->comp_ctx->enable_llvm_pgo
&& (!strcmp(relocation->symbol_name, "__llvm_prf_cnts")
|| !strcmp(relocation->symbol_name, "__llvm_prf_data"))) {
LLVMSectionIteratorRef sec_itr;
char buf[32], *section_name;
uint32 prof_section_idx = 0;
if (!(sec_itr =
LLVMObjectFileCopySectionIterator(obj_data->binary))) {
aot_set_last_error("llvm get section iterator failed.");
LLVMDisposeSymbolIterator(rel_sym);
goto fail;
}
while (!LLVMObjectFileIsSectionIteratorAtEnd(obj_data->binary,
sec_itr)) {
section_name = (char *)LLVMGetSectionName(sec_itr);
if (section_name
&& !strcmp(section_name, relocation->symbol_name)) {
if (LLVMGetSectionContainsSymbol(sec_itr, rel_sym))
break;
prof_section_idx++;
}
LLVMMoveToNextSection(sec_itr);
}
LLVMDisposeSectionIterator(sec_itr);
if (!strcmp(group->section_name, ".rela.text")
|| !strcmp(group->section_name, ".rel.text")) {
snprintf(buf, sizeof(buf), "%s%u", relocation->symbol_name,
prof_section_idx);
size = strlen(buf) + 1;
if (!(relocation->symbol_name = wasm_runtime_malloc(size))) {
aot_set_last_error(
"allocate memory for relocation symbol name failed.");
LLVMDisposeSymbolIterator(rel_sym);
goto fail;
}
bh_memcpy_s(relocation->symbol_name, size, buf, size);
relocation->is_symbol_name_allocated = true;
}
else if (!strncmp(group->section_name, ".rela__llvm_prf_data", 20)
|| !strncmp(group->section_name, ".rel__llvm_prf_data",
19)) {
snprintf(buf, sizeof(buf), "%s%u", relocation->symbol_name,
prof_section_idx);
size = strlen(buf) + 1;
if (!(relocation->symbol_name = wasm_runtime_malloc(size))) {
aot_set_last_error(
"allocate memory for relocation symbol name failed.");
LLVMDisposeSymbolIterator(rel_sym);
goto fail;
}
bh_memcpy_s(relocation->symbol_name, size, buf, size);
relocation->is_symbol_name_allocated = true;
}
}
/* for ".LCPIxxx", ".LJTIxxx", ".LBBxxx" and switch lookup table
* relocation, transform the symbol name to real section name and set
@ -2525,10 +2711,14 @@ fail:
}
static bool
is_relocation_section_name(char *section_name)
is_relocation_section_name(AOTObjectData *obj_data, char *section_name)
{
return (!strcmp(section_name, ".rela.text")
|| !strcmp(section_name, ".rel.text")
|| !strcmp(section_name, ".rela.text.unlikely.")
|| !strcmp(section_name, ".rel.text.unlikely.")
|| !strcmp(section_name, ".rela.text.hot.")
|| !strcmp(section_name, ".rel.text.hot.")
|| !strcmp(section_name, ".rela.literal")
|| !strcmp(section_name, ".rela.data")
|| !strcmp(section_name, ".rel.data")
@ -2536,6 +2726,9 @@ is_relocation_section_name(char *section_name)
|| !strcmp(section_name, ".rel.sdata")
|| !strcmp(section_name, ".rela.rodata")
|| !strcmp(section_name, ".rel.rodata")
|| (obj_data->comp_ctx->enable_llvm_pgo
&& (!strcmp(section_name, ".rela__llvm_prf_data")
|| !strcmp(section_name, ".rel__llvm_prf_data")))
/* ".rela.rodata.cst4/8/16/.." */
|| !strncmp(section_name, ".rela.rodata.cst",
strlen(".rela.rodata.cst"))
@ -2545,14 +2738,15 @@ is_relocation_section_name(char *section_name)
}
static bool
is_relocation_section(LLVMSectionIteratorRef sec_itr)
is_relocation_section(AOTObjectData *obj_data, LLVMSectionIteratorRef sec_itr)
{
uint32 count = 0;
char *name = (char *)LLVMGetSectionName(sec_itr);
if (name) {
if (is_relocation_section_name(name))
if (is_relocation_section_name(obj_data, name))
return true;
else if ((!strcmp(name, ".text") || !strcmp(name, ".rdata"))
else if ((!strcmp(name, ".text") || !strcmp(name, ".text.unlikely.")
|| !strcmp(name, ".text.hot.") || !strcmp(name, ".rdata"))
&& get_relocations_count(sec_itr, &count) && count > 0)
return true;
}
@ -2570,7 +2764,7 @@ get_relocation_groups_count(AOTObjectData *obj_data, uint32 *p_count)
return false;
}
while (!LLVMObjectFileIsSectionIteratorAtEnd(obj_data->binary, sec_itr)) {
if (is_relocation_section(sec_itr)) {
if (is_relocation_section(obj_data, sec_itr)) {
count++;
}
LLVMMoveToNextSection(sec_itr);
@ -2586,7 +2780,7 @@ aot_resolve_object_relocation_groups(AOTObjectData *obj_data)
{
LLVMSectionIteratorRef sec_itr;
AOTRelocationGroup *relocation_group;
uint32 group_count;
uint32 group_count, llvm_prf_data_idx = 0;
char *name;
uint32 size;
@ -2612,14 +2806,50 @@ aot_resolve_object_relocation_groups(AOTObjectData *obj_data)
return false;
}
while (!LLVMObjectFileIsSectionIteratorAtEnd(obj_data->binary, sec_itr)) {
if (is_relocation_section(sec_itr)) {
if (is_relocation_section(obj_data, sec_itr)) {
name = (char *)LLVMGetSectionName(sec_itr);
relocation_group->section_name = name;
if (obj_data->comp_ctx->enable_llvm_pgo
&& (!strcmp(name, ".rela__llvm_prf_data")
|| !strcmp(name, ".rel__llvm_prf_data"))) {
char buf[32];
snprintf(buf, sizeof(buf), "%s%u", name, llvm_prf_data_idx);
size = strlen(buf) + 1;
if (!(relocation_group->section_name =
wasm_runtime_malloc(size))) {
aot_set_last_error(
"allocate memory for section name failed.");
LLVMDisposeSectionIterator(sec_itr);
return false;
}
bh_memcpy_s(relocation_group->section_name, size, buf, size);
relocation_group->is_section_name_allocated = true;
}
if (!aot_resolve_object_relocation_group(obj_data, relocation_group,
sec_itr)) {
LLVMDisposeSectionIterator(sec_itr);
return false;
}
if (obj_data->comp_ctx->enable_llvm_pgo
&& (!strcmp(name, ".rela__llvm_prf_data")
|| !strcmp(name, ".rel__llvm_prf_data"))) {
llvm_prf_data_idx++;
}
if (!strcmp(relocation_group->section_name, ".rela.text.unlikely.")
|| !strcmp(relocation_group->section_name, ".rela.text.hot.")) {
relocation_group->section_name = ".rela.text";
}
else if (!strcmp(relocation_group->section_name,
".rel.text.unlikely.")
|| !strcmp(relocation_group->section_name,
".rel.text.hot.")) {
relocation_group->section_name = ".rel.text";
}
relocation_group++;
}
LLVMMoveToNextSection(sec_itr);
@ -2633,12 +2863,21 @@ static void
destroy_relocation_groups(AOTRelocationGroup *relocation_groups,
uint32 relocation_group_count)
{
uint32 i;
uint32 i, j;
AOTRelocationGroup *relocation_group = relocation_groups;
for (i = 0; i < relocation_group_count; i++, relocation_group++)
if (relocation_group->relocations)
for (i = 0; i < relocation_group_count; i++, relocation_group++) {
if (relocation_group->relocations) {
for (j = 0; j < relocation_group->relocation_count; j++) {
if (relocation_group->relocations[j].is_symbol_name_allocated)
wasm_runtime_free(
relocation_group->relocations[j].symbol_name);
}
wasm_runtime_free(relocation_group->relocations);
}
if (relocation_group->is_section_name_allocated)
wasm_runtime_free(relocation_group->section_name);
}
wasm_runtime_free(relocation_groups);
}
@ -2664,8 +2903,20 @@ aot_obj_data_destroy(AOTObjectData *obj_data)
LLVMDisposeMemoryBuffer(obj_data->mem_buf);
if (obj_data->funcs)
wasm_runtime_free(obj_data->funcs);
if (obj_data->data_sections)
if (obj_data->data_sections) {
uint32 i;
for (i = 0; i < obj_data->data_sections_count; i++) {
if (obj_data->data_sections[i].name
&& obj_data->data_sections[i].is_name_allocated) {
wasm_runtime_free(obj_data->data_sections[i].name);
}
if (obj_data->data_sections[i].data
&& obj_data->data_sections[i].is_data_allocated) {
wasm_runtime_free(obj_data->data_sections[i].data);
}
}
wasm_runtime_free(obj_data->data_sections);
}
if (obj_data->relocation_groups)
destroy_relocation_groups(obj_data->relocation_groups,
obj_data->relocation_group_count);
@ -2688,6 +2939,7 @@ aot_obj_data_create(AOTCompContext *comp_ctx)
return false;
}
memset(obj_data, 0, sizeof(AOTObjectData));
obj_data->comp_ctx = comp_ctx;
bh_print_time("Begin to emit object file");
if (comp_ctx->external_llc_compiler || comp_ctx->external_asm_compiler) {
@ -2821,8 +3073,8 @@ aot_obj_data_create(AOTCompContext *comp_ctx)
if (!aot_resolve_target_info(comp_ctx, obj_data)
|| !aot_resolve_text(obj_data) || !aot_resolve_literal(obj_data)
|| !aot_resolve_object_data_sections(obj_data)
|| !aot_resolve_object_relocation_groups(obj_data)
|| !aot_resolve_functions(comp_ctx, obj_data))
|| !aot_resolve_functions(comp_ctx, obj_data)
|| !aot_resolve_object_relocation_groups(obj_data))
goto fail;
return obj_data;

View File

@ -1670,6 +1670,12 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
if (option->disable_llvm_lto)
comp_ctx->disable_llvm_lto = true;
if (option->enable_llvm_pgo)
comp_ctx->enable_llvm_pgo = true;
if (option->use_prof_file)
comp_ctx->use_prof_file = option->use_prof_file;
if (option->enable_stack_estimation)
comp_ctx->enable_stack_estimation = true;
@ -2829,3 +2835,23 @@ aot_load_const_from_table(AOTCompContext *comp_ctx, LLVMValueRef base,
(void)const_type;
return const_value;
}
bool
aot_set_cond_br_weights(AOTCompContext *comp_ctx, LLVMValueRef cond_br,
int32 weights_true, int32 weights_false)
{
LLVMMetadataRef md_nodes[3], meta_data;
LLVMValueRef meta_data_as_value;
md_nodes[0] = LLVMMDStringInContext2(comp_ctx->context, "branch_weights",
strlen("branch_weights"));
md_nodes[1] = LLVMValueAsMetadata(I32_CONST(weights_true));
md_nodes[2] = LLVMValueAsMetadata(I32_CONST(weights_false));
meta_data = LLVMMDNodeInContext2(comp_ctx->context, md_nodes, 3);
meta_data_as_value = LLVMMetadataAsValue(comp_ctx->context, meta_data);
LLVMSetMetadata(cond_br, 2, meta_data_as_value);
return true;
}

View File

@ -349,6 +349,12 @@ typedef struct AOTCompContext {
/* Disable LLVM link time optimization */
bool disable_llvm_lto;
/* Enable LLVM PGO (Profile-Guided Optimization) */
bool enable_llvm_pgo;
/* Use profile file collected by LLVM PGO */
char *use_prof_file;
/* Enable to use segument register as the base addr
of linear memory for load/store operations */
bool enable_segue_i32_load;
@ -428,7 +434,9 @@ typedef struct AOTCompOption {
bool enable_aux_stack_frame;
bool disable_llvm_intrinsics;
bool disable_llvm_lto;
bool enable_llvm_pgo;
bool enable_stack_estimation;
char *use_prof_file;
uint32 opt_level;
uint32 size_level;
uint32 output_format;
@ -541,6 +549,13 @@ aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module);
void
aot_handle_llvm_errmsg(const char *string, LLVMErrorRef err);
char *
aot_compress_aot_func_names(AOTCompContext *comp_ctx, uint32 *p_size);
bool
aot_set_cond_br_weights(AOTCompContext *comp_ctx, LLVMValueRef cond_br,
int32 weights_true, int32 weights_false);
#ifdef __cplusplus
} /* end of extern "C" */
#endif

View File

@ -44,6 +44,7 @@
#if LLVM_VERSION_MAJOR >= 12
#include <llvm/Analysis/AliasAnalysis.h>
#endif
#include <llvm/ProfileData/InstrProf.h>
#include <cstring>
#include "../aot/aot_runtime.h"
@ -232,14 +233,26 @@ aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module)
PTO.SLPVectorization = true;
PTO.LoopUnrolling = true;
Optional<PGOOptions> PGO = None;
if (comp_ctx->enable_llvm_pgo) {
/* Disable static counter allocation for value profiler,
it will be allocated by runtime */
const char *argv[] = { "", "-vp-static-alloc=false" };
cl::ParseCommandLineOptions(2, argv);
PGO = PGOOptions("", "", "", PGOOptions::IRInstr);
}
else if (comp_ctx->use_prof_file) {
PGO = PGOOptions(comp_ctx->use_prof_file, "", "", PGOOptions::IRUse);
}
#ifdef DEBUG_PASS
PassInstrumentationCallbacks PIC;
PassBuilder PB(TM, PTO, None, &PIC);
PassBuilder PB(TM, PTO, PGO, &PIC);
#else
#if LLVM_VERSION_MAJOR == 12
PassBuilder PB(false, TM, PTO);
PassBuilder PB(false, TM, PTO, PGO);
#else
PassBuilder PB(TM, PTO);
PassBuilder PB(TM, PTO, PGO);
#endif
#endif
@ -334,8 +347,16 @@ aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module)
FPM.addPass(SLPVectorizerPass());
FPM.addPass(LoadStoreVectorizerPass());
if (comp_ctx->enable_llvm_pgo || comp_ctx->use_prof_file) {
LICMOptions licm_opt;
/* LICM pass: loop invariant code motion, attempting to remove
as much code from the body of a loop as possible. Experiments
show it is good to enable it when pgo is enabled. */
FPM.addPass(
createFunctionToLoopPassAdaptor(LICMPass(licm_opt), true));
}
/*
FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass()));
FPM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass()));
FPM.addPass(createFunctionToLoopPassAdaptor(SimpleLoopUnswitchPass()));
*/
@ -344,9 +365,10 @@ aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module)
if (!disable_llvm_lto) {
/* Apply LTO for AOT mode */
if (comp_ctx->comp_data->func_count >= 10)
/* Adds the pre-link optimizations if the func count
is large enough */
if (comp_ctx->comp_data->func_count >= 10
|| comp_ctx->enable_llvm_pgo || comp_ctx->use_prof_file)
/* Add the pre-link optimizations if the func count
is large enough or PGO is enabled */
MPM.addPass(PB.buildLTOPreLinkDefaultPipeline(OL));
else
MPM.addPass(PB.buildLTODefaultPipeline(OL, NULL));
@ -358,3 +380,34 @@ aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module)
MPM.run(*M, MAM);
}
char *
aot_compress_aot_func_names(AOTCompContext *comp_ctx, uint32 *p_size)
{
std::vector<std::string> NameStrs;
std::string Result;
char buf[32], *compressed_str;
uint32 compressed_str_len, i;
for (i = 0; i < comp_ctx->func_ctx_count; i++) {
snprintf(buf, sizeof(buf), "%s%d", AOT_FUNC_PREFIX, i);
std::string str(buf);
NameStrs.push_back(str);
}
if (collectPGOFuncNameStrings(NameStrs, true, Result)) {
aot_set_last_error("collect pgo func name strings failed");
return NULL;
}
compressed_str_len = Result.size();
if (!(compressed_str = (char *)wasm_runtime_malloc(compressed_str_len))) {
aot_set_last_error("allocate memory failed");
return NULL;
}
bh_memcpy_s(compressed_str, compressed_str_len, Result.c_str(),
compressed_str_len);
*p_size = compressed_str_len;
return compressed_str;
}