Implement AOT static PGO (#2243)
LLVM PGO (Profile-Guided Optimization) allows the compiler to better optimize code
for how it actually runs. This PR implements AOT static PGO, which has been tested on
Linux x86-64 and x86-32. The basic steps are:
1. Use `wamrc --enable-llvm-pgo -o <aot_file_of_pgo> <wasm_file>`
to generate an instrumented aot file.
2. Compile iwasm with `cmake -DWAMR_BUILD_STATIC_PGO=1` and run
`iwasm --gen-prof-file=<raw_profile_file> <aot_file_of_pgo>`
to generate the raw profile file.
3. Run `llvm-profdata merge -output=<profile_file> <raw_profile_file>`
to merge the raw profile file into the profile file.
4. Run `wamrc --use-prof-file=<profile_file> -o <aot_file> <wasm_file>`
to generate the optimized aot file.
5. Run the optimized aot_file: `iwasm <aot_file>`.
Test scripts are also added for each benchmark; run `test_pgo.sh` under
each benchmark's folder to test AOT static PGO.
This commit is contained in:
@ -41,6 +41,10 @@ typedef struct AOTObjectDataSection {
|
||||
char *name;
|
||||
uint8 *data;
|
||||
uint32 size;
|
||||
#if WASM_ENABLE_WAMR_COMPILER != 0 || WASM_ENABLE_JIT != 0
|
||||
bool is_name_allocated;
|
||||
bool is_data_allocated;
|
||||
#endif
|
||||
} AOTObjectDataSection;
|
||||
|
||||
/* Relocation info */
|
||||
@ -51,6 +55,9 @@ typedef struct AOTRelocation {
|
||||
char *symbol_name;
|
||||
/* index in the symbol offset field */
|
||||
uint32 symbol_index;
|
||||
#if WASM_ENABLE_WAMR_COMPILER != 0 || WASM_ENABLE_JIT != 0
|
||||
bool is_symbol_name_allocated;
|
||||
#endif
|
||||
} AOTRelocation;
|
||||
|
||||
/* Relocation Group */
|
||||
@ -60,6 +67,9 @@ typedef struct AOTRelocationGroup {
|
||||
uint32 name_index;
|
||||
uint32 relocation_count;
|
||||
AOTRelocation *relocations;
|
||||
#if WASM_ENABLE_WAMR_COMPILER != 0 || WASM_ENABLE_JIT != 0
|
||||
bool is_section_name_allocated;
|
||||
#endif
|
||||
} AOTRelocationGroup;
|
||||
|
||||
/* AOT function instance */
|
||||
@ -108,6 +118,13 @@ typedef struct AOTUnwindInfo {
|
||||
#define PLT_ITEM_SIZE 12
|
||||
#endif
|
||||
|
||||
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
|
||||
/* Singly linked list node carrying one function index; nodes are chained
   through `next`. `GOTItemList` is a pointer-to-node alias used for list
   heads/tails.
   NOTE(review): presumably one node per GOT entry needed on x86-64 —
   confirm against the AOT loader. */
typedef struct GOTItem {
|
||||
uint32 func_idx;
|
||||
struct GOTItem *next;
|
||||
} GOTItem, *GOTItemList;
|
||||
#endif
|
||||
|
||||
typedef struct AOTModule {
|
||||
uint32 module_type;
|
||||
|
||||
@ -204,6 +221,13 @@ typedef struct AOTModule {
|
||||
bool rtl_func_table_registered;
|
||||
#endif
|
||||
|
||||
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
|
||||
uint32 got_item_count;
|
||||
GOTItemList got_item_list;
|
||||
GOTItemList got_item_list_end;
|
||||
void **got_func_ptrs;
|
||||
#endif
|
||||
|
||||
/* data sections in AOT object file, including .data, .rodata
|
||||
and .rodata.cstN. */
|
||||
AOTObjectDataSection *data_sections;
|
||||
@ -294,6 +318,54 @@ typedef struct AOTFrame {
|
||||
#endif
|
||||
} AOTFrame;
|
||||
|
||||
#if WASM_ENABLE_STATIC_PGO != 0
|
||||
/* Fixed-size header consisting of eleven uint64 fields.
   NOTE(review): presumably mirrors the header of LLVM's raw profile
   format for the LLVM version in use, so the field order and count must
   not be changed independently — confirm against LLVM's
   InstrProfData.inc. */
typedef struct LLVMProfileRawHeader {
|
||||
uint64 magic;
|
||||
uint64 version;
|
||||
uint64 binary_ids_size;
|
||||
uint64 num_prof_data;
|
||||
uint64 padding_bytes_before_counters;
|
||||
uint64 num_prof_counters;
|
||||
uint64 padding_bytes_after_counters;
|
||||
uint64 names_size;
|
||||
uint64 counters_delta;
|
||||
uint64 names_delta;
|
||||
uint64 value_kind_last;
|
||||
} LLVMProfileRawHeader;
|
||||
|
||||
/* Singly linked list node pairing a 64-bit value with a 64-bit count;
   nodes are chained through `next`.
   NOTE(review): presumably one observed value and its hit count at a
   value-profiling site — confirm against the instrumentation hooks. */
typedef struct ValueProfNode {
|
||||
uint64 value;
|
||||
uint64 count;
|
||||
struct ValueProfNode *next;
|
||||
} ValueProfNode;
|
||||
|
||||
/* The profiling data of data sections created by the aot compiler and
|
||||
used when profiling; the width of a pointer can be 8 bytes (64-bit)
|
||||
or 4 bytes (32-bit) */
|
||||
/* In-memory profiling record: `func_ptr` and `values` are declared with
   native pointer-sized types, so their width follows the build target.
   LLVMProfileData_64 declares the same fields with fixed 64-bit widths. */
typedef struct LLVMProfileData {
|
||||
uint64 func_md5;
|
||||
uint64 func_hash;
|
||||
uint64 offset_counters;
|
||||
/* pointer-sized integer: width depends on the target */
uintptr_t func_ptr;
|
||||
/* pointer to ValueProfNode pointers: width depends on the target */
ValueProfNode **values;
|
||||
uint32 num_counters;
|
||||
/* two entries; NOTE(review): presumably one per value-profiling kind —
   confirm */
uint16 num_value_sites[2];
|
||||
} LLVMProfileData;
|
||||
|
||||
/* The profiling data for writing to the output file; the width of
|
||||
a pointer is 8 bytes, assuming we always use wamrc and llvm-profdata
|
||||
in 64-bit mode */
|
||||
/* Same field names and order as LLVMProfileData, but `func_ptr` and
   `values` are declared as uint64 instead of pointer-sized types, so the
   field widths do not depend on the host pointer width. */
typedef struct LLVMProfileData_64 {
|
||||
uint64 func_md5;
|
||||
uint64 func_hash;
|
||||
uint64 offset_counters;
|
||||
/* fixed 64-bit slot in place of the native-width func_ptr */
uint64 func_ptr;
|
||||
/* fixed 64-bit slot in place of the native-width values pointer */
uint64 values;
|
||||
uint32 num_counters;
|
||||
uint16 num_value_sites[2];
|
||||
} LLVMProfileData_64;
|
||||
#endif /* end of WASM_ENABLE_STATIC_PGO != 0 */
|
||||
|
||||
/**
|
||||
* Load an AOT module from aot file buffer
|
||||
* @param buf the byte buffer which contains the AOT file data
|
||||
@ -564,6 +636,32 @@ aot_dump_perf_profiling(const AOTModuleInstance *module_inst);
|
||||
const uint8 *
|
||||
aot_get_custom_section(const AOTModule *module, const char *name, uint32 *len);
|
||||
|
||||
#if WASM_ENABLE_STATIC_PGO != 0
|
||||
/* NOTE(review): presumably the runtime hook that instrumented AOT code
   calls to record `target_value` at value site `counter_idx` of the
   profile record `data` — the implementation is not visible here,
   confirm. */
void
|
||||
llvm_profile_instrument_target(uint64 target_value, void *data,
|
||||
uint32 counter_idx);
|
||||
|
||||
/* Same parameter list as llvm_profile_instrument_target.
   NOTE(review): presumably the variant used for memory-operation size
   profiling — confirm against the implementation. */
void
|
||||
llvm_profile_instrument_memop(uint64 target_value, void *data,
|
||||
uint32 counter_idx);
|
||||
|
||||
/* Takes a module instance and returns a uint32.
   NOTE(review): presumably the number of bytes needed to hold the
   instance's PGO profile data — confirm against the implementation. */
uint32
|
||||
aot_get_pgo_prof_data_size(AOTModuleInstance *module_inst);
|
||||
|
||||
/* Takes a module instance, a caller-provided buffer `buf` and its length
   `len`, and returns a uint32.
   NOTE(review): presumably writes the profile data into `buf` and returns
   the number of bytes written; behavior when `len` is too small is not
   visible here — confirm. */
uint32
|
||||
aot_dump_pgo_prof_data_to_buf(AOTModuleInstance *module_inst, char *buf,
|
||||
uint32 len);
|
||||
|
||||
/* The three helpers below each take a pointer to raw bytes and return
   nothing, so any change is made in place through `p_data`.
   NOTE(review): presumably they swap the byte order of a 16-, 32- or
   64-bit value respectively — confirm against the implementation. */
void
|
||||
aot_exchange_uint16(uint8 *p_data);
|
||||
|
||||
void
|
||||
aot_exchange_uint32(uint8 *p_data);
|
||||
|
||||
void
|
||||
aot_exchange_uint64(uint8 *p_data);
|
||||
#endif /* end of WASM_ENABLE_STATIC_PGO != 0 */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* end of extern "C" */
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user