Implement AOT static PGO (#2243)
LLVM PGO (Profile-Guided Optimization) allows the compiler to better optimize code
for how it actually runs. This PR implements the AOT static PGO, and is tested on
Linux x86-64 and x86-32. The basic steps are:
1. Use `wamrc --enable-llvm-pgo -o <aot_file_of_pgo> <wasm_file>`
to generate an instrumented aot file.
2. Compile iwasm with `cmake -DWAMR_BUILD_STATIC_PGO=1` and run
`iwasm --gen-prof-file=<raw_profile_file> <aot_file_of_pgo>`
to generate the raw profile file.
3. Run `llvm-profdata merge -output=<profile_file> <raw_profile_file>`
to merge the raw profile file into the profile file.
4. Run `wamrc --use-prof-file=<profile_file> -o <aot_file> <wasm_file>`
to generate the optimized aot file.
5. Run the optimized aot_file: `iwasm <aot_file>`.
The test scripts are also added for each benchmark, run `test_pgo.sh` under
each benchmark's folder to test the AOT static pgo.
This commit is contained in:
@ -44,6 +44,7 @@
|
||||
#if LLVM_VERSION_MAJOR >= 12
|
||||
#include <llvm/Analysis/AliasAnalysis.h>
|
||||
#endif
|
||||
#include <llvm/ProfileData/InstrProf.h>
|
||||
|
||||
#include <cstring>
|
||||
#include "../aot/aot_runtime.h"
|
||||
@ -232,14 +233,26 @@ aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module)
|
||||
PTO.SLPVectorization = true;
|
||||
PTO.LoopUnrolling = true;
|
||||
|
||||
Optional<PGOOptions> PGO = None;
|
||||
if (comp_ctx->enable_llvm_pgo) {
|
||||
/* Disable static counter allocation for value profiler,
|
||||
it will be allocated by runtime */
|
||||
const char *argv[] = { "", "-vp-static-alloc=false" };
|
||||
cl::ParseCommandLineOptions(2, argv);
|
||||
PGO = PGOOptions("", "", "", PGOOptions::IRInstr);
|
||||
}
|
||||
else if (comp_ctx->use_prof_file) {
|
||||
PGO = PGOOptions(comp_ctx->use_prof_file, "", "", PGOOptions::IRUse);
|
||||
}
|
||||
|
||||
#ifdef DEBUG_PASS
|
||||
PassInstrumentationCallbacks PIC;
|
||||
PassBuilder PB(TM, PTO, None, &PIC);
|
||||
PassBuilder PB(TM, PTO, PGO, &PIC);
|
||||
#else
|
||||
#if LLVM_VERSION_MAJOR == 12
|
||||
PassBuilder PB(false, TM, PTO);
|
||||
PassBuilder PB(false, TM, PTO, PGO);
|
||||
#else
|
||||
PassBuilder PB(TM, PTO);
|
||||
PassBuilder PB(TM, PTO, PGO);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -334,8 +347,16 @@ aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module)
|
||||
FPM.addPass(SLPVectorizerPass());
|
||||
FPM.addPass(LoadStoreVectorizerPass());
|
||||
|
||||
if (comp_ctx->enable_llvm_pgo || comp_ctx->use_prof_file) {
|
||||
LICMOptions licm_opt;
|
||||
/* LICM pass: loop invariant code motion, attempting to remove
|
||||
as much code from the body of a loop as possible. Experiments
|
||||
show it is good to enable it when pgo is enabled. */
|
||||
FPM.addPass(
|
||||
createFunctionToLoopPassAdaptor(LICMPass(licm_opt), true));
|
||||
}
|
||||
|
||||
/*
|
||||
FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass()));
|
||||
FPM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass()));
|
||||
FPM.addPass(createFunctionToLoopPassAdaptor(SimpleLoopUnswitchPass()));
|
||||
*/
|
||||
@ -344,9 +365,10 @@ aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module)
|
||||
|
||||
if (!disable_llvm_lto) {
|
||||
/* Apply LTO for AOT mode */
|
||||
if (comp_ctx->comp_data->func_count >= 10)
|
||||
/* Adds the pre-link optimizations if the func count
|
||||
is large enough */
|
||||
if (comp_ctx->comp_data->func_count >= 10
|
||||
|| comp_ctx->enable_llvm_pgo || comp_ctx->use_prof_file)
|
||||
/* Add the pre-link optimizations if the func count
|
||||
is large enough or PGO is enabled */
|
||||
MPM.addPass(PB.buildLTOPreLinkDefaultPipeline(OL));
|
||||
else
|
||||
MPM.addPass(PB.buildLTODefaultPipeline(OL, NULL));
|
||||
@ -358,3 +380,34 @@ aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module)
|
||||
|
||||
MPM.run(*M, MAM);
|
||||
}
|
||||
|
||||
char *
|
||||
aot_compress_aot_func_names(AOTCompContext *comp_ctx, uint32 *p_size)
|
||||
{
|
||||
std::vector<std::string> NameStrs;
|
||||
std::string Result;
|
||||
char buf[32], *compressed_str;
|
||||
uint32 compressed_str_len, i;
|
||||
|
||||
for (i = 0; i < comp_ctx->func_ctx_count; i++) {
|
||||
snprintf(buf, sizeof(buf), "%s%d", AOT_FUNC_PREFIX, i);
|
||||
std::string str(buf);
|
||||
NameStrs.push_back(str);
|
||||
}
|
||||
|
||||
if (collectPGOFuncNameStrings(NameStrs, true, Result)) {
|
||||
aot_set_last_error("collect pgo func name strings failed");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
compressed_str_len = Result.size();
|
||||
if (!(compressed_str = (char *)wasm_runtime_malloc(compressed_str_len))) {
|
||||
aot_set_last_error("allocate memory failed");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bh_memcpy_s(compressed_str, compressed_str_len, Result.c_str(),
|
||||
compressed_str_len);
|
||||
*p_size = compressed_str_len;
|
||||
return compressed_str;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user