Implement the segue optimization for LLVM AOT/JIT (#2230)

Segue is an optimization technology which uses an x86 segment register to store
the WebAssembly linear memory base address, so as to remove most of the cost
of the SFI (Software-based Fault Isolation) base addition and free up a
general-purpose register. In this way, it may:
- Improve the performance of JIT/AOT
- Reduce the footprint of JIT/AOT, the JIT/AOT code generated is smaller
- Reduce the compilation time of JIT/AOT

This PR uses the x86-64 GS segment register to apply the optimization. Currently
it supports the linux and linux-sgx platforms on the x86-64 target. It is disabled
by default; developers can use the options below to enable it for wamrc and iwasm
(with LLVM JIT enabled):
```bash
wamrc --enable-segue=[<flags>] -o output_file wasm_file
iwasm --enable-segue=[<flags>] wasm_file [args...]
```
`flags` can be:
    i32.load, i64.load, f32.load, f64.load, v128.load,
    i32.store, i64.store, f32.store, f64.store, v128.store
Use commas to separate them, e.g. `--enable-segue=i32.load,i64.store`.
A bare `--enable-segue` (with no flags given) means all flags are added.

Acknowledgement:
Many thanks to the Intel Labs, UC San Diego and UT Austin teams for introducing
this technology and for their great support and guidance!

Signed-off-by: Wenyong Huang <wenyong.huang@intel.com>
Co-authored-by: Vahldiek-oberwagner, Anjo Lucas <anjo.lucas.vahldiek-oberwagner@intel.com>
This commit is contained in:
Wenyong Huang
2023-05-26 10:13:33 +08:00
committed by GitHub
parent 27239723a9
commit 76be848ec3
42 changed files with 1864 additions and 123 deletions

View File

@ -1132,6 +1132,28 @@ aot_set_llvm_basic_types(AOTLLVMTypes *basic_types, LLVMContextRef context)
basic_types->v128_type = basic_types->i64x2_vec_type;
basic_types->v128_ptr_type = LLVMPointerType(basic_types->v128_type, 0);
basic_types->int8_ptr_type_gs =
LLVMPointerType(basic_types->int8_type, 256);
basic_types->int16_ptr_type_gs =
LLVMPointerType(basic_types->int16_type, 256);
basic_types->int32_ptr_type_gs =
LLVMPointerType(basic_types->int32_type, 256);
basic_types->int64_ptr_type_gs =
LLVMPointerType(basic_types->int64_type, 256);
basic_types->float32_ptr_type_gs =
LLVMPointerType(basic_types->float32_type, 256);
basic_types->float64_ptr_type_gs =
LLVMPointerType(basic_types->float64_type, 256);
basic_types->v128_ptr_type_gs =
LLVMPointerType(basic_types->v128_type, 256);
if (!basic_types->int8_ptr_type_gs || !basic_types->int16_ptr_type_gs
|| !basic_types->int32_ptr_type_gs || !basic_types->int64_ptr_type_gs
|| !basic_types->float32_ptr_type_gs
|| !basic_types->float64_ptr_type_gs
|| !basic_types->v128_ptr_type_gs) {
return false;
}
basic_types->i1x2_vec_type = LLVMVectorType(basic_types->int1_type, 2);
basic_types->funcref_type = LLVMInt32TypeInContext(context);
@ -2073,6 +2095,37 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
}
}
triple = LLVMGetTargetMachineTriple(comp_ctx->target_machine);
if (!triple) {
aot_set_last_error("get target machine triple failed.");
goto fail;
}
if (strstr(triple, "linux") && !strcmp(comp_ctx->target_arch, "x86_64")) {
if (option->segue_flags) {
if (option->segue_flags & (1 << 0))
comp_ctx->enable_segue_i32_load = true;
if (option->segue_flags & (1 << 1))
comp_ctx->enable_segue_i64_load = true;
if (option->segue_flags & (1 << 2))
comp_ctx->enable_segue_f32_load = true;
if (option->segue_flags & (1 << 3))
comp_ctx->enable_segue_f64_load = true;
if (option->segue_flags & (1 << 4))
comp_ctx->enable_segue_v128_load = true;
if (option->segue_flags & (1 << 8))
comp_ctx->enable_segue_i32_store = true;
if (option->segue_flags & (1 << 9))
comp_ctx->enable_segue_i64_store = true;
if (option->segue_flags & (1 << 10))
comp_ctx->enable_segue_f32_store = true;
if (option->segue_flags & (1 << 11))
comp_ctx->enable_segue_f64_store = true;
if (option->segue_flags & (1 << 12))
comp_ctx->enable_segue_v128_store = true;
}
}
LLVMDisposeMessage(triple);
if (option->enable_simd && strcmp(comp_ctx->target_arch, "x86_64") != 0
&& strncmp(comp_ctx->target_arch, "aarch64", 7) != 0) {
/* Disable simd if it isn't supported by target arch */