Implement the segue optimization for LLVM AOT/JIT (#2230)

Segue is an optimization technology which uses x86 segment register to store
the WebAssembly linear memory base address, so as to remove most of the cost
of SFI (Software-based Fault Isolation) base addition and free up a general
purpose register. In this way it may:
- Improve the performance of JIT/AOT
- Reduce the footprint of JIT/AOT, the JIT/AOT code generated is smaller
- Reduce the compilation time of JIT/AOT

This PR uses the x86-64 GS segment register to apply the optimization, currently
it supports the Linux and Linux-SGX platforms on the x86-64 target. By default it is disabled;
developers can use the option below to enable it for wamrc and iwasm (with LLVM
JIT enabled):
```bash
wamrc --enable-segue=[<flags>] -o output_file wasm_file
iwasm --enable-segue=[<flags>] wasm_file [args...]
```
`flags` can be:
    i32.load, i64.load, f32.load, f64.load, v128.load,
    i32.store, i64.store, f32.store, f64.store, v128.store
Use commas to separate them, e.g. `--enable-segue=i32.load,i64.store`;
`--enable-segue` alone means all flags are added.

Acknowledgement:
Many thanks to Intel Labs, UC San Diego and UT Austin teams for introducing this
technology and the great support and guidance!

Signed-off-by: Wenyong Huang <wenyong.huang@intel.com>
Co-authored-by: Vahldiek-oberwagner, Anjo Lucas <anjo.lucas.vahldiek-oberwagner@intel.com>
This commit is contained in:
Wenyong Huang
2023-05-26 10:13:33 +08:00
committed by GitHub
parent 27239723a9
commit 76be848ec3
42 changed files with 1864 additions and 123 deletions

View File

@ -14,12 +14,12 @@
static LLVMValueRef
simd_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align,
uint32 offset, uint32 data_length, LLVMTypeRef ptr_type,
LLVMTypeRef data_type)
LLVMTypeRef data_type, bool enable_segue)
{
LLVMValueRef maddr, data;
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset,
data_length))) {
data_length, enable_segue))) {
HANDLE_FAILURE("aot_check_memory_overflow");
return NULL;
}
@ -44,10 +44,12 @@ bool
aot_compile_simd_v128_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 align, uint32 offset)
{
bool enable_segue = comp_ctx->enable_segue_v128_load;
LLVMTypeRef v128_ptr_type = enable_segue ? V128_PTR_TYPE_GS : V128_PTR_TYPE;
LLVMValueRef result;
if (!(result = simd_load(comp_ctx, func_ctx, align, offset, 16,
V128_PTR_TYPE, V128_TYPE))) {
v128_ptr_type, V128_TYPE, enable_segue))) {
return false;
}
@ -75,6 +77,7 @@ aot_compile_simd_load_extend(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
LLVMVectorType(I32_TYPE, 2), LLVMVectorType(I32_TYPE, 2),
};
LLVMTypeRef sub_vector_type, sub_vector_ptr_type;
bool enable_segue = comp_ctx->enable_segue_v128_load;
bh_assert(opcode_index < 6);
@ -82,13 +85,15 @@ aot_compile_simd_load_extend(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
/* to vector ptr type */
if (!sub_vector_type
|| !(sub_vector_ptr_type = LLVMPointerType(sub_vector_type, 0))) {
|| !(sub_vector_ptr_type =
LLVMPointerType(sub_vector_type, enable_segue ? 256 : 0))) {
HANDLE_FAILURE("LLVMPointerType");
return false;
}
if (!(sub_vector = simd_load(comp_ctx, func_ctx, align, offset, 8,
sub_vector_ptr_type, sub_vector_type))) {
if (!(sub_vector =
simd_load(comp_ctx, func_ctx, align, offset, 8,
sub_vector_ptr_type, sub_vector_type, enable_segue))) {
return false;
}
@ -118,6 +123,9 @@ aot_compile_simd_load_splat(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
LLVMValueRef element, result;
LLVMTypeRef element_ptr_types[] = { INT8_PTR_TYPE, INT16_PTR_TYPE,
INT32_PTR_TYPE, INT64_PTR_TYPE };
LLVMTypeRef element_ptr_types_gs[] = { INT8_PTR_TYPE_GS, INT16_PTR_TYPE_GS,
INT32_PTR_TYPE_GS,
INT64_PTR_TYPE_GS };
LLVMTypeRef element_data_types[] = { INT8_TYPE, INT16_TYPE, I32_TYPE,
I64_TYPE };
uint32 data_lengths[] = { 1, 2, 4, 8 };
@ -133,13 +141,16 @@ aot_compile_simd_load_splat(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
LLVM_CONST(i32x4_zero),
LLVM_CONST(i32x2_zero),
};
bool enable_segue = comp_ctx->enable_segue_v128_load;
bh_assert(opcode_index < 4);
if (!(element = simd_load(comp_ctx, func_ctx, align, offset,
data_lengths[opcode_index],
element_ptr_types[opcode_index],
element_data_types[opcode_index]))) {
if (!(element = simd_load(
comp_ctx, func_ctx, align, offset, data_lengths[opcode_index],
comp_ctx->enable_segue_v128_load
? element_ptr_types_gs[opcode_index]
: element_ptr_types[opcode_index],
element_data_types[opcode_index], enable_segue))) {
return false;
}
@ -170,11 +181,15 @@ aot_compile_simd_load_lane(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 data_lengths[] = { 1, 2, 4, 8 };
LLVMTypeRef element_ptr_types[] = { INT8_PTR_TYPE, INT16_PTR_TYPE,
INT32_PTR_TYPE, INT64_PTR_TYPE };
LLVMTypeRef element_ptr_types_gs[] = { INT8_PTR_TYPE_GS, INT16_PTR_TYPE_GS,
INT32_PTR_TYPE_GS,
INT64_PTR_TYPE_GS };
LLVMTypeRef element_data_types[] = { INT8_TYPE, INT16_TYPE, I32_TYPE,
I64_TYPE };
LLVMTypeRef vector_types[] = { V128_i8x16_TYPE, V128_i16x8_TYPE,
V128_i32x4_TYPE, V128_i64x2_TYPE };
LLVMValueRef lane = simd_lane_id_to_llvm_value(comp_ctx, lane_id);
bool enable_segue = comp_ctx->enable_segue_v128_load;
bh_assert(opcode_index < 4);
@ -183,10 +198,12 @@ aot_compile_simd_load_lane(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
return false;
}
if (!(element = simd_load(comp_ctx, func_ctx, align, offset,
data_lengths[opcode_index],
element_ptr_types[opcode_index],
element_data_types[opcode_index]))) {
if (!(element = simd_load(
comp_ctx, func_ctx, align, offset, data_lengths[opcode_index],
comp_ctx->enable_segue_v128_load
? element_ptr_types_gs[opcode_index]
: element_ptr_types[opcode_index],
element_data_types[opcode_index], enable_segue))) {
return false;
}
@ -207,6 +224,8 @@ aot_compile_simd_load_zero(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 opcode_index = opcode - SIMD_v128_load32_zero;
uint32 data_lengths[] = { 4, 8 };
LLVMTypeRef element_ptr_types[] = { INT32_PTR_TYPE, INT64_PTR_TYPE };
LLVMTypeRef element_ptr_types_gs[] = { INT32_PTR_TYPE_GS,
INT64_PTR_TYPE_GS };
LLVMTypeRef element_data_types[] = { I32_TYPE, I64_TYPE };
LLVMValueRef zero[] = {
LLVM_CONST(i32x4_vec_zero),
@ -222,13 +241,16 @@ aot_compile_simd_load_zero(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
LLVM_CONST(i32_six) },
{ LLVM_CONST(i32_zero), LLVM_CONST(i32_two) },
};
bool enable_segue = comp_ctx->enable_segue_v128_load;
bh_assert(opcode_index < 2);
if (!(element = simd_load(comp_ctx, func_ctx, align, offset,
data_lengths[opcode_index],
element_ptr_types[opcode_index],
element_data_types[opcode_index]))) {
if (!(element = simd_load(
comp_ctx, func_ctx, align, offset, data_lengths[opcode_index],
comp_ctx->enable_segue_v128_load
? element_ptr_types_gs[opcode_index]
: element_ptr_types[opcode_index],
element_data_types[opcode_index], enable_segue))) {
return false;
}
@ -260,12 +282,12 @@ aot_compile_simd_load_zero(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
static bool
simd_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align,
uint32 offset, uint32 data_length, LLVMValueRef value,
LLVMTypeRef value_ptr_type)
LLVMTypeRef value_ptr_type, bool enable_segue)
{
LLVMValueRef maddr, result;
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset,
data_length)))
data_length, enable_segue)))
return false;
if (!(maddr = LLVMBuildBitCast(comp_ctx->builder, maddr, value_ptr_type,
@ -288,12 +310,14 @@ bool
aot_compile_simd_v128_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 align, uint32 offset)
{
bool enable_segue = comp_ctx->enable_segue_v128_store;
LLVMTypeRef v128_ptr_type = enable_segue ? V128_PTR_TYPE_GS : V128_PTR_TYPE;
LLVMValueRef value;
POP_V128(value);
return simd_store(comp_ctx, func_ctx, align, offset, 16, value,
V128_PTR_TYPE);
v128_ptr_type, enable_segue);
fail:
return false;
}
@ -307,10 +331,14 @@ aot_compile_simd_store_lane(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 data_lengths[] = { 1, 2, 4, 8 };
LLVMTypeRef element_ptr_types[] = { INT8_PTR_TYPE, INT16_PTR_TYPE,
INT32_PTR_TYPE, INT64_PTR_TYPE };
LLVMTypeRef element_ptr_types_gs[] = { INT8_PTR_TYPE_GS, INT16_PTR_TYPE_GS,
INT32_PTR_TYPE_GS,
INT64_PTR_TYPE_GS };
uint32 opcode_index = opcode - SIMD_v128_store8_lane;
LLVMTypeRef vector_types[] = { V128_i8x16_TYPE, V128_i16x8_TYPE,
V128_i32x4_TYPE, V128_i64x2_TYPE };
LLVMValueRef lane = simd_lane_id_to_llvm_value(comp_ctx, lane_id);
bool enable_segue = comp_ctx->enable_segue_v128_store;
bh_assert(opcode_index < 4);
@ -327,5 +355,7 @@ aot_compile_simd_store_lane(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
return simd_store(comp_ctx, func_ctx, align, offset,
data_lengths[opcode_index], element,
element_ptr_types[opcode_index]);
enable_segue ? element_ptr_types_gs[opcode_index]
: element_ptr_types[opcode_index],
enable_segue);
}