Implement the segue optimization for LLVM AOT/JIT (#2230)

Segue is an optimization technology which uses x86 segment register to store
the WebAssembly linear memory base address, so as to remove most of the cost
of SFI (Software-based Fault Isolation) base addition and free up a general
purpose register. In this way it may:
- Improve the performance of JIT/AOT
- Reduce the footprint of JIT/AOT, the JIT/AOT code generated is smaller
- Reduce the compilation time of JIT/AOT

This PR uses the x86-64 GS segment register to apply the optimization, currently
it supports linux and linux-sgx platforms on x86-64 target. By default it is disabled;
developers can use the options below to enable it for wamrc and iwasm (with LLVM
JIT enabled):
```bash
wamrc --enable-segue=[<flags>] -o output_file wasm_file
iwasm --enable-segue=[<flags>] wasm_file [args...]
```
`flags` can be:
    i32.load, i64.load, f32.load, f64.load, v128.load,
    i32.store, i64.store, f32.store, f64.store, v128.store
Use comma to separate them, e.g. `--enable-segue=i32.load,i64.store`,
and `--enable-segue` means all flags are added.

Acknowledgement:
Many thanks to Intel Labs, UC San Diego and UT Austin teams for introducing this
technology and the great support and guidance!

Signed-off-by: Wenyong Huang <wenyong.huang@intel.com>
Co-authored-by: Vahldiek-oberwagner, Anjo Lucas <anjo.lucas.vahldiek-oberwagner@intel.com>
This commit is contained in:
Wenyong Huang
2023-05-26 10:13:33 +08:00
committed by GitHub
parent 27239723a9
commit 76be848ec3
42 changed files with 1864 additions and 123 deletions

View File

@ -2889,6 +2889,16 @@ load(const uint8 *buf, uint32 size, AOTModule *module, char *error_buf,
module->code and will be destroyed in aot_unload() */
destroy_sections(section_list, false);
}
#if 0
{
uint32 i;
for (i = 0; i < module->func_count; i++) {
os_printf("AOT func %u, addr: %p\n", i, module->func_ptrs[i]);
}
}
#endif
return ret;
fail:
return false;

View File

@ -1015,6 +1015,15 @@ execute_post_instantiate_functions(AOTModuleInstance *module_inst,
}
}
#if defined(os_writegsbase)
{
AOTMemoryInstance *memory_inst = aot_get_default_memory(module_inst);
if (memory_inst)
/* write base addr of linear memory to GS segment register */
os_writegsbase(memory_inst->memory_data);
}
#endif
/* Execute start function for both main instance and sub instance */
if (module->start_function) {
AOTFunctionInstance start_func = { 0 };
@ -1453,6 +1462,15 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function,
}
argc = func_type->param_cell_num;
#if defined(os_writegsbase)
{
AOTMemoryInstance *memory_inst = aot_get_default_memory(module_inst);
if (memory_inst)
/* write base addr of linear memory to GS segment register */
os_writegsbase(memory_inst->memory_data);
}
#endif
/* func pointer was looked up previously */
bh_assert(function->u.func.func_ptr != NULL);

View File

@ -624,6 +624,11 @@ wasm_enlarge_memory_internal(WASMModuleInstance *module, uint32 inc_page_count)
#endif
#endif
#if defined(os_writegsbase)
/* write base addr of linear memory to GS segment register */
os_writegsbase(memory_data_new);
#endif
return ret;
}
#else
@ -756,4 +761,4 @@ wasm_get_linear_memory_size(WASMMemoryInstance *memory, void *node)
#endif
return linear_mem_size;
}
#endif
#endif

View File

@ -130,7 +130,7 @@ static JitCompOptions jit_options = { 0 };
#endif
#if WASM_ENABLE_JIT != 0
static LLVMJITOptions llvm_jit_options = { 3, 3 };
static LLVMJITOptions llvm_jit_options = { 3, 3, 0 };
#endif
static RunningMode runtime_running_mode = Mode_Default;
@ -554,6 +554,7 @@ wasm_runtime_full_init(RuntimeInitArgs *init_args)
#if WASM_ENABLE_JIT != 0
llvm_jit_options.size_level = init_args->llvm_jit_size_level;
llvm_jit_options.opt_level = init_args->llvm_jit_opt_level;
llvm_jit_options.segue_flags = init_args->segue_flags;
#endif
if (!wasm_runtime_env_init()) {

View File

@ -420,6 +420,7 @@ typedef struct wasm_frame_t {
/* Options for the LLVM JIT, populated from RuntimeInitArgs in
   wasm_runtime_full_init() */
typedef struct LLVMJITOptions {
/* LLVM IR optimization level (default 3) */
uint32 opt_level;
/* LLVM code size level (default 3) */
uint32 size_level;
/* Segue optimization flags: bitmap selecting which load/store
   opcodes use the GS segment base as the linear memory base
   (default 0, i.e. disabled) */
uint32 segue_flags;
} LLVMJITOptions;
#endif

View File

@ -239,6 +239,13 @@ check_type_compatible(uint8 src_type, uint8 dst_type)
#define FUNC_REF_TYPE comp_ctx->basic_types.funcref_type
#define EXTERN_REF_TYPE comp_ctx->basic_types.externref_type
#define INT8_PTR_TYPE_GS comp_ctx->basic_types.int8_ptr_type_gs
#define INT16_PTR_TYPE_GS comp_ctx->basic_types.int16_ptr_type_gs
#define INT32_PTR_TYPE_GS comp_ctx->basic_types.int32_ptr_type_gs
#define INT64_PTR_TYPE_GS comp_ctx->basic_types.int64_ptr_type_gs
#define F32_PTR_TYPE_GS comp_ctx->basic_types.float32_ptr_type_gs
#define F64_PTR_TYPE_GS comp_ctx->basic_types.float64_ptr_type_gs
#define I32_CONST(v) LLVMConstInt(I32_TYPE, v, true)
#define I64_CONST(v) LLVMConstInt(I64_TYPE, v, true)
#define F32_CONST(v) LLVMConstReal(F32_TYPE, v)
@ -272,6 +279,7 @@ check_type_compatible(uint8 src_type, uint8 dst_type)
#define V128_TYPE comp_ctx->basic_types.v128_type
#define V128_PTR_TYPE comp_ctx->basic_types.v128_ptr_type
#define V128_PTR_TYPE_GS comp_ctx->basic_types.v128_ptr_type_gs
#define V128_i8x16_TYPE comp_ctx->basic_types.i8x16_vec_type
#define V128_i16x8_TYPE comp_ctx->basic_types.i16x8_vec_type
#define V128_i32x4_TYPE comp_ctx->basic_types.i32x4_vec_type

View File

@ -81,7 +81,7 @@ get_memory_curr_page_count(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
LLVMValueRef
aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 offset, uint32 bytes)
uint32 offset, uint32 bytes, bool enable_segue)
{
LLVMValueRef offset_const = I32_CONST(offset);
LLVMValueRef addr, maddr, offset1, cmp1, cmp2, cmp;
@ -162,11 +162,20 @@ aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
/* inside memory space */
offset1 = I32_CONST((uint32)mem_offset);
CHECK_LLVM_CONST(offset1);
if (!(maddr = LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE,
mem_base_addr, &offset1, 1,
"maddr"))) {
aot_set_last_error("llvm build add failed.");
goto fail;
if (!enable_segue) {
if (!(maddr = LLVMBuildInBoundsGEP2(comp_ctx->builder,
INT8_TYPE, mem_base_addr,
&offset1, 1, "maddr"))) {
aot_set_last_error("llvm build add failed.");
goto fail;
}
}
else {
if (!(maddr = LLVMBuildIntToPtr(comp_ctx->builder, offset1,
INT8_PTR_TYPE_GS, "maddr"))) {
aot_set_last_error("llvm build IntToPtr failed.");
goto fail;
}
}
return maddr;
}
@ -244,11 +253,29 @@ aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
}
}
/* maddr = mem_base_addr + offset1 */
if (!(maddr = LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE,
if (!enable_segue) {
/* maddr = mem_base_addr + offset1 */
if (!(maddr =
LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE,
mem_base_addr, &offset1, 1, "maddr"))) {
aot_set_last_error("llvm build add failed.");
goto fail;
aot_set_last_error("llvm build add failed.");
goto fail;
}
}
else {
LLVMValueRef maddr_base;
if (!(maddr_base = LLVMBuildIntToPtr(comp_ctx->builder, addr,
INT8_PTR_TYPE_GS, "maddr_base"))) {
aot_set_last_error("llvm build int to ptr failed.");
goto fail;
}
if (!(maddr = LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE,
maddr_base, &offset_const, 1,
"maddr"))) {
aot_set_last_error("llvm build inboundgep failed.");
goto fail;
}
}
return maddr;
fail:
@ -388,13 +415,18 @@ aot_compile_op_i32_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
{
LLVMValueRef maddr, value = NULL;
LLVMTypeRef data_type;
bool enable_segue = comp_ctx->enable_segue_i32_load;
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes)))
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
enable_segue)))
return false;
switch (bytes) {
case 4:
BUILD_PTR_CAST(INT32_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT32_PTR_TYPE);
else
BUILD_PTR_CAST(INT32_PTR_TYPE_GS);
#if WASM_ENABLE_SHARED_MEMORY != 0
if (atomic)
BUILD_ATOMIC_LOAD(align, I32_TYPE);
@ -405,11 +437,17 @@ aot_compile_op_i32_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
case 2:
case 1:
if (bytes == 2) {
BUILD_PTR_CAST(INT16_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT16_PTR_TYPE);
else
BUILD_PTR_CAST(INT16_PTR_TYPE_GS);
data_type = INT16_TYPE;
}
else {
BUILD_PTR_CAST(INT8_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT8_PTR_TYPE);
else
BUILD_PTR_CAST(INT8_PTR_TYPE_GS);
data_type = INT8_TYPE;
}
@ -447,13 +485,18 @@ aot_compile_op_i64_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
{
LLVMValueRef maddr, value = NULL;
LLVMTypeRef data_type;
bool enable_segue = comp_ctx->enable_segue_i64_load;
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes)))
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
enable_segue)))
return false;
switch (bytes) {
case 8:
BUILD_PTR_CAST(INT64_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT64_PTR_TYPE);
else
BUILD_PTR_CAST(INT64_PTR_TYPE_GS);
#if WASM_ENABLE_SHARED_MEMORY != 0
if (atomic)
BUILD_ATOMIC_LOAD(align, I64_TYPE);
@ -465,15 +508,24 @@ aot_compile_op_i64_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
case 2:
case 1:
if (bytes == 4) {
BUILD_PTR_CAST(INT32_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT32_PTR_TYPE);
else
BUILD_PTR_CAST(INT32_PTR_TYPE_GS);
data_type = I32_TYPE;
}
else if (bytes == 2) {
BUILD_PTR_CAST(INT16_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT16_PTR_TYPE);
else
BUILD_PTR_CAST(INT16_PTR_TYPE_GS);
data_type = INT16_TYPE;
}
else {
BUILD_PTR_CAST(INT8_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT8_PTR_TYPE);
else
BUILD_PTR_CAST(INT8_PTR_TYPE_GS);
data_type = INT8_TYPE;
}
@ -509,12 +561,18 @@ aot_compile_op_f32_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 align, uint32 offset)
{
LLVMValueRef maddr, value;
bool enable_segue = comp_ctx->enable_segue_f32_load;
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 4)))
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 4,
enable_segue)))
return false;
BUILD_PTR_CAST(F32_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(F32_PTR_TYPE);
else
BUILD_PTR_CAST(F32_PTR_TYPE_GS);
BUILD_LOAD(F32_TYPE);
PUSH_F32(value);
return true;
fail:
@ -526,12 +584,18 @@ aot_compile_op_f64_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 align, uint32 offset)
{
LLVMValueRef maddr, value;
bool enable_segue = comp_ctx->enable_segue_f64_load;
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 8)))
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 8,
enable_segue)))
return false;
BUILD_PTR_CAST(F64_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(F64_PTR_TYPE);
else
BUILD_PTR_CAST(F64_PTR_TYPE_GS);
BUILD_LOAD(F64_TYPE);
PUSH_F64(value);
return true;
fail:
@ -543,22 +607,33 @@ aot_compile_op_i32_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 align, uint32 offset, uint32 bytes, bool atomic)
{
LLVMValueRef maddr, value;
bool enable_segue = comp_ctx->enable_segue_i32_store;
POP_I32(value);
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes)))
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
enable_segue)))
return false;
switch (bytes) {
case 4:
BUILD_PTR_CAST(INT32_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT32_PTR_TYPE);
else
BUILD_PTR_CAST(INT32_PTR_TYPE_GS);
break;
case 2:
BUILD_PTR_CAST(INT16_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT16_PTR_TYPE);
else
BUILD_PTR_CAST(INT16_PTR_TYPE_GS);
BUILD_TRUNC(value, INT16_TYPE);
break;
case 1:
BUILD_PTR_CAST(INT8_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT8_PTR_TYPE);
else
BUILD_PTR_CAST(INT8_PTR_TYPE_GS);
BUILD_TRUNC(value, INT8_TYPE);
break;
default:
@ -582,26 +657,40 @@ aot_compile_op_i64_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 align, uint32 offset, uint32 bytes, bool atomic)
{
LLVMValueRef maddr, value;
bool enable_segue = comp_ctx->enable_segue_i64_store;
POP_I64(value);
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes)))
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
enable_segue)))
return false;
switch (bytes) {
case 8:
BUILD_PTR_CAST(INT64_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT64_PTR_TYPE);
else
BUILD_PTR_CAST(INT64_PTR_TYPE_GS);
break;
case 4:
BUILD_PTR_CAST(INT32_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT32_PTR_TYPE);
else
BUILD_PTR_CAST(INT32_PTR_TYPE_GS);
BUILD_TRUNC(value, I32_TYPE);
break;
case 2:
BUILD_PTR_CAST(INT16_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT16_PTR_TYPE);
else
BUILD_PTR_CAST(INT16_PTR_TYPE_GS);
BUILD_TRUNC(value, INT16_TYPE);
break;
case 1:
BUILD_PTR_CAST(INT8_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT8_PTR_TYPE);
else
BUILD_PTR_CAST(INT8_PTR_TYPE_GS);
BUILD_TRUNC(value, INT8_TYPE);
break;
default:
@ -625,13 +714,18 @@ aot_compile_op_f32_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 align, uint32 offset)
{
LLVMValueRef maddr, value;
bool enable_segue = comp_ctx->enable_segue_f32_store;
POP_F32(value);
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 4)))
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 4,
enable_segue)))
return false;
BUILD_PTR_CAST(F32_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(F32_PTR_TYPE);
else
BUILD_PTR_CAST(F32_PTR_TYPE_GS);
BUILD_STORE();
return true;
fail:
@ -643,13 +737,18 @@ aot_compile_op_f64_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 align, uint32 offset)
{
LLVMValueRef maddr, value;
bool enable_segue = comp_ctx->enable_segue_f64_store;
POP_F64(value);
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 8)))
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 8,
enable_segue)))
return false;
BUILD_PTR_CAST(F64_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(F64_PTR_TYPE);
else
BUILD_PTR_CAST(F64_PTR_TYPE_GS);
BUILD_STORE();
return true;
fail:
@ -1140,13 +1239,19 @@ aot_compile_op_atomic_rmw(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 offset, uint32 bytes)
{
LLVMValueRef maddr, value, result;
bool enable_segue = (op_type == VALUE_TYPE_I32)
? comp_ctx->enable_segue_i32_load
&& comp_ctx->enable_segue_i32_store
: comp_ctx->enable_segue_i64_load
&& comp_ctx->enable_segue_i64_store;
if (op_type == VALUE_TYPE_I32)
POP_I32(value);
else
POP_I64(value);
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes)))
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
enable_segue)))
return false;
if (!check_memory_alignment(comp_ctx, func_ctx, maddr, align))
@ -1154,19 +1259,31 @@ aot_compile_op_atomic_rmw(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
switch (bytes) {
case 8:
BUILD_PTR_CAST(INT64_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT64_PTR_TYPE);
else
BUILD_PTR_CAST(INT64_PTR_TYPE_GS);
break;
case 4:
BUILD_PTR_CAST(INT32_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT32_PTR_TYPE);
else
BUILD_PTR_CAST(INT32_PTR_TYPE_GS);
if (op_type == VALUE_TYPE_I64)
BUILD_TRUNC(value, I32_TYPE);
break;
case 2:
BUILD_PTR_CAST(INT16_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT16_PTR_TYPE);
else
BUILD_PTR_CAST(INT16_PTR_TYPE_GS);
BUILD_TRUNC(value, INT16_TYPE);
break;
case 1:
BUILD_PTR_CAST(INT8_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT8_PTR_TYPE);
else
BUILD_PTR_CAST(INT8_PTR_TYPE_GS);
BUILD_TRUNC(value, INT8_TYPE);
break;
default:
@ -1208,6 +1325,11 @@ aot_compile_op_atomic_cmpxchg(AOTCompContext *comp_ctx,
uint32 align, uint32 offset, uint32 bytes)
{
LLVMValueRef maddr, value, expect, result;
bool enable_segue = (op_type == VALUE_TYPE_I32)
? comp_ctx->enable_segue_i32_load
&& comp_ctx->enable_segue_i32_store
: comp_ctx->enable_segue_i64_load
&& comp_ctx->enable_segue_i64_store;
if (op_type == VALUE_TYPE_I32) {
POP_I32(value);
@ -1218,7 +1340,8 @@ aot_compile_op_atomic_cmpxchg(AOTCompContext *comp_ctx,
POP_I64(expect);
}
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes)))
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
enable_segue)))
return false;
if (!check_memory_alignment(comp_ctx, func_ctx, maddr, align))
@ -1226,22 +1349,34 @@ aot_compile_op_atomic_cmpxchg(AOTCompContext *comp_ctx,
switch (bytes) {
case 8:
BUILD_PTR_CAST(INT64_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT64_PTR_TYPE);
else
BUILD_PTR_CAST(INT64_PTR_TYPE_GS);
break;
case 4:
BUILD_PTR_CAST(INT32_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT32_PTR_TYPE);
else
BUILD_PTR_CAST(INT32_PTR_TYPE_GS);
if (op_type == VALUE_TYPE_I64) {
BUILD_TRUNC(value, I32_TYPE);
BUILD_TRUNC(expect, I32_TYPE);
}
break;
case 2:
BUILD_PTR_CAST(INT16_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT16_PTR_TYPE);
else
BUILD_PTR_CAST(INT16_PTR_TYPE_GS);
BUILD_TRUNC(value, INT16_TYPE);
BUILD_TRUNC(expect, INT16_TYPE);
break;
case 1:
BUILD_PTR_CAST(INT8_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT8_PTR_TYPE);
else
BUILD_PTR_CAST(INT8_PTR_TYPE_GS);
BUILD_TRUNC(value, INT8_TYPE);
BUILD_TRUNC(expect, INT8_TYPE);
break;
@ -1318,7 +1453,8 @@ aot_compile_op_atomic_wait(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
CHECK_LLVM_CONST(is_wait64);
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes)))
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
false)))
return false;
if (!check_memory_alignment(comp_ctx, func_ctx, maddr, align))
@ -1393,7 +1529,8 @@ aot_compiler_op_atomic_notify(AOTCompContext *comp_ctx,
POP_I32(count);
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes)))
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
false)))
return false;
if (!check_memory_alignment(comp_ctx, func_ctx, maddr, align))

View File

@ -53,7 +53,7 @@ aot_compile_op_f64_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
LLVMValueRef
aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 offset, uint32 bytes);
uint32 offset, uint32 bytes, bool enable_segue);
bool
aot_compile_op_memory_size(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);

View File

@ -1132,6 +1132,28 @@ aot_set_llvm_basic_types(AOTLLVMTypes *basic_types, LLVMContextRef context)
basic_types->v128_type = basic_types->i64x2_vec_type;
basic_types->v128_ptr_type = LLVMPointerType(basic_types->v128_type, 0);
basic_types->int8_ptr_type_gs =
LLVMPointerType(basic_types->int8_type, 256);
basic_types->int16_ptr_type_gs =
LLVMPointerType(basic_types->int16_type, 256);
basic_types->int32_ptr_type_gs =
LLVMPointerType(basic_types->int32_type, 256);
basic_types->int64_ptr_type_gs =
LLVMPointerType(basic_types->int64_type, 256);
basic_types->float32_ptr_type_gs =
LLVMPointerType(basic_types->float32_type, 256);
basic_types->float64_ptr_type_gs =
LLVMPointerType(basic_types->float64_type, 256);
basic_types->v128_ptr_type_gs =
LLVMPointerType(basic_types->v128_type, 256);
if (!basic_types->int8_ptr_type_gs || !basic_types->int16_ptr_type_gs
|| !basic_types->int32_ptr_type_gs || !basic_types->int64_ptr_type_gs
|| !basic_types->float32_ptr_type_gs
|| !basic_types->float64_ptr_type_gs
|| !basic_types->v128_ptr_type_gs) {
return false;
}
basic_types->i1x2_vec_type = LLVMVectorType(basic_types->int1_type, 2);
basic_types->funcref_type = LLVMInt32TypeInContext(context);
@ -2073,6 +2095,37 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
}
}
triple = LLVMGetTargetMachineTriple(comp_ctx->target_machine);
if (!triple) {
aot_set_last_error("get target machine triple failed.");
goto fail;
}
if (strstr(triple, "linux") && !strcmp(comp_ctx->target_arch, "x86_64")) {
if (option->segue_flags) {
if (option->segue_flags & (1 << 0))
comp_ctx->enable_segue_i32_load = true;
if (option->segue_flags & (1 << 1))
comp_ctx->enable_segue_i64_load = true;
if (option->segue_flags & (1 << 2))
comp_ctx->enable_segue_f32_load = true;
if (option->segue_flags & (1 << 3))
comp_ctx->enable_segue_f64_load = true;
if (option->segue_flags & (1 << 4))
comp_ctx->enable_segue_v128_load = true;
if (option->segue_flags & (1 << 8))
comp_ctx->enable_segue_i32_store = true;
if (option->segue_flags & (1 << 9))
comp_ctx->enable_segue_i64_store = true;
if (option->segue_flags & (1 << 10))
comp_ctx->enable_segue_f32_store = true;
if (option->segue_flags & (1 << 11))
comp_ctx->enable_segue_f64_store = true;
if (option->segue_flags & (1 << 12))
comp_ctx->enable_segue_v128_store = true;
}
}
LLVMDisposeMessage(triple);
if (option->enable_simd && strcmp(comp_ctx->target_arch, "x86_64") != 0
&& strncmp(comp_ctx->target_arch, "aarch64", 7) != 0) {
/* Disable simd if it isn't supported by target arch */

View File

@ -214,6 +214,14 @@ typedef struct AOTLLVMTypes {
LLVMTypeRef f32x4_vec_type;
LLVMTypeRef f64x2_vec_type;
LLVMTypeRef int8_ptr_type_gs;
LLVMTypeRef int16_ptr_type_gs;
LLVMTypeRef int32_ptr_type_gs;
LLVMTypeRef int64_ptr_type_gs;
LLVMTypeRef float32_ptr_type_gs;
LLVMTypeRef float64_ptr_type_gs;
LLVMTypeRef v128_ptr_type_gs;
LLVMTypeRef i1x2_vec_type;
LLVMTypeRef meta_data_type;
@ -341,6 +349,19 @@ typedef struct AOTCompContext {
/* Disable LLVM link time optimization */
bool disable_llvm_lto;
/* Whether to use the segment register as the base addr
of linear memory for load/store operations */
bool enable_segue_i32_load;
bool enable_segue_i64_load;
bool enable_segue_f32_load;
bool enable_segue_f64_load;
bool enable_segue_v128_load;
bool enable_segue_i32_store;
bool enable_segue_i64_store;
bool enable_segue_f32_store;
bool enable_segue_f64_store;
bool enable_segue_v128_store;
/* Whether optimize the JITed code */
bool optimize;
@ -413,6 +434,7 @@ typedef struct AOTCompOption {
uint32 output_format;
uint32 bounds_checks;
uint32 stack_bounds_checks;
uint32 segue_flags;
char **custom_sections;
uint32 custom_sections_count;
const char *stack_usage_file;

View File

@ -14,12 +14,12 @@
static LLVMValueRef
simd_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align,
uint32 offset, uint32 data_length, LLVMTypeRef ptr_type,
LLVMTypeRef data_type)
LLVMTypeRef data_type, bool enable_segue)
{
LLVMValueRef maddr, data;
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset,
data_length))) {
data_length, enable_segue))) {
HANDLE_FAILURE("aot_check_memory_overflow");
return NULL;
}
@ -44,10 +44,12 @@ bool
aot_compile_simd_v128_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 align, uint32 offset)
{
bool enable_segue = comp_ctx->enable_segue_v128_load;
LLVMTypeRef v128_ptr_type = enable_segue ? V128_PTR_TYPE_GS : V128_PTR_TYPE;
LLVMValueRef result;
if (!(result = simd_load(comp_ctx, func_ctx, align, offset, 16,
V128_PTR_TYPE, V128_TYPE))) {
v128_ptr_type, V128_TYPE, enable_segue))) {
return false;
}
@ -75,6 +77,7 @@ aot_compile_simd_load_extend(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
LLVMVectorType(I32_TYPE, 2), LLVMVectorType(I32_TYPE, 2),
};
LLVMTypeRef sub_vector_type, sub_vector_ptr_type;
bool enable_segue = comp_ctx->enable_segue_v128_load;
bh_assert(opcode_index < 6);
@ -82,13 +85,15 @@ aot_compile_simd_load_extend(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
/* to vector ptr type */
if (!sub_vector_type
|| !(sub_vector_ptr_type = LLVMPointerType(sub_vector_type, 0))) {
|| !(sub_vector_ptr_type =
LLVMPointerType(sub_vector_type, enable_segue ? 256 : 0))) {
HANDLE_FAILURE("LLVMPointerType");
return false;
}
if (!(sub_vector = simd_load(comp_ctx, func_ctx, align, offset, 8,
sub_vector_ptr_type, sub_vector_type))) {
if (!(sub_vector =
simd_load(comp_ctx, func_ctx, align, offset, 8,
sub_vector_ptr_type, sub_vector_type, enable_segue))) {
return false;
}
@ -118,6 +123,9 @@ aot_compile_simd_load_splat(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
LLVMValueRef element, result;
LLVMTypeRef element_ptr_types[] = { INT8_PTR_TYPE, INT16_PTR_TYPE,
INT32_PTR_TYPE, INT64_PTR_TYPE };
LLVMTypeRef element_ptr_types_gs[] = { INT8_PTR_TYPE_GS, INT16_PTR_TYPE_GS,
INT32_PTR_TYPE_GS,
INT64_PTR_TYPE_GS };
LLVMTypeRef element_data_types[] = { INT8_TYPE, INT16_TYPE, I32_TYPE,
I64_TYPE };
uint32 data_lengths[] = { 1, 2, 4, 8 };
@ -133,13 +141,16 @@ aot_compile_simd_load_splat(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
LLVM_CONST(i32x4_zero),
LLVM_CONST(i32x2_zero),
};
bool enable_segue = comp_ctx->enable_segue_v128_load;
bh_assert(opcode_index < 4);
if (!(element = simd_load(comp_ctx, func_ctx, align, offset,
data_lengths[opcode_index],
element_ptr_types[opcode_index],
element_data_types[opcode_index]))) {
if (!(element = simd_load(
comp_ctx, func_ctx, align, offset, data_lengths[opcode_index],
comp_ctx->enable_segue_v128_load
? element_ptr_types_gs[opcode_index]
: element_ptr_types[opcode_index],
element_data_types[opcode_index], enable_segue))) {
return false;
}
@ -170,11 +181,15 @@ aot_compile_simd_load_lane(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 data_lengths[] = { 1, 2, 4, 8 };
LLVMTypeRef element_ptr_types[] = { INT8_PTR_TYPE, INT16_PTR_TYPE,
INT32_PTR_TYPE, INT64_PTR_TYPE };
LLVMTypeRef element_ptr_types_gs[] = { INT8_PTR_TYPE_GS, INT16_PTR_TYPE_GS,
INT32_PTR_TYPE_GS,
INT64_PTR_TYPE_GS };
LLVMTypeRef element_data_types[] = { INT8_TYPE, INT16_TYPE, I32_TYPE,
I64_TYPE };
LLVMTypeRef vector_types[] = { V128_i8x16_TYPE, V128_i16x8_TYPE,
V128_i32x4_TYPE, V128_i64x2_TYPE };
LLVMValueRef lane = simd_lane_id_to_llvm_value(comp_ctx, lane_id);
bool enable_segue = comp_ctx->enable_segue_v128_load;
bh_assert(opcode_index < 4);
@ -183,10 +198,12 @@ aot_compile_simd_load_lane(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
return false;
}
if (!(element = simd_load(comp_ctx, func_ctx, align, offset,
data_lengths[opcode_index],
element_ptr_types[opcode_index],
element_data_types[opcode_index]))) {
if (!(element = simd_load(
comp_ctx, func_ctx, align, offset, data_lengths[opcode_index],
comp_ctx->enable_segue_v128_load
? element_ptr_types_gs[opcode_index]
: element_ptr_types[opcode_index],
element_data_types[opcode_index], enable_segue))) {
return false;
}
@ -207,6 +224,8 @@ aot_compile_simd_load_zero(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 opcode_index = opcode - SIMD_v128_load32_zero;
uint32 data_lengths[] = { 4, 8 };
LLVMTypeRef element_ptr_types[] = { INT32_PTR_TYPE, INT64_PTR_TYPE };
LLVMTypeRef element_ptr_types_gs[] = { INT32_PTR_TYPE_GS,
INT64_PTR_TYPE_GS };
LLVMTypeRef element_data_types[] = { I32_TYPE, I64_TYPE };
LLVMValueRef zero[] = {
LLVM_CONST(i32x4_vec_zero),
@ -222,13 +241,16 @@ aot_compile_simd_load_zero(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
LLVM_CONST(i32_six) },
{ LLVM_CONST(i32_zero), LLVM_CONST(i32_two) },
};
bool enable_segue = comp_ctx->enable_segue_v128_load;
bh_assert(opcode_index < 2);
if (!(element = simd_load(comp_ctx, func_ctx, align, offset,
data_lengths[opcode_index],
element_ptr_types[opcode_index],
element_data_types[opcode_index]))) {
if (!(element = simd_load(
comp_ctx, func_ctx, align, offset, data_lengths[opcode_index],
comp_ctx->enable_segue_v128_load
? element_ptr_types_gs[opcode_index]
: element_ptr_types[opcode_index],
element_data_types[opcode_index], enable_segue))) {
return false;
}
@ -260,12 +282,12 @@ aot_compile_simd_load_zero(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
static bool
simd_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align,
uint32 offset, uint32 data_length, LLVMValueRef value,
LLVMTypeRef value_ptr_type)
LLVMTypeRef value_ptr_type, bool enable_segue)
{
LLVMValueRef maddr, result;
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset,
data_length)))
data_length, enable_segue)))
return false;
if (!(maddr = LLVMBuildBitCast(comp_ctx->builder, maddr, value_ptr_type,
@ -288,12 +310,14 @@ bool
aot_compile_simd_v128_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 align, uint32 offset)
{
bool enable_segue = comp_ctx->enable_segue_v128_store;
LLVMTypeRef v128_ptr_type = enable_segue ? V128_PTR_TYPE_GS : V128_PTR_TYPE;
LLVMValueRef value;
POP_V128(value);
return simd_store(comp_ctx, func_ctx, align, offset, 16, value,
V128_PTR_TYPE);
v128_ptr_type, enable_segue);
fail:
return false;
}
@ -307,10 +331,14 @@ aot_compile_simd_store_lane(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 data_lengths[] = { 1, 2, 4, 8 };
LLVMTypeRef element_ptr_types[] = { INT8_PTR_TYPE, INT16_PTR_TYPE,
INT32_PTR_TYPE, INT64_PTR_TYPE };
LLVMTypeRef element_ptr_types_gs[] = { INT8_PTR_TYPE_GS, INT16_PTR_TYPE_GS,
INT32_PTR_TYPE_GS,
INT64_PTR_TYPE_GS };
uint32 opcode_index = opcode - SIMD_v128_store8_lane;
LLVMTypeRef vector_types[] = { V128_i8x16_TYPE, V128_i16x8_TYPE,
V128_i32x4_TYPE, V128_i64x2_TYPE };
LLVMValueRef lane = simd_lane_id_to_llvm_value(comp_ctx, lane_id);
bool enable_segue = comp_ctx->enable_segue_v128_store;
bh_assert(opcode_index < 4);
@ -327,5 +355,7 @@ aot_compile_simd_store_lane(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
return simd_store(comp_ctx, func_ctx, align, offset,
data_lengths[opcode_index], element,
element_ptr_types[opcode_index]);
enable_segue ? element_ptr_types_gs[opcode_index]
: element_ptr_types[opcode_index],
enable_segue);
}

View File

@ -61,6 +61,7 @@ typedef struct AOTCompOption {
uint32_t output_format;
uint32_t bounds_checks;
uint32_t stack_bounds_checks;
uint32_t segue_flags;
char **custom_sections;
uint32_t custom_sections_count;
const char *stack_usage_file;

View File

@ -167,6 +167,8 @@ typedef struct RuntimeInitArgs {
/* LLVM JIT opt and size level */
uint32_t llvm_jit_opt_level;
uint32_t llvm_jit_size_level;
/* Segue optimization flags for LLVM JIT */
uint32_t segue_flags;
} RuntimeInitArgs;
#ifndef WASM_VALKIND_T_DEFINED
@ -1351,20 +1353,21 @@ WASM_RUNTIME_API_EXTERN void
wasm_runtime_get_version(uint32_t *major, uint32_t *minor, uint32_t *patch);
/**
* Check whether an import func `(import <module_name> <func_name> (func ...))` is linked or not
* with runtime registered natvie functions
* Check whether an import func `(import <module_name> <func_name> (func ...))`
is linked or not with runtime registered native functions
*/
WASM_RUNTIME_API_EXTERN bool
wasm_runtime_is_import_func_linked(const char *module_name,
const char *func_name);
/**
* Check whether an import global `(import <module_name> <global_name> (global ...))` is linked or not
* with runtime registered natvie globals
* Check whether an import global `(import <module_name> <global_name> (global ...))`
is linked or not with runtime registered native globals
*/
WASM_RUNTIME_API_EXTERN bool
wasm_runtime_is_import_global_linked(const char *module_name,
const char *global_name);
/* clang-format on */
#ifdef __cplusplus

View File

@ -4231,6 +4231,15 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env,
wasm_exec_env_set_cur_frame(exec_env, frame);
#if defined(os_writegsbase)
{
WASMMemoryInstance *memory_inst = wasm_get_default_memory(module_inst);
if (memory_inst)
/* write base addr of linear memory to GS segment register */
os_writegsbase(memory_inst->memory_data);
}
#endif
if (function->is_import_func) {
#if WASM_ENABLE_MULTI_MODULE != 0
if (function->import_module_inst) {

View File

@ -3979,6 +3979,15 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env,
wasm_exec_env_set_cur_frame(exec_env, frame);
#if defined(os_writegsbase)
{
WASMMemoryInstance *memory_inst = wasm_get_default_memory(module_inst);
if (memory_inst)
/* write base addr of linear memory to GS segment register */
os_writegsbase(memory_inst->memory_data);
}
#endif
if (function->is_import_func) {
#if WASM_ENABLE_MULTI_MODULE != 0
if (function->import_module_inst) {

View File

@ -3000,7 +3000,7 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf,
if (module->function_count == 0)
return true;
#if WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_LLVM_JIT != 0
#if WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_LAZY_JIT != 0
if (os_mutex_init(&module->tierup_wait_lock) != 0) {
set_error_buf(error_buf, error_buf_size, "init jit tierup lock failed");
return false;
@ -3035,6 +3035,7 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf,
llvm_jit_options = wasm_runtime_get_llvm_jit_options();
option.opt_level = llvm_jit_options.opt_level;
option.size_level = llvm_jit_options.size_level;
option.segue_flags = llvm_jit_options.segue_flags;
#if WASM_ENABLE_BULK_MEMORY != 0
option.enable_bulk_memory = true;

View File

@ -1843,7 +1843,7 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf,
if (module->function_count == 0)
return true;
#if WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_LLVM_JIT != 0
#if WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_LAZY_JIT != 0
if (os_mutex_init(&module->tierup_wait_lock) != 0) {
set_error_buf(error_buf, error_buf_size, "init jit tierup lock failed");
return false;
@ -1876,6 +1876,7 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf,
option.is_jit_mode = true;
option.opt_level = llvm_jit_options.opt_level;
option.size_level = llvm_jit_options.size_level;
option.segue_flags = llvm_jit_options.segue_flags;
#if WASM_ENABLE_BULK_MEMORY != 0
option.enable_bulk_memory = true;

View File

@ -56,6 +56,20 @@ typedef unsigned int korp_sem;
#define OS_THREAD_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
#endif
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
/* Write base_addr into the x86-64 GS segment base register; used by the
   segue optimization to hold the base address of the Wasm linear memory.
   The macro argument is parenthesized so that an expression argument
   such as `p + off` is cast as a whole.
   NOTE(review): wrgsbase requires FSGSBASE support in the execution
   environment — confirm availability on the deployment target. */
#define os_writegsbase(base_addr)                                \
    do {                                                         \
        uint64 gs_value_ = (uint64)(uintptr_t)(base_addr);       \
        asm volatile("wrgsbase %0" ::"r"(gs_value_) : "memory"); \
    } while (0)
#if 0
/* _writegsbase_u64 also works, but requires adding the -mfsgsbase flag
   for gcc */
#include <immintrin.h>
#define os_writegsbase(base_addr) \
    _writegsbase_u64((uint64)(uintptr_t)(base_addr))
#endif
#endif
typedef int (*os_print_function_t)(const char *message);
void
os_set_print_function(os_print_function_t pf);

View File

@ -63,6 +63,20 @@ typedef sem_t korp_sem;
#define bh_socket_t int
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
/* Write base_addr into the x86-64 GS segment base register; used by the
   segue optimization to hold the base address of the Wasm linear memory.
   The macro argument is parenthesized so that an expression argument
   such as `p + off` is cast as a whole.
   NOTE(review): wrgsbase requires FSGSBASE support in the execution
   environment — confirm availability on the deployment target. */
#define os_writegsbase(base_addr)                                \
    do {                                                         \
        uint64 gs_value_ = (uint64)(uintptr_t)(base_addr);       \
        asm volatile("wrgsbase %0" ::"r"(gs_value_) : "memory"); \
    } while (0)
#if 0
/* _writegsbase_u64 also works, but requires adding the -mfsgsbase flag
   for gcc */
#include <immintrin.h>
#define os_writegsbase(base_addr) \
    _writegsbase_u64((uint64)(uintptr_t)(base_addr))
#endif
#endif
#if WASM_DISABLE_HW_BOUND_CHECK == 0
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) \
|| defined(BUILD_TARGET_AARCH64) || defined(BUILD_TARGET_RISCV64_LP64D) \