From 76be848ec34ebd3285fa5840a1db1e81c9e0f941 Mon Sep 17 00:00:00 2001 From: Wenyong Huang Date: Fri, 26 May 2023 10:13:33 +0800 Subject: [PATCH] Implement the segue optimization for LLVM AOT/JIT (#2230) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Segue is an optimization technology which uses x86 segment register to store the WebAssembly linear memory base address, so as to remove most of the cost of SFI (Software-based Fault Isolation) base addition and free up a general purpose register, by this way it may: - Improve the performance of JIT/AOT - Reduce the footprint of JIT/AOT, the JIT/AOT code generated is smaller - Reduce the compilation time of JIT/AOT This PR uses the x86-64 GS segment register to apply the optimization, currently it supports linux and linux-sgx platforms on x86-64 target. By default it is disabled, developer can use the option below to enable it for wamrc and iwasm(with LLVM JIT enabled): ```bash wamrc --enable-segue=[<flags>] -o output_file wasm_file iwasm --enable-segue=[<flags>] wasm_file [args...] ``` `flags` can be:     i32.load, i64.load, f32.load, f64.load, v128.load,     i32.store, i64.store, f32.store, f64.store, v128.store Use comma to separate them, e.g. `--enable-segue=i32.load,i64.store`, and `--enable-segue` means all flags are added. Acknowledgement: Many thanks to Intel Labs, UC San Diego and UT Austin teams for introducing this technology and the great support and guidance! 
Signed-off-by: Wenyong Huang Co-authored-by: Vahldiek-oberwagner, Anjo Lucas --- ATTRIBUTIONS.md | 10 + core/iwasm/aot/aot_loader.c | 10 + core/iwasm/aot/aot_runtime.c | 18 + core/iwasm/common/wasm_memory.c | 7 +- core/iwasm/common/wasm_runtime_common.c | 3 +- core/iwasm/common/wasm_runtime_common.h | 1 + core/iwasm/compilation/aot_compiler.h | 8 + core/iwasm/compilation/aot_emit_memory.c | 233 +++++++-- core/iwasm/compilation/aot_emit_memory.h | 2 +- core/iwasm/compilation/aot_llvm.c | 53 ++ core/iwasm/compilation/aot_llvm.h | 22 + core/iwasm/compilation/simd/simd_load_store.c | 74 ++- core/iwasm/include/aot_export.h | 1 + core/iwasm/include/wasm_export.h | 11 +- core/iwasm/interpreter/wasm_interp_classic.c | 9 + core/iwasm/interpreter/wasm_interp_fast.c | 9 + core/iwasm/interpreter/wasm_loader.c | 3 +- core/iwasm/interpreter/wasm_mini_loader.c | 3 +- .../platform/linux-sgx/platform_internal.h | 14 + .../shared/platform/linux/platform_internal.h | 14 + product-mini/platforms/posix/main.c | 82 ++- tests/benchmarks/coremark/build.sh | 7 + tests/benchmarks/coremark/run.sh | 13 +- tests/benchmarks/dhrystone/LICENSE | 7 + tests/benchmarks/dhrystone/build.sh | 24 + tests/benchmarks/dhrystone/include/dhry.h | 306 +++++++++++ tests/benchmarks/dhrystone/run.sh | 19 + tests/benchmarks/dhrystone/src/dhry_1.c | 485 ++++++++++++++++++ tests/benchmarks/dhrystone/src/dhry_2.c | 187 +++++++ tests/benchmarks/jetstream/build.sh | 126 ++++- tests/benchmarks/jetstream/jetstream.patch | 15 +- tests/benchmarks/jetstream/run_aot.sh | 18 +- tests/benchmarks/jetstream/tsf.patch | 24 + tests/benchmarks/libsodium/build.sh | 11 +- .../libsodium/{test_aot.sh => run_aot.sh} | 45 +- tests/benchmarks/polybench/build.sh | 8 + tests/benchmarks/polybench/run_aot.sh | 12 +- tests/benchmarks/polybench/run_interp.sh | 2 +- tests/benchmarks/sightglass/build.sh | 7 +- tests/benchmarks/sightglass/run_aot.sh | 12 +- tests/benchmarks/sightglass/run_interp.sh | 4 +- wamr-compiler/main.c | 68 ++- 42 files 
changed, 1864 insertions(+), 123 deletions(-) create mode 100644 tests/benchmarks/dhrystone/LICENSE create mode 100755 tests/benchmarks/dhrystone/build.sh create mode 100644 tests/benchmarks/dhrystone/include/dhry.h create mode 100755 tests/benchmarks/dhrystone/run.sh create mode 100644 tests/benchmarks/dhrystone/src/dhry_1.c create mode 100644 tests/benchmarks/dhrystone/src/dhry_2.c create mode 100644 tests/benchmarks/jetstream/tsf.patch rename tests/benchmarks/libsodium/{test_aot.sh => run_aot.sh} (50%) diff --git a/ATTRIBUTIONS.md b/ATTRIBUTIONS.md index 0cf62f49..60b6bb1b 100644 --- a/ATTRIBUTIONS.md +++ b/ATTRIBUTIONS.md @@ -16,6 +16,7 @@ WAMR project reused some components from other open source project: - **asmjit**: for the Fast JIT x86-64 codegen implementation - **zydis**: for the Fast JIT x86-64 codegen implementation - **NuttX ELF headers**: used in core/iwasm/aot/debug/elf_parser.c +- **Dhrystone**: for the test benchmark dhrystone The WAMR fast interpreter is a clean room development. We would acknowledge the inspirations by [WASM3](https://github.com/wasm3/wasm3) open source project for the approach of pre-calculated oprand stack location. @@ -35,6 +36,7 @@ The WAMR fast interpreter is a clean room development. We would acknowledge the | asmjit | unspecified | unspecified | https://github.com/asmjit/asmjit | | | zydis | unspecified | e14a07895136182a5b53e181eec3b1c6e0b434de | https://github.com/zyantific/zydis | | | NuttX ELF headers | 72313301e23f9c2de969fb64b9a0f67bb4c284df | 10.3.0 | https://github.com/apache/incubator-nuttx | | +| Dhrystone | 2.1 | 2.1 | https://fossies.org/linux/privat/old/ | | ## Licenses @@ -81,15 +83,19 @@ The WAMR fast interpreter is a clean room development. 
We would acknowledge the [LICENSE](./tests/wamr-test-suites/spec-test-script/LICENSE) ### libuv + [LICENSE](./core/iwasm/libraries/libc-uvwasi/LICENSE_LIBUV) ### uvwasi + [LICENSE](./core/iwasm/libraries/libc-uvwasi/LICENSE_UVWASI) ### asmjit + [LICENSE](./core/iwasm/fast-jit/cg/LICENSE_ASMJIT) ### zydis + [LICENSE](./core/iwasm/fast-jit/cg/LICENSE_ZYDIS) ### NuttX ELF headers @@ -97,3 +103,7 @@ The WAMR fast interpreter is a clean room development. We would acknowledge the [LICENSE](./core/iwasm/aot/debug/LICENSE_NUTTX) [NOTICE](./core/iwasm/aot/debug/NOTICE_NUTTX) + +### Dhrystone + +[LICENSE](./tests/benchmarks/dhrystone/LICENSE) diff --git a/core/iwasm/aot/aot_loader.c b/core/iwasm/aot/aot_loader.c index 5345fb2d..db9eea42 100644 --- a/core/iwasm/aot/aot_loader.c +++ b/core/iwasm/aot/aot_loader.c @@ -2889,6 +2889,16 @@ load(const uint8 *buf, uint32 size, AOTModule *module, char *error_buf, module->code and will be destroyed in aot_unload() */ destroy_sections(section_list, false); } + +#if 0 + { + uint32 i; + for (i = 0; i < module->func_count; i++) { + os_printf("AOT func %u, addr: %p\n", i, module->func_ptrs[i]); + } + } +#endif + return ret; fail: return false; diff --git a/core/iwasm/aot/aot_runtime.c b/core/iwasm/aot/aot_runtime.c index b5c406b9..2a3280ef 100644 --- a/core/iwasm/aot/aot_runtime.c +++ b/core/iwasm/aot/aot_runtime.c @@ -1015,6 +1015,15 @@ execute_post_instantiate_functions(AOTModuleInstance *module_inst, } } +#if defined(os_writegsbase) + { + AOTMemoryInstance *memory_inst = aot_get_default_memory(module_inst); + if (memory_inst) + /* write base addr of linear memory to GS segment register */ + os_writegsbase(memory_inst->memory_data); + } +#endif + /* Execute start function for both main insance and sub instance */ if (module->start_function) { AOTFunctionInstance start_func = { 0 }; @@ -1453,6 +1462,15 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function, } argc = func_type->param_cell_num; +#if defined(os_writegsbase) 
+ { + AOTMemoryInstance *memory_inst = aot_get_default_memory(module_inst); + if (memory_inst) + /* write base addr of linear memory to GS segment register */ + os_writegsbase(memory_inst->memory_data); + } +#endif + /* func pointer was looked up previously */ bh_assert(function->u.func.func_ptr != NULL); diff --git a/core/iwasm/common/wasm_memory.c b/core/iwasm/common/wasm_memory.c index 82676ae2..310dab6d 100644 --- a/core/iwasm/common/wasm_memory.c +++ b/core/iwasm/common/wasm_memory.c @@ -624,6 +624,11 @@ wasm_enlarge_memory_internal(WASMModuleInstance *module, uint32 inc_page_count) #endif #endif +#if defined(os_writegsbase) + /* write base addr of linear memory to GS segment register */ + os_writegsbase(memory_data_new); +#endif + return ret; } #else @@ -756,4 +761,4 @@ wasm_get_linear_memory_size(WASMMemoryInstance *memory, void *node) #endif return linear_mem_size; } -#endif \ No newline at end of file +#endif diff --git a/core/iwasm/common/wasm_runtime_common.c b/core/iwasm/common/wasm_runtime_common.c index 252e12b0..35bb9bce 100644 --- a/core/iwasm/common/wasm_runtime_common.c +++ b/core/iwasm/common/wasm_runtime_common.c @@ -130,7 +130,7 @@ static JitCompOptions jit_options = { 0 }; #endif #if WASM_ENABLE_JIT != 0 -static LLVMJITOptions llvm_jit_options = { 3, 3 }; +static LLVMJITOptions llvm_jit_options = { 3, 3, 0 }; #endif static RunningMode runtime_running_mode = Mode_Default; @@ -554,6 +554,7 @@ wasm_runtime_full_init(RuntimeInitArgs *init_args) #if WASM_ENABLE_JIT != 0 llvm_jit_options.size_level = init_args->llvm_jit_size_level; llvm_jit_options.opt_level = init_args->llvm_jit_opt_level; + llvm_jit_options.segue_flags = init_args->segue_flags; #endif if (!wasm_runtime_env_init()) { diff --git a/core/iwasm/common/wasm_runtime_common.h b/core/iwasm/common/wasm_runtime_common.h index 00d5ba23..283d2ed5 100644 --- a/core/iwasm/common/wasm_runtime_common.h +++ b/core/iwasm/common/wasm_runtime_common.h @@ -420,6 +420,7 @@ typedef struct wasm_frame_t { 
typedef struct LLVMJITOptions { uint32 opt_level; uint32 size_level; + uint32 segue_flags; } LLVMJITOptions; #endif diff --git a/core/iwasm/compilation/aot_compiler.h b/core/iwasm/compilation/aot_compiler.h index e6031ab8..40d79cf8 100644 --- a/core/iwasm/compilation/aot_compiler.h +++ b/core/iwasm/compilation/aot_compiler.h @@ -239,6 +239,13 @@ check_type_compatible(uint8 src_type, uint8 dst_type) #define FUNC_REF_TYPE comp_ctx->basic_types.funcref_type #define EXTERN_REF_TYPE comp_ctx->basic_types.externref_type +#define INT8_PTR_TYPE_GS comp_ctx->basic_types.int8_ptr_type_gs +#define INT16_PTR_TYPE_GS comp_ctx->basic_types.int16_ptr_type_gs +#define INT32_PTR_TYPE_GS comp_ctx->basic_types.int32_ptr_type_gs +#define INT64_PTR_TYPE_GS comp_ctx->basic_types.int64_ptr_type_gs +#define F32_PTR_TYPE_GS comp_ctx->basic_types.float32_ptr_type_gs +#define F64_PTR_TYPE_GS comp_ctx->basic_types.float64_ptr_type_gs + #define I32_CONST(v) LLVMConstInt(I32_TYPE, v, true) #define I64_CONST(v) LLVMConstInt(I64_TYPE, v, true) #define F32_CONST(v) LLVMConstReal(F32_TYPE, v) @@ -272,6 +279,7 @@ check_type_compatible(uint8 src_type, uint8 dst_type) #define V128_TYPE comp_ctx->basic_types.v128_type #define V128_PTR_TYPE comp_ctx->basic_types.v128_ptr_type +#define V128_PTR_TYPE_GS comp_ctx->basic_types.v128_ptr_type_gs #define V128_i8x16_TYPE comp_ctx->basic_types.i8x16_vec_type #define V128_i16x8_TYPE comp_ctx->basic_types.i16x8_vec_type #define V128_i32x4_TYPE comp_ctx->basic_types.i32x4_vec_type diff --git a/core/iwasm/compilation/aot_emit_memory.c b/core/iwasm/compilation/aot_emit_memory.c index 4da4cc80..c11989eb 100644 --- a/core/iwasm/compilation/aot_emit_memory.c +++ b/core/iwasm/compilation/aot_emit_memory.c @@ -81,7 +81,7 @@ get_memory_curr_page_count(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); LLVMValueRef aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, - uint32 offset, uint32 bytes) + uint32 offset, uint32 bytes, bool enable_segue) 
{ LLVMValueRef offset_const = I32_CONST(offset); LLVMValueRef addr, maddr, offset1, cmp1, cmp2, cmp; @@ -162,11 +162,20 @@ aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, /* inside memory space */ offset1 = I32_CONST((uint32)mem_offset); CHECK_LLVM_CONST(offset1); - if (!(maddr = LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, - mem_base_addr, &offset1, 1, - "maddr"))) { - aot_set_last_error("llvm build add failed."); - goto fail; + if (!enable_segue) { + if (!(maddr = LLVMBuildInBoundsGEP2(comp_ctx->builder, + INT8_TYPE, mem_base_addr, + &offset1, 1, "maddr"))) { + aot_set_last_error("llvm build add failed."); + goto fail; + } + } + else { + if (!(maddr = LLVMBuildIntToPtr(comp_ctx->builder, offset1, + INT8_PTR_TYPE_GS, "maddr"))) { + aot_set_last_error("llvm build IntToPtr failed."); + goto fail; + } } return maddr; } @@ -244,11 +253,29 @@ aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, } } - /* maddr = mem_base_addr + offset1 */ - if (!(maddr = LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, + if (!enable_segue) { + /* maddr = mem_base_addr + offset1 */ + if (!(maddr = + LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, mem_base_addr, &offset1, 1, "maddr"))) { - aot_set_last_error("llvm build add failed."); - goto fail; + aot_set_last_error("llvm build add failed."); + goto fail; + } + } + else { + LLVMValueRef maddr_base; + + if (!(maddr_base = LLVMBuildIntToPtr(comp_ctx->builder, addr, + INT8_PTR_TYPE_GS, "maddr_base"))) { + aot_set_last_error("llvm build int to ptr failed."); + goto fail; + } + if (!(maddr = LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, + maddr_base, &offset_const, 1, + "maddr"))) { + aot_set_last_error("llvm build inboundgep failed."); + goto fail; + } } return maddr; fail: @@ -388,13 +415,18 @@ aot_compile_op_i32_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, { LLVMValueRef maddr, value = NULL; LLVMTypeRef data_type; + bool enable_segue = 
comp_ctx->enable_segue_i32_load; - if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes, + enable_segue))) return false; switch (bytes) { case 4: - BUILD_PTR_CAST(INT32_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT32_PTR_TYPE); + else + BUILD_PTR_CAST(INT32_PTR_TYPE_GS); #if WASM_ENABLE_SHARED_MEMORY != 0 if (atomic) BUILD_ATOMIC_LOAD(align, I32_TYPE); @@ -405,11 +437,17 @@ aot_compile_op_i32_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, case 2: case 1: if (bytes == 2) { - BUILD_PTR_CAST(INT16_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT16_PTR_TYPE); + else + BUILD_PTR_CAST(INT16_PTR_TYPE_GS); data_type = INT16_TYPE; } else { - BUILD_PTR_CAST(INT8_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT8_PTR_TYPE); + else + BUILD_PTR_CAST(INT8_PTR_TYPE_GS); data_type = INT8_TYPE; } @@ -447,13 +485,18 @@ aot_compile_op_i64_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, { LLVMValueRef maddr, value = NULL; LLVMTypeRef data_type; + bool enable_segue = comp_ctx->enable_segue_i64_load; - if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes, + enable_segue))) return false; switch (bytes) { case 8: - BUILD_PTR_CAST(INT64_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT64_PTR_TYPE); + else + BUILD_PTR_CAST(INT64_PTR_TYPE_GS); #if WASM_ENABLE_SHARED_MEMORY != 0 if (atomic) BUILD_ATOMIC_LOAD(align, I64_TYPE); @@ -465,15 +508,24 @@ aot_compile_op_i64_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, case 2: case 1: if (bytes == 4) { - BUILD_PTR_CAST(INT32_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT32_PTR_TYPE); + else + BUILD_PTR_CAST(INT32_PTR_TYPE_GS); data_type = I32_TYPE; } else if (bytes == 2) { - BUILD_PTR_CAST(INT16_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT16_PTR_TYPE); + else + 
BUILD_PTR_CAST(INT16_PTR_TYPE_GS); data_type = INT16_TYPE; } else { - BUILD_PTR_CAST(INT8_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT8_PTR_TYPE); + else + BUILD_PTR_CAST(INT8_PTR_TYPE_GS); data_type = INT8_TYPE; } @@ -509,12 +561,18 @@ aot_compile_op_f32_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align, uint32 offset) { LLVMValueRef maddr, value; + bool enable_segue = comp_ctx->enable_segue_f32_load; - if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 4))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 4, + enable_segue))) return false; - BUILD_PTR_CAST(F32_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(F32_PTR_TYPE); + else + BUILD_PTR_CAST(F32_PTR_TYPE_GS); BUILD_LOAD(F32_TYPE); + PUSH_F32(value); return true; fail: @@ -526,12 +584,18 @@ aot_compile_op_f64_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align, uint32 offset) { LLVMValueRef maddr, value; + bool enable_segue = comp_ctx->enable_segue_f64_load; - if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 8))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 8, + enable_segue))) return false; - BUILD_PTR_CAST(F64_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(F64_PTR_TYPE); + else + BUILD_PTR_CAST(F64_PTR_TYPE_GS); BUILD_LOAD(F64_TYPE); + PUSH_F64(value); return true; fail: @@ -543,22 +607,33 @@ aot_compile_op_i32_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align, uint32 offset, uint32 bytes, bool atomic) { LLVMValueRef maddr, value; + bool enable_segue = comp_ctx->enable_segue_i32_store; POP_I32(value); - if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes, + enable_segue))) return false; switch (bytes) { case 4: - BUILD_PTR_CAST(INT32_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT32_PTR_TYPE); + else + BUILD_PTR_CAST(INT32_PTR_TYPE_GS); break; case 2: - 
BUILD_PTR_CAST(INT16_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT16_PTR_TYPE); + else + BUILD_PTR_CAST(INT16_PTR_TYPE_GS); BUILD_TRUNC(value, INT16_TYPE); break; case 1: - BUILD_PTR_CAST(INT8_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT8_PTR_TYPE); + else + BUILD_PTR_CAST(INT8_PTR_TYPE_GS); BUILD_TRUNC(value, INT8_TYPE); break; default: @@ -582,26 +657,40 @@ aot_compile_op_i64_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align, uint32 offset, uint32 bytes, bool atomic) { LLVMValueRef maddr, value; + bool enable_segue = comp_ctx->enable_segue_i64_store; POP_I64(value); - if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes, + enable_segue))) return false; switch (bytes) { case 8: - BUILD_PTR_CAST(INT64_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT64_PTR_TYPE); + else + BUILD_PTR_CAST(INT64_PTR_TYPE_GS); break; case 4: - BUILD_PTR_CAST(INT32_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT32_PTR_TYPE); + else + BUILD_PTR_CAST(INT32_PTR_TYPE_GS); BUILD_TRUNC(value, I32_TYPE); break; case 2: - BUILD_PTR_CAST(INT16_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT16_PTR_TYPE); + else + BUILD_PTR_CAST(INT16_PTR_TYPE_GS); BUILD_TRUNC(value, INT16_TYPE); break; case 1: - BUILD_PTR_CAST(INT8_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT8_PTR_TYPE); + else + BUILD_PTR_CAST(INT8_PTR_TYPE_GS); BUILD_TRUNC(value, INT8_TYPE); break; default: @@ -625,13 +714,18 @@ aot_compile_op_f32_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align, uint32 offset) { LLVMValueRef maddr, value; + bool enable_segue = comp_ctx->enable_segue_f32_store; POP_F32(value); - if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 4))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 4, + enable_segue))) return false; - BUILD_PTR_CAST(F32_PTR_TYPE); + if (!enable_segue) + 
BUILD_PTR_CAST(F32_PTR_TYPE); + else + BUILD_PTR_CAST(F32_PTR_TYPE_GS); BUILD_STORE(); return true; fail: @@ -643,13 +737,18 @@ aot_compile_op_f64_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align, uint32 offset) { LLVMValueRef maddr, value; + bool enable_segue = comp_ctx->enable_segue_f64_store; POP_F64(value); - if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 8))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 8, + enable_segue))) return false; - BUILD_PTR_CAST(F64_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(F64_PTR_TYPE); + else + BUILD_PTR_CAST(F64_PTR_TYPE_GS); BUILD_STORE(); return true; fail: @@ -1140,13 +1239,19 @@ aot_compile_op_atomic_rmw(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 offset, uint32 bytes) { LLVMValueRef maddr, value, result; + bool enable_segue = (op_type == VALUE_TYPE_I32) + ? comp_ctx->enable_segue_i32_load + && comp_ctx->enable_segue_i32_store + : comp_ctx->enable_segue_i64_load + && comp_ctx->enable_segue_i64_store; if (op_type == VALUE_TYPE_I32) POP_I32(value); else POP_I64(value); - if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes, + enable_segue))) return false; if (!check_memory_alignment(comp_ctx, func_ctx, maddr, align)) @@ -1154,19 +1259,31 @@ aot_compile_op_atomic_rmw(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, switch (bytes) { case 8: - BUILD_PTR_CAST(INT64_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT64_PTR_TYPE); + else + BUILD_PTR_CAST(INT64_PTR_TYPE_GS); break; case 4: - BUILD_PTR_CAST(INT32_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT32_PTR_TYPE); + else + BUILD_PTR_CAST(INT32_PTR_TYPE_GS); if (op_type == VALUE_TYPE_I64) BUILD_TRUNC(value, I32_TYPE); break; case 2: - BUILD_PTR_CAST(INT16_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT16_PTR_TYPE); + else + BUILD_PTR_CAST(INT16_PTR_TYPE_GS); BUILD_TRUNC(value, 
INT16_TYPE); break; case 1: - BUILD_PTR_CAST(INT8_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT8_PTR_TYPE); + else + BUILD_PTR_CAST(INT8_PTR_TYPE_GS); BUILD_TRUNC(value, INT8_TYPE); break; default: @@ -1208,6 +1325,11 @@ aot_compile_op_atomic_cmpxchg(AOTCompContext *comp_ctx, uint32 align, uint32 offset, uint32 bytes) { LLVMValueRef maddr, value, expect, result; + bool enable_segue = (op_type == VALUE_TYPE_I32) + ? comp_ctx->enable_segue_i32_load + && comp_ctx->enable_segue_i32_store + : comp_ctx->enable_segue_i64_load + && comp_ctx->enable_segue_i64_store; if (op_type == VALUE_TYPE_I32) { POP_I32(value); @@ -1218,7 +1340,8 @@ aot_compile_op_atomic_cmpxchg(AOTCompContext *comp_ctx, POP_I64(expect); } - if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes, + enable_segue))) return false; if (!check_memory_alignment(comp_ctx, func_ctx, maddr, align)) @@ -1226,22 +1349,34 @@ aot_compile_op_atomic_cmpxchg(AOTCompContext *comp_ctx, switch (bytes) { case 8: - BUILD_PTR_CAST(INT64_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT64_PTR_TYPE); + else + BUILD_PTR_CAST(INT64_PTR_TYPE_GS); break; case 4: - BUILD_PTR_CAST(INT32_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT32_PTR_TYPE); + else + BUILD_PTR_CAST(INT32_PTR_TYPE_GS); if (op_type == VALUE_TYPE_I64) { BUILD_TRUNC(value, I32_TYPE); BUILD_TRUNC(expect, I32_TYPE); } break; case 2: - BUILD_PTR_CAST(INT16_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT16_PTR_TYPE); + else + BUILD_PTR_CAST(INT16_PTR_TYPE_GS); BUILD_TRUNC(value, INT16_TYPE); BUILD_TRUNC(expect, INT16_TYPE); break; case 1: - BUILD_PTR_CAST(INT8_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT8_PTR_TYPE); + else + BUILD_PTR_CAST(INT8_PTR_TYPE_GS); BUILD_TRUNC(value, INT8_TYPE); BUILD_TRUNC(expect, INT8_TYPE); break; @@ -1318,7 +1453,8 @@ aot_compile_op_atomic_wait(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, 
CHECK_LLVM_CONST(is_wait64); - if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes, + false))) return false; if (!check_memory_alignment(comp_ctx, func_ctx, maddr, align)) @@ -1393,7 +1529,8 @@ aot_compiler_op_atomic_notify(AOTCompContext *comp_ctx, POP_I32(count); - if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes, + false))) return false; if (!check_memory_alignment(comp_ctx, func_ctx, maddr, align)) diff --git a/core/iwasm/compilation/aot_emit_memory.h b/core/iwasm/compilation/aot_emit_memory.h index e49582e3..1c2db503 100644 --- a/core/iwasm/compilation/aot_emit_memory.h +++ b/core/iwasm/compilation/aot_emit_memory.h @@ -53,7 +53,7 @@ aot_compile_op_f64_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, LLVMValueRef aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, - uint32 offset, uint32 bytes); + uint32 offset, uint32 bytes, bool enable_segue); bool aot_compile_op_memory_size(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); diff --git a/core/iwasm/compilation/aot_llvm.c b/core/iwasm/compilation/aot_llvm.c index 8a55eaf4..e398affc 100644 --- a/core/iwasm/compilation/aot_llvm.c +++ b/core/iwasm/compilation/aot_llvm.c @@ -1132,6 +1132,28 @@ aot_set_llvm_basic_types(AOTLLVMTypes *basic_types, LLVMContextRef context) basic_types->v128_type = basic_types->i64x2_vec_type; basic_types->v128_ptr_type = LLVMPointerType(basic_types->v128_type, 0); + basic_types->int8_ptr_type_gs = + LLVMPointerType(basic_types->int8_type, 256); + basic_types->int16_ptr_type_gs = + LLVMPointerType(basic_types->int16_type, 256); + basic_types->int32_ptr_type_gs = + LLVMPointerType(basic_types->int32_type, 256); + basic_types->int64_ptr_type_gs = + LLVMPointerType(basic_types->int64_type, 256); + basic_types->float32_ptr_type_gs = + 
LLVMPointerType(basic_types->float32_type, 256); + basic_types->float64_ptr_type_gs = + LLVMPointerType(basic_types->float64_type, 256); + basic_types->v128_ptr_type_gs = + LLVMPointerType(basic_types->v128_type, 256); + if (!basic_types->int8_ptr_type_gs || !basic_types->int16_ptr_type_gs + || !basic_types->int32_ptr_type_gs || !basic_types->int64_ptr_type_gs + || !basic_types->float32_ptr_type_gs + || !basic_types->float64_ptr_type_gs + || !basic_types->v128_ptr_type_gs) { + return false; + } + basic_types->i1x2_vec_type = LLVMVectorType(basic_types->int1_type, 2); basic_types->funcref_type = LLVMInt32TypeInContext(context); @@ -2073,6 +2095,37 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option) } } + triple = LLVMGetTargetMachineTriple(comp_ctx->target_machine); + if (!triple) { + aot_set_last_error("get target machine triple failed."); + goto fail; + } + if (strstr(triple, "linux") && !strcmp(comp_ctx->target_arch, "x86_64")) { + if (option->segue_flags) { + if (option->segue_flags & (1 << 0)) + comp_ctx->enable_segue_i32_load = true; + if (option->segue_flags & (1 << 1)) + comp_ctx->enable_segue_i64_load = true; + if (option->segue_flags & (1 << 2)) + comp_ctx->enable_segue_f32_load = true; + if (option->segue_flags & (1 << 3)) + comp_ctx->enable_segue_f64_load = true; + if (option->segue_flags & (1 << 4)) + comp_ctx->enable_segue_v128_load = true; + if (option->segue_flags & (1 << 8)) + comp_ctx->enable_segue_i32_store = true; + if (option->segue_flags & (1 << 9)) + comp_ctx->enable_segue_i64_store = true; + if (option->segue_flags & (1 << 10)) + comp_ctx->enable_segue_f32_store = true; + if (option->segue_flags & (1 << 11)) + comp_ctx->enable_segue_f64_store = true; + if (option->segue_flags & (1 << 12)) + comp_ctx->enable_segue_v128_store = true; + } + } + LLVMDisposeMessage(triple); + if (option->enable_simd && strcmp(comp_ctx->target_arch, "x86_64") != 0 && strncmp(comp_ctx->target_arch, "aarch64", 7) != 0) { /* Disable simd 
if it isn't supported by target arch */ diff --git a/core/iwasm/compilation/aot_llvm.h b/core/iwasm/compilation/aot_llvm.h index 1c073213..8acaa80d 100644 --- a/core/iwasm/compilation/aot_llvm.h +++ b/core/iwasm/compilation/aot_llvm.h @@ -214,6 +214,14 @@ typedef struct AOTLLVMTypes { LLVMTypeRef f32x4_vec_type; LLVMTypeRef f64x2_vec_type; + LLVMTypeRef int8_ptr_type_gs; + LLVMTypeRef int16_ptr_type_gs; + LLVMTypeRef int32_ptr_type_gs; + LLVMTypeRef int64_ptr_type_gs; + LLVMTypeRef float32_ptr_type_gs; + LLVMTypeRef float64_ptr_type_gs; + LLVMTypeRef v128_ptr_type_gs; + LLVMTypeRef i1x2_vec_type; LLVMTypeRef meta_data_type; @@ -341,6 +349,19 @@ typedef struct AOTCompContext { /* Disable LLVM link time optimization */ bool disable_llvm_lto; + /* Enable to use segment register as the base addr + of linear memory for load/store operations */ + bool enable_segue_i32_load; + bool enable_segue_i64_load; + bool enable_segue_f32_load; + bool enable_segue_f64_load; + bool enable_segue_v128_load; + bool enable_segue_i32_store; + bool enable_segue_i64_store; + bool enable_segue_f32_store; + bool enable_segue_f64_store; + bool enable_segue_v128_store; + /* Whether optimize the JITed code */ bool optimize; @@ -413,6 +434,7 @@ typedef struct AOTCompOption { uint32 output_format; uint32 bounds_checks; uint32 stack_bounds_checks; + uint32 segue_flags; char **custom_sections; uint32 custom_sections_count; const char *stack_usage_file; diff --git a/core/iwasm/compilation/simd/simd_load_store.c b/core/iwasm/compilation/simd/simd_load_store.c index d166e954..0e869727 100644 --- a/core/iwasm/compilation/simd/simd_load_store.c +++ b/core/iwasm/compilation/simd/simd_load_store.c @@ -14,12 +14,12 @@ static LLVMValueRef simd_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align, uint32 offset, uint32 data_length, LLVMTypeRef ptr_type, - LLVMTypeRef data_type) + LLVMTypeRef data_type, bool enable_segue) { LLVMValueRef maddr, data; if (!(maddr = 
aot_check_memory_overflow(comp_ctx, func_ctx, offset, - data_length))) { + data_length, enable_segue))) { HANDLE_FAILURE("aot_check_memory_overflow"); return NULL; } @@ -44,10 +44,12 @@ bool aot_compile_simd_v128_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align, uint32 offset) { + bool enable_segue = comp_ctx->enable_segue_v128_load; + LLVMTypeRef v128_ptr_type = enable_segue ? V128_PTR_TYPE_GS : V128_PTR_TYPE; LLVMValueRef result; if (!(result = simd_load(comp_ctx, func_ctx, align, offset, 16, - V128_PTR_TYPE, V128_TYPE))) { + v128_ptr_type, V128_TYPE, enable_segue))) { return false; } @@ -75,6 +77,7 @@ aot_compile_simd_load_extend(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, LLVMVectorType(I32_TYPE, 2), LLVMVectorType(I32_TYPE, 2), }; LLVMTypeRef sub_vector_type, sub_vector_ptr_type; + bool enable_segue = comp_ctx->enable_segue_v128_load; bh_assert(opcode_index < 6); @@ -82,13 +85,15 @@ aot_compile_simd_load_extend(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, /* to vector ptr type */ if (!sub_vector_type - || !(sub_vector_ptr_type = LLVMPointerType(sub_vector_type, 0))) { + || !(sub_vector_ptr_type = + LLVMPointerType(sub_vector_type, enable_segue ? 
256 : 0))) { HANDLE_FAILURE("LLVMPointerType"); return false; } - if (!(sub_vector = simd_load(comp_ctx, func_ctx, align, offset, 8, - sub_vector_ptr_type, sub_vector_type))) { + if (!(sub_vector = + simd_load(comp_ctx, func_ctx, align, offset, 8, + sub_vector_ptr_type, sub_vector_type, enable_segue))) { return false; } @@ -118,6 +123,9 @@ aot_compile_simd_load_splat(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, LLVMValueRef element, result; LLVMTypeRef element_ptr_types[] = { INT8_PTR_TYPE, INT16_PTR_TYPE, INT32_PTR_TYPE, INT64_PTR_TYPE }; + LLVMTypeRef element_ptr_types_gs[] = { INT8_PTR_TYPE_GS, INT16_PTR_TYPE_GS, + INT32_PTR_TYPE_GS, + INT64_PTR_TYPE_GS }; LLVMTypeRef element_data_types[] = { INT8_TYPE, INT16_TYPE, I32_TYPE, I64_TYPE }; uint32 data_lengths[] = { 1, 2, 4, 8 }; @@ -133,13 +141,16 @@ aot_compile_simd_load_splat(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, LLVM_CONST(i32x4_zero), LLVM_CONST(i32x2_zero), }; + bool enable_segue = comp_ctx->enable_segue_v128_load; bh_assert(opcode_index < 4); - if (!(element = simd_load(comp_ctx, func_ctx, align, offset, - data_lengths[opcode_index], - element_ptr_types[opcode_index], - element_data_types[opcode_index]))) { + if (!(element = simd_load( + comp_ctx, func_ctx, align, offset, data_lengths[opcode_index], + comp_ctx->enable_segue_v128_load + ? 
element_ptr_types_gs[opcode_index] + : element_ptr_types[opcode_index], + element_data_types[opcode_index], enable_segue))) { return false; } @@ -170,11 +181,15 @@ aot_compile_simd_load_lane(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 data_lengths[] = { 1, 2, 4, 8 }; LLVMTypeRef element_ptr_types[] = { INT8_PTR_TYPE, INT16_PTR_TYPE, INT32_PTR_TYPE, INT64_PTR_TYPE }; + LLVMTypeRef element_ptr_types_gs[] = { INT8_PTR_TYPE_GS, INT16_PTR_TYPE_GS, + INT32_PTR_TYPE_GS, + INT64_PTR_TYPE_GS }; LLVMTypeRef element_data_types[] = { INT8_TYPE, INT16_TYPE, I32_TYPE, I64_TYPE }; LLVMTypeRef vector_types[] = { V128_i8x16_TYPE, V128_i16x8_TYPE, V128_i32x4_TYPE, V128_i64x2_TYPE }; LLVMValueRef lane = simd_lane_id_to_llvm_value(comp_ctx, lane_id); + bool enable_segue = comp_ctx->enable_segue_v128_load; bh_assert(opcode_index < 4); @@ -183,10 +198,12 @@ aot_compile_simd_load_lane(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, return false; } - if (!(element = simd_load(comp_ctx, func_ctx, align, offset, - data_lengths[opcode_index], - element_ptr_types[opcode_index], - element_data_types[opcode_index]))) { + if (!(element = simd_load( + comp_ctx, func_ctx, align, offset, data_lengths[opcode_index], + comp_ctx->enable_segue_v128_load + ? 
element_ptr_types_gs[opcode_index] + : element_ptr_types[opcode_index], + element_data_types[opcode_index], enable_segue))) { return false; } @@ -207,6 +224,8 @@ aot_compile_simd_load_zero(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 opcode_index = opcode - SIMD_v128_load32_zero; uint32 data_lengths[] = { 4, 8 }; LLVMTypeRef element_ptr_types[] = { INT32_PTR_TYPE, INT64_PTR_TYPE }; + LLVMTypeRef element_ptr_types_gs[] = { INT32_PTR_TYPE_GS, + INT64_PTR_TYPE_GS }; LLVMTypeRef element_data_types[] = { I32_TYPE, I64_TYPE }; LLVMValueRef zero[] = { LLVM_CONST(i32x4_vec_zero), @@ -222,13 +241,16 @@ aot_compile_simd_load_zero(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, LLVM_CONST(i32_six) }, { LLVM_CONST(i32_zero), LLVM_CONST(i32_two) }, }; + bool enable_segue = comp_ctx->enable_segue_v128_load; bh_assert(opcode_index < 2); - if (!(element = simd_load(comp_ctx, func_ctx, align, offset, - data_lengths[opcode_index], - element_ptr_types[opcode_index], - element_data_types[opcode_index]))) { + if (!(element = simd_load( + comp_ctx, func_ctx, align, offset, data_lengths[opcode_index], + comp_ctx->enable_segue_v128_load + ? 
element_ptr_types_gs[opcode_index] + : element_ptr_types[opcode_index], + element_data_types[opcode_index], enable_segue))) { return false; } @@ -260,12 +282,12 @@ aot_compile_simd_load_zero(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, static bool simd_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align, uint32 offset, uint32 data_length, LLVMValueRef value, - LLVMTypeRef value_ptr_type) + LLVMTypeRef value_ptr_type, bool enable_segue) { LLVMValueRef maddr, result; if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, - data_length))) + data_length, enable_segue))) return false; if (!(maddr = LLVMBuildBitCast(comp_ctx->builder, maddr, value_ptr_type, @@ -288,12 +310,14 @@ bool aot_compile_simd_v128_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align, uint32 offset) { + bool enable_segue = comp_ctx->enable_segue_v128_store; + LLVMTypeRef v128_ptr_type = enable_segue ? V128_PTR_TYPE_GS : V128_PTR_TYPE; LLVMValueRef value; POP_V128(value); return simd_store(comp_ctx, func_ctx, align, offset, 16, value, - V128_PTR_TYPE); + v128_ptr_type, enable_segue); fail: return false; } @@ -307,10 +331,14 @@ aot_compile_simd_store_lane(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 data_lengths[] = { 1, 2, 4, 8 }; LLVMTypeRef element_ptr_types[] = { INT8_PTR_TYPE, INT16_PTR_TYPE, INT32_PTR_TYPE, INT64_PTR_TYPE }; + LLVMTypeRef element_ptr_types_gs[] = { INT8_PTR_TYPE_GS, INT16_PTR_TYPE_GS, + INT32_PTR_TYPE_GS, + INT64_PTR_TYPE_GS }; uint32 opcode_index = opcode - SIMD_v128_store8_lane; LLVMTypeRef vector_types[] = { V128_i8x16_TYPE, V128_i16x8_TYPE, V128_i32x4_TYPE, V128_i64x2_TYPE }; LLVMValueRef lane = simd_lane_id_to_llvm_value(comp_ctx, lane_id); + bool enable_segue = comp_ctx->enable_segue_v128_store; bh_assert(opcode_index < 4); @@ -327,5 +355,7 @@ aot_compile_simd_store_lane(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, return simd_store(comp_ctx, func_ctx, align, offset, data_lengths[opcode_index], 
element, - element_ptr_types[opcode_index]); + enable_segue ? element_ptr_types_gs[opcode_index] + : element_ptr_types[opcode_index], + enable_segue); } diff --git a/core/iwasm/include/aot_export.h b/core/iwasm/include/aot_export.h index e58873bf..fef5356c 100644 --- a/core/iwasm/include/aot_export.h +++ b/core/iwasm/include/aot_export.h @@ -61,6 +61,7 @@ typedef struct AOTCompOption { uint32_t output_format; uint32_t bounds_checks; uint32_t stack_bounds_checks; + uint32_t segue_flags; char **custom_sections; uint32_t custom_sections_count; const char *stack_usage_file; diff --git a/core/iwasm/include/wasm_export.h b/core/iwasm/include/wasm_export.h index f6c0107b..6cdbd2ab 100644 --- a/core/iwasm/include/wasm_export.h +++ b/core/iwasm/include/wasm_export.h @@ -167,6 +167,8 @@ typedef struct RuntimeInitArgs { /* LLVM JIT opt and size level */ uint32_t llvm_jit_opt_level; uint32_t llvm_jit_size_level; + /* Segue optimization flags for LLVM JIT */ + uint32_t segue_flags; } RuntimeInitArgs; #ifndef WASM_VALKIND_T_DEFINED @@ -1351,20 +1353,21 @@ WASM_RUNTIME_API_EXTERN void wasm_runtime_get_version(uint32_t *major, uint32_t *minor, uint32_t *patch); /** - * Check whether an import func `(import (func ...))` is linked or not - * with runtime registered natvie functions + * Check whether an import func `(import (func ...))` + * is linked or not with runtime registered natvie functions */ WASM_RUNTIME_API_EXTERN bool wasm_runtime_is_import_func_linked(const char *module_name, const char *func_name); /** - * Check whether an import global `(import (global ...))` is linked or not - * with runtime registered natvie globals + * Check whether an import global `(import (global ...))` + * is linked or not with runtime registered natvie globals */ WASM_RUNTIME_API_EXTERN bool wasm_runtime_is_import_global_linked(const char *module_name, const char *global_name); + /* clang-format on */ #ifdef __cplusplus diff --git a/core/iwasm/interpreter/wasm_interp_classic.c 
b/core/iwasm/interpreter/wasm_interp_classic.c index 6ef2b71a..6defb046 100644 --- a/core/iwasm/interpreter/wasm_interp_classic.c +++ b/core/iwasm/interpreter/wasm_interp_classic.c @@ -4231,6 +4231,15 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env, wasm_exec_env_set_cur_frame(exec_env, frame); +#if defined(os_writegsbase) + { + WASMMemoryInstance *memory_inst = wasm_get_default_memory(module_inst); + if (memory_inst) + /* write base addr of linear memory to GS segment register */ + os_writegsbase(memory_inst->memory_data); + } +#endif + if (function->is_import_func) { #if WASM_ENABLE_MULTI_MODULE != 0 if (function->import_module_inst) { diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 6ddeaa9c..63d30028 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -3979,6 +3979,15 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env, wasm_exec_env_set_cur_frame(exec_env, frame); +#if defined(os_writegsbase) + { + WASMMemoryInstance *memory_inst = wasm_get_default_memory(module_inst); + if (memory_inst) + /* write base addr of linear memory to GS segment register */ + os_writegsbase(memory_inst->memory_data); + } +#endif + if (function->is_import_func) { #if WASM_ENABLE_MULTI_MODULE != 0 if (function->import_module_inst) { diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index a3c4f422..d9deee63 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -3000,7 +3000,7 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf, if (module->function_count == 0) return true; -#if WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_LLVM_JIT != 0 +#if WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_LAZY_JIT != 0 if (os_mutex_init(&module->tierup_wait_lock) != 0) { set_error_buf(error_buf, error_buf_size, "init jit tierup lock failed"); return 
false; @@ -3035,6 +3035,7 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf, llvm_jit_options = wasm_runtime_get_llvm_jit_options(); option.opt_level = llvm_jit_options.opt_level; option.size_level = llvm_jit_options.size_level; + option.segue_flags = llvm_jit_options.segue_flags; #if WASM_ENABLE_BULK_MEMORY != 0 option.enable_bulk_memory = true; diff --git a/core/iwasm/interpreter/wasm_mini_loader.c b/core/iwasm/interpreter/wasm_mini_loader.c index aa5e18f6..fc637a5a 100644 --- a/core/iwasm/interpreter/wasm_mini_loader.c +++ b/core/iwasm/interpreter/wasm_mini_loader.c @@ -1843,7 +1843,7 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf, if (module->function_count == 0) return true; -#if WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_LLVM_JIT != 0 +#if WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_LAZY_JIT != 0 if (os_mutex_init(&module->tierup_wait_lock) != 0) { set_error_buf(error_buf, error_buf_size, "init jit tierup lock failed"); return false; @@ -1876,6 +1876,7 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf, option.is_jit_mode = true; option.opt_level = llvm_jit_options.opt_level; option.size_level = llvm_jit_options.size_level; + option.segue_flags = llvm_jit_options.segue_flags; #if WASM_ENABLE_BULK_MEMORY != 0 option.enable_bulk_memory = true; diff --git a/core/shared/platform/linux-sgx/platform_internal.h b/core/shared/platform/linux-sgx/platform_internal.h index d18f015e..7fdbf22d 100644 --- a/core/shared/platform/linux-sgx/platform_internal.h +++ b/core/shared/platform/linux-sgx/platform_internal.h @@ -56,6 +56,20 @@ typedef unsigned int korp_sem; #define OS_THREAD_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER #endif +#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) +#define os_writegsbase(base_addr) \ + do { \ + uint64 __gs_value = (uint64)(uintptr_t)base_addr; \ + asm volatile("wrgsbase %0" ::"r"(__gs_value) : "memory"); \ + } while (0) +#if 0 +/* _writegsbase_u64 also works, but need 
to add -mfsgsbase flag for gcc */ +#include <immintrin.h> +#define os_writegsbase(base_addr) \ + _writegsbase_u64(((uint64)(uintptr_t)base_addr)) +#endif +#endif + typedef int (*os_print_function_t)(const char *message); void os_set_print_function(os_print_function_t pf); diff --git a/core/shared/platform/linux/platform_internal.h b/core/shared/platform/linux/platform_internal.h index 0ac63cf5..334808e5 100644 --- a/core/shared/platform/linux/platform_internal.h +++ b/core/shared/platform/linux/platform_internal.h @@ -63,6 +63,20 @@ typedef sem_t korp_sem; #define bh_socket_t int +#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) +#define os_writegsbase(base_addr) \ + do { \ + uint64 __gs_value = (uint64)(uintptr_t)base_addr; \ + asm volatile("wrgsbase %0" ::"r"(__gs_value) : "memory"); \ + } while (0) +#if 0 +/* _writegsbase_u64 also works, but need to add -mfsgsbase flag for gcc */ +#include <immintrin.h> +#define os_writegsbase(base_addr) \ + _writegsbase_u64(((uint64)(uintptr_t)base_addr)) +#endif +#endif + #if WASM_DISABLE_HW_BOUND_CHECK == 0 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) \ || defined(BUILD_TARGET_AARCH64) || defined(BUILD_TARGET_RISCV64_LP64D) \ diff --git a/product-mini/platforms/posix/main.c b/product-mini/platforms/posix/main.c index 2e96ccdd..d8ea6222 100644 --- a/product-mini/platforms/posix/main.c +++ b/product-mini/platforms/posix/main.c @@ -54,6 +54,14 @@ print_help() #if WASM_ENABLE_JIT != 0 printf(" --llvm-jit-size-level=n Set LLVM JIT size level, default is 3\n"); printf(" --llvm-jit-opt-level=n Set LLVM JIT optimization level, default is 3\n"); +#if defined(os_writegsbase) + printf(" --enable-segue[=<flags>] Enable using segment register GS as the base address of\n"); + printf(" linear memory, which may improve performance, flags can be:\n"); + printf(" i32.load, i64.load, f32.load, f64.load, v128.load,\n"); + printf(" i32.store, i64.store, f32.store, f64.store, v128.store\n"); + printf(" Use comma to separate, e.g. 
--enable-segue=i32.load,i64.store\n"); + printf(" and --enable-segue means all flags are added.\n"); +#endif #endif printf(" --repl Start a very simple REPL (read-eval-print-loop) mode\n" " that runs commands in the form of \"FUNC ARG...\"\n"); @@ -117,13 +125,13 @@ app_instance_func(wasm_module_inst_t module_inst, const char *func_name) } /** - * Split a space separated strings into an array of strings + * Split a string into an array of strings * Returns NULL on failure * Memory must be freed by caller * Based on: http://stackoverflow.com/a/11198630/471795 */ static char ** -split_string(char *str, int *count) +split_string(char *str, int *count, const char *delimer) { char **res = NULL, **res1; char *p; @@ -131,7 +139,7 @@ split_string(char *str, int *count) /* split string and append tokens to 'res' */ do { - p = strtok(str, " "); + p = strtok(str, delimer); str = NULL; res1 = res; res = (char **)realloc(res1, sizeof(char *) * (uint32)(idx + 1)); @@ -180,7 +188,7 @@ app_instance_repl(wasm_module_inst_t module_inst) printf("exit repl mode\n"); break; } - app_argv = split_string(cmd, &app_argc); + app_argv = split_string(cmd, &app_argc, " "); if (app_argv == NULL) { LOG_ERROR("Wasm prepare param failed: split string failed.\n"); break; @@ -195,6 +203,59 @@ app_instance_repl(wasm_module_inst_t module_inst) return NULL; } +#if WASM_ENABLE_JIT != 0 +static uint32 +resolve_segue_flags(char *str_flags) +{ + uint32 segue_flags = 0; + int32 flag_count, i; + char **flag_list; + + flag_list = split_string(str_flags, &flag_count, ","); + if (flag_list) { + for (i = 0; i < flag_count; i++) { + if (!strcmp(flag_list[i], "i32.load")) { + segue_flags |= 1 << 0; + } + else if (!strcmp(flag_list[i], "i64.load")) { + segue_flags |= 1 << 1; + } + else if (!strcmp(flag_list[i], "f32.load")) { + segue_flags |= 1 << 2; + } + else if (!strcmp(flag_list[i], "f64.load")) { + segue_flags |= 1 << 3; + } + else if (!strcmp(flag_list[i], "v128.load")) { + segue_flags |= 1 << 4; + } + else 
if (!strcmp(flag_list[i], "i32.store")) { + segue_flags |= 1 << 8; + } + else if (!strcmp(flag_list[i], "i64.store")) { + segue_flags |= 1 << 9; + } + else if (!strcmp(flag_list[i], "f32.store")) { + segue_flags |= 1 << 10; + } + else if (!strcmp(flag_list[i], "f64.store")) { + segue_flags |= 1 << 11; + } + else if (!strcmp(flag_list[i], "v128.store")) { + segue_flags |= 1 << 12; + } + else { + /* invalid flag */ + segue_flags = (uint32)-1; + break; + } + } + free(flag_list); + } + return segue_flags; +} +#endif /* end of WASM_ENABLE_JIT != 0 */ + #if WASM_ENABLE_LIBC_WASI != 0 static bool validate_env_str(char *env) @@ -367,6 +428,7 @@ main(int argc, char *argv[]) #if WASM_ENABLE_JIT != 0 uint32 llvm_jit_size_level = 3; uint32 llvm_jit_opt_level = 3; + uint32 segue_flags = 0; #endif wasm_module_t wasm_module = NULL; wasm_module_inst_t wasm_module_inst = NULL; @@ -487,7 +549,16 @@ main(int argc, char *argv[]) llvm_jit_opt_level = 3; } } -#endif + else if (!strcmp(argv[0], "--enable-segue")) { + /* all flags are enabled */ + segue_flags = 0x1F1F; + } + else if (!strncmp(argv[0], "--enable-segue=", 15)) { + segue_flags = resolve_segue_flags(argv[0] + 15); + if (segue_flags == (uint32)-1) + return print_help(); + } +#endif /* end of WASM_ENABLE_JIT != 0 */ #if WASM_ENABLE_LIBC_WASI != 0 else if (!strncmp(argv[0], "--dir=", 6)) { if (argv[0][6] == '\0') @@ -632,6 +703,7 @@ main(int argc, char *argv[]) #if WASM_ENABLE_JIT != 0 init_args.llvm_jit_size_level = llvm_jit_size_level; init_args.llvm_jit_opt_level = llvm_jit_opt_level; + init_args.segue_flags = segue_flags; #endif #if WASM_ENABLE_DEBUG_INTERP != 0 diff --git a/tests/benchmarks/coremark/build.sh b/tests/benchmarks/coremark/build.sh index 14c179ce..ecada10d 100755 --- a/tests/benchmarks/coremark/build.sh +++ b/tests/benchmarks/coremark/build.sh @@ -3,6 +3,8 @@ # Copyright (C) 2019 Intel Corporation. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +PLATFORM=$(uname -s | tr A-Z a-z) + WAMRC="../../../wamr-compiler/build/wamrc" if [ ! -d coremark ]; then @@ -32,4 +34,9 @@ cd .. echo "Compile coremark.wasm to coremark.aot .." ${WAMRC} -o coremark.aot coremark.wasm +if [[ ${PLATFORM} == "linux" ]]; then + echo "Compile coremark.wasm to coremark_segue.aot .." + ${WAMRC} --enable-segue -o coremark_segue.aot coremark.wasm +fi + echo "Done" diff --git a/tests/benchmarks/coremark/run.sh b/tests/benchmarks/coremark/run.sh index a1ea7f6b..0d308bb6 100755 --- a/tests/benchmarks/coremark/run.sh +++ b/tests/benchmarks/coremark/run.sh @@ -3,14 +3,21 @@ # Copyright (C) 2019 Intel Corporation. All rights reserved. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -IWASM="../../../product-mini/platforms/linux/build/iwasm" +PLATFORM=$(uname -s | tr A-Z a-z) + +IWASM="../../../product-mini/platforms/${PLATFORM}/build/iwasm" WAMRC="../../../wamr-compiler/build/wamrc" echo "Run coremark with native .." ./coremark.exe -echo "Run coremark with iwasm mode .." +echo "Run coremark with iwasm aot mode .." ${IWASM} coremark.aot -echo "Run coremakr with iwasm interpreter .." +if [[ ${PLATFORM} == "linux" ]]; then + echo "Run coremark with iwasm aot-segue mode .." + ${IWASM} coremark_segue.aot +fi + +echo "Run coremark with iwasm interpreter mode .." ${IWASM} coremark.wasm diff --git a/tests/benchmarks/dhrystone/LICENSE b/tests/benchmarks/dhrystone/LICENSE new file mode 100644 index 00000000..9b3a7b2c --- /dev/null +++ b/tests/benchmarks/dhrystone/LICENSE @@ -0,0 +1,7 @@ +Dhrystone +------------------------------------------------------------------------------ +There is no explicit license defined. They were originally +written in ADA by Reinhold P. Weicker and translated to C by Rick Richardson . 
+ +The source obtained from the following site: +https://fossies.org/linux/privat/old/dhrystone-2.1.tar.gz diff --git a/tests/benchmarks/dhrystone/build.sh b/tests/benchmarks/dhrystone/build.sh new file mode 100755 index 00000000..eea33d58 --- /dev/null +++ b/tests/benchmarks/dhrystone/build.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +# Copyright (C) 2019 Intel Corporation. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +PLATFORM=$(uname -s | tr A-Z a-z) + +WAMRC_CMD=$PWD/../../../wamr-compiler/build/wamrc + +echo "===> compile dhrystone src to dhrystone_native" +gcc -O3 -o dhrystone_native src/dhry_1.c src/dhry_2.c -I include + +echo "===> compile dhrystone src to dhrystone.wasm" +/opt/wasi-sdk/bin/clang -O3 \ + -o dhrystone.wasm src/dhry_1.c src/dhry_2.c -I include \ + -Wl,--export=__heap_base -Wl,--export=__data_end + +echo "===> compile dhrystone.wasm to dhrystone.aot" +${WAMRC_CMD} -o dhrystone.aot dhrystone.wasm + +if [[ ${PLATFORM} == "linux" ]]; then + echo "===> compile dhrystone.wasm to dhrystone_segue.aot" + ${WAMRC_CMD} --enable-segue -o dhrystone_segue.aot dhrystone.wasm +fi diff --git a/tests/benchmarks/dhrystone/include/dhry.h b/tests/benchmarks/dhrystone/include/dhry.h new file mode 100644 index 00000000..0eb5ec64 --- /dev/null +++ b/tests/benchmarks/dhrystone/include/dhry.h @@ -0,0 +1,306 @@ +/* + ************************************************************************** + * DHRYSTONE 2.1 BENCHMARK PC VERSION + ************************************************************************** + * + * "DHRYSTONE" Benchmark Program + * ----------------------------- + * + * Version: C, Version 2.1 + * + * File: dhry.h (part 1 of 3) + * + * Date: May 25, 1988 + * + * Author: Reinhold P. 
Weicker + * Siemens AG, AUT E 51 + * Postfach 3220 + * 8520 Erlangen + * Germany (West) + * Phone: [+49]-9131-7-20330 + * (8-17 Central European Time) + * Usenet: ..!mcsun!unido!estevax!weicker + * + * Original Version (in Ada) published in + * "Communications of the ACM" vol. 27., no. 10 (Oct. 1984), + * pp. 1013 - 1030, together with the statistics + * on which the distribution of statements etc. is based. + * + * In this C version, the following C library functions are used: + * - strcpy, strcmp (inside the measurement loop) + * - printf, scanf (outside the measurement loop) + * In addition, Berkeley UNIX system calls "times ()" or "time ()" + * are used for execution time measurement. For measurements + * on other systems, these calls have to be changed. + * + * Collection of Results: + * Reinhold Weicker (address see above) and + * + * Rick Richardson + * PC Research. Inc. + * 94 Apple Orchard Drive + * Tinton Falls, NJ 07724 + * Phone: (201) 389-8963 (9-17 EST) + * Usenet: ...!uunet!pcrat!rick + * + * Please send results to Rick Richardson and/or Reinhold Weicker. + * Complete information should be given on hardware and software used. + * Hardware information includes: Machine type, CPU, type and size + * of caches; for microprocessors: clock frequency, memory speed + * (number of wait states). + * Software information includes: Compiler (and runtime library) + * manufacturer and version, compilation switches, OS version. + * The Operating System version may give an indication about the + * compiler; Dhrystone itself performs no OS calls in the measurement + * loop. + * + * The complete output generated by the program should be mailed + * such that at least some checks for correctness can be made. 
+ * + ************************************************************************** + * + * This version has changes made by Roy Longbottom to conform to a common + * format for a series of standard benchmarks for PCs: + * + * Running time greater than 5 seconds due to inaccuracy of the PC clock. + * + * Automatic adjustment of run time, no manually inserted parameters. + * + * Initial display of calibration times to confirm linearity. + * + * Display of results within one screen (or at a slow speed as the test + * progresses) so that it can be seen to have run successfully. + * + * Facilities to type in details of system used etc. + * + * All results and details appended to a results file. + * + * + * Roy Longbottom + * 101323.2241@compuserve.com + * + ************************************************************************** + * + * For details of history, changes, other defines, benchmark construction + * statistics see official versions from ftp.nosc.mil/pub/aburto where + * the latest table of results (dhry.tbl) are available. See also + * netlib@ornl.gov + * + ************************************************************************** + * + * Defines: The following "Defines" are possible: + * -DREG=register (default: Not defined) + * As an approximation to what an average C programmer + * might do, the "register" storage class is applied + * (if enabled by -DREG=register) + * - for local variables, if they are used (dynamically) + * five or more times + * - for parameters if they are used (dynamically) + * six or more times + * Note that an optimal "register" strategy is + * compiler-dependent, and that "register" declarations + * do not necessarily lead to faster execution. + * -DNOSTRUCTASSIGN (default: Not defined) + * Define if the C compiler does not support + * assignment of structures. + * -DNOENUMS (default: Not defined) + * Define if the C compiler does not support + * enumeration types. 
+ *************************************************************************** + * + * Compilation model and measurement (IMPORTANT): + * + * This C version of Dhrystone consists of three files: + * - dhry.h (this file, containing global definitions and comments) + * - dhry_1.c (containing the code corresponding to Ada package Pack_1) + * - dhry_2.c (containing the code corresponding to Ada package Pack_2) + * + * The following "ground rules" apply for measurements: + * - Separate compilation + * - No procedure merging + * - Otherwise, compiler optimizations are allowed but should be indicated + * - Default results are those without register declarations + * See the companion paper "Rationale for Dhrystone Version 2" for a more + * detailed discussion of these ground rules. + * + * For 16-Bit processors (e.g. 80186, 80286), times for all compilation + * models ("small", "medium", "large" etc.) should be given if possible, + * together with a definition of these models for the compiler system used. + * + ************************************************************************** + * Examples of Pentium Results + * + * Dhrystone Benchmark Version 2.1 (Language: C) + * + * Month run 4/1996 + * PC model Escom + * CPU Pentium + * Clock MHz 100 + * Cache 256K + * Options Neptune chipset + * OS/DOS Windows 95 + * Compiler Watcom C/ C++ 10.5 Win386 + * OptLevel -otexan -zp8 -fp5 -5r + * Run by Roy Longbottom + * From UK + * Mail 101323.2241@compuserve.com + * + * Final values (* implementation-dependent): + * + * Int_Glob: O.K. 5 + * Bool_Glob: O.K. 1 + * Ch_1_Glob: O.K. A + * Ch_2_Glob: O.K. B + * Arr_1_Glob[8]: O.K. 7 + * Arr_2_Glob8/7: O.K. 1600010 + * Ptr_Glob-> + * Ptr_Comp: * 98008 + * Discr: O.K. 0 + * Enum_Comp: O.K. 2 + * Int_Comp: O.K. 17 + * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING + * Next_Ptr_Glob-> + * Ptr_Comp: * 98008 same as above + * Discr: O.K. 0 + * Enum_Comp: O.K. 1 + * Int_Comp: O.K. 18 + * Str_Comp: O.K. 
DHRYSTONE PROGRAM, SOME STRING + * Int_1_Loc: O.K. 5 + * Int_2_Loc: O.K. 13 + * Int_3_Loc: O.K. 7 + * Enum_Loc: O.K. 1 + * Str_1_Loc: O.K. DHRYSTONE PROGRAM, 1'ST STRING + * Str_2_Loc: O.K. DHRYSTONE PROGRAM, 2'ND STRING + * + * Register option Selected. + * + * Microseconds 1 loop: 4.53 + * Dhrystones / second: 220690 + * VAX MIPS rating: 125.61 + * + * + * Dhrystone Benchmark Version 2.1 (Language: C) + * + * Month run 4/1996 + * PC model Escom + * CPU Pentium + * Clock MHz 100 + * Cache 256K + * Options Neptune chipset + * OS/DOS Windows 95 + * Compiler Watcom C/ C++ 10.5 Win386 + * OptLevel No optimisation + * Run by Roy Longbottom + * From UK + * Mail 101323.2241@compuserve.com + * + * Final values (* implementation-dependent): + * + * Int_Glob: O.K. 5 + * Bool_Glob: O.K. 1 + * Ch_1_Glob: O.K. A + * Ch_2_Glob: O.K. B + * Arr_1_Glob[8]: O.K. 7 + * Arr_2_Glob8/7: O.K. 320010 + * Ptr_Glob-> + * Ptr_Comp: * 98004 + * Discr: O.K. 0 + * Enum_Comp: O.K. 2 + * Int_Comp: O.K. 17 + * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING + * Next_Ptr_Glob-> + * Ptr_Comp: * 98004 same as above + * Discr: O.K. 0 + * Enum_Comp: O.K. 1 + * Int_Comp: O.K. 18 + * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING + * Int_1_Loc: O.K. 5 + * Int_2_Loc: O.K. 13 + * Int_3_Loc: O.K. 7 + * Enum_Loc: O.K. 1 + * Str_1_Loc: O.K. DHRYSTONE PROGRAM, 1'ST STRING + * Str_2_Loc: O.K. DHRYSTONE PROGRAM, 2'ND STRING + * + * Register option Not selected. 
+ * + * Microseconds 1 loop: 20.06 + * Dhrystones / second: 49844 + * VAX MIPS rating: 28.37 + * + ************************************************************************** + */ + +/* Compiler and system dependent definitions: */ + +#ifndef TIME +#define TIMES +#endif +/* Use times(2) time function unless */ +/* explicitly defined otherwise */ + +#ifdef TIMES +/* #include + #include */ +/* for "times" */ +#endif + +#define Mic_secs_Per_Second 1000000.0 +/* Berkeley UNIX C returns process times in seconds/HZ */ + +#ifdef NOSTRUCTASSIGN +#define structassign(d, s) memcpy(&(d), &(s), sizeof(d)) +#else +#define structassign(d, s) d = s +#endif + +#ifdef NOENUM +#define Ident_1 0 +#define Ident_2 1 +#define Ident_3 2 +#define Ident_4 3 +#define Ident_5 4 +typedef int Enumeration; +#else +typedef enum { Ident_1, Ident_2, Ident_3, Ident_4, Ident_5 } Enumeration; +#endif +/* for boolean and enumeration types in Ada, Pascal */ + +/* General definitions: */ + +#include +#include + +/* for strcpy, strcmp */ + +#define Null 0 +/* Value of a Null pointer */ +#define true 1 +#define false 0 + +typedef int One_Thirty; +typedef int One_Fifty; +typedef char Capital_Letter; +typedef int Boolean; +typedef char Str_30[31]; +typedef int Arr_1_Dim[50]; +typedef int Arr_2_Dim[50][50]; + +typedef struct record { + struct record *Ptr_Comp; + Enumeration Discr; + union { + struct { + Enumeration Enum_Comp; + int Int_Comp; + char Str_Comp[31]; + } var_1; + struct { + Enumeration E_Comp_2; + char Str_2_Comp[31]; + } var_2; + struct { + char Ch_1_Comp; + char Ch_2_Comp; + } var_3; + } variant; +} Rec_Type, *Rec_Pointer; diff --git a/tests/benchmarks/dhrystone/run.sh b/tests/benchmarks/dhrystone/run.sh new file mode 100755 index 00000000..a9ac1d0b --- /dev/null +++ b/tests/benchmarks/dhrystone/run.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +# Copyright (C) 2019 Intel Corporation. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +PLATFORM=$(uname -s | tr A-Z a-z) + +readonly IWASM_CMD="../../../product-mini/platforms/${PLATFORM}/build/iwasm" + +echo "============> run dhrystone native" +./dhrystone_native + +echo "============> run dhrystone.aot" +${IWASM_CMD} dhrystone.aot + +if [[ ${PLATFORM} == "linux" ]]; then + echo "============> run dhrystone_segue.aot" + ${IWASM_CMD} dhrystone_segue.aot +fi diff --git a/tests/benchmarks/dhrystone/src/dhry_1.c b/tests/benchmarks/dhrystone/src/dhry_1.c new file mode 100644 index 00000000..92f6e7e8 --- /dev/null +++ b/tests/benchmarks/dhrystone/src/dhry_1.c @@ -0,0 +1,485 @@ +/* + ************************************************************************* + * + * "DHRYSTONE" Benchmark Program + * ----------------------------- + * + * Version: C, Version 2.1 + * + * File: dhry_1.c (part 2 of 3) + * + * Date: May 25, 1988 + * + * Author: Reinhold P. Weicker + * + ************************************************************************* + */ + +#include +#include +#include +#include "dhry.h" + +/* Global Variables: */ + +Rec_Pointer Ptr_Glob, Next_Ptr_Glob; +int Int_Glob; +Boolean Bool_Glob; +char Ch_1_Glob, Ch_2_Glob; +int Arr_1_Glob[50]; +int Arr_2_Glob[50][50]; + +Enumeration +Func_1(Capital_Letter Ch_1_Par_Val, Capital_Letter Ch_2_Par_Val); +/* +forward declaration necessary since Enumeration may not simply be int +*/ + +#ifndef ROPT +#define REG +/* REG becomes defined as empty */ +/* i.e. 
no register variables */ +#else +#define REG register +#endif + +void +Proc_1(REG Rec_Pointer Ptr_Val_Par); +void +Proc_2(One_Fifty *Int_Par_Ref); +void +Proc_3(Rec_Pointer *Ptr_Ref_Par); +void +Proc_4(); +void +Proc_5(); +void +Proc_6(Enumeration Enum_Val_Par, Enumeration *Enum_Ref_Par); +void +Proc_7(One_Fifty Int_1_Par_Val, One_Fifty Int_2_Par_Val, + One_Fifty *Int_Par_Ref); +void +Proc_8(Arr_1_Dim Arr_1_Par_Ref, Arr_2_Dim Arr_2_Par_Ref, int Int_1_Par_Val, + int Int_2_Par_Val); + +Boolean +Func_2(Str_30 Str_1_Par_Ref, Str_30 Str_2_Par_Ref); + +/* variables for time measurement: */ + +#define Too_Small_Time 2 +/* Measurements should last at least 2 seconds */ + +#define BILLION 1000000000L +#define MILLION 1000000 +struct timespec Begin_Time, End_Time; +double User_Time; + +double Microseconds, Dhrystones_Per_Second, Vax_Mips; + +/* end of variables for time measurement */ + +int +main(int argc, char *argv[]) +/*****/ + +/* main program, corresponds to procedures */ +/* Main and Proc_0 in the Ada version */ +{ + One_Fifty Int_1_Loc; + REG One_Fifty Int_2_Loc; + One_Fifty Int_3_Loc; + REG char Ch_Index; + Enumeration Enum_Loc; + Str_30 Str_1_Loc; + Str_30 Str_2_Loc; + REG int Run_Index; + REG int Number_Of_Runs; + int endit, count = 10; + char general[9][80] = { " " }; + + /*********************************************************************** + * Change for compiler and optimisation used * + ***********************************************************************/ + + Next_Ptr_Glob = (Rec_Pointer)malloc(sizeof(Rec_Type)); + Ptr_Glob = (Rec_Pointer)malloc(sizeof(Rec_Type)); + + Ptr_Glob->Ptr_Comp = Next_Ptr_Glob; + Ptr_Glob->Discr = Ident_1; + Ptr_Glob->variant.var_1.Enum_Comp = Ident_3; + Ptr_Glob->variant.var_1.Int_Comp = 40; + strcpy(Ptr_Glob->variant.var_1.Str_Comp, "DHRYSTONE PROGRAM, SOME STRING"); + strcpy(Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING"); + + Arr_2_Glob[8][7] = 10; + /* Was missing in published program. 
Without this statement, */ + /* Arr_2_Glob [8][7] would have an undefined value. */ + /* Warning: With 16-Bit processors and Number_Of_Runs > 32000, */ + /* overflow may occur for this array element. */ + + printf("\n"); + printf("Dhrystone Benchmark, Version 2.1 (Language: C or C++)\n"); + printf("\n"); + + Number_Of_Runs = 5000; + + do { + + Number_Of_Runs = Number_Of_Runs * 2; + count = count - 1; + Arr_2_Glob[8][7] = 10; + + /***************/ + /* Start timer */ + /***************/ + + clock_gettime(CLOCK_MONOTONIC, &Begin_Time); + + for (Run_Index = 1; Run_Index <= Number_Of_Runs; ++Run_Index) { + + Proc_5(); + Proc_4(); + /* Ch_1_Glob == 'A', Ch_2_Glob == 'B', Bool_Glob == true */ + Int_1_Loc = 2; + Int_2_Loc = 3; + strcpy(Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING"); + Enum_Loc = Ident_2; + Bool_Glob = !Func_2(Str_1_Loc, Str_2_Loc); + /* Bool_Glob == 1 */ + while (Int_1_Loc < Int_2_Loc) /* loop body executed once */ + { + Int_3_Loc = 5 * Int_1_Loc - Int_2_Loc; + /* Int_3_Loc == 7 */ + Proc_7(Int_1_Loc, Int_2_Loc, &Int_3_Loc); + /* Int_3_Loc == 7 */ + Int_1_Loc += 1; + } /* while */ + /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */ + Proc_8(Arr_1_Glob, Arr_2_Glob, Int_1_Loc, Int_3_Loc); + /* Int_Glob == 5 */ + Proc_1(Ptr_Glob); + for (Ch_Index = 'A'; Ch_Index <= Ch_2_Glob; ++Ch_Index) + /* loop body executed twice */ + { + if (Enum_Loc == Func_1(Ch_Index, 'C')) + /* then, not executed */ + { + Proc_6(Ident_1, &Enum_Loc); + strcpy(Str_2_Loc, "DHRYSTONE PROGRAM, 3'RD STRING"); + Int_2_Loc = Run_Index; + Int_Glob = Run_Index; + } + } + /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */ + Int_2_Loc = Int_2_Loc * Int_1_Loc; + Int_1_Loc = Int_2_Loc / Int_3_Loc; + Int_2_Loc = 7 * (Int_2_Loc - Int_3_Loc) - Int_1_Loc; + /* Int_1_Loc == 1, Int_2_Loc == 13, Int_3_Loc == 7 */ + Proc_2(&Int_1_Loc); + /* Int_1_Loc == 5 */ + + } /* loop "for Run_Index" */ + + /**************/ + /* Stop timer */ + /**************/ + + clock_gettime(CLOCK_MONOTONIC, &End_Time); + + 
User_Time = (End_Time.tv_sec - Begin_Time.tv_sec) * MILLION + + (End_Time.tv_nsec - Begin_Time.tv_nsec) / 1000; + User_Time = User_Time / MILLION; /* convert to seconds */ + + printf("%ld runs %lf seconds \n", (long)Number_Of_Runs, User_Time); + if (User_Time > 5.0) { + count = 0; + } + else { + if (User_Time < 0.1) { + Number_Of_Runs = Number_Of_Runs * 5; + } + } + } /* calibrate/run do while */ + while (count > 0); + + printf("\n"); + printf("Final values (* implementation-dependent):\n"); + printf("\n"); + printf("Int_Glob: "); + if (Int_Glob == 5) + printf("O.K. "); + else + printf("WRONG "); + printf("%d ", Int_Glob); + + printf("Bool_Glob: "); + if (Bool_Glob == 1) + printf("O.K. "); + else + printf("WRONG "); + printf("%d\n", Bool_Glob); + + printf("Ch_1_Glob: "); + if (Ch_1_Glob == 'A') + printf("O.K. "); + else + printf("WRONG "); + printf("%c ", Ch_1_Glob); + + printf("Ch_2_Glob: "); + if (Ch_2_Glob == 'B') + printf("O.K. "); + else + printf("WRONG "); + printf("%c\n", Ch_2_Glob); + + printf("Arr_1_Glob[8]: "); + if (Arr_1_Glob[8] == 7) + printf("O.K. "); + else + printf("WRONG "); + printf("%d ", Arr_1_Glob[8]); + + printf("Arr_2_Glob8/7: "); + if (Arr_2_Glob[8][7] == Number_Of_Runs + 10) + printf("O.K. "); + else + printf("WRONG "); + printf("%10d\n", Arr_2_Glob[8][7]); + + printf("Ptr_Glob-> "); + printf(" Ptr_Comp: * %p\n", Ptr_Glob->Ptr_Comp); + + printf(" Discr: "); + if (Ptr_Glob->Discr == 0) + printf("O.K. "); + else + printf("WRONG "); + printf("%d ", Ptr_Glob->Discr); + + printf("Enum_Comp: "); + if (Ptr_Glob->variant.var_1.Enum_Comp == 2) + printf("O.K. "); + else + printf("WRONG "); + printf("%d\n", Ptr_Glob->variant.var_1.Enum_Comp); + + printf(" Int_Comp: "); + if (Ptr_Glob->variant.var_1.Int_Comp == 17) + printf("O.K. "); + else + printf("WRONG "); + printf("%d ", Ptr_Glob->variant.var_1.Int_Comp); + + printf("Str_Comp: "); + if (strcmp(Ptr_Glob->variant.var_1.Str_Comp, + "DHRYSTONE PROGRAM, SOME STRING") + == 0) + printf("O.K. 
"); + else + printf("WRONG "); + printf("%s\n", Ptr_Glob->variant.var_1.Str_Comp); + + printf("Next_Ptr_Glob-> "); + printf(" Ptr_Comp: * %p", Next_Ptr_Glob->Ptr_Comp); + printf(" same as above\n"); + + printf(" Discr: "); + if (Next_Ptr_Glob->Discr == 0) + printf("O.K. "); + else + printf("WRONG "); + printf("%d ", Next_Ptr_Glob->Discr); + + printf("Enum_Comp: "); + if (Next_Ptr_Glob->variant.var_1.Enum_Comp == 1) + printf("O.K. "); + else + printf("WRONG "); + printf("%d\n", Next_Ptr_Glob->variant.var_1.Enum_Comp); + + printf(" Int_Comp: "); + if (Next_Ptr_Glob->variant.var_1.Int_Comp == 18) + printf("O.K. "); + else + printf("WRONG "); + printf("%d ", Next_Ptr_Glob->variant.var_1.Int_Comp); + + printf("Str_Comp: "); + if (strcmp(Next_Ptr_Glob->variant.var_1.Str_Comp, + "DHRYSTONE PROGRAM, SOME STRING") + == 0) + printf("O.K. "); + else + printf("WRONG "); + printf("%s\n", Next_Ptr_Glob->variant.var_1.Str_Comp); + + printf("Int_1_Loc: "); + if (Int_1_Loc == 5) + printf("O.K. "); + else + printf("WRONG "); + printf("%d ", Int_1_Loc); + + printf("Int_2_Loc: "); + if (Int_2_Loc == 13) + printf("O.K. "); + else + printf("WRONG "); + printf("%d\n", Int_2_Loc); + + printf("Int_3_Loc: "); + if (Int_3_Loc == 7) + printf("O.K. "); + else + printf("WRONG "); + printf("%d ", Int_3_Loc); + + printf("Enum_Loc: "); + if (Enum_Loc == 1) + printf("O.K. "); + else + printf("WRONG "); + printf("%d\n", Enum_Loc); + + printf("Str_1_Loc: "); + if (strcmp(Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING") == 0) + printf("O.K. "); + else + printf("WRONG "); + printf("%s\n", Str_1_Loc); + + printf("Str_2_Loc: "); + if (strcmp(Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING") == 0) + printf("O.K. 
"); + else + printf("WRONG "); + printf("%s\n", Str_2_Loc); + + printf("\n"); + + if (User_Time < Too_Small_Time) { + printf("Measured time too small to obtain meaningful results\n"); + printf("Please increase number of runs\n"); + printf("\n"); + } + else { + Microseconds = User_Time * Mic_secs_Per_Second / (double)Number_Of_Runs; + Dhrystones_Per_Second = (double)Number_Of_Runs / User_Time; + Vax_Mips = Dhrystones_Per_Second / 1757.0; + + printf("Microseconds for one run through Dhrystone: "); + printf("%lf \n", Microseconds); + printf("Dhrystones per Second: "); + printf("%lf \n", Dhrystones_Per_Second); + printf("VAX MIPS rating = "); + printf("%lf \n", Vax_Mips); + printf("\n"); + } + + free(Next_Ptr_Glob); + free(Ptr_Glob); + return 1; +} + +void +Proc_1(REG Rec_Pointer Ptr_Val_Par) +/******************/ + +/* executed once */ +{ + REG Rec_Pointer Next_Record = Ptr_Val_Par->Ptr_Comp; + /* == Ptr_Glob_Next */ + /* Local variable, initialized with Ptr_Val_Par->Ptr_Comp, */ + /* corresponds to "rename" in Ada, "with" in Pascal */ + + structassign(*Ptr_Val_Par->Ptr_Comp, *Ptr_Glob); + Ptr_Val_Par->variant.var_1.Int_Comp = 5; + Next_Record->variant.var_1.Int_Comp = Ptr_Val_Par->variant.var_1.Int_Comp; + Next_Record->Ptr_Comp = Ptr_Val_Par->Ptr_Comp; + Proc_3(&Next_Record->Ptr_Comp); + /* Ptr_Val_Par->Ptr_Comp->Ptr_Comp + == Ptr_Glob->Ptr_Comp */ + if (Next_Record->Discr == Ident_1) + /* then, executed */ + { + Next_Record->variant.var_1.Int_Comp = 6; + Proc_6(Ptr_Val_Par->variant.var_1.Enum_Comp, + &Next_Record->variant.var_1.Enum_Comp); + Next_Record->Ptr_Comp = Ptr_Glob->Ptr_Comp; + Proc_7(Next_Record->variant.var_1.Int_Comp, 10, + &Next_Record->variant.var_1.Int_Comp); + } + else { /* not executed */ + structassign(*Ptr_Val_Par, *Ptr_Val_Par->Ptr_Comp); + } +} /* Proc_1 */ + +void +Proc_2(One_Fifty *Int_Par_Ref) +/******************/ +/* executed once */ +/* *Int_Par_Ref == 1, becomes 4 */ + +{ + One_Fifty Int_Loc; + Enumeration Enum_Loc; + + Int_Loc = 
*Int_Par_Ref + 10; + do /* executed once */ + if (Ch_1_Glob == 'A') + /* then, executed */ + { + Int_Loc -= 1; + *Int_Par_Ref = Int_Loc - Int_Glob; + Enum_Loc = Ident_1; + } /* if */ + while (Enum_Loc != Ident_1); /* true */ +} /* Proc_2 */ + +void +Proc_3(Rec_Pointer *Ptr_Ref_Par) +/******************/ +/* executed once */ +/* Ptr_Ref_Par becomes Ptr_Glob */ + +{ + if (Ptr_Glob != Null) + /* then, executed */ + *Ptr_Ref_Par = Ptr_Glob->Ptr_Comp; + Proc_7(10, Int_Glob, &Ptr_Glob->variant.var_1.Int_Comp); +} /* Proc_3 */ + +void +Proc_4() /* without parameters */ +/*******/ +/* executed once */ +{ + Boolean Bool_Loc; + + Bool_Loc = Ch_1_Glob == 'A'; + Bool_Glob = Bool_Loc | Bool_Glob; + Ch_2_Glob = 'B'; +} /* Proc_4 */ + +void +Proc_5() /* without parameters */ +/*******/ +/* executed once */ +{ + Ch_1_Glob = 'A'; + Bool_Glob = false; +} /* Proc_5 */ + +/* Procedure for the assignment of structures, */ +/* if the C compiler doesn't support this feature */ +#ifdef NOSTRUCTASSIGN +memcpy(d, s, l) register char *d; +register char *s; +register int l; +{ + while (l--) + *d++ = *s++; +} +#endif diff --git a/tests/benchmarks/dhrystone/src/dhry_2.c b/tests/benchmarks/dhrystone/src/dhry_2.c new file mode 100644 index 00000000..5378799e --- /dev/null +++ b/tests/benchmarks/dhrystone/src/dhry_2.c @@ -0,0 +1,187 @@ +/* + ************************************************************************* + * + * "DHRYSTONE" Benchmark Program + * ----------------------------- + * + * Version: C, Version 2.1 + * + * File: dhry_2.c (part 3 of 3) + * + * Date: May 25, 1988 + * + * Author: Reinhold P. Weicker + * + ************************************************************************* + */ + +#include "dhry.h" + +#ifndef REG +#define REG +/* REG becomes defined as empty */ +/* i.e. 
no register variables */ +#else +#define REG register +#endif + +extern int Int_Glob; +extern char Ch_1_Glob; + +Boolean +Func_3(Enumeration Enum_Par_Val); + +void +Proc_6(Enumeration Enum_Val_Par, Enumeration *Enum_Ref_Par) +/*********************************/ +/* executed once */ +/* Enum_Val_Par == Ident_3, Enum_Ref_Par becomes Ident_2 */ + +{ + *Enum_Ref_Par = Enum_Val_Par; + if (!Func_3(Enum_Val_Par)) + /* then, not executed */ + *Enum_Ref_Par = Ident_4; + switch (Enum_Val_Par) { + case Ident_1: + *Enum_Ref_Par = Ident_1; + break; + case Ident_2: + if (Int_Glob > 100) + /* then */ + *Enum_Ref_Par = Ident_1; + else + *Enum_Ref_Par = Ident_4; + break; + case Ident_3: /* executed */ + *Enum_Ref_Par = Ident_2; + break; + case Ident_4: + break; + case Ident_5: + *Enum_Ref_Par = Ident_3; + break; + } /* switch */ +} /* Proc_6 */ + +void +Proc_7(One_Fifty Int_1_Par_Val, One_Fifty Int_2_Par_Val, One_Fifty *Int_Par_Ref) +/**********************************************/ +/* executed three times */ +/* first call: Int_1_Par_Val == 2, Int_2_Par_Val == 3, */ +/* Int_Par_Ref becomes 7 */ +/* second call: Int_1_Par_Val == 10, Int_2_Par_Val == 5, */ +/* Int_Par_Ref becomes 17 */ +/* third call: Int_1_Par_Val == 6, Int_2_Par_Val == 10, */ +/* Int_Par_Ref becomes 18 */ + +{ + One_Fifty Int_Loc; + + Int_Loc = Int_1_Par_Val + 2; + *Int_Par_Ref = Int_2_Par_Val + Int_Loc; +} /* Proc_7 */ + +void +Proc_8(Arr_1_Dim Arr_1_Par_Ref, Arr_2_Dim Arr_2_Par_Ref, int Int_1_Par_Val, + int Int_2_Par_Val) +/*********************************************************************/ +/* executed once */ +/* Int_Par_Val_1 == 3 */ +/* Int_Par_Val_2 == 7 */ + +{ + REG One_Fifty Int_Index; + REG One_Fifty Int_Loc; + + Int_Loc = Int_1_Par_Val + 5; + Arr_1_Par_Ref[Int_Loc] = Int_2_Par_Val; + Arr_1_Par_Ref[Int_Loc + 1] = Arr_1_Par_Ref[Int_Loc]; + Arr_1_Par_Ref[Int_Loc + 30] = Int_Loc; + for (Int_Index = Int_Loc; Int_Index <= Int_Loc + 1; ++Int_Index) + Arr_2_Par_Ref[Int_Loc][Int_Index] = Int_Loc; + 
Arr_2_Par_Ref[Int_Loc][Int_Loc - 1] += 1; + Arr_2_Par_Ref[Int_Loc + 20][Int_Loc] = Arr_1_Par_Ref[Int_Loc]; + Int_Glob = 5; +} /* Proc_8 */ + +Enumeration +Func_1(Capital_Letter Ch_1_Par_Val, Capital_Letter Ch_2_Par_Val) +/*************************************************/ +/* executed three times */ +/* first call: Ch_1_Par_Val == 'H', Ch_2_Par_Val == 'R' */ +/* second call: Ch_1_Par_Val == 'A', Ch_2_Par_Val == 'C' */ +/* third call: Ch_1_Par_Val == 'B', Ch_2_Par_Val == 'C' */ + +{ + Capital_Letter Ch_1_Loc; + Capital_Letter Ch_2_Loc; + + Ch_1_Loc = Ch_1_Par_Val; + Ch_2_Loc = Ch_1_Loc; + if (Ch_2_Loc != Ch_2_Par_Val) + /* then, executed */ + return (Ident_1); + else /* not executed */ + { + Ch_1_Glob = Ch_1_Loc; + return (Ident_2); + } +} /* Func_1 */ + +Boolean +Func_2(Str_30 Str_1_Par_Ref, Str_30 Str_2_Par_Ref) +/*************************************************/ +/* executed once */ +/* Str_1_Par_Ref == "DHRYSTONE PROGRAM, 1'ST STRING" */ +/* Str_2_Par_Ref == "DHRYSTONE PROGRAM, 2'ND STRING" */ + +{ + REG One_Thirty Int_Loc; + Capital_Letter Ch_Loc; + + Int_Loc = 2; + while (Int_Loc <= 2) /* loop body executed once */ + if (Func_1(Str_1_Par_Ref[Int_Loc], Str_2_Par_Ref[Int_Loc + 1]) + == Ident_1) + /* then, executed */ + { + Ch_Loc = 'A'; + Int_Loc += 1; + } /* if, while */ + if (Ch_Loc >= 'W' && Ch_Loc < 'Z') + /* then, not executed */ + Int_Loc = 7; + if (Ch_Loc == 'R') + /* then, not executed */ + return (true); + else /* executed */ + { + if (strcmp(Str_1_Par_Ref, Str_2_Par_Ref) > 0) + /* then, not executed */ + { + Int_Loc += 7; + Int_Glob = Int_Loc; + return (true); + } + else /* executed */ + return (false); + } /* if Ch_Loc */ +} /* Func_2 */ + +Boolean +Func_3(Enumeration Enum_Par_Val) +/***************************/ +/* executed once */ +/* Enum_Par_Val == Ident_3 */ + +{ + Enumeration Enum_Loc; + + Enum_Loc = Enum_Par_Val; + if (Enum_Loc == Ident_3) + /* then, executed */ + return (true); + else /* not executed */ + return (false); +} /* Func_3 */ diff 
--git a/tests/benchmarks/jetstream/build.sh b/tests/benchmarks/jetstream/build.sh index 030b8d3a..ca8401cd 100755 --- a/tests/benchmarks/jetstream/build.sh +++ b/tests/benchmarks/jetstream/build.sh @@ -3,27 +3,45 @@ # Copyright (C) 2019 Intel Corporation. All rights reserved. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +source /opt/emsdk/emsdk_env.sh + +PLATFORM=$(uname -s | tr A-Z a-z) + OUT_DIR=$PWD/out WAMRC_CMD=$PWD/../../../wamr-compiler/build/wamrc mkdir -p jetstream +mkdir -p tsf-src mkdir -p ${OUT_DIR} +if [[ $1 != "--no-simd" ]];then + NATIVE_SIMD_FLAGS="-msse2 -msse3 -msse4" + WASM_SIMD_FLAGS="-msimd128 -msse2 -msse3 -msse4" +else + NATIVE_SIMD_FLAGS="" + WASM_SIMD_FLAGS="" +fi + cd jetstream echo "Download source files .." -wget https://browserbench.org/JetStream/wasm/gcc-loops.cpp -wget https://browserbench.org/JetStream/wasm/quicksort.c -wget https://browserbench.org/JetStream/wasm/HashSet.cpp -wget https://browserbench.org/JetStream/simple/float-mm.c +wget -N https://browserbench.org/JetStream/wasm/gcc-loops.cpp +wget -N https://browserbench.org/JetStream/wasm/quicksort.c +wget -N https://browserbench.org/JetStream/wasm/HashSet.cpp +wget -N https://browserbench.org/JetStream/simple/float-mm.c -patch -p1 < ../jetstream.patch +if [[ $? != 0 ]]; then + exit +fi + +echo "Patch source files .." +patch -p1 -N < ../jetstream.patch echo "Build gcc-loops with g++ .." -g++ -O3 -msse2 -msse3 -msse4 -o ${OUT_DIR}/gcc-loops_native gcc-loops.cpp +g++ -O3 ${NATIVE_SIMD_FLAGS} -o ${OUT_DIR}/gcc-loops_native gcc-loops.cpp echo "Build gcc-loops with em++ .." 
-em++ -O3 -s STANDALONE_WASM=1 -msimd128 \ +em++ -O3 -s STANDALONE_WASM=1 ${WASM_SIMD_FLAGS} \ -s INITIAL_MEMORY=1048576 \ -s TOTAL_STACK=32768 \ -s "EXPORTED_FUNCTIONS=['_main']" \ @@ -33,11 +51,16 @@ em++ -O3 -s STANDALONE_WASM=1 -msimd128 \ echo "Compile gcc-loops.wasm to gcc-loops.aot" ${WAMRC_CMD} -o ${OUT_DIR}/gcc-loops.aot ${OUT_DIR}/gcc-loops.wasm +if [[ ${PLATFORM} == "linux" ]]; then + echo "Compile gcc-loops.wasm to gcc-loops_segue.aot" + ${WAMRC_CMD} --enable-segue -o ${OUT_DIR}/gcc-loops_segue.aot ${OUT_DIR}/gcc-loops.wasm +fi + echo "Build quicksort with gcc .." -gcc -O3 -msse2 -msse3 -msse4 -o ${OUT_DIR}/quicksort_native quicksort.c +gcc -O3 ${NATIVE_SIMD_FLAGS} -o ${OUT_DIR}/quicksort_native quicksort.c echo "Build quicksort with emcc .." -emcc -O3 -s STANDALONE_WASM=1 -msimd128 \ +emcc -O3 -s STANDALONE_WASM=1 ${WASM_SIMD_FLAGS} \ -s INITIAL_MEMORY=1048576 \ -s TOTAL_STACK=32768 \ -s "EXPORTED_FUNCTIONS=['_main']" \ @@ -46,12 +69,17 @@ emcc -O3 -s STANDALONE_WASM=1 -msimd128 \ echo "Compile quicksort.wasm to quicksort.aot" ${WAMRC_CMD} -o ${OUT_DIR}/quicksort.aot ${OUT_DIR}/quicksort.wasm +if [[ ${PLATFORM} == "linux" ]]; then + echo "Compile quicksort.wasm to quicksort_segue.aot" + ${WAMRC_CMD} --enable-segue -o ${OUT_DIR}/quicksort_segue.aot ${OUT_DIR}/quicksort.wasm +fi + echo "Build HashSet with g++ .." -g++ -O3 -msse2 -msse3 -msse4 -o ${OUT_DIR}/HashSet_native HashSet.cpp \ +g++ -O3 ${NATIVE_SIMD_FLAGS} -o ${OUT_DIR}/HashSet_native HashSet.cpp \ -lstdc++ echo "Build HashSet with em++ .." 
-em++ -O3 -s STANDALONE_WASM=1 -msimd128 \ +em++ -O3 -s STANDALONE_WASM=1 ${WASM_SIMD_FLAGS} \ -s INITIAL_MEMORY=1048576 \ -s TOTAL_STACK=32768 \ -s "EXPORTED_FUNCTIONS=['_main']" \ @@ -60,11 +88,16 @@ em++ -O3 -s STANDALONE_WASM=1 -msimd128 \ echo "Compile HashSet.wasm to HashSet.aot" ${WAMRC_CMD} -o ${OUT_DIR}/HashSet.aot ${OUT_DIR}/HashSet.wasm +if [[ ${PLATFORM} == "linux" ]]; then + echo "Compile HashSet.wasm to HashSet_segue.aot" + ${WAMRC_CMD} --enable-segue -o ${OUT_DIR}/HashSet_segue.aot ${OUT_DIR}/HashSet.wasm +fi + echo "Build float-mm with gcc .." -gcc -O3 -msse2 -msse3 -msse4 -o ${OUT_DIR}/float-mm_native float-mm.c +gcc -O3 ${NATIVE_SIMD_FLAGS} -o ${OUT_DIR}/float-mm_native float-mm.c echo "Build float-mm with emcc .." -emcc -O3 -s STANDALONE_WASM=1 -msimd128 \ +emcc -O3 -s STANDALONE_WASM=1 ${WASM_SIMD_FLAGS} \ -s INITIAL_MEMORY=1048576 \ -s TOTAL_STACK=32768 \ -s "EXPORTED_FUNCTIONS=['_main']" \ @@ -72,3 +105,70 @@ emcc -O3 -s STANDALONE_WASM=1 -msimd128 \ echo "Compile float-mm.wasm to float-mm.aot" ${WAMRC_CMD} -o ${OUT_DIR}/float-mm.aot ${OUT_DIR}/float-mm.wasm + +if [[ ${PLATFORM} == "linux" ]]; then + echo "Compile float-mm.wasm to float-mm_segue.aot" + ${WAMRC_CMD} --enable-segue -o ${OUT_DIR}/float-mm_segue.aot ${OUT_DIR}/float-mm.wasm +fi + +cd ../tsf-src + +tsf_srcs="tsf_asprintf.c tsf_buffer.c tsf_error.c tsf_reflect.c tsf_st.c \ + tsf_type.c tsf_io.c tsf_native.c tsf_generator.c tsf_st_typetable.c \ + tsf_parser.c tsf_buf_writer.c tsf_buf_reader.c tsf_primitive.c \ + tsf_type_table.c tsf_copier.c tsf_destructor.c tsf_gpc_code_gen.c \ + gpc_code_gen_util.c gpc_threaded.c gpc_intable.c gpc_instruction.c \ + gpc_program.c gpc_proto.c gpc_stack_height.c tsf_serial_in_man.c \ + tsf_serial_out_man.c tsf_type_in_map.c tsf_type_out_map.c \ + tsf_stream_file_input.c tsf_stream_file_output.c tsf_sort.c \ + tsf_version.c tsf_named_type.c tsf_io_utils.c tsf_zip_attr.c \ + tsf_zip_reader.c tsf_zip_writer.c tsf_zip_abstract.c tsf_limits.c \ + 
tsf_ra_type_man.c tsf_adaptive_reader.c tsf_sha1.c tsf_sha1_writer.c \ + tsf_fsdb.c tsf_fsdb_protocol.c tsf_define_helpers.c tsf_ir.c \ + tsf_ir_different.c tsf_ir_speed.c" + +tsf_files="${tsf_srcs} config.h gpc_worklist.h \ + tsf_config_stub.h tsf.h tsf_internal.h tsf_region.h tsf_types.h \ + gpc.h tsf_atomics.h tsf_define_helpers.h tsf_indent.h tsf_inttypes.h \ + tsf_serial_protocol.h tsf_util.h gpc_int_common.h tsf_build_defines.h \ + tsf_format.h tsf_internal_config.h tsf_ir_different.h tsf_sha1.h \ + tsf_zip_abstract.h gpc_internal.h tsf_config.h tsf_fsdb_protocol.h \ + tsf_internal_config_stub.h tsf_ir.h tsf_st.h \ + gpc_instruction_dispatch.gen gpc_instruction_stack_effects.gen \ + gpc_instruction_to_string.gen gpc_instruction_size.gen \ + gpc_instruction_static_size.gen gpc_interpreter.gen" + +echo "Download tsf source files .." +for t in ${tsf_files} +do + wget -N "https://browserbench.org/JetStream/wasm/TSF/${t}" + if [[ $? != 0 ]]; then + exit + fi +done + +patch -p1 -N < ../tsf.patch + +echo "Build tsf with gcc .." +gcc \ + -o ${OUT_DIR}/tsf_native -O3 ${NATIVE_SIMD_FLAGS} \ + -I. -DTSF_BUILD_SYSTEM=1 \ + ${tsf_srcs} -lm + +echo "Build tsf standalone with wasi-sdk .." +/opt/wasi-sdk/bin/clang -O3 ${WASM_SIMD_FLAGS} -z stack-size=1048576 \ + -Wl,--initial-memory=52428800 \ + -Wl,--export=main \ + -Wl,--export=__heap_base,--export=__data_end \ + -I. 
-DTSF_BUILD_SYSTEM=1 \ + -Wl,--allow-undefined \ + -o ${OUT_DIR}/tsf.wasm \ + ${tsf_srcs} + +echo "Compile tsf.wasm to tsf.aot" +${WAMRC_CMD} -o ${OUT_DIR}/tsf.aot ${OUT_DIR}/tsf.wasm + +if [[ ${PLATFORM} == "linux" ]]; then + echo "Compile tsf.wasm to tsf_segue.aot" + ${WAMRC_CMD} --enable-segue -o ${OUT_DIR}/tsf_segue.aot ${OUT_DIR}/tsf.wasm +fi diff --git a/tests/benchmarks/jetstream/jetstream.patch b/tests/benchmarks/jetstream/jetstream.patch index 34431de0..bc680d98 100644 --- a/tests/benchmarks/jetstream/jetstream.patch +++ b/tests/benchmarks/jetstream/jetstream.patch @@ -1,15 +1,18 @@ diff -urN jetstream-org/HashSet.cpp jetstream/HashSet.cpp ---- jetstream-org/HashSet.cpp 2020-10-30 04:12:42.000000000 +0800 -+++ jetstream/HashSet.cpp 2022-01-24 17:11:08.619831711 +0800 -@@ -24,6 +24,7 @@ +--- jetstream-org/HashSet.cpp 2020-10-30 04:12:42.000000000 +0800 ++++ jetstream/HashSet.cpp 2022-01-24 17:11:08.619831711 +0800 +@@ -22,8 +22,10 @@ + + #include #include ++#include #include #include +#include #include - + // Compile with: xcrun clang++ -o HashSet HashSet.cpp -O2 -W -framework Foundation -licucore -std=c++11 -fvisibility=hidden -DNDEBUG=1 -@@ -76,7 +77,7 @@ +@@ -76,7 +78,7 @@ inline ToType bitwise_cast(FromType from) { typename std::remove_const::type to { }; @@ -17,4 +20,4 @@ diff -urN jetstream-org/HashSet.cpp jetstream/HashSet.cpp + memcpy(&to, &from, sizeof(to)); return to; } - + diff --git a/tests/benchmarks/jetstream/run_aot.sh b/tests/benchmarks/jetstream/run_aot.sh index d62a5da9..85ef3fba 100755 --- a/tests/benchmarks/jetstream/run_aot.sh +++ b/tests/benchmarks/jetstream/run_aot.sh @@ -3,6 +3,8 @@ # Copyright (C) 2019 Intel Corporation. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +PLATFORM=$(uname -s | tr A-Z a-z) + CUR_DIR=$PWD OUT_DIR=$CUR_DIR/out REPORT=$CUR_DIR/report.txt @@ -13,7 +15,7 @@ IWASM_CMD=$CUR_DIR/../../../product-mini/platforms/${PLATFORM}/build/iwasm BENCH_NAME_MAX_LEN=20 -JETSTREAM_CASES="gcc-loops quicksort HashSet float-mm" +JETSTREAM_CASES="gcc-loops HashSet tsf float-mm quicksort" rm -f $REPORT touch $REPORT @@ -34,7 +36,11 @@ echo "Start to run cases, the result is written to report.txt" #run benchmarks cd $OUT_DIR -echo -en "\t\t\t\t\t native\tiwasm-aot\n" >> $REPORT +if [[ ${PLATFORM} == "linux" ]]; then + echo -en "\t\t\t\t\t native\tiwasm-aot\tiwasm-aot-segue\n" >> $REPORT +else + echo -en "\t\t\t\t\t native\tiwasm-aot\n" >> $REPORT +fi for t in $JETSTREAM_CASES do @@ -46,7 +52,13 @@ do echo "run $t with iwasm aot .." echo -en "\t" >> $REPORT - $TIME -f "real-%e-time" $IWASM_CMD ${t}.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT + $TIME -f "real-%e-time" $IWASM_CMD --dir=. ${t}.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT + + if [[ ${PLATFORM} == "linux" ]]; then + echo "run $t with iwasm aot segue .." + echo -en "\t" >> $REPORT + $TIME -f "real-%e-time" $IWASM_CMD --dir=. 
${t}_segue.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT + fi echo -en "\n" >> $REPORT done diff --git a/tests/benchmarks/jetstream/tsf.patch b/tests/benchmarks/jetstream/tsf.patch new file mode 100644 index 00000000..e52c3cdc --- /dev/null +++ b/tests/benchmarks/jetstream/tsf.patch @@ -0,0 +1,24 @@ +diff -urN tsf-src-org/tsf_internal.h tsf-src/tsf_internal.h +--- tsf-src-org/tsf_internal.h 2023-03-31 10:49:45.000000000 +0800 ++++ tsf-src/tsf_internal.h 2023-05-11 08:18:35.000000000 +0800 +@@ -429,6 +429,7 @@ + #endif + tsf_fsdb_connection_t *connection; + #endif ++ uint32_t __padding; + } remote; + } u; + tsf_limits_t *limits; +diff -urN tsf-src-org/tsf_ir_speed.c tsf-src/tsf_ir_speed.c +--- tsf-src-org/tsf_ir_speed.c 2023-03-31 10:49:45.000000000 +0800 ++++ tsf-src/tsf_ir_speed.c 2023-05-11 08:18:35.000000000 +0800 +@@ -63,6 +63,9 @@ + Program_t *program; + unsigned elementIndex; + ++ if (!(programIndex % 100)) ++ printf("##programIndex: %u\n", programIndex); ++ + CS(program = tsf_region_create(sizeof(Program_t))); + + program->globals.len = numDecls + numDefns; diff --git a/tests/benchmarks/libsodium/build.sh b/tests/benchmarks/libsodium/build.sh index 1e9cc21a..3049f2c7 100755 --- a/tests/benchmarks/libsodium/build.sh +++ b/tests/benchmarks/libsodium/build.sh @@ -16,6 +16,8 @@ libsodium_CASES="aead_aes256gcm2 aead_aes256gcm aead_chacha20poly13052 aead_chac sodium_utils3 sodium_utils sodium_version stream2 stream3 stream4 stream verify1 \ xchacha20" +PLATFORM=$(uname -s | tr A-Z a-z) + readonly WAMRC_CMD=$PWD/../../../wamr-compiler/build/wamrc readonly OUT_DIR=$PWD/libsodium/zig-out/bin @@ -34,9 +36,16 @@ zig build -Drelease-fast -Denable_benchmarks=true -Dtarget=wasm32-wasi for case in ${libsodium_CASES} do ${WAMRC_CMD} -o ${OUT_DIR}/${case}.aot ${OUT_DIR}/${case}.wasm - if [ "$?" 
!= 0 ]; then echo -e "Error while compiling ${case}.wasm to ${case}.aot" exit fi + + if [[ ${PLATFORM} == "linux" ]]; then + ${WAMRC_CMD} --enable-segue -o ${OUT_DIR}/${case}_segue.aot ${OUT_DIR}/${case}.wasm + if [ "$?" != 0 ]; then + echo -e "Error while compiling ${case}.wasm to ${case}_segue.aot" + exit + fi + fi done diff --git a/tests/benchmarks/libsodium/test_aot.sh b/tests/benchmarks/libsodium/run_aot.sh similarity index 50% rename from tests/benchmarks/libsodium/test_aot.sh rename to tests/benchmarks/libsodium/run_aot.sh index 2e4e3e35..8859d063 100755 --- a/tests/benchmarks/libsodium/test_aot.sh +++ b/tests/benchmarks/libsodium/run_aot.sh @@ -13,12 +13,14 @@ libsodium_CASES="aead_aes256gcm2 aead_aes256gcm aead_chacha20poly13052 aead_chac scalarmult6 scalarmult7 scalarmult8 scalarmult_ed25519 scalarmult_ristretto255 \ scalarmult secretbox2 secretbox7 secretbox8 secretbox_easy2 secretbox_easy \ secretbox secretstream shorthash sign siphashx24 sodium_core sodium_utils2 \ - sodium_utils3 sodium_utils sodium_version stream2 stream3 stream4 stream verify1 \ - xchacha20" + sodium_utils stream2 stream3 stream4 stream verify1 xchacha20" + +PLATFORM=$(uname -s | tr A-Z a-z) readonly OUT_DIR=$PWD/libsodium/zig-out/bin readonly REPORT=$PWD/report.txt -readonly IWASM_CMD=$PWD/../../../product-mini/platforms/linux/build/iwasm +readonly IWASM_CMD=$PWD/../../../product-mini/platforms/${PLATFORM}/build/iwasm +readonly TIME=/usr/bin/time BENCH_NAME_MAX_LEN=20 @@ -40,7 +42,11 @@ function print_bench_name() # run benchmarks cd $OUT_DIR -echo -en "\t\t\t\t\t\tnative\tiwasm-aot\n" >> $REPORT +if [[ ${PLATFORM} == "linux" ]]; then + echo -en "\t\t\t\t\t\tnative\tiwasm-aot\tiwasm-aot-segue\n" >> $REPORT +else + echo -en "\t\t\t\t\t\tnative\tiwasm-aot\n" >> $REPORT +fi for t in $libsodium_CASES do @@ -48,11 +54,38 @@ do echo "run $t with native..." 
echo -en "\t" >> $REPORT - ./${t} | awk -F '-' 'BEGIN{FIELDWIDTHS="10"}{ORS=""; print $1 / 1000000.0}' >> $REPORT + if [[ $t != "sodium_utils2" ]]; then + ./${t} | awk '{printf "%-10.2f", $0/1000000.0}' >> $REPORT + else + # sodium_utils2 doesn't print the result, + # use time command to get result instead + $TIME -f "real-%e-time" ./${t} 2>&1 | grep "real-.*-time" | + awk -F '-' '{printf "%-10.2f", $2}' >> $REPORT + fi echo "run $t with iwasm aot..." echo -en "\t \t" >> $REPORT - $IWASM_CMD ${t}.aot | awk -F '-' 'BEGIN{FIELDWIDTHS="10"}{ORS=""; print $1 / 1000000.0}' >> $REPORT + if [[ $t != "sodium_utils2" ]]; then + $IWASM_CMD ${t}.aot | awk '{printf "%-10.2f", $0/1000000.0}' >> $REPORT + else + # sodium_utils2 doesn't print the result, + # use time command to get result instead + $TIME -f "real-%e-time" $IWASM_CMD ${t}.aot 2>&1 | grep "real-.*-time" | + awk -F '-' '{printf "%-10.2f", $2}' >> $REPORT + fi + + if [[ ${PLATFORM} == "linux" ]]; then + echo "run $t with iwasm aot segue..." + echo -en "\t \t" >> $REPORT + if [[ $t != "sodium_utils2" ]]; then + $IWASM_CMD ${t}_segue.aot | awk '{printf "%.2f", $0/1000000.0}' >> $REPORT + else + # sodium_utils2 doesn't print the result, + # use time command to get result instead + $TIME -f "real-%e-time" $IWASM_CMD ${t}_segue.aot 2>&1 | grep "real-.*-time" | + awk -F '-' '{printf "%.2f", $2}' >> $REPORT + fi + fi echo -en "\n" >> $REPORT done diff --git a/tests/benchmarks/polybench/build.sh b/tests/benchmarks/polybench/build.sh index bc7bf4c1..3e59a9bc 100755 --- a/tests/benchmarks/polybench/build.sh +++ b/tests/benchmarks/polybench/build.sh @@ -3,6 +3,8 @@ # Copyright (C) 2019 Intel Corporation. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +PLATFORM=$(uname -s | tr A-Z a-z) + OUT_DIR=$PWD/out WAMRC_CMD=$PWD/../../../wamr-compiler/build/wamrc POLYBENCH_CASES="datamining linear-algebra medley stencils" @@ -40,6 +42,12 @@ do echo "Compile ${file_name%.*}.wasm into ${file_name%.*}.aot" ${WAMRC_CMD} -o ${OUT_DIR}/${file_name%.*}.aot \ ${OUT_DIR}/${file_name%.*}.wasm + + if [[ ${PLATFORM} == "linux" ]]; then + echo "Compile ${file_name%.*}.wasm into ${file_name%.*}_segue.aot" + ${WAMRC_CMD} --enable-segue -o ${OUT_DIR}/${file_name%.*}_segue.aot \ + ${OUT_DIR}/${file_name%.*}.wasm + fi done done diff --git a/tests/benchmarks/polybench/run_aot.sh b/tests/benchmarks/polybench/run_aot.sh index 17cc098a..7eb301b2 100755 --- a/tests/benchmarks/polybench/run_aot.sh +++ b/tests/benchmarks/polybench/run_aot.sh @@ -37,7 +37,11 @@ echo "Start to run cases, the result is written to report.txt" #run benchmarks cd $OUT_DIR -echo -en "\t\t\t\t\t native\tiwasm-aot\n" >> $REPORT +if [[ ${PLATFORM} == "linux" ]]; then + echo -en "\t\t\t\t\t native\tiwasm-aot\tiwasm-aot-segue\n" >> $REPORT +else + echo -en "\t\t\t\t\t native\tiwasm-aot\n" >> $REPORT +fi for t in $POLYBENCH_CASES do @@ -51,5 +55,11 @@ do echo -en "\t" >> $REPORT $TIME -f "real-%e-time" $IWASM_CMD ${t}.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT + if [[ ${PLATFORM} == "linux" ]]; then + echo "run $t with iwasm aot segue .." 
+ echo -en "\t" >> $REPORT + $TIME -f "real-%e-time" $IWASM_CMD ${t}_segue.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT + fi + echo -en "\n" >> $REPORT done diff --git a/tests/benchmarks/polybench/run_interp.sh b/tests/benchmarks/polybench/run_interp.sh index f6d5c254..5dfe760e 100755 --- a/tests/benchmarks/polybench/run_interp.sh +++ b/tests/benchmarks/polybench/run_interp.sh @@ -37,7 +37,7 @@ echo "Start to run cases, the result is written to report.txt" #run benchmarks cd $OUT_DIR -echo -en "\t\t\t\t\t native\tiwasm-aot\n" >> $REPORT +echo -en "\t\t\t\t\t native\tiwasm-interp\n" >> $REPORT for t in $POLYBENCH_CASES do diff --git a/tests/benchmarks/sightglass/build.sh b/tests/benchmarks/sightglass/build.sh index c7192c16..54088228 100755 --- a/tests/benchmarks/sightglass/build.sh +++ b/tests/benchmarks/sightglass/build.sh @@ -3,6 +3,8 @@ # Copyright (C) 2019 Intel Corporation. All rights reserved. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +PLATFORM=$(uname -s | tr A-Z a-z) + OUT_DIR=$PWD/out WAMRC_CMD=$PWD/../../../wamr-compiler/build/wamrc SHOOTOUT_CASES="base64 fib2 gimli heapsort matrix memmove nestedloop \ @@ -34,9 +36,12 @@ do -Wl,--export=app_main -Wl,--export=_start \ ${bench}.c main/main_${bench}.c main/my_libc.c - echo "Compile ${bench}.wasm into ${bench}.aot" ${WAMRC_CMD} -o ${OUT_DIR}/${bench}.aot ${OUT_DIR}/${bench}.wasm + if [[ ${PLATFORM} == "linux" ]]; then + echo "Compile ${bench}.wasm into ${bench}_segue.aot" + ${WAMRC_CMD} --enable-segue -o ${OUT_DIR}/${bench}_segue.aot ${OUT_DIR}/${bench}.wasm + fi done cd .. 
diff --git a/tests/benchmarks/sightglass/run_aot.sh b/tests/benchmarks/sightglass/run_aot.sh index 7a74a791..44945b91 100755 --- a/tests/benchmarks/sightglass/run_aot.sh +++ b/tests/benchmarks/sightglass/run_aot.sh @@ -36,7 +36,11 @@ echo "Start to run cases, the result is written to report.txt" #run benchmarks cd $OUT_DIR -echo -en "\t\t\t\t\t native\tiwasm-aot\n" >> $REPORT +if [[ ${PLATFORM} == "linux" ]]; then + echo -en "\t\t\t\t\t native\tiwasm-aot\tiwasm-aot-segue\n" >> $REPORT +else + echo -en "\t\t\t\t\t native\tiwasm-aot\n" >> $REPORT +fi for t in $SHOOTOUT_CASES do @@ -50,5 +54,11 @@ do echo -en "\t" >> $REPORT $TIME -f "real-%e-time" $IWASM_CMD ${t}.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT + if [[ ${PLATFORM} == "linux" ]]; then + echo "run $t with iwasm aot segue .." + echo -en "\t" >> $REPORT + $TIME -f "real-%e-time" $IWASM_CMD ${t}_segue.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT + fi + echo -en "\n" >> $REPORT done diff --git a/tests/benchmarks/sightglass/run_interp.sh b/tests/benchmarks/sightglass/run_interp.sh index 50e94a5d..c3dbb302 100755 --- a/tests/benchmarks/sightglass/run_interp.sh +++ b/tests/benchmarks/sightglass/run_interp.sh @@ -46,9 +46,9 @@ do echo -en "\t" >> $REPORT $TIME -f "real-%e-time" ./${t}_native 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT - echo "run $t with iwasm aot .." + echo "run $t with iwasm interp .." 
echo -en "\t" >> $REPORT - $TIME -f "real-%e-time" $IWASM_CMD ${t}.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT + $TIME -f "real-%e-time" $IWASM_CMD ${t}.wasm 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT echo -en "\n" >> $REPORT done diff --git a/wamr-compiler/main.c b/wamr-compiler/main.c index bd8691c4..ccda363e 100644 --- a/wamr-compiler/main.c +++ b/wamr-compiler/main.c @@ -65,6 +65,12 @@ print_help() printf(" --enable-indirect-mode Enalbe call function through symbol table but not direct call\n"); printf(" --disable-llvm-intrinsics Disable the LLVM built-in intrinsics\n"); printf(" --disable-llvm-lto Disable the LLVM link time optimization\n"); + printf(" --enable-segue[=] Enable using segment register GS as the base address of linear memory,\n"); + printf(" only available on linux/linux-sgx x86-64, which may improve performance,\n"); + printf(" flags can be: i32.load, i64.load, f32.load, f64.load, v128.load,\n"); + printf(" i32.store, i64.store, f32.store, f64.store, v128.store\n"); + printf(" Use comma to separate, e.g. --enable-segue=i32.load,i64.store\n"); + printf(" and --enable-segue means all flags are added.\n"); printf(" --emit-custom-sections=
\n"); printf(" Emit the specified custom sections to AoT file, using comma to separate\n"); printf(" multiple names, e.g.\n"); @@ -84,7 +90,7 @@ print_help() } while (0) /** - * Split a strings into an array of strings + * Split a string into an array of strings * Returns NULL on failure * Memory must be freed by caller * Based on: http://stackoverflow.com/a/11198630/471795 @@ -126,6 +132,57 @@ split_string(char *str, int *count, const char *delimer) return res; } +static uint32 +resolve_segue_flags(char *str_flags) +{ + uint32 segue_flags = 0; + int32 flag_count, i; + char **flag_list; + + flag_list = split_string(str_flags, &flag_count, ","); + if (flag_list) { + for (i = 0; i < flag_count; i++) { + if (!strcmp(flag_list[i], "i32.load")) { + segue_flags |= 1 << 0; + } + else if (!strcmp(flag_list[i], "i64.load")) { + segue_flags |= 1 << 1; + } + else if (!strcmp(flag_list[i], "f32.load")) { + segue_flags |= 1 << 2; + } + else if (!strcmp(flag_list[i], "f64.load")) { + segue_flags |= 1 << 3; + } + else if (!strcmp(flag_list[i], "v128.load")) { + segue_flags |= 1 << 4; + } + else if (!strcmp(flag_list[i], "i32.store")) { + segue_flags |= 1 << 8; + } + else if (!strcmp(flag_list[i], "i64.store")) { + segue_flags |= 1 << 9; + } + else if (!strcmp(flag_list[i], "f32.store")) { + segue_flags |= 1 << 10; + } + else if (!strcmp(flag_list[i], "f64.store")) { + segue_flags |= 1 << 11; + } + else if (!strcmp(flag_list[i], "v128.store")) { + segue_flags |= 1 << 12; + } + else { + /* invalid flag */ + segue_flags = (uint32)-1; + break; + } + } + free(flag_list); + } + return segue_flags; +} + int main(int argc, char *argv[]) { @@ -272,6 +329,15 @@ main(int argc, char *argv[]) else if (!strcmp(argv[0], "--disable-llvm-lto")) { option.disable_llvm_lto = true; } + else if (!strcmp(argv[0], "--enable-segue")) { + /* all flags are enabled */ + option.segue_flags = 0x1F1F; + } + else if (!strncmp(argv[0], "--enable-segue=", 15)) { + option.segue_flags = resolve_segue_flags(argv[0] 
+ 15); + if (option.segue_flags == (uint32)-1) + PRINT_HELP_AND_EXIT(); + } else if (!strncmp(argv[0], "--emit-custom-sections=", 23)) { int len = 0; if (option.custom_sections) {