From 7e9bf9cdf5ee27b0b03b623f755724a4432449d1 Mon Sep 17 00:00:00 2001 From: Wenyong Huang Date: Thu, 20 Apr 2023 10:09:34 +0800 Subject: [PATCH] Implement Fast JIT multi-threading feature (#2134) - Translate all the opcodes of threads spec proposal for Fast JIT - Add the atomic flag for Fast JIT load/store IRs to support atomic load/store - Add new atomic related Fast JIT IRs and translate them in the codegen - Add suspend_flags check in branch opcodes and before/after call function - Modify CI to enable Fast JIT multi-threading test Co-authored-by: TianlongLiang --- .../compilation_on_android_ubuntu.yml | 24 +- core/iwasm/compilation/aot_emit_control.c | 4 +- .../fast-jit/cg/x86-64/jit_codegen_x86_64.cpp | 1762 ++++++++++++++++- core/iwasm/fast-jit/fe/jit_emit_control.c | 55 + core/iwasm/fast-jit/fe/jit_emit_function.c | 31 + core/iwasm/fast-jit/fe/jit_emit_memory.c | 374 +++- core/iwasm/fast-jit/fe/jit_emit_memory.h | 3 + core/iwasm/fast-jit/jit_dump.c | 7 +- core/iwasm/fast-jit/jit_frontend.c | 65 +- core/iwasm/fast-jit/jit_frontend.h | 11 + core/iwasm/fast-jit/jit_ir.c | 18 +- core/iwasm/fast-jit/jit_ir.def | 44 + core/iwasm/fast-jit/jit_ir.h | 38 +- core/iwasm/fast-jit/jit_regalloc.c | 14 + tests/wamr-test-suites/test_wamr.sh | 8 - 15 files changed, 2290 insertions(+), 168 deletions(-) diff --git a/.github/workflows/compilation_on_android_ubuntu.yml b/.github/workflows/compilation_on_android_ubuntu.yml index 3a495e5b..aa366833 100644 --- a/.github/workflows/compilation_on_android_ubuntu.yml +++ b/.github/workflows/compilation_on_android_ubuntu.yml @@ -125,8 +125,8 @@ jobs: # Running mode $CLASSIC_INTERP_BUILD_OPTIONS, $FAST_INTERP_BUILD_OPTIONS, - $FAST_JIT_BUILD_OPTIONS - ] + $FAST_JIT_BUILD_OPTIONS, + ] make_options_feature: [ # Features "-DWAMR_BUILD_CUSTOM_NAME_SECTION=1", @@ -414,11 +414,11 @@ jobs: os: [ubuntu-20.04, ubuntu-22.04] wasi_sdk_release: [ - "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-20/wasi-sdk-20.0-linux.tar.gz" + "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-20/wasi-sdk-20.0-linux.tar.gz", ] wabt_release: [ - "https://github.com/WebAssembly/wabt/releases/download/1.0.31/wabt-1.0.31-ubuntu.tar.gz" + "https://github.com/WebAssembly/wabt/releases/download/1.0.31/wabt-1.0.31-ubuntu.tar.gz", ] steps: - name: checkout @@ -505,7 +505,7 @@ jobs: build_iwasm, build_llvm_libraries_on_ubuntu_2004, build_llvm_libraries_on_ubuntu_2204, - build_wamrc + build_wamrc, ] runs-on: ${{ matrix.os }} strategy: @@ -530,7 +530,7 @@ jobs: ] wasi_sdk_release: [ - "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-20/wasi-sdk-20.0-linux.tar.gz" + "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-20/wasi-sdk-20.0-linux.tar.gz", ] include: - os: ubuntu-20.04 @@ -551,24 +551,16 @@ jobs: test_option: $MULTI_MODULES_TEST_OPTIONS - running_mode: "jit" test_option: $MULTI_MODULES_TEST_OPTIONS - # fast-jit doesn't support multi module, simd, and threads + # fast-jit doesn't support multi module, simd - running_mode: "fast-jit" test_option: $MULTI_MODULES_TEST_OPTIONS - running_mode: "fast-jit" test_option: $SIMD_TEST_OPTIONS - - running_mode: "fast-jit" - test_option: $THREADS_TEST_OPTIONS - - running_mode: "fast-jit" - test_option: $WASI_TEST_OPTIONS - # multi-tier-jit doesn't support multi module, simd, and threads + # multi-tier-jit doesn't support multi module, simd - running_mode: "multi-tier-jit" test_option: $MULTI_MODULES_TEST_OPTIONS - running_mode: "multi-tier-jit" test_option: $SIMD_TEST_OPTIONS - - 
running_mode: "multi-tier-jit" - test_option: $THREADS_TEST_OPTIONS - - running_mode: "multi-tier-jit" - test_option: $WASI_TEST_OPTIONS steps: - name: checkout uses: actions/checkout@v3 diff --git a/core/iwasm/compilation/aot_emit_control.c b/core/iwasm/compilation/aot_emit_control.c index 8c15d4d1..2cf51cf6 100644 --- a/core/iwasm/compilation/aot_emit_control.c +++ b/core/iwasm/compilation/aot_emit_control.c @@ -701,7 +701,7 @@ check_suspend_flags(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) if (!(terminate_flags = LLVMBuildLoad2(comp_ctx->builder, I32_TYPE, terminate_addr, "terminate_flags"))) { - aot_set_last_error("llvm build bit cast failed"); + aot_set_last_error("llvm build LOAD failed"); return false; } /* Set terminate_flags memory accecc to volatile, so that the value @@ -729,7 +729,7 @@ check_suspend_flags(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) goto fail; } - /* Move builder to terminate block */ + /* Move builder to non terminate block */ SET_BUILDER_POS(non_terminate_block); return true; diff --git a/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp b/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp index 48d1486d..e28acf98 100644 --- a/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp +++ b/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp @@ -26,6 +26,28 @@ static char *code_block_return_to_interp_from_jitted = NULL; static char *code_block_compile_fast_jit_and_then_call = NULL; #endif +typedef enum { + REG_BPL_IDX = 0, + REG_AXL_IDX, + REG_BXL_IDX, + REG_CXL_IDX, + REG_DXL_IDX, + REG_DIL_IDX, + REG_SIL_IDX, + REG_I8_FREE_IDX = REG_SIL_IDX +} RegIndexI8; + +typedef enum { + REG_BP_IDX = 0, + REG_AX_IDX, + REG_BX_IDX, + REG_CX_IDX, + REG_DX_IDX, + REG_DI_IDX, + REG_SI_IDX, + REG_I16_FREE_IDX = REG_SI_IDX +} RegIndexI16; + typedef enum { REG_EBP_IDX = 0, REG_EAX_IDX, @@ -262,6 +284,13 @@ jit_codegen_interp_jitted_glue(void *exec_env, JitInterpSwitchInfo *info, r3 = *jit_insn_opnd(insn, 3); \ CHECK_NCONST(r0) +/* Load five operands from insn and check if r0 is non-const */ +#define LOAD_4ARGS_NO_ASSIGN() \ + r0 = *jit_insn_opnd(insn, 0); \ + r1 = *jit_insn_opnd(insn, 1); \ + r2 = *jit_insn_opnd(insn, 2); \ + r3 = *jit_insn_opnd(insn, 3); + class JitErrorHandler : public ErrorHandler { public: @@ -853,6 +882,47 @@ mov_imm_to_m(x86::Assembler &a, x86::Mem &m_dst, Imm imm_src, uint32 bytes_dst) return true; } +#if WASM_ENABLE_SHARED_MEMORY != 0 +/** + * Encode exchange register with memory + * + * @param a the assembler to emit the code + * @param bytes_dst the bytes number of the data, + * could be 1(byte), 2(short), 4(int32), 8(int64), + * skipped by float and double + * @param kind_dst the kind of data to move, could only be I32 or I64 + * @param m_dst the dest memory operand + * @param reg_no_src the index of dest register + * + * @return true if success, false otherwise + */ +static bool +xchg_r_to_m(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, + x86::Mem &m_dst, int32 reg_no_src) +{ + bh_assert((kind_dst == JIT_REG_KIND_I32 && bytes_dst <= 4) + || kind_dst == JIT_REG_KIND_I64); + bh_assert(reg_no_src < 16); + switch (bytes_dst) { + case 1: + a.xchg(m_dst, regs_i8[reg_no_src]); + break; + case 2: + a.xchg(m_dst, regs_i16[reg_no_src]); + break; + case 4: + a.xchg(m_dst, regs_i32[reg_no_src]); + break; + case 8: + a.xchg(m_dst, regs_i64[reg_no_src]); + break; + default: + bh_assert(0); + return false; + } + return true; +} +#endif /** * Encode loading register data from memory with imm base and imm offset * @@ -967,9 +1037,13 @@ 
ld_r_from_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, static bool st_r_to_base_imm_offset_imm(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, int32 reg_no_src, int32 base, - int32 offset) + int32 offset, bool atomic) { x86::Mem m((uintptr_t)(base + offset), bytes_dst); +#if WASM_ENABLE_SHARED_MEMORY != 0 + if (atomic) + return xchg_r_to_m(a, bytes_dst, kind_dst, m, reg_no_src); +#endif return mov_r_to_m(a, bytes_dst, kind_dst, m, reg_no_src); } @@ -990,9 +1064,14 @@ st_r_to_base_imm_offset_imm(x86::Assembler &a, uint32 bytes_dst, */ static bool st_r_to_base_imm_offset_r(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, - int32 reg_no_src, int32 base, int32 reg_no_offset) + int32 reg_no_src, int32 base, int32 reg_no_offset, + bool atomic) { x86::Mem m(regs_i64[reg_no_offset], base, bytes_dst); +#if WASM_ENABLE_SHARED_MEMORY != 0 + if (atomic) + return xchg_r_to_m(a, bytes_dst, kind_dst, m, reg_no_src); +#endif return mov_r_to_m(a, bytes_dst, kind_dst, m, reg_no_src); } @@ -1012,9 +1091,14 @@ st_r_to_base_imm_offset_r(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, */ static bool st_r_to_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, - int32 reg_no_src, int32 reg_no_base, int32 offset) + int32 reg_no_src, int32 reg_no_base, int32 offset, + bool atomic) { x86::Mem m(regs_i64[reg_no_base], offset, bytes_dst); +#if WASM_ENABLE_SHARED_MEMORY != 0 + if (atomic) + return xchg_r_to_m(a, bytes_dst, kind_dst, m, reg_no_src); +#endif return mov_r_to_m(a, bytes_dst, kind_dst, m, reg_no_src); } @@ -1036,9 +1120,13 @@ st_r_to_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, static bool st_r_to_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, int32 reg_no_src, int32 reg_no_base, - int32 reg_no_offset) + int32 reg_no_offset, bool atomic) { x86::Mem m(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0, bytes_dst); +#if WASM_ENABLE_SHARED_MEMORY != 0 + if (atomic) + return xchg_r_to_m(a, bytes_dst, kind_dst, m, reg_no_src); +#endif return mov_r_to_m(a, bytes_dst, kind_dst, m, reg_no_src); } @@ -1063,6 +1151,37 @@ imm_set_value(Imm &imm, void *data, uint32 bytes) } } +#if WASM_ENABLE_SHARED_MEMORY != 0 +static uint32 +mov_imm_to_free_reg(x86::Assembler &a, Imm &imm, uint32 bytes) +{ + uint32 reg_no; + + switch (bytes) { + case 1: + reg_no = REG_I8_FREE_IDX; + a.mov(regs_i8[reg_no], imm); + break; + case 2: + reg_no = REG_I16_FREE_IDX; + a.mov(regs_i16[reg_no], imm); + break; + case 4: + reg_no = REG_I32_FREE_IDX; + a.mov(regs_i32[reg_no], imm); + break; + case 8: + reg_no = REG_I64_FREE_IDX; + a.mov(regs_i64[reg_no], imm); + break; + default: + bh_assert(0); + } + + return reg_no; +} +#endif + /** * Encode storing int32 imm data to memory with imm base and imm offset * @@ -1077,11 +1196,18 @@ imm_set_value(Imm &imm, void *data, uint32 bytes) */ static bool st_imm_to_base_imm_offset_imm(x86::Assembler &a, uint32 bytes_dst, - void *data_src, int32 base, int32 offset) + void *data_src, int32 base, int32 offset, + bool atomic) { x86::Mem m((uintptr_t)(base + offset), bytes_dst); Imm imm; imm_set_value(imm, data_src, bytes_dst); +#if WASM_ENABLE_SHARED_MEMORY != 0 + uint32 reg_no_src = mov_imm_to_free_reg(a, imm, bytes_dst); + if (atomic) { + return xchg_r_to_m(a, bytes_dst, JIT_REG_KIND_I64, m, reg_no_src); + } +#endif return mov_imm_to_m(a, m, imm, bytes_dst); } @@ -1100,11 +1226,17 @@ st_imm_to_base_imm_offset_imm(x86::Assembler &a, uint32 bytes_dst, */ static bool 
st_imm_to_base_imm_offset_r(x86::Assembler &a, uint32 bytes_dst, void *data_src, - int32 base, int32 reg_no_offset) + int32 base, int32 reg_no_offset, bool atomic) { x86::Mem m(regs_i64[reg_no_offset], base, bytes_dst); Imm imm; imm_set_value(imm, data_src, bytes_dst); +#if WASM_ENABLE_SHARED_MEMORY != 0 + uint32 reg_no_src = mov_imm_to_free_reg(a, imm, bytes_dst); + if (atomic) { + return xchg_r_to_m(a, bytes_dst, JIT_REG_KIND_I64, m, reg_no_src); + } +#endif return mov_imm_to_m(a, m, imm, bytes_dst); } @@ -1123,11 +1255,17 @@ st_imm_to_base_imm_offset_r(x86::Assembler &a, uint32 bytes_dst, void *data_src, */ static bool st_imm_to_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst, void *data_src, - int32 reg_no_base, int32 offset) + int32 reg_no_base, int32 offset, bool atomic) { x86::Mem m(regs_i64[reg_no_base], offset, bytes_dst); Imm imm; imm_set_value(imm, data_src, bytes_dst); +#if WASM_ENABLE_SHARED_MEMORY != 0 + uint32 reg_no_src = mov_imm_to_free_reg(a, imm, bytes_dst); + if (atomic) { + return xchg_r_to_m(a, bytes_dst, JIT_REG_KIND_I64, m, reg_no_src); + } +#endif return mov_imm_to_m(a, m, imm, bytes_dst); } @@ -1147,11 +1285,17 @@ st_imm_to_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst, void *data_src, */ static bool st_imm_to_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst, void *data_src, - int32 reg_no_base, int32 reg_no_offset) + int32 reg_no_base, int32 reg_no_offset, bool atomic) { x86::Mem m(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0, bytes_dst); Imm imm; imm_set_value(imm, data_src, bytes_dst); +#if WASM_ENABLE_SHARED_MEMORY != 0 + uint32 reg_no_src = mov_imm_to_free_reg(a, imm, bytes_dst); + if (atomic) { + return xchg_r_to_m(a, bytes_dst, JIT_REG_KIND_I64, m, reg_no_src); + } +#endif return mov_imm_to_m(a, m, imm, bytes_dst); } @@ -4555,82 +4699,84 @@ cmp_r_imm_to_r_f64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src, * Encode insn sd: ST_type r0, r1, r2 * @param kind the data kind, such as I32, I64, F32 and F64 * @param bytes_dst the byte number of dst data + * @param atomic whether it's atomic store */ -#define ST_R_R_R(kind, type, bytes_dst) \ - do { \ - type data_src = 0; \ - int32 reg_no_src = 0, reg_no_base = 0, reg_no_offset = 0; \ - int32 base = 0, offset = 0; \ - bool _ret = false; \ - \ - if (jit_reg_is_const(r1)) { \ - CHECK_KIND(r1, JIT_REG_KIND_I32); \ - } \ - else { \ - CHECK_KIND(r1, JIT_REG_KIND_I64); \ - } \ - if (jit_reg_is_const(r2)) { \ - CHECK_KIND(r2, JIT_REG_KIND_I32); \ - } \ - else { \ - CHECK_KIND(r2, JIT_REG_KIND_I64); \ - } \ - \ - if (jit_reg_is_const(r0)) \ - data_src = jit_cc_get_const_##kind(cc, r0); \ - else { \ - reg_no_src = jit_reg_no(r0); \ - CHECK_REG_NO(reg_no_src, jit_reg_kind(r0)); \ - } \ - if (jit_reg_is_const(r1)) \ - base = jit_cc_get_const_I32(cc, r1); \ - else { \ - reg_no_base = jit_reg_no(r1); \ - CHECK_REG_NO(reg_no_base, jit_reg_kind(r1)); \ - } \ - if (jit_reg_is_const(r2)) \ - offset = jit_cc_get_const_I32(cc, r2); \ - else { \ - reg_no_offset = jit_reg_no(r2); \ - CHECK_REG_NO(reg_no_offset, jit_reg_kind(r2)); \ - } \ - \ - if (jit_reg_is_const(r0)) { \ - if (jit_reg_is_const(r1)) { \ - if (jit_reg_is_const(r2)) \ - _ret = st_imm_to_base_imm_offset_imm( \ - a, bytes_dst, &data_src, base, offset); \ - else \ - _ret = st_imm_to_base_imm_offset_r( \ - a, bytes_dst, &data_src, base, reg_no_offset); \ - } \ - else if (jit_reg_is_const(r2)) \ - _ret = st_imm_to_base_r_offset_imm(a, bytes_dst, &data_src, \ - reg_no_base, offset); \ - else \ - _ret = st_imm_to_base_r_offset_r(a, bytes_dst, 
&data_src, \ - reg_no_base, reg_no_offset); \ - } \ - else if (jit_reg_is_const(r1)) { \ - if (jit_reg_is_const(r2)) \ - _ret = st_r_to_base_imm_offset_imm(a, bytes_dst, \ - JIT_REG_KIND_##kind, \ - reg_no_src, base, offset); \ - else \ - _ret = st_r_to_base_imm_offset_r( \ - a, bytes_dst, JIT_REG_KIND_##kind, reg_no_src, base, \ - reg_no_offset); \ - } \ - else if (jit_reg_is_const(r2)) \ - _ret = \ - st_r_to_base_r_offset_imm(a, bytes_dst, JIT_REG_KIND_##kind, \ - reg_no_src, reg_no_base, offset); \ - else \ - _ret = st_r_to_base_r_offset_r(a, bytes_dst, JIT_REG_KIND_##kind, \ - reg_no_src, reg_no_base, \ - reg_no_offset); \ - if (!_ret) \ - GOTO_FAIL; \ +#define ST_R_R_R(kind, type, bytes_dst, atomic) \ + do { \ + type data_src = 0; \ + int32 reg_no_src = 0, reg_no_base = 0, reg_no_offset = 0; \ + int32 base = 0, offset = 0; \ + bool _ret = false; \ + \ + if (jit_reg_is_const(r1)) { \ + CHECK_KIND(r1, JIT_REG_KIND_I32); \ + } \ + else { \ + CHECK_KIND(r1, JIT_REG_KIND_I64); \ + } \ + if (jit_reg_is_const(r2)) { \ + CHECK_KIND(r2, JIT_REG_KIND_I32); \ + } \ + else { \ + CHECK_KIND(r2, JIT_REG_KIND_I64); \ + } \ + \ + if (jit_reg_is_const(r0)) \ + data_src = jit_cc_get_const_##kind(cc, r0); \ + else { \ + reg_no_src = jit_reg_no(r0); \ + CHECK_REG_NO(reg_no_src, jit_reg_kind(r0)); \ + } \ + if (jit_reg_is_const(r1)) \ + base = jit_cc_get_const_I32(cc, r1); \ + else { \ + reg_no_base = jit_reg_no(r1); \ + CHECK_REG_NO(reg_no_base, jit_reg_kind(r1)); \ + } \ + if (jit_reg_is_const(r2)) \ + offset = jit_cc_get_const_I32(cc, r2); \ + else { \ + reg_no_offset = jit_reg_no(r2); \ + CHECK_REG_NO(reg_no_offset, jit_reg_kind(r2)); \ + } \ + \ + if (jit_reg_is_const(r0)) { \ + if (jit_reg_is_const(r1)) { \ + if (jit_reg_is_const(r2)) \ + _ret = st_imm_to_base_imm_offset_imm( \ + a, bytes_dst, &data_src, base, offset, atomic); \ + else \ + _ret = st_imm_to_base_imm_offset_r( \ + a, bytes_dst, &data_src, base, reg_no_offset, atomic); \ + } \ + else if (jit_reg_is_const(r2)) \ + _ret = st_imm_to_base_r_offset_imm( \ + a, bytes_dst, &data_src, reg_no_base, offset, atomic); \ + else \ + _ret = st_imm_to_base_r_offset_r(a, bytes_dst, &data_src, \ + reg_no_base, reg_no_offset, \ + atomic); \ + } \ + else if (jit_reg_is_const(r1)) { \ + if (jit_reg_is_const(r2)) \ + _ret = st_r_to_base_imm_offset_imm( \ + a, bytes_dst, JIT_REG_KIND_##kind, reg_no_src, base, \ + offset, atomic); \ + else \ + _ret = st_r_to_base_imm_offset_r( \ + a, bytes_dst, JIT_REG_KIND_##kind, reg_no_src, base, \ + reg_no_offset, atomic); \ + } \ + else if (jit_reg_is_const(r2)) \ + _ret = st_r_to_base_r_offset_imm(a, bytes_dst, \ + JIT_REG_KIND_##kind, reg_no_src, \ + reg_no_base, offset, atomic); \ + else \ + _ret = st_r_to_base_r_offset_r(a, bytes_dst, JIT_REG_KIND_##kind, \ + reg_no_src, reg_no_base, \ + reg_no_offset, atomic); \ + if (!_ret) \ + GOTO_FAIL; \ } while (0) /** @@ -6242,12 +6388,1192 @@ cast_r_f64_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src) GOTO_FAIL; \ } while (0) +#if WASM_ENABLE_SHARED_MEMORY != 0 + +/** + * Encode extend certain bytes in the src register to a I32 or I64 kind value in + * dst register + * + * @param a the assembler to emit the code + * @param bytes_dst the bytes number of the data, + * could be 1(byte), 2(short), 4(int32), 8(int64), + * @param kind_dst the kind of data to extend to, could be I32, I64 + * @param reg_no_src the index of register hold src value + * + * @return true if success, false otherwise + */ +static bool +extend_r_to_r(x86::Assembler &a, uint32 bytes_dst, 
uint32 kind_dst,
+              int32 reg_no_src, int32 reg_no_dst)
+{
+    if (kind_dst == JIT_REG_KIND_I32) {
+        bh_assert(reg_no_src < 16 && reg_no_dst < 16);
+        switch (bytes_dst) {
+            case 1:
+                extend_r8_to_r32(a, reg_no_dst, reg_no_src, false);
+                break;
+            case 2:
+                extend_r16_to_r32(a, reg_no_dst, reg_no_src, false);
+                break;
+            case 4:
+                mov_r_to_r_i32(a, reg_no_dst, reg_no_src);
+                break;
+            default:
+                bh_assert(0);
+                return false;
+        }
+    }
+    else if (kind_dst == JIT_REG_KIND_I64) {
+        bh_assert(reg_no_src < 16 && reg_no_dst < 16);
+        switch (bytes_dst) {
+            case 1:
+                extend_r8_to_r64(a, reg_no_dst, reg_no_src, false);
+                break;
+            case 2:
+                extend_r16_to_r64(a, reg_no_dst, reg_no_src, false);
+                break;
+            case 4:
+                extend_r32_to_r64(a, reg_no_dst, reg_no_src, false);
+                break;
+            case 8:
+                mov_r_to_r_i64(a, reg_no_dst, reg_no_src);
+                break;
+            default:
+                bh_assert(0);
+                return false;
+        }
+    }
+    else {
+        bh_assert(0);
+    }
+    return true;
+}
+
+/**
+ * Encode atomic compare and exchange; when calling this function, the
+ * value to compare against must already have been moved into register
+ * al/ax/eax/rax
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of data to move, could be I32, I64
+ * @param m_dst the dest memory operand
+ * @param reg_no_xchg the index of the register holding the exchange value
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+at_cmpxchg(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst,
+           int32 reg_no_xchg, x86::Mem &m_dst)
+{
+    bh_assert((kind_dst == JIT_REG_KIND_I32 && bytes_dst <= 4)
+              || kind_dst == JIT_REG_KIND_I64);
+    bh_assert(reg_no_xchg < 16);
+    switch (bytes_dst) {
+        case 1:
+            a.lock().cmpxchg(m_dst, regs_i8[reg_no_xchg]);
+            break;
+        case 2:
+            a.lock().cmpxchg(m_dst, regs_i16[reg_no_xchg]);
+            break;
+        case 4:
+            a.lock().cmpxchg(m_dst, regs_i32[reg_no_xchg]);
+            break;
+        case 8:
+            a.lock().cmpxchg(m_dst, regs_i64[reg_no_xchg]);
+            break;
+        default:
+            bh_assert(0);
+            return false;
+    }
+    return true;
+}
+
+/**
+ * Encode atomic compare and exchange: load value into a register from
+ * memory with reg base and reg offset, compare the (expected) reg data
+ * with the loaded value, if equal, store the (replacement) reg data to
+ * the same memory, else do nothing. Either way, returns the loaded value
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on
+ *        (load, compare, replacement), could be 1(byte), 2(short),
+ *        4(int32), 8(int64)
+ * @param reg_no_xchg the no of register that stores the conditional
+ *        replacement value
+ * @param reg_no_base the no of register that stores the base address
+ *        of src&dst memory
+ * @param reg_no_offset the no of register that stores the offset address
+ *        of src&dst memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_cmpxchg_r_ra_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                                uint32 kind_dst, int32 reg_no_xchg,
+                                int32 reg_no_base, int32 reg_no_offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0, bytes_dst);
+    return at_cmpxchg(a, bytes_dst, kind_dst, reg_no_xchg, m)
+           && extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, REG_RAX_IDX);
+}
+
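For reference, the contract of the at_cmpxchg_* helpers can be sketched in plain C with the GCC __atomic builtins. This is an illustrative sketch only, not part of the patch: `mem`, `expected` and `replacement` are placeholder names, and the emitted code is actually a single `lock cmpxchg` whose result lands in al/ax/eax/rax:

    #include <stdint.h>

    /* Reference semantics only: returns the loaded value, just as the
       helpers above leave it in rax (zero-extended by extend_r_to_r). */
    static inline uint64_t
    ref_cmpxchg(uint64_t *mem, uint64_t expected, uint64_t replacement)
    {
        uint64_t old = expected;
        /* on failure, `old` is overwritten with the value read from memory */
        __atomic_compare_exchange_n(mem, &old, replacement, false,
                                    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
        return old;
    }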
+/**
+ * Encode atomic compare and exchange: load value into a register from
+ * memory with reg base and imm offset, compare the (expected) reg data
+ * with the loaded value, if equal, store the (replacement) reg data to
+ * the same memory, else do nothing. Either way, returns the loaded value
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on
+ *        (load, compare, replacement), could be 1(byte), 2(short),
+ *        4(int32), 8(int64)
+ * @param reg_no_xchg the no of register that stores the conditional
+ *        replacement value
+ * @param reg_no_base the no of register that stores the base address
+ *        of src&dst memory
+ * @param offset the offset address of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_cmpxchg_r_ra_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                                  uint32 kind_dst, int32 reg_no_xchg,
+                                  int32 reg_no_base, int32 offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], offset, bytes_dst);
+    return at_cmpxchg(a, bytes_dst, kind_dst, reg_no_xchg, m)
+           && extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, REG_RAX_IDX);
+}
+
+/**
+ * Encode atomic compare and exchange: load value into a register from
+ * memory with reg base and reg offset, compare the (expected) reg data
+ * with the loaded value, if equal, store the (replacement) imm data to
+ * the same memory, else do nothing. Either way, returns the loaded value
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on
+ *        (load, compare, replacement), could be 1(byte), 2(short),
+ *        4(int32), 8(int64)
+ * @param data_xchg the immediate data for exchange (the conditional
+ *        replacement value)
+ * @param reg_no_base the no of register that stores the base address
+ *        of src&dst memory
+ * @param reg_no_offset the no of register that stores the offset address
+ *        of src&dst memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_cmpxchg_imm_ra_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                                  uint32 kind_dst, void *data_xchg,
+                                  int32 reg_no_base, int32 reg_no_offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0, bytes_dst);
+    Imm imm;
+    imm_set_value(imm, data_xchg, bytes_dst);
+    uint32 reg_no_xchg = mov_imm_to_free_reg(a, imm, bytes_dst);
+    return at_cmpxchg(a, bytes_dst, kind_dst, reg_no_xchg, m)
+           && extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, REG_RAX_IDX);
+}
+
+/**
+ * Encode atomic compare and exchange: load value into a register from
+ * memory with reg base and imm offset, compare the (expected) reg data
+ * with the loaded value, if equal, store the (replacement) imm data to
+ * the same memory, else do nothing. Either way, returns the loaded value
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on
+ *        (load, compare, replacement), could be 1(byte), 2(short),
+ *        4(int32), 8(int64)
+ * @param data_xchg the immediate data for exchange (the conditional
+ *        replacement value)
+ * @param reg_no_base the no of register that stores the base address
+ *        of src&dst memory
+ * @param offset the offset address of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_cmpxchg_imm_ra_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                                    uint32 kind_dst, void *data_xchg,
+                                    int32 reg_no_base, int32 offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], offset, bytes_dst);
+    Imm imm;
+    imm_set_value(imm, data_xchg, bytes_dst);
+    uint32 reg_no_xchg = mov_imm_to_free_reg(a, imm, bytes_dst);
+    return at_cmpxchg(a, bytes_dst, kind_dst, reg_no_xchg, m)
+           && extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, REG_RAX_IDX);
+}
+
+/**
+ * Encode insn cmpxchg: CMPXCHG_type r0, r1, r2, r3, r4
+ * @param kind the data kind, can only be I32 or I64
+ * @param bytes_dst the byte number of dst data
+ */
+#define CMPXCHG_R_R_R_R_R(kind, type, bytes_dst) \
+    do { \
+        type data_xchg = 0; \
+        int32 reg_no_xchg = 0, reg_no_cmp = 0, reg_no_base = 0, \
+              reg_no_offset = 0; \
+        int32 offset = 0; \
+        bool _ret = false; \
+        if (jit_reg_is_const(r3)) { \
+            CHECK_KIND(r3, JIT_REG_KIND_I32); \
+        } \
+        else { \
+            CHECK_KIND(r3, JIT_REG_KIND_I64); \
+        } \
+        /* r1: expected value (it must be in register al/ax/eax/rax) \
+         * r2: memory base addr, which can't be const */ \
+        CHECK_NCONST(r1); \
+        reg_no_cmp = jit_reg_no(r1); \
+        bh_assert(reg_no_cmp == REG_EAX_IDX || reg_no_cmp == REG_RAX_IDX); \
+        CHECK_REG_NO(reg_no_cmp, jit_reg_kind(r1)); \
+        CHECK_NCONST(r2); \
+        reg_no_base = jit_reg_no(r2); \
+        CHECK_REG_NO(reg_no_base, jit_reg_kind(r2)); \
+        /* r0: replacement value, r3: offset; either can be const */ \
+        if (jit_reg_is_const(r0)) \
+            data_xchg = jit_cc_get_const_##kind(cc, r0); \
+        else { \
+            reg_no_xchg = jit_reg_no(r0); \
+            CHECK_REG_NO(reg_no_xchg, jit_reg_kind(r0)); \
+        } \
+        if (jit_reg_is_const(r3)) \
+            offset = jit_cc_get_const_I32(cc, r3); \
+        else { \
+            reg_no_offset = jit_reg_no(r3); \
+            CHECK_REG_NO(reg_no_offset, jit_reg_kind(r3)); \
+        } \
+        \
+        if (jit_reg_is_const(r0)) { \
+            if (jit_reg_is_const(r3)) \
+                _ret = at_cmpxchg_imm_ra_base_r_offset_imm( \
+                    a, bytes_dst, JIT_REG_KIND_##kind, &data_xchg, \
+                    reg_no_base, offset); \
+            else \
+                _ret = at_cmpxchg_imm_ra_base_r_offset_r( \
+                    a, bytes_dst, JIT_REG_KIND_##kind, &data_xchg, \
+                    reg_no_base, reg_no_offset); \
+        } \
+        else { \
+            if (jit_reg_is_const(r3)) \
+                _ret = at_cmpxchg_r_ra_base_r_offset_imm( \
+                    a, bytes_dst, JIT_REG_KIND_##kind, reg_no_xchg, \
+                    reg_no_base, offset); \
+            else \
+                _ret = at_cmpxchg_r_ra_base_r_offset_r( \
+                    a, bytes_dst, JIT_REG_KIND_##kind, reg_no_xchg, \
+                    reg_no_base, reg_no_offset); \
+        } \
+        if (!_ret) \
+            GOTO_FAIL; \
+    } while (0)
+
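The rmw add/sub/xchg helpers that follow all hinge on x86's `lock xadd` and `xchg`, which deposit the old memory value in the source register, exactly the result the wasm `atomic.rmw` opcodes must produce. A hedged C sketch of the add case (all names are placeholders, not patch code):

    #include <stdint.h>

    /* Reference semantics of the at_rmw_add_* helpers: atomically add and
       return the value the memory held before the addition. */
    static inline uint64_t
    ref_fetch_add(uint64_t *mem, uint64_t operand)
    {
        return __atomic_fetch_add(mem, operand, __ATOMIC_SEQ_CST);
    }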
+/**
+ * Encode negating a value in a register
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of data to negate, could be I32, I64
+ * @param reg_no_src the index of the register holding the src value
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+neg_r(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, int32 reg_no_src)
+{
+    bh_assert((kind_dst == JIT_REG_KIND_I32 && bytes_dst <= 4)
+              || kind_dst == JIT_REG_KIND_I64);
+    bh_assert(reg_no_src < 16);
+    switch (bytes_dst) {
+        case 1:
+            a.neg(regs_i8[reg_no_src]);
+            break;
+        case 2:
+            a.neg(regs_i16[reg_no_src]);
+            break;
+        case 4:
+            a.neg(regs_i32[reg_no_src]);
+            break;
+        case 8:
+            a.neg(regs_i64[reg_no_src]);
+            break;
+        default:
+            bh_assert(0);
+            return false;
+    }
+    return true;
+}
+
+/**
+ * Encode atomic exchange and add
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of data to move, could be I32, I64
+ * @param reg_no_src the index of the register holding the operand of the
+ *        add operation
+ * @param m_dst the dest memory operand
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+at_xadd(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, int32 reg_no_src,
+        x86::Mem &m_dst)
+{
+    bh_assert((kind_dst == JIT_REG_KIND_I32 && bytes_dst <= 4)
+              || kind_dst == JIT_REG_KIND_I64);
+    bh_assert(reg_no_src < 16);
+    switch (bytes_dst) {
+        case 1:
+            a.lock().xadd(m_dst, regs_i8[reg_no_src]);
+            break;
+        case 2:
+            a.lock().xadd(m_dst, regs_i16[reg_no_src]);
+            break;
+        case 4:
+            a.lock().xadd(m_dst, regs_i32[reg_no_src]);
+            break;
+        case 8:
+            a.lock().xadd(m_dst, regs_i64[reg_no_src]);
+            break;
+        default:
+            bh_assert(0);
+            return false;
+    }
+
+    return true;
+}
+
+/**
+ * Encode atomic rmw add: load value into a register from memory
+ * with reg base and imm offset, add the imm data to the loaded value,
+ * store back
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param data_src the immediate data(first operand)
+ * @param reg_no_base the no of register that stores the base address
+ *        of src&dst memory(second operand&store back)
+ * @param offset the offset address of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_add_imm_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                                 uint32 kind_dst, int32 reg_no_dst,
+                                 void *data_src, int32 reg_no_base,
+                                 int32 offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], offset, bytes_dst);
+    Imm imm;
+    imm_set_value(imm, data_src, bytes_dst);
+    uint32 reg_no_src = mov_imm_to_free_reg(a, imm, bytes_dst);
+    return at_xadd(a, bytes_dst, kind_dst, reg_no_src, m)
+           && extend_r_to_r(a, bytes_dst, kind_dst, reg_no_src, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw add: load value into a register from memory
+ * with reg base and reg offset, add the imm data to the loaded value,
+ * store back
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param data_src the immediate data(second operand)
+ * @param reg_no_base the no of register that stores the base address
+ *        of src&dst memory(first operand&store back location)
+ * @param reg_no_offset the no of register that stores the offset of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_add_imm_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                               uint32 kind_dst, int32 reg_no_dst,
+                               void *data_src, int32 reg_no_base,
+                               int32 reg_no_offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0, bytes_dst);
+    Imm imm;
+    imm_set_value(imm, data_src, bytes_dst);
+    uint32 reg_no_src = mov_imm_to_free_reg(a, imm, bytes_dst);
+    return at_xadd(a, bytes_dst, kind_dst, reg_no_src, m)
+           && extend_r_to_r(a, bytes_dst, kind_dst, reg_no_src, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw add: load value into a register from memory
+ * with reg base and imm offset, add the reg data to the loaded value,
+ * store back
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param reg_no_src the no of register that stores the src data(second operand)
+ * @param reg_no_base the no of register that stores the base address
+ *        of src&dst memory(first operand&store back location)
+ * @param offset the offset address of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_add_r_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                               uint32 kind_dst, int32 reg_no_dst,
+                               int32 reg_no_src, int32 reg_no_base,
+                               int32 offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], offset, bytes_dst);
+    return at_xadd(a, bytes_dst, kind_dst, reg_no_src, m)
+           && extend_r_to_r(a, bytes_dst, kind_dst, reg_no_src, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw add: load value into a register from memory
+ * with reg base and reg offset, add the reg data to the loaded value,
+ * store back
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param reg_no_src the no of register that stores the src data(second operand)
+ * @param reg_no_base the no of register that stores the base address
+ *        of src&dst memory(first operand&store back)
+ * @param reg_no_offset the no of register that stores the offset of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_add_r_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                             uint32 kind_dst, int32 reg_no_dst,
+                             int32 reg_no_src, int32 reg_no_base,
+                             int32 reg_no_offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0, bytes_dst);
+    return at_xadd(a, bytes_dst, kind_dst, reg_no_src, m)
+           && extend_r_to_r(a, bytes_dst, kind_dst, reg_no_src, reg_no_dst);
+}
+
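x86-64 has no locked subtract that also returns the old value, so the sub helpers below negate the operand with neg_r() and reuse `lock xadd`: in two's complement, mem + (-x) equals mem - x. In reference C (a sketch with placeholder names, not patch code):

    #include <stdint.h>

    /* Reference semantics of the at_rmw_sub_* helpers: what neg + lock xadd
       compute is exactly an atomic fetch-and-subtract. */
    static inline uint64_t
    ref_fetch_sub(uint64_t *mem, uint64_t operand)
    {
        return __atomic_fetch_sub(mem, operand, __ATOMIC_SEQ_CST);
    }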
+/**
+ * Encode atomic rmw sub: load value into a register from memory
+ * with reg base and imm offset, subtract the imm data from the loaded
+ * value, store back
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param data_src the immediate data(first operand)
+ * @param reg_no_base the no of register that stores the base address
+ *        of src&dst memory(second operand&store back)
+ * @param offset the offset address of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_sub_imm_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                                 uint32 kind_dst, int32 reg_no_dst,
+                                 void *data_src, int32 reg_no_base,
+                                 int32 offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], offset, bytes_dst);
+    Imm imm;
+    imm_set_value(imm, data_src, bytes_dst);
+    uint32 reg_no_src = mov_imm_to_free_reg(a, imm, bytes_dst);
+    return neg_r(a, bytes_dst, kind_dst, reg_no_src)
+           && at_xadd(a, bytes_dst, kind_dst, reg_no_src, m)
+           && extend_r_to_r(a, bytes_dst, kind_dst, reg_no_src, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw sub: load value into a register from memory
+ * with reg base and reg offset, subtract the imm data from the loaded
+ * value, store back
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param data_src the immediate data(second operand)
+ * @param reg_no_base the no of register that stores the base address
+ *        of src&dst memory(first operand&store back location)
+ * @param reg_no_offset the no of register that stores the offset of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_sub_imm_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                               uint32 kind_dst, int32 reg_no_dst,
+                               void *data_src, int32 reg_no_base,
+                               int32 reg_no_offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0, bytes_dst);
+    Imm imm;
+    imm_set_value(imm, data_src, bytes_dst);
+    uint32 reg_no_src = mov_imm_to_free_reg(a, imm, bytes_dst);
+    return neg_r(a, bytes_dst, kind_dst, reg_no_src)
+           && at_xadd(a, bytes_dst, kind_dst, reg_no_src, m)
+           && extend_r_to_r(a, bytes_dst, kind_dst, reg_no_src, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw sub: load value into a register from memory
+ * with reg base and imm offset, subtract the reg data from the loaded
+ * value, store back
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param reg_no_src the no of register that stores the src data(second operand)
+ * @param reg_no_base the no of register that stores the base address
+ *        of src&dst memory(first operand&store back location)
+ * @param offset the offset address of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_sub_r_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                               uint32 kind_dst, int32 reg_no_dst,
+                               int32 reg_no_src, int32 reg_no_base,
+                               int32 offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], offset, bytes_dst);
+    return neg_r(a, bytes_dst, kind_dst, reg_no_src)
+           && at_xadd(a, bytes_dst, kind_dst, reg_no_src, m)
+           && extend_r_to_r(a, bytes_dst, kind_dst, reg_no_src, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw sub: load value into a register from memory
+ * with reg base and reg offset, subtract the reg data from the loaded
+ * value, store back
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param reg_no_src the no of register that stores the src data(second operand)
+ * @param reg_no_base the no of register that stores the base address
+ *        of src&dst memory(first operand&store back)
+ * @param reg_no_offset the no of register that stores the offset of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_sub_r_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                             uint32 kind_dst, int32 reg_no_dst,
+                             int32 reg_no_src, int32 reg_no_base,
+                             int32 reg_no_offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0, bytes_dst);
+    return neg_r(a, bytes_dst, kind_dst, reg_no_src)
+           && at_xadd(a, bytes_dst, kind_dst, reg_no_src, m)
+           && extend_r_to_r(a, bytes_dst, kind_dst, reg_no_src, reg_no_dst);
+}
+
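Unlike add and sub, the exchange helpers below need no explicit lock prefix: on x86, `xchg` with a memory operand is implicitly locked, which is why they can reuse the plain xchg_r_to_m() encoder added earlier. Reference C, again with placeholder names:

    #include <stdint.h>

    /* Reference semantics of the at_rmw_xchg_* helpers: atomically store
       the new value and return the old one. */
    static inline uint64_t
    ref_exchange(uint64_t *mem, uint64_t value)
    {
        return __atomic_exchange_n(mem, value, __ATOMIC_SEQ_CST);
    }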
+/**
+ * Encode atomic rmw xchg: load value into a register from memory
+ * with reg base and imm offset, exchange the loaded value with the imm
+ * data, store back
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param data_src the immediate data(first operand)
+ * @param reg_no_base the no of register that stores the base address
+ *        of src&dst memory(second operand&store back)
+ * @param offset the offset address of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_xchg_imm_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                                  uint32 kind_dst, int32 reg_no_dst,
+                                  void *data_src, int32 reg_no_base,
+                                  int32 offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], offset, bytes_dst);
+    Imm imm;
+    imm_set_value(imm, data_src, bytes_dst);
+    uint32 reg_no_src = mov_imm_to_free_reg(a, imm, bytes_dst);
+    return xchg_r_to_m(a, bytes_dst, kind_dst, m, reg_no_src)
+           && extend_r_to_r(a, bytes_dst, kind_dst, reg_no_src, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw xchg: load value into a register from memory
+ * with reg base and reg offset, exchange the loaded value with the imm
+ * data, store back
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param data_src the immediate data(second operand)
+ * @param reg_no_base the no of register that stores the base address
+ *        of src&dst memory(first operand&store back location)
+ * @param reg_no_offset the no of register that stores the offset of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_xchg_imm_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                                uint32 kind_dst, int32 reg_no_dst,
+                                void *data_src, int32 reg_no_base,
+                                int32 reg_no_offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0, bytes_dst);
+    Imm imm;
+    imm_set_value(imm, data_src, bytes_dst);
+    uint32 reg_no_src = mov_imm_to_free_reg(a, imm, bytes_dst);
+    return xchg_r_to_m(a, bytes_dst, kind_dst, m, reg_no_src)
+           && extend_r_to_r(a, bytes_dst, kind_dst, reg_no_src, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw xchg: load value into a register from memory
+ * with reg base and imm offset, exchange the loaded value with the reg
+ * data, store back
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param reg_no_src the no of register that stores the src data(second operand)
+ * @param reg_no_base the no of register that stores the base address
+ *        of src&dst memory(first operand&store back location)
+ * @param offset the offset address of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_xchg_r_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                                uint32 kind_dst, int32 reg_no_dst,
+                                int32 reg_no_src, int32 reg_no_base,
+                                int32 offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], offset, bytes_dst);
+    return xchg_r_to_m(a, bytes_dst, kind_dst, m, reg_no_src)
+           && extend_r_to_r(a, bytes_dst, kind_dst, reg_no_src, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw xchg: load value into a register from memory
+ * with reg base and reg offset, exchange the loaded value with the reg
+ * data, store back
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param reg_no_src the no of register that stores the src data(second operand)
+ * @param reg_no_base the no of register that stores the base address
+ *        of src&dst memory(first operand&store back)
+ * @param reg_no_offset the no of register that stores the offset of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_xchg_r_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                              uint32 kind_dst, int32 reg_no_dst,
+                              int32 reg_no_src, int32 reg_no_base,
+                              int32 reg_no_offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0, bytes_dst);
+    return xchg_r_to_m(a, bytes_dst, kind_dst, m, reg_no_src)
+           && extend_r_to_r(a, bytes_dst, kind_dst, reg_no_src, reg_no_dst);
+}
+
+/**
+ * Encode insn rmw logical operation: generate a loop to make sure it's atomic
+ * @param bin_op the operation, can be and/or/xor
+ * @param kind the data kind, can only be I32 or I64
+ * @param bytes_dst the byte number of dst data
+ */
+#define AT_RMW_LOGICAL_LOOP(bin_op, kind, bytes_dst) \
+    do { \
+        bh_assert((kind_dst == JIT_REG_KIND_I32 && bytes_dst <= 4) \
+                  || kind_dst == JIT_REG_KIND_I64); \
+        bh_assert(reg_no_src < 16 && reg_no_dst < 16); \
+        /* read the original value in memory (operand 1) to rax (expected) */ \
+        mov_m_to_r(a, bytes_dst, kind_dst, false, REG_RAX_IDX, m_dst); \
+        Label loop = a.newLabel(); \
+        /* check whether the loop label is valid, and bind it \
+         * to the current position in the code */ \
+        if (!loop.isValid() || a.bind(loop) != kErrorOk) \
+            return false; \
+        /* move operand 1 to the temp reg rbx */ \
+        mov_r_to_r(a, kind_dst, REG_RBX_IDX, REG_RAX_IDX); \
+        /* actual logical operation with operand 2, result saved to rbx */ \
+        switch (bytes_dst) { \
+            case 1: \
+                a.bin_op##_(regs_i8[REG_RBX_IDX], regs_i8[reg_no_src]); \
+                break; \
+            case 2: \
+                a.bin_op##_(regs_i16[REG_RBX_IDX], regs_i16[reg_no_src]); \
+                break; \
+            case 4: \
+                a.bin_op##_(regs_i32[REG_RBX_IDX], regs_i32[reg_no_src]); \
+                break; \
+            case 8: \
+                a.bin_op##_(regs_i64[REG_RBX_IDX], regs_i64[reg_no_src]); \
+                break; \
+            default: \
+                bh_assert(0); \
+                return false; \
+        } \
+        /* compare the value read into RAX with memory and try to replace \
+         * it with the result in RBX: if the exchange succeeds, the memory \
+         * is updated and we exit the loop (ZF is set); if not (ZF is \
+         * clear), loop again and retry the logical op atomically */ \
+        at_cmpxchg(a, bytes_dst, kind_dst, REG_RBX_IDX, m_dst); \
+        a.jne(loop); \
+        return true; \
+    } while (0)
+
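Because the `lock and/or/xor` instruction forms update memory but do not return the old value, the macro above falls back to a compare-and-swap retry loop. Its semantics, sketched in C for the `and` case (placeholder names; `|` and `^` are analogous; not patch code):

    #include <stdint.h>

    /* Reference semantics of AT_RMW_LOGICAL_LOOP: retry until the CAS wins,
       then return the value observed before the update. */
    static inline uint64_t
    ref_fetch_and(uint64_t *mem, uint64_t operand)
    {
        uint64_t old = *mem, desired;
        do {
            desired = old & operand;
            /* on failure, `old` is refreshed with the current memory value */
        } while (!__atomic_compare_exchange_n(mem, &old, desired, false,
                                              __ATOMIC_SEQ_CST,
                                              __ATOMIC_SEQ_CST));
        return old;
    }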
+/**
+ * Encode atomic logical binary operation: and
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of data to operate on, could be I32, I64
+ * @param reg_no_dst the index of the dest register
+ * @param reg_no_src the index of the register holding the operand of the
+ *        and operation
+ * @param m_dst the dest memory operand
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+at_and(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, int32 reg_no_dst,
+       int32 reg_no_src, x86::Mem &m_dst)
+{
+    AT_RMW_LOGICAL_LOOP(and, kind_dst, bytes_dst);
+}
+
+/**
+ * Encode atomic logical binary operation: or
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of data to operate on, could be I32, I64
+ * @param reg_no_dst the index of the dest register
+ * @param reg_no_src the index of the register holding the operand of the
+ *        or operation
+ * @param m_dst the dest memory operand
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+at_or(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, int32 reg_no_dst,
+      int32 reg_no_src, x86::Mem &m_dst)
+{
+    AT_RMW_LOGICAL_LOOP(or, kind_dst, bytes_dst);
+}
+
+/**
+ * Encode atomic logical binary operation: xor
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of data to operate on, could be I32, I64
+ * @param reg_no_dst the index of the dest register
+ * @param reg_no_src the index of the register holding the operand of the
+ *        xor operation
+ * @param m_dst the dest memory operand
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+at_xor(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, int32 reg_no_dst,
+       int32 reg_no_src, x86::Mem &m_dst)
+{
+    AT_RMW_LOGICAL_LOOP(xor, kind_dst, bytes_dst);
+}
+
+/**
+ * Encode atomic rmw and: load value into a register from memory with reg
+ * base and imm offset, bitwise and the loaded value with the imm data,
+ * store back
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param data_src the immediate data(first operand)
+ * @param reg_no_base the no of register that stores the base address
+ *        of src&dst memory(second operand&store back)
+ * @param offset the offset address of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_and_imm_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                                 uint32 kind_dst, int32 reg_no_dst,
+                                 void *data_src, int32 reg_no_base,
+                                 int32 offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], offset, bytes_dst);
+    Imm imm;
+    imm_set_value(imm, data_src, bytes_dst);
+    uint32 reg_no_src = mov_imm_to_free_reg(a, imm, bytes_dst);
+    return at_and(a, bytes_dst, kind_dst, reg_no_dst, reg_no_src, m)
+           && extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw and: load value into a register from memory with reg
+ * base and reg offset, bitwise and the loaded value with the imm data,
+ * store back
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param data_src the immediate data(second operand)
+ * @param reg_no_base the no of register that stores the base address
+ *        of src&dst memory(first operand&store back location)
+ * @param reg_no_offset the no of register that stores the offset of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_and_imm_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                               uint32 kind_dst, int32 reg_no_dst,
+                               void *data_src, int32 reg_no_base,
+                               int32 reg_no_offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0, bytes_dst);
+    Imm imm;
+    imm_set_value(imm, data_src, bytes_dst);
+    uint32 reg_no_src = mov_imm_to_free_reg(a, imm, bytes_dst);
+    return at_and(a, bytes_dst, kind_dst, reg_no_dst, reg_no_src, m)
+           && extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw and: load value into a register from memory with reg
+ * base and imm offset, bitwise and the loaded value with the reg data,
+ * store back
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param reg_no_src the no of register that stores the src data(second operand)
+ * @param reg_no_base the no of register that stores the base address
+ *        of src&dst memory(first operand&store back location)
+ * @param offset the offset address of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_and_r_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                               uint32 kind_dst, int32 reg_no_dst,
+                               int32 reg_no_src, int32 reg_no_base,
+                               int32 offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], offset, bytes_dst);
+    return at_and(a, bytes_dst, kind_dst, reg_no_dst, reg_no_src, m)
+           && extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw and: load value into a register from memory with reg
+ * base and reg offset, bitwise and the loaded value with the reg data,
+ * store back
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param reg_no_src the no of register that stores the src data(second operand)
+ * @param reg_no_base the no of register that stores the base address
+ *        of src&dst memory(first operand&store back)
+ * @param reg_no_offset the no of register that stores the offset of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_and_r_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                             uint32 kind_dst, int32 reg_no_dst,
+                             int32 reg_no_src, int32 reg_no_base,
+                             int32 reg_no_offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0, bytes_dst);
+    return at_and(a, bytes_dst, kind_dst, reg_no_dst, reg_no_src, m)
+           && extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw or: load value into a register from memory with reg
+ * base and imm offset, bitwise or the loaded value with the imm data,
+ * store back
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param data_src the immediate data(first operand)
+ * @param reg_no_base the no of register that stores the base address
+ *        of src&dst memory(second operand&store back)
+ * @param offset the offset address of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_or_imm_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                                uint32 kind_dst, int32 reg_no_dst,
+                                void *data_src, int32 reg_no_base, int32 offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], offset, bytes_dst);
+    Imm imm;
+    imm_set_value(imm, data_src, bytes_dst);
+    uint32 reg_no_src = mov_imm_to_free_reg(a, imm, bytes_dst);
+    return at_or(a, bytes_dst, kind_dst, reg_no_dst, reg_no_src, m)
+           && extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw or: load value into a register from memory with reg
+ * base and reg offset, bitwise or the loaded value with the imm data,
+ * store back
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param data_src the immediate data(second operand)
+ * @param reg_no_base the no of register that stores the base address
+ *        of src&dst memory(first operand&store back location)
+ * @param reg_no_offset the no of register that stores the offset of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_or_imm_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                              uint32 kind_dst, int32 reg_no_dst, void *data_src,
+                              int32 reg_no_base, int32 reg_no_offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0, bytes_dst);
+    Imm imm;
+    imm_set_value(imm, data_src, bytes_dst);
+    uint32 reg_no_src = mov_imm_to_free_reg(a, imm, bytes_dst);
+    return at_or(a, bytes_dst, kind_dst, reg_no_dst, reg_no_src, m)
+           && extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw or: load value into a register from memory with reg
+ * base and imm offset, bitwise or the loaded value with the reg data,
+ * store back
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param reg_no_src the no of register that stores the src data(second operand)
+ * @param reg_no_base the no of register that stores the base address
+ *        of src&dst memory(first operand&store back location)
+ * @param offset the offset address of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_or_r_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                              uint32 kind_dst, int32 reg_no_dst,
+                              int32 reg_no_src, int32 reg_no_base, int32 offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], offset, bytes_dst);
+    return at_or(a, bytes_dst, kind_dst, reg_no_dst, reg_no_src, m)
+           && extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw or: load value into a register from memory with reg
+ * base and reg offset, bitwise or the loaded value with the reg data,
+ * store back
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param reg_no_src the no of register that stores the src data(second operand)
+ * @param reg_no_base the no of register that stores the base address
+ *        of src&dst memory(first operand&store back)
+ * @param reg_no_offset the no of register that stores the offset of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_or_r_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                            uint32 kind_dst, int32 reg_no_dst, int32 reg_no_src,
+                            int32 reg_no_base, int32 reg_no_offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0, bytes_dst);
+    return at_or(a, bytes_dst, kind_dst, reg_no_dst, reg_no_src, m)
+           && extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw xor: load value into a register from memory with reg
+ * base and imm offset, bitwise xor the loaded value with the imm data,
+ * store back
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param data_src the immediate data(first operand)
+ * @param reg_no_base the no of register that stores the base address
+ *        of src&dst memory(second operand&store back)
+ * @param offset the offset address of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_xor_imm_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                                 uint32 kind_dst, int32 reg_no_dst,
+                                 void *data_src, int32 reg_no_base,
+                                 int32 offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], offset, bytes_dst);
+    Imm imm;
+    imm_set_value(imm, data_src, bytes_dst);
+    uint32 reg_no_src = mov_imm_to_free_reg(a, imm, bytes_dst);
+    return at_xor(a, bytes_dst, kind_dst, reg_no_dst, reg_no_src, m)
+           && extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, reg_no_dst);
+}
+
+
+/**
+ * Encode atomic rmw xor: load value into a register from memory with reg base
+ * and reg offset, bitwise xor loaded value with imm data, store back
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the number of bytes actually operated on (load, xor,
+ *        store back), could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of data, could only be I32 or I64
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param data_src the immediate data (second operand)
+ * @param reg_no_base the no of register that stores the base address
+ *        of the src&dst memory (first operand & store-back location)
+ * @param reg_no_offset the no of register that stores the offset of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_xor_imm_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                               uint32 kind_dst, int32 reg_no_dst,
+                               void *data_src, int32 reg_no_base,
+                               int32 reg_no_offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0, bytes_dst);
+    Imm imm;
+    imm_set_value(imm, data_src, bytes_dst);
+    uint32 reg_no_src = mov_imm_to_free_reg(a, imm, bytes_dst);
+    return at_xor(a, bytes_dst, kind_dst, reg_no_dst, reg_no_src, m)
+           && extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw xor: load value into a register from memory with reg base
+ * and imm offset, bitwise xor loaded value with reg data, store back
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the number of bytes actually operated on (load, xor,
+ *        store back), could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of data, could only be I32 or I64
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param reg_no_src the no of register that stores the src data (second
+ *        operand)
+ * @param reg_no_base the no of register that stores the base address
+ *        of the src&dst memory (first operand & store-back location)
+ * @param offset the offset address of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_xor_r_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                               uint32 kind_dst, int32 reg_no_dst,
+                               int32 reg_no_src, int32 reg_no_base,
+                               int32 offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], offset, bytes_dst);
+    return at_xor(a, bytes_dst, kind_dst, reg_no_dst, reg_no_src, m)
+           && extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw xor: load value into a register from memory with reg base
+ * and reg offset, bitwise xor loaded value with reg data, store back
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the number of bytes actually operated on (load, xor,
+ *        store back), could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of data, could only be I32 or I64
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param reg_no_src the no of register that stores the src data (second
+ *        operand)
+ * @param reg_no_base the no of register that stores the base address
+ *        of the src&dst memory (first operand & store-back location)
+ * @param reg_no_offset the no of register that stores the offset of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_xor_r_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                             uint32 kind_dst, int32 reg_no_dst,
+                             int32 reg_no_src, int32 reg_no_base,
+                             int32 reg_no_offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0, bytes_dst);
+    return at_xor(a, bytes_dst, kind_dst, reg_no_dst, reg_no_src, m)
+           && extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, reg_no_dst);
+}
+
+/**
+ * Encode insn rmw RMW_type r0, r1, r2, r3
+ * @param bin_op the operation, can be add/sub/xchg/and/or/xor
+ * @param kind the data kind, can only be I32 or I64
+ * @param type the C value type matching kind, can only be int32 or int64
+ * @param bytes_dst the number of bytes of the dst data
+ */
+#define AT_RMW_R_R_R_R(bin_op, kind, type, bytes_dst)                        \
+    do {                                                                     \
+        type data_src = 0;                                                   \
+        int32 reg_no_dst = 0, reg_no_src = 0, reg_no_base = 0,               \
+              reg_no_offset = 0;                                             \
+        int32 offset = 0;                                                    \
+        bool _ret = false;                                                   \
+        if (jit_reg_is_const(r3)) {                                          \
+            CHECK_KIND(r3, JIT_REG_KIND_I32);                                \
+        }                                                                    \
+        else {                                                               \
+            CHECK_KIND(r3, JIT_REG_KIND_I64);                                \
+        }                                                                    \
+        /* r0: read/return value r2: memory
base addr can't be const */ \ + /* already check it's not const in LOAD_4ARGS(); */ \ + reg_no_dst = jit_reg_no(r0); \ + CHECK_REG_NO(reg_no_dst, jit_reg_kind(r0)); \ + /* mem_data base address has to be non-const */ \ + CHECK_NCONST(r2); \ + reg_no_base = jit_reg_no(r2); \ + CHECK_REG_NO(reg_no_base, jit_reg_kind(r2)); \ + /* r1: source operand value r3: offset can be const */ \ + if (jit_reg_is_const(r1)) \ + data_src = jit_cc_get_const_##kind(cc, r1); \ + else { \ + reg_no_src = jit_reg_no(r1); \ + CHECK_REG_NO(reg_no_src, jit_reg_kind(r1)); \ + } \ + if (jit_reg_is_const(r3)) \ + offset = jit_cc_get_const_I32(cc, r3); \ + else { \ + reg_no_offset = jit_reg_no(r3); \ + CHECK_REG_NO(reg_no_offset, jit_reg_kind(r3)); \ + } \ + \ + if (jit_reg_is_const(r1)) { \ + if (jit_reg_is_const(r3)) \ + _ret = at_rmw_##bin_op##_imm_base_r_offset_imm( \ + a, bytes_dst, JIT_REG_KIND_##kind, reg_no_dst, &data_src, \ + reg_no_base, offset); \ + else \ + _ret = at_rmw_##bin_op##_imm_base_r_offset_r( \ + a, bytes_dst, JIT_REG_KIND_##kind, reg_no_dst, &data_src, \ + reg_no_base, reg_no_offset); \ + } \ + else { \ + if (jit_reg_is_const(r3)) \ + _ret = at_rmw_##bin_op##_r_base_r_offset_imm( \ + a, bytes_dst, JIT_REG_KIND_##kind, reg_no_dst, reg_no_src, \ + reg_no_base, offset); \ + else \ + _ret = at_rmw_##bin_op##_r_base_r_offset_r( \ + a, bytes_dst, JIT_REG_KIND_##kind, reg_no_dst, reg_no_src, \ + reg_no_base, reg_no_offset); \ + } \ + if (!_ret) \ + GOTO_FAIL; \ + } while (0) + +/** + * Encode insn mfence + **/ +static void +fence(x86::Assembler &a) +{ + a.mfence(); +} + +/** + * Encode insn fence + */ +#define FENCE() fence(a) + +#endif + bool jit_codegen_gen_native(JitCompContext *cc) { + bool atomic; JitBasicBlock *block; JitInsn *insn; - JitReg r0, r1, r2, r3; + JitReg r0, r1, r2, r3, r4; JmpInfo jmp_info_head; bh_list *jmp_info_list = (bh_list *)&jmp_info_head; uint32 label_index, label_num, i; @@ -6615,33 +7941,41 @@ jit_codegen_gen_native(JitCompContext *cc) case JIT_OP_STI8: LOAD_3ARGS_NO_ASSIGN(); - ST_R_R_R(I32, int32, 1); + atomic = insn->flags_u8 & 0x1; + ST_R_R_R(I32, int32, 1, atomic); break; case JIT_OP_STI16: LOAD_3ARGS_NO_ASSIGN(); - ST_R_R_R(I32, int32, 2); + atomic = insn->flags_u8 & 0x1; + ST_R_R_R(I32, int32, 2, atomic); break; case JIT_OP_STI32: LOAD_3ARGS_NO_ASSIGN(); - ST_R_R_R(I32, int32, 4); + atomic = insn->flags_u8 & 0x1; + ST_R_R_R(I32, int32, 4, atomic); break; case JIT_OP_STI64: + LOAD_3ARGS_NO_ASSIGN(); + atomic = insn->flags_u8 & 0x1; + ST_R_R_R(I64, int64, 8, atomic); + break; + case JIT_OP_STPTR: LOAD_3ARGS_NO_ASSIGN(); - ST_R_R_R(I64, int64, 8); + ST_R_R_R(I64, int64, 8, false); break; case JIT_OP_STF32: LOAD_3ARGS_NO_ASSIGN(); - ST_R_R_R(F32, float32, 4); + ST_R_R_R(F32, float32, 4, false); break; case JIT_OP_STF64: LOAD_3ARGS_NO_ASSIGN(); - ST_R_R_R(F64, float64, 8); + ST_R_R_R(F64, float64, 8, false); break; case JIT_OP_JMP: @@ -6720,6 +8054,254 @@ jit_codegen_gen_native(JitCompContext *cc) CAST_R_R(I64, F64, i64, f64, double); break; +#if WASM_ENABLE_SHARED_MEMORY != 0 + case JIT_OP_AT_CMPXCHGU8: + LOAD_4ARGS_NO_ASSIGN(); + if (jit_reg_kind(r0) == JIT_REG_KIND_I32) + CMPXCHG_R_R_R_R_R(I32, int32, 1); + else + CMPXCHG_R_R_R_R_R(I64, int64, 1); + break; + + case JIT_OP_AT_CMPXCHGU16: + LOAD_4ARGS_NO_ASSIGN(); + if (jit_reg_kind(r0) == JIT_REG_KIND_I32) + CMPXCHG_R_R_R_R_R(I32, int32, 2); + else + CMPXCHG_R_R_R_R_R(I64, int64, 2); + break; + + case JIT_OP_AT_CMPXCHGI32: + LOAD_4ARGS_NO_ASSIGN(); + CMPXCHG_R_R_R_R_R(I32, int32, 4); + break; + + case 
JIT_OP_AT_CMPXCHGU32: + LOAD_4ARGS_NO_ASSIGN(); + CMPXCHG_R_R_R_R_R(I64, int32, 4); + break; + + case JIT_OP_AT_CMPXCHGI64: + LOAD_4ARGS_NO_ASSIGN(); + CMPXCHG_R_R_R_R_R(I64, int64, 8); + break; + + case JIT_OP_AT_ADDU8: + LOAD_4ARGS(); + bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32 + || jit_reg_kind(r0) == JIT_REG_KIND_I64); + if (jit_reg_kind(r0) == JIT_REG_KIND_I32) + AT_RMW_R_R_R_R(add, I32, int32, 1); + else + AT_RMW_R_R_R_R(add, I64, int64, 1); + break; + + case JIT_OP_AT_ADDU16: + LOAD_4ARGS(); + bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32 + || jit_reg_kind(r0) == JIT_REG_KIND_I64); + if (jit_reg_kind(r0) == JIT_REG_KIND_I32) + AT_RMW_R_R_R_R(add, I32, int32, 2); + else + AT_RMW_R_R_R_R(add, I64, int64, 2); + break; + + case JIT_OP_AT_ADDI32: + LOAD_4ARGS(); + AT_RMW_R_R_R_R(add, I32, int32, 4); + break; + + case JIT_OP_AT_ADDU32: + LOAD_4ARGS(); + AT_RMW_R_R_R_R(add, I64, int64, 4); + break; + + case JIT_OP_AT_ADDI64: + LOAD_4ARGS(); + AT_RMW_R_R_R_R(add, I64, int64, 8); + break; + + case JIT_OP_AT_SUBU8: + LOAD_4ARGS(); + bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32 + || jit_reg_kind(r0) == JIT_REG_KIND_I64); + if (jit_reg_kind(r0) == JIT_REG_KIND_I32) + AT_RMW_R_R_R_R(sub, I32, int32, 1); + else + AT_RMW_R_R_R_R(sub, I64, int64, 1); + break; + + case JIT_OP_AT_SUBU16: + LOAD_4ARGS(); + bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32 + || jit_reg_kind(r0) == JIT_REG_KIND_I64); + if (jit_reg_kind(r0) == JIT_REG_KIND_I32) + AT_RMW_R_R_R_R(sub, I32, int32, 2); + else + AT_RMW_R_R_R_R(sub, I64, int64, 2); + break; + + case JIT_OP_AT_SUBI32: + LOAD_4ARGS(); + AT_RMW_R_R_R_R(sub, I32, int32, 4); + break; + + case JIT_OP_AT_SUBU32: + LOAD_4ARGS(); + AT_RMW_R_R_R_R(sub, I64, int64, 4); + break; + + case JIT_OP_AT_SUBI64: + LOAD_4ARGS(); + AT_RMW_R_R_R_R(sub, I64, int64, 8); + break; + + case JIT_OP_AT_XCHGU8: + LOAD_4ARGS(); + bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32 + || jit_reg_kind(r0) == JIT_REG_KIND_I64); + if (jit_reg_kind(r0) == JIT_REG_KIND_I32) + AT_RMW_R_R_R_R(xchg, I32, int32, 1); + else + AT_RMW_R_R_R_R(xchg, I64, int64, 1); + break; + + case JIT_OP_AT_XCHGU16: + LOAD_4ARGS(); + bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32 + || jit_reg_kind(r0) == JIT_REG_KIND_I64); + if (jit_reg_kind(r0) == JIT_REG_KIND_I32) + AT_RMW_R_R_R_R(xchg, I32, int32, 2); + else + AT_RMW_R_R_R_R(xchg, I64, int64, 2); + break; + + case JIT_OP_AT_XCHGI32: + LOAD_4ARGS(); + AT_RMW_R_R_R_R(xchg, I32, int32, 4); + break; + + case JIT_OP_AT_XCHGU32: + LOAD_4ARGS(); + AT_RMW_R_R_R_R(xchg, I64, int64, 4); + break; + + case JIT_OP_AT_XCHGI64: + LOAD_4ARGS(); + AT_RMW_R_R_R_R(xchg, I64, int64, 8); + break; + + case JIT_OP_AT_ANDU8: + LOAD_4ARGS(); + bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32 + || jit_reg_kind(r0) == JIT_REG_KIND_I64); + if (jit_reg_kind(r0) == JIT_REG_KIND_I32) + AT_RMW_R_R_R_R(and, I32, int32, 1); + else + AT_RMW_R_R_R_R(and, I64, int64, 1); + break; + + case JIT_OP_AT_ANDU16: + LOAD_4ARGS(); + bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32 + || jit_reg_kind(r0) == JIT_REG_KIND_I64); + if (jit_reg_kind(r0) == JIT_REG_KIND_I32) + AT_RMW_R_R_R_R(and, I32, int32, 2); + else + AT_RMW_R_R_R_R(and, I64, int64, 2); + break; + + case JIT_OP_AT_ANDI32: + LOAD_4ARGS(); + AT_RMW_R_R_R_R(and, I32, int32, 4); + break; + + case JIT_OP_AT_ANDU32: + LOAD_4ARGS(); + AT_RMW_R_R_R_R(and, I64, int64, 4); + break; + + case JIT_OP_AT_ANDI64: + LOAD_4ARGS(); + AT_RMW_R_R_R_R(and, I64, int64, 8); + break; + + case JIT_OP_AT_ORU8: + LOAD_4ARGS(); + bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32 + || 
jit_reg_kind(r0) == JIT_REG_KIND_I64); + if (jit_reg_kind(r0) == JIT_REG_KIND_I32) + AT_RMW_R_R_R_R(or, I32, int32, 1); + else + AT_RMW_R_R_R_R(or, I64, int64, 1); + break; + + case JIT_OP_AT_ORU16: + LOAD_4ARGS(); + bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32 + || jit_reg_kind(r0) == JIT_REG_KIND_I64); + if (jit_reg_kind(r0) == JIT_REG_KIND_I32) + AT_RMW_R_R_R_R(or, I32, int32, 2); + else + AT_RMW_R_R_R_R(or, I64, int64, 2); + break; + + case JIT_OP_AT_ORI32: + LOAD_4ARGS(); + AT_RMW_R_R_R_R(or, I32, int32, 4); + break; + + case JIT_OP_AT_ORU32: + LOAD_4ARGS(); + AT_RMW_R_R_R_R(or, I64, int64, 4); + break; + + case JIT_OP_AT_ORI64: + LOAD_4ARGS(); + AT_RMW_R_R_R_R(or, I64, int64, 8); + break; + + case JIT_OP_AT_XORU8: + LOAD_4ARGS(); + bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32 + || jit_reg_kind(r0) == JIT_REG_KIND_I64); + if (jit_reg_kind(r0) == JIT_REG_KIND_I32) + AT_RMW_R_R_R_R(xor, I32, int32, 1); + else + AT_RMW_R_R_R_R(xor, I64, int64, 1); + break; + + case JIT_OP_AT_XORU16: + LOAD_4ARGS(); + bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32 + || jit_reg_kind(r0) == JIT_REG_KIND_I64); + if (jit_reg_kind(r0) == JIT_REG_KIND_I32) + AT_RMW_R_R_R_R(xor, I32, int32, 2); + else + AT_RMW_R_R_R_R(xor, I64, int64, 2); + break; + + case JIT_OP_AT_XORI32: + LOAD_4ARGS(); + AT_RMW_R_R_R_R(xor, I32, int32, 4); + break; + + case JIT_OP_AT_XORU32: + LOAD_4ARGS(); + AT_RMW_R_R_R_R(xor, I64, int64, 4); + break; + + case JIT_OP_AT_XORI64: + LOAD_4ARGS(); + AT_RMW_R_R_R_R(xor, I64, int64, 8); + break; + + case JIT_OP_FENCE: + FENCE(); + break; + +#endif + default: jit_set_last_error_v(cc, "unsupported JIT opcode 0x%2x", insn->opcode); diff --git a/core/iwasm/fast-jit/fe/jit_emit_control.c b/core/iwasm/fast-jit/fe/jit_emit_control.c index f3aa31f3..f7536c73 100644 --- a/core/iwasm/fast-jit/fe/jit_emit_control.c +++ b/core/iwasm/fast-jit/fe/jit_emit_control.c @@ -904,6 +904,42 @@ check_copy_arities(const JitBlock *block_dst, JitFrame *jit_frame) } } +#if WASM_ENABLE_THREAD_MGR != 0 +bool +jit_check_suspend_flags(JitCompContext *cc) +{ + JitReg exec_env, suspend_flags, terminate_flag, offset; + JitBasicBlock *terminate_block, *cur_basic_block; + JitFrame *jit_frame = cc->jit_frame; + + cur_basic_block = cc->cur_basic_block; + terminate_block = jit_cc_new_basic_block(cc, 0); + if (!terminate_block) { + return false; + } + + gen_commit_values(jit_frame, jit_frame->lp, jit_frame->sp); + exec_env = cc->exec_env_reg; + suspend_flags = jit_cc_new_reg_I32(cc); + terminate_flag = jit_cc_new_reg_I32(cc); + + offset = jit_cc_new_const_I32(cc, offsetof(WASMExecEnv, suspend_flags)); + GEN_INSN(LDI32, suspend_flags, exec_env, offset); + GEN_INSN(AND, terminate_flag, suspend_flags, NEW_CONST(I32, 1)); + + GEN_INSN(CMP, cc->cmp_reg, terminate_flag, NEW_CONST(I32, 0)); + GEN_INSN(BNE, cc->cmp_reg, jit_basic_block_label(terminate_block), 0); + + cc->cur_basic_block = terminate_block; + GEN_INSN(RETURN, NEW_CONST(I32, 0)); + + cc->cur_basic_block = cur_basic_block; + + return true; +} + +#endif + static bool handle_op_br(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip) { @@ -986,6 +1022,13 @@ fail: bool jit_compile_op_br(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip) { + +#if WASM_ENABLE_THREAD_MGR != 0 + /* Insert suspend check point */ + if (!jit_check_suspend_flags(cc)) + return false; +#endif + return handle_op_br(cc, br_depth, p_frame_ip) && handle_next_reachable_block(cc, p_frame_ip); } @@ -1105,6 +1148,12 @@ jit_compile_op_br_if(JitCompContext *cc, uint32 br_depth, 
jit_insn_delete(insn_select); } +#if WASM_ENABLE_THREAD_MGR != 0 + /* Insert suspend check point */ + if (!jit_check_suspend_flags(cc)) + return false; +#endif + SET_BUILDER_POS(if_basic_block); SET_BB_BEGIN_BCIP(if_basic_block, *p_frame_ip - 1); @@ -1144,6 +1193,12 @@ jit_compile_op_br_table(JitCompContext *cc, uint32 *br_depths, uint32 br_count, uint32 i = 0; JitOpndLookupSwitch *opnd = NULL; +#if WASM_ENABLE_THREAD_MGR != 0 + /* Insert suspend check point */ + if (!jit_check_suspend_flags(cc)) + return false; +#endif + cur_basic_block = cc->cur_basic_block; POP_I32(value); diff --git a/core/iwasm/fast-jit/fe/jit_emit_function.c b/core/iwasm/fast-jit/fe/jit_emit_function.c index a89f2ff5..3ac9e3ed 100644 --- a/core/iwasm/fast-jit/fe/jit_emit_function.c +++ b/core/iwasm/fast-jit/fe/jit_emit_function.c @@ -5,6 +5,7 @@ #include "jit_emit_function.h" #include "jit_emit_exception.h" +#include "jit_emit_control.h" #include "../jit_frontend.h" #include "../jit_codegen.h" #include "../../interpreter/wasm_runtime.h" @@ -232,6 +233,12 @@ jit_compile_op_call(JitCompContext *cc, uint32 func_idx, bool tail_call) bool is_pointer_arg; bool return_value = false; +#if WASM_ENABLE_THREAD_MGR != 0 + /* Insert suspend check point */ + if (!jit_check_suspend_flags(cc)) + goto fail; +#endif + if (func_idx < wasm_module->import_function_count) { /* The function to call is an import function */ func_import = &wasm_module->import_functions[func_idx].u.function; @@ -275,6 +282,12 @@ jit_compile_op_call(JitCompContext *cc, uint32 func_idx, bool tail_call) goto fail; } +#if WASM_ENABLE_THREAD_MGR != 0 + /* Insert suspend check point */ + if (!jit_check_suspend_flags(cc)) + goto fail; +#endif + return true; } @@ -416,6 +429,12 @@ jit_compile_op_call(JitCompContext *cc, uint32 func_idx, bool tail_call) } } +#if WASM_ENABLE_THREAD_MGR != 0 + /* Insert suspend check point */ + if (!jit_check_suspend_flags(cc)) + goto fail; +#endif + /* Clear part of memory regs and table regs as their values may be changed in the function call */ if (cc->cur_wasm_module->possible_memory_grow) @@ -540,6 +559,12 @@ jit_compile_op_call_indirect(JitCompContext *cc, uint32 type_idx, GEN_INSN(STI32, func_idx, cc->exec_env_reg, NEW_CONST(I32, offsetof(WASMExecEnv, jit_cache) + 4)); +#if WASM_ENABLE_THREAD_MGR != 0 + /* Insert suspend check point */ + if (!jit_check_suspend_flags(cc)) + goto fail; +#endif + block_import = jit_cc_new_basic_block(cc, 0); block_nonimport = jit_cc_new_basic_block(cc, 0); func_return = jit_cc_new_basic_block(cc, 0); @@ -742,6 +767,12 @@ jit_compile_op_call_indirect(JitCompContext *cc, uint32 type_idx, goto fail; } +#if WASM_ENABLE_THREAD_MGR != 0 + /* Insert suspend check point */ + if (!jit_check_suspend_flags(cc)) + goto fail; +#endif + /* Clear part of memory regs and table regs as their values may be changed in the function call */ if (cc->cur_wasm_module->possible_memory_grow) diff --git a/core/iwasm/fast-jit/fe/jit_emit_memory.c b/core/iwasm/fast-jit/fe/jit_emit_memory.c index b71d98ba..9635d4e5 100644 --- a/core/iwasm/fast-jit/fe/jit_emit_memory.c +++ b/core/iwasm/fast-jit/fe/jit_emit_memory.c @@ -9,6 +9,7 @@ #include "../jit_frontend.h" #include "../jit_codegen.h" #include "../../interpreter/wasm_runtime.h" +#include "jit_emit_control.h" #ifndef OS_ENABLE_HW_BOUND_CHECK static JitReg @@ -60,6 +61,14 @@ fail: } #endif +#if WASM_ENABLE_SHARED_MEMORY != 0 +static void +set_load_or_store_atomic(JitInsn *load_or_store_inst) +{ + load_or_store_inst->flags_u8 |= 0x1; +} +#endif + #if UINTPTR_MAX == UINT64_MAX 
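+/*
+ * Editorial note: set_load_or_store_atomic above is the producer side of
+ * the flags_u8 bit-0 convention documented in jit_ir.h; the x86-64
+ * codegen is the consumer, reading the bit back when selecting the
+ * encoding, e.g. (from jit_codegen_x86_64.cpp in this patch):
+ *
+ *     atomic = insn->flags_u8 & 0x1;
+ *     ST_R_R_R(I32, int32, 4, atomic);
+ *
+ * Atomic stores are presumably lowered through xchg_r_to_m, since XCHG
+ * with a memory operand is implicitly LOCKed and doubles as a full
+ * barrier; plain aligned loads are already atomic on x86-64.
+ */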
static JitReg check_and_seek_on_64bit_platform(JitCompContext *cc, JitReg addr, JitReg offset, @@ -177,23 +186,36 @@ fail: return 0; } -#define CHECK_ALIGNMENT(maddr, memory_data, offset1) \ +#if UINTPTR_MAX == UINT64_MAX +#define CHECK_ALIGNMENT(offset1) \ do { \ - GEN_INSN(ADD, maddr, memory_data, offset1); \ JitReg align_mask = NEW_CONST(I64, ((uint64)1 << align) - 1); \ JitReg AND_res = jit_cc_new_reg_I64(cc); \ - GEN_INSN(AND, AND_res, maddr, align_mask); \ + GEN_INSN(AND, AND_res, offset1, align_mask); \ GEN_INSN(CMP, cc->cmp_reg, AND_res, NEW_CONST(I64, 0)); \ if (!jit_emit_exception(cc, EXCE_UNALIGNED_ATOMIC, JIT_OP_BNE, \ cc->cmp_reg, NULL)) \ goto fail; \ } while (0) +#else +#define CHECK_ALIGNMENT(offset1) \ + do { \ + JitReg align_mask = NEW_CONST(I32, (1 << align) - 1); \ + JitReg AND_res = jit_cc_new_reg_I32(cc); \ + GEN_INSN(AND, AND_res, offset1, align_mask); \ + GEN_INSN(CMP, cc->cmp_reg, AND_res, NEW_CONST(I32, 0)); \ + if (!jit_emit_exception(cc, EXCE_UNALIGNED_ATOMIC, JIT_OP_BNE, \ + cc->cmp_reg, NULL)) \ + goto fail; \ + } while (0) +#endif bool jit_compile_op_i32_load(JitCompContext *cc, uint32 align, uint32 offset, uint32 bytes, bool sign, bool atomic) { JitReg addr, offset1, value, memory_data; + JitInsn *load_insn = NULL; POP_I32(addr); @@ -201,6 +223,11 @@ jit_compile_op_i32_load(JitCompContext *cc, uint32 align, uint32 offset, if (!offset1) { goto fail; } +#if WASM_ENABLE_SHARED_MEMORY != 0 + if (atomic) { + CHECK_ALIGNMENT(offset1); + } +#endif memory_data = get_memory_data_reg(cc->jit_frame, 0); @@ -209,30 +236,30 @@ jit_compile_op_i32_load(JitCompContext *cc, uint32 align, uint32 offset, case 1: { if (sign) { - GEN_INSN(LDI8, value, memory_data, offset1); + load_insn = GEN_INSN(LDI8, value, memory_data, offset1); } else { - GEN_INSN(LDU8, value, memory_data, offset1); + load_insn = GEN_INSN(LDU8, value, memory_data, offset1); } break; } case 2: { if (sign) { - GEN_INSN(LDI16, value, memory_data, offset1); + load_insn = GEN_INSN(LDI16, value, memory_data, offset1); } else { - GEN_INSN(LDU16, value, memory_data, offset1); + load_insn = GEN_INSN(LDU16, value, memory_data, offset1); } break; } case 4: { if (sign) { - GEN_INSN(LDI32, value, memory_data, offset1); + load_insn = GEN_INSN(LDI32, value, memory_data, offset1); } else { - GEN_INSN(LDU32, value, memory_data, offset1); + load_insn = GEN_INSN(LDU32, value, memory_data, offset1); } break; } @@ -243,6 +270,13 @@ jit_compile_op_i32_load(JitCompContext *cc, uint32 align, uint32 offset, } } +#if WASM_ENABLE_SHARED_MEMORY != 0 + if (atomic && load_insn) + set_load_or_store_atomic(load_insn); +#else + (void)load_insn; +#endif + PUSH_I32(value); return true; fail: @@ -254,6 +288,7 @@ jit_compile_op_i64_load(JitCompContext *cc, uint32 align, uint32 offset, uint32 bytes, bool sign, bool atomic) { JitReg addr, offset1, value, memory_data; + JitInsn *load_insn = NULL; POP_I32(addr); @@ -261,6 +296,11 @@ jit_compile_op_i64_load(JitCompContext *cc, uint32 align, uint32 offset, if (!offset1) { goto fail; } +#if WASM_ENABLE_SHARED_MEMORY != 0 + if (atomic) { + CHECK_ALIGNMENT(offset1); + } +#endif memory_data = get_memory_data_reg(cc->jit_frame, 0); @@ -269,40 +309,40 @@ jit_compile_op_i64_load(JitCompContext *cc, uint32 align, uint32 offset, case 1: { if (sign) { - GEN_INSN(LDI8, value, memory_data, offset1); + load_insn = GEN_INSN(LDI8, value, memory_data, offset1); } else { - GEN_INSN(LDU8, value, memory_data, offset1); + load_insn = GEN_INSN(LDU8, value, memory_data, offset1); } break; } case 2: { if (sign) { - 
GEN_INSN(LDI16, value, memory_data, offset1); + load_insn = GEN_INSN(LDI16, value, memory_data, offset1); } else { - GEN_INSN(LDU16, value, memory_data, offset1); + load_insn = GEN_INSN(LDU16, value, memory_data, offset1); } break; } case 4: { if (sign) { - GEN_INSN(LDI32, value, memory_data, offset1); + load_insn = GEN_INSN(LDI32, value, memory_data, offset1); } else { - GEN_INSN(LDU32, value, memory_data, offset1); + load_insn = GEN_INSN(LDU32, value, memory_data, offset1); } break; } case 8: { if (sign) { - GEN_INSN(LDI64, value, memory_data, offset1); + load_insn = GEN_INSN(LDI64, value, memory_data, offset1); } else { - GEN_INSN(LDU64, value, memory_data, offset1); + load_insn = GEN_INSN(LDU64, value, memory_data, offset1); } break; } @@ -313,6 +353,13 @@ jit_compile_op_i64_load(JitCompContext *cc, uint32 align, uint32 offset, } } +#if WASM_ENABLE_SHARED_MEMORY != 0 + if (atomic && load_insn) + set_load_or_store_atomic(load_insn); +#else + (void)load_insn; +#endif + PUSH_I64(value); return true; fail: @@ -370,6 +417,7 @@ jit_compile_op_i32_store(JitCompContext *cc, uint32 align, uint32 offset, uint32 bytes, bool atomic) { JitReg value, addr, offset1, memory_data; + JitInsn *store_insn = NULL; POP_I32(value); POP_I32(addr); @@ -378,23 +426,28 @@ jit_compile_op_i32_store(JitCompContext *cc, uint32 align, uint32 offset, if (!offset1) { goto fail; } +#if WASM_ENABLE_SHARED_MEMORY != 0 + if (atomic) { + CHECK_ALIGNMENT(offset1); + } +#endif memory_data = get_memory_data_reg(cc->jit_frame, 0); switch (bytes) { case 1: { - GEN_INSN(STI8, value, memory_data, offset1); + store_insn = GEN_INSN(STI8, value, memory_data, offset1); break; } case 2: { - GEN_INSN(STI16, value, memory_data, offset1); + store_insn = GEN_INSN(STI16, value, memory_data, offset1); break; } case 4: { - GEN_INSN(STI32, value, memory_data, offset1); + store_insn = GEN_INSN(STI32, value, memory_data, offset1); break; } default: @@ -403,6 +456,12 @@ jit_compile_op_i32_store(JitCompContext *cc, uint32 align, uint32 offset, goto fail; } } +#if WASM_ENABLE_SHARED_MEMORY != 0 + if (atomic && store_insn) + set_load_or_store_atomic(store_insn); +#else + (void)store_insn; +#endif return true; fail: @@ -414,6 +473,7 @@ jit_compile_op_i64_store(JitCompContext *cc, uint32 align, uint32 offset, uint32 bytes, bool atomic) { JitReg value, addr, offset1, memory_data; + JitInsn *store_insn = NULL; POP_I64(value); POP_I32(addr); @@ -422,6 +482,11 @@ jit_compile_op_i64_store(JitCompContext *cc, uint32 align, uint32 offset, if (!offset1) { goto fail; } +#if WASM_ENABLE_SHARED_MEMORY != 0 + if (atomic) { + CHECK_ALIGNMENT(offset1); + } +#endif if (jit_reg_is_const(value) && bytes < 8) { value = NEW_CONST(I32, (int32)jit_cc_get_const_I64(cc, value)); @@ -432,22 +497,22 @@ jit_compile_op_i64_store(JitCompContext *cc, uint32 align, uint32 offset, switch (bytes) { case 1: { - GEN_INSN(STI8, value, memory_data, offset1); + store_insn = GEN_INSN(STI8, value, memory_data, offset1); break; } case 2: { - GEN_INSN(STI16, value, memory_data, offset1); + store_insn = GEN_INSN(STI16, value, memory_data, offset1); break; } case 4: { - GEN_INSN(STI32, value, memory_data, offset1); + store_insn = GEN_INSN(STI32, value, memory_data, offset1); break; } case 8: { - GEN_INSN(STI64, value, memory_data, offset1); + store_insn = GEN_INSN(STI64, value, memory_data, offset1); break; } default: @@ -456,6 +521,12 @@ jit_compile_op_i64_store(JitCompContext *cc, uint32 align, uint32 offset, goto fail; } } +#if WASM_ENABLE_SHARED_MEMORY != 0 + if (atomic && store_insn) + 
set_load_or_store_atomic(store_insn); +#else + (void)store_insn; +#endif return true; fail: @@ -774,10 +845,153 @@ fail: #endif #if WASM_ENABLE_SHARED_MEMORY != 0 +#define GEN_AT_RMW_INSN(op, op_type, bytes, result, value, memory_data, \ + offset1) \ + do { \ + switch (bytes) { \ + case 1: \ + { \ + insn = GEN_INSN(AT_##op##U8, result, value, memory_data, \ + offset1); \ + break; \ + } \ + case 2: \ + { \ + insn = GEN_INSN(AT_##op##U16, result, value, memory_data, \ + offset1); \ + break; \ + } \ + case 4: \ + { \ + if (op_type == VALUE_TYPE_I32) \ + insn = GEN_INSN(AT_##op##I32, result, value, memory_data, \ + offset1); \ + else \ + insn = GEN_INSN(AT_##op##U32, result, value, memory_data, \ + offset1); \ + break; \ + } \ + case 8: \ + { \ + insn = GEN_INSN(AT_##op##I64, result, value, memory_data, \ + offset1); \ + break; \ + } \ + default: \ + { \ + bh_assert(0); \ + goto fail; \ + } \ + } \ + } while (0) + bool jit_compile_op_atomic_rmw(JitCompContext *cc, uint8 atomic_op, uint8 op_type, uint32 align, uint32 offset, uint32 bytes) { + JitReg addr, offset1, memory_data, value, result, eax_hreg, rax_hreg, + ebx_hreg, rbx_hreg; + JitInsn *insn = NULL; + bool is_i32 = op_type == VALUE_TYPE_I32; + bool is_logical_op = atomic_op == AtomicRMWBinOpAnd + || atomic_op == AtomicRMWBinOpOr + || atomic_op == AtomicRMWBinOpXor; + + /* currently we only implement atomic rmw on x86-64 target */ +#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) + + /* For atomic logical binary ops, it implicitly uses rax in cmpxchg + * instruction and implicitly uses rbx for storing temp value in the + * generated loop */ + eax_hreg = jit_codegen_get_hreg_by_name("eax"); + rax_hreg = jit_codegen_get_hreg_by_name("rax"); + ebx_hreg = jit_codegen_get_hreg_by_name("ebx"); + rbx_hreg = jit_codegen_get_hreg_by_name("rbx"); + + bh_assert(op_type == VALUE_TYPE_I32 || op_type == VALUE_TYPE_I64); + if (op_type == VALUE_TYPE_I32) { + POP_I32(value); + } + else { + POP_I64(value); + } + POP_I32(addr); + + offset1 = check_and_seek(cc, addr, offset, bytes); + if (!offset1) { + goto fail; + } + CHECK_ALIGNMENT(offset1); + + memory_data = get_memory_data_reg(cc->jit_frame, 0); + + if (op_type == VALUE_TYPE_I32) + result = jit_cc_new_reg_I32(cc); + else + result = jit_cc_new_reg_I64(cc); + + switch (atomic_op) { + case AtomicRMWBinOpAdd: + { + GEN_AT_RMW_INSN(ADD, op_type, bytes, result, value, memory_data, + offset1); + break; + } + case AtomicRMWBinOpSub: + { + GEN_AT_RMW_INSN(SUB, op_type, bytes, result, value, memory_data, + offset1); + break; + } + case AtomicRMWBinOpAnd: + { + GEN_AT_RMW_INSN(AND, op_type, bytes, result, value, memory_data, + offset1); + break; + } + case AtomicRMWBinOpOr: + { + GEN_AT_RMW_INSN(OR, op_type, bytes, result, value, memory_data, + offset1); + break; + } + case AtomicRMWBinOpXor: + { + GEN_AT_RMW_INSN(XOR, op_type, bytes, result, value, memory_data, + offset1); + break; + } + case AtomicRMWBinOpXchg: + { + GEN_AT_RMW_INSN(XCHG, op_type, bytes, result, value, memory_data, + offset1); + break; + } + default: + { + bh_assert(0); + goto fail; + } + } + + if (is_logical_op + && (!insn + || !jit_lock_reg_in_insn(cc, insn, is_i32 ? eax_hreg : rax_hreg) + || !jit_lock_reg_in_insn(cc, insn, is_i32 ? 
ebx_hreg : rbx_hreg))) { + jit_set_last_error( + cc, "generate atomic logical insn or lock ra&rb hreg failed"); + goto fail; + } + + if (op_type == VALUE_TYPE_I32) + PUSH_I32(result); + else + PUSH_I64(result); + + return true; +#endif /* defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) */ + +fail: return false; } @@ -785,6 +999,93 @@ bool jit_compile_op_atomic_cmpxchg(JitCompContext *cc, uint8 op_type, uint32 align, uint32 offset, uint32 bytes) { + JitReg addr, offset1, memory_data, value, expect, result; + bool is_i32 = op_type == VALUE_TYPE_I32; + /* currently we only implement atomic cmpxchg on x86-64 target */ +#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) + /* cmpxchg will use register al/ax/eax/rax to store parameter expected + * value, and the read result will also be stored to al/ax/eax/rax */ + JitReg eax_hreg = jit_codegen_get_hreg_by_name("eax"); + JitReg rax_hreg = jit_codegen_get_hreg_by_name("rax"); + JitInsn *insn = NULL; + + bh_assert(op_type == VALUE_TYPE_I32 || op_type == VALUE_TYPE_I64); + if (is_i32) { + POP_I32(value); + POP_I32(expect); + result = jit_cc_new_reg_I32(cc); + } + else { + POP_I64(value); + POP_I64(expect); + result = jit_cc_new_reg_I64(cc); + } + POP_I32(addr); + + offset1 = check_and_seek(cc, addr, offset, bytes); + if (!offset1) { + goto fail; + } + CHECK_ALIGNMENT(offset1); + + memory_data = get_memory_data_reg(cc->jit_frame, 0); + + GEN_INSN(MOV, is_i32 ? eax_hreg : rax_hreg, expect); + switch (bytes) { + case 1: + { + insn = GEN_INSN(AT_CMPXCHGU8, value, is_i32 ? eax_hreg : rax_hreg, + memory_data, offset1); + break; + } + case 2: + { + insn = GEN_INSN(AT_CMPXCHGU16, value, is_i32 ? eax_hreg : rax_hreg, + memory_data, offset1); + break; + } + case 4: + { + if (op_type == VALUE_TYPE_I32) + insn = + GEN_INSN(AT_CMPXCHGI32, value, is_i32 ? eax_hreg : rax_hreg, + memory_data, offset1); + else + insn = + GEN_INSN(AT_CMPXCHGU32, value, is_i32 ? eax_hreg : rax_hreg, + memory_data, offset1); + break; + } + case 8: + { + insn = GEN_INSN(AT_CMPXCHGI64, value, is_i32 ? eax_hreg : rax_hreg, + memory_data, offset1); + break; + } + default: + { + bh_assert(0); + goto fail; + } + } + + if (!insn + || !jit_lock_reg_in_insn(cc, insn, is_i32 ? eax_hreg : rax_hreg)) { + jit_set_last_error(cc, "generate cmpxchg insn or lock ra hreg failed"); + goto fail; + } + + GEN_INSN(MOV, result, is_i32 ? 
eax_hreg : rax_hreg); + + if (is_i32) + PUSH_I32(result); + else + PUSH_I64(result); + + return true; +#endif /* defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) */ + +fail: return false; } @@ -812,8 +1113,10 @@ jit_compile_op_atomic_wait(JitCompContext *cc, uint8 op_type, uint32 align, JitReg offset1 = check_and_seek(cc, addr, offset, bytes); if (!offset1) goto fail; - JitReg maddr = jit_cc_new_reg_I64(cc); - CHECK_ALIGNMENT(maddr, memory_data, offset1); + CHECK_ALIGNMENT(offset1); + + JitReg maddr = jit_cc_new_reg_ptr(cc); + GEN_INSN(ADD, maddr, memory_data, offset1); // Prepare `wasm_runtime_atomic_wait` arguments JitReg res = jit_cc_new_reg_I32(cc); @@ -835,6 +1138,12 @@ jit_compile_op_atomic_wait(JitCompContext *cc, uint8 op_type, uint32 align, goto fail; PUSH_I32(res); + +#if WASM_ENABLE_THREAD_MGR != 0 + /* Insert suspend check point */ + if (!jit_check_suspend_flags(cc)) + goto fail; +#endif return true; fail: return false; @@ -854,8 +1163,10 @@ jit_compiler_op_atomic_notify(JitCompContext *cc, uint32 align, uint32 offset, JitReg offset1 = check_and_seek(cc, addr, offset, bytes); if (!offset1) goto fail; - JitReg maddr = jit_cc_new_reg_I64(cc); - CHECK_ALIGNMENT(maddr, memory_data, offset1); + CHECK_ALIGNMENT(offset1); + + JitReg maddr = jit_cc_new_reg_ptr(cc); + GEN_INSN(ADD, maddr, memory_data, offset1); // Prepare `wasm_runtime_atomic_notify` arguments JitReg res = jit_cc_new_reg_I32(cc); @@ -879,4 +1190,11 @@ jit_compiler_op_atomic_notify(JitCompContext *cc, uint32 align, uint32 offset, fail: return false; } + +bool +jit_compiler_op_atomic_fence(JitCompContext *cc) +{ + GEN_INSN(FENCE); + return true; +} #endif diff --git a/core/iwasm/fast-jit/fe/jit_emit_memory.h b/core/iwasm/fast-jit/fe/jit_emit_memory.h index bbf715f2..6565cdc1 100644 --- a/core/iwasm/fast-jit/fe/jit_emit_memory.h +++ b/core/iwasm/fast-jit/fe/jit_emit_memory.h @@ -80,6 +80,9 @@ jit_compile_op_atomic_wait(JitCompContext *cc, uint8 op_type, uint32 align, bool jit_compiler_op_atomic_notify(JitCompContext *cc, uint32 align, uint32 offset, uint32 bytes); + +bool +jit_compiler_op_atomic_fence(JitCompContext *cc); #endif #ifdef __cplusplus diff --git a/core/iwasm/fast-jit/jit_dump.c b/core/iwasm/fast-jit/jit_dump.c index 4dba5c3b..d61ed5dc 100644 --- a/core/iwasm/fast-jit/jit_dump.c +++ b/core/iwasm/fast-jit/jit_dump.c @@ -114,7 +114,10 @@ jit_dump_insn(JitCompContext *cc, JitInsn *insn) switch (insn->opcode) { #define INSN(NAME, OPND_KIND, OPND_NUM, FIRST_USE) \ case JIT_OP_##NAME: \ - os_printf(" %-15s", #NAME); \ + if (insn->flags_u8 & 0x1) \ + os_printf(" ATOMIC %-8s", #NAME); \ + else \ + os_printf(" %-15s", #NAME); \ jit_dump_insn_##OPND_KIND(cc, insn, OPND_NUM); \ break; #include "jit_ir.def" @@ -319,7 +322,9 @@ jit_pass_dump(JitCompContext *cc) os_printf("JIT.COMPILER.DUMP: PASS_NO=%d PREV_PASS=%s\n\n", pass_no, pass_name); + jit_dump_cc(cc); + os_printf("\n"); return true; } diff --git a/core/iwasm/fast-jit/jit_frontend.c b/core/iwasm/fast-jit/jit_frontend.c index 5ca82964..ec68ad91 100644 --- a/core/iwasm/fast-jit/jit_frontend.c +++ b/core/iwasm/fast-jit/jit_frontend.c @@ -223,18 +223,37 @@ get_memory_data_reg(JitFrame *frame, uint32 mem_idx) { JitCompContext *cc = frame->cc; JitReg module_inst_reg = get_module_inst_reg(frame); - uint32 memory_data_offset = - (uint32)offsetof(WASMModuleInstance, global_table_data.bytes) - + (uint32)offsetof(WASMMemoryInstance, memory_data); + uint32 memory_data_offset; bh_assert(mem_idx == 0); - +#if WASM_ENABLE_SHARED_MEMORY != 0 + uint32 memories_offset = 
(uint32)offsetof(WASMModuleInstance, memories); + JitReg memories_addr = jit_cc_new_reg_ptr(cc); + JitReg memories_0_addr = jit_cc_new_reg_ptr(cc); + memory_data_offset = (uint32)offsetof(WASMMemoryInstance, memory_data); + if (!frame->memory_regs[mem_idx].memory_data) { + frame->memory_regs[mem_idx].memory_data = + cc->memory_regs[mem_idx].memory_data; + /* module_inst->memories */ + GEN_INSN(LDPTR, memories_addr, module_inst_reg, + NEW_CONST(I32, memories_offset)); + /* module_inst->memories[0] */ + GEN_INSN(LDPTR, memories_0_addr, memories_addr, NEW_CONST(I32, 0)); + /* memories[0]->memory_data */ + GEN_INSN(LDPTR, frame->memory_regs[mem_idx].memory_data, + memories_0_addr, NEW_CONST(I32, memory_data_offset)); + } +#else + memory_data_offset = + (uint32)offsetof(WASMModuleInstance, global_table_data.bytes) + + (uint32)offsetof(WASMMemoryInstance, memory_data); if (!frame->memory_regs[mem_idx].memory_data) { frame->memory_regs[mem_idx].memory_data = cc->memory_regs[mem_idx].memory_data; GEN_INSN(LDPTR, frame->memory_regs[mem_idx].memory_data, module_inst_reg, NEW_CONST(I32, memory_data_offset)); } +#endif return frame->memory_regs[mem_idx].memory_data; } @@ -1078,6 +1097,39 @@ read_leb(JitCompContext *cc, const uint8 *buf, const uint8 *buf_end, res = (int64)res64; \ } while (0) +#if WASM_ENABLE_SHARED_MEMORY != 0 +#define COMPILE_ATOMIC_RMW(OP, NAME) \ + case WASM_OP_ATOMIC_RMW_I32_##NAME: \ + bytes = 4; \ + op_type = VALUE_TYPE_I32; \ + goto OP_ATOMIC_##OP; \ + case WASM_OP_ATOMIC_RMW_I64_##NAME: \ + bytes = 8; \ + op_type = VALUE_TYPE_I64; \ + goto OP_ATOMIC_##OP; \ + case WASM_OP_ATOMIC_RMW_I32_##NAME##8_U: \ + bytes = 1; \ + op_type = VALUE_TYPE_I32; \ + goto OP_ATOMIC_##OP; \ + case WASM_OP_ATOMIC_RMW_I32_##NAME##16_U: \ + bytes = 2; \ + op_type = VALUE_TYPE_I32; \ + goto OP_ATOMIC_##OP; \ + case WASM_OP_ATOMIC_RMW_I64_##NAME##8_U: \ + bytes = 1; \ + op_type = VALUE_TYPE_I64; \ + goto OP_ATOMIC_##OP; \ + case WASM_OP_ATOMIC_RMW_I64_##NAME##16_U: \ + bytes = 2; \ + op_type = VALUE_TYPE_I64; \ + goto OP_ATOMIC_##OP; \ + case WASM_OP_ATOMIC_RMW_I64_##NAME##32_U: \ + bytes = 4; \ + op_type = VALUE_TYPE_I64; \ + OP_ATOMIC_##OP : bin_op = AtomicRMWBinOp##OP; \ + goto build_atomic_rmw; +#endif + static bool jit_compile_func(JitCompContext *cc) { @@ -2096,6 +2148,8 @@ jit_compile_func(JitCompContext *cc) case WASM_OP_ATOMIC_FENCE: /* Skip memory index */ frame_ip++; + if (!jit_compiler_op_atomic_fence(cc)) + return false; break; case WASM_OP_ATOMIC_I32_LOAD: bytes = 4; @@ -2192,15 +2246,12 @@ jit_compile_func(JitCompContext *cc) return false; break; - /* TODO */ - /* COMPILE_ATOMIC_RMW(Add, ADD); COMPILE_ATOMIC_RMW(Sub, SUB); COMPILE_ATOMIC_RMW(And, AND); COMPILE_ATOMIC_RMW(Or, OR); COMPILE_ATOMIC_RMW(Xor, XOR); COMPILE_ATOMIC_RMW(Xchg, XCHG); - */ build_atomic_rmw: if (!jit_compile_op_atomic_rmw(cc, bin_op, op_type, diff --git a/core/iwasm/fast-jit/jit_frontend.h b/core/iwasm/fast-jit/jit_frontend.h index fce8ecfd..7aa460fd 100644 --- a/core/iwasm/fast-jit/jit_frontend.h +++ b/core/iwasm/fast-jit/jit_frontend.h @@ -108,6 +108,17 @@ typedef enum FloatArithmetic { FLOAT_MAX, } FloatArithmetic; +#if WASM_ENABLE_SHARED_MEMORY != 0 +typedef enum AtomicRMWBinOp { + AtomicRMWBinOpAdd, + AtomicRMWBinOpSub, + AtomicRMWBinOpAnd, + AtomicRMWBinOpOr, + AtomicRMWBinOpXor, + AtomicRMWBinOpXchg +} AtomicRMWBinOp; +#endif + /** * Translate instructions in a function. 
The translated block must * end with a branch instruction whose targets are offsets relating to diff --git a/core/iwasm/fast-jit/jit_ir.c b/core/iwasm/fast-jit/jit_ir.c index db0d86ca..68503e3f 100644 --- a/core/iwasm/fast-jit/jit_ir.c +++ b/core/iwasm/fast-jit/jit_ir.c @@ -10,7 +10,11 @@ /** * Operand kinds of instructions. */ -enum { JIT_OPND_KIND_Reg, JIT_OPND_KIND_VReg, JIT_OPND_KIND_LookupSwitch }; +enum { + JIT_OPND_KIND_Reg, + JIT_OPND_KIND_VReg, + JIT_OPND_KIND_LookupSwitch, +}; /** * Operand kind of each instruction. @@ -45,6 +49,18 @@ static const uint8 insn_opnd_first_use[] = { jit_calloc(offsetof(JitInsn, _opnd._opnd_VReg._reg) \ + sizeof(JitReg) * (OPND_NUM)) +JitInsn * +_jit_insn_new_Reg_0(JitOpcode opc) +{ + JitInsn *insn = JIT_INSN_NEW_Reg(0); + + if (insn) { + insn->opcode = opc; + } + + return insn; +} + JitInsn * _jit_insn_new_Reg_1(JitOpcode opc, JitReg r0) { diff --git a/core/iwasm/fast-jit/jit_ir.def b/core/iwasm/fast-jit/jit_ir.def index 8a4396da..046bea1f 100644 --- a/core/iwasm/fast-jit/jit_ir.def +++ b/core/iwasm/fast-jit/jit_ir.def @@ -200,6 +200,50 @@ INSN(CALLBC, Reg, 4, 2) INSN(RETURNBC, Reg, 3, 0) INSN(RETURN, Reg, 1, 0) +#if WASM_ENABLE_SHARED_MEMORY != 0 +/* Atomic Memory Accesses */ +/* op1(replacement val) op2(expected val) op3(mem data) op4(offset) + * and in x86, the result is stored in register al/ax/eax/rax */ +INSN(AT_CMPXCHGU8, Reg, 4, 0) +INSN(AT_CMPXCHGU16, Reg, 4, 0) +INSN(AT_CMPXCHGI32, Reg, 4, 0) +INSN(AT_CMPXCHGU32, Reg, 4, 0) +INSN(AT_CMPXCHGI64, Reg, 4, 0) +/* rmw operations: + * op1(read value) op2(operand value) op3(mem data) op4(offset) */ +INSN(AT_ADDU8, Reg, 4, 1) +INSN(AT_ADDU16, Reg, 4, 1) +INSN(AT_ADDI32, Reg, 4, 1) +INSN(AT_ADDU32, Reg, 4, 1) +INSN(AT_ADDI64, Reg, 4, 1) +INSN(AT_SUBU8, Reg, 4, 1) +INSN(AT_SUBU16, Reg, 4, 1) +INSN(AT_SUBI32, Reg, 4, 1) +INSN(AT_SUBU32, Reg, 4, 1) +INSN(AT_SUBI64, Reg, 4, 1) +INSN(AT_ANDU8, Reg, 4, 1) +INSN(AT_ANDU16, Reg, 4, 1) +INSN(AT_ANDI32, Reg, 4, 1) +INSN(AT_ANDU32, Reg, 4, 1) +INSN(AT_ANDI64, Reg, 4, 1) +INSN(AT_ORU8, Reg, 4, 1) +INSN(AT_ORU16, Reg, 4, 1) +INSN(AT_ORI32, Reg, 4, 1) +INSN(AT_ORU32, Reg, 4, 1) +INSN(AT_ORI64, Reg, 4, 1) +INSN(AT_XORU8, Reg, 4, 1) +INSN(AT_XORU16, Reg, 4, 1) +INSN(AT_XORI32, Reg, 4, 1) +INSN(AT_XORU32, Reg, 4, 1) +INSN(AT_XORI64, Reg, 4, 1) +INSN(AT_XCHGU8, Reg, 4, 1) +INSN(AT_XCHGU16, Reg, 4, 1) +INSN(AT_XCHGI32, Reg, 4, 1) +INSN(AT_XCHGU32, Reg, 4, 1) +INSN(AT_XCHGI64, Reg, 4, 1) +INSN(FENCE, Reg, 0, 0) +#endif + #undef INSN /** diff --git a/core/iwasm/fast-jit/jit_ir.h b/core/iwasm/fast-jit/jit_ir.h index 632e8ed1..e13a41d1 100644 --- a/core/iwasm/fast-jit/jit_ir.h +++ b/core/iwasm/fast-jit/jit_ir.h @@ -313,7 +313,8 @@ typedef struct JitInsn { /* Opcode of the instruction. */ uint16 opcode; - /* Reserved field that may be used by optimizations locally. */ + /* Reserved field that may be used by optimizations locally. + * bit_0(Least Significant Bit) is atomic flag for load/store */ uint8 flags_u8; /* The unique ID of the instruction. */ @@ -346,6 +347,9 @@ typedef enum JitOpcode { * Helper functions for creating new instructions. Don't call them * directly. Use jit_insn_new_NAME, such as jit_insn_new_MOV instead. */ + +JitInsn * +_jit_insn_new_Reg_0(JitOpcode opc); JitInsn * _jit_insn_new_Reg_1(JitOpcode opc, JitReg r0); JitInsn * @@ -368,31 +372,35 @@ _jit_insn_new_LookupSwitch_1(JitOpcode opc, JitReg value, uint32 num); * Instruction creation functions jit_insn_new_NAME, where NAME is the * name of the instruction defined in jit_ir.def. 
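+ *
+ * For example (editorial illustration), with the leading comma folded into
+ * the ARG_LIST_* macros below, a zero-operand instruction such as the new
+ * FENCE insn expands cleanly, while the other arities are unchanged:
+ *
+ *     jit_insn_new_FENCE()    => _jit_insn_new_Reg_0(JIT_OP_FENCE)
+ *     jit_insn_new_RETURN(r0) => _jit_insn_new_Reg_1(JIT_OP_RETURN, r0)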
*/ +#define ARG_DECL_Reg_0 +#define ARG_LIST_Reg_0 #define ARG_DECL_Reg_1 JitReg r0 -#define ARG_LIST_Reg_1 r0 +#define ARG_LIST_Reg_1 , r0 #define ARG_DECL_Reg_2 JitReg r0, JitReg r1 -#define ARG_LIST_Reg_2 r0, r1 +#define ARG_LIST_Reg_2 , r0, r1 #define ARG_DECL_Reg_3 JitReg r0, JitReg r1, JitReg r2 -#define ARG_LIST_Reg_3 r0, r1, r2 +#define ARG_LIST_Reg_3 , r0, r1, r2 #define ARG_DECL_Reg_4 JitReg r0, JitReg r1, JitReg r2, JitReg r3 -#define ARG_LIST_Reg_4 r0, r1, r2, r3 +#define ARG_LIST_Reg_4 , r0, r1, r2, r3 #define ARG_DECL_Reg_5 JitReg r0, JitReg r1, JitReg r2, JitReg r3, JitReg r4 -#define ARG_LIST_Reg_5 r0, r1, r2, r3, r4 +#define ARG_LIST_Reg_5 , r0, r1, r2, r3, r4 #define ARG_DECL_VReg_1 JitReg r0, int n -#define ARG_LIST_VReg_1 r0, n +#define ARG_LIST_VReg_1 , r0, n #define ARG_DECL_VReg_2 JitReg r0, JitReg r1, int n -#define ARG_LIST_VReg_2 r0, r1, n +#define ARG_LIST_VReg_2 , r0, r1, n #define ARG_DECL_LookupSwitch_1 JitReg value, uint32 num -#define ARG_LIST_LookupSwitch_1 value, num -#define INSN(NAME, OPND_KIND, OPND_NUM, FIRST_USE) \ - static inline JitInsn *jit_insn_new_##NAME( \ - ARG_DECL_##OPND_KIND##_##OPND_NUM) \ - { \ - return _jit_insn_new_##OPND_KIND##_##OPND_NUM( \ - JIT_OP_##NAME, ARG_LIST_##OPND_KIND##_##OPND_NUM); \ +#define ARG_LIST_LookupSwitch_1 , value, num +#define INSN(NAME, OPND_KIND, OPND_NUM, FIRST_USE) \ + static inline JitInsn *jit_insn_new_##NAME( \ + ARG_DECL_##OPND_KIND##_##OPND_NUM) \ + { \ + return _jit_insn_new_##OPND_KIND##_##OPND_NUM( \ + JIT_OP_##NAME ARG_LIST_##OPND_KIND##_##OPND_NUM); \ } #include "jit_ir.def" #undef INSN +#undef ARG_DECL_Reg_0 +#undef ARG_LIST_Reg_0 #undef ARG_DECL_Reg_1 #undef ARG_LIST_Reg_1 #undef ARG_DECL_Reg_2 diff --git a/core/iwasm/fast-jit/jit_regalloc.c b/core/iwasm/fast-jit/jit_regalloc.c index 5bff465f..70ca228a 100644 --- a/core/iwasm/fast-jit/jit_regalloc.c +++ b/core/iwasm/fast-jit/jit_regalloc.c @@ -410,6 +410,13 @@ collect_distances(RegallocContext *rc, JitBasicBlock *basic_block) JIT_FOREACH_INSN(basic_block, insn) { +#if WASM_ENABLE_SHARED_MEMORY != 0 + /* fence insn doesn't have any operand, hence, no regs involved */ + if (insn->opcode == JIT_OP_FENCE) { + continue; + } +#endif + JitRegVec regvec = jit_insn_opnd_regs(insn); unsigned i; JitReg *regp; @@ -737,6 +744,13 @@ allocate_for_basic_block(RegallocContext *rc, JitBasicBlock *basic_block, JIT_FOREACH_INSN_REVERSE(basic_block, insn) { +#if WASM_ENABLE_SHARED_MEMORY != 0 + /* fence insn doesn't have any operand, hence, no regs involved */ + if (insn->opcode == JIT_OP_FENCE) { + continue; + } +#endif + JitRegVec regvec = jit_insn_opnd_regs(insn); unsigned first_use = jit_insn_opnd_first_use(insn); unsigned i; diff --git a/tests/wamr-test-suites/test_wamr.sh b/tests/wamr-test-suites/test_wamr.sh index cf0bc970..67868b9c 100755 --- a/tests/wamr-test-suites/test_wamr.sh +++ b/tests/wamr-test-suites/test_wamr.sh @@ -444,14 +444,6 @@ function spec_test() if [[ ${ENABLE_MULTI_THREAD} == 1 ]]; then ARGS_FOR_SPEC_TEST+="-p " - if [[ $1 == 'fast-jit' ]]; then - echo "fast-jit doesn't support multi-thread feature yet, skip it" - return - fi - if [[ $1 == 'multi-tier-jit' ]]; then - echo "multi-tier-jit doesn't support multi-thread feature yet, skip it" - return - fi fi if [[ ${ENABLE_XIP} == 1 ]]; then