Implement SIMD latest opcodes and update LLVM to 13.0 (#758)

Implement the latest SIMD opcodes and update LLVM to 13.0,
update the LLVM build scripts, update the sample workloads' build scripts,
and build a customized wasi-sdk to build some workloads.
Also refine the CI rules.

Signed-off-by: Wenyong Huang <wenyong.huang@intel.com>
This commit is contained in:
Wenyong Huang
2021-09-17 19:12:57 +08:00
committed by GitHub
parent 7e60a5db8d
commit 7be0d385a6
82 changed files with 5266 additions and 4698 deletions

View File

@ -4053,44 +4053,35 @@ wasm_loader_find_block_addr(BlockAddr *block_addr_cache,
#if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0)
case WASM_OP_SIMD_PREFIX:
{
/* TODO: shall we create a table to be friendly to branch prediction? */
opcode = read_uint8(p);
if (SIMD_i8x16_eq <= opcode
&& opcode <= SIMD_f32x4_convert_i32x4_u) {
break;
}
/* follow the order of enum WASMSimdEXTOpcode in wasm_opcode.h */
switch (opcode) {
case SIMD_v128_load:
case SIMD_i16x8_load8x8_s:
case SIMD_i16x8_load8x8_u:
case SIMD_i32x4_load16x4_s:
case SIMD_i32x4_load16x4_u:
case SIMD_i64x2_load32x2_s:
case SIMD_i64x2_load32x2_u:
case SIMD_v8x16_load_splat:
case SIMD_v16x8_load_splat:
case SIMD_v32x4_load_splat:
case SIMD_v64x2_load_splat:
case SIMD_v128_load8x8_s:
case SIMD_v128_load8x8_u:
case SIMD_v128_load16x4_s:
case SIMD_v128_load16x4_u:
case SIMD_v128_load32x2_s:
case SIMD_v128_load32x2_u:
case SIMD_v128_load8_splat:
case SIMD_v128_load16_splat:
case SIMD_v128_load32_splat:
case SIMD_v128_load64_splat:
case SIMD_v128_store:
skip_leb_uint32(p, p_end); /* align */
skip_leb_uint32(p, p_end); /* offset */
/* memarg align */
skip_leb_uint32(p, p_end);
/* memarg offset*/
skip_leb_uint32(p, p_end);
break;
case SIMD_v128_const:
case SIMD_v8x16_shuffle:
/* immByte[16] immLaneId[16] */
CHECK_BUF1(p, p_end, 16);
p += 16;
break;
case SIMD_v8x16_swizzle:
case SIMD_i8x16_splat:
case SIMD_i16x8_splat:
case SIMD_i32x4_splat:
case SIMD_i64x2_splat:
case SIMD_f32x4_splat:
case SIMD_f64x2_splat:
break;
case SIMD_i8x16_extract_lane_s:
case SIMD_i8x16_extract_lane_u:
case SIMD_i8x16_replace_lane:
@ -4105,14 +4096,44 @@ wasm_loader_find_block_addr(BlockAddr *block_addr_cache,
case SIMD_f32x4_replace_lane:
case SIMD_f64x2_extract_lane:
case SIMD_f64x2_replace_lane:
/* ImmLaneId */
CHECK_BUF(p, p_end, 1);
p++;
break;
case SIMD_v128_load8_lane:
case SIMD_v128_load16_lane:
case SIMD_v128_load32_lane:
case SIMD_v128_load64_lane:
case SIMD_v128_store8_lane:
case SIMD_v128_store16_lane:
case SIMD_v128_store32_lane:
case SIMD_v128_store64_lane:
/* memarg align */
skip_leb_uint32(p, p_end);
/* memarg offset*/
skip_leb_uint32(p, p_end);
/* ImmLaneId */
CHECK_BUF(p, p_end, 1);
p++;
break;
case SIMD_v128_load32_zero:
case SIMD_v128_load64_zero:
/* memarg align */
skip_leb_uint32(p, p_end);
/* memarg offset*/
skip_leb_uint32(p, p_end);
break;
default:
LOG_WARNING("WASM loader find block addr failed: "
"invalid opcode fd 0x%02x.", opcode);
return false;
/*
* Since the latest SIMD specification uses almost every value
* from 0x00 to 0xff, the default branch represents all
* opcodes without immediates
* https://github.com/WebAssembly/simd/blob/main/proposals/simd/NewOpcodes.md
*/
break;
}
break;
}
@ -5685,9 +5706,25 @@ check_simd_memory_access_align(uint8 opcode, uint32 align,
4, /* store */
};
bh_assert(opcode <= SIMD_v128_store);
uint8 mem_access_aligns_load_lane[] = {
0, 1, 2, 3, /* load lane */
0, 1, 2, 3, /* store lane */
2, 3 /* load zero */
};
if (align > mem_access_aligns[opcode - SIMD_v128_load]) {
if (!((opcode <= SIMD_v128_store)
|| (SIMD_v128_load8_lane <= opcode
&& opcode <= SIMD_v128_load64_zero))) {
set_error_buf(error_buf, error_buf_size,
"the opcode doesn't include memarg");
return false;
}
if ((opcode <= SIMD_v128_store
&& align > mem_access_aligns[opcode - SIMD_v128_load])
|| (SIMD_v128_load8_lane <= opcode && opcode <= SIMD_v128_load64_zero
&& align > mem_access_aligns_load_lane[opcode
- SIMD_v128_load8_lane])) {
set_error_buf(error_buf, error_buf_size,
"alignment must not be larger than natural");
return false;
@ -5731,6 +5768,24 @@ check_simd_access_lane(uint8 opcode, uint8 lane,
goto fail;
}
break;
case SIMD_v128_load8_lane:
case SIMD_v128_load16_lane:
case SIMD_v128_load32_lane:
case SIMD_v128_load64_lane:
case SIMD_v128_store8_lane:
case SIMD_v128_store16_lane:
case SIMD_v128_store32_lane:
case SIMD_v128_store64_lane:
case SIMD_v128_load32_zero:
case SIMD_v128_load64_zero:
{
uint8 max_lanes[] = { 16, 8, 4, 2, 16, 8, 4, 2, 4, 2 };
if (lane >= max_lanes[opcode - SIMD_v128_load8_lane]) {
goto fail;
}
break;
}
default:
goto fail;
}
@ -8038,21 +8093,21 @@ fail_data_cnt_sec_require:
#if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0)
case WASM_OP_SIMD_PREFIX:
{
uint8 lane;
opcode = read_uint8(p);
/* follow the order of enum WASMSimdEXTOpcode in wasm_opcode.h */
switch (opcode) {
/* memory instruction */
case SIMD_v128_load:
case SIMD_i16x8_load8x8_s:
case SIMD_i16x8_load8x8_u:
case SIMD_i32x4_load16x4_s:
case SIMD_i32x4_load16x4_u:
case SIMD_i64x2_load32x2_s:
case SIMD_i64x2_load32x2_u:
case SIMD_v8x16_load_splat:
case SIMD_v16x8_load_splat:
case SIMD_v32x4_load_splat:
case SIMD_v64x2_load_splat:
case SIMD_v128_load8x8_s:
case SIMD_v128_load8x8_u:
case SIMD_v128_load16x4_s:
case SIMD_v128_load16x4_u:
case SIMD_v128_load32x2_s:
case SIMD_v128_load32x2_u:
case SIMD_v128_load8_splat:
case SIMD_v128_load16_splat:
case SIMD_v128_load32_splat:
case SIMD_v128_load64_splat:
{
CHECK_MEMORY();
@ -8064,7 +8119,6 @@ fail_data_cnt_sec_require:
read_leb_uint32(p, p_end, mem_offset); /* offset */
/* pop(i32 %i), push(v128 *result) */
POP_AND_PUSH(VALUE_TYPE_I32, VALUE_TYPE_V128);
break;
}
@ -8081,18 +8135,19 @@ fail_data_cnt_sec_require:
read_leb_uint32(p, p_end, mem_offset); /* offset */
/* pop(v128 %value) */
POP_V128();
/* pop(i32 %i) */
POP_I32();
break;
}
/* basic operation */
case SIMD_v128_const:
{
CHECK_BUF1(p, p_end, 16);
p += 16;
PUSH_V128();
break;
}
case SIMD_v8x16_shuffle:
{
@ -8111,122 +8166,87 @@ fail_data_cnt_sec_require:
}
case SIMD_v8x16_swizzle:
{
POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
/* splat operation */
case SIMD_i8x16_splat:
case SIMD_i16x8_splat:
case SIMD_i32x4_splat:
POP_AND_PUSH(VALUE_TYPE_I32, VALUE_TYPE_V128);
break;
case SIMD_i64x2_splat:
POP_AND_PUSH(VALUE_TYPE_I64, VALUE_TYPE_V128);
break;
case SIMD_f32x4_splat:
POP_AND_PUSH(VALUE_TYPE_F32, VALUE_TYPE_V128);
break;
case SIMD_f64x2_splat:
POP_AND_PUSH(VALUE_TYPE_F64, VALUE_TYPE_V128);
{
uint8 pop_type[] = { VALUE_TYPE_I32, VALUE_TYPE_I32,
VALUE_TYPE_I32, VALUE_TYPE_I64,
VALUE_TYPE_F32, VALUE_TYPE_F64 };
POP_AND_PUSH(pop_type[opcode - SIMD_i8x16_splat],
VALUE_TYPE_V128);
break;
}
/* lane operation */
case SIMD_i8x16_extract_lane_s:
case SIMD_i8x16_extract_lane_u:
case SIMD_i8x16_replace_lane:
case SIMD_i16x8_extract_lane_s:
case SIMD_i16x8_extract_lane_u:
case SIMD_i32x4_extract_lane:
CHECK_BUF(p, p_end, 1);
lane = read_uint8(p);
if (!check_simd_access_lane(opcode, lane, error_buf,
error_buf_size)) {
goto fail;
}
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_I32);
break;
case SIMD_i64x2_extract_lane:
CHECK_BUF(p, p_end, 1);
lane = read_uint8(p);
if (!check_simd_access_lane(opcode, lane, error_buf,
error_buf_size)) {
goto fail;
}
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_I64);
break;
case SIMD_f32x4_extract_lane:
CHECK_BUF(p, p_end, 1);
lane = read_uint8(p);
if (!check_simd_access_lane(opcode, lane, error_buf,
error_buf_size)) {
goto fail;
}
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_F32);
break;
case SIMD_f64x2_extract_lane:
CHECK_BUF(p, p_end, 1);
lane = read_uint8(p);
if (!check_simd_access_lane(opcode, lane, error_buf,
error_buf_size)) {
goto fail;
}
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_F64);
break;
case SIMD_i8x16_replace_lane:
case SIMD_i16x8_replace_lane:
case SIMD_i32x4_extract_lane:
case SIMD_i32x4_replace_lane:
CHECK_BUF(p, p_end, 1);
lane = read_uint8(p);
if (!check_simd_access_lane(opcode, lane, error_buf,
error_buf_size)) {
goto fail;
}
POP_I32();
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
case SIMD_i64x2_extract_lane:
case SIMD_i64x2_replace_lane:
CHECK_BUF(p, p_end, 1);
lane = read_uint8(p);
if (!check_simd_access_lane(opcode, lane, error_buf,
error_buf_size)) {
goto fail;
}
POP_I64();
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
case SIMD_f32x4_extract_lane:
case SIMD_f32x4_replace_lane:
CHECK_BUF(p, p_end, 1);
lane = read_uint8(p);
if (!check_simd_access_lane(opcode, lane, error_buf,
error_buf_size)) {
goto fail;
}
POP_F32();
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
case SIMD_f64x2_extract_lane:
case SIMD_f64x2_replace_lane:
{
uint8 lane;
/* clang-format off */
uint8 replace[] = {
/*i8x16*/ 0x0, 0x0, VALUE_TYPE_I32,
/*i16x8*/ 0x0, 0x0, VALUE_TYPE_I32,
/*i32x4*/ 0x0, VALUE_TYPE_I32,
/*i64x2*/ 0x0, VALUE_TYPE_I64,
/*f32x4*/ 0x0, VALUE_TYPE_F32,
/*f64x2*/ 0x0, VALUE_TYPE_F64,
};
uint8 push_type[] = {
/*i8x16*/ VALUE_TYPE_I32, VALUE_TYPE_I32,
VALUE_TYPE_V128,
/*i16x8*/ VALUE_TYPE_I32, VALUE_TYPE_I32,
VALUE_TYPE_V128,
/*i32x4*/ VALUE_TYPE_I32, VALUE_TYPE_V128,
/*i64x2*/ VALUE_TYPE_I64, VALUE_TYPE_V128,
/*f32x4*/ VALUE_TYPE_F32, VALUE_TYPE_V128,
/*f64x2*/ VALUE_TYPE_F64, VALUE_TYPE_V128,
};
/* clang-format on */
CHECK_BUF(p, p_end, 1);
lane = read_uint8(p);
if (!check_simd_access_lane(opcode, lane, error_buf,
error_buf_size)) {
goto fail;
}
POP_F64();
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
if (replace[opcode - SIMD_i8x16_extract_lane_s]) {
if (!(wasm_loader_pop_frame_ref(
loader_ctx,
replace[opcode - SIMD_i8x16_extract_lane_s],
error_buf, error_buf_size)))
goto fail;
}
POP_AND_PUSH(
VALUE_TYPE_V128,
push_type[opcode - SIMD_i8x16_extract_lane_s]);
break;
}
/* i8x16 compare operation */
case SIMD_i8x16_eq:
case SIMD_i8x16_ne:
case SIMD_i8x16_lt_s:
@ -8237,6 +8257,7 @@ fail_data_cnt_sec_require:
case SIMD_i8x16_le_u:
case SIMD_i8x16_ge_s:
case SIMD_i8x16_ge_u:
/* i16x8 compare operation */
case SIMD_i16x8_eq:
case SIMD_i16x8_ne:
case SIMD_i16x8_lt_s:
@ -8247,6 +8268,7 @@ fail_data_cnt_sec_require:
case SIMD_i16x8_le_u:
case SIMD_i16x8_ge_s:
case SIMD_i16x8_ge_u:
/* i32x4 compare operation */
case SIMD_i32x4_eq:
case SIMD_i32x4_ne:
case SIMD_i32x4_lt_s:
@ -8257,122 +8279,318 @@ fail_data_cnt_sec_require:
case SIMD_i32x4_le_u:
case SIMD_i32x4_ge_s:
case SIMD_i32x4_ge_u:
/* f32x4 compare operation */
case SIMD_f32x4_eq:
case SIMD_f32x4_ne:
case SIMD_f32x4_lt:
case SIMD_f32x4_gt:
case SIMD_f32x4_le:
case SIMD_f32x4_ge:
/* f64x2 compare operation */
case SIMD_f64x2_eq:
case SIMD_f64x2_ne:
case SIMD_f64x2_lt:
case SIMD_f64x2_gt:
case SIMD_f64x2_le:
case SIMD_f64x2_ge:
{
POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
/* v128 operation */
case SIMD_v128_not:
{
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
case SIMD_v128_and:
case SIMD_v128_andnot:
case SIMD_v128_or:
case SIMD_v128_xor:
{
POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
case SIMD_v128_bitselect:
{
POP_V128();
POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
case SIMD_v128_any_true:
{
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_I32);
break;
}
/* Load Lane Operation */
case SIMD_v128_load8_lane:
case SIMD_v128_load16_lane:
case SIMD_v128_load32_lane:
case SIMD_v128_load64_lane:
case SIMD_v128_store8_lane:
case SIMD_v128_store16_lane:
case SIMD_v128_store32_lane:
case SIMD_v128_store64_lane:
{
uint8 lane;
CHECK_MEMORY();
read_leb_uint32(p, p_end, align); /* align */
if (!check_simd_memory_access_align(
opcode, align, error_buf, error_buf_size)) {
goto fail;
}
read_leb_uint32(p, p_end, mem_offset); /* offset */
CHECK_BUF(p, p_end, 1);
lane = read_uint8(p);
if (!check_simd_access_lane(opcode, lane, error_buf,
error_buf_size)) {
goto fail;
}
POP_V128();
POP_I32();
if (opcode < SIMD_v128_store8_lane) {
PUSH_V128();
}
break;
}
case SIMD_v128_load32_zero:
case SIMD_v128_load64_zero:
{
CHECK_MEMORY();
read_leb_uint32(p, p_end, align); /* align */
if (!check_simd_memory_access_align(
opcode, align, error_buf, error_buf_size)) {
goto fail;
}
read_leb_uint32(p, p_end, mem_offset); /* offset */
POP_AND_PUSH(VALUE_TYPE_I32, VALUE_TYPE_V128);
break;
}
/* Float conversion */
case SIMD_f32x4_demote_f64x2_zero:
case SIMD_f64x2_promote_low_f32x4_zero:
{
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
/* i8x16 Operation */
case SIMD_i8x16_abs:
case SIMD_i8x16_neg:
case SIMD_i8x16_popcnt:
{
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
case SIMD_i8x16_all_true:
case SIMD_i8x16_bitmask:
{
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_I32);
break;
}
case SIMD_i8x16_narrow_i16x8_s:
case SIMD_i8x16_narrow_i16x8_u:
{
POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
case SIMD_f32x4_ceil:
case SIMD_f32x4_floor:
case SIMD_f32x4_trunc:
case SIMD_f32x4_nearest:
case SIMD_f64x2_ceil:
case SIMD_f64x2_floor:
case SIMD_f64x2_trunc:
case SIMD_f64x2_nearest:
case SIMD_v128_not:
case SIMD_i8x16_abs:
case SIMD_i8x16_neg:
case SIMD_i16x8_abs:
case SIMD_i16x8_neg:
case SIMD_i32x4_abs:
case SIMD_i32x4_neg:
case SIMD_i64x2_neg:
case SIMD_f32x4_abs:
case SIMD_f32x4_neg:
case SIMD_f32x4_sqrt:
case SIMD_f64x2_abs:
case SIMD_f64x2_neg:
case SIMD_f64x2_sqrt:
case SIMD_i16x8_widen_low_i8x16_s:
case SIMD_i16x8_widen_high_i8x16_s:
case SIMD_i16x8_widen_low_i8x16_u:
case SIMD_i16x8_widen_high_i8x16_u:
case SIMD_i32x4_widen_low_i16x8_s:
case SIMD_i32x4_widen_high_i16x8_s:
case SIMD_i32x4_widen_low_i16x8_u:
case SIMD_i32x4_widen_high_i16x8_u:
case SIMD_i32x4_trunc_sat_f32x4_s:
case SIMD_i32x4_trunc_sat_f32x4_u:
case SIMD_f32x4_convert_i32x4_s:
case SIMD_f32x4_convert_i32x4_u:
{
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
case SIMD_v128_bitselect:
POP_V128();
POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
case SIMD_i8x16_any_true:
case SIMD_i8x16_all_true:
case SIMD_i8x16_bitmask:
case SIMD_i16x8_any_true:
case SIMD_i16x8_all_true:
case SIMD_i16x8_bitmask:
case SIMD_i32x4_any_true:
case SIMD_i32x4_all_true:
case SIMD_i32x4_bitmask:
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_I32);
break;
}
case SIMD_i8x16_shl:
case SIMD_i8x16_shr_s:
case SIMD_i8x16_shr_u:
case SIMD_i16x8_shl:
case SIMD_i16x8_shr_s:
case SIMD_i16x8_shr_u:
case SIMD_i32x4_shl:
case SIMD_i32x4_shr_s:
case SIMD_i32x4_shr_u:
case SIMD_i64x2_shl:
case SIMD_i64x2_shr_s:
case SIMD_i64x2_shr_u:
{
POP_I32();
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
case SIMD_i8x16_narrow_i16x8_s:
case SIMD_i8x16_narrow_i16x8_u:
case SIMD_i16x8_narrow_i32x4_s:
case SIMD_i16x8_narrow_i32x4_u:
case SIMD_v128_and:
case SIMD_v128_andnot:
case SIMD_v128_or:
case SIMD_v128_xor:
case SIMD_i8x16_add:
case SIMD_i8x16_add_saturate_s:
case SIMD_i8x16_add_saturate_u:
case SIMD_i8x16_add_sat_s:
case SIMD_i8x16_add_sat_u:
case SIMD_i8x16_sub:
case SIMD_i8x16_sub_saturate_s:
case SIMD_i8x16_sub_saturate_u:
case SIMD_i8x16_sub_sat_s:
case SIMD_i8x16_sub_sat_u:
{
POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
case SIMD_f64x2_ceil:
case SIMD_f64x2_floor:
{
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
case SIMD_i8x16_min_s:
case SIMD_i8x16_min_u:
case SIMD_i8x16_max_s:
case SIMD_i8x16_max_u:
{
POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
case SIMD_f64x2_trunc:
{
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
case SIMD_i8x16_avgr_u:
{
POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
case SIMD_i16x8_extadd_pairwise_i8x16_s:
case SIMD_i16x8_extadd_pairwise_i8x16_u:
case SIMD_i32x4_extadd_pairwise_i16x8_s:
case SIMD_i32x4_extadd_pairwise_i16x8_u:
/* i16x8 operation */
case SIMD_i16x8_abs:
case SIMD_i16x8_neg:
{
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
case SIMD_i16x8_q15mulr_sat_s:
{
POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
case SIMD_i16x8_all_true:
case SIMD_i16x8_bitmask:
{
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_I32);
break;
}
case SIMD_i16x8_narrow_i32x4_s:
case SIMD_i16x8_narrow_i32x4_u:
{
POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
case SIMD_i16x8_extend_low_i8x16_s:
case SIMD_i16x8_extend_high_i8x16_s:
case SIMD_i16x8_extend_low_i8x16_u:
case SIMD_i16x8_extend_high_i8x16_u:
{
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
case SIMD_i16x8_shl:
case SIMD_i16x8_shr_s:
case SIMD_i16x8_shr_u:
{
POP_I32();
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
case SIMD_i16x8_add:
case SIMD_i16x8_add_saturate_s:
case SIMD_i16x8_add_saturate_u:
case SIMD_i16x8_add_sat_s:
case SIMD_i16x8_add_sat_u:
case SIMD_i16x8_sub:
case SIMD_i16x8_sub_saturate_s:
case SIMD_i16x8_sub_saturate_u:
case SIMD_i16x8_sub_sat_s:
case SIMD_i16x8_sub_sat_u:
{
POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
case SIMD_f64x2_nearest:
{
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
case SIMD_i16x8_mul:
case SIMD_i16x8_min_s:
case SIMD_i16x8_min_u:
case SIMD_i16x8_max_s:
case SIMD_i16x8_max_u:
case SIMD_i16x8_avgr_u:
case SIMD_i16x8_extmul_low_i8x16_s:
case SIMD_i16x8_extmul_high_i8x16_s:
case SIMD_i16x8_extmul_low_i8x16_u:
case SIMD_i16x8_extmul_high_i8x16_u:
{
POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
/* i32x4 operation */
case SIMD_i32x4_abs:
case SIMD_i32x4_neg:
{
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
case SIMD_i32x4_all_true:
case SIMD_i32x4_bitmask:
{
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_I32);
break;
}
case SIMD_i32x4_narrow_i64x2_s:
case SIMD_i32x4_narrow_i64x2_u:
{
POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
case SIMD_i32x4_extend_low_i16x8_s:
case SIMD_i32x4_extend_high_i16x8_s:
case SIMD_i32x4_extend_low_i16x8_u:
case SIMD_i32x4_extend_high_i16x8_u:
{
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
case SIMD_i32x4_shl:
case SIMD_i32x4_shr_s:
case SIMD_i32x4_shr_u:
{
POP_I32();
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
case SIMD_i32x4_add:
case SIMD_i32x4_sub:
case SIMD_i32x4_mul:
@ -8380,31 +8598,137 @@ fail_data_cnt_sec_require:
case SIMD_i32x4_min_u:
case SIMD_i32x4_max_s:
case SIMD_i32x4_max_u:
case SIMD_i32x4_dot_i16x8_s:
case SIMD_i32x4_avgr_u:
case SIMD_i32x4_extmul_low_i16x8_s:
case SIMD_i32x4_extmul_high_i16x8_s:
case SIMD_i32x4_extmul_low_i16x8_u:
case SIMD_i32x4_extmul_high_i16x8_u:
{
POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
/* i64x2 operation */
case SIMD_i64x2_abs:
case SIMD_i64x2_neg:
{
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
case SIMD_i64x2_all_true:
case SIMD_i64x2_bitmask:
{
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_I32);
break;
}
case SIMD_i64x2_extend_low_i32x4_s:
case SIMD_i64x2_extend_high_i32x4_s:
case SIMD_i64x2_extend_low_i32x4_u:
case SIMD_i64x2_extend_high_i32x4_u:
{
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
case SIMD_i64x2_shl:
case SIMD_i64x2_shr_s:
case SIMD_i64x2_shr_u:
{
POP_I32();
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
case SIMD_i64x2_add:
case SIMD_i64x2_sub:
case SIMD_i64x2_mul:
case SIMD_i64x2_eq:
case SIMD_i64x2_ne:
case SIMD_i64x2_lt_s:
case SIMD_i64x2_gt_s:
case SIMD_i64x2_le_s:
case SIMD_i64x2_ge_s:
case SIMD_i64x2_extmul_low_i32x4_s:
case SIMD_i64x2_extmul_high_i32x4_s:
case SIMD_i64x2_extmul_low_i32x4_u:
case SIMD_i64x2_extmul_high_i32x4_u:
{
POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
/* f32x4 operation */
case SIMD_f32x4_abs:
case SIMD_f32x4_neg:
case SIMD_f32x4_round:
case SIMD_f32x4_sqrt:
{
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
case SIMD_f32x4_add:
case SIMD_f32x4_sub:
case SIMD_f32x4_mul:
case SIMD_f32x4_div:
case SIMD_f32x4_min:
case SIMD_f32x4_max:
case SIMD_f32x4_pmin:
case SIMD_f32x4_pmax:
{
POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
/* f64x2 operation */
case SIMD_f64x2_abs:
case SIMD_f64x2_neg:
case SIMD_f64x2_round:
case SIMD_f64x2_sqrt:
{
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
case SIMD_f64x2_add:
case SIMD_f64x2_sub:
case SIMD_f64x2_mul:
case SIMD_f64x2_div:
case SIMD_f64x2_min:
case SIMD_f64x2_max:
case SIMD_f64x2_pmin:
case SIMD_f64x2_pmax:
{
POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
case SIMD_i32x4_trunc_sat_f32x4_s:
case SIMD_i32x4_trunc_sat_f32x4_u:
case SIMD_f32x4_convert_i32x4_s:
case SIMD_f32x4_convert_i32x4_u:
case SIMD_i32x4_trunc_sat_f64x2_s_zero:
case SIMD_i32x4_trunc_sat_f64x2_u_zero:
case SIMD_f64x2_convert_low_i32x4_s:
case SIMD_f64x2_convert_low_i32x4_u:
{
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
default:
{
if (error_buf != NULL) {
snprintf(error_buf, error_buf_size,
"WASM module load failed: "
"invalid opcode 0xfd %02x.", opcode);
"WASM module load failed: "
"invalid opcode 0xfd %02x.",
opcode);
}
goto fail;
}
}
break;
}

View File

@ -296,18 +296,18 @@ typedef enum WASMMiscEXTOpcode {
typedef enum WASMSimdEXTOpcode {
/* memory instruction */
SIMD_v128_load = 0x00,
SIMD_i16x8_load8x8_s = 0x01,
SIMD_i16x8_load8x8_u = 0x02,
SIMD_i32x4_load16x4_s = 0x03,
SIMD_i32x4_load16x4_u = 0x04,
SIMD_i64x2_load32x2_s = 0x05,
SIMD_i64x2_load32x2_u = 0x06,
SIMD_v8x16_load_splat = 0x07,
SIMD_v16x8_load_splat = 0x08,
SIMD_v32x4_load_splat = 0x09,
SIMD_v64x2_load_splat = 0x0a,
SIMD_v128_store = 0x0b,
SIMD_v128_load = 0x00,
SIMD_v128_load8x8_s = 0x01,
SIMD_v128_load8x8_u = 0x02,
SIMD_v128_load16x4_s = 0x03,
SIMD_v128_load16x4_u = 0x04,
SIMD_v128_load32x2_s = 0x05,
SIMD_v128_load32x2_u = 0x06,
SIMD_v128_load8_splat = 0x07,
SIMD_v128_load16_splat = 0x08,
SIMD_v128_load32_splat = 0x09,
SIMD_v128_load64_splat = 0x0a,
SIMD_v128_store = 0x0b,
/* basic operation */
SIMD_v128_const = 0x0c,
@ -391,107 +391,170 @@ typedef enum WASMSimdEXTOpcode {
SIMD_f64x2_ge = 0x4c,
/* v128 operation */
SIMD_v128_not = 0x4d,
SIMD_v128_and = 0x4e,
SIMD_v128_andnot = 0x4f,
SIMD_v128_or = 0x50,
SIMD_v128_xor = 0x51,
SIMD_v128_not = 0x4d,
SIMD_v128_and = 0x4e,
SIMD_v128_andnot = 0x4f,
SIMD_v128_or = 0x50,
SIMD_v128_xor = 0x51,
SIMD_v128_bitselect = 0x52,
SIMD_v128_any_true = 0x53,
/* Load Lane Operation */
SIMD_v128_load8_lane = 0x54,
SIMD_v128_load16_lane = 0x55,
SIMD_v128_load32_lane = 0x56,
SIMD_v128_load64_lane = 0x57,
SIMD_v128_store8_lane = 0x58,
SIMD_v128_store16_lane = 0x59,
SIMD_v128_store32_lane = 0x5a,
SIMD_v128_store64_lane = 0x5b,
SIMD_v128_load32_zero = 0x5c,
SIMD_v128_load64_zero = 0x5d,
/* Float conversion */
SIMD_f32x4_demote_f64x2_zero = 0x5e,
SIMD_f64x2_promote_low_f32x4_zero = 0x5f,
/* i8x16 Operation */
SIMD_i8x16_abs = 0x60,
SIMD_i8x16_neg = 0x61,
SIMD_i8x16_any_true = 0x62,
SIMD_i8x16_popcnt = 0x62,
SIMD_i8x16_all_true = 0x63,
SIMD_i8x16_bitmask = 0x64,
SIMD_i8x16_narrow_i16x8_s = 0x65,
SIMD_i8x16_narrow_i16x8_u = 0x66,
SIMD_f32x4_ceil = 0x67,
SIMD_f32x4_floor = 0x68,
SIMD_f32x4_trunc = 0x69,
SIMD_f32x4_nearest = 0x6a,
SIMD_i8x16_shl = 0x6b,
SIMD_i8x16_shr_s = 0x6c,
SIMD_i8x16_shr_u = 0x6d,
SIMD_i8x16_add = 0x6e,
SIMD_i8x16_add_saturate_s = 0x6f,
SIMD_i8x16_add_saturate_u = 0x70,
SIMD_i8x16_add_sat_s = 0x6f,
SIMD_i8x16_add_sat_u = 0x70,
SIMD_i8x16_sub = 0x71,
SIMD_i8x16_sub_saturate_s = 0x72,
SIMD_i8x16_sub_saturate_u = 0x73,
SIMD_i8x16_sub_sat_s = 0x72,
SIMD_i8x16_sub_sat_u = 0x73,
SIMD_f64x2_ceil = 0x74,
SIMD_f64x2_floor = 0x75,
SIMD_i8x16_min_s = 0x76,
SIMD_i8x16_min_u = 0x77,
SIMD_i8x16_max_s = 0x78,
SIMD_i8x16_max_u = 0x79,
SIMD_f64x2_trunc = 0x7a,
SIMD_i8x16_avgr_u = 0x7b,
SIMD_i16x8_extadd_pairwise_i8x16_s = 0x7c,
SIMD_i16x8_extadd_pairwise_i8x16_u = 0x7d,
SIMD_i32x4_extadd_pairwise_i16x8_s = 0x7e,
SIMD_i32x4_extadd_pairwise_i16x8_u = 0x7f,
/* i16x8 operation */
SIMD_i16x8_abs = 0x80,
SIMD_i16x8_neg = 0x81,
SIMD_i16x8_any_true = 0x82,
SIMD_i16x8_q15mulr_sat_s = 0x82,
SIMD_i16x8_all_true = 0x83,
SIMD_i16x8_bitmask = 0x84,
SIMD_i16x8_narrow_i32x4_s = 0x85,
SIMD_i16x8_narrow_i32x4_u = 0x86,
SIMD_i16x8_widen_low_i8x16_s = 0x87,
SIMD_i16x8_widen_high_i8x16_s = 0x88,
SIMD_i16x8_widen_low_i8x16_u = 0x89,
SIMD_i16x8_widen_high_i8x16_u = 0x8a,
SIMD_i16x8_extend_low_i8x16_s = 0x87,
SIMD_i16x8_extend_high_i8x16_s = 0x88,
SIMD_i16x8_extend_low_i8x16_u = 0x89,
SIMD_i16x8_extend_high_i8x16_u = 0x8a,
SIMD_i16x8_shl = 0x8b,
SIMD_i16x8_shr_s = 0x8c,
SIMD_i16x8_shr_u = 0x8d,
SIMD_i16x8_add = 0x8e,
SIMD_i16x8_add_saturate_s = 0x8f,
SIMD_i16x8_add_saturate_u = 0x90,
SIMD_i16x8_add_sat_s = 0x8f,
SIMD_i16x8_add_sat_u = 0x90,
SIMD_i16x8_sub = 0x91,
SIMD_i16x8_sub_saturate_s = 0x92,
SIMD_i16x8_sub_saturate_u = 0x93,
SIMD_i16x8_sub_sat_s = 0x92,
SIMD_i16x8_sub_sat_u = 0x93,
SIMD_f64x2_nearest = 0x94,
SIMD_i16x8_mul = 0x95,
SIMD_i16x8_min_s = 0x96,
SIMD_i16x8_min_u = 0x97,
SIMD_i16x8_max_s = 0x98,
SIMD_i16x8_max_u = 0x99,
/* placeholder = 0x9a */
SIMD_i16x8_avgr_u = 0x9b,
SIMD_i16x8_extmul_low_i8x16_s = 0x9c,
SIMD_i16x8_extmul_high_i8x16_s = 0x9d,
SIMD_i16x8_extmul_low_i8x16_u = 0x9e,
SIMD_i16x8_extmul_high_i8x16_u = 0x9f,
/* i32x4 operation */
SIMD_i32x4_abs = 0xa0,
SIMD_i32x4_neg = 0xa1,
SIMD_i32x4_any_true = 0xa2,
/* placeholder = 0xa2 */
SIMD_i32x4_all_true = 0xa3,
SIMD_i32x4_bitmask = 0xa4,
SIMD_i32x4_widen_low_i16x8_s = 0xa7,
SIMD_i32x4_widen_high_i16x8_s = 0xa8,
SIMD_i32x4_widen_low_i16x8_u = 0xa9,
SIMD_i32x4_widen_high_i16x8_u = 0xaa,
SIMD_i32x4_narrow_i64x2_s = 0xa5,
SIMD_i32x4_narrow_i64x2_u = 0xa6,
SIMD_i32x4_extend_low_i16x8_s = 0xa7,
SIMD_i32x4_extend_high_i16x8_s = 0xa8,
SIMD_i32x4_extend_low_i16x8_u = 0xa9,
SIMD_i32x4_extend_high_i16x8_u = 0xaa,
SIMD_i32x4_shl = 0xab,
SIMD_i32x4_shr_s = 0xac,
SIMD_i32x4_shr_u = 0xad,
SIMD_i32x4_add = 0xae,
SIMD_i32x4_add_sat_s = 0xaf,
SIMD_i32x4_add_sat_u = 0xb0,
SIMD_i32x4_sub = 0xb1,
SIMD_i32x4_sub_sat_s = 0xb2,
SIMD_i32x4_sub_sat_u = 0xb3,
/* placeholder = 0xb4 */
SIMD_i32x4_mul = 0xb5,
SIMD_i32x4_min_s = 0xb6,
SIMD_i32x4_min_u = 0xb7,
SIMD_i32x4_max_s = 0xb8,
SIMD_i32x4_max_u = 0xb9,
SIMD_i32x4_dot_i16x8_s = 0xba,
SIMD_i32x4_avgr_u = 0xbb,
SIMD_i32x4_extmul_low_i16x8_s = 0xbc,
SIMD_i32x4_extmul_high_i16x8_s = 0xbd,
SIMD_i32x4_extmul_low_i16x8_u = 0xbe,
SIMD_i32x4_extmul_high_i16x8_u = 0xbf,
/* i64x2 operation */
SIMD_i64x2_neg = 0xc1,
SIMD_i64x2_shl = 0xcb,
SIMD_i64x2_shr_s = 0xcc,
SIMD_i64x2_shr_u = 0xcd,
SIMD_i64x2_add = 0xce,
SIMD_i64x2_sub = 0xd1,
SIMD_i64x2_mul = 0xd5,
/* float ceil/floor/trunc/nearest */
SIMD_f32x4_ceil = 0xd8,
SIMD_f32x4_floor = 0xd9,
SIMD_f32x4_trunc = 0xda,
SIMD_f32x4_nearest = 0xdb,
SIMD_f64x2_ceil = 0xdc,
SIMD_f64x2_floor = 0xdd,
SIMD_f64x2_trunc = 0xde,
SIMD_f64x2_nearest = 0xdf,
SIMD_i64x2_abs = 0xc0,
SIMD_i64x2_neg = 0xc1,
/* placeholder = 0xc2 */
SIMD_i64x2_all_true = 0xc3,
SIMD_i64x2_bitmask = 0xc4,
/* placeholder = 0xc5 */
/* placeholder = 0xc6 */
SIMD_i64x2_extend_low_i32x4_s = 0xc7,
SIMD_i64x2_extend_high_i32x4_s = 0xc8,
SIMD_i64x2_extend_low_i32x4_u = 0xc9,
SIMD_i64x2_extend_high_i32x4_u = 0xca,
SIMD_i64x2_shl = 0xcb,
SIMD_i64x2_shr_s = 0xcc,
SIMD_i64x2_shr_u = 0xcd,
SIMD_i64x2_add = 0xce,
/* placeholder = 0xcf */
/* placeholder = 0xd0 */
SIMD_i64x2_sub = 0xd1,
/* placeholder = 0xd2 */
/* placeholder = 0xd3 */
/* placeholder = 0xd4 */
SIMD_i64x2_mul = 0xd5,
SIMD_i64x2_eq = 0xd6,
SIMD_i64x2_ne = 0xd7,
SIMD_i64x2_lt_s = 0xd8,
SIMD_i64x2_gt_s = 0xd9,
SIMD_i64x2_le_s = 0xda,
SIMD_i64x2_ge_s = 0xdb,
SIMD_i64x2_extmul_low_i32x4_s = 0xdc,
SIMD_i64x2_extmul_high_i32x4_s = 0xdd,
SIMD_i64x2_extmul_low_i32x4_u = 0xde,
SIMD_i64x2_extmul_high_i32x4_u = 0xdf,
/* f32x4 operation */
SIMD_f32x4_abs = 0xe0,
SIMD_f32x4_neg = 0xe1,
SIMD_f32x4_round = 0xe2,
SIMD_f32x4_sqrt = 0xe3,
SIMD_f32x4_add = 0xe4,
SIMD_f32x4_sub = 0xe5,
@ -499,10 +562,13 @@ typedef enum WASMSimdEXTOpcode {
SIMD_f32x4_div = 0xe7,
SIMD_f32x4_min = 0xe8,
SIMD_f32x4_max = 0xe9,
SIMD_f32x4_pmin = 0xea,
SIMD_f32x4_pmax = 0xeb,
/* f64x2 operation */
SIMD_f64x2_abs = 0xec,
SIMD_f64x2_neg = 0xed,
SIMD_f64x2_round = 0xee,
SIMD_f64x2_sqrt = 0xef,
SIMD_f64x2_add = 0xf0,
SIMD_f64x2_sub = 0xf1,
@ -510,12 +576,18 @@ typedef enum WASMSimdEXTOpcode {
SIMD_f64x2_div = 0xf3,
SIMD_f64x2_min = 0xf4,
SIMD_f64x2_max = 0xf5,
SIMD_f64x2_pmin = 0xf6,
SIMD_f64x2_pmax = 0xf7,
/* conversion operation */
SIMD_i32x4_trunc_sat_f32x4_s = 0xf8,
SIMD_i32x4_trunc_sat_f32x4_u = 0xf9,
SIMD_f32x4_convert_i32x4_s = 0xfa,
SIMD_f32x4_convert_i32x4_u = 0xfb,
SIMD_i32x4_trunc_sat_f32x4_s = 0xf8,
SIMD_i32x4_trunc_sat_f32x4_u = 0xf9,
SIMD_f32x4_convert_i32x4_s = 0xfa,
SIMD_f32x4_convert_i32x4_u = 0xfb,
SIMD_i32x4_trunc_sat_f64x2_s_zero = 0xfc,
SIMD_i32x4_trunc_sat_f64x2_u_zero = 0xfd,
SIMD_f64x2_convert_low_i32x4_s = 0xfe,
SIMD_f64x2_convert_low_i32x4_u = 0xff,
} WASMSimdEXTOpcode;
typedef enum WASMAtomicEXTOpcode {