Enable SIMD for AARCH64 Platform (#11) (#610)

Signed-off-by: Wu Zhongmin <zhongmin.wzm@antgroup.com>
Signed-off-by: Xiaokang Qin <xiaokang.qxk@antgroup.com>

Co-authored-by: Wu Zhongmin <zhongmin.wzm@antgroup.com>

Co-authored-by: Wu Zhongmin <zhongmin.wzm@antgroup.com>
This commit is contained in:
Xiaokang Qin
2021-04-13 14:45:51 +08:00
committed by GitHub
parent 8b96f4fb71
commit 46db353017
7 changed files with 557 additions and 8 deletions

View File

@ -8,6 +8,13 @@
#include "../aot_emit_exception.h"
#include "../../aot/aot_runtime.h"
/*
 * Tell whether the compilation target is an x86 architecture.
 *
 * Returns true when comp_ctx->target_arch starts with "x86_64" or with
 * "i386", and false otherwise.
 */
static bool
is_target_x86(AOTCompContext *comp_ctx)
{
    const char *arch = comp_ctx->target_arch;

    if (strncmp(arch, "x86_64", 6) == 0) {
        return true;
    }

    return strncmp(arch, "i386", 4) == 0;
}
static LLVMValueRef
build_intx16_vector(const AOTCompContext *comp_ctx,
const LLVMTypeRef element_type,
@ -86,7 +93,7 @@ fail:
/* TODO: instructions for other CPUs */
/* shufflevector is not an option, since it requires *mask as a const */
bool
aot_compile_simd_swizzle(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
aot_compile_simd_swizzle_x86(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
{
LLVMValueRef vector, mask, max_lanes, condition, mask_lanes, result;
LLVMTypeRef param_types[2];
@ -151,6 +158,109 @@ fail:
return false;
}
/*
 * Emit LLVM IR for the 16-lane byte swizzle.
 *
 * On x86 targets the work is delegated to aot_compile_simd_swizzle_x86().
 * On every other target a generic sequence is emitted:
 *   1. fetch the mask, then the vector, as <16 x i8> values;
 *   2. mark the lanes whose mask value is >= 16 (unsigned compare);
 *   3. replace those out-of-range indices by 0 so that the dynamic
 *      extractelement below always gets an index inside the vector;
 *   4. for every lane i, read vector[idx[i]] and substitute 0 when lane i
 *      was marked as out of range;
 *   5. bitcast the assembled <16 x i8> to the i64x2 form and push it.
 *
 * Returns true on success and false when any build step fails.
 */
bool
aot_compile_simd_swizzle(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
{
    LLVMValueRef vector, mask, default_lane_value, condition, max_lane_id,
        result, new_vector, idx, id, replace_with_zero, elem, elem_or_zero;
    /* first lane id that is out of range for a 16-lane vector */
    int const_lane_ids[16] = { 16, 16, 16, 16, 16, 16, 16, 16,
                               16, 16, 16, 16, 16, 16, 16, 16 };
    int const_zeros[16] = { 0 };
    uint8 i;

    if (is_target_x86(comp_ctx)) {
        return aot_compile_simd_swizzle_x86(comp_ctx, func_ctx);
    }

    if (!(mask = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
                                           V128_i8x16_TYPE, "mask"))) {
        goto fail;
    }

    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
                                             V128_i8x16_TYPE, "vec"))) {
        goto fail;
    }

    /* start from an undefined vector; every lane is overwritten below */
    if (!(new_vector = LLVMGetUndef(V128_i8x16_TYPE))) {
        HANDLE_FAILURE("LLVMGetUndef");
        goto fail;
    }

    /* icmp uge <16 x i8> mask, <16, 16, 16, 16, ...> */
    if (!(max_lane_id =
              build_intx16_vector(comp_ctx, INT8_TYPE, const_lane_ids))) {
        goto fail;
    }

    if (!(condition = LLVMBuildICmp(comp_ctx->builder, LLVMIntUGE, mask,
                                    max_lane_id, "out_of_range"))) {
        HANDLE_FAILURE("LLVMBuildICmp");
        goto fail;
    }

    /* if the id is out of range (>= 16), use 0 as the id instead */
    if (!(default_lane_value =
              build_intx16_vector(comp_ctx, INT8_TYPE, const_zeros))) {
        goto fail;
    }

    if (!(idx = LLVMBuildSelect(comp_ctx->builder, condition,
                                default_lane_value, mask, "mask"))) {
        HANDLE_FAILURE("LLVMBuildSelect");
        goto fail;
    }

    for (i = 0; i < 16; i++) {
        /* id = idx[i], already forced into range */
        if (!(id = LLVMBuildExtractElement(comp_ctx->builder, idx,
                                           I8_CONST(i), "id"))) {
            HANDLE_FAILURE("LLVMBuildExtractElement");
            goto fail;
        }

        /* was the original mask[i] out of range? */
        if (!(replace_with_zero =
                  LLVMBuildExtractElement(comp_ctx->builder, condition,
                                          I8_CONST(i), "replace_with_zero"))) {
            HANDLE_FAILURE("LLVMBuildExtractElement");
            goto fail;
        }

        if (!(elem = LLVMBuildExtractElement(comp_ctx->builder, vector, id,
                                             "vector[mask[i]]"))) {
            HANDLE_FAILURE("LLVMBuildExtractElement");
            goto fail;
        }

        /* out-of-range lanes yield 0 rather than vector[0] */
        if (!(elem_or_zero =
                  LLVMBuildSelect(comp_ctx->builder, replace_with_zero,
                                  I8_CONST(0), elem, "elem_or_zero"))) {
            HANDLE_FAILURE("LLVMBuildSelect");
            goto fail;
        }

        if (!(new_vector = LLVMBuildInsertElement(comp_ctx->builder,
                                                  new_vector, elem_or_zero,
                                                  I8_CONST(i), "new_vector"))) {
            HANDLE_FAILURE("LLVMBuildInsertElement");
            goto fail;
        }
    }

    if (!(result = LLVMBuildBitCast(comp_ctx->builder, new_vector,
                                    V128_i64x2_TYPE, "ret"))) {
        HANDLE_FAILURE("LLVMBuildBitCast");
        goto fail;
    }

    PUSH_V128(result);

    return true;
fail:
    return false;
}
static bool
aot_compile_simd_extract(AOTCompContext *comp_ctx,
AOTFuncContext *func_ctx,

View File

@ -9,6 +9,13 @@
#include "../aot_emit_numberic.h"
#include "../../aot/aot_runtime.h"
/*
 * Check whether code is being generated for an x86 target.
 *
 * The target architecture name in comp_ctx->target_arch is matched by
 * prefix against "x86_64" and "i386"; a match on either yields true.
 */
static bool
is_target_x86(AOTCompContext *comp_ctx)
{
    bool is_x86_64 = strncmp(comp_ctx->target_arch, "x86_64", 6) == 0;
    bool is_i386 = strncmp(comp_ctx->target_arch, "i386", 4) == 0;

    return is_x86_64 || is_i386;
}
static bool
simd_integer_narrow(AOTCompContext *comp_ctx,
AOTFuncContext *func_ctx,
@ -49,8 +56,85 @@ fail:
return false;
}
/*
 * Build a constant 4-lane integer vector.
 *
 * element_type is the LLVM integer type of each lane and element_value
 * points to at least 4 lane values, which are passed to LLVMConstInt()
 * with sign extension enabled.
 *
 * Returns the constant vector, or NULL if a constant could not be created.
 */
static LLVMValueRef
build_intx4_vector(const AOTCompContext *comp_ctx,
                   const LLVMTypeRef element_type,
                   const int *element_value)
{
    LLVMValueRef vector, elements[4];
    const unsigned lane_count = sizeof(elements) / sizeof(elements[0]);
    unsigned i;

    for (i = 0; i < lane_count; i++) {
        if (!(elements[i] =
                  LLVMConstInt(element_type, element_value[i], true))) {
            HANDLE_FAILURE("LLVMConstInt");
            goto fail;
        }
    }

    if (!(vector = LLVMConstVector(elements, lane_count))) {
        HANDLE_FAILURE("LLVMConstVector");
        goto fail;
    }

    return vector;
fail:
    return NULL;
}
/*
 * Build a constant 8-lane integer vector.
 *
 * element_type is the LLVM integer type of each lane and element_value
 * points to at least 8 lane values, which are passed to LLVMConstInt()
 * with sign extension enabled.
 *
 * Returns the constant vector, or NULL if a constant could not be created.
 */
static LLVMValueRef
build_intx8_vector(const AOTCompContext *comp_ctx,
                   const LLVMTypeRef element_type,
                   const int *element_value)
{
    LLVMValueRef vector, elements[8];
    const unsigned lane_count = sizeof(elements) / sizeof(elements[0]);
    unsigned i;

    for (i = 0; i < lane_count; i++) {
        if (!(elements[i] =
                  LLVMConstInt(element_type, element_value[i], true))) {
            HANDLE_FAILURE("LLVMConstInt");
            goto fail;
        }
    }

    if (!(vector = LLVMConstVector(elements, lane_count))) {
        HANDLE_FAILURE("LLVMConstVector");
        goto fail;
    }

    return vector;
fail:
    return NULL;
}
/*
 * Build a constant 16-lane integer vector.
 *
 * element_type is the LLVM integer type of each lane and element_value
 * points to at least 16 lane values, which are passed to LLVMConstInt()
 * with sign extension enabled.
 *
 * Returns the constant vector, or NULL if a constant could not be created.
 */
static LLVMValueRef
build_intx16_vector(const AOTCompContext *comp_ctx,
                    const LLVMTypeRef element_type,
                    const int *element_value)
{
    LLVMValueRef vector, elements[16];
    const unsigned lane_count = sizeof(elements) / sizeof(elements[0]);
    unsigned i;

    for (i = 0; i < lane_count; i++) {
        if (!(elements[i] =
                  LLVMConstInt(element_type, element_value[i], true))) {
            HANDLE_FAILURE("LLVMConstInt");
            goto fail;
        }
    }

    if (!(vector = LLVMConstVector(elements, lane_count))) {
        HANDLE_FAILURE("LLVMConstVector");
        goto fail;
    }

    return vector;
fail:
    return NULL;
}
bool
aot_compile_simd_i8x16_narrow_i16x8(AOTCompContext *comp_ctx,
aot_compile_simd_i8x16_narrow_i16x8_x86(AOTCompContext *comp_ctx,
AOTFuncContext *func_ctx,
bool is_signed)
{
@ -60,7 +144,7 @@ aot_compile_simd_i8x16_narrow_i16x8(AOTCompContext *comp_ctx,
}
bool
aot_compile_simd_i16x8_narrow_i32x4(AOTCompContext *comp_ctx,
aot_compile_simd_i16x8_narrow_i32x4_x86(AOTCompContext *comp_ctx,
AOTFuncContext *func_ctx,
bool is_signed)
{
@ -69,6 +153,273 @@ aot_compile_simd_i16x8_narrow_i32x4(AOTCompContext *comp_ctx,
is_signed ? "llvm.x86.sse2.packssdw.128" : "llvm.x86.sse41.packusdw");
}
/*
 * Emit LLVM IR that narrows two i16x8 vectors into one i8x16 vector with
 * saturation.
 *
 * On x86 targets the work is delegated to
 * aot_compile_simd_i8x16_narrow_i16x8_x86(). On every other target a
 * generic sequence is emitted:
 *   1. fetch vec2, then vec1, as <8 x i16> values;
 *   2. clamp every lane into [min, max], where the bounds are
 *      [-128, 127] when is_signed is true and [0, 255] otherwise. The
 *      comparisons are signed in both modes, so in the unsigned mode
 *      negative input lanes clamp to 0;
 *   3. truncate both clamped vectors to <8 x i8>;
 *   4. concatenate them (vec1 lanes first) with a shufflevector whose
 *      mask is 0..15;
 *   5. bitcast to the i64x2 form and push the result.
 *
 * Returns true on success and false when any build step fails.
 */
bool
aot_compile_simd_i8x16_narrow_i16x8(AOTCompContext *comp_ctx,
                                    AOTFuncContext *func_ctx,
                                    bool is_signed)
{
    /*
     * Saturation bounds. The signed minimum is spelled as -128 so the
     * value fits the i16 lane type and does not depend on LLVMConstInt
     * truncating an out-of-range 0xff80.
     */
    static const int min_s_array[8] = { -128, -128, -128, -128,
                                        -128, -128, -128, -128 };
    static const int max_s_array[8] = { 127, 127, 127, 127,
                                        127, 127, 127, 127 };
    static const int min_u_array[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
    static const int max_u_array[8] = { 255, 255, 255, 255,
                                        255, 255, 255, 255 };
    /* identity mask over the 16 lanes of <vec1, vec2> */
    static const int shuffle_array[16] = { 0, 1, 2,  3,  4,  5,  6,  7,
                                           8, 9, 10, 11, 12, 13, 14, 15 };
    LLVMValueRef vector1, vector2, result, vector_min, vector_max, shuffle,
        vector1_clamped, vector2_clamped, vector1_trunced, vector2_trunced,
        shuffle_vector;
    LLVMValueRef v1_gt_max, v1_lt_min, v2_gt_max, v2_lt_min;

    if (is_target_x86(comp_ctx)) {
        return aot_compile_simd_i8x16_narrow_i16x8_x86(comp_ctx, func_ctx,
                                                       is_signed);
    }

    if (!(vector2 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
                                              V128_i16x8_TYPE, "vec2"))) {
        goto fail;
    }

    if (!(vector1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
                                              V128_i16x8_TYPE, "vec1"))) {
        goto fail;
    }

    if (!(vector_min = build_intx8_vector(
              comp_ctx, INT16_TYPE, is_signed ? min_s_array : min_u_array))) {
        goto fail;
    }

    if (!(vector_max = build_intx8_vector(
              comp_ctx, INT16_TYPE, is_signed ? max_s_array : max_u_array))) {
        goto fail;
    }

    if (!(shuffle = build_intx16_vector(comp_ctx, I32_TYPE, shuffle_array))) {
        goto fail;
    }

    /* lane-wise range checks, signed in both modes */
    if (!(v1_gt_max = LLVMBuildICmp(comp_ctx->builder, LLVMIntSGT, vector1,
                                    vector_max, "v1_great_than_max"))) {
        HANDLE_FAILURE("LLVMBuildICmp");
        goto fail;
    }

    if (!(v2_gt_max = LLVMBuildICmp(comp_ctx->builder, LLVMIntSGT, vector2,
                                    vector_max, "v2_great_than_max"))) {
        HANDLE_FAILURE("LLVMBuildICmp");
        goto fail;
    }

    if (!(v1_lt_min = LLVMBuildICmp(comp_ctx->builder, LLVMIntSLT, vector1,
                                    vector_min, "v1_less_than_min"))) {
        HANDLE_FAILURE("LLVMBuildICmp");
        goto fail;
    }

    if (!(v2_lt_min = LLVMBuildICmp(comp_ctx->builder, LLVMIntSLT, vector2,
                                    vector_min, "v2_less_than_min"))) {
        HANDLE_FAILURE("LLVMBuildICmp");
        goto fail;
    }

    /* clamp vec1: upper bound first, then lower bound */
    if (!(vector1_clamped =
              LLVMBuildSelect(comp_ctx->builder, v1_gt_max, vector_max,
                              vector1, "vector1_clamped_max"))) {
        HANDLE_FAILURE("LLVMBuildSelect");
        goto fail;
    }

    if (!(vector1_clamped =
              LLVMBuildSelect(comp_ctx->builder, v1_lt_min, vector_min,
                              vector1_clamped, "vector1_clamped_min"))) {
        HANDLE_FAILURE("LLVMBuildSelect");
        goto fail;
    }

    /* clamp vec2 the same way */
    if (!(vector2_clamped =
              LLVMBuildSelect(comp_ctx->builder, v2_gt_max, vector_max,
                              vector2, "vector2_clamped_max"))) {
        HANDLE_FAILURE("LLVMBuildSelect");
        goto fail;
    }

    if (!(vector2_clamped =
              LLVMBuildSelect(comp_ctx->builder, v2_lt_min, vector_min,
                              vector2_clamped, "vector2_clamped_min"))) {
        HANDLE_FAILURE("LLVMBuildSelect");
        goto fail;
    }

    /* <8 x i16> -> <8 x i8>; the values already fit after clamping */
    if (!(vector1_trunced =
              LLVMBuildTrunc(comp_ctx->builder, vector1_clamped,
                             LLVMVectorType(INT8_TYPE, 8),
                             "vector1_trunced"))) {
        HANDLE_FAILURE("LLVMBuildTrunc");
        goto fail;
    }

    if (!(vector2_trunced =
              LLVMBuildTrunc(comp_ctx->builder, vector2_clamped,
                             LLVMVectorType(INT8_TYPE, 8),
                             "vector2_trunced"))) {
        HANDLE_FAILURE("LLVMBuildTrunc");
        goto fail;
    }

    /* join the two halves into one <16 x i8> */
    if (!(shuffle_vector = LLVMBuildShuffleVector(
              comp_ctx->builder, vector1_trunced, vector2_trunced, shuffle,
              "shuffle_vector"))) {
        HANDLE_FAILURE("LLVMBuildShuffleVector");
        goto fail;
    }

    if (!(result = LLVMBuildBitCast(comp_ctx->builder, shuffle_vector,
                                    V128_i64x2_TYPE, "ret"))) {
        HANDLE_FAILURE("LLVMBuildBitCast");
        goto fail;
    }

    PUSH_V128(result);

    return true;
fail:
    return false;
}
/*
 * Emit LLVM IR that narrows two i32x4 vectors into one i16x8 vector with
 * saturation.
 *
 * On x86 targets the work is delegated to
 * aot_compile_simd_i16x8_narrow_i32x4_x86(). On every other target a
 * generic sequence is emitted:
 *   1. fetch vec2, then vec1, as <4 x i32> values;
 *   2. clamp every lane into [min, max], where the bounds are
 *      [-32768, 32767] when is_signed is true and [0, 65535] otherwise.
 *      The comparisons are signed in both modes, so in the unsigned mode
 *      negative input lanes clamp to 0;
 *   3. truncate both clamped vectors to <4 x i16>;
 *   4. concatenate them (vec1 lanes first) with a shufflevector whose
 *      mask is 0..7;
 *   5. bitcast to the i64x2 form and push the result.
 *
 * Returns true on success and false when any build step fails.
 */
bool
aot_compile_simd_i16x8_narrow_i32x4(AOTCompContext *comp_ctx,
                                    AOTFuncContext *func_ctx,
                                    bool is_signed)
{
    /*
     * Saturation bounds. The signed minimum is spelled as -32768 instead
     * of 0xffff8000: the hexadecimal constant does not fit in int, so
     * storing it in an int array relies on an implementation-defined
     * conversion.
     */
    static const int min_s_array[4] = { -32768, -32768, -32768, -32768 };
    static const int max_s_array[4] = { 32767, 32767, 32767, 32767 };
    static const int min_u_array[4] = { 0, 0, 0, 0 };
    static const int max_u_array[4] = { 65535, 65535, 65535, 65535 };
    /* identity mask over the 8 lanes of <vec1, vec2> */
    static const int shuffle_array[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
    LLVMValueRef vector1, vector2, result, vector_min, vector_max, shuffle,
        vector1_clamped, vector2_clamped, vector1_trunced, vector2_trunced,
        shuffle_vector;
    LLVMValueRef v1_gt_max, v1_lt_min, v2_gt_max, v2_lt_min;

    if (is_target_x86(comp_ctx)) {
        return aot_compile_simd_i16x8_narrow_i32x4_x86(comp_ctx, func_ctx,
                                                       is_signed);
    }

    if (!(vector2 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
                                              V128_i32x4_TYPE, "vec2"))) {
        goto fail;
    }

    if (!(vector1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
                                              V128_i32x4_TYPE, "vec1"))) {
        goto fail;
    }

    if (!(vector_min = build_intx4_vector(
              comp_ctx, I32_TYPE, is_signed ? min_s_array : min_u_array))) {
        goto fail;
    }

    if (!(vector_max = build_intx4_vector(
              comp_ctx, I32_TYPE, is_signed ? max_s_array : max_u_array))) {
        goto fail;
    }

    if (!(shuffle = build_intx8_vector(comp_ctx, I32_TYPE, shuffle_array))) {
        goto fail;
    }

    /* lane-wise range checks, signed in both modes */
    if (!(v1_gt_max = LLVMBuildICmp(comp_ctx->builder, LLVMIntSGT, vector1,
                                    vector_max, "v1_great_than_max"))) {
        HANDLE_FAILURE("LLVMBuildICmp");
        goto fail;
    }

    if (!(v2_gt_max = LLVMBuildICmp(comp_ctx->builder, LLVMIntSGT, vector2,
                                    vector_max, "v2_great_than_max"))) {
        HANDLE_FAILURE("LLVMBuildICmp");
        goto fail;
    }

    if (!(v1_lt_min = LLVMBuildICmp(comp_ctx->builder, LLVMIntSLT, vector1,
                                    vector_min, "v1_less_than_min"))) {
        HANDLE_FAILURE("LLVMBuildICmp");
        goto fail;
    }

    if (!(v2_lt_min = LLVMBuildICmp(comp_ctx->builder, LLVMIntSLT, vector2,
                                    vector_min, "v2_less_than_min"))) {
        HANDLE_FAILURE("LLVMBuildICmp");
        goto fail;
    }

    /* clamp vec1: upper bound first, then lower bound */
    if (!(vector1_clamped =
              LLVMBuildSelect(comp_ctx->builder, v1_gt_max, vector_max,
                              vector1, "vector1_clamped_max"))) {
        HANDLE_FAILURE("LLVMBuildSelect");
        goto fail;
    }

    if (!(vector1_clamped =
              LLVMBuildSelect(comp_ctx->builder, v1_lt_min, vector_min,
                              vector1_clamped, "vector1_clamped_min"))) {
        HANDLE_FAILURE("LLVMBuildSelect");
        goto fail;
    }

    /* clamp vec2 the same way */
    if (!(vector2_clamped =
              LLVMBuildSelect(comp_ctx->builder, v2_gt_max, vector_max,
                              vector2, "vector2_clamped_max"))) {
        HANDLE_FAILURE("LLVMBuildSelect");
        goto fail;
    }

    if (!(vector2_clamped =
              LLVMBuildSelect(comp_ctx->builder, v2_lt_min, vector_min,
                              vector2_clamped, "vector2_clamped_min"))) {
        HANDLE_FAILURE("LLVMBuildSelect");
        goto fail;
    }

    /* <4 x i32> -> <4 x i16>; the values already fit after clamping */
    if (!(vector1_trunced =
              LLVMBuildTrunc(comp_ctx->builder, vector1_clamped,
                             LLVMVectorType(INT16_TYPE, 4),
                             "vector1_trunced"))) {
        HANDLE_FAILURE("LLVMBuildTrunc");
        goto fail;
    }

    if (!(vector2_trunced =
              LLVMBuildTrunc(comp_ctx->builder, vector2_clamped,
                             LLVMVectorType(INT16_TYPE, 4),
                             "vector2_trunced"))) {
        HANDLE_FAILURE("LLVMBuildTrunc");
        goto fail;
    }

    /* join the two halves into one <8 x i16> */
    if (!(shuffle_vector = LLVMBuildShuffleVector(
              comp_ctx->builder, vector1_trunced, vector2_trunced, shuffle,
              "shuffle_vector"))) {
        HANDLE_FAILURE("LLVMBuildShuffleVector");
        goto fail;
    }

    if (!(result = LLVMBuildBitCast(comp_ctx->builder, shuffle_vector,
                                    V128_i64x2_TYPE, "ret"))) {
        HANDLE_FAILURE("LLVMBuildBitCast");
        goto fail;
    }

    PUSH_V128(result);

    return true;
fail:
    return false;
}
bool
aot_compile_simd_i16x8_widen_i8x16(AOTCompContext *comp_ctx,
AOTFuncContext *func_ctx,