Implement AOT support for RISCV (#649)

Enable RISCV AOT support. The supported ABIs are LP64 and LP64D for riscv64, and ILP32 and ILP32D for riscv32.
For wamrc:
    use --target=riscv64/riscv32 to specify the target arch of output AOT file,
    use --target-abi=lp64d/lp64/ilp32d/ilp32 to specify the target ABI,
    if --target-abi isn't specified, by default lp64d is used for riscv64, and ilp32d is used for riscv32.

Signed-off-by: Huang Qi <huangqi3@xiaomi.com>
Co-authored-by: wenyongh <wenyong.huang@intel.com>
This commit is contained in:
Huang Qi
2021-07-22 11:16:47 +08:00
committed by GitHub
parent ea06c19a9d
commit e4023c8e02
29 changed files with 667 additions and 459 deletions

View File

@ -0,0 +1,148 @@
/*
* Copyright (C) 2019 Intel Corporation. All rights reserved.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
/*
* The float ABI macros used below are from the RISC-V C API:
* https://github.com/riscv/riscv-c-api-doc/blob/master/riscv-c-api.md
*
*/
/*
 * Floating-point argument registers, selected by the float ABI:
 *   RV_OP_LOADFPREG / RV_OP_STOREFPREG  opcode that loads / stores one FP
 *                                       register of the ABI's width
 *   RV_FPREG_SIZE                       size in bytes of one FP register
 *                                       slot (0 for the soft-float ABI, which
 *                                       defines no FP opcodes at all)
 */
#if defined(__riscv_float_abi_soft)
#define RV_FPREG_SIZE       0
#elif defined(__riscv_float_abi_single)
#define RV_OP_LOADFPREG     flw
#define RV_OP_STOREFPREG    fsw
#define RV_FPREG_SIZE       4
#elif defined(__riscv_float_abi_double)
#define RV_OP_LOADFPREG     fld
#define RV_OP_STOREFPREG    fsd
#define RV_FPREG_SIZE       8
#else
/* Fail here with a readable message instead of an "unknown opcode" later. */
#error "unsupported RISC-V float ABI: expected soft, single or double"
#endif

/*
 * Integer registers and the placement of the two register blocks:
 *   RV_OP_LOADREG / RV_OP_STOREREG  opcode that loads / stores one integer
 *                                   register
 *   RV_REG_SIZE, RV_REG_SHIFT       integer register size in bytes and its
 *                                   log2
 *   RV_FP_OFFSET, RV_INT_OFFSET     byte offsets of the FP block and of the
 *                                   integer block:
 *       __riscv_xlen == 32: integer block first, FP block right after the
 *                           8 integer slots
 *       otherwise:          FP block first, integer block right after the
 *                           8 FP slots
 */
#if __riscv_xlen == 32
#define RV_OP_LOADREG       lw
#define RV_OP_STOREREG      sw
#define RV_REG_SIZE         4
#define RV_REG_SHIFT        2
#define RV_FP_OFFSET        (8 * RV_REG_SIZE)
#define RV_INT_OFFSET       0
#else
#define RV_OP_LOADREG       ld
#define RV_OP_STOREREG      sd
#define RV_REG_SIZE         8
#define RV_REG_SHIFT        3
#define RV_FP_OFFSET        0
#define RV_INT_OFFSET       (8 * RV_FPREG_SIZE)
#endif
/*
 * Size of the frame that holds the saved fp/ra pair.
 *
 * It is always 16 bytes, even on RV32 where the pair only needs 8: the
 * RISC-V calling convention requires sp to be 16-byte aligned at procedure
 * entry, and when nstacks == 0 the callee is entered with sp exactly
 * RV_FRAME_SIZE below the sp this routine was entered with.
 */
#define RV_FRAME_SIZE       16

.text
.align 2
#ifndef BH_PLATFORM_DARWIN
        .globl  invokeNative
        .type   invokeNative, function
invokeNative:
#else
        .globl  _invokeNative
_invokeNative:
#endif /* end of BH_PLATFORM_DARWIN */

/*
 * Arguments passed in:
 *
 *   a0  function ptr
 *   a1  argv
 *   a2  nstacks   number of RV_REG_SIZE-byte stack slots to copy
 *
 * argv layout, as read below:
 *   RV_FP_OFFSET  .. +8 * RV_FPREG_SIZE   values for fa0-fa7
 *                                         (block absent for soft-float)
 *   RV_INT_OFFSET .. +8 * RV_REG_SIZE     values for a0-a7
 *   8 * RV_REG_SIZE + 8 * RV_FPREG_SIZE   first of the nstacks stack slots
 *
 * No argument/return register is written after the call returns, so the
 * callee's result is handed back to our caller unchanged.
 */

/*
 * Register usage:
 *
 *   sp (stack pointer)
 *      |- sd/sw to store 64/32-bit values from register to memory
 *      |- ld/lw to load from stack to register
 *   fp/s0 (frame pointer)
 *   a0-a7 (8 integer arguments)
 *      |- sd/sw to store
 *      |- ld/lw to load
 *   fa0-fa7 (8 float arguments)
 *      |- fsd/fsw to store
 *      |- fld/flw to load
 *   t0-t6 (temporary registers)
 *      |- caller saved
 */

        /* reserve an aligned frame to save return address and frame pointer */
        addi    sp, sp, -RV_FRAME_SIZE
        RV_OP_STOREREG  fp, 0 * RV_REG_SIZE(sp)     /* save frame pointer */
        RV_OP_STOREREG  ra, 1 * RV_REG_SIZE(sp)     /* save return address */

        mv      fp, sp          /* fp = bottom of the fixed frame */

        /* a0-a2 are about to be overwritten with the callee's arguments */
        mv      t0, a0          /* t0 = function ptr */
        mv      t1, a1          /* t1 = argv array address */
        mv      t2, a2          /* t2 = nstacks */

#ifndef __riscv_float_abi_soft
        /* fill in fa0-fa7 float registers */
        RV_OP_LOADFPREG fa0, RV_FP_OFFSET + 0 * RV_FPREG_SIZE(t1)   /* fa0 */
        RV_OP_LOADFPREG fa1, RV_FP_OFFSET + 1 * RV_FPREG_SIZE(t1)   /* fa1 */
        RV_OP_LOADFPREG fa2, RV_FP_OFFSET + 2 * RV_FPREG_SIZE(t1)   /* fa2 */
        RV_OP_LOADFPREG fa3, RV_FP_OFFSET + 3 * RV_FPREG_SIZE(t1)   /* fa3 */
        RV_OP_LOADFPREG fa4, RV_FP_OFFSET + 4 * RV_FPREG_SIZE(t1)   /* fa4 */
        RV_OP_LOADFPREG fa5, RV_FP_OFFSET + 5 * RV_FPREG_SIZE(t1)   /* fa5 */
        RV_OP_LOADFPREG fa6, RV_FP_OFFSET + 6 * RV_FPREG_SIZE(t1)   /* fa6 */
        RV_OP_LOADFPREG fa7, RV_FP_OFFSET + 7 * RV_FPREG_SIZE(t1)   /* fa7 */
#endif

        /* fill in a0-a7 integer registers */
        RV_OP_LOADREG   a0, RV_INT_OFFSET + 0 * RV_REG_SIZE(t1)     /* a0 */
        RV_OP_LOADREG   a1, RV_INT_OFFSET + 1 * RV_REG_SIZE(t1)     /* a1 */
        RV_OP_LOADREG   a2, RV_INT_OFFSET + 2 * RV_REG_SIZE(t1)     /* a2 */
        RV_OP_LOADREG   a3, RV_INT_OFFSET + 3 * RV_REG_SIZE(t1)     /* a3 */
        RV_OP_LOADREG   a4, RV_INT_OFFSET + 4 * RV_REG_SIZE(t1)     /* a4 */
        RV_OP_LOADREG   a5, RV_INT_OFFSET + 5 * RV_REG_SIZE(t1)     /* a5 */
        RV_OP_LOADREG   a6, RV_INT_OFFSET + 6 * RV_REG_SIZE(t1)     /* a6 */
        RV_OP_LOADREG   a7, RV_INT_OFFSET + 7 * RV_REG_SIZE(t1)     /* a7 */

        /* t1 points to stack args */
        /* RV_FPREG_SIZE is zero when __riscv_float_abi_soft is defined */
        addi    t1, t1, RV_REG_SIZE * 8 + RV_FPREG_SIZE * 8

        /* directly call the function if no args in stack,
           x0 always holds 0 */
        beq     t2, x0, call_func

        /* reserve enough stack space for function arguments */
        slli    t3, t2, RV_REG_SHIFT    /* t3 = nstacks * RV_REG_SIZE */
        sub     sp, sp, t3

        /* round sp down to a 16-byte boundary (-16 == ~15) */
        andi    sp, sp, -16

        /* t4 walks the reserved area upwards from sp */
        mv      t4, sp

        /* copy the remaining arguments from argv to our own stack area */
loop_stack_args:
        beq     t2, x0, call_func
        RV_OP_LOADREG   t5, 0(t1)       /* load stack argument, t5 = argv[i] */
        RV_OP_STOREREG  t5, 0(t4)       /* store t5 to reserved stack, sp[j] = t5 */
        addi    t1, t1, RV_REG_SIZE     /* move to next stack argument */
        addi    t4, t4, RV_REG_SIZE     /* move to next stack slot */
        addi    t2, t2, -1              /* nstacks = nstacks - 1 */
        j       loop_stack_args

call_func:
        jalr    t0

        /* restore registers pushed in stack or saved in another register */
return:
        mv      sp, fp          /* drop the stack-argument area, if any */
        RV_OP_LOADREG   fp, 0 * RV_REG_SIZE(sp)     /* load previous frame pointer */
        RV_OP_LOADREG   ra, 1 * RV_REG_SIZE(sp)     /* load previous return address */
        addi    sp, sp, RV_FRAME_SIZE               /* pop frame, restore sp */
        jr      ra

View File

@ -1,95 +0,0 @@
/*
* Copyright (C) 2019 Intel Corporation. All rights reserved.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
.text
.align 2
#ifndef BH_PLATFORM_DARWIN
        .globl  invokeNative
        .type   invokeNative, function
invokeNative:
#else
        .globl  _invokeNative
_invokeNative:
#endif /* end of BH_PLATFORM_DARWIN */

/*
 * Arguments passed in:
 *
 *   a0  function ptr
 *   a1  argv      words 0-7 are loaded into a0-a7, the stack arguments
 *                 start at byte offset 32
 *   a2  nstacks   number of 4-byte stack slots to copy
 *
 * No argument/return register is written after the call returns, so the
 * callee's result is handed back to our caller unchanged.
 */

/*
 * Register usage:
 *
 *   sp (stack pointer)
 *      |- sw to store 32-bit values from register to memory
 *      |- lw to load from stack to register
 *   fp/s0 (frame pointer)
 *   a0-a7 (8 integer arguments)
 *      |- sw to store
 *      |- lw to load
 *   t0-t6 (temporary registers)
 *      |- caller saved
 */

        /*
         * Reserve space to save return address and frame pointer.
         * The pair needs only 8 bytes, but 16 are reserved: the RISC-V
         * calling convention requires sp to be 16-byte aligned at procedure
         * entry, and when nstacks == 0 the callee is entered with sp at the
         * bottom of this frame.
         */
        addi    sp, sp, -16
        sw      fp, 0(sp)       /* save frame pointer */
        sw      ra, 4(sp)       /* save return address */

        mv      fp, sp          /* fp = bottom of the fixed frame */

        /* a0-a2 are about to be overwritten with the callee's arguments */
        mv      t0, a0          /* t0 = function ptr */
        mv      t1, a1          /* t1 = argv array address */
        mv      t2, a2          /* t2 = nstacks */

        /* fill in a0-a7 integer registers */
        lw      a0, 0(t1)       /* a0 = argv[0] */
        lw      a1, 4(t1)       /* a1 = argv[1] */
        lw      a2, 8(t1)       /* a2 = argv[2] */
        lw      a3, 12(t1)      /* a3 = argv[3] */
        lw      a4, 16(t1)      /* a4 = argv[4] */
        lw      a5, 20(t1)      /* a5 = argv[5] */
        lw      a6, 24(t1)      /* a6 = argv[6] */
        lw      a7, 28(t1)      /* a7 = argv[7] */

        addi    t1, t1, 32      /* t1 points to stack args */

        /* directly call the function if no args in stack,
           x0 always holds 0 */
        beq     t2, x0, call_func

        /* reserve enough stack space for function arguments */
        slli    t3, t2, 2       /* t3 = nstacks * 4 */
        sub     sp, sp, t3

        /* round sp down to a 16-byte boundary (-16 == ~15) */
        andi    sp, sp, -16

        /* t4 walks the reserved area upwards from sp */
        mv      t4, sp

        /* copy the remaining arguments from argv to our own stack area */
loop_stack_args:
        beq     t2, x0, call_func
        lw      t5, 0(t1)       /* load stack argument, t5 = argv[i] */
        sw      t5, 0(t4)       /* store t5 to reserved stack, sp[j] = t5 */
        addi    t1, t1, 4       /* move to next stack argument */
        addi    t4, t4, 4       /* move to next stack slot */
        addi    t2, t2, -1      /* nstacks = nstacks - 1 */
        j       loop_stack_args

call_func:
        jalr    t0

        /* restore registers pushed in stack or saved in another register */
return:
        mv      sp, fp          /* drop the stack-argument area, if any */
        lw      fp, 0(sp)       /* load previous frame pointer */
        lw      ra, 4(sp)       /* load previous return address */
        addi    sp, sp, 16      /* pop frame, restore sp */
        jr      ra

View File

@ -1,104 +0,0 @@
/*
* Copyright (C) 2019 Intel Corporation. All rights reserved.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
.text
.align 2
#ifndef BH_PLATFORM_DARWIN
        .globl  invokeNative
        .type   invokeNative, function
invokeNative:
#else
        .globl  _invokeNative
_invokeNative:
#endif /* end of BH_PLATFORM_DARWIN */

/*
 * Arguments passed in:
 *
 *   a0  function ptr
 *   a1  argv      bytes 0-31 are loaded into a0-a7 (4 bytes each),
 *                 bytes 32-95 into fa0-fa7 (8 bytes each), the stack
 *                 arguments start at byte offset 96
 *   a2  nstacks   number of 4-byte stack slots to copy
 *
 * No argument/return register is written after the call returns, so the
 * callee's result is handed back to our caller unchanged.
 */

/*
 * Register usage:
 *
 *   sp (stack pointer)
 *      |- sw to store 32-bit values from register to memory
 *      |- lw to load from stack to register
 *   fp/s0 (frame pointer)
 *   a0-a7 (8 integer arguments)
 *      |- sw to store
 *      |- lw to load
 *   fa0-fa7 (8 float arguments)
 *      |- fld to load
 *   t0-t6 (temporary registers)
 *      |- caller saved
 */

        /*
         * Reserve space to save return address and frame pointer.
         * The pair needs only 8 bytes, but 16 are reserved: the RISC-V
         * calling convention requires sp to be 16-byte aligned at procedure
         * entry, and when nstacks == 0 the callee is entered with sp at the
         * bottom of this frame.
         */
        addi    sp, sp, -16
        sw      fp, 0(sp)       /* save frame pointer */
        sw      ra, 4(sp)       /* save return address */

        mv      fp, sp          /* fp = bottom of the fixed frame */

        /* a0-a2 are about to be overwritten with the callee's arguments */
        mv      t0, a0          /* t0 = function ptr */
        mv      t1, a1          /* t1 = argv array address */
        mv      t2, a2          /* t2 = nstacks */

        /* fill in a0-a7 integer registers */
        lw      a0, 0(t1)       /* a0 = argv[0] */
        lw      a1, 4(t1)       /* a1 = argv[1] */
        lw      a2, 8(t1)       /* a2 = argv[2] */
        lw      a3, 12(t1)      /* a3 = argv[3] */
        lw      a4, 16(t1)      /* a4 = argv[4] */
        lw      a5, 20(t1)      /* a5 = argv[5] */
        lw      a6, 24(t1)      /* a6 = argv[6] */
        lw      a7, 28(t1)      /* a7 = argv[7] */

        /* fill in fa0-fa7 float registers, one 8-byte slot each */
        fld     fa0, 32(t1)     /* fa0 = 8 bytes at argv + 32 */
        fld     fa1, 40(t1)     /* fa1 = 8 bytes at argv + 40 */
        fld     fa2, 48(t1)     /* fa2 = 8 bytes at argv + 48 */
        fld     fa3, 56(t1)     /* fa3 = 8 bytes at argv + 56 */
        fld     fa4, 64(t1)     /* fa4 = 8 bytes at argv + 64 */
        fld     fa5, 72(t1)     /* fa5 = 8 bytes at argv + 72 */
        fld     fa6, 80(t1)     /* fa6 = 8 bytes at argv + 80 */
        fld     fa7, 88(t1)     /* fa7 = 8 bytes at argv + 88 */

        addi    t1, t1, 96      /* t1 points to stack args */

        /* directly call the function if no args in stack,
           x0 always holds 0 */
        beq     t2, x0, call_func

        /* reserve enough stack space for function arguments */
        slli    t3, t2, 2       /* t3 = nstacks * 4 */
        sub     sp, sp, t3

        /* round sp down to a 16-byte boundary (-16 == ~15) */
        andi    sp, sp, -16

        /* t4 walks the reserved area upwards from sp */
        mv      t4, sp

        /* copy the remaining arguments from argv to our own stack area */
loop_stack_args:
        beq     t2, x0, call_func
        lw      t5, 0(t1)       /* load stack argument, t5 = argv[i] */
        sw      t5, 0(t4)       /* store t5 to reserved stack, sp[j] = t5 */
        addi    t1, t1, 4       /* move to next stack argument */
        addi    t4, t4, 4       /* move to next stack slot */
        addi    t2, t2, -1      /* nstacks = nstacks - 1 */
        j       loop_stack_args

call_func:
        jalr    t0

        /* restore registers pushed in stack or saved in another register */
return:
        mv      sp, fp          /* drop the stack-argument area, if any */
        lw      fp, 0(sp)       /* load previous frame pointer */
        lw      ra, 4(sp)       /* load previous return address */
        addi    sp, sp, 16      /* pop frame, restore sp */
        jr      ra

View File

@ -1,95 +0,0 @@
/*
* Copyright (C) 2019 Intel Corporation. All rights reserved.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
.text
.align 2
#ifndef BH_PLATFORM_DARWIN
        .globl  invokeNative
        .type   invokeNative, function
invokeNative:
#else
        .globl  _invokeNative
_invokeNative:
#endif /* end of BH_PLATFORM_DARWIN */

/*
 * Inputs:
 *   a0  address of the function to call
 *   a1  argv: doublewords 0-7 become a0-a7, the stack arguments start at
 *       byte offset 64
 *   a2  nstacks: how many 8-byte stack arguments follow
 *
 * Scratch registers: t0 (target), t1 (argv cursor), t2 (remaining count),
 * t3 (byte count), t4 (stack cursor), t5 (value in flight).
 * s0 anchors the 16-byte frame that keeps the caller's s0 and ra.
 */

        /* open a 16-byte frame: caller's s0 at 0(sp), ra at 8(sp) */
        addi    sp, sp, -16
        sd      s0, 0(sp)
        sd      ra, 8(sp)
        mv      s0, sp                  /* s0 remembers the frame bottom */

        /* park the three inputs before a0-a2 are reloaded */
        mv      t0, a0                  /* t0 = target function */
        mv      t1, a1                  /* t1 = argv */
        mv      t2, a2                  /* t2 = nstacks */

        /* integer argument registers <- argv[0..7] */
        ld      a0, 0(t1)
        ld      a1, 8(t1)
        ld      a2, 16(t1)
        ld      a3, 24(t1)
        ld      a4, 32(t1)
        ld      a5, 40(t1)
        ld      a6, 48(t1)
        ld      a7, 56(t1)

        addi    t1, t1, 64              /* t1 -> first stack argument */

        /* nothing to spill: go straight to the call */
        beqz    t2, .Linvoke

        /* carve out nstacks * 8 bytes, rounded down to 16-byte alignment */
        slli    t3, t2, 3
        sub     sp, sp, t3
        andi    sp, sp, -16

        mv      t4, sp                  /* t4 -> first outgoing slot */

        /* move one doubleword per iteration until the count hits zero */
.Lcopy_stack_arg:
        beqz    t2, .Linvoke
        ld      t5, 0(t1)
        sd      t5, 0(t4)
        addi    t1, t1, 8
        addi    t4, t4, 8
        addi    t2, t2, -1
        j       .Lcopy_stack_arg

.Linvoke:
        jalr    t0

        /* unwind: s0 still holds the frame bottom set up in the prologue */
.Lepilogue:
        mv      sp, s0
        ld      s0, 0(sp)               /* caller's frame pointer */
        ld      ra, 8(sp)               /* our return address */
        addi    sp, sp, 16
        ret

View File

@ -1,108 +0,0 @@
/*
* Copyright (C) 2019 Intel Corporation. All rights reserved.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
.text
.align 2
#ifndef BH_PLATFORM_DARWIN
        .globl  invokeNative
        .type   invokeNative, function
invokeNative:
#else
        .globl  _invokeNative
_invokeNative:
#endif /* end of BH_PLATFORM_DARWIN */

/*
 * Inputs:
 *   a0  address of the function to call
 *   a1  argv: doublewords 0-7 become fa0-fa7, doublewords 8-15 become
 *       a0-a7, the stack arguments start at byte offset 128
 *   a2  nstacks: how many 8-byte stack arguments follow
 *
 * Scratch registers: t0 (target), t1 (argv cursor), t2 (remaining count),
 * t3 (byte count), t4 (stack cursor), t5 (value in flight).
 * s0 anchors the 16-byte frame that keeps the caller's s0 and ra.
 */

        /* open a 16-byte frame: caller's s0 at 0(sp), ra at 8(sp) */
        addi    sp, sp, -16
        sd      s0, 0(sp)
        sd      ra, 8(sp)
        mv      s0, sp                  /* s0 remembers the frame bottom */

        /* park the three inputs before a0-a2 are reloaded */
        mv      t0, a0                  /* t0 = target function */
        mv      t1, a1                  /* t1 = argv */
        mv      t2, a2                  /* t2 = nstacks */

        /* FP argument registers <- argv[0..7] */
        fld     fa0, 0(t1)
        fld     fa1, 8(t1)
        fld     fa2, 16(t1)
        fld     fa3, 24(t1)
        fld     fa4, 32(t1)
        fld     fa5, 40(t1)
        fld     fa6, 48(t1)
        fld     fa7, 56(t1)

        /* integer argument registers <- argv[8..15] */
        ld      a0, 64(t1)
        ld      a1, 72(t1)
        ld      a2, 80(t1)
        ld      a3, 88(t1)
        ld      a4, 96(t1)
        ld      a5, 104(t1)
        ld      a6, 112(t1)
        ld      a7, 120(t1)

        addi    t1, t1, 128             /* t1 -> first stack argument */

        /* nothing to spill: go straight to the call */
        beqz    t2, .Linvoke

        /* carve out nstacks * 8 bytes, rounded down to 16-byte alignment */
        slli    t3, t2, 3
        sub     sp, sp, t3
        andi    sp, sp, -16

        mv      t4, sp                  /* t4 -> first outgoing slot */

        /* move one doubleword per iteration until the count hits zero */
.Lcopy_stack_arg:
        beqz    t2, .Linvoke
        ld      t5, 0(t1)
        sd      t5, 0(t4)
        addi    t1, t1, 8
        addi    t4, t4, 8
        addi    t2, t2, -1
        j       .Lcopy_stack_arg

.Linvoke:
        jalr    t0

        /* unwind: s0 still holds the frame bottom set up in the prologue */
.Lepilogue:
        mv      sp, s0
        ld      s0, 0(sp)               /* caller's frame pointer */
        ld      ra, 8(sp)               /* our return address */
        addi    sp, sp, 16
        ret

View File

@ -66,14 +66,8 @@ elseif (WAMR_BUILD_TARGET STREQUAL "MIPS")
set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_mips.s)
elseif (WAMR_BUILD_TARGET STREQUAL "XTENSA")
set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_xtensa.s)
elseif (WAMR_BUILD_TARGET STREQUAL "RISCV64" OR WAMR_BUILD_TARGET STREQUAL "RISCV64_LP64D")
set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_riscv64_lp64d.s)
elseif (WAMR_BUILD_TARGET STREQUAL "RISCV64_LP64")
set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_riscv64_lp64.s)
elseif (WAMR_BUILD_TARGET STREQUAL "RISCV32" OR WAMR_BUILD_TARGET STREQUAL "RISCV32_ILP32D")
set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_riscv32_ilp32d.s)
elseif (WAMR_BUILD_TARGET STREQUAL "RISCV32_ILP32")
set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_riscv32_ilp32.s)
elseif (WAMR_BUILD_TARGET MATCHES "RISCV*")
set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_riscv.S)
else ()
message (FATAL_ERROR "Build target isn't set")
endif ()

View File

@ -3061,7 +3061,8 @@ typedef union __declspec(intrin_type) __declspec(align(8)) v128 {
unsigned __int32 m128i_u32[4];
unsigned __int64 m128i_u64[2];
} v128;
#elif defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
#elif defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) \
|| defined(BUILD_TARGET_RISCV64_LP64D) || defined(BUILD_TARGET_RISCV64_LP64)
typedef long long v128 __attribute__ ((__vector_size__ (16),
__may_alias__, __aligned__ (1)));
#elif defined(BUILD_TARGET_AARCH64)