Add xtensa AOT support and fix build issue of alios (#223)

* Clean compiling warnings of zephyr samples * Support xtensa AOT and fix build issue of alios
2020-04-01 18:38:42 +08:00
parent c1a0e6d877
commit c6fc12b7b6
20 changed files with 762 additions and 68 deletions
--- a/core/iwasm/aot/aot_loader.c
+++ b/core/iwasm/aot/aot_loader.c
@ -985,14 +985,21 @@ load_text_section(const uint8 *buf, const uint8 *buf_end,
        return false;
    }

-    module->code = (void*)buf;
-    module->code_size = (uint32)(buf_end - buf);
+    read_uint32(buf, buf_end, module->literal_size);
+
+    /* literal data is at the beginning of the text section */
+    module->literal = (uint8*)buf;
+    module->code = (void*)(buf + module->literal_size);
+    module->code_size = (uint32)(buf_end - (uint8*)module->code);

    if (module->code_size > 0) {
        plt_base = (uint8*)buf_end - get_plt_table_size();
        init_plt_table(plt_base);
    }
    return true;
+
+fail:
+    return false;
 }

 static bool
@ -1184,13 +1191,20 @@ resolve_target_sym(const char *symbol, int32 *p_index)
    return NULL;
 }

+static bool
+is_literal_relocation(const char *reloc_sec_name)
+{
+    return !strcmp(reloc_sec_name, ".rela.literal");
+}
+
 static bool
 do_text_relocation(AOTModule *module,
                   AOTRelocationGroup *group,
                   char *error_buf, uint32 error_buf_size)
 {
-    uint8 *aot_text = module->code;
-    uint32 aot_text_size = module->code_size;
+    bool is_literal = is_literal_relocation(group->section_name);
+    uint8 *aot_text = is_literal ? module->literal : module->code;
+    uint32 aot_text_size = is_literal ? module->literal_size : module->code_size;
    uint32 i, func_index, symbol_len;
    char symbol_buf[128]  = { 0 }, *symbol, *p;
    void *symbol_addr;
@ -1248,6 +1262,9 @@ do_text_relocation(AOTModule *module,
                goto check_symbol_fail;
            }
        }
+        else if (!strcmp(symbol, ".literal")) {
+            symbol_addr = module->literal;
+        }
        else if (!(symbol_addr = resolve_target_sym(symbol, &symbol_index))) {
            if (error_buf != NULL)
                snprintf(error_buf, error_buf_size,
@ -1495,7 +1512,8 @@ load_relocation_section(const uint8 *buf, const uint8 *buf_end,
        }

        if (!strcmp(group->section_name, ".rel.text")
-            || !strcmp(group->section_name, ".rela.text")) {
+            || !strcmp(group->section_name, ".rela.text")
+            || !strcmp(group->section_name, ".rela.literal")) {
            if (!do_text_relocation(module, group, error_buf, error_buf_size))
                return false;
        }
@ -2079,8 +2097,11 @@ aot_unload(AOTModule *module)
    if (module->const_str_set)
        bh_hash_map_destroy(module->const_str_set);

-    if (module->code)
-        os_munmap(module->code, module->code_size);
+    if (module->code) {
+        uint8 *mmap_addr = module->literal - sizeof(module->literal_size);
+        uint32 total_size = sizeof(module->literal_size) + module->literal_size + module->code_size;
+        os_munmap(mmap_addr, total_size);
+    }

    if (module->data_sections)
        destroy_object_data_sections(module->data_sections,
--- a/core/iwasm/aot/aot_runtime.h
+++ b/core/iwasm/aot/aot_runtime.h
@ -125,6 +125,10 @@ typedef struct AOTModule {
    void *code;
    uint32 code_size;

+    /* literal for AOTed code, NULL for JIT mode */
+    uint8 *literal;
+    uint32 literal_size;
+
    /* data sections in AOT object file, including .data, .rodata
     * and .rodata.cstN. NULL for JIT mode. */
    AOTObjectDataSection *data_sections;
--- a/core/iwasm/aot/arch/aot_reloc_xtensa.c
+++ b/core/iwasm/aot/arch/aot_reloc_xtensa.c
@ -5,10 +5,50 @@

 #include "aot_reloc.h"

+#define R_XTENSA_32        1   /* Direct 32 bit */
+#define R_XTENSA_SLOT0_OP  20  /* PC relative */
+
+/* for soft-float */
+void __floatsidf();
+void __divdf3();
+void __ltdf2();
+
+/* for mul32 */
+void __mulsi3();
+void __muldi3();
+
+void __modsi3();
+
+void __divdi3();
+
 static SymbolMap target_sym_map[] = {
-    REG_COMMON_SYMBOLS
+    REG_COMMON_SYMBOLS,
+
+    /* APIs for soft-float */
+    /* TODO: only register these symbols when Floating-Point Coprocessor
+     * Option is not enabled */
+    REG_SYM(__floatsidf),
+    REG_SYM(__divdf3),
+    REG_SYM(__ltdf2),
+
+    /* APIs for 32-bit integer multiply */
+    /* TODO: only register these symbols when 32-bit Integer Multiply Option
+     * is not enabled */
+    REG_SYM(__mulsi3),
+    REG_SYM(__muldi3),
+
+    REG_SYM(__modsi3),
+
+    REG_SYM(__divdi3),
 };

+static void
+set_error_buf(char *error_buf, uint32 error_buf_size, const char *string)
+{
+    if (error_buf != NULL)
+        snprintf(error_buf, error_buf_size, "%s", string);
+}
+
 SymbolMap *
 get_target_symbol_map(uint32 *sym_num)
 {
@ -40,6 +80,67 @@ get_plt_table_size()
    return get_plt_item_size() * (sizeof(target_sym_map) / sizeof(SymbolMap));
 }

+static bool
+check_reloc_offset(uint32 target_section_size,
+                   uint64 reloc_offset, uint32 reloc_data_size,
+                   char *error_buf, uint32 error_buf_size)
+{
+    if (!(reloc_offset < (uint64)target_section_size
+          && reloc_offset + reloc_data_size <= (uint64)target_section_size)) {
+        set_error_buf(error_buf, error_buf_size,
+                      "AOT module load failed: invalid relocation offset.");
+        return false;
+    }
+    return true;
+}
+
+/*
+ * CPUs like the esp32 can read and write data through the instruction bus, but
+ * only in a word-aligned manner; non-word-aligned access will cause a CPU
+ * exception. This function uses word-aligned accesses to write a 16-bit value
+ * to an instruction address.
+ */
+static void
+put_imm16_to_addr(int16 imm16, int16 *addr)
+{
+    int8 bytes[8];
+    int32 *addr_aligned1, *addr_aligned2;
+
+    addr_aligned1 = (int32*)((intptr_t)addr & ~3);
+
+    if ((intptr_t)addr % 4 != 3) {
+        *(int32*)bytes = *addr_aligned1;
+        *(int16*)(bytes + ((intptr_t)addr % 4)) = imm16;
+        memcpy(addr_aligned1, bytes, 4);
+    }
+    else {
+        addr_aligned2 = (int32*)(((intptr_t)addr + 3) & ~3);
+        *(int32*)bytes = *addr_aligned1;
+        *(int32*)(bytes + 4) = *addr_aligned2;
+        *(int16*)(bytes + 3) = imm16;
+        memcpy(addr_aligned1, bytes, 8);
+    }
+}
+
+static union {
+    int a;
+    char b;
+} __ue = { .a = 1 };
+
+#define is_little_endian() (__ue.b == 1)
+
+typedef union {
+    struct l32r_le {
+        int8 other;
+        int16 imm16;
+    } __packed l;
+
+    struct l32r_be {
+        int16 imm16;
+        int8 other;
+    } __packed b;
+} l32r_insn_t;
+
 bool
 apply_relocation(AOTModule *module,
                 uint8 *target_section_addr, uint32 target_section_size,
@ -48,7 +149,73 @@ apply_relocation(AOTModule *module,
                 char *error_buf, uint32 error_buf_size)
 {
    switch (reloc_type) {
-        /* TODO: implement relocation for xtensa */
+        case R_XTENSA_32:
+        {
+            uint8 *insn_addr = target_section_addr + reloc_offset;
+            int32 initial_addend;
+            /* (S + A) */
+            if ((intptr_t)insn_addr & 3) {
+                set_error_buf(error_buf, error_buf_size,
+                              "AOT module load failed: "
+                              "instruction address unaligned.");
+                return false;
+            }
+            CHECK_RELOC_OFFSET(4);
+            initial_addend = *(int32*)insn_addr;
+            *(uint8**)insn_addr
+                = (uint8*)symbol_addr + initial_addend + reloc_addend;
+            break;
+        }
+
+        case R_XTENSA_SLOT0_OP:
+        {
+            uint8 *insn_addr = target_section_addr + reloc_offset;
+            /* Currently only the l32r instruction generates an R_XTENSA_SLOT0_OP relocation */
+            l32r_insn_t *l32r_insn = (l32r_insn_t *)insn_addr;
+            uint8 *reloc_addr;
+            int32 relative_offset/*, initial_addend */;
+            int16 imm16;
+
+            CHECK_RELOC_OFFSET(3); /* size of l32r instruction */
+
+            /*
+            imm16 = is_little_endian() ?
+                    l32r_insn->l.imm16 : l32r_insn->b.imm16;
+            initial_addend = (int32)imm16 << 2;
+            */
+
+            reloc_addr = (uint8*)symbol_addr + reloc_addend;
+
+            if ((intptr_t)reloc_addr & 3) {
+                set_error_buf(error_buf, error_buf_size,
+                              "AOT module load failed: "
+                              "relocation address unaligned.");
+                return false;
+            }
+
+            relative_offset = (int32)
+                              ((intptr_t)reloc_addr -
+                               (((intptr_t)insn_addr + 3) & ~(intptr_t)3));
+            /* relative_offset += initial_addend; */
+
+            /* check relative offset boundary */
+            if (relative_offset < -256 * BH_KB || relative_offset > -4) {
+                set_error_buf(error_buf, error_buf_size,
+                              "AOT module load failed: "
+                              "target address out of range.");
+                return false;
+            }
+
+            imm16 = (int16)(relative_offset >> 2);
+
+            /* write back the imm16 to the l32r instruction */
+            if (is_little_endian())
+                put_imm16_to_addr(imm16, &l32r_insn->l.imm16);
+            else
+                put_imm16_to_addr(imm16, &l32r_insn->b.imm16);
+
+            break;
+        }

        default:
            if (error_buf != NULL)
--- a/core/iwasm/compilation/aot_emit_aot_file.c
+++ b/core/iwasm/compilation/aot_emit_aot_file.c
@ -49,6 +49,10 @@ typedef struct AOTObjectData {
    void *text;
    uint32 text_size;

+    /* literal data and size */
+    void *literal;
+    uint32 literal_size;
+
    AOTObjectDataSection *data_sections;
    uint32 data_sections_count;

@ -379,7 +383,7 @@ get_init_data_section_size(AOTCompData *comp_data, AOTObjectData *obj_data)
 static uint32
 get_text_section_size(AOTObjectData *obj_data)
 {
-    return obj_data->text_size;
+    return (sizeof(uint32) + obj_data->literal_size + obj_data->text_size + 3) & ~3;
 }

 static uint32
@ -1118,13 +1122,20 @@ aot_emit_text_section(uint8 *buf, uint8 *buf_end, uint32 *p_offset,
 {
    uint32 section_size = get_text_section_size(obj_data);
    uint32 offset = *p_offset;
+    uint8 placeholder = 0;

    *p_offset = offset = align_uint(offset, 4);

    EMIT_U32(AOT_SECTION_TYPE_TEXT);
    EMIT_U32(section_size);
+    EMIT_U32(obj_data->literal_size);
+    if (obj_data->literal_size > 0)
+        EMIT_BUF(obj_data->literal, obj_data->literal_size);
    EMIT_BUF(obj_data->text, obj_data->text_size);

+    while (offset & 3)
+        EMIT_BUF(&placeholder, 1);
+
    if (offset - *p_offset != section_size + sizeof(uint32) * 2) {
        aot_set_last_error("emit text section failed.");
        return false;
@ -1449,6 +1460,29 @@ aot_resolve_text(AOTObjectData *obj_data)
    return true;
 }

+static bool
+aot_resolve_literal(AOTObjectData *obj_data)
+{
+    LLVMSectionIteratorRef sec_itr;
+    char *name;
+
+    if (!(sec_itr = LLVMObjectFileCopySectionIterator(obj_data->binary))) {
+        aot_set_last_error("llvm get section iterator failed.");
+        return false;
+    }
+    while (!LLVMObjectFileIsSectionIteratorAtEnd(obj_data->binary, sec_itr)) {
+        if ((name = (char *)LLVMGetSectionName(sec_itr)) && !strcmp(name, ".literal")) {
+            obj_data->literal = (char *)LLVMGetSectionContents(sec_itr);
+            obj_data->literal_size = (uint32)LLVMGetSectionSize(sec_itr);
+            break;
+        }
+        LLVMMoveToNextSection(sec_itr);
+    }
+    LLVMDisposeSectionIterator(sec_itr);
+
+    return true;
+}
+
 static bool
 is_data_section(char *section_name)
 {
@ -1701,6 +1735,7 @@ is_relocation_section(char *section_name)
 {
    return (!strcmp(section_name, ".rela.text")
            || !strcmp(section_name, ".rel.text")
+            || !strcmp(section_name, ".rela.literal")
            || !strcmp(section_name, ".rela.data")
            || !strcmp(section_name, ".rel.data")
            || !strcmp(section_name, ".rela.rodata")
@ -1873,6 +1908,7 @@ aot_obj_data_create(AOTCompContext *comp_ctx)
    /* resolve target info/text/relocations/functions */
    if (!aot_resolve_target_info(comp_ctx, obj_data)
        || !aot_resolve_text(obj_data)
+        || !aot_resolve_literal(obj_data)
        || !aot_resolve_object_data_sections(obj_data)
        || !aot_resolve_object_relocation_groups(obj_data)
        || !aot_resolve_functions(comp_ctx, obj_data))
--- a/core/iwasm/compilation/aot_emit_numberic.c
+++ b/core/iwasm/compilation/aot_emit_numberic.c
@ -858,18 +858,63 @@ fail:
 }

 static bool
-is_targeting_soft_float(LLVMTargetMachineRef target_machine)
+is_target_arm(AOTCompContext *comp_ctx)
+{
+    return !strncmp(comp_ctx->target_arch, "arm", 3) ||
+           !strncmp(comp_ctx->target_arch, "thumb", 5);
+}
+
+static bool
+is_target_x86(AOTCompContext *comp_ctx)
+{
+    return !strncmp(comp_ctx->target_arch, "x86_64", 6) ||
+           !strncmp(comp_ctx->target_arch, "i386", 4);
+}
+
+static bool
+is_target_xtensa(AOTCompContext *comp_ctx)
+{
+    return !strncmp(comp_ctx->target_arch, "xtensa", 6);
+}
+
+static bool
+is_target_mips(AOTCompContext *comp_ctx)
+{
+    return !strncmp(comp_ctx->target_arch, "mips", 4);
+}
+
+static bool
+is_targeting_soft_float(AOTCompContext *comp_ctx, bool is_f32)
 {
    bool ret = false;
    char *feature_string;

    if (!(feature_string =
-                LLVMGetTargetMachineFeatureString(target_machine))) {
+                LLVMGetTargetMachineFeatureString(comp_ctx->target_machine))) {
        aot_set_last_error("llvm get target machine feature string fail.");
        return false;
    }

-    ret = strstr(feature_string, "+soft-float") ? true : false;
+    /* Note:
+     * LLVM CodeGen uses FPU Coprocessor registers by default,
+     * so user must specify '--cpu-features=+soft-float' to wamrc if the target
+     * doesn't have or enable FPU on arm, x86 or mips. */
+    if (is_target_arm(comp_ctx) ||
+        is_target_x86(comp_ctx) ||
+        is_target_mips(comp_ctx))
+        ret = strstr(feature_string, "+soft-float") ? true : false;
+    else if (is_target_xtensa(comp_ctx))
+        /* Note:
+         * 1. The Floating-Point Coprocessor Option of xtensa only supports
+         * single-precision floating-point operations, so must use soft-float
+         * for f64(i.e. double).
+         * 2. LLVM CodeGen uses Floating-Point Coprocessor registers by default,
+         * so user must specify '--cpu-features=-fp' to wamrc if the target
+         * doesn't have or enable Floating-Point Coprocessor Option on xtensa. */
+        ret = (!is_f32 || strstr(feature_string, "-fp")) ? true : false;
+    else
+        ret = true;
+
    LLVMDisposeMessage(feature_string);
    return ret;
 }
@ -880,7 +925,7 @@ compile_op_float_arithmetic(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 {
    switch (arith_op) {
        case FLOAT_ADD:
-            if (is_targeting_soft_float(comp_ctx->target_machine))
+            if (is_targeting_soft_float(comp_ctx, is_f32))
                DEF_FP_BINARY_OP(LLVMBuildFAdd(comp_ctx->builder, left, right, "fadd"),
                                 "llvm build fadd fail.");
            else
@ -897,7 +942,7 @@ compile_op_float_arithmetic(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                                 NULL);
            return true;
        case FLOAT_SUB:
-            if (is_targeting_soft_float(comp_ctx->target_machine))
+            if (is_targeting_soft_float(comp_ctx, is_f32))
                DEF_FP_BINARY_OP(LLVMBuildFSub(comp_ctx->builder, left, right, "fsub"),
                                 "llvm build fsub fail.");
            else
@ -914,7 +959,7 @@ compile_op_float_arithmetic(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                                 NULL);
            return true;
        case FLOAT_MUL:
-            if (is_targeting_soft_float(comp_ctx->target_machine))
+            if (is_targeting_soft_float(comp_ctx, is_f32))
                DEF_FP_BINARY_OP(LLVMBuildFMul(comp_ctx->builder, left, right, "fmul"),
                                 "llvm build fmul fail.");
            else
@ -931,7 +976,7 @@ compile_op_float_arithmetic(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                                 NULL);
            return true;
        case FLOAT_DIV:
-            if (is_targeting_soft_float(comp_ctx->target_machine))
+            if (is_targeting_soft_float(comp_ctx, is_f32))
                DEF_FP_BINARY_OP(LLVMBuildFDiv(comp_ctx->builder, left, right, "fdiv"),
                                 "llvm build fdiv fail.");
            else
@ -1050,7 +1095,7 @@ compile_op_float_math(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                            NULL);
            return true;
        case FLOAT_SQRT:
-            if (is_targeting_soft_float(comp_ctx->target_machine))
+            if (is_targeting_soft_float(comp_ctx, is_f32))
                DEF_FP_UNARY_OP(call_llvm_float_math_intrinsic(comp_ctx,
                                                   is_f32 ? "llvm.sqrt.f32" :
                                                            "llvm.sqrt.f64",
--- a/core/iwasm/compilation/aot_llvm.c
+++ b/core/iwasm/compilation/aot_llvm.c
@ -740,6 +740,7 @@ typedef struct ArchItem {
 static ArchItem valid_archs[] = {
    { "x86_64", false },
    { "i386", false },
+    { "xtensa", false},
    { "mips", true },
    { "aarch64v8", false },
    { "aarch64v8.1", false },
--- a/core/shared/platform/zephyr/platform_internal.h
+++ b/core/shared/platform/zephyr/platform_internal.h
@ -77,4 +77,24 @@ unsigned long long int strtoull(const char *nptr, char **endptr, int base);
 double strtod(const char *nptr, char **endptr);
 float strtof(const char *nptr, char **endptr);

+/**
+ * @brief Allocate executable memory
+ *
+ * @param size size of the memory to be allocated
+ *
+ * @return the address of the allocated memory if not NULL
+ */
+typedef void* (*exec_mem_alloc_func_t)(unsigned int size);
+
+/**
+ * @brief Release executable memory
+ *
+ * @param addr the address of the executable memory to be released
+ */
+typedef void (*exec_mem_free_func_t)(void *addr);
+
+/* The function below is called by an external project to set the function pointers that
+ * will be used to malloc/free executable memory. Otherwise the default mechanism will be used. */
+void set_exec_mem_alloc_func(exec_mem_alloc_func_t alloc_func, exec_mem_free_func_t free_func);
+
 #endif
--- a/core/shared/platform/zephyr/zephyr_platform.c
+++ b/core/shared/platform/zephyr/zephyr_platform.c
@ -6,6 +6,10 @@
 #include "platform_api_vmcore.h"
 #include "platform_api_extension.h"

+/* function pointers for executable memory management */
+static exec_mem_alloc_func_t exec_mem_alloc_func = NULL;
+static exec_mem_free_func_t exec_mem_free_func = NULL;
+
 #if WASM_ENABLE_AOT != 0
 #ifdef CONFIG_ARM_MPU
 /**
@ -108,13 +112,19 @@ os_vprintf(const char *fmt, va_list ap)
 void *
 os_mmap(void *hint, unsigned int size, int prot, int flags)
 {
-    return BH_MALLOC(size);
+    if (exec_mem_alloc_func)
+        return exec_mem_alloc_func(size);
+    else
+        return BH_MALLOC(size);
 }

 void
 os_munmap(void *addr, uint32 size)
 {
-    return BH_FREE(addr);
+    if (exec_mem_free_func)
+        exec_mem_free_func(addr);
+    else
+        BH_FREE(addr);
 }

 int
@ -133,3 +143,11 @@ os_dcache_flush()
    irq_unlock(key);
 #endif
 }
+
+void set_exec_mem_alloc_func(exec_mem_alloc_func_t alloc_func,
+                             exec_mem_free_func_t free_func)
+{
+    exec_mem_alloc_func = alloc_func;
+    exec_mem_free_func = free_func;
+}
+