Merge pull request #2426 from bytecodealliance/main

Merge branch main into dev/wasi-libc-windows
2023-08-06 08:50:21 +08:00
parent 5463ea4a20 ebd9466d57
commit a07d8160f9
66 changed files with 2278 additions and 1401 deletions
--- a/core/shared/platform/esp-idf/espidf_memmap.c
+++ b/core/shared/platform/esp-idf/espidf_memmap.c
@ -5,16 +5,34 @@

 #include "platform_api_vmcore.h"
 #include "platform_api_extension.h"
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+#include "soc/mmu.h"
+#include "rom/cache.h"
+
+#define MEM_DUAL_BUS_OFFSET (IRAM0_CACHE_ADDRESS_LOW - DRAM0_CACHE_ADDRESS_LOW)
+
+#define in_ibus_ext(addr)                      \
+    (((uint32)addr >= IRAM0_CACHE_ADDRESS_LOW) \
+     && ((uint32)addr < IRAM0_CACHE_ADDRESS_HIGH))
+
+static portMUX_TYPE s_spinlock = portMUX_INITIALIZER_UNLOCKED;
+#endif

 void *
 os_mmap(void *hint, size_t size, int prot, int flags)
 {
    if (prot & MMAP_PROT_EXEC) {
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+        uint32_t mem_caps = MALLOC_CAP_SPIRAM;
+#else
+        uint32_t mem_caps = MALLOC_CAP_EXEC;
+#endif
+
        // Memory allocation with MALLOC_CAP_EXEC will return 4-byte aligned
        // Reserve extra 4 byte to fixup alignment and size for the pointer to
        // the originally allocated address
        void *buf_origin =
-            heap_caps_malloc(size + 4 + sizeof(uintptr_t), MALLOC_CAP_EXEC);
+            heap_caps_malloc(size + 4 + sizeof(uintptr_t), mem_caps);
        if (!buf_origin) {
            return NULL;
        }
@ -25,19 +43,35 @@ os_mmap(void *hint, size_t size, int prot, int flags)

        uintptr_t *addr_field = buf_fixed - sizeof(uintptr_t);
        *addr_field = (uintptr_t)buf_origin;
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+        return buf_fixed + MEM_DUAL_BUS_OFFSET;
+#else
        return buf_fixed;
+#endif
    }
    else {
-        return os_malloc(size);
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+        uint32_t mem_caps = MALLOC_CAP_SPIRAM;
+#else
+        uint32_t mem_caps = MALLOC_CAP_8BIT;
+#endif
+        return heap_caps_malloc(size, mem_caps);
    }
 }

 void
 os_munmap(void *addr, size_t size)
 {
+    char *ptr = (char *)addr;
+
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+    if (in_ibus_ext(ptr)) {
+        ptr -= MEM_DUAL_BUS_OFFSET;
+    }
+#endif
    // We don't need special handling of the executable allocations
    // here, free() of esp-idf handles it properly
-    return os_free(addr);
+    return os_free(ptr);
 }

 int
@ -47,5 +81,34 @@ os_mprotect(void *addr, size_t size, int prot)
 }

 void
-os_dcache_flush()
-{}
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+    IRAM_ATTR
+#endif
+    os_dcache_flush()
+{
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+    uint32_t preload;
+    extern void Cache_WriteBack_All(void);
+
+    portENTER_CRITICAL(&s_spinlock);
+
+    Cache_WriteBack_All();
+    preload = Cache_Disable_ICache();
+    Cache_Enable_ICache(preload);
+
+    portEXIT_CRITICAL(&s_spinlock);
+#endif
+}
+
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+void *
+os_get_dbus_mirror(void *ibus)
+{
+    if (in_ibus_ext(ibus)) {
+        return (void *)((char *)ibus - MEM_DUAL_BUS_OFFSET);
+    }
+    else {
+        return ibus;
+    }
+}
+#endif
--- a/core/shared/platform/esp-idf/shared_platform.cmake
+++ b/core/shared/platform/esp-idf/shared_platform.cmake
@ -11,3 +11,9 @@ include_directories(${PLATFORM_SHARED_DIR}/../include)
 file (GLOB_RECURSE source_all ${PLATFORM_SHARED_DIR}/*.c)

 set (PLATFORM_SHARED_SOURCE ${source_all} ${PLATFORM_COMMON_MATH_SOURCE})
+
+# If enable PSRAM of ESP32-S3, it had better to put AOT into PSRAM, so that
+# users can use SRAM to for Wi-Fi/BLE and peripheral driver.
+if(CONFIG_ESP32S3_SPIRAM_SUPPORT)
+    add_definitions(-DWASM_MEM_DUAL_BUS_MIRROR=1)
+endif()
--- a/core/shared/platform/include/platform_api_vmcore.h
+++ b/core/shared/platform/include/platform_api_vmcore.h
@ -129,6 +129,11 @@ os_munmap(void *addr, size_t size);
 int
 os_mprotect(void *addr, size_t size, int prot);

+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+void *
+os_get_dbus_mirror(void *ibus);
+#endif
+
 /**
 * Flush cpu data cache, in some CPUs, after applying relocation to the
 * AOT code, the code may haven't been written back to the cpu data cache,
--- a/core/shared/platform/nuttx/nuttx_platform.c
+++ b/core/shared/platform/nuttx/nuttx_platform.c
@ -10,6 +10,46 @@
 #include <nuttx/arch.h>
 #endif

+#if defined(CONFIG_ARCH_CHIP_ESP32S3)
+/*
+ * TODO: Move these methods below the operating system level
+ */
+#define MEM_DUAL_BUS_OFFSET (0x42000000 - 0x3C000000)
+#define IRAM0_CACHE_ADDRESS_LOW 0x42000000
+#define IRAM0_CACHE_ADDRESS_HIGH 0x44000000
+#define IRAM_ATTR locate_data(".iram1")
+
+#define in_ibus_ext(addr)                      \
+    (((uint32)addr >= IRAM0_CACHE_ADDRESS_LOW) \
+     && ((uint32)addr < IRAM0_CACHE_ADDRESS_HIGH))
+void IRAM_ATTR
+bus_sync(void)
+{
+    extern void cache_writeback_all(void);
+    extern uint32_t Cache_Disable_ICache(void);
+    extern void Cache_Enable_ICache(uint32_t autoload);
+
+    irqstate_t flags;
+    uint32_t preload;
+
+    flags = enter_critical_section();
+
+    cache_writeback_all();
+    preload = Cache_Disable_ICache();
+    Cache_Enable_ICache(preload);
+
+    leave_critical_section(flags);
+}
+#else
+#define MEM_DUAL_BUS_OFFSET (0)
+#define IRAM0_CACHE_ADDRESS_LOW (0)
+#define IRAM0_CACHE_ADDRESS_HIGH (0)
+#define in_ibus_ext(addr) (0)
+static void
+bus_sync(void)
+{}
+#endif
+
 int
 bh_platform_init()
 {
@ -47,6 +87,10 @@ os_dumps_proc_mem_info(char *out, unsigned int size)
 void *
 os_mmap(void *hint, size_t size, int prot, int flags)
 {
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+    void *i_addr, *d_addr;
+#endif
+
 #if defined(CONFIG_ARCH_USE_TEXT_HEAP)
    if ((prot & MMAP_PROT_EXEC) != 0) {
        return up_textheap_memalign(sizeof(void *), size);
@ -55,6 +99,17 @@ os_mmap(void *hint, size_t size, int prot, int flags)

    if ((uint64)size >= UINT32_MAX)
        return NULL;
+
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+    if ((prot & MMAP_PROT_EXEC) != 0) {
+        d_addr = malloc((uint32)size);
+        if (d_addr == NULL) {
+            return NULL;
+        }
+        i_addr = (void *)((uint8 *)d_addr + MEM_DUAL_BUS_OFFSET);
+        return in_ibus_ext(i_addr) ? i_addr : d_addr;
+    }
+#endif
    return malloc((uint32)size);
 }

@ -67,7 +122,14 @@ os_munmap(void *addr, size_t size)
        return;
    }
 #endif
-    return free(addr);
+
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+    if (in_ibus_ext(addr)) {
+        free((void *)((uint8 *)addr - MEM_DUAL_BUS_OFFSET));
+        return;
+    }
+#endif
+    free(addr);
 }

 int
@ -78,7 +140,22 @@ os_mprotect(void *addr, size_t size, int prot)

 void
 os_dcache_flush()
-{}
+{
+    bus_sync();
+}
+
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+void *
+os_get_dbus_mirror(void *ibus)
+{
+    if (in_ibus_ext(ibus)) {
+        return (void *)((uint8 *)ibus - MEM_DUAL_BUS_OFFSET);
+    }
+    else {
+        return ibus;
+    }
+}
+#endif

 /* If AT_FDCWD is provided, maybe we have openat family */
 #if !defined(AT_FDCWD)
--- a/core/shared/utils/bh_atomic.h
+++ b/core/shared/utils/bh_atomic.h
@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2023 Amazon Inc.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _BH_ATOMIC_H
+#define _BH_ATOMIC_H
+
+#include "gnuc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Why don't we use C11 stdatomics here?
+ *
+ * Unlike C11 stdatomics,
+ *
+ * - bh_atomic_xxx_t is guaranteed to have the same size as the base type.
+ *   Thus more friendly to our AOT conventions.
+ *
+ * - It's available for C++.
+ *   Although C++23 will have C-compatible stdatomics.h, it isn't widely
+ *   available yet.
+ */
+
+/*
+ * Note about BH_ATOMIC_32_IS_ATOMIC
+ *
+ * If BH_ATOMIC_32_IS_ATOMIC == 0, BH_ATOMIC_xxx operations defined below
+ * are not really atomic and require an external lock.
+ *
+ * Expected usage is:
+ *
+ *     bh_atomic_32_t var = 0;
+ *     uint32 old;
+ * #if BH_ATOMIC_32_IS_ATOMIC == 0
+ *     lock(&some_lock);
+ * #endif
+ *     old = BH_ATOMIC_32_FETCH_AND(var, 1);
+ * #if BH_ATOMIC_32_IS_ATOMIC == 0
+ *     unlock(&some_lock);
+ * #endif
+ */
+
+typedef uint32 bh_atomic_32_t;
+
+#if defined(__GNUC_PREREQ)
+#if __GNUC_PREREQ(4, 7)
+#define CLANG_GCC_HAS_ATOMIC_BUILTIN
+#endif
+#elif defined(__clang__)
+#if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 0)
+#define CLANG_GCC_HAS_ATOMIC_BUILTIN
+#endif
+#endif
+
+#if defined(CLANG_GCC_HAS_ATOMIC_BUILTIN)
+#define BH_ATOMIC_32_IS_ATOMIC 1
+#define BH_ATOMIC_32_LOAD(v) __atomic_load_n(&(v), __ATOMIC_SEQ_CST)
+#define BH_ATOMIC_32_FETCH_OR(v, val) \
+    __atomic_fetch_or(&(v), (val), __ATOMIC_SEQ_CST)
+#define BH_ATOMIC_32_FETCH_AND(v, val) \
+    __atomic_fetch_and(&(v), (val), __ATOMIC_SEQ_CST)
+#define BH_ATOMIC_32_FETCH_ADD(v, val) \
+    __atomic_fetch_add(&(v), (val), __ATOMIC_SEQ_CST)
+#define BH_ATOMIC_32_FETCH_SUB(v, val) \
+    __atomic_fetch_sub(&(v), (val), __ATOMIC_SEQ_CST)
+#else /* else of defined(CLANG_GCC_HAS_ATOMIC_BUILTIN) */
+#define BH_ATOMIC_32_LOAD(v) (v)
+#define BH_ATOMIC_32_FETCH_OR(v, val) nonatomic_32_fetch_or(&(v), val)
+#define BH_ATOMIC_32_FETCH_AND(v, val) nonatomic_32_fetch_and(&(v), val)
+#define BH_ATOMIC_32_FETCH_ADD(v, val) nonatomic_32_fetch_add(&(v), val)
+#define BH_ATOMIC_32_FETCH_SUB(v, val) nonatomic_32_fetch_sub(&(v), val)
+
+static inline uint32
+nonatomic_32_fetch_or(bh_atomic_32_t *p, uint32 val)
+{
+    uint32 old = *p;
+    *p |= val;
+    return old;
+}
+
+static inline uint32
+nonatomic_32_fetch_and(bh_atomic_32_t *p, uint32 val)
+{
+    uint32 old = *p;
+    *p &= val;
+    return old;
+}
+
+static inline uint32
+nonatomic_32_fetch_add(bh_atomic_32_t *p, uint32 val)
+{
+    uint32 old = *p;
+    *p += val;
+    return old;
+}
+
+static inline uint32
+nonatomic_32_fetch_sub(bh_atomic_32_t *p, uint32 val)
+{
+    uint32 old = *p;
+    *p -= val;
+    return old;
+}
+
+/* The flag can be defined by the user if the platform
+   supports atomic access to uint32 aligned memory. */
+#ifdef WASM_UINT32_IS_ATOMIC
+#define BH_ATOMIC_32_IS_ATOMIC 1
+#else /* else of WASM_UINT32_IS_ATOMIC */
+#define BH_ATOMIC_32_IS_ATOMIC 0
+#endif /* WASM_UINT32_IS_ATOMIC */
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* end of _BH_ATOMIC_H */
--- a/core/shared/utils/gnuc.h
+++ b/core/shared/utils/gnuc.h
@ -0,0 +1,14 @@
+/*
+ * Copyright (C) 2023 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#if !defined(__GNUC_PREREQ) && (defined(__GNUC__) || defined(__GNUG__)) \
+    && !defined(__clang__) && defined(__GNUC_MINOR__)
+/* Depending on the platform the macro is defined in sys/features.h or
+   features.h Given the macro is simple, we re-implement it here instead of
+   dealing with two different paths.
+ */
+#define __GNUC_PREREQ(maj, min) \
+    ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
+#endif