Merge branch main into dev/wasi_threads

2023-02-17 08:46:12 +08:00
parent ee1871d3f8 50650e4634
commit e170c355a2
163 changed files with 7153 additions and 1857 deletions
--- a/core/iwasm/libraries/lib-pthread/lib_pthread_wrapper.c
+++ b/core/iwasm/libraries/lib-pthread/lib_pthread_wrapper.c
@ -565,6 +565,7 @@ pthread_create_wrapper(wasm_exec_env_t exec_env,
 #if WASM_ENABLE_LIBC_WASI != 0
    WASIContext *wasi_ctx;
 #endif
+    CApiFuncImport **new_c_api_func_imports = NULL;

    bh_assert(module);
    bh_assert(module_inst);
@ -597,6 +598,46 @@ pthread_create_wrapper(wasm_exec_env_t exec_env,
        wasm_runtime_set_wasi_ctx(new_module_inst, wasi_ctx);
 #endif

+    /* workaround about passing instantiate-linking information */
+    {
+        CApiFuncImport *c_api_func_imports;
+        uint32 import_func_count = 0;
+        uint32 size_in_bytes = 0;
+
+#if WASM_ENABLE_INTERP != 0
+        if (module_inst->module_type == Wasm_Module_Bytecode) {
+            new_c_api_func_imports = &(
+                ((WASMModuleInstance *)new_module_inst)->e->c_api_func_imports);
+            c_api_func_imports =
+                ((WASMModuleInstance *)module_inst)->e->c_api_func_imports;
+            import_func_count = ((WASMModule *)module)->import_function_count;
+        }
+#endif
+#if WASM_ENABLE_AOT != 0
+        if (module_inst->module_type == Wasm_Module_AoT) {
+            AOTModuleInstanceExtra *e =
+                (AOTModuleInstanceExtra *)((AOTModuleInstance *)new_module_inst)
+                    ->e;
+            new_c_api_func_imports = &(e->c_api_func_imports);
+
+            e = (AOTModuleInstanceExtra *)((AOTModuleInstance *)module_inst)->e;
+            c_api_func_imports = e->c_api_func_imports;
+
+            import_func_count = ((AOTModule *)module)->import_func_count;
+        }
+#endif
+
+        if (import_func_count != 0 && c_api_func_imports) {
+            size_in_bytes = sizeof(CApiFuncImport *) * import_func_count;
+            *new_c_api_func_imports = wasm_runtime_malloc(size_in_bytes);
+            if (!(*new_c_api_func_imports))
+                goto fail;
+
+            bh_memcpy_s(*new_c_api_func_imports, size_in_bytes,
+                        c_api_func_imports, size_in_bytes);
+        }
+    }
+
    if (!(info_node = wasm_runtime_malloc(sizeof(ThreadInfoNode))))
        goto fail;

--- a/core/iwasm/libraries/libc-wasi/libc_wasi_wrapper.c
+++ b/core/iwasm/libraries/libc-wasi/libc_wasi_wrapper.c
@ -63,6 +63,12 @@ typedef struct WASIContext {
 wasi_ctx_t
 wasm_runtime_get_wasi_ctx(wasm_module_inst_t module_inst);

+/* Return the smaller of two size_t values.
+ * NOTE(review): this helper is also called with __wasi_timestamp_t
+ * arguments in this file; if that type is wider than size_t on some
+ * target, the arguments are converted to size_t first -- confirm. */
+static inline size_t
+min(size_t a, size_t b)
+{
+    return a > b ? b : a;
+}
+
 static inline struct fd_table *
 wasi_ctx_get_curfds(wasm_module_inst_t module_inst, wasi_ctx_t wasi_ctx)
 {
@ -951,6 +957,97 @@ wasi_path_remove_directory(wasm_exec_env_t exec_env, wasi_fd_t fd,
    return wasmtime_ssp_path_remove_directory(curfds, fd, path, path_len);
 }

+#if WASM_ENABLE_THREAD_MGR != 0
+/* Find the smallest timeout among the relative clock subscriptions.
+ *
+ * Only entries whose type is __WASI_EVENTTYPE_CLOCK and whose flags do not
+ * have __WASI_SUBSCRIPTION_CLOCK_ABSTIME set are taken into account.
+ *
+ * in:             array of subscriptions to scan
+ * nsubscriptions: number of entries in the array
+ *
+ * When no entry qualifies, the initial value (__wasi_timestamp_t)-1 is
+ * returned. */
+static __wasi_timestamp_t
+get_timeout_for_poll_oneoff(const wasi_subscription_t *in,
+                            uint32 nsubscriptions)
+{
+    /* (__wasi_timestamp_t)-1 is the starting value of the search */
+    __wasi_timestamp_t timeout = (__wasi_timestamp_t)-1;
+    uint32 i = 0;
+
+    for (i = 0; i < nsubscriptions; ++i) {
+        const __wasi_subscription_t *s = &in[i];
+        if (s->u.type == __WASI_EVENTTYPE_CLOCK
+            && (s->u.u.clock.flags & __WASI_SUBSCRIPTION_CLOCK_ABSTIME) == 0) {
+            timeout = min(timeout, s->u.u.clock.timeout);
+        }
+    }
+    return timeout;
+}
+
+/* Overwrite the timeout of every __WASI_EVENTTYPE_CLOCK subscription in
+ * the array with new_timeout; entries of other types are left untouched.
+ * NOTE(review): the clock flags are not inspected here, so entries with
+ * __WASI_SUBSCRIPTION_CLOCK_ABSTIME set are rewritten as well -- confirm
+ * that this is intended. */
+static void
+update_clock_subscription_data(wasi_subscription_t *in, uint32 nsubscriptions,
+                               const wasi_timestamp_t new_timeout)
+{
+    uint32 i = 0;
+    for (i = 0; i < nsubscriptions; ++i) {
+        __wasi_subscription_t *s = &in[i];
+        if (s->u.type == __WASI_EVENTTYPE_CLOCK) {
+            s->u.u.clock.timeout = new_timeout;
+        }
+    }
+}
+
+/* Run wasmtime_ssp_poll_oneoff in slices of at most time_quant so that an
+ * exception set on module_inst is noticed between slices instead of only
+ * after the whole timeout has passed.
+ *
+ * The subscriptions are copied, and the copy's clock timeouts are rewritten
+ * for every slice; the caller's array is never modified.
+ *
+ * module_inst:    instance whose exception state is checked after each slice
+ * curfds:         file descriptor table forwarded to wasmtime_ssp_poll_oneoff
+ * in:             subscriptions requested by the application
+ * out:            receives the events reported by the last slice
+ * nsubscriptions: number of entries in `in`
+ * nevents:        receives the number of events written to `out`
+ *
+ * Returns __WASI_ESUCCESS when events were reported, when there are no
+ * subscriptions, or when the timeout ran out; __WASI_ENOMEM when the copy
+ * cannot be allocated; __WASI_EINTR when an exception is pending and no
+ * event was reported; otherwise the error of wasmtime_ssp_poll_oneoff.
+ *
+ * NOTE(review): every clock subscription in the copy gets the slice length
+ * as its timeout, so the poll presumably reports a clock event at the end
+ * of each slice and not only at the overall timeout -- verify against
+ * wasmtime_ssp_poll_oneoff. */
+static wasi_errno_t
+execute_interruptible_poll_oneoff(wasm_module_inst_t module_inst,
+#if !defined(WASMTIME_SSP_STATIC_CURFDS)
+                                  struct fd_table *curfds,
+#endif
+                                  const __wasi_subscription_t *in,
+                                  __wasi_event_t *out, size_t nsubscriptions,
+                                  size_t *nevents)
+{
+    if (nsubscriptions == 0) {
+        *nevents = 0;
+        return __WASI_ESUCCESS;
+    }
+
+    wasi_errno_t err = __WASI_ESUCCESS;
+    __wasi_timestamp_t elapsed = 0;
+
+    const __wasi_timestamp_t timeout = get_timeout_for_poll_oneoff(
+                                 in, nsubscriptions),
+                             time_quant = 1e9;
+    const uint64 size_to_copy =
+        nsubscriptions * (uint64)sizeof(wasi_subscription_t);
+    __wasi_subscription_t *in_copy = NULL;
+
+    if (size_to_copy >= UINT32_MAX
+        || !(in_copy = (__wasi_subscription_t *)wasm_runtime_malloc(
+                 (uint32)size_to_copy))) {
+        return __WASI_ENOMEM;
+    }
+
+    bh_memcpy_s(in_copy, size_to_copy, in, size_to_copy);
+
+    while (timeout == (__wasi_timestamp_t)-1 || elapsed <= timeout) {
+        /* Length of this slice: the time left until the overall timeout,
+           capped at time_quant. It is computed before elapsed is advanced,
+           so timeout - elapsed cannot wrap around here, and it stays in
+           __wasi_timestamp_t so nothing is narrowed to size_t. */
+        const __wasi_timestamp_t remaining = timeout - elapsed;
+        const __wasi_timestamp_t slice =
+            remaining < time_quant ? remaining : time_quant;
+
+        elapsed += time_quant;
+
+        /* update timeout for clock subscription events */
+        update_clock_subscription_data(in_copy, nsubscriptions, slice);
+        err = wasmtime_ssp_poll_oneoff(curfds, in_copy, out, nsubscriptions,
+                                       nevents);
+        if (err) {
+            break;
+        }
+
+        if (*nevents > 0) {
+            /* events take precedence over a pending exception */
+            break;
+        }
+
+        if (wasm_runtime_get_exception(module_inst)) {
+            /* report the interruption with a WASI errno, not the host's */
+            err = __WASI_EINTR;
+            break;
+        }
+    }
+
+    wasm_runtime_free(in_copy);
+    return err;
+}
+#endif
+
 static wasi_errno_t
 wasi_poll_oneoff(wasm_exec_env_t exec_env, const wasi_subscription_t *in,
                 wasi_event_t *out, uint32 nsubscriptions, uint32 *nevents_app)
@ -958,7 +1055,7 @@ wasi_poll_oneoff(wasm_exec_env_t exec_env, const wasi_subscription_t *in,
    wasm_module_inst_t module_inst = get_module_inst(exec_env);
    wasi_ctx_t wasi_ctx = get_wasi_ctx(module_inst);
    struct fd_table *curfds = wasi_ctx_get_curfds(module_inst, wasi_ctx);
-    size_t nevents;
+    size_t nevents = 0;
    wasi_errno_t err;

    if (!wasi_ctx)
@ -969,7 +1066,12 @@ wasi_poll_oneoff(wasm_exec_env_t exec_env, const wasi_subscription_t *in,
        || !validate_native_addr(nevents_app, sizeof(uint32)))
        return (wasi_errno_t)-1;

+#if WASM_ENABLE_THREAD_MGR == 0
    err = wasmtime_ssp_poll_oneoff(curfds, in, out, nsubscriptions, &nevents);
+#else
+    err = execute_interruptible_poll_oneoff(module_inst, curfds, in, out,
+                                            nsubscriptions, &nevents);
+#endif
    if (err)
        return err;

@ -1861,12 +1963,6 @@ allocate_iovec_app_buffer(wasm_module_inst_t module_inst,
    return __WASI_ESUCCESS;
 }

-static inline size_t
-min(size_t a, size_t b)
-{
-    return a > b ? b : a;
-}
-
 static wasi_errno_t
 copy_buffer_to_iovec_app(wasm_module_inst_t module_inst, uint8 *buf_begin,
                         uint32 buf_size, iovec_app_t *data, uint32 data_len,
--- a/core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/gnuc.h
+++ b/core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/gnuc.h
@ -0,0 +1,14 @@
+/*
+ * Copyright (C) 2023 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#if !defined(__GNUC_PREREQ) && (defined(__GNUC__) || defined(__GNUG__)) \
+    && !defined(__clang__) && defined(__GNUC_MINOR__)
+/* Depending on the platform, the macro is defined in sys/features.h or in
+   features.h. Given that the macro is simple, we re-implement it here
+   instead of dealing with two different include paths.
+
+   __GNUC_PREREQ(maj, min) is non-zero when the compiler version
+   (__GNUC__.__GNUC_MINOR__) is at least maj.min.
+ */
+#define __GNUC_PREREQ(maj, min) \
+    ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
+#endif
--- a/core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/numeric_limits.h
+++ b/core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/numeric_limits.h
@ -1,42 +0,0 @@
-// Part of the Wasmtime Project, under the Apache License v2.0 with LLVM
-// Exceptions. See
-// https://github.com/bytecodealliance/wasmtime/blob/main/LICENSE for license
-// information.
-//
-// Significant parts of this file are derived from cloudabi-utils. See
-// https://github.com/bytecodealliance/wasmtime/blob/main/lib/wasi/sandboxed-system-primitives/src/LICENSE
-// for license information.
-//
-// The upstream file contains the following copyright notice:
-//
-// Copyright (c) 2015 Nuxi, https://nuxi.nl/
-
-#ifndef COMMON_LIMITS_H
-#define COMMON_LIMITS_H
-
-#define NUMERIC_MIN(t)                                    \
-    _Generic((t)0, char                                   \
-             : CHAR_MIN, signed char                      \
-             : SCHAR_MIN, unsigned char : 0, short        \
-             : SHRT_MIN, unsigned short : 0, int          \
-             : INT_MIN, unsigned int : 0, long            \
-             : LONG_MIN, unsigned long : 0, long long     \
-             : LLONG_MIN, unsigned long long : 0, default \
-             : (void)0)
-
-#define NUMERIC_MAX(t)                       \
-    _Generic((t)0, char                      \
-             : CHAR_MAX, signed char         \
-             : SCHAR_MAX, unsigned char      \
-             : UCHAR_MAX, short              \
-             : SHRT_MAX, unsigned short      \
-             : USHRT_MAX, int                \
-             : INT_MAX, unsigned int         \
-             : UINT_MAX, long                \
-             : LONG_MAX, unsigned long       \
-             : ULONG_MAX, long long          \
-             : LLONG_MAX, unsigned long long \
-             : ULLONG_MAX, default           \
-             : (void)0)
-
-#endif
--- a/core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/posix.c
+++ b/core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/posix.c
@ -15,7 +15,6 @@
 #include "bh_platform.h"
 #include "wasmtime_ssp.h"
 #include "locking.h"
-#include "numeric_limits.h"
 #include "posix.h"
 #include "random.h"
 #include "refcount.h"
@ -2257,8 +2256,7 @@ convert_timestamp(__wasi_timestamp_t in, struct timespec *out)
    in /= 1000000000;

    // Clamp to the maximum in case it would overflow our system's time_t.
-    out->tv_sec =
-        (time_t)in < NUMERIC_MAX(time_t) ? (time_t)in : NUMERIC_MAX(time_t);
+    out->tv_sec = (time_t)in < BH_TIME_T_MAX ? (time_t)in : BH_TIME_T_MAX;
 }

 // Converts the provided timestamps and flags to a set of arguments for
@ -3226,6 +3224,7 @@ wasi_ssp_sock_get_reuse_port(
 #else
    errno = ENOTSUP;
    ret = BHT_ERROR;
+    optval = 0;
 #endif /* defined(SO_REUSEPORT) */

    fd_object_release(fo);
--- a/core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/refcount.h
+++ b/core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/refcount.h
@ -16,6 +16,7 @@

 #include "bh_platform.h"
 #include "locking.h"
+#include "gnuc.h"

 #define PRODUCES(...) LOCKS_SHARED(__VA_ARGS__) NO_LOCK_ANALYSIS
 #define CONSUMES(...) UNLOCKS(__VA_ARGS__) NO_LOCK_ANALYSIS
@ -95,6 +96,42 @@ refcount_release(struct refcount *r)
    return old == 1;
 }

+#elif defined(__GNUC_PREREQ)
+
+#if __GNUC_PREREQ(4, 7)
+
+/* Reference counter for the variant that uses the compiler's __atomic
+   builtins; the count is only accessed through the refcount_* helpers. */
+struct refcount {
+    unsigned int count;
+};
+
+/* Initialize the reference counter to the given count, using an atomic
+   store with sequentially consistent ordering. */
+static inline void
+refcount_init(struct refcount *r, unsigned int count)
+{
+    __atomic_store_n(&r->count, count, __ATOMIC_SEQ_CST);
+}
+
+/* Increment the reference counter by one, using an atomic add with
+   acquire ordering. */
+static inline void
+refcount_acquire(struct refcount *r)
+{
+    __atomic_fetch_add(&r->count, 1, __ATOMIC_ACQUIRE);
+}
+
+/* Decrement the reference counter, returning whether the reference
+   dropped to zero. */
+static inline bool
+refcount_release(struct refcount *r)
+{
+    /* __atomic_fetch_sub yields the value held before the subtraction */
+    int old = (int)__atomic_fetch_sub(&r->count, 1, __ATOMIC_RELEASE);
+    /* a previous value of 0 means there was no reference left to release */
+    bh_assert(old != 0 && "Reference count becoming negative");
+    /* previous value 1 => the counter is now 0 */
+    return old == 1;
+}
+
+#else /* else of __GNUC_PREREQ (4.7) */
+#error "Reference counter isn't implemented"
+#endif /* end of __GNUC_PREREQ (4.7) */
+
 #else /* else of CONFIG_HAS_STD_ATOMIC */
 #error "Reference counter isn't implemented"
 #endif /* end of CONFIG_HAS_STD_ATOMIC */
--- a/core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/ssp_config.h
+++ b/core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/ssp_config.h
@ -14,6 +14,7 @@
 #ifndef SSP_CONFIG_H
 #define SSP_CONFIG_H

+#include "gnuc.h"
 #include <stdlib.h>

 #if defined(__FreeBSD__) || defined(__APPLE__) \
@ -107,10 +108,31 @@
 #endif

 #if !defined(BH_PLATFORM_LINUX_SGX)
+/* Clang's __GNUC_PREREQ macro has a different meaning than GCC one,
+so we have to handle this case specially */
+#if defined(__clang__)
+/* Clang provides stdatomic.h since 3.6.0
+See https://releases.llvm.org/3.6.0/tools/clang/docs/ReleaseNotes.html */
+#if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 6)
 #define CONFIG_HAS_STD_ATOMIC 1
 #else
 #define CONFIG_HAS_STD_ATOMIC 0
 #endif
+#elif defined(__GNUC_PREREQ)
+/* Even though older versions of GCC support C11, atomics were
+not implemented until 4.9. See
+https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58016 */
+#if __GNUC_PREREQ(4, 9)
+#define CONFIG_HAS_STD_ATOMIC 1
+#else /* else of __GNUC_PREREQ(4, 9) */
+#define CONFIG_HAS_STD_ATOMIC 0
+#endif /* end of __GNUC_PREREQ(4, 9) */
+#else  /* else of defined(__GNUC_PREREQ) */
+#define CONFIG_HAS_STD_ATOMIC 1
+#endif /* end of defined(__GNUC_PREREQ) */
+#else  /* else of !defined(BH_PLATFORM_LINUX_SGX) */
+#define CONFIG_HAS_STD_ATOMIC 0
+#endif /* end of !defined(BH_PLATFORM_LINUX_SGX) */

 #if !defined(__NuttX__)
 #define CONFIG_HAS_D_INO 1
--- a/core/iwasm/libraries/thread-mgr/thread_manager.c
+++ b/core/iwasm/libraries/thread-mgr/thread_manager.c
@ -76,6 +76,58 @@ traverse_list(bh_list *l, list_visitor visitor, void *user_data)
    }
 }

+/* Call visitor(node, user_data) for the nodes of cluster->exec_env_list,
+ * releasing cluster->lock around every visitor call.
+ *
+ * Assumes cluster->lock is locked on entry; it is held again on return.
+ *
+ * After each visitor call the walk restarts from the head of the list
+ * (presumably because the list can change while the lock is released), and
+ * nodes whose pointer is already recorded in proc_nodes are skipped, so a
+ * node is visited at most once.
+ *
+ * Returns false when the bookkeeping vector cannot be initialized, read or
+ * extended; true otherwise. */
+static bool
+safe_traverse_exec_env_list(WASMCluster *cluster, list_visitor visitor,
+                            void *user_data)
+{
+    /* pointers of the nodes that have already been passed to visitor */
+    Vector proc_nodes;
+    void *node;
+    bool ret = true;
+
+    /* NOTE(review): on failure this jumps to `final`, which calls
+       bh_vector_destroy on the vector whose init failed -- confirm that
+       this is safe. */
+    if (!bh_vector_init(&proc_nodes, cluster->exec_env_list.len, sizeof(void *),
+                        false)) {
+        ret = false;
+        goto final;
+    }
+
+    node = bh_list_first_elem(&cluster->exec_env_list);
+
+    while (node) {
+        bool already_processed = false;
+        void *proc_node;
+        /* linear search for node among the ones visited so far */
+        for (size_t i = 0; i < bh_vector_size(&proc_nodes); i++) {
+            if (!bh_vector_get(&proc_nodes, i, &proc_node)) {
+                ret = false;
+                goto final;
+            }
+            if (proc_node == node) {
+                already_processed = true;
+                break;
+            }
+        }
+        if (already_processed) {
+            node = bh_list_elem_next(node);
+            continue;
+        }
+
+        /* the visitor runs without cluster->lock held */
+        os_mutex_unlock(&cluster->lock);
+        visitor(node, user_data);
+        os_mutex_lock(&cluster->lock);
+        if (!bh_vector_append(&proc_nodes, &node)) {
+            ret = false;
+            goto final;
+        }
+
+        /* start over from the head; visited nodes are skipped above */
+        node = bh_list_first_elem(&cluster->exec_env_list);
+    }
+
+final:
+    bh_vector_destroy(&proc_nodes);
+
+    return ret;
+}
+
 /* The caller must lock cluster->lock */
 static bool
 allocate_aux_stack(WASMExecEnv *exec_env, uint32 *start, uint32 *size)
@ -344,7 +396,6 @@ wasm_cluster_del_exec_env(WASMCluster *cluster, WASMExecEnv *exec_env)
        os_mutex_unlock(&cluster->debug_inst->wait_lock);
    }
 #endif
-
    if (bh_list_remove(&cluster->exec_env_list, exec_env) != 0)
        ret = false;

@ -478,7 +529,7 @@ fail4:
    /* free the allocated aux stack space */
    free_aux_stack(exec_env, aux_stack_start);
 fail3:
-    wasm_exec_env_destroy(new_exec_env);
+    wasm_exec_env_destroy_internal(new_exec_env);
 fail2:
    wasm_runtime_deinstantiate_internal(new_module_inst, true);
 fail1:
@ -616,7 +667,7 @@ fail3:
    if (alloc_aux_stack)
        free_aux_stack(exec_env, aux_stack_start);
 fail2:
-    wasm_exec_env_destroy(new_exec_env);
+    wasm_exec_env_destroy_internal(new_exec_env);
 fail1:
    os_mutex_unlock(&cluster->lock);

@ -786,16 +837,22 @@ wasm_cluster_join_thread(WASMExecEnv *exec_env, void **ret_val)
    korp_tid handle;

    os_mutex_lock(&cluster_list_lock);
+    os_mutex_lock(&exec_env->cluster->lock);
+
    if (!clusters_have_exec_env(exec_env) || exec_env->thread_is_detached) {
        /* Invalid thread, thread has exited or thread has been detached */
        if (ret_val)
            *ret_val = NULL;
+        os_mutex_unlock(&exec_env->cluster->lock);
        os_mutex_unlock(&cluster_list_lock);
        return 0;
    }
    exec_env->wait_count++;
    handle = exec_env->handle;
+
+    os_mutex_unlock(&exec_env->cluster->lock);
    os_mutex_unlock(&cluster_list_lock);
+
    return os_thread_join(handle, ret_val);
 }

@ -878,15 +935,22 @@ int32
 wasm_cluster_cancel_thread(WASMExecEnv *exec_env)
 {
    os_mutex_lock(&cluster_list_lock);
+    os_mutex_lock(&exec_env->cluster->lock);
+
+    if (!exec_env->cluster) {
+        goto final;
+    }
    if (!clusters_have_exec_env(exec_env)) {
        /* Invalid thread or the thread has exited */
-        os_mutex_unlock(&cluster_list_lock);
-        return 0;
+        goto final;
    }
-    os_mutex_unlock(&cluster_list_lock);

    set_thread_cancel_flags(exec_env);

+final:
+    os_mutex_unlock(&exec_env->cluster->lock);
+    os_mutex_unlock(&cluster_list_lock);
+
    return 0;
 }

@ -908,11 +972,9 @@ wasm_cluster_terminate_all(WASMCluster *cluster)
 {
    os_mutex_lock(&cluster->lock);
    cluster->processing = true;
-    os_mutex_unlock(&cluster->lock);

-    traverse_list(&cluster->exec_env_list, terminate_thread_visitor, NULL);
+    safe_traverse_exec_env_list(cluster, terminate_thread_visitor, NULL);

-    os_mutex_lock(&cluster->lock);
    cluster->processing = false;
    os_mutex_unlock(&cluster->lock);
 }
@ -923,12 +985,10 @@ wasm_cluster_terminate_all_except_self(WASMCluster *cluster,
 {
    os_mutex_lock(&cluster->lock);
    cluster->processing = true;
-    os_mutex_unlock(&cluster->lock);

-    traverse_list(&cluster->exec_env_list, terminate_thread_visitor,
-                  (void *)exec_env);
+    safe_traverse_exec_env_list(cluster, terminate_thread_visitor,
+                                (void *)exec_env);

-    os_mutex_lock(&cluster->lock);
    cluster->processing = false;
    os_mutex_unlock(&cluster->lock);
 }
@ -950,11 +1010,9 @@ wams_cluster_wait_for_all(WASMCluster *cluster)
 {
    os_mutex_lock(&cluster->lock);
    cluster->processing = true;
-    os_mutex_unlock(&cluster->lock);

-    traverse_list(&cluster->exec_env_list, wait_for_thread_visitor, NULL);
+    safe_traverse_exec_env_list(cluster, wait_for_thread_visitor, NULL);

-    os_mutex_lock(&cluster->lock);
    cluster->processing = false;
    os_mutex_unlock(&cluster->lock);
 }
@ -965,12 +1023,10 @@ wasm_cluster_wait_for_all_except_self(WASMCluster *cluster,
 {
    os_mutex_lock(&cluster->lock);
    cluster->processing = true;
-    os_mutex_unlock(&cluster->lock);

-    traverse_list(&cluster->exec_env_list, wait_for_thread_visitor,
-                  (void *)exec_env);
+    safe_traverse_exec_env_list(cluster, wait_for_thread_visitor,
+                                (void *)exec_env);

-    os_mutex_lock(&cluster->lock);
    cluster->processing = false;
    os_mutex_unlock(&cluster->lock);
 }
--- a/core/iwasm/libraries/wasi-nn/.dockerignore
+++ b/core/iwasm/libraries/wasi-nn/.dockerignore
@ -1 +0,0 @@
-**/Dockerfile
--- a/core/iwasm/libraries/wasi-nn/README.md
+++ b/core/iwasm/libraries/wasi-nn/README.md
@ -17,27 +17,76 @@ By only including this file in your WASM application you will bind WASI-NN into
 To run the tests we assume that the current directory is the root of the repository.


-1. Build the docker image,
+### Build the runtime
+
+Build the runtime base image,

 ```
-docker build -t wasi-nn -f core/iwasm/libraries/wasi-nn/test/Dockerfile .
+docker build -t wasi-nn-base -f core/iwasm/libraries/wasi-nn/test/Dockerfile.base .
 ```

-2. Run the container
+Build the runtime image for your execution target type.
+
+`EXECUTION_TYPE` can be:
+* `cpu`
+* `nvidia-gpu`

 ```
-docker run wasi-nn
+EXECUTION_TYPE=cpu
+docker build -t wasi-nn-${EXECUTION_TYPE} -f core/iwasm/libraries/wasi-nn/test/Dockerfile.${EXECUTION_TYPE} .
 ```

+
+### Build wasm app
+
+```
+docker build -t wasi-nn-compile -f core/iwasm/libraries/wasi-nn/test/Dockerfile.compile .
+```
+
+```
+docker run -v $PWD/core/iwasm/libraries/wasi-nn:/wasi-nn wasi-nn-compile
+```
+
+
+### Run wasm app
+
 If all the tests have run properly you will see the following message in the terminal,

 ```
 Tests: passed!
 ```

+* CPU
+
+```
+docker run \
+    -v $PWD/core/iwasm/libraries/wasi-nn/test:/assets wasi-nn-cpu \
+    --dir=/assets \
+    --env="TARGET=cpu" \
+    /assets/test_tensorflow.wasm
+```
+
+* (NVIDIA) GPU
+
+```
+docker run \
+    --runtime=nvidia \
+    -v $PWD/core/iwasm/libraries/wasi-nn/test:/assets wasi-nn-nvidia-gpu \
+    --dir=/assets \
+    --env="TARGET=gpu" \
+    /assets/test_tensorflow.wasm
+```
+
+Requirements:
+* [NVIDIA docker](https://github.com/NVIDIA/nvidia-docker).
+
 ## What is missing

-* Only 1 model at a time is supported.
+Supported:
+
+* Only 1 WASM app at a time.
+* Only 1 model at a time.
    * `graph` and `graph-execution-context` are ignored.
-* Only `tensorflow` (lite) is supported.
-* Only `cpu` is supported.
+* Graph encoding: `tensorflowlite`.
+* Execution target: `cpu` and `gpu`.
+* Tensor type: `fp32`.
--- a/core/iwasm/libraries/wasi-nn/logger.h
+++ b/core/iwasm/libraries/wasi-nn/logger.h
@ -1,55 +0,0 @@
-/*
- * Copyright (C) 2019 Intel Corporation.  All rights reserved.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- */
-
-#ifndef WASI_NN_LOGGER_H
-#define WASI_NN_LOGGER_H
-
-#include <stdio.h>
-#include <string.h>
-
-#define __FILENAME__ \
-    (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
-
-/* Disable a level by removing the define */
-#define ENABLE_ERR_LOG
-#define ENABLE_WARN_LOG
-#define ENABLE_DBG_LOG
-#define ENABLE_INFO_LOG
-
-// Definition of the levels
-#ifdef ENABLE_ERR_LOG
-#define NN_ERR_PRINTF(fmt, ...)                                    \
-    printf("[%s:%d] " fmt, __FILENAME__, __LINE__, ##__VA_ARGS__); \
-    printf("\n");                                                  \
-    fflush(stdout)
-#else
-#define NN_ERR_PRINTF(fmt, ...)
-#endif
-#ifdef ENABLE_WARN_LOG
-#define NN_WARN_PRINTF(fmt, ...)                                   \
-    printf("[%s:%d] " fmt, __FILENAME__, __LINE__, ##__VA_ARGS__); \
-    printf("\n");                                                  \
-    fflush(stdout)
-#else
-#define NN_WARN_PRINTF(fmt, ...)
-#endif
-#ifdef ENABLE_DBG_LOG
-#define NN_DBG_PRINTF(fmt, ...)                                    \
-    printf("[%s:%d] " fmt, __FILENAME__, __LINE__, ##__VA_ARGS__); \
-    printf("\n");                                                  \
-    fflush(stdout)
-#else
-#define NN_DBG_PRINTF(fmt, ...)
-#endif
-#ifdef ENABLE_INFO_LOG
-#define NN_INFO_PRINTF(fmt, ...)                                   \
-    printf("[%s:%d] " fmt, __FILENAME__, __LINE__, ##__VA_ARGS__); \
-    printf("\n");                                                  \
-    fflush(stdout)
-#else
-#define NN_INFO_PRINTF(fmt, ...)
-#endif
-
-#endif
--- a/core/iwasm/libraries/wasi-nn/src/utils/logger.h
+++ b/core/iwasm/libraries/wasi-nn/src/utils/logger.h
@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2019 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef WASI_NN_LOGGER_H
+#define WASI_NN_LOGGER_H
+
+#include <stdio.h>
+#include <string.h>
+
+#define __FILENAME__ \
+    (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
+
+/* Disable a level by removing the define */
+#define ENABLE_ERR_LOG
+#define ENABLE_WARN_LOG
+#define ENABLE_DBG_LOG
+#define ENABLE_INFO_LOG
+
+/* Definition of the levels.
+ *
+ * When the matching ENABLE_*_LOG symbol is defined, each macro prints
+ * "[file:line] " followed by the formatted message and a newline to stdout
+ * and then flushes stdout; otherwise it expands to nothing.
+ * The do { ... } while (0) wrapper makes each expansion a single statement,
+ * so the macros can be used in an unbraced if/else body. */
+#ifdef ENABLE_ERR_LOG
+#define NN_ERR_PRINTF(fmt, ...)                                        \
+    do {                                                               \
+        printf("[%s:%d] " fmt, __FILENAME__, __LINE__, ##__VA_ARGS__); \
+        printf("\n");                                                  \
+        fflush(stdout);                                                \
+    } while (0)
+#else
+#define NN_ERR_PRINTF(fmt, ...)
+#endif
+#ifdef ENABLE_WARN_LOG
+#define NN_WARN_PRINTF(fmt, ...)                                       \
+    do {                                                               \
+        printf("[%s:%d] " fmt, __FILENAME__, __LINE__, ##__VA_ARGS__); \
+        printf("\n");                                                  \
+        fflush(stdout);                                                \
+    } while (0)
+#else
+#define NN_WARN_PRINTF(fmt, ...)
+#endif
+#ifdef ENABLE_DBG_LOG
+#define NN_DBG_PRINTF(fmt, ...)                                        \
+    do {                                                               \
+        printf("[%s:%d] " fmt, __FILENAME__, __LINE__, ##__VA_ARGS__); \
+        printf("\n");                                                  \
+        fflush(stdout);                                                \
+    } while (0)
+#else
+#define NN_DBG_PRINTF(fmt, ...)
+#endif
+#ifdef ENABLE_INFO_LOG
+#define NN_INFO_PRINTF(fmt, ...)                                       \
+    do {                                                               \
+        printf("[%s:%d] " fmt, __FILENAME__, __LINE__, ##__VA_ARGS__); \
+        printf("\n");                                                  \
+        fflush(stdout);                                                \
+    } while (0)
+#else
+#define NN_INFO_PRINTF(fmt, ...)
+#endif
+
+#endif
--- a/core/iwasm/libraries/wasi-nn/src/utils/wasi_nn_app_native.c
+++ b/core/iwasm/libraries/wasi-nn/src/utils/wasi_nn_app_native.c
@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2019 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "wasi_nn_app_native.h"
+
+/* Convert one graph_builder_wasm (app-space offset + size) into a
+ * graph_builder holding a native pointer.
+ *
+ * The range [buf_offset, buf_offset + size) is validated against the
+ * instance first. No data is copied: builder->buf points at the
+ * translated address inside the instance.
+ *
+ * Returns success, or invalid_argument when the range is not valid. */
+static error
+graph_builder_app_native(wasm_module_inst_t instance,
+                         graph_builder_wasm *builder_wasm,
+                         graph_builder *builder)
+{
+    if (!wasm_runtime_validate_app_addr(instance, builder_wasm->buf_offset,
+                                        builder_wasm->size * sizeof(uint8_t))) {
+        NN_ERR_PRINTF("builder_wasm->buf_offset is invalid");
+        return invalid_argument;
+    }
+
+    builder->buf = (uint8_t *)wasm_runtime_addr_app_to_native(
+        instance, builder_wasm->buf_offset);
+    builder->size = builder_wasm->size;
+    return success;
+}
+
+/* Convert a graph_builder_array_wasm (app-space layout) into a
+ * graph_builder_array with native pointers.
+ *
+ * instance:           module instance that owns the app memory
+ * builder_array_wasm: native pointer to the app-side descriptor
+ * builder_array:      receives the converted array on success
+ *
+ * On success builder_array->buf is an array of builder_array->size
+ * graph_builder entries allocated with wasm_runtime_malloc; nothing in this
+ * function frees it on the success path. The entries themselves point
+ * into the instance's memory (no data is copied).
+ *
+ * Returns success; invalid_argument when the descriptor, the element count
+ * or any referenced range is not valid; missing_memory when the allocation
+ * fails. */
+error
+graph_builder_array_app_native(wasm_module_inst_t instance,
+                               graph_builder_array_wasm *builder_array_wasm,
+                               graph_builder_array *builder_array)
+{
+    if (!wasm_runtime_validate_native_addr(instance, builder_array_wasm,
+                                           sizeof(graph_builder_array_wasm))) {
+        NN_ERR_PRINTF("builder_array_wasm is invalid");
+        return invalid_argument;
+    }
+
+    /* Read the app-controlled count once, so that every check and the loop
+       below use the same value. */
+    const uint32_t count = builder_array_wasm->size;
+
+    NN_DBG_PRINTF("Graph builder array contains %d elements", count);
+
+    /* Both byte sizes computed below must fit in 32 bits; otherwise a huge
+       count could wrap to a small length that passes validation or yields
+       an undersized allocation. */
+    if (count > UINT32_MAX / sizeof(graph_builder_wasm)
+        || count > UINT32_MAX / sizeof(graph_builder)) {
+        NN_ERR_PRINTF("builder_array_wasm->size is too large");
+        return invalid_argument;
+    }
+
+    if (!wasm_runtime_validate_app_addr(
+            instance, builder_array_wasm->buf_offset,
+            (uint32_t)(count * sizeof(graph_builder_wasm)))) {
+        NN_ERR_PRINTF("builder_array_wasm->buf_offset is invalid");
+        return invalid_argument;
+    }
+
+    graph_builder_wasm *builder_wasm =
+        (graph_builder_wasm *)wasm_runtime_addr_app_to_native(
+            instance, builder_array_wasm->buf_offset);
+
+    graph_builder *builder = (graph_builder *)wasm_runtime_malloc(
+        (uint32_t)(count * sizeof(graph_builder)));
+    if (builder == NULL)
+        return missing_memory;
+
+    for (uint32_t i = 0; i < count; ++i) {
+        error res;
+        if (success
+            != (res = graph_builder_app_native(instance, &builder_wasm[i],
+                                               &builder[i]))) {
+            wasm_runtime_free(builder);
+            return res;
+        }
+
+        /* report the size of the entry just converted, not of entry 0 */
+        NN_DBG_PRINTF("Graph builder %d contains %d elements", i,
+                      builder[i].size);
+    }
+
+    builder_array->buf = builder;
+    builder_array->size = count;
+    return success;
+}
+
+/* Translate the tensor's data_offset into a native pointer.
+ *
+ * The range of total_elements bytes starting at data_offset is validated
+ * against the instance first; no data is copied.
+ * NOTE(review): total_elements is passed as the length of the range as is,
+ * without scaling by an element size -- confirm that this matches the
+ * tensor type.
+ *
+ * Returns success, or invalid_argument when the range is not valid. */
+static error
+tensor_data_app_native(wasm_module_inst_t instance, uint32_t total_elements,
+                       tensor_wasm *input_tensor_wasm, tensor_data *data)
+{
+    if (!wasm_runtime_validate_app_addr(
+            instance, input_tensor_wasm->data_offset, total_elements)) {
+        NN_ERR_PRINTF("input_tensor_wasm->data_offset is invalid");
+        return invalid_argument;
+    }
+    *data = (tensor_data)wasm_runtime_addr_app_to_native(
+        instance, input_tensor_wasm->data_offset);
+    return success;
+}
+
+/* Build a native tensor_dimensions from the tensor's dimensions_offset.
+ *
+ * instance:          module instance that owns the app memory
+ * input_tensor_wasm: app-side tensor whose dimensions are converted
+ * dimensions:        receives a tensor_dimensions allocated with
+ *                    wasm_runtime_malloc; the caller releases it with
+ *                    wasm_runtime_free. Its buf points into the instance's
+ *                    memory (the dimension values are not copied).
+ *
+ * Returns success; invalid_argument when the descriptor or the dimension
+ * buffer is not a valid app range; missing_memory when the allocation
+ * fails. */
+static error
+tensor_dimensions_app_native(wasm_module_inst_t instance,
+                             tensor_wasm *input_tensor_wasm,
+                             tensor_dimensions **dimensions)
+{
+    if (!wasm_runtime_validate_app_addr(instance,
+                                        input_tensor_wasm->dimensions_offset,
+                                        sizeof(tensor_dimensions_wasm))) {
+        NN_ERR_PRINTF("input_tensor_wasm->dimensions_offset is invalid");
+        return invalid_argument;
+    }
+
+    tensor_dimensions_wasm *dimensions_wasm =
+        (tensor_dimensions_wasm *)wasm_runtime_addr_app_to_native(
+            instance, input_tensor_wasm->dimensions_offset);
+
+    /* Read the app-controlled count once and validate the whole buffer of
+       ndims uint32_t values, since callers index buf up to size. The
+       division guards the multiplication against wrapping. */
+    const uint32_t ndims = dimensions_wasm->size;
+
+    if (ndims > UINT32_MAX / sizeof(uint32_t)
+        || !wasm_runtime_validate_app_addr(
+            instance, dimensions_wasm->buf_offset,
+            (uint32_t)(ndims * sizeof(uint32_t)))) {
+        NN_ERR_PRINTF("dimensions_wasm->buf_offset is invalid");
+        return invalid_argument;
+    }
+
+    *dimensions =
+        (tensor_dimensions *)wasm_runtime_malloc(sizeof(tensor_dimensions));
+    /* check the allocation result, not the out-parameter itself */
+    if (*dimensions == NULL)
+        return missing_memory;
+
+    (*dimensions)->size = ndims;
+    (*dimensions)->buf = (uint32_t *)wasm_runtime_addr_app_to_native(
+        instance, dimensions_wasm->buf_offset);
+
+    NN_DBG_PRINTF("Number of dimensions: %d", (*dimensions)->size);
+    return success;
+}
+
+/* Convert a tensor_wasm (app-space layout) into a native tensor.
+ *
+ * instance:          module instance that owns the app memory
+ * input_tensor_wasm: native pointer to the app-side tensor descriptor
+ * input_tensor:      receives type, dimensions and data on success
+ *
+ * On success input_tensor->dimensions is the object obtained from
+ * tensor_dimensions_app_native; this function only frees it on the error
+ * path. input_tensor->data is a translated pointer, not a copy.
+ *
+ * Returns success, invalid_argument when the descriptor is not a valid
+ * native address, or the error reported by the dimension/data helpers. */
+error
+tensor_app_native(wasm_module_inst_t instance, tensor_wasm *input_tensor_wasm,
+                  tensor *input_tensor)
+{
+    NN_DBG_PRINTF("Converting tensor_wasm to tensor");
+    if (!wasm_runtime_validate_native_addr(instance, input_tensor_wasm,
+                                           sizeof(tensor_wasm))) {
+        NN_ERR_PRINTF("input_tensor_wasm is invalid");
+        return invalid_argument;
+    }
+
+    error res;
+
+    tensor_dimensions *dimensions = NULL;
+    if (success
+        != (res = tensor_dimensions_app_native(instance, input_tensor_wasm,
+                                               &dimensions))) {
+        NN_ERR_PRINTF("error when parsing dimensions");
+        return res;
+    }
+
+    /* total_elements is the product of all dimension values.
+       NOTE(review): the product is accumulated in a uint32_t and can wrap
+       for large dimensions -- confirm whether that needs to be rejected. */
+    uint32_t total_elements = 1;
+    for (uint32_t i = 0; i < dimensions->size; ++i) {
+        total_elements *= dimensions->buf[i];
+        NN_DBG_PRINTF("Dimension %d: %d", i, dimensions->buf[i]);
+    }
+    NN_DBG_PRINTF("Tensor type: %d", input_tensor_wasm->type);
+    NN_DBG_PRINTF("Total number of elements: %d", total_elements);
+
+    tensor_data data = NULL;
+    if (success
+        != (res = tensor_data_app_native(instance, total_elements,
+                                         input_tensor_wasm, &data))) {
+        /* dimensions was allocated above and is not handed out on failure */
+        wasm_runtime_free(dimensions);
+        return res;
+    }
+
+    input_tensor->type = input_tensor_wasm->type;
+    input_tensor->dimensions = dimensions;
+    input_tensor->data = data;
+    return success;
+}
--- a/core/iwasm/libraries/wasi-nn/src/utils/wasi_nn_app_native.h
+++ b/core/iwasm/libraries/wasi-nn/src/utils/wasi_nn_app_native.h
@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2019 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef WASI_NN_APP_NATIVE
+#define WASI_NN_APP_NATIVE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+
+#include "wasi_nn.h"
+#include "logger.h"
+
+#include "bh_platform.h"
+#include "wasm_export.h"
+
+/*
+ * The structs below describe wasi-nn arguments as the WASM app lays them
+ * out: pointer members are replaced by 32-bit `*_offset` fields.
+ */
+
+/* One graph builder buffer: where it is (`buf_offset`) and its `size`.
+ * NOTE(review): presumably `buf_offset` is an app address and `size` is in
+ * bytes -- confirm against graph_builder_array_app_native(). */
+typedef struct {
+    uint32_t buf_offset;
+    uint32_t size;
+} graph_builder_wasm;
+
+/* Array of graph builders.
+ * NOTE(review): presumably `size` counts the builders -- confirm. */
+typedef struct {
+    uint32_t buf_offset;
+    uint32_t size;
+} graph_builder_array_wasm;
+
+/* Tensor dimensions: `buf_offset` is translated with
+ * wasm_runtime_addr_app_to_native() to reach the uint32_t dimension values,
+ * `size` is the number of dimensions. */
+typedef struct {
+    uint32_t buf_offset;
+    uint32_t size;
+} tensor_dimensions_wasm;
+
+/* A tensor: offsets of its dimensions and data, plus the element type */
+typedef struct {
+    uint32_t dimensions_offset;
+    tensor_type type;
+    uint32_t data_offset;
+} tensor_wasm;
+
+/**
+ * @brief Convert a graph builder array from WASM app format to native format.
+ *
+ * @param instance        Module instance the app data belongs to.
+ * @param builder         Graph builder array in WASM app format.
+ * @param builder_native  Native graph builder array to fill in.
+ * @return error          Execution status.
+ */
+error
+graph_builder_array_app_native(wasm_module_inst_t instance,
+                               graph_builder_array_wasm *builder,
+                               graph_builder_array *builder_native);
+
+/**
+ * @brief Convert a tensor from WASM app format to native format.
+ *
+ * @param instance             Module instance the app data belongs to.
+ * @param input_tensor         Tensor in WASM app format.
+ * @param input_tensor_native  Native tensor to fill in.
+ * @return error               Execution status.
+ */
+error
+tensor_app_native(wasm_module_inst_t instance, tensor_wasm *input_tensor,
+                  tensor *input_tensor_native);
+
+#endif
--- a/core/iwasm/libraries/wasi-nn/src/wasi_nn.c
+++ b/core/iwasm/libraries/wasi-nn/src/wasi_nn.c
@ -0,0 +1,302 @@
+/*
+ * Copyright (C) 2019 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+
+#include "wasi_nn.h"
+#include "wasi_nn_app_native.h"
+#include "logger.h"
+#include "wasi_nn_tensorflowlite.hpp"
+
+#include "bh_platform.h"
+#include "wasm_export.h"
+#include "wasm_runtime.h"
+#include "aot_runtime.h"
+
+/* Backend entry point types, one per wasi-nn operation */
+
+/* Signature of a backend `load` implementation */
+typedef error (*LOAD)(graph_builder_array *, graph_encoding, execution_target,
+                      graph *);
+/* Signature of a backend `init_execution_context` implementation */
+typedef error (*INIT_EXECUTION_CONTEXT)(graph, graph_execution_context *);
+/* Signature of a backend `set_input` implementation */
+typedef error (*SET_INPUT)(graph_execution_context, uint32_t, tensor *);
+/* Signature of a backend `compute` implementation */
+typedef error (*COMPUTE)(graph_execution_context);
+/* Signature of a backend `get_output` implementation */
+typedef error (*GET_OUTPUT)(graph_execution_context, uint32_t, tensor_data,
+                            uint32_t *);
+
+/* The set of functions a backend provides. A backend counts as implemented
+ * only when all five members are non-NULL. */
+typedef struct {
+    LOAD load;
+    INIT_EXECUTION_CONTEXT init_execution_context;
+    SET_INPUT set_input;
+    COMPUTE compute;
+    GET_OUTPUT get_output;
+} api_function;
+
+/* Global variables */
+
+/*
+ * Backend dispatch table, indexed by `graph_encoding` value. Rows filled
+ * with NULL have no backend; only the last row (index 4) is wired, to the
+ * TensorFlow Lite backend.
+ * NOTE(review): assumes the `graph_encoding` enumerators are numbered so
+ * that `tensorflowlite` equals 4 -- confirm against wasi_nn_types.h.
+ */
+static api_function lookup[] = {
+    { NULL, NULL, NULL, NULL, NULL },
+    { NULL, NULL, NULL, NULL, NULL },
+    { NULL, NULL, NULL, NULL, NULL },
+    { NULL, NULL, NULL, NULL, NULL },
+    { tensorflowlite_load, tensorflowlite_init_execution_context,
+      tensorflowlite_set_input, tensorflowlite_compute,
+      tensorflowlite_get_output }
+};
+
+/* Utils */
+
+/*
+ * Tell whether `encoding` selects a backend whose five entry points are all
+ * present in `lookup`.
+ *
+ * @param encoding  encoding requested by the WASM app
+ * @return true when a complete backend is registered for `encoding`;
+ *         false otherwise, including for values outside the table
+ */
+static bool
+is_encoding_implemented(graph_encoding encoding)
+{
+    /* `encoding` comes straight from the WASM app: never index past the
+     * end of the table. The unsigned cast also rejects negative values. */
+    if ((uint32_t)encoding >= sizeof(lookup) / sizeof(lookup[0]))
+        return false;
+
+    return lookup[encoding].load && lookup[encoding].init_execution_context
+           && lookup[encoding].set_input && lookup[encoding].compute
+           && lookup[encoding].get_output;
+}
+
+/*
+ * Guard used by the wasi-nn calls that need a loaded model.
+ *
+ * @return success when the context is flagged as initialized,
+ *         runtime_error (after logging) otherwise
+ */
+static error
+is_model_initialized(WASINNContext *wasi_nn_ctx)
+{
+    if (wasi_nn_ctx->is_initialized)
+        return success;
+
+    NN_ERR_PRINTF("Model not initialized.");
+    return runtime_error;
+}
+
+/*
+ * Fetch the WASI-NN context stored in the extra data of a module instance.
+ * Both interpreter (bytecode) and AOT instances are handled, depending on
+ * which execution modes are compiled in. The result is asserted to be
+ * non-NULL.
+ */
+WASINNContext *
+wasm_runtime_get_wasi_nn_ctx(wasm_module_inst_t instance)
+{
+    WASINNContext *wasi_nn_ctx = NULL;
+
+#if WASM_ENABLE_INTERP != 0
+    if (instance->module_type == Wasm_Module_Bytecode) {
+        WASMModuleInstanceExtra *interp_extra;
+
+        NN_DBG_PRINTF("Getting ctx from WASM");
+        interp_extra =
+            (WASMModuleInstanceExtra *)((WASMModuleInstance *)instance)->e;
+        wasi_nn_ctx = interp_extra->wasi_nn_ctx;
+    }
+#endif
+#if WASM_ENABLE_AOT != 0
+    if (instance->module_type == Wasm_Module_AoT) {
+        AOTModuleInstanceExtra *aot_extra;
+
+        NN_DBG_PRINTF("Getting ctx from AOT");
+        aot_extra =
+            (AOTModuleInstanceExtra *)((AOTModuleInstance *)instance)->e;
+        wasi_nn_ctx = aot_extra->wasi_nn_ctx;
+    }
+#endif
+
+    bh_assert(wasi_nn_ctx != NULL);
+    NN_DBG_PRINTF("Returning ctx");
+    return wasi_nn_ctx;
+}
+
+/* WASI-NN implementation */
+
+/*
+ * wasi-nn `load`: hand the graph builders supplied by the app to the
+ * backend registered for `encoding`.
+ *
+ * @param exec_env  execution environment of the calling app
+ * @param builder   graph builder array in WASM app format
+ * @param encoding  model encoding; must have a complete backend in `lookup`
+ * @param target    execution target forwarded to the backend
+ * @param g         receives the graph handle
+ * @return invalid_encoding when no backend handles `encoding`,
+ *         invalid_argument when `g` fails address validation, otherwise the
+ *         status of the builder conversion or of the backend `load`
+ *
+ * The WASI-NN context is marked initialized, and `encoding` recorded in it,
+ * only when the backend reports success.
+ */
+error
+wasi_nn_load(wasm_exec_env_t exec_env, graph_builder_array_wasm *builder,
+             graph_encoding encoding, execution_target target, graph *g)
+{
+    NN_DBG_PRINTF("Running wasi_nn_load [encoding=%d, target=%d]...", encoding,
+                  target);
+
+    if (!is_encoding_implemented(encoding)) {
+        NN_ERR_PRINTF("Encoding not supported.");
+        return invalid_encoding;
+    }
+
+    wasm_module_inst_t instance = wasm_runtime_get_module_inst(exec_env);
+    bh_assert(instance);
+
+    error res;
+    graph_builder_array builder_native = { 0 };
+    if (success
+        != (res = graph_builder_array_app_native(instance, builder,
+                                                 &builder_native)))
+        return res;
+
+    if (!wasm_runtime_validate_native_addr(instance, g, sizeof(graph))) {
+        NN_ERR_PRINTF("graph is invalid");
+        res = invalid_argument;
+        goto fail;
+    }
+
+    res = lookup[encoding].load(&builder_native, encoding, target, g);
+
+    NN_DBG_PRINTF("wasi_nn_load finished with status %d [graph=%d]", res, *g);
+
+    /* A failed load must not make the context look usable: the other
+     * wasi-nn calls rely on is_initialized to refuse running without a
+     * model. */
+    if (res == success) {
+        WASINNContext *wasi_nn_ctx = wasm_runtime_get_wasi_nn_ctx(instance);
+
+        wasi_nn_ctx->current_encoding = encoding;
+        wasi_nn_ctx->is_initialized = true;
+    }
+
+fail:
+    // XXX: Free intermediate structure pointers
+    if (builder_native.buf)
+        wasm_runtime_free(builder_native.buf);
+
+    return res;
+}
+
+/*
+ * wasi-nn `init_execution_context`: ask the current backend to create an
+ * execution context for graph `g`.
+ *
+ * @return runtime_error when no model is initialized, invalid_argument when
+ *         `ctx` fails address validation, otherwise the backend status
+ *
+ * Whatever the backend reports, `*ctx` ends up holding the graph handle `g`.
+ */
+error
+wasi_nn_init_execution_context(wasm_exec_env_t exec_env, graph g,
+                               graph_execution_context *ctx)
+{
+    error status;
+
+    NN_DBG_PRINTF("Running wasi_nn_init_execution_context [graph=%d]...", g);
+
+    wasm_module_inst_t instance = wasm_runtime_get_module_inst(exec_env);
+    bh_assert(instance);
+    WASINNContext *wasi_nn_ctx = wasm_runtime_get_wasi_nn_ctx(instance);
+
+    status = is_model_initialized(wasi_nn_ctx);
+    if (status != success)
+        return status;
+
+    if (!wasm_runtime_validate_native_addr(instance, ctx,
+                                           sizeof(graph_execution_context))) {
+        NN_ERR_PRINTF("ctx is invalid");
+        return invalid_argument;
+    }
+
+    status =
+        lookup[wasi_nn_ctx->current_encoding].init_execution_context(g, ctx);
+    *ctx = g;
+    NN_DBG_PRINTF(
+        "wasi_nn_init_execution_context finished with status %d [ctx=%d]",
+        status, *ctx);
+    return status;
+}
+
+/*
+ * wasi-nn `set_input`: convert the app tensor to native format and pass it
+ * to the current backend as input `index` of context `ctx`.
+ *
+ * @return runtime_error when no model is initialized, the conversion error
+ *         when the tensor cannot be converted, otherwise the backend status
+ */
+error
+wasi_nn_set_input(wasm_exec_env_t exec_env, graph_execution_context ctx,
+                  uint32_t index, tensor_wasm *input_tensor)
+{
+    tensor input_tensor_native = { 0 };
+    error status;
+
+    NN_DBG_PRINTF("Running wasi_nn_set_input [ctx=%d, index=%d]...", ctx,
+                  index);
+
+    wasm_module_inst_t instance = wasm_runtime_get_module_inst(exec_env);
+    bh_assert(instance);
+    WASINNContext *wasi_nn_ctx = wasm_runtime_get_wasi_nn_ctx(instance);
+
+    status = is_model_initialized(wasi_nn_ctx);
+    if (status != success)
+        return status;
+
+    status = tensor_app_native(instance, input_tensor, &input_tensor_native);
+    if (status != success)
+        return status;
+
+    status = lookup[wasi_nn_ctx->current_encoding].set_input(
+        ctx, index, &input_tensor_native);
+
+    // XXX: Free intermediate structure pointers
+    if (input_tensor_native.dimensions)
+        wasm_runtime_free(input_tensor_native.dimensions);
+
+    NN_DBG_PRINTF("wasi_nn_set_input finished with status %d", status);
+    return status;
+}
+
+/*
+ * wasi-nn `compute`: run inference on context `ctx` with the current
+ * backend.
+ *
+ * @return runtime_error when no model is initialized, otherwise the backend
+ *         status
+ */
+error
+wasi_nn_compute(wasm_exec_env_t exec_env, graph_execution_context ctx)
+{
+    error status;
+
+    NN_DBG_PRINTF("Running wasi_nn_compute [ctx=%d]...", ctx);
+
+    wasm_module_inst_t instance = wasm_runtime_get_module_inst(exec_env);
+    bh_assert(instance);
+    WASINNContext *wasi_nn_ctx = wasm_runtime_get_wasi_nn_ctx(instance);
+
+    status = is_model_initialized(wasi_nn_ctx);
+    if (status != success)
+        return status;
+
+    status = lookup[wasi_nn_ctx->current_encoding].compute(ctx);
+    NN_DBG_PRINTF("wasi_nn_compute finished with status %d", status);
+    return status;
+}
+
+/*
+ * wasi-nn `get_output`: fetch output tensor `index` of context `ctx` from
+ * the current backend.
+ *
+ * @param output_tensor       destination buffer handed to the backend
+ * @param output_tensor_size  validated here, then handed to the backend
+ * @return runtime_error when no model is initialized, invalid_argument when
+ *         `output_tensor_size` fails address validation, otherwise the
+ *         backend status
+ *
+ * NOTE(review): only `output_tensor_size` is address-validated in this
+ * function; `output_tensor` is forwarded as received -- confirm the backend
+ * bounds its writes.
+ */
+error
+wasi_nn_get_output(wasm_exec_env_t exec_env, graph_execution_context ctx,
+                   uint32_t index, tensor_data output_tensor,
+                   uint32_t *output_tensor_size)
+{
+    error status;
+
+    NN_DBG_PRINTF("Running wasi_nn_get_output [ctx=%d, index=%d]...", ctx,
+                  index);
+
+    wasm_module_inst_t instance = wasm_runtime_get_module_inst(exec_env);
+    bh_assert(instance);
+    WASINNContext *wasi_nn_ctx = wasm_runtime_get_wasi_nn_ctx(instance);
+
+    status = is_model_initialized(wasi_nn_ctx);
+    if (status != success)
+        return status;
+
+    if (!wasm_runtime_validate_native_addr(instance, output_tensor_size,
+                                           sizeof(uint32_t))) {
+        NN_ERR_PRINTF("output_tensor_size is invalid");
+        return invalid_argument;
+    }
+
+    status = lookup[wasi_nn_ctx->current_encoding].get_output(
+        ctx, index, output_tensor, output_tensor_size);
+    NN_DBG_PRINTF("wasi_nn_get_output finished with status %d [data_size=%d]",
+                  status, *output_tensor_size);
+    return status;
+}
+
+/* Non-exposed public functions */
+
+/*
+ * Allocate a WASI-NN context with wasm_runtime_malloc().
+ *
+ * @return the new context, or NULL (after logging) when allocation fails
+ *
+ * The context starts as NOT initialized: no model exists until
+ * wasi_nn_load() flags it. Starting as initialized would let the other
+ * wasi-nn calls pass their guard and dispatch through a `lookup` row that
+ * holds only NULL function pointers.
+ */
+WASINNContext *
+wasi_nn_initialize()
+{
+    NN_DBG_PRINTF("Initializing wasi-nn");
+    WASINNContext *wasi_nn_ctx =
+        (WASINNContext *)wasm_runtime_malloc(sizeof(WASINNContext));
+    if (wasi_nn_ctx == NULL) {
+        NN_ERR_PRINTF("Error when allocating memory for WASI-NN context");
+        return NULL;
+    }
+    wasi_nn_ctx->is_initialized = false;
+    /* Placeholder value; wasi_nn_load() stores the real encoding */
+    wasi_nn_ctx->current_encoding = 3;
+    return wasi_nn_ctx;
+}
+
+/*
+ * Tear down the TensorFlow Lite backend and release the WASI-NN context.
+ * A NULL context is only logged as an error; nothing is released then.
+ */
+void
+wasi_nn_destroy(WASINNContext *wasi_nn_ctx)
+{
+    if (wasi_nn_ctx != NULL) {
+        NN_DBG_PRINTF("Freeing wasi-nn");
+        NN_DBG_PRINTF("-> is_initialized: %d", wasi_nn_ctx->is_initialized);
+        NN_DBG_PRINTF("-> current_encoding: %d",
+                      wasi_nn_ctx->current_encoding);
+        tensorflowlite_destroy();
+        wasm_runtime_free(wasi_nn_ctx);
+    }
+    else {
+        NN_ERR_PRINTF(
+            "Error when deallocating memory. WASI-NN context is NULL");
+    }
+}
+
+/* Register WASI-NN in WAMR */
+
+/*
+ * Build one NativeSymbol entry: the exported name is `func_name`, the
+ * implementation is `wasi_nn_<func_name>`, and the last member is NULL.
+ */
+/* clang-format off */
+#define REG_NATIVE_FUNC(func_name, signature) \
+    { #func_name, wasi_nn_##func_name, signature, NULL }
+/* clang-format on */
+
+/*
+ * Native functions exposed to WASM apps. Each signature string describes
+ * the arguments that follow `exec_env` in the matching wasi_nn_* function,
+ * then the return value: `*` stands where the C parameter is a pointer and
+ * `i` where it is an integer/enum.
+ * NOTE(review): see WAMR's native-API signature notation for the exact
+ * meaning of each character (e.g. how `*` arguments are validated).
+ */
+static NativeSymbol native_symbols_wasi_nn[] = {
+    REG_NATIVE_FUNC(load, "(*ii*)i"),
+    REG_NATIVE_FUNC(init_execution_context, "(i*)i"),
+    REG_NATIVE_FUNC(set_input, "(ii*)i"),
+    REG_NATIVE_FUNC(compute, "(i)i"),
+    REG_NATIVE_FUNC(get_output, "(ii**)i"),
+};
+
+/*
+ * Expose the wasi-nn native symbol table.
+ *
+ * @param p_libc_wasi_apis  receives a pointer to the table
+ * @return number of entries in the table
+ */
+uint32_t
+get_wasi_nn_export_apis(NativeSymbol **p_libc_wasi_apis)
+{
+    const uint32_t symbol_count =
+        sizeof(native_symbols_wasi_nn) / sizeof(NativeSymbol);
+
+    *p_libc_wasi_apis = native_symbols_wasi_nn;
+    return symbol_count;
+}
--- a/core/iwasm/libraries/wasi-nn/src/wasi_nn_private.h
+++ b/core/iwasm/libraries/wasi-nn/src/wasi_nn_private.h
@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2019 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef WASI_NN_PRIVATE_H
+#define WASI_NN_PRIVATE_H
+
+#include "wasi_nn_types.h"
+
+/* Per-instance WASI-NN state */
+typedef struct {
+    /* Checked by the wasi-nn calls that require a loaded model */
+    bool is_initialized;
+    /* Encoding used to select the backend for those calls */
+    graph_encoding current_encoding;
+} WASINNContext;
+
+/**
+ * @brief Initialize wasi-nn
+ *
+ * @return Newly allocated context, or NULL when allocation fails.
+ */
+WASINNContext *
+wasi_nn_initialize();
+/**
+ * @brief Destroy wasi-nn on app exit
+ *
+ * @param wasi_nn_ctx Context to release; a NULL value is only logged.
+ */
+
+void
+wasi_nn_destroy(WASINNContext *wasi_nn_ctx);
+
+#endif
--- a/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp
+++ b/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp
@ -3,8 +3,10 @@
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 */

-#include "wasi_nn_tensorflow.hpp"
-#include "wasi_nn_common.h"
+#include "wasi_nn.h"
+#include "wasi_nn_tensorflowlite.hpp"
+#include "logger.h"
+
 #include "bh_common.h"
 #include "bh_platform.h"
 #include "platform_common.h"
@ -14,6 +16,7 @@
 #include <tensorflow/lite/model.h>
 #include <tensorflow/lite/optional_debug_tools.h>
 #include <tensorflow/lite/error_reporter.h>
+#include <tensorflow/lite/delegates/gpu/delegate.h>

 /* Global variables */

@ -25,30 +28,30 @@ static char *model_pointer = NULL;
 /* WASI-NN (tensorflow) implementation */

 error
-tensorflow_load(graph_builder_array builder, graph_encoding encoding,
-                execution_target target, graph *graph)
+tensorflowlite_load(graph_builder_array *builder, graph_encoding encoding,
+                    execution_target target, graph *g)
 {
    if (model_pointer != NULL) {
        wasm_runtime_free(model_pointer);
        model_pointer = NULL;
    }

-    if (builder.size != 1) {
+    if (builder->size != 1) {
        NN_ERR_PRINTF("Unexpected builder format.");
        return invalid_argument;
    }

-    if (encoding != tensorflow) {
-        NN_ERR_PRINTF("Encoding is not tensorflow.");
+    if (encoding != tensorflowlite) {
+        NN_ERR_PRINTF("Encoding is not tensorflowlite.");
        return invalid_argument;
    }

-    if (target != cpu) {
-        NN_ERR_PRINTF("Only CPU target is supported.");
+    if (target != cpu && target != gpu) {
+        NN_ERR_PRINTF("Only CPU and GPU target is supported.");
        return invalid_argument;
    }

-    uint32_t size = builder.buf[0].size;
+    uint32_t size = builder->buf[0].size;

    model_pointer = (char *)wasm_runtime_malloc(size);
    if (model_pointer == NULL) {
@ -56,7 +59,7 @@ tensorflow_load(graph_builder_array builder, graph_encoding encoding,
        return missing_memory;
    }

-    bh_memcpy_s(model_pointer, size, builder.buf[0].buf, size);
+    bh_memcpy_s(model_pointer, size, builder->buf[0].buf, size);

    model = tflite::FlatBufferModel::BuildFromBuffer(model_pointer, size, NULL);
    if (model == NULL) {
@ -77,11 +80,34 @@ tensorflow_load(graph_builder_array builder, graph_encoding encoding,
        return missing_memory;
    }

+    bool use_default = false;
+    switch (target) {
+        case gpu:
+        {
+            // https://www.tensorflow.org/lite/performance/gpu
+            auto options = TfLiteGpuDelegateOptionsV2Default();
+            options.inference_preference =
+                TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED;
+            options.inference_priority1 =
+                TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY;
+            auto *delegate = TfLiteGpuDelegateV2Create(&options);
+            if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) {
+                NN_ERR_PRINTF("Error when enabling GPU delegate.");
+                use_default = true;
+            }
+            break;
+        }
+        default:
+            use_default = true;
+    }
+    if (use_default)
+        NN_WARN_PRINTF("Default encoding is CPU.");
+
    return success;
 }

 error
-tensorflow_init_execution_context(graph graph)
+tensorflowlite_init_execution_context(graph g, graph_execution_context *ctx)
 {
    if (interpreter == NULL) {
        NN_ERR_PRINTF("Non-initialized interpreter.");
@ -92,8 +118,8 @@ tensorflow_init_execution_context(graph graph)
 }

 error
-tensorflow_set_input(graph_execution_context ctx, uint32_t index,
-                     tensor *input_tensor)
+tensorflowlite_set_input(graph_execution_context ctx, uint32_t index,
+                         tensor *input_tensor)
 {
    if (interpreter == NULL) {
        NN_ERR_PRINTF("Non-initialized interpreter.");
@ -113,11 +139,11 @@ tensorflow_set_input(graph_execution_context ctx, uint32_t index,
    }

    uint32_t model_tensor_size = 1;
-    for (int i = 0; i < (int)tensor->dims->size; ++i)
+    for (int i = 0; i < tensor->dims->size; ++i)
        model_tensor_size *= (uint32_t)tensor->dims->data[i];

    uint32_t input_tensor_size = 1;
-    for (int i = 0; i < input_tensor->dimensions->size; i++)
+    for (uint32_t i = 0; i < input_tensor->dimensions->size; i++)
        input_tensor_size *= (uint32_t)input_tensor->dimensions->buf[i];

    if (model_tensor_size != input_tensor_size) {
@ -136,7 +162,7 @@ tensorflow_set_input(graph_execution_context ctx, uint32_t index,
 }

 error
-tensorflow_compute(graph_execution_context ctx)
+tensorflowlite_compute(graph_execution_context ctx)
 {
    if (interpreter == NULL) {
        NN_ERR_PRINTF("Non-initialized interpreter.");
@ -147,8 +173,9 @@ tensorflow_compute(graph_execution_context ctx)
 }

 error
-tensorflow_get_output(graph_execution_context context, uint32_t index,
-                      tensor_data output_tensor, uint32_t *output_tensor_size)
+tensorflowlite_get_output(graph_execution_context ctx, uint32_t index,
+                          tensor_data output_tensor,
+                          uint32_t *output_tensor_size)
 {
    if (interpreter == NULL) {
        NN_ERR_PRINTF("Non-initialized interpreter.");
@ -178,7 +205,7 @@ tensorflow_get_output(graph_execution_context context, uint32_t index,
    }

    float *tensor_f = interpreter->typed_output_tensor<float>(index);
-    for (int i = 0; i < model_tensor_size; ++i)
+    for (uint32_t i = 0; i < model_tensor_size; ++i)
        NN_DBG_PRINTF("output: %f", tensor_f[i]);

    *output_tensor_size = model_tensor_size;
@ -186,3 +213,22 @@ tensorflow_get_output(graph_execution_context context, uint32_t index,
                model_tensor_size * sizeof(float));
    return success;
 }
+
+/*
+ * Release everything the TensorFlow Lite backend holds: the interpreter,
+ * the model and the buffer the model was built from. Safe to call when no
+ * model has been loaded.
+ */
+void
+tensorflowlite_destroy()
+{
+    /*
+        TensorFlow Lite objects are released through their smart pointers;
+        only the raw model buffer is freed by hand.
+
+        Related issues:
+        * https://github.com/tensorflow/tensorflow/issues/15880
+    */
+    NN_DBG_PRINTF("Freeing memory.");
+    /* Tear down in reverse order of use: the interpreter relies on the
+       model, and the model relies on the model_pointer buffer, so each one
+       must outlive the object built on top of it. */
+    interpreter.reset(nullptr);
+    model.reset(nullptr);
+    /* Same guard as tensorflowlite_load(): nothing to free before a load */
+    if (model_pointer != NULL) {
+        wasm_runtime_free(model_pointer);
+        model_pointer = NULL;
+    }
+    NN_DBG_PRINTF("Memory free'd.");
+}
--- a/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.hpp
+++ b/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.hpp
@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2019 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef WASI_NN_TENSORFLOWLITE_HPP
+#define WASI_NN_TENSORFLOWLITE_HPP
+
+#include "wasi_nn.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Load a model. Expects exactly one builder buffer, the `tensorflowlite`
+ * encoding and a `cpu` or `gpu` target; anything else is rejected with
+ * invalid_argument. */
+error
+tensorflowlite_load(graph_builder_array *builder, graph_encoding encoding,
+                    execution_target target, graph *g);
+
+/* Prepare execution for graph `g`; logs an error when no interpreter has
+ * been created yet. */
+error
+tensorflowlite_init_execution_context(graph g, graph_execution_context *ctx);
+
+/* Provide `input_tensor` as model input `index`. The element count of the
+ * given tensor is compared with the model's input tensor. */
+error
+tensorflowlite_set_input(graph_execution_context ctx, uint32_t index,
+                         tensor *input_tensor);
+
+/* Run inference; logs an error when no interpreter has been created yet. */
+error
+tensorflowlite_compute(graph_execution_context ctx);
+
+/* Copy output tensor `index` into `output_tensor` and store its number of
+ * elements in `*output_tensor_size`. */
+error
+tensorflowlite_get_output(graph_execution_context ctx, uint32_t index,
+                          tensor_data output_tensor,
+                          uint32_t *output_tensor_size);
+
+/* Release the model, the interpreter and the model buffer. */
+void
+tensorflowlite_destroy();
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/core/iwasm/libraries/wasi-nn/test/CMakeLists.txt
+++ b/core/iwasm/libraries/wasi-nn/test/CMakeLists.txt
@ -7,10 +7,10 @@ project (iwasm)

 set (CMAKE_VERBOSE_MAKEFILE OFF)
 # Reset default linker flags
-set (CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "")
-set (CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "")
 set (CMAKE_C_STANDARD 99)
 set (CMAKE_CXX_STANDARD 14)
+set (CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "")
+set (CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "")

 if (NOT DEFINED WAMR_BUILD_PLATFORM)
  set (WAMR_BUILD_PLATFORM "linux")
--- a/core/iwasm/libraries/wasi-nn/test/Dockerfile
+++ b/core/iwasm/libraries/wasi-nn/test/Dockerfile
@ -1,32 +0,0 @@
-# Copyright (C) 2019 Intel Corporation.  All rights reserved.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-FROM ubuntu:22.04
-
-ENV DEBIAN_FRONTEND=noninteractive
-
-RUN apt-get update && apt-get install -y \
-    cmake build-essential git wget python3.10 python3-pip
-
-RUN wget -q https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-14/wasi-sdk-14.0-linux.tar.gz && \
-    tar xf wasi-sdk-*-linux.tar.gz -C /opt && rm -f wasi-sdk-*-linux.tar.gz && \
-    mv /opt/wasi-sdk-14.0 /opt/wasi-sdk
-
-WORKDIR /home/wamr
-
-COPY core core
-COPY build-scripts build-scripts
-COPY product-mini product-mini
-
-RUN pip3 install -r core/iwasm/libraries/wasi-nn/test/requirements.txt
-
-WORKDIR /home/wamr/core/iwasm/libraries/wasi-nn/test/build
-
-RUN cmake -DWAMR_BUILD_WASI_NN=1 ..
-RUN make -j $(grep -c ^processor /proc/cpuinfo)
-
-WORKDIR /home/wamr/core/iwasm/libraries/wasi-nn/test
-
-RUN ./build.sh
-
-ENTRYPOINT [ "./build/iwasm",  "--dir=.", "test_tensorflow.wasm" ]
--- a/core/iwasm/libraries/wasi-nn/test/Dockerfile.base
+++ b/core/iwasm/libraries/wasi-nn/test/Dockerfile.base
@ -0,0 +1,22 @@
+# Copyright (C) 2019 Intel Corporation.  All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Build image: compiles iwasm with WASI-NN support. The resulting binary is
+# left in /home/wamr/core/iwasm/libraries/wasi-nn/test/build.
+FROM ubuntu:20.04 AS base
+
+# Keep apt from prompting during the image build
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && apt-get install -y \
+    cmake build-essential git
+
+WORKDIR /home/wamr
+
+# Copies the whole build context.
+# NOTE(review): presumably the context must be the WAMR repository root so
+# that the WORKDIR below exists -- confirm with the build instructions.
+COPY . .
+
+WORKDIR /home/wamr/core/iwasm/libraries/wasi-nn/test/build
+
+# Configure with WASI-NN and the TensorFlow Lite GPU option switched on
+RUN cmake \
+  -DWAMR_BUILD_WASI_NN=1 \
+  -DTFLITE_ENABLE_GPU=ON \
+  ..
+
+# Build with one job per processor listed in /proc/cpuinfo
+RUN make -j $(grep -c ^processor /proc/cpuinfo)
--- a/core/iwasm/libraries/wasi-nn/test/Dockerfile.compile
+++ b/core/iwasm/libraries/wasi-nn/test/Dockerfile.compile
@ -0,0 +1,23 @@
+# Copyright (C) 2019 Intel Corporation.  All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Toolchain image: provides wasi-sdk and the Python requirements, then runs
+# ./build.sh from /wasi-nn/test when the container starts.
+FROM ubuntu:20.04
+
+# Keep apt from prompting during the image build
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && apt-get install -y \
+    cmake build-essential git wget python3.10 python3-pip
+
+# Download wasi-sdk, unpack it under /opt and expose it as /opt/wasi-sdk
+ARG WASI_SDK_VER=19
+RUN wget -c --progress=dot:giga https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-${WASI_SDK_VER}/wasi-sdk-${WASI_SDK_VER}.0-linux.tar.gz -P /opt \
+  && tar xf /opt/wasi-sdk-${WASI_SDK_VER}.0-linux.tar.gz -C /opt \
+  && ln -fs /opt/wasi-sdk-${WASI_SDK_VER}.0 /opt/wasi-sdk \
+  && rm /opt/wasi-sdk-${WASI_SDK_VER}.0-linux.tar.gz
+
+WORKDIR /wasi-nn/test
+
+# requirements.txt is only needed for the pip install and is removed after
+COPY core/iwasm/libraries/wasi-nn/test/requirements.txt .
+
+RUN pip3 install -r requirements.txt && rm requirements.txt
+
+# NOTE(review): build.sh is not copied into the image; presumably the test
+# directory is mounted at /wasi-nn/test at run time -- confirm.
+ENTRYPOINT [ "bash", "./build.sh" ]
--- a/core/iwasm/libraries/wasi-nn/test/Dockerfile.cpu
+++ b/core/iwasm/libraries/wasi-nn/test/Dockerfile.cpu
@ -0,0 +1,8 @@
+# Copyright (C) 2019 Intel Corporation.  All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Runtime image: ships only the iwasm binary.
+FROM ubuntu:20.04
+
+# NOTE(review): `wasi-nn-base` presumably is the image built from
+# Dockerfile.base -- confirm the tag used when building it.
+COPY --from=wasi-nn-base /home/wamr/core/iwasm/libraries/wasi-nn/test/build/iwasm /run/iwasm
+
+# Container arguments are passed straight to iwasm
+ENTRYPOINT [ "/run/iwasm" ]
--- a/core/iwasm/libraries/wasi-nn/test/Dockerfile.nvidia-gpu
+++ b/core/iwasm/libraries/wasi-nn/test/Dockerfile.nvidia-gpu
@ -0,0 +1,20 @@
+# Copyright (C) 2019 Intel Corporation.  All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Runtime image with NVIDIA GPU access: CUDA runtime base plus OpenCL, and
+# the iwasm binary.
+FROM nvidia/cuda:11.3.0-runtime-ubuntu20.04
+
+# OpenCL ICD loader, headers and the clinfo diagnostic tool
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        ocl-icd-libopencl1 \
+        ocl-icd-opencl-dev \
+        clinfo && \
+    rm -rf /var/lib/apt/lists/*
+
+# Register the NVIDIA OpenCL library with the ICD loader
+RUN mkdir -p /etc/OpenCL/vendors && \
+    echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd
+
+# Read by the NVIDIA container runtime: expose all GPUs with the compute and
+# utility driver capabilities
+ENV NVIDIA_VISIBLE_DEVICES=all
+ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
+
+# NOTE(review): `wasi-nn-base` presumably is the image built from
+# Dockerfile.base -- confirm the tag used when building it.
+COPY --from=wasi-nn-base /home/wamr/core/iwasm/libraries/wasi-nn/test/build/iwasm /run/iwasm
+
+# Container arguments are passed straight to iwasm
+ENTRYPOINT [ "/run/iwasm" ]
--- a/core/iwasm/libraries/wasi-nn/test/build.sh
+++ b/core/iwasm/libraries/wasi-nn/test/build.sh
@ -7,7 +7,7 @@
    -Wl,--allow-undefined \
    -Wl,--strip-all,--no-entry \
    --sysroot=/opt/wasi-sdk/share/wasi-sysroot \
-    -I/home/wamr/core/iwasm/libraries/wasi-nn \
+    -I.. \
    -o test_tensorflow.wasm test_tensorflow.c

 # TFLite models to use in the tests
--- a/core/iwasm/libraries/wasi-nn/test/test_tensorflow.c
+++ b/core/iwasm/libraries/wasi-nn/test/test_tensorflow.c
@ -28,7 +28,7 @@ typedef struct {
 // WASI-NN wrappers

 error
-wasm_load(char *model_name, graph *graph)
+wasm_load(char *model_name, graph *g, execution_target target)
 {
    FILE *pFile = fopen(model_name, "r");
    if (pFile == NULL)
@ -64,7 +64,7 @@ wasm_load(char *model_name, graph *graph)
    arr.buf[0].size = result;
    arr.buf[0].buf = buffer;

-    error res = load(&arr, tensorflow, cpu, graph);
+    error res = load(&arr, tensorflowlite, target, g);

    fclose(pFile);
    free(buffer);
@ -73,13 +73,13 @@ wasm_load(char *model_name, graph *graph)
 }

 error
-wasm_init_execution_context(graph graph, graph_execution_context *ctx)
+wasm_init_execution_context(graph g, graph_execution_context *ctx)
 {
-    return init_execution_context(graph, ctx);
+    return init_execution_context(g, ctx);
 }

 error
-wasm_input(graph_execution_context ctx, float *input_tensor, uint32_t *dim)
+wasm_set_input(graph_execution_context ctx, float *input_tensor, uint32_t *dim)
 {
    tensor_dimensions dims;
    dims.size = INPUT_TENSOR_DIMS;
@ -115,11 +115,12 @@ wasm_get_output(graph_execution_context ctx, uint32_t index, float *out_tensor,
 // Inference

 float *
-run_inference(float *input, uint32_t *input_size, uint32_t *output_size,
-              char *model_name, uint32_t num_output_tensors)
+run_inference(execution_target target, float *input, uint32_t *input_size,
+              uint32_t *output_size, char *model_name,
+              uint32_t num_output_tensors)
 {
    graph graph;
-    if (wasm_load(model_name, &graph) != success) {
+    if (wasm_load(model_name, &graph, target) != success) {
        fprintf(stderr, "Error when loading model.");
        exit(1);
    }
@ -130,7 +131,7 @@ run_inference(float *input, uint32_t *input_size, uint32_t *output_size,
        exit(1);
    }

-    if (wasm_input(ctx, input, input_size) != success) {
+    if (wasm_set_input(ctx, input, input_size) != success) {
        fprintf(stderr, "Error when setting input tensor.");
        exit(1);
    }
@ -151,7 +152,7 @@ run_inference(float *input, uint32_t *input_size, uint32_t *output_size,
        *output_size = MAX_OUTPUT_TENSOR_SIZE - *output_size;
        if (wasm_get_output(ctx, i, &out_tensor[offset], output_size)
            != success) {
-            fprintf(stderr, "Error when getting input .");
+            fprintf(stderr, "Error when getting output .");
            exit(1);
        }

@ -185,14 +186,14 @@ create_input(int *dims)
 // TESTS

 void
-test_sum()
+test_sum(execution_target target)
 {
    int dims[] = { 1, 5, 5, 1 };
    input_info input = create_input(dims);

    uint32_t output_size = 0;
-    float *output = run_inference(input.input_tensor, input.dim, &output_size,
-                                  "models/sum.tflite", 1);
+    float *output = run_inference(target, input.input_tensor, input.dim,
+                                  &output_size, "/assets/models/sum.tflite", 1);

    assert(output_size == 1);
    assert(fabs(output[0] - 300.0) < EPSILON);
@ -203,14 +204,14 @@ test_sum()
 }

 void
-test_max()
+test_max(execution_target target)
 {
    int dims[] = { 1, 5, 5, 1 };
    input_info input = create_input(dims);

    uint32_t output_size = 0;
-    float *output = run_inference(input.input_tensor, input.dim, &output_size,
-                                  "models/max.tflite", 1);
+    float *output = run_inference(target, input.input_tensor, input.dim,
+                                  &output_size, "/assets/models/max.tflite", 1);

    assert(output_size == 1);
    assert(fabs(output[0] - 24.0) < EPSILON);
@ -222,14 +223,15 @@ test_max()
 }

 void
-test_average()
+test_average(execution_target target)
 {
    int dims[] = { 1, 5, 5, 1 };
    input_info input = create_input(dims);

    uint32_t output_size = 0;
-    float *output = run_inference(input.input_tensor, input.dim, &output_size,
-                                  "models/average.tflite", 1);
+    float *output =
+        run_inference(target, input.input_tensor, input.dim, &output_size,
+                      "/assets/models/average.tflite", 1);

    assert(output_size == 1);
    assert(fabs(output[0] - 12.0) < EPSILON);
@ -241,14 +243,15 @@ test_average()
 }

 void
-test_mult_dimensions()
+test_mult_dimensions(execution_target target)
 {
    int dims[] = { 1, 3, 3, 1 };
    input_info input = create_input(dims);

    uint32_t output_size = 0;
-    float *output = run_inference(input.input_tensor, input.dim, &output_size,
-                                  "models/mult_dim.tflite", 1);
+    float *output =
+        run_inference(target, input.input_tensor, input.dim, &output_size,
+                      "/assets/models/mult_dim.tflite", 1);

    assert(output_size == 9);
    for (int i = 0; i < 9; i++)
@ -260,14 +263,15 @@ test_mult_dimensions()
 }

 void
-test_mult_outputs()
+test_mult_outputs(execution_target target)
 {
    int dims[] = { 1, 4, 4, 1 };
    input_info input = create_input(dims);

    uint32_t output_size = 0;
-    float *output = run_inference(input.input_tensor, input.dim, &output_size,
-                                  "models/mult_out.tflite", 2);
+    float *output =
+        run_inference(target, input.input_tensor, input.dim, &output_size,
+                      "/assets/models/mult_out.tflite", 2);

    assert(output_size == 8);
    // first tensor check
@ -285,16 +289,30 @@ test_mult_outputs()
 int
 main()
 {
+    char *env = getenv("TARGET");
+    if (env == NULL) {
+        printf("Usage:\n--env=\"TARGET=[cpu|gpu]\"\n");
+        return 1;
+    }
+    execution_target target;
+    if (strcmp(env, "cpu") == 0)
+        target = cpu;
+    else if (strcmp(env, "gpu") == 0)
+        target = gpu;
+    else {
+        printf("Wrong target!");
+        return 1;
+    }
    printf("################### Testing sum...\n");
-    test_sum();
+    test_sum(target);
    printf("################### Testing max...\n");
-    test_max();
+    test_max(target);
    printf("################### Testing average...\n");
-    test_average();
+    test_average(target);
    printf("################### Testing multiple dimensions...\n");
-    test_mult_dimensions();
+    test_mult_dimensions(target);
    printf("################### Testing multiple outputs...\n");
-    test_mult_outputs();
+    test_mult_outputs(target);

    printf("Tests: passed!\n");
    return 0;
--- a/core/iwasm/libraries/wasi-nn/wasi_nn.cmake
+++ b/core/iwasm/libraries/wasi-nn/wasi_nn.cmake
@ -5,6 +5,15 @@ set (WASI_NN_DIR ${CMAKE_CURRENT_LIST_DIR})

 add_definitions (-DWASM_ENABLE_WASI_NN=1)

-set (LIBC_WASI_NN_SOURCE ${WASI_NN_DIR}/wasi_nn_native.c ${WASI_NN_DIR}/wasi_nn_tensorflow.cpp)
+include_directories (${WASI_NN_DIR})
+include_directories (${WASI_NN_DIR}/src)
+include_directories (${WASI_NN_DIR}/src/utils)
+
+set (
+    LIBC_WASI_NN_SOURCE
+    ${WASI_NN_DIR}/src/wasi_nn.c
+    ${WASI_NN_DIR}/src/wasi_nn_tensorflowlite.cpp
+    ${WASI_NN_DIR}/src/utils/wasi_nn_app_native.c
+)

 set (TENSORFLOW_LIB tensorflow-lite)
--- a/core/iwasm/libraries/wasi-nn/wasi_nn.h
+++ b/core/iwasm/libraries/wasi-nn/wasi_nn.h
@ -3,63 +3,17 @@
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 */

-#ifndef WASI_NN_WASM_H
-#define WASI_NN_WASM_H
-
-#include "wasi_nn_common.h"
-
 /**
 * Following definition from:
- * [Aug 10th, 2022]
- * https://github.com/WebAssembly/wasi-nn/blob/e5e1a6c31f424c7cd63026cd270e9746775675a0/wasi-nn.wit.md
+ * [Oct 25th, 2022]
+ * https://github.com/WebAssembly/wasi-nn/blob/0f77c48ec195748990ff67928a4b3eef5f16c2de/wasi-nn.wit.md
 */

-/* The graph initialization data. */
+#ifndef WASI_NN_H
+#define WASI_NN_H

-// This consists of an array of buffers because implementing backends may encode
-// their graph IR in parts (e.g., OpenVINO stores its IR and weights
-// separately).
-typedef struct {
-    uint8_t *buf;
-    uint32_t size;
-} graph_builder;
-
-typedef struct {
-    graph_builder *buf;
-    uint32_t size;
-} graph_builder_array;
-
-/* The dimensions of a tensor. */
-
-// The array length matches the tensor rank and each element in the array
-// describes the size of each dimension.
-typedef struct {
-    uint32_t *buf;
-    uint32_t size;
-} tensor_dimensions;
-
-/* The tensor data. */
-
-// Initially conceived as a sparse representation, each empty cell would be
-// filled with zeros and the array length must match the product of all of the
-// dimensions and the number of bytes in the type (e.g., a 2x2 tensor with
-// 4-byte f32 elements would have a data array of length 16). Naturally, this
-// representation requires some knowledge of how to lay out data in
-// memory--e.g., using row-major ordering--and could perhaps be improved.
-typedef uint8_t *tensor_data;
-
-/* A tensor. */
-
-typedef struct {
-    // Describe the size of the tensor (e.g., 2x2x2x2 -> [2, 2, 2, 2]). To
-    // represent a tensor containing a single value, use `[1]` for the tensor
-    // dimensions.
-    tensor_dimensions *dimensions;
-    // Describe the type of element in the tensor (e.g., f32).
-    tensor_type type;
-    // Contains the tensor data.
-    tensor_data data;
-} tensor;
+#include <stdint.h>
+#include "wasi_nn_types.h"

 /**
 * @brief Load an opaque sequence of bytes to use for inference.
@ -67,25 +21,31 @@ typedef struct {
 * @param builder   Model builder.
 * @param encoding  Model encoding.
 * @param target    Execution target.
- * @param graph     Graph.
+ * @param g         Graph.
 * @return error    Execution status.
 */
 error
 load(graph_builder_array *builder, graph_encoding encoding,
-     execution_target target, graph *graph)
-    __attribute__((export_module("wasi_nn")))
+     execution_target target, graph *g)
    __attribute__((import_module("wasi_nn")));

+/**
+ * INFERENCE
+ *
+ */
+
+// Bind a `graph` to the input and output tensors for an inference.
+typedef uint32_t graph_execution_context;
+
 /**
 * @brief Create an execution instance of a loaded graph.
 *
- * @param graph     Graph.
+ * @param g         Graph.
 * @param ctx       Execution context.
 * @return error    Execution status.
 */
 error
-init_execution_context(graph graph, graph_execution_context *ctx)
-    __attribute__((export_module("wasi_nn")))
+init_execution_context(graph g, graph_execution_context *ctx)
    __attribute__((import_module("wasi_nn")));

 /**
@ -98,7 +58,6 @@ init_execution_context(graph graph, graph_execution_context *ctx)
 */
 error
 set_input(graph_execution_context ctx, uint32_t index, tensor *tensor)
-    __attribute__((export_module("wasi_nn")))
    __attribute__((import_module("wasi_nn")));

 /**
@ -108,8 +67,7 @@ set_input(graph_execution_context ctx, uint32_t index, tensor *tensor)
 * @return error    Execution status.
 */
 error
-compute(graph_execution_context ctx) __attribute__((export_module("wasi_nn")))
-__attribute__((import_module("wasi_nn")));
+compute(graph_execution_context ctx) __attribute__((import_module("wasi_nn")));

 /**
 * @brief Extract the outputs after inference.
@ -126,7 +84,6 @@ __attribute__((import_module("wasi_nn")));
 error
 get_output(graph_execution_context ctx, uint32_t index,
           tensor_data output_tensor, uint32_t *output_tensor_size)
-    __attribute__((export_module("wasi_nn")))
    __attribute__((import_module("wasi_nn")));

 #endif
--- a/core/iwasm/libraries/wasi-nn/wasi_nn_common.h
+++ b/core/iwasm/libraries/wasi-nn/wasi_nn_common.h
@ -1,44 +0,0 @@
-/*
- * Copyright (C) 2019 Intel Corporation.  All rights reserved.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- */
-
-#ifndef WASI_NN_COMMON_H
-#define WASI_NN_COMMON_H
-
-#include <stdint.h>
-
-// The type of the elements in a tensor.
-typedef enum { fp16 = 0, fp32, up8, ip32 } tensor_type;
-
-// Describes the encoding of the graph. This allows the API to be implemented by
-// various backends that encode (i.e., serialize) their graph IR with different
-// formats.
-typedef enum { openvino = 0, onnx, tensorflow, pytorch } graph_encoding;
-
-// Define where the graph should be executed.
-typedef enum { cpu = 0, gpu, tpu } execution_target;
-
-// Error codes returned by functions in this API.
-typedef enum {
-    // No error occurred.
-    success = 0,
-    // Caller module passed an invalid argument.
-    invalid_argument,
-    // Invalid encoding.
-    invalid_encoding,
-    // Caller module is missing a memory export.
-    missing_memory,
-    // Device or resource busy.
-    busy,
-    // Runtime Error.
-    runtime_error,
-} error;
-
-// An execution graph for performing inference (i.e., a model).
-typedef uint32_t graph;
-
-// Bind a `graph` to the input and output tensors for an inference.
-typedef uint32_t graph_execution_context;
-
-#endif
--- a/core/iwasm/libraries/wasi-nn/wasi_nn_native.c
+++ b/core/iwasm/libraries/wasi-nn/wasi_nn_native.c
@ -1,264 +0,0 @@
-/*
- * Copyright (C) 2019 Intel Corporation.  All rights reserved.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- */
-
-#include <stdio.h>
-#include <assert.h>
-#include <errno.h>
-#include <string.h>
-#include <stdlib.h>
-
-#include "wasi_nn_common.h"
-#include "wasm_export.h"
-#include "bh_platform.h"
-
-#include "wasi_nn.h"
-#include "wasi_nn_tensorflow.hpp"
-#include "logger.h"
-
-/* Definition of 'wasi_nn.h' structs in WASM app format (using offset) */
-
-typedef struct {
-    uint32_t buf_offset;
-    uint32_t size;
-} graph_builder_wasm;
-
-typedef struct {
-    uint32_t buf_offset;
-    uint32_t size;
-} graph_builder_array_wasm;
-
-typedef struct {
-    uint32_t dimensions_offset;
-    tensor_type type;
-    uint32_t data_offset;
-} tensor_wasm;
-
-typedef struct {
-    uint32_t buf_offset;
-    uint32_t size;
-} tensor_dimensions_wasm;
-
-/* Global variables */
-
-static uint8_t _is_initialized;
-static graph_encoding _encoding;
-
-/* Utils */
-
-static error
-check_initialized()
-{
-    if (!_is_initialized) {
-        NN_ERR_PRINTF("Model not initialized.");
-        return invalid_argument;
-    }
-    if (_encoding != tensorflow) {
-        NN_ERR_PRINTF("Model encoding is not tensorflow.");
-        return invalid_argument;
-    }
-    return success;
-}
-
-/* WASI-NN implementation */
-
-error
-wasi_nn_load(wasm_exec_env_t exec_env, graph_builder_array_wasm *builder,
-             graph_encoding encoding, execution_target target, graph *graph)
-{
-    NN_DBG_PRINTF("Running wasi_nn_load [encoding=%d, target=%d]...", encoding,
-                  target);
-
-    wasm_module_inst_t instance = wasm_runtime_get_module_inst(exec_env);
-    bh_assert(instance);
-
-    if (!wasm_runtime_validate_native_addr(instance, builder,
-                                           sizeof(graph_builder_array_wasm)))
-        return invalid_argument;
-
-    if (!wasm_runtime_validate_app_addr(instance, builder->buf_offset,
-                                        builder->size * sizeof(uint32_t)))
-        return invalid_argument;
-
-    NN_DBG_PRINTF("Graph builder array contains %d elements", builder->size);
-
-    graph_builder_wasm *gb_wasm =
-        (graph_builder_wasm *)wasm_runtime_addr_app_to_native(
-            instance, builder->buf_offset);
-
-    graph_builder *gb_native = (graph_builder *)wasm_runtime_malloc(
-        builder->size * sizeof(graph_builder));
-    if (gb_native == NULL)
-        return missing_memory;
-
-    for (int i = 0; i < builder->size; ++i) {
-        if (!wasm_runtime_validate_app_addr(instance, gb_wasm[i].buf_offset,
-                                            gb_wasm[i].size
-                                                * sizeof(uint8_t))) {
-            wasm_runtime_free(gb_native);
-            return invalid_argument;
-        }
-
-        gb_native[i].buf = (uint8_t *)wasm_runtime_addr_app_to_native(
-            instance, gb_wasm[i].buf_offset);
-        gb_native[i].size = gb_wasm[i].size;
-
-        NN_DBG_PRINTF("Graph builder %d contains %d elements", i,
-                      gb_wasm[i].size);
-    }
-
-    graph_builder_array gba_native = { .buf = gb_native,
-                                       .size = builder->size };
-
-    if (!wasm_runtime_validate_native_addr(instance, graph, sizeof(graph))) {
-        wasm_runtime_free(gb_native);
-        return invalid_argument;
-    }
-
-    switch (encoding) {
-        case tensorflow:
-            break;
-        default:
-            NN_ERR_PRINTF("Only tensorflow is supported.");
-            wasm_runtime_free(gb_native);
-            return invalid_argument;
-    }
-
-    _encoding = encoding;
-    _is_initialized = 1;
-
-    error res = tensorflow_load(gba_native, _encoding, target, graph);
-    NN_DBG_PRINTF("wasi_nn_load finished with status %d [graph=%d]", res,
-                  *graph);
-
-    wasm_runtime_free(gb_native);
-    return res;
-}
-
-error
-wasi_nn_init_execution_context(wasm_exec_env_t exec_env, graph graph,
-                               graph_execution_context *ctx)
-{
-    NN_DBG_PRINTF("Running wasi_nn_init_execution_context [graph=%d]...",
-                  graph);
-    error res;
-    if (success != (res = check_initialized()))
-        return res;
-    res = tensorflow_init_execution_context(graph);
-    *ctx = graph;
-    NN_DBG_PRINTF(
-        "wasi_nn_init_execution_context finished with status %d [ctx=%d]", res,
-        *ctx);
-    return res;
-}
-
-error
-wasi_nn_set_input(wasm_exec_env_t exec_env, graph_execution_context ctx,
-                  uint32_t index, tensor_wasm *input_tensor)
-{
-    NN_DBG_PRINTF("Running wasi_nn_set_input [ctx=%d, index=%d]...", ctx,
-                  index);
-
-    error res;
-    if (success != (res = check_initialized()))
-        return res;
-
-    wasm_module_inst_t instance = wasm_runtime_get_module_inst(exec_env);
-    bh_assert(instance);
-
-    if (!wasm_runtime_validate_native_addr(instance, input_tensor,
-                                           sizeof(tensor_wasm)))
-        return invalid_argument;
-
-    if (!wasm_runtime_validate_app_addr(
-            instance, input_tensor->dimensions_offset, sizeof(uint32_t)))
-        return invalid_argument;
-
-    tensor_dimensions_wasm *dimensions_w =
-        (tensor_dimensions_wasm *)wasm_runtime_addr_app_to_native(
-            instance, input_tensor->dimensions_offset);
-
-    if (!wasm_runtime_validate_app_addr(instance, dimensions_w->buf_offset,
-                                        dimensions_w->size * sizeof(uint32_t)))
-        return invalid_argument;
-
-    tensor_dimensions dimensions = {
-        .buf = (uint32_t *)wasm_runtime_addr_app_to_native(
-            instance, dimensions_w->buf_offset),
-        .size = dimensions_w->size
-    };
-
-    NN_DBG_PRINTF("Number of dimensions: %d", dimensions.size);
-    int total_elements = 1;
-    for (int i = 0; i < dimensions.size; ++i) {
-        NN_DBG_PRINTF("Dimension %d: %d", i, dimensions.buf[i]);
-        total_elements *= dimensions.buf[i];
-    }
-    NN_DBG_PRINTF("Tensor type: %d", input_tensor->type);
-
-    if (!wasm_runtime_validate_app_addr(instance, input_tensor->data_offset,
-                                        total_elements))
-        return invalid_argument;
-
-    tensor tensor = { .type = input_tensor->type,
-                      .dimensions = &dimensions,
-                      .data = (uint8_t *)wasm_runtime_addr_app_to_native(
-                          instance, input_tensor->data_offset) };
-
-    res = tensorflow_set_input(ctx, index, &tensor);
-    NN_DBG_PRINTF("wasi_nn_set_input finished with status %d", res);
-    return res;
-}
-
-error
-wasi_nn_compute(wasm_exec_env_t exec_env, graph_execution_context ctx)
-{
-    NN_DBG_PRINTF("Running wasi_nn_compute [ctx=%d]...", ctx);
-    error res;
-    if (success != (res = check_initialized()))
-        return res;
-
-    res = tensorflow_compute(ctx);
-    NN_DBG_PRINTF("wasi_nn_compute finished with status %d", res);
-    return res;
-}
-
-error
-wasi_nn_get_output(wasm_exec_env_t exec_env, graph_execution_context ctx,
-                   uint32_t index, tensor_data output_tensor,
-                   uint32_t *output_tensor_size)
-{
-    NN_DBG_PRINTF("Running wasi_nn_get_output [ctx=%d, index=%d]...", ctx,
-                  index);
-    error res;
-    if (success != (res = check_initialized()))
-        return res;
-
-    res = tensorflow_get_output(ctx, index, output_tensor, output_tensor_size);
-    NN_DBG_PRINTF("wasi_nn_get_output finished with status %d [data_size=%d]",
-                  res, *output_tensor_size);
-    return res;
-}
-
-/* Register WASI-NN in WAMR */
-
-/* clang-format off */
-#define REG_NATIVE_FUNC(func_name, signature) \
-    { #func_name, wasi_nn_##func_name, signature, NULL }
-/* clang-format on */
-
-static NativeSymbol native_symbols_wasi_nn[] = {
-    REG_NATIVE_FUNC(load, "(*ii*)i"),
-    REG_NATIVE_FUNC(init_execution_context, "(i*)i"),
-    REG_NATIVE_FUNC(set_input, "(ii*)i"),
-    REG_NATIVE_FUNC(compute, "(i)i"),
-    REG_NATIVE_FUNC(get_output, "(ii**)i"),
-};
-
-uint32_t
-get_wasi_nn_export_apis(NativeSymbol **p_libc_wasi_apis)
-{
-    *p_libc_wasi_apis = native_symbols_wasi_nn;
-    return sizeof(native_symbols_wasi_nn) / sizeof(NativeSymbol);
-}
--- a/core/iwasm/libraries/wasi-nn/wasi_nn_tensorflow.hpp
+++ b/core/iwasm/libraries/wasi-nn/wasi_nn_tensorflow.hpp
@ -1,40 +0,0 @@
-/*
- * Copyright (C) 2019 Intel Corporation.  All rights reserved.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- */
-
-#ifndef WASI_NN_TENSORFLOW_HPP
-#define WASI_NN_TENSORFLOW_HPP
-
-#include <stdio.h>
-
-#include "wasi_nn.h"
-#include "logger.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-error
-tensorflow_load(graph_builder_array builder, graph_encoding encoding,
-                execution_target target, graph *graph);
-
-error
-tensorflow_init_execution_context(graph graph);
-
-error
-tensorflow_set_input(graph_execution_context ctx, uint32_t index,
-                     tensor *input_tensor);
-
-error
-tensorflow_compute(graph_execution_context ctx);
-
-error
-tensorflow_get_output(graph_execution_context context, uint32_t index,
-                      tensor_data output_tensor, uint32_t *output_tensor_size);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
--- a/core/iwasm/libraries/wasi-nn/wasi_nn_types.h
+++ b/core/iwasm/libraries/wasi-nn/wasi_nn_types.h
@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2019 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef WASI_NN_TYPES_H
+#define WASI_NN_TYPES_H
+
+/* uint8_t and uint32_t are used by the typedefs below; include <stdint.h>
+ * here so this header compiles regardless of what the includer pulled in. */
+#include <stdint.h>
+
+/**
+ * ERRORS
+ *
+ */
+
+// Error codes returned by functions in this API.
+typedef enum {
+    // No error occurred.
+    success = 0,
+    // Caller module passed an invalid argument.
+    invalid_argument,
+    // Invalid encoding.
+    invalid_encoding,
+    // Caller module is missing a memory export.
+    missing_memory,
+    // Device or resource busy.
+    busy,
+    // Runtime Error.
+    runtime_error,
+} error;
+
+/**
+ * TENSOR
+ *
+ */
+
+// The dimensions of a tensor.
+//
+// The array length matches the tensor rank and each element in the array
+// describes the size of each dimension.
+typedef struct {
+    uint32_t *buf;
+    uint32_t size;
+} tensor_dimensions;
+
+// The type of the elements in a tensor.
+typedef enum { fp16 = 0, fp32, up8, ip32 } tensor_type;
+
+// The tensor data.
+//
+// Initially conceived as a sparse representation, each empty cell would be
+// filled with zeros and the array length must match the product of all of the
+// dimensions and the number of bytes in the type (e.g., a 2x2 tensor with
+// 4-byte f32 elements would have a data array of length 16). Naturally, this
+// representation requires some knowledge of how to lay out data in
+// memory--e.g., using row-major ordering--and could perhaps be improved.
+typedef uint8_t *tensor_data;
+
+// A tensor.
+typedef struct {
+    // Describe the size of the tensor (e.g., 2x2x2x2 -> [2, 2, 2, 2]). To
+    // represent a tensor containing a single value, use `[1]` for the tensor
+    // dimensions.
+    tensor_dimensions *dimensions;
+    // Describe the type of element in the tensor (e.g., f32).
+    tensor_type type;
+    // Contains the tensor data.
+    tensor_data data;
+} tensor;
+
+/**
+ * GRAPH
+ *
+ */
+
+// The graph initialization data.
+//
+// This consists of an array of buffers because implementing backends may encode
+// their graph IR in parts (e.g., OpenVINO stores its IR and weights
+// separately).
+typedef struct {
+    uint8_t *buf;
+    uint32_t size;
+} graph_builder;
+
+// An array of `graph_builder` parts.
+typedef struct {
+    graph_builder *buf;
+    uint32_t size;
+} graph_builder_array;
+
+// An execution graph for performing inference (i.e., a model).
+typedef uint32_t graph;
+
+// Describes the encoding of the graph. This allows the API to be implemented by
+// various backends that encode (i.e., serialize) their graph IR with different
+// formats.
+//
+// The numeric values are implicit, so appending new encodings at the end
+// keeps the values of the existing ones stable.
+typedef enum {
+    openvino = 0,
+    onnx,
+    tensorflow,
+    pytorch,
+    tensorflowlite
+} graph_encoding;
+
+// Define where the graph should be executed.
+typedef enum execution_target { cpu = 0, gpu, tpu } execution_target;
+
+#endif