wasi-nn: Support multiple TFLite models (#2002)

Remove the following restrictions: (1) only 1 WASM app at a time; (2) only 1 model at a time; (3) `graph` and `graph-execution-context` are ignored. Refer to the previous document: e8d718096d/core/iwasm/libraries/wasi-nn/README.md
2023-03-08 08:54:06 +01:00
parent f279ba84ee
commit a15a731e12
16 changed files with 570 additions and 349 deletions
--- a/core/iwasm/libraries/wasi-nn/test/Dockerfile.base
+++ b/core/iwasm/libraries/wasi-nn/test/Dockerfile.base
@ -1,22 +0,0 @@
-# Copyright (C) 2019 Intel Corporation.  All rights reserved.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-FROM ubuntu:20.04 AS base
-
-ENV DEBIAN_FRONTEND=noninteractive
-
-RUN apt-get update && apt-get install -y \
-    cmake build-essential git
-
-WORKDIR /home/wamr
-
-COPY . .
-
-WORKDIR /home/wamr/core/iwasm/libraries/wasi-nn/test/build
-
-RUN cmake \
-  -DWAMR_BUILD_WASI_NN=1 \
-  -DTFLITE_ENABLE_GPU=ON \
-  ..
-
-RUN make -j $(grep -c ^processor /proc/cpuinfo)
--- a/core/iwasm/libraries/wasi-nn/test/Dockerfile.cpu
+++ b/core/iwasm/libraries/wasi-nn/test/Dockerfile.cpu
@ -1,8 +1,27 @@
 # Copyright (C) 2019 Intel Corporation.  All rights reserved.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

-FROM ubuntu:20.04
+FROM ubuntu:20.04 AS base

-COPY --from=wasi-nn-base /home/wamr/core/iwasm/libraries/wasi-nn/test/build/iwasm /run/iwasm
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && apt-get install -y \
+    cmake build-essential git
+
+WORKDIR /home/wamr
+
+COPY . .
+
+WORKDIR /home/wamr/core/iwasm/libraries/wasi-nn/test/build
+
+RUN cmake \
+  -DWAMR_BUILD_WASI_NN=1 \
+  ..
+
+RUN make -j $(grep -c ^processor /proc/cpuinfo)
+
+FROM ubuntu:22.04
+
+COPY --from=base /home/wamr/core/iwasm/libraries/wasi-nn/test/build/iwasm /run/iwasm

 ENTRYPOINT [ "/run/iwasm" ]
--- a/core/iwasm/libraries/wasi-nn/test/Dockerfile.nvidia-gpu
+++ b/core/iwasm/libraries/wasi-nn/test/Dockerfile.nvidia-gpu
@ -1,6 +1,26 @@
 # Copyright (C) 2019 Intel Corporation.  All rights reserved.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

+FROM ubuntu:20.04 AS base
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && apt-get install -y \
+    cmake build-essential git
+
+WORKDIR /home/wamr
+
+COPY . .
+
+WORKDIR /home/wamr/core/iwasm/libraries/wasi-nn/test/build
+
+RUN cmake \
+  -DWAMR_BUILD_WASI_NN=1 \
+  -DWASI_NN_ENABLE_GPU=1 \
+  ..
+
+RUN make -j $(grep -c ^processor /proc/cpuinfo)
+
 FROM nvidia/cuda:11.3.0-runtime-ubuntu20.04

 RUN apt-get update && apt-get install -y --no-install-recommends \
@ -15,6 +35,6 @@ RUN mkdir -p /etc/OpenCL/vendors && \
 ENV NVIDIA_VISIBLE_DEVICES=all
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility

-COPY --from=wasi-nn-base /home/wamr/core/iwasm/libraries/wasi-nn/test/build/iwasm /run/iwasm
+COPY --from=base /home/wamr/core/iwasm/libraries/wasi-nn/test/build/iwasm /run/iwasm

 ENTRYPOINT [ "/run/iwasm" ]
--- a/core/iwasm/libraries/wasi-nn/test/build.sh
+++ b/core/iwasm/libraries/wasi-nn/test/build.sh
@ -7,8 +7,9 @@
    -Wl,--allow-undefined \
    -Wl,--strip-all,--no-entry \
    --sysroot=/opt/wasi-sdk/share/wasi-sysroot \
-    -I.. \
-    -o test_tensorflow.wasm test_tensorflow.c
+    -I.. -I../src/utils \
+    -o test_tensorflow.wasm \
+    test_tensorflow.c utils.c

 # TFLite models to use in the tests

--- a/core/iwasm/libraries/wasi-nn/test/test_tensorflow.c
+++ b/core/iwasm/libraries/wasi-nn/test/test_tensorflow.c
@ -5,185 +5,12 @@

 #include <stdio.h>
 #include <stdlib.h>
-#include <string.h>
-#include <stdint.h>
-#include <math.h>
 #include <assert.h>
-#include "wasi_nn.h"
+#include <string.h>
+#include <math.h>

-#include <fcntl.h>
-#include <errno.h>
-
-#define MAX_MODEL_SIZE 85000000
-#define MAX_OUTPUT_TENSOR_SIZE 200
-#define INPUT_TENSOR_DIMS 4
-#define EPSILON 1e-8
-
-typedef struct {
-    float *input_tensor;
-    uint32_t *dim;
-    uint32_t elements;
-} input_info;
-
-// WASI-NN wrappers
-
-error
-wasm_load(char *model_name, graph *g, execution_target target)
-{
-    FILE *pFile = fopen(model_name, "r");
-    if (pFile == NULL)
-        return invalid_argument;
-
-    uint8_t *buffer;
-    size_t result;
-
-    // allocate memory to contain the whole file:
-    buffer = (uint8_t *)malloc(sizeof(uint8_t) * MAX_MODEL_SIZE);
-    if (buffer == NULL) {
-        fclose(pFile);
-        return missing_memory;
-    }
-
-    result = fread(buffer, 1, MAX_MODEL_SIZE, pFile);
-    if (result <= 0) {
-        fclose(pFile);
-        free(buffer);
-        return missing_memory;
-    }
-
-    graph_builder_array arr;
-
-    arr.size = 1;
-    arr.buf = (graph_builder *)malloc(sizeof(graph_builder));
-    if (arr.buf == NULL) {
-        fclose(pFile);
-        free(buffer);
-        return missing_memory;
-    }
-
-    arr.buf[0].size = result;
-    arr.buf[0].buf = buffer;
-
-    error res = load(&arr, tensorflowlite, target, g);
-
-    fclose(pFile);
-    free(buffer);
-    free(arr.buf);
-    return res;
-}
-
-error
-wasm_init_execution_context(graph g, graph_execution_context *ctx)
-{
-    return init_execution_context(g, ctx);
-}
-
-error
-wasm_set_input(graph_execution_context ctx, float *input_tensor, uint32_t *dim)
-{
-    tensor_dimensions dims;
-    dims.size = INPUT_TENSOR_DIMS;
-    dims.buf = (uint32_t *)malloc(dims.size * sizeof(uint32_t));
-    if (dims.buf == NULL)
-        return missing_memory;
-
-    tensor tensor;
-    tensor.dimensions = &dims;
-    for (int i = 0; i < tensor.dimensions->size; ++i)
-        tensor.dimensions->buf[i] = dim[i];
-    tensor.type = fp32;
-    tensor.data = (uint8_t *)input_tensor;
-    error err = set_input(ctx, 0, &tensor);
-
-    free(dims.buf);
-    return err;
-}
-
-error
-wasm_compute(graph_execution_context ctx)
-{
-    return compute(ctx);
-}
-
-error
-wasm_get_output(graph_execution_context ctx, uint32_t index, float *out_tensor,
-                uint32_t *out_size)
-{
-    return get_output(ctx, index, (uint8_t *)out_tensor, out_size);
-}
-
-// Inference
-
-float *
-run_inference(execution_target target, float *input, uint32_t *input_size,
-              uint32_t *output_size, char *model_name,
-              uint32_t num_output_tensors)
-{
-    graph graph;
-    if (wasm_load(model_name, &graph, target) != success) {
-        fprintf(stderr, "Error when loading model.");
-        exit(1);
-    }
-
-    graph_execution_context ctx;
-    if (wasm_init_execution_context(graph, &ctx) != success) {
-        fprintf(stderr, "Error when initialixing execution context.");
-        exit(1);
-    }
-
-    if (wasm_set_input(ctx, input, input_size) != success) {
-        fprintf(stderr, "Error when setting input tensor.");
-        exit(1);
-    }
-
-    if (wasm_compute(ctx) != success) {
-        fprintf(stderr, "Error when running inference.");
-        exit(1);
-    }
-
-    float *out_tensor = (float *)malloc(sizeof(float) * MAX_OUTPUT_TENSOR_SIZE);
-    if (out_tensor == NULL) {
-        fprintf(stderr, "Error when allocating memory for output tensor.");
-        exit(1);
-    }
-
-    uint32_t offset = 0;
-    for (int i = 0; i < num_output_tensors; ++i) {
-        *output_size = MAX_OUTPUT_TENSOR_SIZE - *output_size;
-        if (wasm_get_output(ctx, i, &out_tensor[offset], output_size)
-            != success) {
-            fprintf(stderr, "Error when getting output .");
-            exit(1);
-        }
-
-        offset += *output_size;
-    }
-    *output_size = offset;
-    return out_tensor;
-}
-
-// UTILS
-
-input_info
-create_input(int *dims)
-{
-    input_info input = { .dim = NULL, .input_tensor = NULL, .elements = 1 };
-
-    input.dim = malloc(INPUT_TENSOR_DIMS * sizeof(uint32_t));
-    if (input.dim)
-        for (int i = 0; i < INPUT_TENSOR_DIMS; ++i) {
-            input.dim[i] = dims[i];
-            input.elements *= dims[i];
-        }
-
-    input.input_tensor = malloc(input.elements * sizeof(float));
-    for (int i = 0; i < input.elements; ++i)
-        input.input_tensor[i] = i;
-
-    return input;
-}
-
-// TESTS
+#include "utils.h"
+#include "logger.h"

 void
 test_sum(execution_target target)
@ -215,7 +42,7 @@ test_max(execution_target target)

    assert(output_size == 1);
    assert(fabs(output[0] - 24.0) < EPSILON);
-    printf("Result: max is %f\n", output[0]);
+    NN_INFO_PRINTF("Result: max is %f", output[0]);

    free(input.dim);
    free(input.input_tensor);
@ -235,7 +62,7 @@ test_average(execution_target target)

    assert(output_size == 1);
    assert(fabs(output[0] - 12.0) < EPSILON);
-    printf("Result: average is %f\n", output[0]);
+    NN_INFO_PRINTF("Result: average is %f", output[0]);

    free(input.dim);
    free(input.input_tensor);
@ -291,7 +118,7 @@ main()
 {
    char *env = getenv("TARGET");
    if (env == NULL) {
-        printf("Usage:\n--env=\"TARGET=[cpu|gpu]\"\n");
+        NN_INFO_PRINTF("Usage:\n--env=\"TARGET=[cpu|gpu]\"");
        return 1;
    }
    execution_target target;
@ -300,20 +127,20 @@ main()
    else if (strcmp(env, "gpu") == 0)
        target = gpu;
    else {
-        printf("Wrong target!");
+        NN_ERR_PRINTF("Wrong target!");
        return 1;
    }
-    printf("################### Testing sum...\n");
+    NN_INFO_PRINTF("################### Testing sum...");
    test_sum(target);
-    printf("################### Testing max...\n");
+    NN_INFO_PRINTF("################### Testing max...");
    test_max(target);
-    printf("################### Testing average...\n");
+    NN_INFO_PRINTF("################### Testing average...");
    test_average(target);
-    printf("################### Testing multiple dimensions...\n");
+    NN_INFO_PRINTF("################### Testing multiple dimensions...");
    test_mult_dimensions(target);
-    printf("################### Testing multiple outputs...\n");
+    NN_INFO_PRINTF("################### Testing multiple outputs...");
    test_mult_outputs(target);

-    printf("Tests: passed!\n");
+    NN_INFO_PRINTF("Tests: passed!");
    return 0;
 }
--- a/core/iwasm/libraries/wasi-nn/test/utils.c
+++ b/core/iwasm/libraries/wasi-nn/test/utils.c
@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2019 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "utils.h"
+#include "logger.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/**
+ * Read a model file into memory and pass it to load() as a single
+ * TensorFlow Lite graph builder.
+ *
+ * At most MAX_MODEL_SIZE bytes are read; a larger file is handed to load()
+ * truncated to that many bytes.
+ *
+ * @param model_name path of the model file
+ * @param g          graph pointer forwarded to load()
+ * @param target     execution target forwarded to load()
+ *
+ * @return invalid_argument if the file cannot be opened; missing_memory if
+ *         the read buffer cannot be allocated, the read fails or no bytes
+ *         were read; otherwise the value returned by load().
+ */
+error
+wasm_load(char *model_name, graph *g, execution_target target)
+{
+    /* The model is binary data, so open the file in binary mode. */
+    FILE *pFile = fopen(model_name, "rb");
+    if (pFile == NULL)
+        return invalid_argument;
+
+    uint8_t *buffer = malloc(MAX_MODEL_SIZE);
+    if (buffer == NULL) {
+        fclose(pFile);
+        return missing_memory;
+    }
+
+    size_t result = fread(buffer, 1, MAX_MODEL_SIZE, pFile);
+    int read_failed = ferror(pFile);
+    /* Everything needed is in memory now, so release the file early. */
+    fclose(pFile);
+
+    error res = missing_memory;
+    if (result > 0 && !read_failed) {
+        /* One builder describing the whole file; no heap allocation needed. */
+        graph_builder builder = { .buf = buffer, .size = result };
+        graph_builder_array arr = { .buf = &builder, .size = 1 };
+
+        res = load(&arr, tensorflowlite, target, g);
+    }
+
+    free(buffer);
+    return res;
+}
+
+/*
+ * Forward the graph `g` and the context pointer `ctx` to
+ * init_execution_context() and return its result unchanged.
+ */
+error
+wasm_init_execution_context(graph g, graph_execution_context *ctx)
+{
+    error status = init_execution_context(g, ctx);
+
+    return status;
+}
+
+/**
+ * Describe `input_tensor` as an fp32 tensor with INPUT_TENSOR_DIMS
+ * dimensions and pass it to set_input() as input number 0.
+ *
+ * @param ctx          execution context forwarded to set_input()
+ * @param input_tensor tensor data, handed over as raw bytes
+ * @param dim          array of at least INPUT_TENSOR_DIMS dimension sizes
+ *
+ * @return the value returned by set_input()
+ */
+error
+wasm_set_input(graph_execution_context ctx, float *input_tensor, uint32_t *dim)
+{
+    /* The dimension count is a small constant, so keep the copy on the
+     * stack instead of allocating (and possibly failing) on the heap. */
+    uint32_t shape[INPUT_TENSOR_DIMS];
+    for (uint32_t i = 0; i < INPUT_TENSOR_DIMS; ++i)
+        shape[i] = dim[i];
+
+    tensor_dimensions dims = { .buf = shape, .size = INPUT_TENSOR_DIMS };
+    tensor input = { .dimensions = &dims,
+                     .type = fp32,
+                     .data = (uint8_t *)input_tensor };
+
+    return set_input(ctx, 0, &input);
+}
+
+/* Run compute() on the execution context `ctx` and return its result. */
+error
+wasm_compute(graph_execution_context ctx)
+{
+    error status = compute(ctx);
+
+    return status;
+}
+
+/*
+ * Fetch output number `index` of `ctx` through get_output(). The float
+ * buffer is handed over as raw bytes, `out_size` is passed through as is,
+ * and the result of get_output() is returned unchanged.
+ */
+error
+wasm_get_output(graph_execution_context ctx, uint32_t index, float *out_tensor,
+                uint32_t *out_size)
+{
+    uint8_t *raw_output = (uint8_t *)out_tensor;
+
+    return get_output(ctx, index, raw_output, out_size);
+}
+
+/**
+ * Load a model, run one inference on `input` and collect the first
+ * `num_output_tensors` outputs back to back in a single float buffer.
+ *
+ * Any failing step logs an error and terminates the process with exit(1).
+ *
+ * @param target             execution target passed to wasm_load()
+ * @param input              input tensor data
+ * @param input_size         dimension array passed to wasm_set_input()
+ * @param output_size        receives the total size of all collected outputs;
+ *                           the value written back by wasm_get_output() is
+ *                           used as a count of floats
+ * @param model_name         path of the model file
+ * @param num_output_tensors number of outputs to fetch
+ *
+ * @return buffer of MAX_OUTPUT_TENSOR_SIZE floats allocated with malloc();
+ *         the caller is responsible for freeing it
+ */
+float *
+run_inference(execution_target target, float *input, uint32_t *input_size,
+              uint32_t *output_size, char *model_name,
+              uint32_t num_output_tensors)
+{
+    graph graph;
+    if (wasm_load(model_name, &graph, target) != success) {
+        NN_ERR_PRINTF("Error when loading model.");
+        exit(1);
+    }
+
+    graph_execution_context ctx;
+    if (wasm_init_execution_context(graph, &ctx) != success) {
+        NN_ERR_PRINTF("Error when initialixing execution context.");
+        exit(1);
+    }
+
+    if (wasm_set_input(ctx, input, input_size) != success) {
+        NN_ERR_PRINTF("Error when setting input tensor.");
+        exit(1);
+    }
+
+    if (wasm_compute(ctx) != success) {
+        NN_ERR_PRINTF("Error when running inference.");
+        exit(1);
+    }
+
+    float *out_tensor = (float *)malloc(sizeof(float) * MAX_OUTPUT_TENSOR_SIZE);
+    if (out_tensor == NULL) {
+        NN_ERR_PRINTF("Error when allocating memory for output tensor.");
+        exit(1);
+    }
+
+    uint32_t offset = 0;
+    for (uint32_t i = 0; i < num_output_tensors; ++i) {
+        /* Room left after the outputs stored so far. Deriving it from
+         * `offset` keeps it correct for any number of outputs and does not
+         * depend on the value the caller left in *output_size. */
+        uint32_t capacity = MAX_OUTPUT_TENSOR_SIZE - offset;
+
+        *output_size = capacity;
+        if (wasm_get_output(ctx, i, &out_tensor[offset], output_size)
+                != success
+            || *output_size > capacity) {
+            NN_ERR_PRINTF("Error when getting output.");
+            exit(1);
+        }
+
+        offset += *output_size;
+    }
+    *output_size = offset;
+    return out_tensor;
+}
+
+/**
+ * Build a test input: copy INPUT_TENSOR_DIMS dimension sizes from `dims`
+ * and allocate a float tensor holding their product, filled with
+ * 0, 1, 2, ... in order.
+ *
+ * An allocation failure logs an error and terminates the process with
+ * exit(1), as run_inference() does for its own allocation.
+ *
+ * @param dims array of at least INPUT_TENSOR_DIMS dimension sizes
+ *
+ * @return input_info whose `dim` and `input_tensor` are allocated with
+ *         malloc(); the caller is responsible for freeing both
+ */
+input_info
+create_input(int *dims)
+{
+    input_info input = { .dim = NULL, .input_tensor = NULL, .elements = 1 };
+
+    input.dim = malloc(INPUT_TENSOR_DIMS * sizeof(uint32_t));
+    if (input.dim == NULL) {
+        NN_ERR_PRINTF("Error when allocating memory for input dimensions.");
+        exit(1);
+    }
+
+    for (uint32_t i = 0; i < INPUT_TENSOR_DIMS; ++i) {
+        input.dim[i] = dims[i];
+        input.elements *= dims[i];
+    }
+
+    input.input_tensor = malloc(input.elements * sizeof(float));
+    if (input.input_tensor == NULL) {
+        NN_ERR_PRINTF("Error when allocating memory for input tensor.");
+        exit(1);
+    }
+
+    /* Element k holds the value k, giving the tests a predictable input. */
+    for (uint32_t i = 0; i < input.elements; ++i)
+        input.input_tensor[i] = i;
+
+    return input;
+}
--- a/core/iwasm/libraries/wasi-nn/test/utils.h
+++ b/core/iwasm/libraries/wasi-nn/test/utils.h
@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2019 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef WASI_NN_UTILS
+#define WASI_NN_UTILS
+
+#include <stdint.h>
+
+#include "wasi_nn.h"
+
+#define MAX_MODEL_SIZE 85000000 /* size in bytes of wasm_load()'s file buffer */
+#define MAX_OUTPUT_TENSOR_SIZE 200 /* floats in run_inference()'s output buffer */
+#define INPUT_TENSOR_DIMS 4 /* dimensions copied per input tensor */
+#define EPSILON 1e-8 /* tolerance the tests use when comparing float results */
+
+typedef struct { /* test input as built by create_input() */
+    float *input_tensor; /* malloc'ed tensor data, `elements` floats long */
+    uint32_t *dim; /* malloc'ed array of INPUT_TENSOR_DIMS dimension sizes */
+    uint32_t elements; /* product of the dimension sizes stored in `dim` */
+} input_info;
+
+/* wasi-nn wrappers */
+
+error
+wasm_load(char *model_name, graph *g, execution_target target);
+
+error
+wasm_init_execution_context(graph g, graph_execution_context *ctx);
+
+error
+wasm_set_input(graph_execution_context ctx, float *input_tensor, uint32_t *dim);
+
+error
+wasm_compute(graph_execution_context ctx);
+
+error
+wasm_get_output(graph_execution_context ctx, uint32_t index, float *out_tensor,
+                uint32_t *out_size);
+
+/* Utils */
+
+float *
+run_inference(execution_target target, float *input, uint32_t *input_size,
+              uint32_t *output_size, char *model_name,
+              uint32_t num_output_tensors);
+
+input_info
+create_input(int *dims);
+
+#endif