wasi-nn: Support multiple TFLite models (#2002)
Remove restrictions:
- Only 1 WASM app at a time
- Only 1 model at a time
- `graph` and `graph-execution-context` are ignored
Refer to previous document:
e8d718096d/core/iwasm/libraries/wasi-nn/README.md
This commit is contained in:
@ -1,22 +0,0 @@
|
||||
# Copyright (C) 2019 Intel Corporation. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
|
||||
FROM ubuntu:20.04 AS base
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
RUN apt-get update && apt-get install -y \
|
||||
cmake build-essential git
|
||||
|
||||
WORKDIR /home/wamr
|
||||
|
||||
COPY . .
|
||||
|
||||
WORKDIR /home/wamr/core/iwasm/libraries/wasi-nn/test/build
|
||||
|
||||
RUN cmake \
|
||||
-DWAMR_BUILD_WASI_NN=1 \
|
||||
-DTFLITE_ENABLE_GPU=ON \
|
||||
..
|
||||
|
||||
RUN make -j $(grep -c ^processor /proc/cpuinfo)
|
||||
@ -1,8 +1,27 @@
|
||||
# Copyright (C) 2019 Intel Corporation. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
|
||||
FROM ubuntu:20.04
|
||||
FROM ubuntu:20.04 AS base
|
||||
|
||||
COPY --from=wasi-nn-base /home/wamr/core/iwasm/libraries/wasi-nn/test/build/iwasm /run/iwasm
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
RUN apt-get update && apt-get install -y \
|
||||
cmake build-essential git
|
||||
|
||||
WORKDIR /home/wamr
|
||||
|
||||
COPY . .
|
||||
|
||||
WORKDIR /home/wamr/core/iwasm/libraries/wasi-nn/test/build
|
||||
|
||||
RUN cmake \
|
||||
-DWAMR_BUILD_WASI_NN=1 \
|
||||
..
|
||||
|
||||
RUN make -j $(grep -c ^processor /proc/cpuinfo)
|
||||
|
||||
FROM ubuntu:22.04
|
||||
|
||||
COPY --from=base /home/wamr/core/iwasm/libraries/wasi-nn/test/build/iwasm /run/iwasm
|
||||
|
||||
ENTRYPOINT [ "/run/iwasm" ]
|
||||
|
||||
@ -1,6 +1,26 @@
|
||||
# Copyright (C) 2019 Intel Corporation. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
|
||||
FROM ubuntu:20.04 AS base
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
RUN apt-get update && apt-get install -y \
|
||||
cmake build-essential git
|
||||
|
||||
WORKDIR /home/wamr
|
||||
|
||||
COPY . .
|
||||
|
||||
WORKDIR /home/wamr/core/iwasm/libraries/wasi-nn/test/build
|
||||
|
||||
RUN cmake \
|
||||
-DWAMR_BUILD_WASI_NN=1 \
|
||||
-DWASI_NN_ENABLE_GPU=1 \
|
||||
..
|
||||
|
||||
RUN make -j $(grep -c ^processor /proc/cpuinfo)
|
||||
|
||||
FROM nvidia/cuda:11.3.0-runtime-ubuntu20.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
@ -15,6 +35,6 @@ RUN mkdir -p /etc/OpenCL/vendors && \
|
||||
ENV NVIDIA_VISIBLE_DEVICES=all
|
||||
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||
|
||||
COPY --from=wasi-nn-base /home/wamr/core/iwasm/libraries/wasi-nn/test/build/iwasm /run/iwasm
|
||||
COPY --from=base /home/wamr/core/iwasm/libraries/wasi-nn/test/build/iwasm /run/iwasm
|
||||
|
||||
ENTRYPOINT [ "/run/iwasm" ]
|
||||
|
||||
@ -7,8 +7,9 @@
|
||||
-Wl,--allow-undefined \
|
||||
-Wl,--strip-all,--no-entry \
|
||||
--sysroot=/opt/wasi-sdk/share/wasi-sysroot \
|
||||
-I.. \
|
||||
-o test_tensorflow.wasm test_tensorflow.c
|
||||
-I.. -I../src/utils \
|
||||
-o test_tensorflow.wasm \
|
||||
test_tensorflow.c utils.c
|
||||
|
||||
# TFLite models to use in the tests
|
||||
|
||||
|
||||
@ -5,185 +5,12 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <math.h>
|
||||
#include <assert.h>
|
||||
#include "wasi_nn.h"
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
|
||||
#define MAX_MODEL_SIZE 85000000
|
||||
#define MAX_OUTPUT_TENSOR_SIZE 200
|
||||
#define INPUT_TENSOR_DIMS 4
|
||||
#define EPSILON 1e-8
|
||||
|
||||
typedef struct {
|
||||
float *input_tensor;
|
||||
uint32_t *dim;
|
||||
uint32_t elements;
|
||||
} input_info;
|
||||
|
||||
// WASI-NN wrappers
|
||||
|
||||
error
|
||||
wasm_load(char *model_name, graph *g, execution_target target)
|
||||
{
|
||||
FILE *pFile = fopen(model_name, "r");
|
||||
if (pFile == NULL)
|
||||
return invalid_argument;
|
||||
|
||||
uint8_t *buffer;
|
||||
size_t result;
|
||||
|
||||
// allocate memory to contain the whole file:
|
||||
buffer = (uint8_t *)malloc(sizeof(uint8_t) * MAX_MODEL_SIZE);
|
||||
if (buffer == NULL) {
|
||||
fclose(pFile);
|
||||
return missing_memory;
|
||||
}
|
||||
|
||||
result = fread(buffer, 1, MAX_MODEL_SIZE, pFile);
|
||||
if (result <= 0) {
|
||||
fclose(pFile);
|
||||
free(buffer);
|
||||
return missing_memory;
|
||||
}
|
||||
|
||||
graph_builder_array arr;
|
||||
|
||||
arr.size = 1;
|
||||
arr.buf = (graph_builder *)malloc(sizeof(graph_builder));
|
||||
if (arr.buf == NULL) {
|
||||
fclose(pFile);
|
||||
free(buffer);
|
||||
return missing_memory;
|
||||
}
|
||||
|
||||
arr.buf[0].size = result;
|
||||
arr.buf[0].buf = buffer;
|
||||
|
||||
error res = load(&arr, tensorflowlite, target, g);
|
||||
|
||||
fclose(pFile);
|
||||
free(buffer);
|
||||
free(arr.buf);
|
||||
return res;
|
||||
}
|
||||
|
||||
error
|
||||
wasm_init_execution_context(graph g, graph_execution_context *ctx)
|
||||
{
|
||||
return init_execution_context(g, ctx);
|
||||
}
|
||||
|
||||
error
|
||||
wasm_set_input(graph_execution_context ctx, float *input_tensor, uint32_t *dim)
|
||||
{
|
||||
tensor_dimensions dims;
|
||||
dims.size = INPUT_TENSOR_DIMS;
|
||||
dims.buf = (uint32_t *)malloc(dims.size * sizeof(uint32_t));
|
||||
if (dims.buf == NULL)
|
||||
return missing_memory;
|
||||
|
||||
tensor tensor;
|
||||
tensor.dimensions = &dims;
|
||||
for (int i = 0; i < tensor.dimensions->size; ++i)
|
||||
tensor.dimensions->buf[i] = dim[i];
|
||||
tensor.type = fp32;
|
||||
tensor.data = (uint8_t *)input_tensor;
|
||||
error err = set_input(ctx, 0, &tensor);
|
||||
|
||||
free(dims.buf);
|
||||
return err;
|
||||
}
|
||||
|
||||
error
|
||||
wasm_compute(graph_execution_context ctx)
|
||||
{
|
||||
return compute(ctx);
|
||||
}
|
||||
|
||||
error
|
||||
wasm_get_output(graph_execution_context ctx, uint32_t index, float *out_tensor,
|
||||
uint32_t *out_size)
|
||||
{
|
||||
return get_output(ctx, index, (uint8_t *)out_tensor, out_size);
|
||||
}
|
||||
|
||||
// Inference
|
||||
|
||||
float *
|
||||
run_inference(execution_target target, float *input, uint32_t *input_size,
|
||||
uint32_t *output_size, char *model_name,
|
||||
uint32_t num_output_tensors)
|
||||
{
|
||||
graph graph;
|
||||
if (wasm_load(model_name, &graph, target) != success) {
|
||||
fprintf(stderr, "Error when loading model.");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
graph_execution_context ctx;
|
||||
if (wasm_init_execution_context(graph, &ctx) != success) {
|
||||
fprintf(stderr, "Error when initialixing execution context.");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (wasm_set_input(ctx, input, input_size) != success) {
|
||||
fprintf(stderr, "Error when setting input tensor.");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (wasm_compute(ctx) != success) {
|
||||
fprintf(stderr, "Error when running inference.");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
float *out_tensor = (float *)malloc(sizeof(float) * MAX_OUTPUT_TENSOR_SIZE);
|
||||
if (out_tensor == NULL) {
|
||||
fprintf(stderr, "Error when allocating memory for output tensor.");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
uint32_t offset = 0;
|
||||
for (int i = 0; i < num_output_tensors; ++i) {
|
||||
*output_size = MAX_OUTPUT_TENSOR_SIZE - *output_size;
|
||||
if (wasm_get_output(ctx, i, &out_tensor[offset], output_size)
|
||||
!= success) {
|
||||
fprintf(stderr, "Error when getting output .");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
offset += *output_size;
|
||||
}
|
||||
*output_size = offset;
|
||||
return out_tensor;
|
||||
}
|
||||
|
||||
// UTILS
|
||||
|
||||
input_info
|
||||
create_input(int *dims)
|
||||
{
|
||||
input_info input = { .dim = NULL, .input_tensor = NULL, .elements = 1 };
|
||||
|
||||
input.dim = malloc(INPUT_TENSOR_DIMS * sizeof(uint32_t));
|
||||
if (input.dim)
|
||||
for (int i = 0; i < INPUT_TENSOR_DIMS; ++i) {
|
||||
input.dim[i] = dims[i];
|
||||
input.elements *= dims[i];
|
||||
}
|
||||
|
||||
input.input_tensor = malloc(input.elements * sizeof(float));
|
||||
for (int i = 0; i < input.elements; ++i)
|
||||
input.input_tensor[i] = i;
|
||||
|
||||
return input;
|
||||
}
|
||||
|
||||
// TESTS
|
||||
#include "utils.h"
|
||||
#include "logger.h"
|
||||
|
||||
void
|
||||
test_sum(execution_target target)
|
||||
@ -215,7 +42,7 @@ test_max(execution_target target)
|
||||
|
||||
assert(output_size == 1);
|
||||
assert(fabs(output[0] - 24.0) < EPSILON);
|
||||
printf("Result: max is %f\n", output[0]);
|
||||
NN_INFO_PRINTF("Result: max is %f", output[0]);
|
||||
|
||||
free(input.dim);
|
||||
free(input.input_tensor);
|
||||
@ -235,7 +62,7 @@ test_average(execution_target target)
|
||||
|
||||
assert(output_size == 1);
|
||||
assert(fabs(output[0] - 12.0) < EPSILON);
|
||||
printf("Result: average is %f\n", output[0]);
|
||||
NN_INFO_PRINTF("Result: average is %f", output[0]);
|
||||
|
||||
free(input.dim);
|
||||
free(input.input_tensor);
|
||||
@ -291,7 +118,7 @@ main()
|
||||
{
|
||||
char *env = getenv("TARGET");
|
||||
if (env == NULL) {
|
||||
printf("Usage:\n--env=\"TARGET=[cpu|gpu]\"\n");
|
||||
NN_INFO_PRINTF("Usage:\n--env=\"TARGET=[cpu|gpu]\"");
|
||||
return 1;
|
||||
}
|
||||
execution_target target;
|
||||
@ -300,20 +127,20 @@ main()
|
||||
else if (strcmp(env, "gpu") == 0)
|
||||
target = gpu;
|
||||
else {
|
||||
printf("Wrong target!");
|
||||
NN_ERR_PRINTF("Wrong target!");
|
||||
return 1;
|
||||
}
|
||||
printf("################### Testing sum...\n");
|
||||
NN_INFO_PRINTF("################### Testing sum...");
|
||||
test_sum(target);
|
||||
printf("################### Testing max...\n");
|
||||
NN_INFO_PRINTF("################### Testing max...");
|
||||
test_max(target);
|
||||
printf("################### Testing average...\n");
|
||||
NN_INFO_PRINTF("################### Testing average...");
|
||||
test_average(target);
|
||||
printf("################### Testing multiple dimensions...\n");
|
||||
NN_INFO_PRINTF("################### Testing multiple dimensions...");
|
||||
test_mult_dimensions(target);
|
||||
printf("################### Testing multiple outputs...\n");
|
||||
NN_INFO_PRINTF("################### Testing multiple outputs...");
|
||||
test_mult_outputs(target);
|
||||
|
||||
printf("Tests: passed!\n");
|
||||
NN_INFO_PRINTF("Tests: passed!");
|
||||
return 0;
|
||||
}
|
||||
|
||||
162
core/iwasm/libraries/wasi-nn/test/utils.c
Normal file
162
core/iwasm/libraries/wasi-nn/test/utils.c
Normal file
@ -0,0 +1,162 @@
|
||||
/*
|
||||
* Copyright (C) 2019 Intel Corporation. All rights reserved.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*/
|
||||
|
||||
#include "utils.h"
|
||||
#include "logger.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
error
|
||||
wasm_load(char *model_name, graph *g, execution_target target)
|
||||
{
|
||||
FILE *pFile = fopen(model_name, "r");
|
||||
if (pFile == NULL)
|
||||
return invalid_argument;
|
||||
|
||||
uint8_t *buffer;
|
||||
size_t result;
|
||||
|
||||
// allocate memory to contain the whole file:
|
||||
buffer = (uint8_t *)malloc(sizeof(uint8_t) * MAX_MODEL_SIZE);
|
||||
if (buffer == NULL) {
|
||||
fclose(pFile);
|
||||
return missing_memory;
|
||||
}
|
||||
|
||||
result = fread(buffer, 1, MAX_MODEL_SIZE, pFile);
|
||||
if (result <= 0) {
|
||||
fclose(pFile);
|
||||
free(buffer);
|
||||
return missing_memory;
|
||||
}
|
||||
|
||||
graph_builder_array arr;
|
||||
|
||||
arr.size = 1;
|
||||
arr.buf = (graph_builder *)malloc(sizeof(graph_builder));
|
||||
if (arr.buf == NULL) {
|
||||
fclose(pFile);
|
||||
free(buffer);
|
||||
return missing_memory;
|
||||
}
|
||||
|
||||
arr.buf[0].size = result;
|
||||
arr.buf[0].buf = buffer;
|
||||
|
||||
error res = load(&arr, tensorflowlite, target, g);
|
||||
|
||||
fclose(pFile);
|
||||
free(buffer);
|
||||
free(arr.buf);
|
||||
return res;
|
||||
}
|
||||
|
||||
error
|
||||
wasm_init_execution_context(graph g, graph_execution_context *ctx)
|
||||
{
|
||||
return init_execution_context(g, ctx);
|
||||
}
|
||||
|
||||
error
|
||||
wasm_set_input(graph_execution_context ctx, float *input_tensor, uint32_t *dim)
|
||||
{
|
||||
tensor_dimensions dims;
|
||||
dims.size = INPUT_TENSOR_DIMS;
|
||||
dims.buf = (uint32_t *)malloc(dims.size * sizeof(uint32_t));
|
||||
if (dims.buf == NULL)
|
||||
return missing_memory;
|
||||
|
||||
tensor tensor;
|
||||
tensor.dimensions = &dims;
|
||||
for (int i = 0; i < tensor.dimensions->size; ++i)
|
||||
tensor.dimensions->buf[i] = dim[i];
|
||||
tensor.type = fp32;
|
||||
tensor.data = (uint8_t *)input_tensor;
|
||||
error err = set_input(ctx, 0, &tensor);
|
||||
|
||||
free(dims.buf);
|
||||
return err;
|
||||
}
|
||||
|
||||
error
|
||||
wasm_compute(graph_execution_context ctx)
|
||||
{
|
||||
return compute(ctx);
|
||||
}
|
||||
|
||||
error
|
||||
wasm_get_output(graph_execution_context ctx, uint32_t index, float *out_tensor,
|
||||
uint32_t *out_size)
|
||||
{
|
||||
return get_output(ctx, index, (uint8_t *)out_tensor, out_size);
|
||||
}
|
||||
|
||||
float *
|
||||
run_inference(execution_target target, float *input, uint32_t *input_size,
|
||||
uint32_t *output_size, char *model_name,
|
||||
uint32_t num_output_tensors)
|
||||
{
|
||||
graph graph;
|
||||
if (wasm_load(model_name, &graph, target) != success) {
|
||||
NN_ERR_PRINTF("Error when loading model.");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
graph_execution_context ctx;
|
||||
if (wasm_init_execution_context(graph, &ctx) != success) {
|
||||
NN_ERR_PRINTF("Error when initialixing execution context.");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (wasm_set_input(ctx, input, input_size) != success) {
|
||||
NN_ERR_PRINTF("Error when setting input tensor.");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (wasm_compute(ctx) != success) {
|
||||
NN_ERR_PRINTF("Error when running inference.");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
float *out_tensor = (float *)malloc(sizeof(float) * MAX_OUTPUT_TENSOR_SIZE);
|
||||
if (out_tensor == NULL) {
|
||||
NN_ERR_PRINTF("Error when allocating memory for output tensor.");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
uint32_t offset = 0;
|
||||
for (int i = 0; i < num_output_tensors; ++i) {
|
||||
*output_size = MAX_OUTPUT_TENSOR_SIZE - *output_size;
|
||||
if (wasm_get_output(ctx, i, &out_tensor[offset], output_size)
|
||||
!= success) {
|
||||
NN_ERR_PRINTF("Error when getting output.");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
offset += *output_size;
|
||||
}
|
||||
*output_size = offset;
|
||||
return out_tensor;
|
||||
}
|
||||
|
||||
input_info
|
||||
create_input(int *dims)
|
||||
{
|
||||
input_info input = { .dim = NULL, .input_tensor = NULL, .elements = 1 };
|
||||
|
||||
input.dim = malloc(INPUT_TENSOR_DIMS * sizeof(uint32_t));
|
||||
if (input.dim)
|
||||
for (int i = 0; i < INPUT_TENSOR_DIMS; ++i) {
|
||||
input.dim[i] = dims[i];
|
||||
input.elements *= dims[i];
|
||||
}
|
||||
|
||||
input.input_tensor = malloc(input.elements * sizeof(float));
|
||||
for (int i = 0; i < input.elements; ++i)
|
||||
input.input_tensor[i] = i;
|
||||
|
||||
return input;
|
||||
}
|
||||
52
core/iwasm/libraries/wasi-nn/test/utils.h
Normal file
52
core/iwasm/libraries/wasi-nn/test/utils.h
Normal file
@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright (C) 2019 Intel Corporation. All rights reserved.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*/
|
||||
|
||||
#ifndef WASI_NN_UTILS
|
||||
#define WASI_NN_UTILS
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "wasi_nn.h"
|
||||
|
||||
#define MAX_MODEL_SIZE 85000000
|
||||
#define MAX_OUTPUT_TENSOR_SIZE 200
|
||||
#define INPUT_TENSOR_DIMS 4
|
||||
#define EPSILON 1e-8
|
||||
|
||||
/* Description of a test input tensor, as filled in by create_input(). */
typedef struct {
|
||||
/* Tensor values; create_input() allocates `elements` floats here. */
float *input_tensor;
|
||||
/* Shape of the tensor; create_input() allocates INPUT_TENSOR_DIMS entries. */
uint32_t *dim;
|
||||
/* Total number of values, i.e. the product of the entries of `dim`. */
uint32_t elements;
|
||||
} input_info;
|
||||
|
||||
/* wasi-nn wrappers */
|
||||
|
||||
/* Reads up to MAX_MODEL_SIZE bytes of the file `model_name` and passes them
 * to load() with the tensorflowlite encoding, forwarding `g` and `target`.
 * Returns invalid_argument if the file cannot be opened, missing_memory on
 * allocation or read failure, otherwise the result of load(). */
error
|
||||
wasm_load(char *model_name, graph *g, execution_target target);
|
||||
|
||||
/* Forwards to init_execution_context() and returns its result. */
error
|
||||
wasm_init_execution_context(graph g, graph_execution_context *ctx);
|
||||
|
||||
/* Sets input index 0 of `ctx` to an fp32 tensor backed by `input_tensor`,
 * with a shape copied from the first INPUT_TENSOR_DIMS entries of `dim`.
 * Returns missing_memory on allocation failure, otherwise the result of
 * set_input(). */
error
|
||||
wasm_set_input(graph_execution_context ctx, float *input_tensor, uint32_t *dim);
|
||||
|
||||
/* Forwards to compute() and returns its result. */
error
|
||||
wasm_compute(graph_execution_context ctx);
|
||||
|
||||
/* Forwards to get_output(), passing `out_tensor` as a raw byte buffer, and
 * returns its result. */
error
|
||||
wasm_get_output(graph_execution_context ctx, uint32_t index, float *out_tensor,
|
||||
uint32_t *out_size);
|
||||
|
||||
/* Utils */
|
||||
|
||||
/* Loads `model_name`, creates an execution context, sets `input` (shape
 * `input_size`) as the input, runs compute, and fetches outputs
 * 0..num_output_tensors-1 back to back into a malloc()ed buffer of
 * MAX_OUTPUT_TENSOR_SIZE floats, which is returned. On return, *output_size
 * holds the accumulated size of all outputs. Any failing step logs an error
 * and calls exit(1). */
float *
|
||||
run_inference(execution_target target, float *input, uint32_t *input_size,
|
||||
uint32_t *output_size, char *model_name,
|
||||
uint32_t num_output_tensors);
|
||||
|
||||
/* Builds an input_info from the first INPUT_TENSOR_DIMS entries of `dims`:
 * allocates the `dim` and `input_tensor` arrays and fills the tensor with
 * 0, 1, 2, ... The arrays are not freed by this function. */
input_info
|
||||
create_input(int *dims);
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user