Merge branch main into dev/wasi-libc-windows
@@ -23,6 +23,7 @@ include(FetchContent)
 set(RATS_BUILD_MODE "sgx"
     CACHE INTERNAL "Select build mode for librats(host|occlum|sgx|wasm)")
 set(RATS_INSTALL_PATH "${CMAKE_BINARY_DIR}/librats" CACHE INTERNAL "")
+set(BUILD_SAMPLES OFF)

 FetchContent_Declare(
     librats
@@ -34,8 +35,17 @@ if (NOT librats_POPULATED)
     message("-- Fetching librats ..")
     FetchContent_Populate(librats)
     include_directories("${librats_SOURCE_DIR}/include")
+
+    # Prevent the propagation of the CMAKE_C_FLAGS of WAMR into librats
+    set(SAVED_CMAKE_C_FLAGS ${CMAKE_C_FLAGS})
+    set(CMAKE_C_FLAGS "")
+
+    # Import the building scripts of librats
     add_subdirectory(${librats_SOURCE_DIR} ${librats_BINARY_DIR} EXCLUDE_FROM_ALL)
+
+    # Restore the CMAKE_C_FLAGS of WAMR
+    set(CMAKE_C_FLAGS ${SAVED_CMAKE_C_FLAGS})

 endif()

 file (GLOB source_all ${LIB_RATS_DIR}/*.c)
@@ -9,8 +9,32 @@ set -eo pipefail

 CC=${CC:=/opt/wasi-sdk/bin/clang}
 WAMR_DIR=../../../../..

+show_usage() {
+    echo "Usage: $0 [--sysroot PATH_TO_SYSROOT]"
+    echo "--sysroot PATH_TO_SYSROOT specify to build with custom sysroot for wasi-libc"
+}
+
+while [[ $# -gt 0 ]]; do
+    key="$1"
+    case $key in
+        --sysroot)
+            sysroot_path="$2"
+            shift
+            shift
+            ;;
+        --help)
+            show_usage
+            exit
+            ;;
+        *)
+            echo "Unknown option: $1"
+            exit 1
+            ;;
+    esac
+done
+
 # Stress tests names
-thread_start_file_exclusions=("spawn_stress_test.wasm" "linear_memory_size_update.wasm")
+thread_start_file_exclusions=("spawn_stress_test.wasm" "linear_memory_size_update.wasm" "stress_test_threads_creation.wasm")

 for test_c in *.c; do
     test_wasm="$(basename $test_c .c).wasm"
@@ -21,9 +45,18 @@ for test_c in *.c; do
         thread_start_file=$WAMR_DIR/samples/wasi-threads/wasm-apps/wasi_thread_start.S
     fi

+    if [[ -n "$sysroot_path" ]]; then
+        if [ ! -d "$sysroot_path" ]; then
+            echo "Directory $sysroot_path doesn't exist. Aborting"
+            exit 1
+        fi
+        sysroot_command="--sysroot $sysroot_path"
+    fi
+
+    echo "Compiling $test_c to $test_wasm"
     $CC \
         -target wasm32-wasi-threads \
         -O2 \
         -pthread -ftls-model=local-exec \
         -z stack-size=32768 \
         -Wl,--export=__heap_base \
@@ -33,6 +66,7 @@ for test_c in *.c; do
         -Wl,--export=malloc \
         -Wl,--export=free \
         -I $WAMR_DIR/samples/wasi-threads/wasm-apps \
+        $sysroot_command \
         $thread_start_file \
         $test_c -o $test_wasm
 done
@@ -1,5 +1,6 @@
 {
     "lib-wasi-threads tests": {
-        "spawn_stress_test": "Stress tests are incompatible with the other part and executed differently"
+        "spawn_stress_test": "Stress tests are incompatible with the other part and executed differently",
+        "stress_test_threads_creation": "Stress tests are incompatible with the other part and executed differently"
    }
}
@@ -18,8 +18,9 @@

 enum CONSTANTS {
     NUM_ITER = 100000,
-    NUM_RETRY = 5,
+    NUM_RETRY = 8,
     MAX_NUM_THREADS = 8,
+    RETRY_SLEEP_TIME_US = 2000,
 };

 unsigned prime_numbers_count = 0;
@@ -62,11 +63,13 @@ void
 spawn_thread(pthread_t *thread, unsigned int *arg)
 {
     int status_code = -1;
+    int timeout_us = RETRY_SLEEP_TIME_US;
     for (int tries = 0; status_code != 0 && tries < NUM_RETRY; ++tries) {
         status_code = pthread_create(thread, NULL, &check_if_prime, arg);
         assert(status_code == 0 || status_code == EAGAIN);
         if (status_code == EAGAIN) {
-            usleep(2000);
+            usleep(timeout_us);
+            timeout_us *= 2;
         }
     }
@@ -95,7 +98,7 @@ main(int argc, char **argv)

         args[thread_num] = factorised_number;

-        usleep(2000);
+        usleep(RETRY_SLEEP_TIME_US);
         spawn_thread(&threads[thread_num], &args[thread_num]);
         assert(threads[thread_num] != 0);
     }
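The hunks above replace a fixed 2 ms retry sleep with exponential backoff: every EAGAIN from pthread_create doubles the wait before the next attempt, so the test yields progressively longer instead of hammering a saturated runtime. A standalone sketch of the same retry pattern, with a hypothetical worker function standing in for the test's check_if_prime:

#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <unistd.h>

#define NUM_RETRY 8
#define RETRY_SLEEP_TIME_US 2000

static void *
worker(void *arg)
{
    (void)arg;
    return NULL;
}

/* Retry pthread_create with exponential backoff while it reports EAGAIN
 * (no free thread slot yet); any other error fails the assertion. */
static void
spawn_with_backoff(pthread_t *thread)
{
    int status_code = -1;
    int timeout_us = RETRY_SLEEP_TIME_US;
    for (int tries = 0; status_code != 0 && tries < NUM_RETRY; ++tries) {
        status_code = pthread_create(thread, NULL, worker, NULL);
        assert(status_code == 0 || status_code == EAGAIN);
        if (status_code == EAGAIN) {
            usleep(timeout_us); /* 2 ms, 4 ms, 8 ms, ... */
            timeout_us *= 2;
        }
    }
    assert(status_code == 0 && "thread creation should eventually succeed");
}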
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2023 Amazon.com Inc. or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+
+enum CONSTANTS {
+    NUM_ITER = 200000,
+    NUM_RETRY = 8,
+    MAX_NUM_THREADS = 8,
+    RETRY_SLEEP_TIME_US = 4000,
+    SECOND = 1000 * 1000 * 1000
+};
+
+int threads_executed = 0;
+unsigned int threads_creation_tried = 0;
+unsigned int threads_in_use = 0;
+
+void *
+thread_func(void *arg)
+{
+    (void)(arg);
+    __atomic_fetch_add(&threads_executed, 1, __ATOMIC_RELAXED);
+    __atomic_fetch_sub(&threads_in_use, 1, __ATOMIC_SEQ_CST);
+    return NULL;
+}
+
+void
+spawn_thread(pthread_t *thread)
+{
+    int status_code = -1;
+    int timeout_us = RETRY_SLEEP_TIME_US;
+    for (int tries = 0; status_code != 0 && tries < NUM_RETRY; ++tries) {
+        status_code = pthread_create(thread, NULL, &thread_func, NULL);
+        __atomic_fetch_add(&threads_creation_tried, 1, __ATOMIC_RELAXED);
+
+        assert(status_code == 0 || status_code == EAGAIN);
+        if (status_code == EAGAIN) {
+            usleep(timeout_us);
+            timeout_us *= 2;
+        }
+    }
+
+    assert(status_code == 0 && "Thread creation should succeed");
+}
+
+int
+main(int argc, char **argv)
+{
+    double percentage = 0.1;
+
+    for (int iter = 0; iter < NUM_ITER; ++iter) {
+        if (iter > NUM_ITER * percentage) {
+            fprintf(stderr, "Spawning stress test is %d%% finished\n",
+                    (unsigned int)(percentage * 100));
+            percentage += 0.1;
+        }
+        while (__atomic_load_n(&threads_in_use, __ATOMIC_SEQ_CST)
+               == MAX_NUM_THREADS) {
+            usleep(100);
+        }
+
+        __atomic_fetch_add(&threads_in_use, 1, __ATOMIC_SEQ_CST);
+        pthread_t tmp;
+        spawn_thread(&tmp);
+        pthread_detach(tmp);
+    }
+
+    while ((__atomic_load_n(&threads_in_use, __ATOMIC_SEQ_CST) != 0)) {
+        __builtin_wasm_memory_atomic_wait32(&threads_in_use, 0, SECOND);
+    }
+
+    assert(__atomic_load_n(&threads_in_use, __ATOMIC_SEQ_CST) == 0);
+
+    // Validation
+    assert(threads_creation_tried >= threads_executed
+           && "Test executed more threads than were created");
+    assert((1. * threads_creation_tried) / threads_executed < 2.5
+           && "Ensuring that we're retrying thread creation less than 2.5 "
+              "times on average ");
+
+    fprintf(stderr,
+            "Spawning stress test finished successfully executed %d threads "
+            "with retry ratio %f\n",
+            threads_creation_tried,
+            (1. * threads_creation_tried) / threads_executed);
+    return 0;
+}
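A note on the shutdown loop in this new test: __builtin_wasm_memory_atomic_wait32 is Clang's intrinsic for the memory.atomic.wait32 instruction from the Wasm threads proposal. It blocks until the 32-bit value at the address no longer equals the expected value, a notify wakes the location, or the timeout elapses; the timeout is expressed in nanoseconds, which is why the test defines SECOND as 1000 * 1000 * 1000. A sketch of the wait idiom in isolation (assumes clang targeting wasm32-wasi-threads with atomics enabled; per the proposal the intrinsic returns 0 when woken, 1 when the value already differed, 2 on timeout):

#include <stdint.h>

/* Block until *addr != expected, re-checking after every wakeup
 * or one-second timeout, as the test's main() does. */
static void
wait_until_changed(int *addr, int expected)
{
    const int64_t second_ns = 1000LL * 1000 * 1000;
    while (__atomic_load_n(addr, __ATOMIC_SEQ_CST) == expected) {
        __builtin_wasm_memory_atomic_wait32(addr, expected, second_ns);
    }
}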
@@ -21,7 +21,8 @@ tid_allocator_init(TidAllocator *tid_allocator)
         return false;

     for (int64 i = tid_allocator->pos - 1; i >= 0; i--)
-        tid_allocator->ids[i] = TID_MIN + (tid_allocator->pos - 1 - i);
+        tid_allocator->ids[i] =
+            (uint32)(TID_MIN + (tid_allocator->pos - 1 - i));

     return true;
 }
@@ -54,7 +55,8 @@ tid_allocator_get_tid(TidAllocator *tid_allocator)
         LOG_ERROR("Overflow detected during realloc");
         return -1;
     }
-    int32 *tmp = wasm_runtime_realloc(tid_allocator->ids, realloc_size);
+    int32 *tmp =
+        wasm_runtime_realloc(tid_allocator->ids, (uint32)realloc_size);
     if (tmp == NULL) {
         LOG_ERROR("Thread ID allocator realloc failed");
         return -1;
@@ -64,7 +66,8 @@ tid_allocator_get_tid(TidAllocator *tid_allocator)
         tid_allocator->pos = new_size - old_size;
         tid_allocator->ids = tmp;
         for (int64 i = tid_allocator->pos - 1; i >= 0; i--)
-            tid_allocator->ids[i] = TID_MIN + (tid_allocator->size - 1 - i);
+            tid_allocator->ids[i] =
+                (uint32)(TID_MIN + (tid_allocator->size - 1 - i));
     }

     // Pop available thread identifier from the stack
@@ -77,4 +80,4 @@ tid_allocator_release_tid(TidAllocator *tid_allocator, int32 thread_id)
     // Release thread identifier by pushing it into the stack
     bh_assert(tid_allocator->pos < tid_allocator->size);
     tid_allocator->ids[tid_allocator->pos++] = thread_id;
 }
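For context on these hunks: the allocator keeps free thread IDs in a LIFO stack, where ids holds the free IDs and pos is the number currently stacked; getting an ID pops the stack and releasing one pushes it back. A simplified fixed-capacity sketch of that idea (hypothetical names; the grow-and-refill path the casts above belong to is omitted):

#include <assert.h>
#include <stdint.h>

#define TID_MIN 1
#define CAPACITY 16

typedef struct {
    int32_t ids[CAPACITY]; /* stack of free thread IDs */
    int64_t pos;           /* number of IDs currently on the stack */
} TidPool;

static void
tid_pool_init(TidPool *pool)
{
    pool->pos = CAPACITY;
    /* Store the smallest IDs at the top so they are handed out first. */
    for (int64_t i = pool->pos - 1; i >= 0; i--)
        pool->ids[i] = (int32_t)(TID_MIN + (pool->pos - 1 - i));
}

static int32_t
tid_pool_get(TidPool *pool)
{
    if (pool->pos == 0)
        return -1; /* exhausted; the real allocator reallocs and refills */
    return pool->ids[--pool->pos];
}

static void
tid_pool_release(TidPool *pool, int32_t tid)
{
    assert(pool->pos < CAPACITY);
    pool->ids[pool->pos++] = tid;
}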
@@ -746,10 +746,10 @@ wasm_cluster_dup_c_api_imports(WASMModuleInstanceCommon *module_inst_dst,

 #if WASM_ENABLE_INTERP != 0
     if (module_inst_src->module_type == Wasm_Module_Bytecode) {
-        new_c_api_func_imports =
-            &(((WASMModuleInstance *)module_inst_dst)->e->c_api_func_imports);
+        new_c_api_func_imports = &(((WASMModuleInstance *)module_inst_dst)
+                                       ->e->common.c_api_func_imports);
         c_api_func_imports = ((const WASMModuleInstance *)module_inst_src)
-                                 ->e->c_api_func_imports;
+                                 ->e->common.c_api_func_imports;
         import_func_count =
             ((WASMModule *)(((const WASMModuleInstance *)module_inst_src)
                                 ->module))
@@ -760,10 +760,10 @@ wasm_cluster_dup_c_api_imports(WASMModuleInstanceCommon *module_inst_dst,
     if (module_inst_src->module_type == Wasm_Module_AoT) {
         AOTModuleInstanceExtra *e =
             (AOTModuleInstanceExtra *)((AOTModuleInstance *)module_inst_dst)->e;
-        new_c_api_func_imports = &(e->c_api_func_imports);
+        new_c_api_func_imports = &(e->common.c_api_func_imports);

         e = (AOTModuleInstanceExtra *)((AOTModuleInstance *)module_inst_src)->e;
-        c_api_func_imports = e->c_api_func_imports;
+        c_api_func_imports = e->common.c_api_func_imports;

         import_func_count =
             ((AOTModule *)(((AOTModuleInstance *)module_inst_src)->module))
core/iwasm/libraries/wasi-nn/.gitignore (vendored, new file)
@@ -0,0 +1,2 @@
+**/*.wasm
+**/*.tflite
@@ -25,6 +25,7 @@ Build the runtime image for your execution target type.

 * `cpu`
 * `nvidia-gpu`
 * `vx-delegate`
+* `tpu`

 ```
 EXECUTION_TYPE=cpu
@@ -64,6 +65,8 @@ docker run \
 ```

 * (NVIDIA) GPU
+  * Requirements:
+    * [NVIDIA docker](https://github.com/NVIDIA/nvidia-docker).

 ```
 docker run \
@@ -76,25 +79,36 @@ docker run \
     /assets/test_tensorflow.wasm
 ```

-* vx-delegate for NPU (x86 simulater)
+* vx-delegate for NPU (x86 simulator)

 ```
 docker run \
-    -v $PWD/core/iwasm/libraries/wasi-nn/test:/assets wasi-nn-vx-delegate \
-    --dir=/assets \
+    -v $PWD/core/iwasm/libraries/wasi-nn/test:/assets \
+    wasi-nn-vx-delegate \
+    --dir=/ \
     --env="TARGET=gpu" \
-    /assets/test_tensorflow.wasm
+    /assets/test_tensorflow_quantized.wasm
 ```

+* (Coral) TPU
+  * Requirements:
+    * [Coral USB](https://coral.ai/products/accelerator/).
-
-Requirements:
-* [NVIDIA docker](https://github.com/NVIDIA/nvidia-docker).
+
+```
+docker run \
+    --privileged \
+    --device=/dev/bus/usb:/dev/bus/usb \
+    -v $PWD/core/iwasm/libraries/wasi-nn/test:/assets \
+    wasi-nn-tpu \
+    --dir=/ \
+    --env="TARGET=tpu" \
+    /assets/test_tensorflow_quantized.wasm
+```

 ## What is missing

 Supported:

 * Graph encoding: `tensorflowlite`.
-* Execution target: `cpu` and `gpu`.
+* Execution target: `cpu`, `gpu` and `tpu`.
 * Tensor type: `fp32`.
@@ -18,12 +18,16 @@ if(NOT EXISTS ${TENSORFLOW_LITE})

     set(TENSORFLOW_SOURCE_DIR "${WAMR_ROOT_DIR}/core/deps/tensorflow-src")

-    if(WASI_NN_ENABLE_GPU EQUAL 1)
+    if(WAMR_BUILD_WASI_NN_ENABLE_GPU EQUAL 1)
         # Tensorflow specific:
         # * https://www.tensorflow.org/lite/guide/build_cmake#available_options_to_build_tensorflow_lite
         set (TFLITE_ENABLE_GPU ON)
     endif()

+    if (CMAKE_SIZEOF_VOID_P EQUAL 4)
+        set (TFLITE_ENABLE_XNNPACK OFF)
+    endif()
+
     add_subdirectory(
         "${TENSORFLOW_SOURCE_DIR}/tensorflow/lite"
         "${CMAKE_CURRENT_BINARY_DIR}/tensorflow-lite"
@@ -16,11 +16,11 @@
 #include <tensorflow/lite/optional_debug_tools.h>
 #include <tensorflow/lite/error_reporter.h>

-#if defined(WASI_NN_ENABLE_GPU)
+#if WASM_ENABLE_WASI_NN_GPU != 0
 #include <tensorflow/lite/delegates/gpu/delegate.h>
 #endif

-#if defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE)
+#if WASM_ENABLE_WASI_NN_EXTERNAL_DELEGATE != 0
 #include <tensorflow/lite/delegates/external/external_delegate.h>
 #endif
@@ -130,8 +130,8 @@ tensorflowlite_load(void *tflite_ctx, graph_builder_array *builder,
         return invalid_argument;
     }

-    if (target != cpu && target != gpu) {
-        NN_ERR_PRINTF("Only CPU and GPU target is supported.");
+    if (target != cpu && target != gpu && target != tpu) {
+        NN_ERR_PRINTF("Only CPU, GPU and TPU target is supported.");
         return invalid_argument;
     }
@@ -195,7 +195,7 @@ tensorflowlite_init_execution_context(void *tflite_ctx, graph g,
     switch (tfl_ctx->models[g].target) {
         case gpu:
         {
-#if defined(WASI_NN_ENABLE_GPU)
+#if WASM_ENABLE_WASI_NN_GPU != 0
             NN_WARN_PRINTF("GPU enabled.");
             // https://www.tensorflow.org/lite/performance/gpu
             TfLiteGpuDelegateOptionsV2 options =
@@ -216,10 +216,19 @@ tensorflowlite_init_execution_context(void *tflite_ctx, graph g,
                 NN_ERR_PRINTF("Error when enabling GPU delegate.");
                 use_default = true;
             }
-#elif defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE)
+#else
+            NN_WARN_PRINTF("GPU not enabled.");
+            use_default = true;
+#endif
+            break;
+        }
+        case tpu:
+        {
+#if WASM_ENABLE_WASI_NN_EXTERNAL_DELEGATE != 0
             NN_WARN_PRINTF("external delegation enabled.");
             TfLiteExternalDelegateOptions options =
-                TfLiteExternalDelegateOptionsDefault(WASI_NN_EXT_DELEGATE_PATH);
+                TfLiteExternalDelegateOptionsDefault(
+                    WASM_WASI_NN_EXTERNAL_DELEGATE_PATH);
             tfl_ctx->delegate = TfLiteExternalDelegateCreate(&options);
             if (tfl_ctx->delegate == NULL) {
                 NN_ERR_PRINTF("Error when generating External delegate.");
@@ -233,7 +242,7 @@ tensorflowlite_init_execution_context(void *tflite_ctx, graph g,
                 use_default = true;
             }
 #else
-            NN_WARN_PRINTF("GPU not enabled.");
+            NN_WARN_PRINTF("External delegate not enabled.");
             use_default = true;
 #endif
             break;
@@ -285,14 +294,37 @@ tensorflowlite_set_input(void *tflite_ctx, graph_execution_context ctx,
         return invalid_argument;
     }

-    auto *input =
-        tfl_ctx->interpreters[ctx].interpreter->typed_input_tensor<float>(
-            index);
-    if (input == NULL)
-        return missing_memory;
+    if (tensor->quantization.type == kTfLiteNoQuantization) {
+        NN_DBG_PRINTF("No quantization information. Using float as default");
+        float *it =
+            tfl_ctx->interpreters[ctx].interpreter->typed_input_tensor<float>(
+                index);
+
+        int size = model_tensor_size * sizeof(float);
+        bh_memcpy_s(it, size, input_tensor->data, size);
+    }
+    else { // TODO: Assumming uint8 quantized networks.
+        TfLiteAffineQuantization *quant_info =
+            (TfLiteAffineQuantization *)tensor->quantization.params;
+        if (quant_info->scale->size != 1 || quant_info->zero_point->size != 1) {
+            NN_ERR_PRINTF("Quantization per channel is not supported");
+            return runtime_error;
+        }
+        uint8_t *it =
+            tfl_ctx->interpreters[ctx].interpreter->typed_input_tensor<uint8_t>(
+                index);
+
+        float scale = quant_info->scale->data[0];
+        float zero_point = (float)quant_info->zero_point->data[0];
+        NN_DBG_PRINTF("input tensor: (scale, offset) = (%f, %f)", scale,
+                      zero_point);
+
+        float *input_tensor_f = (float *)input_tensor->data;
+        for (uint32_t i = 0; i < model_tensor_size; ++i) {
+            it[i] = (uint8_t)(input_tensor_f[i] / scale + zero_point);
+        }
+    }

-    bh_memcpy_s(input, model_tensor_size * sizeof(float), input_tensor->data,
-                model_tensor_size * sizeof(float));
     return success;
 }
@@ -325,6 +357,7 @@ tensorflowlite_get_output(void *tflite_ctx, graph_execution_context ctx,
+    NN_DBG_PRINTF("Number of tensors (%d)", num_output_tensors);

     if (index + 1 > num_output_tensors) {
         NN_ERR_PRINTF("Index %d is invalid.", index);
         return runtime_error;
     }
@@ -343,15 +376,37 @@ tensorflowlite_get_output(void *tflite_ctx, graph_execution_context ctx,
         return missing_memory;
     }

-    float *tensor_f =
-        tfl_ctx->interpreters[ctx].interpreter->typed_output_tensor<float>(
-            index);
-    for (uint32_t i = 0; i < model_tensor_size; ++i)
-        NN_DBG_PRINTF("output: %f", tensor_f[i]);
+    if (tensor->quantization.type == kTfLiteNoQuantization) {
+        NN_DBG_PRINTF("No quantization information");
+        float *ot =
+            tfl_ctx->interpreters[ctx].interpreter->typed_output_tensor<float>(
+                index);
+
+        int size = model_tensor_size * sizeof(float);
+        bh_memcpy_s(output_tensor, size, ot, size);
+    }
+    else { // TODO: Assumming uint8 quantized networks.
+        TfLiteAffineQuantization *quant_info =
+            (TfLiteAffineQuantization *)tensor->quantization.params;
+        if (quant_info->scale->size != 1 || quant_info->zero_point->size != 1) {
+            NN_ERR_PRINTF("Quantization per channel is not supported");
+            return runtime_error;
+        }
+        uint8_t *ot = tfl_ctx->interpreters[ctx]
+                          .interpreter->typed_output_tensor<uint8_t>(index);
+
+        float scale = quant_info->scale->data[0];
+        float zero_point = (float)quant_info->zero_point->data[0];
+        NN_DBG_PRINTF("output tensor: (scale, offset) = (%f, %f)", scale,
+                      zero_point);
+
+        float *output_tensor_f = (float *)output_tensor;
+        for (uint32_t i = 0; i < model_tensor_size; ++i) {
+            output_tensor_f[i] = (ot[i] - zero_point) * scale;
+        }
+    }

     *output_tensor_size = model_tensor_size;
-    bh_memcpy_s(output_tensor, model_tensor_size * sizeof(float), tensor_f,
-                model_tensor_size * sizeof(float));
     return success;
 }
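The two branches added above implement standard affine quantization: a quantized byte q represents the real value (q - zero_point) * scale, so inputs are encoded as q = real / scale + zero_point and outputs decoded with the inverse, matching the loops in set_input and get_output. A tiny self-contained round trip with hypothetical helper names and made-up parameters:

#include <stdint.h>
#include <stdio.h>

/* Affine quantization: real is approximately (q - zero_point) * scale. */
static uint8_t
quantize(float real, float scale, float zero_point)
{
    return (uint8_t)(real / scale + zero_point); /* truncates, as the diff does */
}

static float
dequantize(uint8_t q, float scale, float zero_point)
{
    return ((float)q - zero_point) * scale;
}

int
main(void)
{
    float scale = 0.5f, zero_point = 10.0f;
    uint8_t q = quantize(2.0f, scale, zero_point); /* 2.0 / 0.5 + 10 = 14 */
    float r = dequantize(q, scale, zero_point);    /* (14 - 10) * 0.5 = 2.0 */
    printf("2.0 -> %u -> %.1f\n", q, r);
    return 0;
}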
@@ -392,19 +447,35 @@ tensorflowlite_destroy(void *tflite_ctx)
     */
     TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;

-    if (tfl_ctx->delegate != NULL) {
-#if defined(WASI_NN_ENABLE_GPU)
-        TfLiteGpuDelegateV2Delete(tfl_ctx->delegate);
-#elif defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE)
-        TfLiteExternalDelegateDelete(tfl_ctx->delegate);
-#endif
-    }
-
     NN_DBG_PRINTF("Freeing memory.");
     for (int i = 0; i < MAX_GRAPHS_PER_INST; ++i) {
         tfl_ctx->models[i].model.reset();
-        if (tfl_ctx->models[i].model_pointer)
+        if (tfl_ctx->models[i].model_pointer) {
+            if (tfl_ctx->delegate) {
+                switch (tfl_ctx->models[i].target) {
+                    case gpu:
+                    {
+#if WASM_ENABLE_WASI_NN_GPU != 0
+                        TfLiteGpuDelegateV2Delete(tfl_ctx->delegate);
+#else
+                        NN_ERR_PRINTF("GPU delegate delete but not enabled.");
+#endif
+                        break;
+                    }
+                    case tpu:
+                    {
+#if WASM_ENABLE_WASI_NN_EXTERNAL_DELEGATE != 0
+                        TfLiteExternalDelegateDelete(tfl_ctx->delegate);
+#else
+                        NN_ERR_PRINTF(
+                            "External delegate delete but not enabled.");
+#endif
+                        break;
+                    }
+                }
+            }
             wasm_runtime_free(tfl_ctx->models[i].model_pointer);
+        }
         tfl_ctx->models[i].model_pointer = NULL;
     }
     for (int i = 0; i < MAX_GRAPH_EXEC_CONTEXTS_PER_INST; ++i) {
@@ -30,7 +30,6 @@ RUN make -j "$(grep -c ^processor /proc/cpuinfo)"

 FROM ubuntu:22.04

-COPY --from=base /home/wamr/product-mini/platforms/linux/build/libvmlib.so /libvmlib.so
 COPY --from=base /home/wamr/product-mini/platforms/linux/build/iwasm /iwasm

 ENTRYPOINT [ "/iwasm" ]
@@ -24,7 +24,7 @@ RUN apt-get install -y wget ca-certificates --no-install-recommends \

 RUN cmake \
     -DWAMR_BUILD_WASI_NN=1 \
-    -DWASI_NN_ENABLE_GPU=1 \
+    -DWAMR_BUILD_WASI_NN_ENABLE_GPU=1 \
     ..

 RUN make -j "$(grep -c ^processor /proc/cpuinfo)"
@@ -44,7 +44,6 @@ RUN mkdir -p /etc/OpenCL/vendors && \
 ENV NVIDIA_VISIBLE_DEVICES=all
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility

-COPY --from=base /home/wamr/product-mini/platforms/linux/build/libvmlib.so /libvmlib.so
 COPY --from=base /home/wamr/product-mini/platforms/linux/build/iwasm /iwasm

 ENTRYPOINT [ "/iwasm" ]
core/iwasm/libraries/wasi-nn/test/Dockerfile.tpu (new file)
@@ -0,0 +1,37 @@
+# Copyright (C) 2019 Intel Corporation. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+FROM ubuntu:20.04 AS base
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# hadolint ignore=DL3008
+RUN apt-get update && apt-get install -y \
+    cmake build-essential git curl gnupg --no-install-recommends && \
+    rm -rf /var/lib/apt/lists/*
+
+# hadolint ignore=DL3008,DL4006
+RUN echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | tee /etc/apt/sources.list.d/coral-edgetpu.list && \
+    curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \
+    apt-get update && apt-get install -y libedgetpu1-std --no-install-recommends && \
+    rm -rf /var/lib/apt/lists/*
+
+WORKDIR /home/wamr
+
+COPY . .
+
+WORKDIR /home/wamr/product-mini/platforms/linux/build
+
+RUN cmake \
+    -DWAMR_BUILD_WASI_NN=1 \
+    -DWAMR_BUILD_WASI_NN_ENABLE_EXTERNAL_DELEGATE=1 \
+    -DWAMR_BUILD_WASI_NN_EXTERNAL_DELEGATE_PATH="libedgetpu.so.1.0" \
+    -DWAMR_BUILD_WASI_NN_ENABLE_GPU=1 \
+    ..
+
+RUN make -j "$(grep -c ^processor /proc/cpuinfo)" && \
+    cp /home/wamr/product-mini/platforms/linux/build/iwasm /iwasm
+
+WORKDIR /assets
+
+ENTRYPOINT [ "/iwasm" ]
@@ -1,6 +1,10 @@
 #!/bin/sh

+# Copyright (C) 2019 Intel Corporation. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+CURR_PATH=$(cd $(dirname $0) && pwd -P)

 # WASM application that uses WASI-NN

 /opt/wasi-sdk/bin/clang \
@@ -13,9 +17,25 @@

 # TFLite models to use in the tests

-cd models
+cd ${CURR_PATH}/models
 python3 average.py
 python3 max.py
 python3 mult_dimension.py
 python3 mult_outputs.py
 python3 sum.py

+# Specific tests for TPU
+
+cd ${CURR_PATH}
+/opt/wasi-sdk/bin/clang \
+    -Wl,--allow-undefined \
+    -Wl,--strip-all,--no-entry \
+    --sysroot=/opt/wasi-sdk/share/wasi-sysroot \
+    -I../include -I../src/utils \
+    -o test_tensorflow_quantized.wasm \
+    test_tensorflow_quantized.c utils.c
+
+cd ${CURR_PATH}/models
+python3 quantized.py
+
+cd ${CURR_PATH}
core/iwasm/libraries/wasi-nn/test/models/quantized.py (new file)
@@ -0,0 +1,30 @@
+# Copyright (C) 2019 Intel Corporation. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+import tensorflow as tf
+import numpy as np
+import pathlib
+
+model = tf.keras.Sequential([
+    tf.keras.layers.InputLayer(input_shape=[5, 5, 1]),
+    tf.keras.layers.AveragePooling2D(
+        pool_size=(5, 5), strides=None, padding="valid", data_format=None)
+])
+
+def representative_dataset():
+    for _ in range(1000):
+        data = np.random.randint(0, 25, (1, 5, 5, 1))
+        yield [data.astype(np.float32)]
+
+converter = tf.lite.TFLiteConverter.from_keras_model(model)
+converter.optimizations = [tf.lite.Optimize.DEFAULT]
+converter.representative_dataset = representative_dataset
+converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
+converter.inference_input_type = tf.uint8  # or tf.int8
+converter.inference_output_type = tf.uint8  # or tf.int8
+tflite_model = converter.convert()
+
+tflite_models_dir = pathlib.Path("./")
+tflite_model_file = tflite_models_dir / "quantized_model.tflite"
+tflite_model_file.write_bytes(tflite_model)
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include <math.h>
+
+#include "utils.h"
+#include "logger.h"
+
+#undef EPSILON
+#define EPSILON 1e-2
+
+void
+test_average_quantized(execution_target target)
+{
+    int dims[] = { 1, 5, 5, 1 };
+    input_info input = create_input(dims);
+
+    uint32_t output_size = 0;
+    float *output =
+        run_inference(target, input.input_tensor, input.dim, &output_size,
+                      "./models/quantized_model.tflite", 1);
+
+    NN_INFO_PRINTF("Output size: %d", output_size);
+    NN_INFO_PRINTF("Result: average is %f", output[0]);
+    // NOTE: 11.95 instead of 12 because of errors due quantization
+    assert(fabs(output[0] - 11.95) < EPSILON);
+
+    free(input.dim);
+    free(input.input_tensor);
+    free(output);
+}
+
+int
+main()
+{
+    char *env = getenv("TARGET");
+    if (env == NULL) {
+        NN_INFO_PRINTF("Usage:\n--env=\"TARGET=[cpu|gpu|tpu]\"");
+        return 1;
+    }
+    execution_target target;
+    if (strcmp(env, "cpu") == 0)
+        target = cpu;
+    else if (strcmp(env, "gpu") == 0)
+        target = gpu;
+    else if (strcmp(env, "tpu") == 0)
+        target = tpu;
+    else {
+        NN_ERR_PRINTF("Wrong target!");
+        return 1;
+    }
+    NN_INFO_PRINTF("################### Testing quantized model...");
+    test_average_quantized(target);
+
+    NN_INFO_PRINTF("Tests: passed!");
+    return 0;
+}
@@ -132,8 +132,8 @@ run_inference(execution_target target, float *input, uint32_t *input_size,
         *output_size = MAX_OUTPUT_TENSOR_SIZE - *output_size;
         if (wasm_get_output(ctx, i, &out_tensor[offset], output_size)
             != success) {
-            NN_ERR_PRINTF("Error when getting output.");
-            exit(1);
+            NN_ERR_PRINTF("Error when getting index %d.", i);
+            break;
         }

         offset += *output_size;
@@ -11,7 +11,7 @@
 #include "wasi_nn.h"

 #define MAX_MODEL_SIZE 85000000
-#define MAX_OUTPUT_TENSOR_SIZE 200
+#define MAX_OUTPUT_TENSOR_SIZE 1000000
 #define INPUT_TENSOR_DIMS 4
 #define EPSILON 1e-8