diff --git a/README.md b/README.md index b34f8154..2a4a5fbb 100644 --- a/README.md +++ b/README.md @@ -115,6 +115,7 @@ The WAMR [samples](./samples) integrate the iwasm VM core, application manager a - **[spawn-thread](./samples/spawn-thread)**: Demonstrating how to execute wasm functions of the same wasm application concurrently, in threads created by host embedder or runtime, but not the wasm application itself. - **[multi-module](./samples/multi-module)**: Demonstrating the [multiple modules as dependencies](./doc/multi_module.md) feature which implements the [load-time dynamic linking](https://webassembly.org/docs/dynamic-linking/). - **[wasm-c-api](./samples/wasm-c-api/README.md)**: Demonstrating how to run some samples from [wasm-c-api proposal](https://github.com/WebAssembly/wasm-c-api) and showing the supported API's. +- **[workload](./samples/workload/README.md)**: Demonstrating how to build and run some complex workloads, e.g. tensorflow-lite, XNNPACK, wasm-av1, meshoptimizer and bwa. 
License diff --git a/core/iwasm/aot/aot_reloc.h b/core/iwasm/aot/aot_reloc.h index 882780a2..9df623e9 100644 --- a/core/iwasm/aot/aot_reloc.h +++ b/core/iwasm/aot/aot_reloc.h @@ -36,6 +36,8 @@ typedef struct { REG_SYM(aot_call_indirect), \ REG_SYM(wasm_runtime_enlarge_memory), \ REG_SYM(wasm_runtime_set_exception), \ + REG_SYM(memset), \ + REG_SYM(memmove), \ REG_BULK_MEMORY_SYM() \ REG_ATOMIC_WAIT_SYM() #else /* else of (defined(_WIN32) || defined(_WIN32_)) && defined(NDEBUG) */ @@ -45,6 +47,8 @@ typedef struct { REG_SYM(aot_call_indirect), \ REG_SYM(wasm_runtime_enlarge_memory), \ REG_SYM(wasm_runtime_set_exception), \ + REG_SYM(memset), \ + REG_SYM(memmove), \ REG_SYM(fmin), \ REG_SYM(fminf), \ REG_SYM(fmax), \ diff --git a/core/iwasm/libraries/libc-builtin/libc_builtin_wrapper.c b/core/iwasm/libraries/libc-builtin/libc_builtin_wrapper.c index c00fd02d..6d4fcc5f 100644 --- a/core/iwasm/libraries/libc-builtin/libc_builtin_wrapper.c +++ b/core/iwasm/libraries/libc-builtin/libc_builtin_wrapper.c @@ -1007,6 +1007,21 @@ __cxa_throw_wrapper(wasm_exec_env_t exec_env, wasm_runtime_set_exception(module_inst, buf); } +static int +setjmp_wrapper(wasm_exec_env_t exec_env, + void *jmp_buf) +{ + os_printf("in setjmp()\n"); + return 0; +} + +static void +longjmp_wrapper(wasm_exec_env_t exec_env, + void *jmp_buf, int val) +{ + os_printf("in longjmp()\n"); +} + #if WASM_ENABLE_SPEC_TEST != 0 static void print_wrapper(wasm_exec_env_t exec_env) @@ -1104,7 +1119,9 @@ static NativeSymbol native_symbols_libc_builtin[] = { REG_NATIVE_FUNC(nullFunc_X, "(i)"), REG_NATIVE_FUNC(__cxa_allocate_exception, "(i)i"), REG_NATIVE_FUNC(__cxa_begin_catch, "(*)"), - REG_NATIVE_FUNC(__cxa_throw, "(**i)") + REG_NATIVE_FUNC(__cxa_throw, "(**i)"), + REG_NATIVE_FUNC(setjmp, "(*)i"), + REG_NATIVE_FUNC(longjmp, "(*i)"), }; #if WASM_ENABLE_SPEC_TEST != 0 diff --git a/core/iwasm/libraries/libc-emcc/libc_emcc_wrapper.c b/core/iwasm/libraries/libc-emcc/libc_emcc_wrapper.c index 0930fbc1..22a42814 100644 --- 
a/core/iwasm/libraries/libc-emcc/libc_emcc_wrapper.c +++ b/core/iwasm/libraries/libc-emcc/libc_emcc_wrapper.c @@ -11,18 +11,27 @@ #define get_module_inst(exec_env) \ wasm_runtime_get_module_inst(exec_env) +#define validate_app_addr(offset, size) \ + wasm_runtime_validate_app_addr(module_inst, offset, size) + +#define validate_app_str_addr(offset) \ + wasm_runtime_validate_app_str_addr(module_inst, offset) + #define validate_native_addr(addr, size) \ wasm_runtime_validate_native_addr(module_inst, addr, size) +#define addr_app_to_native(offset) \ + wasm_runtime_addr_app_to_native(module_inst, offset) + +#define addr_native_to_app(ptr) \ + wasm_runtime_addr_native_to_app(module_inst, ptr) + #define module_malloc(size, p_native_addr) \ wasm_runtime_module_malloc(module_inst, size, p_native_addr) #define module_free(offset) \ wasm_runtime_module_free(module_inst, offset) -#define REG_NATIVE_FUNC(func_name, signature) \ - { #func_name, func_name##_wrapper, signature, NULL } - extern bool wasm_runtime_call_indirect(wasm_exec_env_t exec_env, uint32 element_idx, @@ -282,12 +291,15 @@ fopen_wrapper(wasm_exec_env_t exec_env, int file_id; if (pathname == NULL || mode == NULL) - return -1; + return 0; if ((file_id = get_free_file_slot()) == -1) - return -1; + return 0; file = fopen(pathname, mode); + if (!file) + return 0; + file_list[file_id] = file; return file_id + 1; } @@ -308,6 +320,22 @@ fread_wrapper(wasm_exec_env_t exec_env, return (uint32)fread(ptr, size, nmemb, file); } +static int +fseeko_wrapper(wasm_exec_env_t exec_env, + int file_id, int64 offset, int whence) +{ + FILE *file; + + file_id = file_id - 1; + if ((unsigned)file_id >= sizeof(file_list) / sizeof(FILE *)) { + return -1; + } + if ((file = file_list[file_id]) == NULL) { + return -1; + } + return (uint32)fseek(file, offset, whence); +} + static uint32 emcc_fwrite_wrapper(wasm_exec_env_t exec_env, const void *ptr, uint32 size, uint32 nmemb, @@ -351,6 +379,113 @@ fclose_wrapper(wasm_exec_env_t exec_env, int 
file_id) file_list[file_id] = NULL; return fclose(file); } + +static int +__sys_mkdir_wrapper(wasm_exec_env_t exec_env, + const char *pathname, int mode) +{ + if (!pathname) + return -1; + return mkdir(pathname, mode); +} + +static int +__sys_rmdir_wrapper(wasm_exec_env_t exec_env, const char *pathname) +{ + if (!pathname) + return -1; + return rmdir(pathname); +} + +static int +__sys_unlink_wrapper(wasm_exec_env_t exec_env, const char *pathname) +{ + if (!pathname) + return -1; + return unlink(pathname); +} + +static uint32 +__sys_getcwd_wrapper(wasm_exec_env_t exec_env, char *buf, uint32 size) +{ + wasm_module_inst_t module_inst = get_module_inst(exec_env); + char *ret; + + if (!buf) + return -1; + + ret = getcwd(buf, size); + return ret ? addr_native_to_app(ret) : 0; +} + +#include + +struct utsname_app { + char sysname[64]; + char nodename[64]; + char release[64]; + char version[64]; + char machine[64]; + char domainname[64]; +}; + +static int +__sys_uname_wrapper(wasm_exec_env_t exec_env, struct utsname_app *uname_app) +{ + wasm_module_inst_t module_inst = get_module_inst(exec_env); + struct utsname uname_native = { 0 }; + uint32 length; + + if (!validate_native_addr(uname_app, sizeof(struct utsname_app))) + return -1; + + if (uname(&uname_native) != 0) { + return -1; + } + + memset(uname_app, 0, sizeof(struct utsname_app)); + + length = strlen(uname_native.sysname); + if (length > sizeof(uname_app->sysname) - 1) + length = sizeof(uname_app->sysname) - 1; + bh_memcpy_s(uname_app->sysname, sizeof(uname_app->sysname), + uname_native.sysname, length); + + length = strlen(uname_native.nodename); + if (length > sizeof(uname_app->nodename) - 1) + length = sizeof(uname_app->nodename) - 1; + bh_memcpy_s(uname_app->nodename, sizeof(uname_app->nodename), + uname_native.nodename, length); + + length = strlen(uname_native.release); + if (length > sizeof(uname_app->release) - 1) + length = sizeof(uname_app->release) - 1; + bh_memcpy_s(uname_app->release, 
sizeof(uname_app->release), + uname_native.release, length); + + length = strlen(uname_native.version); + if (length > sizeof(uname_app->version) - 1) + length = sizeof(uname_app->version) - 1; + bh_memcpy_s(uname_app->version, sizeof(uname_app->version), + uname_native.version, length); + +#ifdef _GNU_SOURCE + length = strlen(uname_native.domainname); + if (length > sizeof(uname_app->domainname) - 1) + length = sizeof(uname_app->domainname) - 1; + bh_memcpy_s(uname_app->domainname, sizeof(uname_app->domainname), + uname_native.domainname, length); +#endif + + return 0; +} + +static void +emscripten_notify_memory_growth_wrapper(wasm_exec_env_t exec_env, int i) +{ + (void)i; +} + #endif /* end of BH_PLATFORM_LINUX_SGX */ #define REG_NATIVE_FUNC(func_name, signature) \ @@ -374,9 +509,16 @@ static NativeSymbol native_symbols_libc_emcc[] = { #if !defined(BH_PLATFORM_LINUX_SGX) REG_NATIVE_FUNC(fopen, "($$)i"), REG_NATIVE_FUNC(fread, "(*iii)i"), + REG_NATIVE_FUNC(fseeko, "(iIi)i"), REG_NATIVE_FUNC(emcc_fwrite, "(*iii)i"), REG_NATIVE_FUNC(feof, "(i)i"), REG_NATIVE_FUNC(fclose, "(i)i"), + REG_NATIVE_FUNC(__sys_mkdir, "($i)i"), + REG_NATIVE_FUNC(__sys_rmdir, "($)i"), + REG_NATIVE_FUNC(__sys_unlink, "($)i"), + REG_NATIVE_FUNC(__sys_getcwd, "(*~)i"), + REG_NATIVE_FUNC(__sys_uname, "(*)i"), + REG_NATIVE_FUNC(emscripten_notify_memory_growth, "(i)"), #endif /* end of BH_PLATFORM_LINUX_SGX */ }; diff --git a/samples/workload/XNNPACK/CMakeLists.txt b/samples/workload/XNNPACK/CMakeLists.txt new file mode 100644 index 00000000..04423001 --- /dev/null +++ b/samples/workload/XNNPACK/CMakeLists.txt @@ -0,0 +1,102 @@ +# Copyright (C) 2019 Intel Corporation. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +cmake_minimum_required (VERSION 3.0) + +project(xnnpack_wasm) + +################ EMCC ################ +if(NOT DEFINED ENV{EMSDK}) + message(SEND_ERROR + "can not find emsdk. 
" + "please refer to https://emscripten.org/docs/getting_started/downloads.html " + "and install it, " + "or active emsdk by 'source ./emsdk_env.sh'" + ) +endif() + +include(ExternalProject) + +ExternalProject_Add(xnnpack + PREFIX xnnpack + GIT_REPOSITORY https://github.com/google/XNNPACK.git + GIT_TAG 2da0de89960b829c6fae74204a102db524e73047 + GIT_PROGRESS ON + SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack + UPDATE_COMMAND git checkout .bazelrc BUILD.bazel emscripten.bzl + && git apply ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack.patch + && cmake -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/toolchain ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack/toolchain + CONFIGURE_COMMAND "" + BUILD_COMMAND cd ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack + && bazel build -c opt --sandbox_writable_path=$ENV{HOME} --config=emscripten_wasm + //:qs8_gemm_bench.wasm + //:qs8_requantization_bench.wasm + //:qu8_gemm_bench.wasm + //:qu8_requantization_bench.wasm + //:f16_igemm_bench.wasm + //:f16_gemm_bench.wasm + //:f16_spmm_bench.wasm + //:f32_igemm_bench.wasm + //:f16_relu_bench.wasm + //:f32_conv_hwc_bench.wasm + //:f32_conv_hwc2chw_bench.wasm + //:f16_dwconv_bench.wasm + //:f32_dwconv_bench.wasm + //:f32_dwconv2d_chw_bench.wasm + //:f32_gemm_bench.wasm + //:f32_hswish_bench.wasm + //:f32_raddexpminusmax_bench.wasm + //:f32_raddextexp_bench.wasm + //:f32_raddstoreexpminusmax_bench.wasm + //:f32_relu_bench.wasm + //:f32_rmax_bench.wasm + //:f32_sigmoid_bench.wasm + //:f32_spmm_bench.wasm + //:f32_softmax_bench.wasm + //:f32_vscaleexpminusmax_bench.wasm + //:f32_vscaleextexp_bench.wasm + //:f32_vsqrt_bench.wasm + //:f32_im2col_gemm_bench.wasm + //:rounding_bench.wasm + //:average_pooling_bench.wasm + //:bankers_rounding_bench.wasm + //:ceiling_bench.wasm + //:channel_shuffle_bench.wasm + //:convolution_bench.wasm + //:deconvolution_bench.wasm + //:floor_bench.wasm + //:global_average_pooling_bench.wasm + //:hardswish_bench.wasm + //:max_pooling_bench.wasm + //:sigmoid_bench.wasm + //:prelu_bench.wasm + 
//:softmax_bench.wasm + //:square_root_bench.wasm + //:truncation_bench.wasm + //:fp32_mobilenet_v1.wasm + //:fp16_mobilenet_v1.wasm + //:qs8_mobilenet_v1.wasm + //:qs8_mobilenet_v2.wasm + //:fp32_mobilenet_v2.wasm + //:fp16_mobilenet_v2.wasm + //:fp32_mobilenet_v3_large.wasm + //:fp16_mobilenet_v3_large.wasm + //:fp32_mobilenet_v3_small.wasm + //:fp16_mobilenet_v3_small.wasm + //:f32_dwconv_e2e_bench.wasm + //:f32_gemm_e2e_bench.wasm + //:end2end_bench.wasm + //:f32_exp_eval.wasm + //:f32_expminus_eval.wasm + //:f32_extexp_eval.wasm + //:f32_roundne_eval.wasm + //:f32_roundd_eval.wasm + //:f32_roundu_eval.wasm + //:f32_roundz_eval.wasm + //:f32_sigmoid_eval.wasm + //:f32_sqrt_eval.wasm + #--sandbox_debug + INSTALL_COMMAND ${CMAKE_COMMAND} -E create_symlink + ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack/bazel-out/wasm-opt/bin/ + ${CMAKE_CURRENT_SOURCE_DIR}/build/wasm-opt +) diff --git a/samples/workload/XNNPACK/README.md b/samples/workload/XNNPACK/README.md new file mode 100644 index 00000000..a34f6621 --- /dev/null +++ b/samples/workload/XNNPACK/README.md @@ -0,0 +1,60 @@ +"XNNPACK" sample introduction +============== +This sample demonstrates how to build [XNNPACK](https://github.com/google/XNNPACK) benchmarks into WebAssembly with emcc toolchain and run them with iwasm. + +## Installation toolchains + +- **bazel**. Please install bazel from [latest release](https://github.com/bazelbuild/bazel/releases) + +- **emsdk**. Please install [emsdk](https://github.com/emscripten-core/emsdk) to /opt/emsdk: +```bash +cd /opt +git clone https://github.com/emscripten-core/emsdk.git +cd emsdk +./emsdk install latest +./emsdk activate latest +``` +And set up emsdk environment: +```bash +source /opt/emsdk/emsdk_env.sh +``` + +## Build XNNPACK + +```bash +cd <wamr_dir>/samples/workload/XNNPACK +mkdir build +cd build +cmake .. +``` +The wasm files are generated under folder samples/workload/XNNPACK/xnnpack/bazel-bin. 
+ +## Run benchmarks + +First, please build iwasm with simd, libc-emcc and lib-pthread support: + +``` bash +$ cd <wamr_dir>/product-mini/platforms/linux/ +$ mkdir build && cd build +$ cmake .. -DWAMR_BUILD_SIMD=1 -DWAMR_BUILD_LIBC_EMCC=1 -DWAMR_BUILD_LIB_PTHREAD=1 +$ make +``` + +And please build wamrc: + +``` bash +cd <wamr_dir>/wamr-compiler +./build_llvm.sh +mkdir build && cd build +cmake .. +make +``` + +Then compile wasm file to aot file and run: + +``` shell +$ cd <wamr_dir>/samples/workload/XNNPACK/xnnpack/bazel-bin +$ wamrc --enable-simd -o average_pooling_bench.aot average_pooling_bench.wasm (or other wasm files) +$ iwasm average_pooling_bench.aot +``` + diff --git a/samples/workload/XNNPACK/toolchain/BUILD.bazel b/samples/workload/XNNPACK/toolchain/BUILD.bazel new file mode 100644 index 00000000..9cf9b969 --- /dev/null +++ b/samples/workload/XNNPACK/toolchain/BUILD.bazel @@ -0,0 +1,30 @@ +# Copyright (C) 2019 Intel Corporation. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +package(default_visibility = ['//visibility:public']) + +load(":emscripten_toolchain_config.bzl", "emsdk_toolchain_config") + +cc_toolchain_suite( + name = "emscripten", + toolchains = { + "wasm": ":emsdk_toolchain", + }, +) + +filegroup(name = "empty") + +emsdk_toolchain_config(name = "emsdk_toolchain_config") + +cc_toolchain( + name = "emsdk_toolchain", + toolchain_identifier = "emsdk-toolchain", + toolchain_config = ":emsdk_toolchain_config", + all_files = ":empty", + compiler_files = ":empty", + dwp_files = ":empty", + linker_files = ":empty", + objcopy_files = ":empty", + strip_files = ":empty", + supports_param_files = 0, +) diff --git a/samples/workload/XNNPACK/toolchain/emscripten_toolchain_config.bzl b/samples/workload/XNNPACK/toolchain/emscripten_toolchain_config.bzl new file mode 100644 index 00000000..b90a7aba --- /dev/null +++ b/samples/workload/XNNPACK/toolchain/emscripten_toolchain_config.bzl @@ -0,0 +1,137 @@ +# Copyright (C) 2019 Intel Corporation.
All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +load("@bazel_tools//tools/build_defs/cc:action_names.bzl", "ACTION_NAMES") +load( + "@bazel_tools//tools/cpp:cc_toolchain_config_lib.bzl", + "feature", + "flag_group", + "flag_set", + "tool_path", +) + +all_compile_actions = [ + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, +] + +all_link_actions = [ + ACTION_NAMES.cpp_link_executable, + ACTION_NAMES.cpp_link_dynamic_library, + ACTION_NAMES.cpp_link_nodeps_dynamic_library, +] + +def _impl(ctx): + tool_paths = [ + tool_path( + name = "gcc", + path = "/opt/emsdk/upstream/emscripten/emcc", + ), + tool_path( + name = "ld", + path = "/opt/emsdk/upstream/emscripten/emcc", + ), + tool_path( + name = "ar", + path = "/opt/emsdk/upstream/emscripten/emar", + ), + tool_path( + name = "cpp", + path = "/opt/emsdk/upstream/emscripten/em++", + ), + tool_path( + name = "gcov", + path = "/bin/false", + ), + tool_path( + name = "nm", + path = "/bin/false", + ), + tool_path( + name = "objdump", + path = "/bin/false", + ), + tool_path( + name = "strip", + path = "/bin/false", + ), + ] + + features = [ # NEW + feature( + name = "default_compile_flags", + enabled = True, + flag_sets = [ + flag_set( + actions = all_compile_actions, + flag_groups = ([ + flag_group( + flags = [ + "-O3", + "-msimd128", + "-s", + "USE_PTHREADS=0", + "-s", + "ERROR_ON_UNDEFINED_SYMBOLS=0", + "-s", + "STANDALONE_WASM=1", + ], + ), + ]), + ), + ], + ), + feature( + name = "default_linker_flags", + enabled = True, + flag_sets = [ + flag_set( + actions = all_link_actions, + flag_groups = ([ + flag_group( + flags = [ + "-O3", + "-msimd128", + "-s", + "USE_PTHREADS=0", + "-s", + "ERROR_ON_UNDEFINED_SYMBOLS=0", + "-s", + "STANDALONE_WASM=1", + "-Wl,--export=__heap_base", + "-Wl,--export=__data_end", + ], + ), + ]), + ), + ], + ), + ] + + return cc_common.create_cc_toolchain_config_info( + ctx = ctx, + features = features, # NEW + cxx_builtin_include_directories = [ + 
"/opt/emsdk/upstream/emscripten/system/include/libcxx", + "/opt/emsdk/upstream/emscripten/system/lib/libcxxabi/include", + "/opt/emsdk/upstream/emscripten/system/include", + "/opt/emsdk/upstream/emscripten/system/include/libc", + "/opt/emsdk/upstream/emscripten/system/lib/libc/musl/arch/emscripten", + "/opt/emsdk/upstream/lib/clang/12.0.0/include/", + ], + toolchain_identifier = "wasm-emsdk", + host_system_name = "i686-unknown-linux-gnu", + target_system_name = "wasm32-unknown-emscripten", + target_cpu = "wasm32", + target_libc = "unknown", + compiler = "emsdk", + abi_version = "unknown", + abi_libc_version = "unknown", + tool_paths = tool_paths, + ) + +emsdk_toolchain_config = rule( + implementation = _impl, + attrs = {}, + provides = [CcToolchainConfigInfo], +) diff --git a/samples/workload/XNNPACK/xnnpack.patch b/samples/workload/XNNPACK/xnnpack.patch new file mode 100644 index 00000000..b58cbc79 --- /dev/null +++ b/samples/workload/XNNPACK/xnnpack.patch @@ -0,0 +1,742 @@ +diff --git a/.bazelrc b/.bazelrc +index ea28201..ffd4ed4 100644 +--- a/.bazelrc ++++ b/.bazelrc +@@ -44,3 +44,7 @@ build:ios_arm64e --watchos_cpus=armv7k + build:ios_fat --config=ios + build:ios_fat --ios_multi_cpus=armv7,arm64 + build:ios_fat --watchos_cpus=armv7k ++ ++# WASM configs ++build:emscripten_wasm --cpu=wasm ++build:emscripten_wasm --crosstool_top=//toolchain:emscripten +diff --git a/BUILD.bazel b/BUILD.bazel +index d38ef1e..f261eb5 100644 +--- a/BUILD.bazel ++++ b/BUILD.bazel +@@ -3228,13 +3228,19 @@ xnnpack_cc_library( + hdrs = INTERNAL_HDRS, + gcc_copts = xnnpack_gcc_std_copts(), + msvc_copts = xnnpack_msvc_std_copts(), +- wasm_srcs = WASM_UKERNELS, +- wasmsimd_srcs = WASM_UKERNELS + WASMSIMD_UKERNELS, ++ optimized_copts = [ ++ "-ffast-math", ++ ], ++ wasm_srcs = WASM_UKERNELS + WASMSIMD_UKERNELS + ++ PSIMD_FASTMATH_UKERNELS + PSIMD_ACCMATH_UKERNELS, ++ wasmsimd_srcs = WASM_UKERNELS + WASMSIMD_UKERNELS + ++ PSIMD_FASTMATH_UKERNELS + PSIMD_ACCMATH_UKERNELS, + deps = [ + ":tables", 
+ "@FP16", + "@FXdiv", + "@pthreadpool", ++ "@psimd", + ], + ) + +@@ -3247,13 +3253,19 @@ xnnpack_cc_library( + ], + gcc_copts = xnnpack_gcc_std_copts(), + msvc_copts = xnnpack_msvc_std_copts(), +- wasm_srcs = WASM_UKERNELS, +- wasmsimd_srcs = WASM_UKERNELS + WASMSIMD_UKERNELS, ++ optimized_copts = [ ++ "-ffast-math", ++ ], ++ wasm_srcs = WASM_UKERNELS + WASMSIMD_UKERNELS + ++ PSIMD_FASTMATH_UKERNELS + PSIMD_ACCMATH_UKERNELS, ++ wasmsimd_srcs = WASM_UKERNELS + WASMSIMD_UKERNELS + ++ PSIMD_FASTMATH_UKERNELS + PSIMD_ACCMATH_UKERNELS, + deps = [ + ":tables", + "@FP16", + "@FXdiv", + "@pthreadpool", ++ "@psimd", + ], + ) + +@@ -4495,7 +4507,7 @@ xnnpack_cc_library( + ######################### Benchmarks for micro-kernels ######################### + + xnnpack_benchmark( +- name = "qs8_gemm_bench", ++ name = "qs8_gemm_bench.wasm", + srcs = [ + "bench/gemm.h", + "bench/qs8-gemm.cc", +@@ -4506,7 +4518,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "qs8_requantization_bench", ++ name = "qs8_requantization_bench.wasm", + srcs = [ + "bench/qs8-requantization.cc", + "src/xnnpack/AlignedAllocator.h", +@@ -4516,7 +4528,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "qu8_gemm_bench", ++ name = "qu8_gemm_bench.wasm", + srcs = [ + "bench/gemm.h", + "bench/qu8-gemm.cc", +@@ -4527,7 +4539,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "qu8_requantization_bench", ++ name = "qu8_requantization_bench.wasm", + srcs = [ + "bench/qu8-requantization.cc", + "src/xnnpack/AlignedAllocator.h", +@@ -4537,11 +4549,11 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "f16_igemm_bench", ++ name = "f16_igemm_bench.wasm", + srcs = [ + "bench/f16-igemm.cc", + "bench/conv.h", +- "bench/google/conv.h", ++ #"bench/google/conv.h", + "src/xnnpack/AlignedAllocator.h", + ] + MICROKERNEL_BENCHMARK_HDRS, + deps = MICROKERNEL_BENCHMARK_DEPS + [ +@@ -4551,7 +4563,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "f16_gemm_bench", ++ name = 
"f16_gemm_bench.wasm", + srcs = [ + "bench/f16-gemm.cc", + "bench/gemm.h", +@@ -4563,7 +4575,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "f16_spmm_bench", ++ name = "f16_spmm_bench.wasm", + srcs = [ + "bench/f16-spmm.cc", + "bench/gemm.h", +@@ -4573,7 +4585,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "f32_igemm_bench", ++ name = "f32_igemm_bench.wasm", + srcs = [ + "bench/f32-igemm.cc", + "bench/conv.h", +@@ -4586,7 +4598,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "f16_relu_bench", ++ name = "f16_relu_bench.wasm", + srcs = [ + "bench/f16-relu.cc", + "src/xnnpack/AlignedAllocator.h", +@@ -4595,7 +4607,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "f32_conv_hwc_bench", ++ name = "f32_conv_hwc_bench.wasm", + srcs = [ + "bench/f32-conv-hwc.cc", + "bench/dconv.h", +@@ -4607,7 +4619,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "f32_conv_hwc2chw_bench", ++ name = "f32_conv_hwc2chw_bench.wasm", + srcs = [ + "bench/f32-conv-hwc2chw.cc", + "bench/dconv.h", +@@ -4619,11 +4631,11 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "f16_dwconv_bench", ++ name = "f16_dwconv_bench.wasm", + srcs = [ + "bench/f16-dwconv.cc", + "bench/dwconv.h", +- "bench/google/dwconv.h", ++ #"bench/google/dwconv.h", + "src/xnnpack/AlignedAllocator.h", + ] + MICROKERNEL_BENCHMARK_HDRS, + deps = MICROKERNEL_BENCHMARK_DEPS + [ +@@ -4633,7 +4645,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "f32_dwconv_bench", ++ name = "f32_dwconv_bench.wasm", + srcs = [ + "bench/f32-dwconv.cc", + "bench/dwconv.h", +@@ -4646,7 +4658,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "f32_dwconv2d_chw_bench", ++ name = "f32_dwconv2d_chw_bench.wasm", + srcs = [ + "bench/f32-dwconv2d-chw.cc", + "bench/dwconv.h", +@@ -4659,7 +4671,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "f32_gemm_bench", ++ name = "f32_gemm_bench.wasm", + srcs = [ + "bench/f32-gemm.cc", + "bench/gemm.h", +@@ -4670,7 
+4682,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "f32_hswish_bench", ++ name = "f32_hswish_bench.wasm", + srcs = [ + "bench/f32-hswish.cc", + "src/xnnpack/AlignedAllocator.h", +@@ -4679,7 +4691,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "f32_raddexpminusmax_bench", ++ name = "f32_raddexpminusmax_bench.wasm", + srcs = [ + "bench/f32-raddexpminusmax.cc", + "src/xnnpack/AlignedAllocator.h", +@@ -4688,7 +4700,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "f32_raddextexp_bench", ++ name = "f32_raddextexp_bench.wasm", + srcs = [ + "bench/f32-raddextexp.cc", + "src/xnnpack/AlignedAllocator.h", +@@ -4697,7 +4709,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "f32_raddstoreexpminusmax_bench", ++ name = "f32_raddstoreexpminusmax_bench.wasm", + srcs = [ + "bench/f32-raddstoreexpminusmax.cc", + "src/xnnpack/AlignedAllocator.h", +@@ -4706,7 +4718,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "f32_relu_bench", ++ name = "f32_relu_bench.wasm", + srcs = [ + "bench/f32-relu.cc", + "src/xnnpack/AlignedAllocator.h", +@@ -4715,7 +4727,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "f32_rmax_bench", ++ name = "f32_rmax_bench.wasm", + srcs = [ + "bench/f32-rmax.cc", + "src/xnnpack/AlignedAllocator.h", +@@ -4724,7 +4736,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "f32_sigmoid_bench", ++ name = "f32_sigmoid_bench.wasm", + srcs = [ + "bench/f32-sigmoid.cc", + "src/xnnpack/AlignedAllocator.h", +@@ -4733,7 +4745,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "f32_spmm_bench", ++ name = "f32_spmm_bench.wasm", + srcs = [ + "bench/f32-spmm.cc", + "bench/gemm.h", +@@ -4743,7 +4755,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "f32_softmax_bench", ++ name = "f32_softmax_bench.wasm", + srcs = [ + "bench/f32-softmax.cc", + ] + MICROKERNEL_BENCHMARK_HDRS, +@@ -4752,7 +4764,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = 
"f32_vscaleexpminusmax_bench", ++ name = "f32_vscaleexpminusmax_bench.wasm", + srcs = [ + "bench/f32-vscaleexpminusmax.cc", + "src/xnnpack/AlignedAllocator.h", +@@ -4761,7 +4773,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "f32_vscaleextexp_bench", ++ name = "f32_vscaleextexp_bench.wasm", + srcs = [ + "bench/f32-vscaleextexp.cc", + "src/xnnpack/AlignedAllocator.h", +@@ -4770,7 +4782,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "f32_vsqrt_bench", ++ name = "f32_vsqrt_bench.wasm", + srcs = [ + "bench/f32-vsqrt.cc", + "src/xnnpack/AlignedAllocator.h", +@@ -4779,7 +4791,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "f32_im2col_gemm_bench", ++ name = "f32_im2col_gemm_bench.wasm", + srcs = [ + "bench/f32-im2col-gemm.cc", + "bench/conv.h", +@@ -4792,7 +4804,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "rounding_bench", ++ name = "rounding_bench.wasm", + srcs = [ + "bench/rounding.cc", + "src/xnnpack/AlignedAllocator.h", +@@ -4804,7 +4816,7 @@ xnnpack_benchmark( + ########################### Benchmarks for operators ########################### + + xnnpack_benchmark( +- name = "average_pooling_bench", ++ name = "average_pooling_bench.wasm", + srcs = ["bench/average-pooling.cc"], + copts = xnnpack_optional_tflite_copts(), + tags = ["nowin32"], +@@ -4812,7 +4824,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "bankers_rounding_bench", ++ name = "bankers_rounding_bench.wasm", + srcs = ["bench/bankers-rounding.cc"], + copts = xnnpack_optional_tflite_copts(), + tags = ["nowin32"], +@@ -4820,7 +4832,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "ceiling_bench", ++ name = "ceiling_bench.wasm", + srcs = ["bench/ceiling.cc"], + copts = xnnpack_optional_tflite_copts(), + tags = ["nowin32"], +@@ -4828,13 +4840,13 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "channel_shuffle_bench", ++ name = "channel_shuffle_bench.wasm", + srcs = ["bench/channel-shuffle.cc"], + deps = 
OPERATOR_BENCHMARK_DEPS, + ) + + xnnpack_benchmark( +- name = "convolution_bench", ++ name = "convolution_bench.wasm", + srcs = ["bench/convolution.cc"], + copts = xnnpack_optional_tflite_copts() + xnnpack_optional_armcl_copts(), + tags = ["nowin32"], +@@ -4842,7 +4854,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "deconvolution_bench", ++ name = "deconvolution_bench.wasm", + srcs = ["bench/deconvolution.cc"], + copts = xnnpack_optional_tflite_copts(), + tags = ["nowin32"], +@@ -4850,7 +4862,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "floor_bench", ++ name = "floor_bench.wasm", + srcs = ["bench/floor.cc"], + copts = xnnpack_optional_tflite_copts(), + tags = ["nowin32"], +@@ -4858,13 +4870,13 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "global_average_pooling_bench", ++ name = "global_average_pooling_bench.wasm", + srcs = ["bench/global-average-pooling.cc"], + deps = OPERATOR_BENCHMARK_DEPS, + ) + + xnnpack_benchmark( +- name = "hardswish_bench", ++ name = "hardswish_bench.wasm", + srcs = ["bench/hardswish.cc"], + copts = xnnpack_optional_tflite_copts(), + tags = ["nowin32"], +@@ -4872,13 +4884,13 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "max_pooling_bench", ++ name = "max_pooling_bench.wasm", + srcs = ["bench/max-pooling.cc"], + deps = OPERATOR_BENCHMARK_DEPS, + ) + + xnnpack_benchmark( +- name = "sigmoid_bench", ++ name = "sigmoid_bench.wasm", + srcs = ["bench/sigmoid.cc"], + copts = xnnpack_optional_tflite_copts(), + tags = ["nowin32"], +@@ -4886,7 +4898,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "prelu_bench", ++ name = "prelu_bench.wasm", + srcs = ["bench/prelu.cc"], + copts = xnnpack_optional_tflite_copts(), + tags = ["nowin32"], +@@ -4894,7 +4906,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "softmax_bench", ++ name = "softmax_bench.wasm", + srcs = ["bench/softmax.cc"], + copts = xnnpack_optional_tflite_copts(), + tags = ["nowin32"], +@@ -4902,7 +4914,7 @@ 
xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "square_root_bench", ++ name = "square_root_bench.wasm", + srcs = ["bench/square-root.cc"], + copts = xnnpack_optional_tflite_copts(), + tags = ["nowin32"], +@@ -4910,7 +4922,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "truncation_bench", ++ name = "truncation_bench.wasm", + srcs = ["bench/truncation.cc"], + deps = OPERATOR_BENCHMARK_DEPS, + ) +@@ -4918,7 +4930,7 @@ xnnpack_benchmark( + ############################# End-to-end benchmarks ############################ + + cc_library( +- name = "fp32_mobilenet_v1", ++ name = "fp32_mobilenet_v1.wasm", + srcs = ["models/fp32-mobilenet-v1.cc"], + hdrs = ["models/models.h"], + copts = xnnpack_std_cxxopts(), +@@ -4930,7 +4942,7 @@ cc_library( + ) + + cc_library( +- name = "fp16_mobilenet_v1", ++ name = "fp16_mobilenet_v1.wasm", + srcs = ["models/fp16-mobilenet-v1.cc"], + hdrs = ["models/models.h"], + copts = xnnpack_std_cxxopts(), +@@ -4943,7 +4955,7 @@ cc_library( + ) + + cc_library( +- name = "qs8_mobilenet_v1", ++ name = "qs8_mobilenet_v1.wasm", + srcs = ["models/qs8-mobilenet-v1.cc"], + hdrs = ["models/models.h"], + copts = xnnpack_std_cxxopts(), +@@ -4955,7 +4967,7 @@ cc_library( + ) + + cc_library( +- name = "qs8_mobilenet_v2", ++ name = "qs8_mobilenet_v2.wasm", + srcs = ["models/qs8-mobilenet-v2.cc"], + hdrs = ["models/models.h"], + copts = xnnpack_std_cxxopts(), +@@ -4967,7 +4979,7 @@ cc_library( + ) + + cc_library( +- name = "fp32_mobilenet_v2", ++ name = "fp32_mobilenet_v2.wasm", + srcs = ["models/fp32-mobilenet-v2.cc"], + hdrs = ["models/models.h"], + copts = xnnpack_std_cxxopts(), +@@ -4979,7 +4991,7 @@ cc_library( + ) + + cc_library( +- name = "fp16_mobilenet_v2", ++ name = "fp16_mobilenet_v2.wasm", + srcs = ["models/fp16-mobilenet-v2.cc"], + hdrs = ["models/models.h"], + copts = xnnpack_std_cxxopts(), +@@ -4992,7 +5004,7 @@ cc_library( + ) + + cc_library( +- name = "fp32_mobilenet_v3_large", ++ name = "fp32_mobilenet_v3_large.wasm", + 
srcs = ["models/fp32-mobilenet-v3-large.cc"], + hdrs = ["models/models.h"], + copts = xnnpack_std_cxxopts(), +@@ -5004,7 +5016,7 @@ cc_library( + ) + + cc_library( +- name = "fp16_mobilenet_v3_large", ++ name = "fp16_mobilenet_v3_large.wasm", + srcs = ["models/fp16-mobilenet-v3-large.cc"], + hdrs = ["models/models.h"], + copts = xnnpack_std_cxxopts(), +@@ -5017,7 +5029,7 @@ cc_library( + ) + + cc_library( +- name = "fp32_mobilenet_v3_small", ++ name = "fp32_mobilenet_v3_small.wasm", + srcs = ["models/fp32-mobilenet-v3-small.cc"], + hdrs = ["models/models.h"], + copts = xnnpack_std_cxxopts(), +@@ -5029,7 +5041,7 @@ cc_library( + ) + + cc_library( +- name = "fp16_mobilenet_v3_small", ++ name = "fp16_mobilenet_v3_small.wasm", + srcs = ["models/fp16-mobilenet-v3-small.cc"], + hdrs = ["models/models.h"], + copts = xnnpack_std_cxxopts(), +@@ -5042,51 +5054,51 @@ cc_library( + ) + + xnnpack_benchmark( +- name = "f32_dwconv_e2e_bench", ++ name = "f32_dwconv_e2e_bench.wasm", + srcs = [ + "bench/f32-dwconv-e2e.cc", + "bench/end2end.h", + ] + MICROKERNEL_BENCHMARK_HDRS, + deps = MICROKERNEL_BENCHMARK_DEPS + [ + ":XNNPACK", +- ":fp32_mobilenet_v1", +- ":fp32_mobilenet_v2", +- ":fp32_mobilenet_v3_large", +- ":fp32_mobilenet_v3_small", ++ ":fp32_mobilenet_v1.wasm", ++ ":fp32_mobilenet_v2.wasm", ++ ":fp32_mobilenet_v3_large.wasm", ++ ":fp32_mobilenet_v3_small.wasm", + ], + ) + + xnnpack_benchmark( +- name = "f32_gemm_e2e_bench", ++ name = "f32_gemm_e2e_bench.wasm", + srcs = [ + "bench/f32-gemm-e2e.cc", + "bench/end2end.h", + ] + MICROKERNEL_BENCHMARK_HDRS, + deps = MICROKERNEL_BENCHMARK_DEPS + [ + ":XNNPACK", +- ":fp32_mobilenet_v1", +- ":fp32_mobilenet_v2", +- ":fp32_mobilenet_v3_large", +- ":fp32_mobilenet_v3_small", ++ ":fp32_mobilenet_v1.wasm", ++ ":fp32_mobilenet_v2.wasm", ++ ":fp32_mobilenet_v3_large.wasm", ++ ":fp32_mobilenet_v3_small.wasm", + ], + ) + + xnnpack_benchmark( +- name = "end2end_bench", ++ name = "end2end_bench.wasm", + srcs = ["bench/end2end.cc"], + deps = [ 
+ ":XNNPACK", + ":bench_utils", +- ":fp16_mobilenet_v1", +- ":fp16_mobilenet_v2", +- ":fp16_mobilenet_v3_large", +- ":fp16_mobilenet_v3_small", +- ":fp32_mobilenet_v1", +- ":fp32_mobilenet_v2", +- ":fp32_mobilenet_v3_large", +- ":fp32_mobilenet_v3_small", +- ":qs8_mobilenet_v1", +- ":qs8_mobilenet_v2", ++ ":fp16_mobilenet_v1.wasm", ++ ":fp16_mobilenet_v2.wasm", ++ ":fp16_mobilenet_v3_large.wasm", ++ ":fp16_mobilenet_v3_small.wasm", ++ ":fp32_mobilenet_v1.wasm", ++ ":fp32_mobilenet_v2.wasm", ++ ":fp32_mobilenet_v3_large.wasm", ++ ":fp32_mobilenet_v3_small.wasm", ++ ":qs8_mobilenet_v1.wasm", ++ ":qs8_mobilenet_v2.wasm", + "@pthreadpool", + ], + ) +@@ -5094,7 +5106,7 @@ xnnpack_benchmark( + #################### Accuracy evaluation for math functions #################### + + xnnpack_benchmark( +- name = "f32_exp_eval", ++ name = "f32_exp_eval.wasm", + srcs = [ + "eval/f32-exp.cc", + "src/xnnpack/AlignedAllocator.h", +@@ -5103,7 +5115,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "f32_expminus_eval", ++ name = "f32_expminus_eval.wasm", + srcs = [ + "eval/f32-expminus.cc", + "src/xnnpack/AlignedAllocator.h", +@@ -5112,7 +5124,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "f32_extexp_eval", ++ name = "f32_extexp_eval.wasm", + srcs = [ + "eval/f32-extexp.cc", + "src/xnnpack/AlignedAllocator.h", +@@ -5121,7 +5133,7 @@ xnnpack_benchmark( + ) + + xnnpack_unit_test( +- name = "f32_roundne_eval", ++ name = "f32_roundne_eval.wasm", + srcs = [ + "eval/f32-roundne.cc", + "src/xnnpack/AlignedAllocator.h", +@@ -5132,7 +5144,7 @@ xnnpack_unit_test( + ) + + xnnpack_unit_test( +- name = "f32_roundd_eval", ++ name = "f32_roundd_eval.wasm", + srcs = [ + "eval/f32-roundd.cc", + "src/xnnpack/AlignedAllocator.h", +@@ -5143,7 +5155,7 @@ xnnpack_unit_test( + ) + + xnnpack_unit_test( +- name = "f32_roundu_eval", ++ name = "f32_roundu_eval.wasm", + srcs = [ + "eval/f32-roundu.cc", + "src/xnnpack/AlignedAllocator.h", +@@ -5154,7 +5166,7 @@ xnnpack_unit_test( + ) + + 
xnnpack_unit_test( +- name = "f32_roundz_eval", ++ name = "f32_roundz_eval.wasm", + srcs = [ + "eval/f32-roundz.cc", + "src/xnnpack/AlignedAllocator.h", +@@ -5165,7 +5177,7 @@ xnnpack_unit_test( + ) + + xnnpack_benchmark( +- name = "f32_sigmoid_eval", ++ name = "f32_sigmoid_eval.wasm", + srcs = [ + "eval/f32-sigmoid.cc", + "src/xnnpack/AlignedAllocator.h", +@@ -5174,7 +5186,7 @@ xnnpack_benchmark( + ) + + xnnpack_benchmark( +- name = "f32_sqrt_eval", ++ name = "f32_sqrt_eval.wasm", + srcs = [ + "eval/f32-sqrt.cc", + "src/xnnpack/AlignedAllocator.h", +diff --git a/emscripten.bzl b/emscripten.bzl +index faad087..2b4763f 100644 +--- a/emscripten.bzl ++++ b/emscripten.bzl +@@ -4,30 +4,25 @@ def xnnpack_emscripten_minimal_linkopts(): + """Minimal Emscripten-specific linkopts for binaries.""" + return [ + "-s ASSERTIONS=0", +- "-s ERROR_ON_UNDEFINED_SYMBOLS=1", +- "-s EXIT_RUNTIME=1", ++ "-s ERROR_ON_UNDEFINED_SYMBOLS=0", + ] + + def xnnpack_emscripten_test_linkopts(): + """Emscripten-specific linkopts for unit tests.""" + return [ + "-s ASSERTIONS=2", +- "-s ERROR_ON_UNDEFINED_SYMBOLS=1", ++ "-s ERROR_ON_UNDEFINED_SYMBOLS=0", + "-s DEMANGLE_SUPPORT=1", +- "-s EXIT_RUNTIME=1", + "-s ALLOW_MEMORY_GROWTH=1", +- "--pre-js $(location :preamble.js.lds)", + ] + + def xnnpack_emscripten_benchmark_linkopts(): + """Emscripten-specific linkopts for benchmarks.""" + return [ + "-s ASSERTIONS=1", +- "-s ERROR_ON_UNDEFINED_SYMBOLS=1", +- "-s EXIT_RUNTIME=1", ++ "-s ERROR_ON_UNDEFINED_SYMBOLS=0", + "-s ALLOW_MEMORY_GROWTH=1", + "-s TOTAL_MEMORY=268435456", # 256M +- "--pre-js $(location :preamble.js.lds)", + ] + + def xnnpack_emscripten_deps(): diff --git a/samples/workload/cmake/toolchain.cmake b/samples/workload/cmake/toolchain.cmake index 4b9ae8fd..85591323 100644 --- a/samples/workload/cmake/toolchain.cmake +++ b/samples/workload/cmake/toolchain.cmake @@ -59,6 +59,8 @@ add_compile_options( $<$:-v> ) +# need users to create their own additional include files + ################ AR 
################ find_program(LLVM_AR NAMES llvm-ar llvm-ar-11 REQUIRED) diff --git a/samples/workload/docker/Dockerfile b/samples/workload/docker/Dockerfile index 56f3adb2..cb96768c 100644 --- a/samples/workload/docker/Dockerfile +++ b/samples/workload/docker/Dockerfile @@ -13,6 +13,7 @@ ARG WASI_SDK_VER=11.0 ARG WABT_VER=1.0.19 ARG CMAKE_VER=3.16.2 ARG BINARYEN_VER=version_97 +ARG BAZEL_VER=3.7.0 # # install wasi-sdk @@ -66,6 +67,16 @@ RUN cd /opt \ && rm ${BINARYEN_FILE} \ && ln -sf /opt/binaryen-${BINARYEN_VER} /opt/binaryen +RUN apt install -y unzip zip + +# +# install bazel +ARG BAZEL_FILE=bazel-${BAZEL_VER}-installer-linux-x86_64.sh +COPY ${BAZEL_FILE} /tmp +RUN cd /tmp \ + && chmod a+x ${BAZEL_FILE} \ + && ./${BAZEL_FILE} + # # Clean up RUN apt-get autoremove -y \ diff --git a/samples/workload/docker/build.sh b/samples/workload/docker/build.sh index c73c5bce..d14606c8 100755 --- a/samples/workload/docker/build.sh +++ b/samples/workload/docker/build.sh @@ -13,6 +13,7 @@ WASI_SDK_VER=11.0 WABT_VER=1.0.19 CMAKE_VER=3.16.2 BINARYEN_VER=version_97 +BAZEL_VER=3.7.0 cd build_scripts if [[ ! -f wasi-sdk-${WASI_SDK_VER}-linux.tar.gz ]]; then @@ -34,6 +35,10 @@ fi if [[ ! -f binaryen-${BINARYEN_VER}-x86_64-linux.tar.gz ]]; then wget https://github.com/WebAssembly/binaryen/releases/download/${BINARYEN_VER}/binaryen-${BINARYEN_VER}-x86_64-linux.tar.gz fi + +if [[ ! 
-f bazel-${BAZEL_VER}-installer-linux-x86_64.sh ]]; then + wget https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VER}/bazel-${BAZEL_VER}-installer-linux-x86_64.sh +fi cd - docker build \ @@ -45,4 +50,5 @@ docker build \ --build-arg WABT_VER=${WABT_VER} \ --build-arg CMAKE_VER=${CMAKE_VER} \ --build-arg BINARYEN_VER=${BINARYEN_VER} \ + --build-arg BAZEL_VER=${BAZEL_VER} \ -t clang_env:0.1 -f Dockerfile build_scripts diff --git a/samples/workload/wasm-av1/.gitignore b/samples/workload/wasm-av1/.gitignore new file mode 100644 index 00000000..e8bec70e --- /dev/null +++ b/samples/workload/wasm-av1/.gitignore @@ -0,0 +1,8 @@ +# from CMakeLists +av1 +build +include + +# from build.sh +wasm-av1 +out diff --git a/samples/workload/wasm-av1/CMakeLists.avx_wasm.txt b/samples/workload/wasm-av1/CMakeLists.avx_wasm.txt new file mode 100644 index 00000000..78244b45 --- /dev/null +++ b/samples/workload/wasm-av1/CMakeLists.avx_wasm.txt @@ -0,0 +1,79 @@ +# Copyright (C) 2019 Intel Corporation. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +cmake_minimum_required (VERSION 3.0) + +project(testavx) + +# a workaround to let aom find our non-public headers +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../include/libc) + +################ AOM ################ +set(ENABLE_CCACHE ON) +set(ENABLE_DOCS OFF CACHE BOOL "ENABLE_DOCS" FORCE) +set(ENABLE_EXAMPLES OFF CACHE BOOL "ENABLE_EXAMPLES" FORCE) +set(ENABLE_NEON OFF CACHE BOOL "ENABLE_EXAMPLES" FORCE) +set(ENABLE_NEON_ASM OFF CACHE BOOL "ENABLE_EXAMPLES" FORCE) +set(ENABLE_VSX OFF CACHE BOOL "ENABLE_EXAMPLES" FORCE) +set(ENABLE_MMX OFF CACHE BOOL "ENABLE_EXAMPLES" FORCE) +set(AOM_TARGET_CPU generic) +set(CONFIG_ACCOUNTING 1 CACHE NUMBER "" FORCE) +set(CONFIG_INSPECTION 1 CACHE NUMBER "" FORCE) +set(CONFIG_MULTITHREAD 0 CACHE NUMBER "" FORCE) +set(CONFIG_RUNTIME_CPU_DETECT 0 CACHE NUMBER "" FORCE) +set(CONFIG_UNIT_TESTS 0 CACHE NUMBER "" FORCE) +set(CONFIG_WEBM_IO 0 CACHE NUMBER "" FORCE) +add_subdirectory(third_party/aom third_party/aom/bin EXCLUDE_FROM_ALL) + +################ AV ################ +add_executable(${PROJECT_NAME} + test.c + decode-av1.c +) + +target_include_directories(${PROJECT_NAME} + PRIVATE + third_party/aom/ + ${CMAKE_CURRENT_BINARY_DIR}/third_party/aom/bin +) + +set_target_properties(${PROJECT_NAME} + PROPERTIES + OUTPUT_NAME ${PROJECT_NAME}.wasm +) + +target_link_options(${PROJECT_NAME} + PRIVATE + LINKER:--allow-undefined + LINKER:--export=__heap_base + LINKER:--export=__data_end + LINKER:--initial-memory=33554432 + LINKER:-z,stack-size=25165824 +) + +target_link_libraries(${PROJECT_NAME} + PRIVATE + aom +) + +add_dependencies(${PROJECT_NAME} aom) + +find_program(WASM_OPT + NAMES wasm-opt + PATHS /opt/binaryen-version_97/bin /opt/binaryen/bin +) + +if (NOT WASM_OPT) + message(WARNING "can not find wasm-opt and will not optimize any wasm module") +endif() + +add_custom_target(${PROJECT_NAME}_opt ALL + COMMAND + ${WASM_OPT} -Oz --enable-simd -o ${PROJECT_NAME}.opt.wasm 
${PROJECT_NAME}.wasm + BYPRODUCTS + ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}.opt.wasm + WORKING_DIRECTORY + ${CMAKE_CURRENT_BINARY_DIR} +) + +add_dependencies(${PROJECT_NAME}_opt ${PROJECT_NAME}) diff --git a/samples/workload/wasm-av1/CMakeLists.txt b/samples/workload/wasm-av1/CMakeLists.txt new file mode 100644 index 00000000..ae33e6ff --- /dev/null +++ b/samples/workload/wasm-av1/CMakeLists.txt @@ -0,0 +1,61 @@ +# Copyright (C) 2019 Intel Corporation. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +cmake_minimum_required (VERSION 3.0) + +project(av1_wasm) + +################ BINARYEN ################ +find_program(WASM_OPT + NAMES wasm-opt + PATHS /opt/binaryen-version_97/bin /opt/binaryen/bin +) + +if (NOT WASM_OPT) + message(FATAL_ERROR + "can not find wasm-opt. " + "please download it from " + "https://github.com/WebAssembly/binaryen/releases/download/version_97/binaryen-version_97-x86_64-linux.tar.gz " + "and install it under /opt" + ) +endif() + +####################################### +include(ExternalProject) + +################ HEADERS ################ +ExternalProject_Add(headers_from_emcc + PREFIX headers + SOURCE_DIR "$ENV{EMSDK}/upstream/emscripten/system/" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/sys + && ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_SOURCE_DIR}/include/libc/bits + # copy emscripten pthread related header files + && ${CMAKE_COMMAND} -E copy $ENV{EMSDK}/upstream/emscripten/system/include/libc/pthread.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/ + && ${CMAKE_COMMAND} -E copy $ENV{EMSDK}/upstream/emscripten/system/include/libc/signal.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/ + && ${CMAKE_COMMAND} -E copy $ENV{EMSDK}/upstream/emscripten/system/include/libc/netdb.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/ + && ${CMAKE_COMMAND} -E copy 
$ENV{EMSDK}/upstream/emscripten/system/include/libc/sys/wait.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/sys/ + && ${CMAKE_COMMAND} -E copy $ENV{EMSDK}/upstream/emscripten/system/include/libc/sys/socket.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/sys/ + # copy emscripten setjmp headers + && ${CMAKE_COMMAND} -E copy $ENV{EMSDK}/upstream/emscripten/system/include/libc/setjmp.h ${CMAKE_CURRENT_SOURCE_DIR}/include/libc/setjmp.h + && ${CMAKE_COMMAND} -E copy $ENV{EMSDK}/upstream/emscripten/system/lib/libc/musl/arch/emscripten/bits/setjmp.h ${CMAKE_CURRENT_SOURCE_DIR}/include/libc/bits/setjmp.h +) + +################ av1 ################ +ExternalProject_Add(av1 + PREFIX av1 + GIT_REPOSITORY https://github.com/GoogleChromeLabs/wasm-av1.git + GIT_TAG master + GIT_PROGRESS ON + GIT_SHALLOW ON + SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/av1 + DEPENDS headers_from_emcc + UPDATE_COMMAND git clean -fd && git checkout -- * + && ${CMAKE_COMMAND} -E echo "Copying pre-installed CMakeLists.txt" + && ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.avx_wasm.txt CMakeLists.txt + && git apply ../av1-clang.patch + CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_TOOLCHAIN_FILE=${CMAKE_CURRENT_SOURCE_DIR}/../cmake/toolchain.cmake ${CMAKE_CURRENT_SOURCE_DIR}/av1 + BUILD_COMMAND make testavx_opt + INSTALL_COMMAND ${CMAKE_COMMAND} -E copy testavx.opt.wasm ${CMAKE_CURRENT_BINARY_DIR}/testavx.wasm +) diff --git a/samples/workload/wasm-av1/README.md b/samples/workload/wasm-av1/README.md index 30cb6ca5..68dad177 100644 --- a/samples/workload/wasm-av1/README.md +++ b/samples/workload/wasm-av1/README.md @@ -1,22 +1,56 @@ "wasm-av1" sample introduction ============== -This sample demonstrates how to build [wasm-av1](https://github.com/GoogleChromeLabs/wasm-av1) into WebAssembly with emcc toolchain and run it with iwasm. 
Please first install [emsdk](https://github.com/emscripten-core/emsdk): -```bash -git clone https://github.com/emscripten-core/emsdk.git -cd emsdk -./emsdk install latest -./emsdk activate latest -``` -And set up ensdk environment: -```bash -source emsdk_env.sh -``` -Then run + +This sample demonstrates how to build [wasm-av1](https://github.com/GoogleChromeLabs/wasm-av1) into +WebAssembly with simd support and run it with iwasm. + +## Preparation + +please refer to [installation instructions](../README.md). + +## Build with EMSDK + +just run the convenience script: + ```bash ./build.sh ``` -to build wasm-av1 and run it with iwasm, which basically contains the following steps: + +it is going to build wasm-av1 and run it with iwasm, which basically contains the following steps: - hack emcc to delete some objects in libc.a - patch wasm-av1 and build it with emcc compiler - build iwasm with simd and libc-emcc support - run testav1.aot with iwasm + +## Or build with clang-11 and wasi-sdk + +``` shell +$ mkdir build && cd build +$ cmake .. +$ make +# to verify +$ ls testavx.wasm +``` + +### Run workload + +Firstly please build iwasm with simd support: + +``` shell +$ cd /product-mini/platforms/linux/ +$ mkdir build && cd build +$ cmake .. -DWAMR_BUILD_SIMD=1 +$ make +``` + +Then compile wasm file to aot file and run: + +``` shell +$ cd /wamr-compiler/build +$ ./wamrc --enable-simd -o testavx.aot testavx.wasm +$ cd /product-mini/platforms/linux/ +$ # copy sample data like /samples/workload/wasm-av1/av1/third_party/samples/elephants_dream_480p24.ivf +$ # copy testavx.aot +$ # make sure you declare the access priority of the directory in which the sample data is +$ ./iwasm --dir=. 
./testavx.aot ./elephants_dream_480p24.ivf +``` \ No newline at end of file diff --git a/samples/workload/wasm-av1/av1-clang.patch b/samples/workload/wasm-av1/av1-clang.patch new file mode 100644 index 00000000..97e79548 --- /dev/null +++ b/samples/workload/wasm-av1/av1-clang.patch @@ -0,0 +1,19 @@ +diff --git a/test.c b/test.c +index df2d44b..520bf13 100644 +--- a/test.c ++++ b/test.c +@@ -63,9 +63,14 @@ main(int argc, char *argv[]) { + static int i = 0; + + ++i; ++ printf("Decoding frame #%d\n", i); + if (30 <= i && i < 40) { ++ printf("Dumping frame #%d\n", i); + dump_raw_frame(af, i); + } ++ if (i >= 1000) { ++ break; ++ } + } + /* + * Run the decoder every time, so that we keep