Implement SIMD latest opcodes and update LLVM to 13.0 (#758)

Implement the latest SIMD opcodes and update LLVM to 13.0,
update the llvm build scripts, update the sample workloads' build scripts,
and build a customized wasi-sdk to build some workloads.
Also refine the CI rules.

Signed-off-by: Wenyong Huang <wenyong.huang@intel.com>
This commit is contained in:
Wenyong Huang
2021-09-17 19:12:57 +08:00
committed by GitHub
parent 7e60a5db8d
commit 7be0d385a6
82 changed files with 5266 additions and 4698 deletions

View File

@ -6,85 +6,87 @@ cmake_minimum_required (VERSION 3.0)
project(xnnpack_wasm)
################ EMCC ################
# Require the EMSDK environment variable to be defined when CMake configures
# this project. SEND_ERROR reports the problem and lets CMake keep processing
# the rest of the file, but no build system is generated afterwards.
if(NOT DEFINED ENV{EMSDK})
message(SEND_ERROR
"can not find emsdk. "
"please refer to https://emscripten.org/docs/getting_started/downloads.html "
"and install it, "
"or active emsdk by 'source ./emsdk_env.sh'"
)
endif()
include(ExternalProject)
ExternalProject_Add(xnnpack
PREFIX xnnpack
GIT_REPOSITORY https://github.com/google/XNNPACK.git
GIT_TAG 90f520b6482bb99ac1bbfb71be1382f6c9b83241
GIT_TAG master
GIT_PROGRESS ON
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack
UPDATE_COMMAND git checkout .
&& cmake -E copy ${CMAKE_CURRENT_SOURCE_DIR}/benchmark.patch ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack/third_party
&& git apply ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack.patch
CONFIGURE_COMMAND ""
# grep xnnpack_benchmark -A 1 BUILD.bazel \
# | grep "name =" \
# | awk '{print $3}' \
# | sed -e 's/\"//g' -e 's/,//g' -e 's/^/\/\/:/g'
BUILD_COMMAND cd ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack
&& bazel --output_user_root=build_user_output build -c opt --config=wasm
//:qs8_gemm_bench.wasm
//:qs8_requantization_bench.wasm
//:qu8_gemm_bench.wasm
//:qu8_requantization_bench.wasm
//:f16_igemm_bench.wasm
//:f16_gemm_bench.wasm
//:f16_spmm_bench.wasm
//:f32_igemm_bench.wasm
//:f16_relu_bench.wasm
//:f32_conv_hwc_bench.wasm
//:f32_conv_hwc2chw_bench.wasm
//:f16_dwconv_bench.wasm
//:f32_dwconv_bench.wasm
//:f32_dwconv2d_chw_bench.wasm
//:f32_gemm_bench.wasm
//:f32_hswish_bench.wasm
//:f32_raddexpminusmax_bench.wasm
//:f32_raddextexp_bench.wasm
//:f32_raddstoreexpminusmax_bench.wasm
//:f32_relu_bench.wasm
//:f32_rmax_bench.wasm
//:f32_sigmoid_bench.wasm
//:f32_spmm_bench.wasm
//:f32_softmax_bench.wasm
//:f32_velu_bench.wasm
//:f32_vscaleexpminusmax_bench.wasm
//:f32_vscaleextexp_bench.wasm
//:f32_vsqrt_bench.wasm
//:f32_im2col_gemm_bench.wasm
//:rounding_bench.wasm
//:average_pooling_bench.wasm
//:bankers_rounding_bench.wasm
//:ceiling_bench.wasm
//:channel_shuffle_bench.wasm
//:convolution_bench.wasm
//:deconvolution_bench.wasm
//:elu_bench.wasm
//:floor_bench.wasm
//:global_average_pooling_bench.wasm
//:hardswish_bench.wasm
//:max_pooling_bench.wasm
//:sigmoid_bench.wasm
//:prelu_bench.wasm
//:softmax_bench.wasm
//:square_root_bench.wasm
//:truncation_bench.wasm
//:f32_dwconv_e2e_bench.wasm
//:f32_gemm_e2e_bench.wasm
//:qs8_gemm_e2e_bench.wasm
//:end2end_bench.wasm
//:f32_exp_ulp_eval.wasm
//:f32_expminus_ulp_eval.wasm
//:f32_expm1minus_ulp_eval.wasm
//:f32_extexp_ulp_eval.wasm
//:f32_sigmoid_ulp_eval.wasm
//:f32_sqrt_ulp_eval.wasm
#--sandbox_debug
//:qs8_dwconv_bench.wasm
//:qs8_gemm_bench.wasm
//:qs8_requantization_bench.wasm
//:qs8_vadd_bench.wasm
//:qs8_vaddc_bench.wasm
//:qu8_gemm_bench.wasm
//:qu8_requantization_bench.wasm
//:qu8_vadd_bench.wasm
//:qu8_vaddc_bench.wasm
//:f16_igemm_bench.wasm
//:f16_gemm_bench.wasm
//:f16_spmm_bench.wasm
//:f16_vrelu_bench.wasm
//:f32_igemm_bench.wasm
//:f32_conv_hwc_bench.wasm
//:f32_conv_hwc2chw_bench.wasm
//:f16_dwconv_bench.wasm
//:f32_dwconv_bench.wasm
//:f32_dwconv2d_chw_bench.wasm
//:f32_gemm_bench.wasm
//:f32_raddexpminusmax_bench.wasm
//:f32_raddextexp_bench.wasm
//:f32_raddstoreexpminusmax_bench.wasm
//:f32_rmax_bench.wasm
//:f32_spmm_bench.wasm
//:f32_softmax_bench.wasm
//:f32_velu_bench.wasm
//:f32_vhswish_bench.wasm
//:f32_vrelu_bench.wasm
//:f32_vscaleexpminusmax_bench.wasm
//:f32_vscaleextexp_bench.wasm
//:f32_vsigmoid_bench.wasm
//:f32_vsqrt_bench.wasm
//:f32_im2col_gemm_bench.wasm
//:rounding_bench.wasm
//:average_pooling_bench.wasm
//:bankers_rounding_bench.wasm
//:ceiling_bench.wasm
//:channel_shuffle_bench.wasm
//:convolution_bench.wasm
//:deconvolution_bench.wasm
//:elu_bench.wasm
//:floor_bench.wasm
//:global_average_pooling_bench.wasm
//:hardswish_bench.wasm
//:max_pooling_bench.wasm
//:sigmoid_bench.wasm
//:prelu_bench.wasm
//:softmax_bench.wasm
//:square_root_bench.wasm
//:truncation_bench.wasm
//:f32_dwconv_e2e_bench.wasm
//:f32_gemm_e2e_bench.wasm
//:qs8_dwconv_e2e_bench.wasm
//:qs8_gemm_e2e_bench.wasm
//:qu8_dwconv_e2e_bench.wasm
//:end2end_bench.wasm
//:f32_exp_ulp_eval.wasm
//:f32_expminus_ulp_eval.wasm
//:f32_expm1minus_ulp_eval.wasm
//:f32_extexp_ulp_eval.wasm
//:f32_sigmoid_ulp_eval.wasm
//:f32_sqrt_ulp_eval.wasm
INSTALL_COMMAND ${CMAKE_COMMAND} -E copy_directory
${CMAKE_CURRENT_SOURCE_DIR}/xnnpack/bazel-out/wasm-opt/bin/
${CMAKE_CURRENT_SOURCE_DIR}/build/wasm-opt

View File

@ -5,20 +5,7 @@ This sample demonstrates how to build [XNNPACK](https://github.com/google/XNNPAC
## Installation toolchains
- **bazel**. Please install bazel from [latest release](https://github.com/bazelbuild/bazel/releases)
- **emsdk**. Please install [emsdk](https://github.com/emscripten-core/emsdk) to /opt/emsdk:
```bash
cd /opt
git clone https://github.com/emscripten-core/emsdk.git
cd emsdk
./emsdk install latest
./emsdk activate latest
```
And set up the emsdk environment:
```bash
source /opt/emsdk/emsdk_env.sh
```
Please refer to the [installation instructions](../README.md).
## Build XNNPACK

View File

@ -0,0 +1,14 @@
diff --git include/benchmark/benchmark.h include/benchmark/benchmark.h
index 9b54802..baa5938 100755
--- include/benchmark/benchmark.h
+++ include/benchmark/benchmark.h
@@ -364,7 +364,9 @@ template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
}
+
// FIXME Add ClobberMemory() for non-gnu and non-msvc compilers
+inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { }
#endif
// This class is used for user-defined counters.

View File

@ -0,0 +1 @@
../docker/build_workload.sh

View File

@ -1 +0,0 @@
../docker/docker_build.sh

View File

@ -1,8 +1,8 @@
diff --git a/.bazelrc b/.bazelrc
index ec740f38..2c193244 100644
index ec740f38..29f9d56e 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -49,4 +49,10 @@ build:ios_fat --watchos_cpus=armv7k
@@ -49,4 +49,9 @@ build:ios_fat --watchos_cpus=armv7k
build:macos --apple_platform_type=macos
build:macos_arm64 --config=macos
@ -10,558 +10,31 @@ index ec740f38..2c193244 100644
\ No newline at end of file
+build:macos_arm64 --cpu=darwin_arm64
+
+build:wasm --copt=-msimd128
+build:wasm --cpu=wasm
+build:wasm --copt=-msimd128
+build:wasm --crosstool_top=@emsdk//emscripten_toolchain:everything
+build:wasm --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
+
diff --git a/BUILD.bazel b/BUILD.bazel
index 1f2b15a8..e7abf838 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -4996,7 +4996,7 @@ xnnpack_cc_library(
######################### Benchmarks for micro-kernels #########################
xnnpack_benchmark(
- name = "qs8_gemm_bench",
+ name = "qs8_gemm_bench.wasm",
srcs = [
"bench/gemm.h",
"bench/qs8-gemm.cc",
@@ -5007,7 +5007,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "qs8_requantization_bench",
+ name = "qs8_requantization_bench.wasm",
srcs = [
"bench/qs8-requantization.cc",
"src/xnnpack/AlignedAllocator.h",
@@ -5017,7 +5017,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "qu8_gemm_bench",
+ name = "qu8_gemm_bench.wasm",
srcs = [
"bench/gemm.h",
"bench/qu8-gemm.cc",
@@ -5028,7 +5028,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "qu8_requantization_bench",
+ name = "qu8_requantization_bench.wasm",
srcs = [
"bench/qu8-requantization.cc",
"src/xnnpack/AlignedAllocator.h",
@@ -5038,11 +5038,10 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f16_igemm_bench",
+ name = "f16_igemm_bench.wasm",
srcs = [
"bench/f16-igemm.cc",
"bench/conv.h",
- "bench/google/conv.h",
"src/xnnpack/AlignedAllocator.h",
] + MICROKERNEL_BENCHMARK_HDRS,
deps = MICROKERNEL_BENCHMARK_DEPS + [
@@ -5052,7 +5051,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f16_gemm_bench",
+ name = "f16_gemm_bench.wasm",
srcs = [
"bench/f16-gemm.cc",
"bench/gemm.h",
@@ -5064,7 +5063,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f16_spmm_bench",
+ name = "f16_spmm_bench.wasm",
srcs = [
"bench/f16-spmm.cc",
"bench/spmm.h",
@@ -5074,7 +5073,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f32_igemm_bench",
+ name = "f32_igemm_bench.wasm",
srcs = [
"bench/f32-igemm.cc",
"bench/conv.h",
@@ -5087,7 +5086,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f16_relu_bench",
+ name = "f16_relu_bench.wasm",
srcs = [
"bench/f16-relu.cc",
"src/xnnpack/AlignedAllocator.h",
@@ -5096,7 +5095,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f32_conv_hwc_bench",
+ name = "f32_conv_hwc_bench.wasm",
srcs = [
"bench/f32-conv-hwc.cc",
"bench/dconv.h",
@@ -5108,7 +5107,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f32_conv_hwc2chw_bench",
+ name = "f32_conv_hwc2chw_bench.wasm",
srcs = [
"bench/f32-conv-hwc2chw.cc",
"bench/dconv.h",
@@ -5120,11 +5119,10 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f16_dwconv_bench",
+ name = "f16_dwconv_bench.wasm",
srcs = [
"bench/f16-dwconv.cc",
"bench/dwconv.h",
- "bench/google/dwconv.h",
"src/xnnpack/AlignedAllocator.h",
] + MICROKERNEL_BENCHMARK_HDRS,
deps = MICROKERNEL_BENCHMARK_DEPS + [
@@ -5134,7 +5132,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f32_dwconv_bench",
+ name = "f32_dwconv_bench.wasm",
srcs = [
"bench/f32-dwconv.cc",
"bench/dwconv.h",
@@ -5147,7 +5145,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f32_dwconv2d_chw_bench",
+ name = "f32_dwconv2d_chw_bench.wasm",
srcs = [
"bench/f32-dwconv2d-chw.cc",
"bench/dwconv.h",
@@ -5160,7 +5158,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f32_gemm_bench",
+ name = "f32_gemm_bench.wasm",
srcs = [
"bench/f32-gemm.cc",
"bench/gemm.h",
@@ -5171,7 +5169,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f32_hswish_bench",
+ name = "f32_hswish_bench.wasm",
srcs = [
"bench/f32-hswish.cc",
"src/xnnpack/AlignedAllocator.h",
@@ -5180,7 +5178,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f32_raddexpminusmax_bench",
+ name = "f32_raddexpminusmax_bench.wasm",
srcs = [
"bench/f32-raddexpminusmax.cc",
"src/xnnpack/AlignedAllocator.h",
@@ -5189,7 +5187,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f32_raddextexp_bench",
+ name = "f32_raddextexp_bench.wasm",
srcs = [
"bench/f32-raddextexp.cc",
"src/xnnpack/AlignedAllocator.h",
@@ -5198,7 +5196,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f32_raddstoreexpminusmax_bench",
+ name = "f32_raddstoreexpminusmax_bench.wasm",
srcs = [
"bench/f32-raddstoreexpminusmax.cc",
"src/xnnpack/AlignedAllocator.h",
@@ -5207,7 +5205,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f32_relu_bench",
+ name = "f32_relu_bench.wasm",
srcs = [
"bench/f32-relu.cc",
"src/xnnpack/AlignedAllocator.h",
@@ -5216,7 +5214,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f32_rmax_bench",
+ name = "f32_rmax_bench.wasm",
srcs = [
"bench/f32-rmax.cc",
"src/xnnpack/AlignedAllocator.h",
@@ -5225,7 +5223,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f32_sigmoid_bench",
+ name = "f32_sigmoid_bench.wasm",
srcs = [
"bench/f32-sigmoid.cc",
"src/xnnpack/AlignedAllocator.h",
@@ -5234,7 +5232,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f32_spmm_bench",
+ name = "f32_spmm_bench.wasm",
srcs = [
"bench/f32-spmm.cc",
"bench/spmm.h",
@@ -5244,7 +5242,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f32_softmax_bench",
+ name = "f32_softmax_bench.wasm",
srcs = [
"bench/f32-softmax.cc",
] + MICROKERNEL_BENCHMARK_HDRS,
@@ -5253,7 +5251,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f32_velu_bench",
+ name = "f32_velu_bench.wasm",
srcs = [
"bench/f32-velu.cc",
"src/xnnpack/AlignedAllocator.h",
@@ -5262,7 +5260,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f32_vscaleexpminusmax_bench",
+ name = "f32_vscaleexpminusmax_bench.wasm",
srcs = [
"bench/f32-vscaleexpminusmax.cc",
"src/xnnpack/AlignedAllocator.h",
@@ -5271,7 +5269,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f32_vscaleextexp_bench",
+ name = "f32_vscaleextexp_bench.wasm",
srcs = [
"bench/f32-vscaleextexp.cc",
"src/xnnpack/AlignedAllocator.h",
@@ -5280,7 +5278,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f32_vsqrt_bench",
+ name = "f32_vsqrt_bench.wasm",
srcs = [
"bench/f32-vsqrt.cc",
"src/xnnpack/AlignedAllocator.h",
@@ -5289,7 +5287,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f32_im2col_gemm_bench",
+ name = "f32_im2col_gemm_bench.wasm",
srcs = [
"bench/f32-im2col-gemm.cc",
"bench/conv.h",
@@ -5302,7 +5300,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "rounding_bench",
+ name = "rounding_bench.wasm",
srcs = [
"bench/rounding.cc",
"src/xnnpack/AlignedAllocator.h",
@@ -5314,7 +5312,7 @@ xnnpack_benchmark(
########################### Benchmarks for operators ###########################
xnnpack_benchmark(
- name = "average_pooling_bench",
+ name = "average_pooling_bench.wasm",
srcs = ["bench/average-pooling.cc"],
copts = xnnpack_optional_tflite_copts(),
tags = ["nowin32"],
@@ -5322,7 +5320,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "bankers_rounding_bench",
+ name = "bankers_rounding_bench.wasm",
srcs = ["bench/bankers-rounding.cc"],
copts = xnnpack_optional_tflite_copts(),
tags = ["nowin32"],
@@ -5330,7 +5328,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "ceiling_bench",
+ name = "ceiling_bench.wasm",
srcs = ["bench/ceiling.cc"],
copts = xnnpack_optional_tflite_copts(),
tags = ["nowin32"],
@@ -5338,13 +5336,13 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "channel_shuffle_bench",
+ name = "channel_shuffle_bench.wasm",
srcs = ["bench/channel-shuffle.cc"],
deps = OPERATOR_BENCHMARK_DEPS,
)
xnnpack_benchmark(
- name = "convolution_bench",
+ name = "convolution_bench.wasm",
srcs = ["bench/convolution.cc"],
copts = xnnpack_optional_tflite_copts() + xnnpack_optional_armcl_copts(),
tags = ["nowin32"],
@@ -5352,7 +5350,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "deconvolution_bench",
+ name = "deconvolution_bench.wasm",
srcs = ["bench/deconvolution.cc"],
copts = xnnpack_optional_tflite_copts(),
tags = ["nowin32"],
@@ -5360,7 +5358,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "elu_bench",
+ name = "elu_bench.wasm",
srcs = ["bench/elu.cc"],
copts = xnnpack_optional_tflite_copts(),
tags = ["nowin32"],
@@ -5368,7 +5366,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "floor_bench",
+ name = "floor_bench.wasm",
srcs = ["bench/floor.cc"],
copts = xnnpack_optional_tflite_copts(),
tags = ["nowin32"],
@@ -5376,13 +5374,13 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "global_average_pooling_bench",
+ name = "global_average_pooling_bench.wasm",
srcs = ["bench/global-average-pooling.cc"],
deps = OPERATOR_BENCHMARK_DEPS,
)
xnnpack_benchmark(
- name = "hardswish_bench",
+ name = "hardswish_bench.wasm",
srcs = ["bench/hardswish.cc"],
copts = xnnpack_optional_tflite_copts(),
tags = ["nowin32"],
@@ -5390,13 +5388,13 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "max_pooling_bench",
+ name = "max_pooling_bench.wasm",
srcs = ["bench/max-pooling.cc"],
deps = OPERATOR_BENCHMARK_DEPS,
)
xnnpack_benchmark(
- name = "sigmoid_bench",
+ name = "sigmoid_bench.wasm",
srcs = ["bench/sigmoid.cc"],
copts = xnnpack_optional_tflite_copts(),
tags = ["nowin32"],
@@ -5404,7 +5402,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "prelu_bench",
+ name = "prelu_bench.wasm",
srcs = ["bench/prelu.cc"],
copts = xnnpack_optional_tflite_copts(),
tags = ["nowin32"],
@@ -5412,7 +5410,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "softmax_bench",
+ name = "softmax_bench.wasm",
srcs = ["bench/softmax.cc"],
copts = xnnpack_optional_tflite_copts(),
tags = ["nowin32"],
@@ -5420,7 +5418,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "square_root_bench",
+ name = "square_root_bench.wasm",
srcs = ["bench/square-root.cc"],
copts = xnnpack_optional_tflite_copts(),
tags = ["nowin32"],
@@ -5428,7 +5426,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "truncation_bench",
+ name = "truncation_bench.wasm",
srcs = ["bench/truncation.cc"],
deps = OPERATOR_BENCHMARK_DEPS,
)
@@ -5620,7 +5618,7 @@ cc_library(
)
xnnpack_benchmark(
- name = "f32_dwconv_e2e_bench",
+ name = "f32_dwconv_e2e_bench.wasm",
srcs = [
"bench/f32-dwconv-e2e.cc",
"bench/end2end.h",
@@ -5635,7 +5633,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f32_gemm_e2e_bench",
+ name = "f32_gemm_e2e_bench.wasm",
srcs = [
"bench/f32-gemm-e2e.cc",
"bench/end2end.h",
@@ -5650,7 +5648,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "qs8_gemm_e2e_bench",
+ name = "qs8_gemm_e2e_bench.wasm",
srcs = [
"bench/qs8-gemm-e2e.cc",
"bench/end2end.h",
@@ -5663,7 +5661,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "end2end_bench",
+ name = "end2end_bench.wasm",
srcs = ["bench/end2end.cc"],
deps = [
":XNNPACK",
@@ -5690,7 +5688,7 @@ xnnpack_benchmark(
#################### Accuracy evaluation for math functions ####################
xnnpack_benchmark(
- name = "f32_exp_ulp_eval",
+ name = "f32_exp_ulp_eval.wasm",
srcs = [
"eval/f32-exp-ulp.cc",
"src/xnnpack/AlignedAllocator.h",
@@ -5702,7 +5700,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f32_expminus_ulp_eval",
+ name = "f32_expminus_ulp_eval.wasm",
srcs = [
"eval/f32-expminus-ulp.cc",
"src/xnnpack/AlignedAllocator.h",
@@ -5714,7 +5712,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f32_expm1minus_ulp_eval",
+ name = "f32_expm1minus_ulp_eval.wasm",
srcs = [
"eval/f32-expm1minus-ulp.cc",
"src/xnnpack/AlignedAllocator.h",
@@ -5726,7 +5724,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f32_extexp_ulp_eval",
+ name = "f32_extexp_ulp_eval.wasm",
srcs = [
"eval/f32-extexp-ulp.cc",
"src/xnnpack/AlignedAllocator.h",
@@ -5738,7 +5736,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f32_sigmoid_ulp_eval",
+ name = "f32_sigmoid_ulp_eval.wasm",
srcs = [
"eval/f32-sigmoid-ulp.cc",
"src/xnnpack/AlignedAllocator.h",
@@ -5750,7 +5748,7 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
- name = "f32_sqrt_ulp_eval",
+ name = "f32_sqrt_ulp_eval.wasm",
srcs = [
"eval/f32-sqrt-ulp.cc",
"src/xnnpack/AlignedAllocator.h",
diff --git a/WORKSPACE b/WORKSPACE
index 4fa1aa2f..6181aab2 100644
index c58e76b6..30934678 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -5,8 +5,8 @@ load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
# Bazel rule definitions
http_archive(
name = "rules_cc",
- strip_prefix = "rules_cc-master",
- urls = ["https://github.com/bazelbuild/rules_cc/archive/master.zip"],
+ strip_prefix = "rules_cc-main",
+ urls = ["https://github.com/bazelbuild/rules_cc/archive/main.zip"],
)
# Google Test framework, used by most unit-tests.
@@ -19,8 +19,8 @@ http_archive(
# Google Benchmark library, used in micro-benchmarks.
http_archive(
@@ -21,6 +21,7 @@ http_archive(
name = "com_google_benchmark",
- strip_prefix = "benchmark-master",
- urls = ["https://github.com/google/benchmark/archive/master.zip"],
+ strip_prefix = "benchmark-1.5.3",
+ urls = ["https://github.com/google/benchmark/archive/refs/tags/v1.5.3.zip"],
strip_prefix = "benchmark-master",
urls = ["https://github.com/google/benchmark/archive/master.zip"],
+ patches = ["@//third_party:benchmark.patch"],
)
# FP16 library, used for half-precision conversions
@@ -89,3 +89,18 @@ android_ndk_repository(name = "androidndk")
@@ -84,6 +85,19 @@ http_archive(
],
)
# Android SDK location and API is auto-detected from $ANDROID_HOME environment variable
android_sdk_repository(name = "androidsdk")
+
+# emscripten library
+http_archive(
+ name = "emsdk",
+ strip_prefix = "emsdk-c1589b55641787d55d53e883852035beea9aec3f/bazel",
+ url = "https://github.com/emscripten-core/emsdk/archive/c1589b55641787d55d53e883852035beea9aec3f.tar.gz",
+ sha256 = "7a58a9996b113d3e0675df30b5f17e28aa47de2e684a844f05394fe2f6f12e8e",
+ name = "emsdk",
+ strip_prefix = "emsdk-2.0.26/bazel",
+ url = "https://github.com/emscripten-core/emsdk/archive/refs/tags/2.0.26.tar.gz",
+ sha256 = "79e7166aa8eaae6e52cef1363b2d8db795d03684846066bc51f9dcf905dd58ad",
+)
+
+load("@emsdk//:deps.bzl", emsdk_deps = "deps")
@ -570,67 +43,42 @@ index 4fa1aa2f..6181aab2 100644
+load("@emsdk//:emscripten_deps.bzl", emsdk_emscripten_deps = "emscripten_deps")
+emsdk_emscripten_deps()
+
# Android NDK location and version is auto-detected from $ANDROID_NDK_HOME environment variable
android_ndk_repository(name = "androidndk")
diff --git a/build_defs.bzl b/build_defs.bzl
index 10345032..0e926fca 100644
index 2442bed1..b860dfef 100644
--- a/build_defs.bzl
+++ b/build_defs.bzl
@@ -1,6 +1,6 @@
"""Build definitions and rules for XNNPACK."""
-load(":emscripten.bzl", "xnnpack_emscripten_benchmark_linkopts", "xnnpack_emscripten_deps", "xnnpack_emscripten_minimal_linkopts", "xnnpack_emscripten_test_linkopts")
+load(":emscripten.bzl", "xnnpack_emscripten_benchmark_linkopts", "xnnpack_emscripten_deps", "xnnpack_emscripten_minimal_linkopts", "xnnpack_emscripten_test_linkopts", "xnnpack_emscripten_benchmark_copts")
def xnnpack_visibility():
"""Visibility of :XNNPACK target.
@@ -424,10 +424,15 @@ def xnnpack_benchmark(name, srcs, copts = [], deps = [], tags = []):
":windows_x86_64_mingw": ["-Wno-unused-function"],
":windows_x86_64_msys": ["-Wno-unused-function"],
":windows_x86_64": [],
+ ":emscripten": xnnpack_emscripten_benchmark_copts(),
+ ":emscripten_wasm": xnnpack_emscripten_benchmark_copts(),
+ ":emscripten_wasmsimd": xnnpack_emscripten_benchmark_copts(),
"//conditions:default": ["-Wno-unused-function"],
}) + copts,
linkopts = select({
":emscripten": xnnpack_emscripten_benchmark_linkopts(),
+ ":emscripten_wasm": xnnpack_emscripten_benchmark_linkopts(),
+ ":emscripten_wasmsimd": xnnpack_emscripten_benchmark_linkopts(),
":windows_x86_64_mingw": ["-lshlwapi"],
":windows_x86_64_msys": ["-lshlwapi"],
"//conditions:default": [],
@@ -414,7 +414,7 @@ def xnnpack_benchmark(name, srcs, copts = [], deps = [], tags = []):
explicitly specified.
"""
native.cc_binary(
- name = name,
+ name = name + ".wasm",
srcs = srcs,
copts = xnnpack_std_cxxopts() + [
"-Iinclude",
diff --git a/emscripten.bzl b/emscripten.bzl
index 0a0caedf..aafe3199 100644
index 130d5f16..2696ad54 100644
--- a/emscripten.bzl
+++ b/emscripten.bzl
@@ -6,6 +6,7 @@ def xnnpack_emscripten_minimal_linkopts():
"-s ASSERTIONS=0",
"-s ERROR_ON_UNDEFINED_SYMBOLS=1",
"-s EXIT_RUNTIME=1",
+ "--oformat=wasm",
]
def xnnpack_emscripten_test_linkopts():
@@ -17,21 +18,36 @@ def xnnpack_emscripten_test_linkopts():
"-s EXIT_RUNTIME=1",
"-s ALLOW_MEMORY_GROWTH=1",
"--pre-js $(location :preamble.js.lds)",
+ "--oformat=wasm",
]
def xnnpack_emscripten_benchmark_linkopts():
@@ -25,12 +25,19 @@ def xnnpack_emscripten_benchmark_linkopts():
"""Emscripten-specific linkopts for benchmarks."""
return [
"-s ASSERTIONS=1",
- "-s ENVIRONMENT=node,shell,web",
- "-s ERROR_ON_UNDEFINED_SYMBOLS=1",
- "-s EXIT_RUNTIME=1",
- "-s ALLOW_MEMORY_GROWTH=1",
+ "-s ERROR_ON_UNDEFINED_SYMBOLS=0",
+ "-s ALLOW_MEMORY_GROWTH=0",
"-s TOTAL_MEMORY=436207616", # 416M
"-s ALLOW_MEMORY_GROWTH=1",
"-s TOTAL_MEMORY=445644800", # 425M
- "--pre-js $(location :preamble.js.lds)",
+ "-s USE_PTHREADS=0",
+ "-s STANDALONE_WASM=1",
+ "-Wno-unused",
+ "-Wno-unused-variable",
+ "-Wno-unused-command-line-argument",
+ "-Wl,--export=__heap_base",
+ "-Wl,--export=__data_end",
+ "-Wl,--export=malloc",
@ -639,19 +87,6 @@ index 0a0caedf..aafe3199 100644
]
def xnnpack_emscripten_deps():
"""Emscripten-specific dependencies for unit tests and benchmarks."""
+ return []
+
+def xnnpack_emscripten_benchmark_copts():
return [
- ":preamble.js.lds",
+ "-s ASSERTIONS=1",
+ "-s ERROR_ON_UNDEFINED_SYMBOLS=0",
+ "-s ALLOW_MEMORY_GROWTH=0",
+ "-s USE_PTHREADS=0",
+ "-s STANDALONE_WASM=1",
+ "-Wno-unused",
]
diff --git a/third_party/cpuinfo.BUILD b/third_party/cpuinfo.BUILD
index 128d683e..f6c287c4 100644
--- a/third_party/cpuinfo.BUILD