Implement the segue optimization for LLVM AOT/JIT (#2230)

Segue is an optimization technique that uses an x86 segment register to hold
the WebAssembly linear memory base address. This removes most of the cost of
the SFI (Software-based Fault Isolation) base addition and frees up a
general-purpose register. As a result, it may:
- Improve the performance of JIT/AOT code
- Reduce the footprint of JIT/AOT code (the generated code is smaller)
- Reduce the JIT/AOT compilation time

This PR uses the x86-64 GS segment register to apply the optimization. Currently
it supports the linux and linux-sgx platforms on the x86-64 target. It is disabled
by default; developers can use the options below to enable it for wamrc and iwasm
(with LLVM JIT enabled):
```bash
wamrc --enable-segue=[<flags>] -o output_file wasm_file
iwasm --enable-segue=[<flags>] wasm_file [args...]
```
`flags` can be:
    i32.load, i64.load, f32.load, f64.load, v128.load,
    i32.store, i64.store, f32.store, f64.store, v128.store
Use commas to separate them, e.g. `--enable-segue=i32.load,i64.store`;
a bare `--enable-segue` enables all of the flags.

Acknowledgement:
Many thanks to the Intel Labs, UC San Diego and UT Austin teams for introducing
this technology and for their great support and guidance!

Signed-off-by: Wenyong Huang <wenyong.huang@intel.com>
Co-authored-by: Vahldiek-oberwagner, Anjo Lucas <anjo.lucas.vahldiek-oberwagner@intel.com>
This commit is contained in:
Wenyong Huang
2023-05-26 10:13:33 +08:00
committed by GitHub
parent 27239723a9
commit 76be848ec3
42 changed files with 1864 additions and 123 deletions

View File

@ -3,27 +3,45 @@
# Copyright (C) 2019 Intel Corporation. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
# Load the Emscripten toolchain (emcc/em++ are used further down) into this
# shell's environment.
source /opt/emsdk/emsdk_env.sh

# Lower-cased kernel name (e.g. "linux"); the segue AOT builds are gated on it.
PLATFORM=$(uname -s | tr 'A-Z' 'a-z')

# Build artifacts go to ./out, relative to the directory the script runs from.
OUT_DIR="${PWD}/out"
# wamrc is taken from the in-tree wamr-compiler build directory.
WAMRC_CMD="${PWD}/../../../wamr-compiler/build/wamrc"

# Working directories for the downloaded sources and the build output.
# OUT_DIR is quoted so that a working directory containing whitespace is not
# word-split into several mkdir arguments.
mkdir -p jetstream
mkdir -p tsf-src
mkdir -p "${OUT_DIR}"

# SIMD flags are enabled unless the first argument is exactly "--no-simd".
# ${1:-} keeps the comparison well-defined when no argument is passed.
if [[ "${1:-}" != "--no-simd" ]]; then
  NATIVE_SIMD_FLAGS="-msse2 -msse3 -msse4"
  WASM_SIMD_FLAGS="-msimd128 -msse2 -msse3 -msse4"
else
  NATIVE_SIMD_FLAGS=""
  WASM_SIMD_FLAGS=""
fi
# Fetch the JetStream benchmark sources into jetstream/ and apply the local
# patch to them.
#
# NOTE(review): every download appears twice here (plain `wget`, then
# `wget -N`) and `patch` is invoked twice. This looks like the pre-change and
# post-change lines of the diff rendered back to back - confirm which set is
# meant to be live before relying on this span as a script.
cd jetstream
echo "Download source files .."
wget https://browserbench.org/JetStream/wasm/gcc-loops.cpp
wget https://browserbench.org/JetStream/wasm/quicksort.c
wget https://browserbench.org/JetStream/wasm/HashSet.cpp
wget https://browserbench.org/JetStream/simple/float-mm.c
# -N turns on wget's timestamping, so an up-to-date local copy is not
# downloaded again on a re-run.
wget -N https://browserbench.org/JetStream/wasm/gcc-loops.cpp
wget -N https://browserbench.org/JetStream/wasm/quicksort.c
wget -N https://browserbench.org/JetStream/wasm/HashSet.cpp
wget -N https://browserbench.org/JetStream/simple/float-mm.c
patch -p1 < ../jetstream.patch
# NOTE(review): a bare `exit` returns the status of the last command executed,
# which here is the `[[ ... ]]` test itself (0), so the script stops but still
# reports success when patch fails.
if [[ $? != 0 ]]; then
exit
fi
echo "Patch source files .."
# -N makes patch ignore hunks that look reversed or already applied, and the
# result is not checked here.
patch -p1 -N < ../jetstream.patch
echo "Build gcc-loops with g++ .."
g++ -O3 -msse2 -msse3 -msse4 -o ${OUT_DIR}/gcc-loops_native gcc-loops.cpp
g++ -O3 ${NATIVE_SIMD_FLAGS} -o ${OUT_DIR}/gcc-loops_native gcc-loops.cpp
echo "Build gcc-loops with em++ .."
em++ -O3 -s STANDALONE_WASM=1 -msimd128 \
em++ -O3 -s STANDALONE_WASM=1 ${WASM_SIMD_FLAGS} \
-s INITIAL_MEMORY=1048576 \
-s TOTAL_STACK=32768 \
-s "EXPORTED_FUNCTIONS=['_main']" \
@ -33,11 +51,16 @@ em++ -O3 -s STANDALONE_WASM=1 -msimd128 \
# Compile the gcc-loops wasm module to a baseline AOT module.
# The tool path and file paths are quoted so that a checkout directory
# containing whitespace is not word-split.
echo "Compile gcc-loops.wasm to gcc-loops.aot"
"${WAMRC_CMD}" -o "${OUT_DIR}/gcc-loops.aot" "${OUT_DIR}/gcc-loops.wasm"

# On Linux only, also build a variant with the segue optimization enabled.
if [[ "${PLATFORM}" == "linux" ]]; then
  echo "Compile gcc-loops.wasm to gcc-loops_segue.aot"
  "${WAMRC_CMD}" --enable-segue -o "${OUT_DIR}/gcc-loops_segue.aot" "${OUT_DIR}/gcc-loops.wasm"
fi
echo "Build quicksort with gcc .."
gcc -O3 -msse2 -msse3 -msse4 -o ${OUT_DIR}/quicksort_native quicksort.c
gcc -O3 ${NATIVE_SIMD_FLAGS} -o ${OUT_DIR}/quicksort_native quicksort.c
echo "Build quicksort with emcc .."
emcc -O3 -s STANDALONE_WASM=1 -msimd128 \
emcc -O3 -s STANDALONE_WASM=1 ${WASM_SIMD_FLAGS} \
-s INITIAL_MEMORY=1048576 \
-s TOTAL_STACK=32768 \
-s "EXPORTED_FUNCTIONS=['_main']" \
@ -46,12 +69,17 @@ emcc -O3 -s STANDALONE_WASM=1 -msimd128 \
# Compile the quicksort wasm module to a baseline AOT module.
# The tool path and file paths are quoted so that a checkout directory
# containing whitespace is not word-split.
echo "Compile quicksort.wasm to quicksort.aot"
"${WAMRC_CMD}" -o "${OUT_DIR}/quicksort.aot" "${OUT_DIR}/quicksort.wasm"

# On Linux only, also build a variant with the segue optimization enabled.
if [[ "${PLATFORM}" == "linux" ]]; then
  echo "Compile quicksort.wasm to quicksort_segue.aot"
  "${WAMRC_CMD}" --enable-segue -o "${OUT_DIR}/quicksort_segue.aot" "${OUT_DIR}/quicksort.wasm"
fi
echo "Build HashSet with g++ .."
g++ -O3 -msse2 -msse3 -msse4 -o ${OUT_DIR}/HashSet_native HashSet.cpp \
g++ -O3 ${NATIVE_SIMD_FLAGS} -o ${OUT_DIR}/HashSet_native HashSet.cpp \
-lstdc++
echo "Build HashSet with em++ .."
em++ -O3 -s STANDALONE_WASM=1 -msimd128 \
em++ -O3 -s STANDALONE_WASM=1 ${WASM_SIMD_FLAGS} \
-s INITIAL_MEMORY=1048576 \
-s TOTAL_STACK=32768 \
-s "EXPORTED_FUNCTIONS=['_main']" \
@ -60,11 +88,16 @@ em++ -O3 -s STANDALONE_WASM=1 -msimd128 \
# Compile the HashSet wasm module to a baseline AOT module.
# The tool path and file paths are quoted so that a checkout directory
# containing whitespace is not word-split.
echo "Compile HashSet.wasm to HashSet.aot"
"${WAMRC_CMD}" -o "${OUT_DIR}/HashSet.aot" "${OUT_DIR}/HashSet.wasm"

# On Linux only, also build a variant with the segue optimization enabled.
if [[ "${PLATFORM}" == "linux" ]]; then
  echo "Compile HashSet.wasm to HashSet_segue.aot"
  "${WAMRC_CMD}" --enable-segue -o "${OUT_DIR}/HashSet_segue.aot" "${OUT_DIR}/HashSet.wasm"
fi
echo "Build float-mm with gcc .."
gcc -O3 -msse2 -msse3 -msse4 -o ${OUT_DIR}/float-mm_native float-mm.c
gcc -O3 ${NATIVE_SIMD_FLAGS} -o ${OUT_DIR}/float-mm_native float-mm.c
echo "Build float-mm with emcc .."
emcc -O3 -s STANDALONE_WASM=1 -msimd128 \
emcc -O3 -s STANDALONE_WASM=1 ${WASM_SIMD_FLAGS} \
-s INITIAL_MEMORY=1048576 \
-s TOTAL_STACK=32768 \
-s "EXPORTED_FUNCTIONS=['_main']" \
@ -72,3 +105,70 @@ emcc -O3 -s STANDALONE_WASM=1 -msimd128 \
# Compile the float-mm wasm module to a baseline AOT module.
# The tool path and file paths are quoted so that a checkout directory
# containing whitespace is not word-split.
echo "Compile float-mm.wasm to float-mm.aot"
"${WAMRC_CMD}" -o "${OUT_DIR}/float-mm.aot" "${OUT_DIR}/float-mm.wasm"

# On Linux only, also build a variant with the segue optimization enabled.
if [[ "${PLATFORM}" == "linux" ]]; then
  echo "Compile float-mm.wasm to float-mm_segue.aot"
  "${WAMRC_CMD}" --enable-segue -o "${OUT_DIR}/float-mm_segue.aot" "${OUT_DIR}/float-mm.wasm"
fi
# Build the TSF benchmark: download its sources, patch them, then produce a
# native binary, a standalone wasm module and the AOT module(s).
#
# Stop if the directory is missing; otherwise the downloads and the patch
# would land in whatever directory the script happens to be in.
cd ../tsf-src || exit 1

# C translation units; the same list is handed to both compilers below.
tsf_srcs="tsf_asprintf.c tsf_buffer.c tsf_error.c tsf_reflect.c tsf_st.c \
          tsf_type.c tsf_io.c tsf_native.c tsf_generator.c tsf_st_typetable.c \
          tsf_parser.c tsf_buf_writer.c tsf_buf_reader.c tsf_primitive.c \
          tsf_type_table.c tsf_copier.c tsf_destructor.c tsf_gpc_code_gen.c \
          gpc_code_gen_util.c gpc_threaded.c gpc_intable.c gpc_instruction.c \
          gpc_program.c gpc_proto.c gpc_stack_height.c tsf_serial_in_man.c \
          tsf_serial_out_man.c tsf_type_in_map.c tsf_type_out_map.c \
          tsf_stream_file_input.c tsf_stream_file_output.c tsf_sort.c \
          tsf_version.c tsf_named_type.c tsf_io_utils.c tsf_zip_attr.c \
          tsf_zip_reader.c tsf_zip_writer.c tsf_zip_abstract.c tsf_limits.c \
          tsf_ra_type_man.c tsf_adaptive_reader.c tsf_sha1.c tsf_sha1_writer.c \
          tsf_fsdb.c tsf_fsdb_protocol.c tsf_define_helpers.c tsf_ir.c \
          tsf_ir_different.c tsf_ir_speed.c"

# Everything that has to be downloaded: the sources above plus the headers
# and the .gen fragments.
tsf_files="${tsf_srcs} config.h gpc_worklist.h \
           tsf_config_stub.h tsf.h tsf_internal.h tsf_region.h tsf_types.h \
           gpc.h tsf_atomics.h tsf_define_helpers.h tsf_indent.h tsf_inttypes.h \
           tsf_serial_protocol.h tsf_util.h gpc_int_common.h tsf_build_defines.h \
           tsf_format.h tsf_internal_config.h tsf_ir_different.h tsf_sha1.h \
           tsf_zip_abstract.h gpc_internal.h tsf_config.h tsf_fsdb_protocol.h \
           tsf_internal_config_stub.h tsf_ir.h tsf_st.h \
           gpc_instruction_dispatch.gen gpc_instruction_stack_effects.gen \
           gpc_instruction_to_string.gen gpc_instruction_size.gen \
           gpc_instruction_static_size.gen gpc_interpreter.gen"

echo "Download tsf source files .."
# ${tsf_files} is intentionally unquoted: it is a whitespace-separated list of
# plain file names that must be split into individual words.
for t in ${tsf_files}; do
  # -N (timestamping) avoids re-downloading files that are already current.
  # Exit with a failure status on the first failed download; a bare `exit`
  # placed after a `[[ $? != 0 ]]` test would report the test's status (0).
  wget -N "https://browserbench.org/JetStream/wasm/TSF/${t}" || exit 1
done

# -N makes patch skip hunks that are already applied. The status is left
# unchecked, as before, so re-running on an already patched tree does not
# abort the build.
patch -p1 -N < ../tsf.patch

echo "Build tsf with gcc .."
# ${NATIVE_SIMD_FLAGS} and ${tsf_srcs} are left unquoted on purpose so they
# expand to separate arguments (or to nothing when SIMD is disabled).
gcc \
  -o "${OUT_DIR}/tsf_native" -O3 ${NATIVE_SIMD_FLAGS} \
  -I. -DTSF_BUILD_SYSTEM=1 \
  ${tsf_srcs} -lm

echo "Build tsf standalone with wasi-sdk .."
/opt/wasi-sdk/bin/clang -O3 ${WASM_SIMD_FLAGS} -z stack-size=1048576 \
  -Wl,--initial-memory=52428800 \
  -Wl,--export=main \
  -Wl,--export=__heap_base,--export=__data_end \
  -I. -DTSF_BUILD_SYSTEM=1 \
  -Wl,--allow-undefined \
  -o "${OUT_DIR}/tsf.wasm" \
  ${tsf_srcs}

echo "Compile tsf.wasm to tsf.aot"
"${WAMRC_CMD}" -o "${OUT_DIR}/tsf.aot" "${OUT_DIR}/tsf.wasm"

# On Linux only, also build a variant with the segue optimization enabled.
if [[ "${PLATFORM}" == "linux" ]]; then
  echo "Compile tsf.wasm to tsf_segue.aot"
  "${WAMRC_CMD}" --enable-segue -o "${OUT_DIR}/tsf_segue.aot" "${OUT_DIR}/tsf.wasm"
fi

View File

@ -1,15 +1,18 @@
diff -urN jetstream-org/HashSet.cpp jetstream/HashSet.cpp
--- jetstream-org/HashSet.cpp 2020-10-30 04:12:42.000000000 +0800
+++ jetstream/HashSet.cpp 2022-01-24 17:11:08.619831711 +0800
@@ -24,6 +24,7 @@
--- jetstream-org/HashSet.cpp 2020-10-30 04:12:42.000000000 +0800
+++ jetstream/HashSet.cpp 2022-01-24 17:11:08.619831711 +0800
@@ -22,8 +22,10 @@
#include <algorithm>
#include <memory>
+#include <limits>
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include <sys/time.h>
// Compile with: xcrun clang++ -o HashSet HashSet.cpp -O2 -W -framework Foundation -licucore -std=c++11 -fvisibility=hidden -DNDEBUG=1
@@ -76,7 +77,7 @@
@@ -76,7 +78,7 @@
inline ToType bitwise_cast(FromType from)
{
typename std::remove_const<ToType>::type to { };
@ -17,4 +20,4 @@ diff -urN jetstream-org/HashSet.cpp jetstream/HashSet.cpp
+ memcpy(&to, &from, sizeof(to));
return to;
}

View File

@ -3,6 +3,8 @@
# Copyright (C) 2019 Intel Corporation. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
# Lower-cased kernel name (e.g. "linux"). It picks the iwasm platform build
# directory and decides whether the segue column is reported.
PLATFORM="$(uname -s | tr 'A-Z' 'a-z')"
# All paths are anchored at the directory the script is launched from.
CUR_DIR="${PWD}"
# Directory holding the native binaries and AOT modules to run.
OUT_DIR="${CUR_DIR}/out"
# Text file the timing results are written to.
REPORT="${CUR_DIR}/report.txt"
@ -13,7 +15,7 @@ IWASM_CMD=$CUR_DIR/../../../product-mini/platforms/${PLATFORM}/build/iwasm
# NOTE(review): presumably a column width for benchmark names in the report;
# its use is not visible in this excerpt - confirm.
BENCH_NAME_MAX_LEN=20
# Benchmarks to run, in the order they are listed.
# NOTE(review): two assignments appear back to back; as written, the second
# one (which adds tsf and reorders the list) is the value in effect. The first
# looks like the pre-change line of the diff - confirm.
JETSTREAM_CASES="gcc-loops quicksort HashSet float-mm"
JETSTREAM_CASES="gcc-loops HashSet tsf float-mm quicksort"
# Start every run with an empty report file.
rm -f $REPORT
touch $REPORT
@ -34,7 +36,11 @@ echo "Start to run cases, the result is written to report.txt"
#run benchmarks
# Switch to the output directory so each case can be addressed by bare file name.
cd $OUT_DIR
# Write the column headings to the report. On Linux an extra
# "iwasm-aot-segue" column is added.
# NOTE(review): the unconditional heading below and the if/else that follows
# both append a heading line. The unconditional one looks like the pre-change
# line of the diff - confirm only one is meant to be live.
echo -en "\t\t\t\t\t native\tiwasm-aot\n" >> $REPORT
if [[ ${PLATFORM} == "linux" ]]; then
echo -en "\t\t\t\t\t native\tiwasm-aot\tiwasm-aot-segue\n" >> $REPORT
else
echo -en "\t\t\t\t\t native\tiwasm-aot\n" >> $REPORT
fi
for t in $JETSTREAM_CASES
do
@ -46,7 +52,13 @@ do
# Tail of the per-case loop (its start is outside this excerpt): time the AOT
# module(s) for case $t and append the results to the current report row.
echo "run $t with iwasm aot .."
echo -en "\t" >> $REPORT
# The -f format wraps the elapsed time as "real-<seconds>-time"
# (assumes $TIME is GNU time - TODO confirm; it is defined outside this excerpt).
# 2>&1 folds stderr into the pipe, grep keeps only the wrapped line, and awk
# splits on '-' and prints the second field with no trailing newline (ORS="").
# NOTE(review): the next two lines differ only by `--dir=.`; the first looks
# like the pre-change line of the diff - confirm.
$TIME -f "real-%e-time" $IWASM_CMD ${t}.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT
$TIME -f "real-%e-time" $IWASM_CMD --dir=. ${t}.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT
# On Linux, time the segue-enabled AOT module as well, in its own column.
if [[ ${PLATFORM} == "linux" ]]; then
echo "run $t with iwasm aot segue .."
echo -en "\t" >> $REPORT
$TIME -f "real-%e-time" $IWASM_CMD --dir=. ${t}_segue.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT
fi
# Terminate the report row for this case.
echo -en "\n" >> $REPORT
done

View File

@ -0,0 +1,24 @@
diff -urN tsf-src-org/tsf_internal.h tsf-src/tsf_internal.h
--- tsf-src-org/tsf_internal.h 2023-03-31 10:49:45.000000000 +0800
+++ tsf-src/tsf_internal.h 2023-05-11 08:18:35.000000000 +0800
@@ -429,6 +429,7 @@
#endif
tsf_fsdb_connection_t *connection;
#endif
+ uint32_t __padding;
} remote;
} u;
tsf_limits_t *limits;
diff -urN tsf-src-org/tsf_ir_speed.c tsf-src/tsf_ir_speed.c
--- tsf-src-org/tsf_ir_speed.c 2023-03-31 10:49:45.000000000 +0800
+++ tsf-src/tsf_ir_speed.c 2023-05-11 08:18:35.000000000 +0800
@@ -63,6 +63,9 @@
Program_t *program;
unsigned elementIndex;
+ if (!(programIndex % 100))
+ printf("##programIndex: %u\n", programIndex);
+
CS(program = tsf_region_create(sizeof(Program_t)));
program->globals.len = numDecls + numDefns;