Implement Fast JIT dump call stack and perf profiling (#1633)

Implement dump call stack and perf profiling features for Fast JIT,
and refine some code.
This commit is contained in:
Wenyong Huang
2022-10-27 09:28:32 +08:00
committed by GitHub
parent 6adf9194d4
commit ef21f0c951
8 changed files with 240 additions and 139 deletions

View File

@ -5,6 +5,7 @@
#include "jit_emit_control.h"
#include "jit_emit_exception.h"
#include "jit_emit_function.h"
#include "../jit_frontend.h"
#include "../interpreter/wasm_loader.h"
@ -380,11 +381,51 @@ copy_block_arities(JitCompContext *cc, JitReg dst_frame_sp, uint8 *dst_types,
}
}
static void
static bool
handle_func_return(JitCompContext *cc, JitBlock *block)
{
JitReg prev_frame, prev_frame_sp;
JitReg ret_reg = 0;
#if WASM_ENABLE_PERF_PROFILING != 0
JitReg func_inst = jit_cc_new_reg_ptr(cc);
JitReg time_start = jit_cc_new_reg_I64(cc);
JitReg time_end = jit_cc_new_reg_I64(cc);
JitReg cur_exec_time = jit_cc_new_reg_I64(cc);
JitReg total_exec_time = jit_cc_new_reg_I64(cc);
JitReg total_exec_cnt = jit_cc_new_reg_I32(cc);
#endif
#if WASM_ENABLE_PERF_PROFILING != 0
/* time_end = os_time_get_boot_microsecond() */
if (!jit_emit_callnative(cc, os_time_get_boot_microsecond, time_end, NULL,
0)) {
return false;
}
/* time_start = cur_frame->time_started */
GEN_INSN(LDI64, time_start, cc->fp_reg,
NEW_CONST(I32, offsetof(WASMInterpFrame, time_started)));
/* cur_exec_time = time_end - time_start */
GEN_INSN(SUB, cur_exec_time, time_end, time_start);
/* func_inst = cur_frame->function */
GEN_INSN(LDPTR, func_inst, cc->fp_reg,
NEW_CONST(I32, offsetof(WASMInterpFrame, function)));
/* total_exec_time = func_inst->total_exec_time */
GEN_INSN(LDI64, total_exec_time, func_inst,
NEW_CONST(I32, offsetof(WASMFunctionInstance, total_exec_time)));
/* total_exec_time += cur_exec_time */
GEN_INSN(ADD, total_exec_time, total_exec_time, cur_exec_time);
/* func_inst->total_exec_time = total_exec_time */
GEN_INSN(STI64, total_exec_time, func_inst,
NEW_CONST(I32, offsetof(WASMFunctionInstance, total_exec_time)));
/* total_exec_cnt = func_inst->total_exec_cnt */
GEN_INSN(LDI32, total_exec_cnt, func_inst,
NEW_CONST(I32, offsetof(WASMFunctionInstance, total_exec_cnt)));
/* total_exec_cnt++ */
GEN_INSN(ADD, total_exec_cnt, total_exec_cnt, NEW_CONST(I32, 1));
/* func_inst->total_exec_cnt = total_exec_cnt */
GEN_INSN(STI32, total_exec_cnt, func_inst,
NEW_CONST(I32, offsetof(WASMFunctionInstance, total_exec_cnt)));
#endif
prev_frame = jit_cc_new_reg_ptr(cc);
prev_frame_sp = jit_cc_new_reg_ptr(cc);
@ -420,6 +461,8 @@ handle_func_return(JitCompContext *cc, JitBlock *block)
GEN_INSN(MOV, cc->fp_reg, prev_frame);
/* return 0 */
GEN_INSN(RETURNBC, NEW_CONST(I32, JIT_INTERP_ACTION_NORMAL), ret_reg, 0);
return true;
}
/**
@ -446,7 +489,9 @@ handle_op_end(JitCompContext *cc, uint8 **p_frame_ip, bool is_block_polymorphic)
create the end basic block, just continue to translate
the following opcodes */
if (block->label_type == LABEL_TYPE_FUNCTION) {
handle_func_return(cc, block);
if (!handle_func_return(cc, block)) {
return false;
}
SET_BB_END_BCIP(cc->cur_basic_block, *p_frame_ip - 1);
clear_values(jit_frame);
}
@ -548,7 +593,10 @@ handle_op_end(JitCompContext *cc, uint8 **p_frame_ip, bool is_block_polymorphic)
block = jit_block_stack_pop(&cc->block_stack);
if (block->label_type == LABEL_TYPE_FUNCTION) {
handle_func_return(cc, block);
if (!handle_func_return(cc, block)) {
jit_block_destroy(block);
goto fail;
}
SET_BB_END_BCIP(cc->cur_basic_block, *p_frame_ip - 1);
clear_values(jit_frame);
}
@ -1190,7 +1238,9 @@ jit_compile_op_return(JitCompContext *cc, uint8 **p_frame_ip)
bh_assert(block_func);
handle_func_return(cc, block_func);
if (!handle_func_return(cc, block_func)) {
return false;
}
SET_BB_END_BCIP(cc->cur_basic_block, *p_frame_ip - 1);
clear_values(cc->jit_frame);

View File

@ -800,11 +800,9 @@ emit_callnative(JitCompContext *cc, JitReg native_func_reg, JitReg res,
char *f32_arg_names[] = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" };
char *f64_arg_names[] = { "xmm0_f64", "xmm1_f64", "xmm2_f64",
"xmm3_f64", "xmm4_f64", "xmm5_f64" };
JitReg i64_arg_regs[6], f32_arg_regs[6], f64_arg_regs[6], res_hreg = 0;
JitReg i64_arg_regs[6], f32_arg_regs[6], f64_arg_regs[6], res_reg = 0;
JitReg eax_hreg = jit_codegen_get_hreg_by_name("eax");
JitReg rax_hreg = jit_codegen_get_hreg_by_name("rax");
JitReg xmm0_hreg = jit_codegen_get_hreg_by_name("xmm0");
JitReg xmm0_f64_hreg = jit_codegen_get_hreg_by_name("xmm0_f64");
uint32 i, i64_reg_idx, float_reg_idx;
bh_assert(param_count <= 6);
@ -839,16 +837,16 @@ emit_callnative(JitCompContext *cc, JitReg native_func_reg, JitReg res,
if (res) {
switch (jit_reg_kind(res)) {
case JIT_REG_KIND_I32:
res_hreg = eax_hreg;
res_reg = eax_hreg;
break;
case JIT_REG_KIND_I64:
res_hreg = rax_hreg;
res_reg = res;
break;
case JIT_REG_KIND_F32:
res_hreg = xmm0_hreg;
res_reg = xmm0_hreg;
break;
case JIT_REG_KIND_F64:
res_hreg = xmm0_f64_hreg;
res_reg = res;
break;
default:
bh_assert(0);
@ -856,7 +854,7 @@ emit_callnative(JitCompContext *cc, JitReg native_func_reg, JitReg res,
}
}
insn = GEN_INSN(CALLNATIVE, res_hreg, native_func_reg, param_count);
insn = GEN_INSN(CALLNATIVE, res_reg, native_func_reg, param_count);
if (!insn) {
return false;
}
@ -880,8 +878,8 @@ emit_callnative(JitCompContext *cc, JitReg native_func_reg, JitReg res,
}
}
if (res) {
GEN_INSN(MOV, res, res_hreg);
if (res && res != res_reg) {
GEN_INSN(MOV, res, res_reg);
}
return true;

View File

@ -177,38 +177,6 @@ get_global_type(const WASMModule *module, uint32 global_idx)
}
}
/**
 * Compute the byte offset of a global's data for the given module.
 *
 * The result is the sum of three parts:
 *   1. the offset of global_table_data.bytes inside WASMModuleInstance,
 *   2. the space taken by the memory instances (imported + defined),
 *   3. the data_offset recorded for the global itself.
 * NOTE(review): presumably the returned value is relative to the start of
 * the WASMModuleInstance - confirm against the callers.
 *
 * @param module the WASM module that declares or imports the global
 * @param global_idx index of the global; indices below
 *        module->import_global_count refer to imported globals, the
 *        remaining ones to globals defined by the module itself
 *
 * @return the computed offset of the global's data
 */
static uint32
get_global_data_offset(const WASMModule *module, uint32 global_idx)
{
uint32 module_inst_struct_size =
(uint32)offsetof(WASMModuleInstance, global_table_data.bytes);
/* One WASMMemoryInstance per imported memory and per defined memory */
uint32 mem_inst_size =
(uint32)sizeof(WASMMemoryInstance)
* (module->import_memory_count + module->memory_count);
uint32 global_base_offset;
#if WASM_ENABLE_JIT != 0
/* If the module doesn't have memory, reserve one mem_info space
with empty content to align with the LLVM JIT compiler */
if (mem_inst_size == 0)
mem_inst_size = (uint32)sizeof(WASMMemoryInstance);
#endif
/* Size of module inst and memory instances */
global_base_offset = module_inst_struct_size + mem_inst_size;
if (global_idx < module->import_global_count) {
/* Imported global: look it up in the import table */
const WASMGlobalImport *import_global =
&((module->import_globals + global_idx)->u.global);
return global_base_offset + import_global->data_offset;
}
else {
/* Defined global: rebase the index past the imported globals */
const WASMGlobal *global =
module->globals + (global_idx - module->import_global_count);
return global_base_offset + global->data_offset;
}
}
bool
jit_compile_op_get_global(JitCompContext *cc, uint32 global_idx)
{
@ -219,7 +187,8 @@ jit_compile_op_get_global(JitCompContext *cc, uint32 global_idx)
bh_assert(global_idx < cc->cur_wasm_module->import_global_count
+ cc->cur_wasm_module->global_count);
data_offset = get_global_data_offset(cc->cur_wasm_module, global_idx);
data_offset =
jit_frontend_get_global_data_offset(cc->cur_wasm_module, global_idx);
global_type = get_global_type(cc->cur_wasm_module, global_idx);
switch (global_type) {
@ -280,7 +249,8 @@ jit_compile_op_set_global(JitCompContext *cc, uint32 global_idx,
bh_assert(global_idx < cc->cur_wasm_module->import_global_count
+ cc->cur_wasm_module->global_count);
data_offset = get_global_data_offset(cc->cur_wasm_module, global_idx);
data_offset =
jit_frontend_get_global_data_offset(cc->cur_wasm_module, global_idx);
global_type = get_global_type(cc->cur_wasm_module, global_idx);
switch (global_type) {