aot compiler: Use larger alignment for load/store when possible (#3552)
Consider the following wasm module:
```wast
(module
  (func (export "foo")
    i32.const 0x104
    i32.const 0x12345678
    i32.store
  )
  (memory 1 1)
)
```
While the address (0x104) is perfectly aligned for i32.store,
our aot compiler uses 1-byte alignment for load/store LLVM IR
instructions, which often produces inefficient machine code,
especially on alignment-sensitive targets.
For example, the above "foo" function is compiled into the
following xtensa machine code.
```
0000002c <aot_func_internal#0>:
2c: 004136 entry a1, 32
2f: 07a182 movi a8, 0x107
32: 828a add.n a8, a2, a8
34: 291c movi.n a9, 18
36: 004892 s8i a9, a8, 0
39: 06a182 movi a8, 0x106
3c: 828a add.n a8, a2, a8
3e: ffff91 l32r a9, 3c <aot_func_internal#0+0x10> (ff91828a <aot_func_internal#0+0xff91825e>)
3e: R_XTENSA_SLOT0_OP .literal+0x8
41: 004892 s8i a9, a8, 0
44: 05a182 movi a8, 0x105
47: 828a add.n a8, a2, a8
49: ffff91 l32r a9, 48 <aot_func_internal#0+0x1c> (ffff9182 <aot_func_internal#0+0xffff9156>)
49: R_XTENSA_SLOT0_OP .literal+0xc
4c: 41a890 srli a10, a9, 8
4f: 0048a2 s8i a10, a8, 0
52: 04a182 movi a8, 0x104
55: 828a add.n a8, a2, a8
57: 004892 s8i a9, a8, 0
5a: f01d retw.n
```
Note that each of the four bytes is stored separately using
the one-byte store instruction, s8i.
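The byte-wise lowering follows directly from the alignment annotation. A minimal sketch of the old pattern, simplified from the BUILD_STORE macro (emit_store_old is a hypothetical name for illustration):
```c
#include <llvm-c/Core.h>

/* Before this change: every access is annotated as 1-byte aligned, so
 * the backend must assume the address may be misaligned and, on strict
 * targets like xtensa, splits the i32 store into four s8i stores. */
static LLVMValueRef
emit_store_old(LLVMBuilderRef builder, LLVMValueRef value, LLVMValueRef maddr)
{
    LLVMValueRef res = LLVMBuildStore(builder, value, maddr);
    LLVMSetAlignment(res, 1); /* conservative: nothing is known */
    return res;
}
```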
This commit tries to use larger alignments for load/store LLVM IR
instructions when possible. With this change, the above example is
compiled into the following machine code, which seems more reasonable.
```
0000002c <aot_func_internal#0>:
2c: 004136 entry a1, 32
2f: ffff81 l32r a8, 2c <aot_func_internal#0> (81004136 <aot_func_internal#0+0x8100410a>)
2f: R_XTENSA_SLOT0_OP .literal+0x8
32: 416282 s32i a8, a2, 0x104
35: f01d retw.n
```
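The gain comes from inferring, at compile time, the largest alignment the constant offset can guarantee. A standalone sketch of the inference, mirroring the new logic in aot_check_memory_overflow (infer_known_align is a hypothetical name; the cap of 8 matches the base-address guarantee asserted in the wasm_memory.c hunks below):
```c
#include <strings.h> /* ffs() */

/* Largest power-of-two alignment provable from a constant offset,
 * capped at 8 because the linear memory base is only guaranteed to be
 * 8-byte aligned. */
static unsigned int
infer_known_align(unsigned int mem_offset)
{
    const unsigned int max_align = 8;
    int shift = ffs((int)mem_offset); /* 1-based index of lowest set bit */
    if (shift == 0)
        return max_align; /* offset 0 is limited only by the base alignment */
    unsigned int align = 1u << (shift - 1);
    return align > max_align ? max_align : align;
}
```
For the example above, infer_known_align(0x104) returns 4, which is enough for the backend to emit a single s32i.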
Note: this doesn't work well for --xip because aot_load_const_from_table()
hides the constness of the value. Maybe we need our own mechanism to
propagate the constness and the value.
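At each load/store site, the inferred value is then threaded through and attached to the emitted instruction. A simplified sketch of the updated pattern (emit_store_new is a hypothetical name; the real change lives in the BUILD_LOAD/BUILD_STORE macros in the diff below):
```c
#include <llvm-c/Core.h>

/* After this change: the alignment proven by aot_check_memory_overflow
 * is attached to the store, e.g. 4 for offset 0x104, letting the xtensa
 * backend emit a single s32i. */
static LLVMValueRef
emit_store_new(LLVMBuilderRef builder, LLVMValueRef value, LLVMValueRef maddr,
               unsigned int known_align)
{
    LLVMValueRef res = LLVMBuildStore(builder, value, maddr);
    LLVMSetAlignment(res, known_align);
    return res;
}
```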
```diff
@@ -883,6 +883,12 @@ wasm_enlarge_memory_internal(WASMModuleInstance *module, uint32 inc_page_count)
     }
 #endif /* end of WASM_MEM_ALLOC_WITH_USAGE */
 
+    /*
+     * AOT compiler assumes at least 8 byte alignment.
+     * see aot_check_memory_overflow.
+     */
+    bh_assert(((uintptr_t)memory->memory_data & 0x7) == 0);
+
     memory->num_bytes_per_page = num_bytes_per_page;
     memory->cur_page_count = total_page_count;
     memory->max_page_count = max_page_count;
@@ -1032,5 +1038,11 @@ wasm_allocate_linear_memory(uint8 **data, bool is_shared_memory,
 #endif
     }
 
+    /*
+     * AOT compiler assumes at least 8 byte alignment.
+     * see aot_check_memory_overflow.
+     */
+    bh_assert(((uintptr_t)*data & 0x7) == 0);
+
     return BHT_OK;
 }
@@ -96,7 +96,8 @@ get_memory_curr_page_count(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
 
 LLVMValueRef
 aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
-                          mem_offset_t offset, uint32 bytes, bool enable_segue)
+                          mem_offset_t offset, uint32 bytes, bool enable_segue,
+                          unsigned int *alignp)
 {
     LLVMValueRef offset_const =
         MEMORY64_COND_VALUE(I64_CONST(offset), I32_CONST(offset));
@@ -180,6 +181,26 @@ aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
             comp_ctx->comp_data->memories[0].init_page_count;
         uint64 mem_data_size = (uint64)num_bytes_per_page * init_page_count;
 
+        if (alignp != NULL) {
+            /*
+             * A note about max_align below:
+             * the assumption here is the base address of a linear memory
+             * has the natural alignment. for platforms using mmap, it can
+             * be even larger. for now, use a conservative value.
+             */
+            const int max_align = 8;
+            int shift = ffs((int)(unsigned int)mem_offset);
+            if (shift == 0) {
+                *alignp = max_align;
+            }
+            else {
+                unsigned int align = 1 << (shift - 1);
+                if (align > max_align) {
+                    align = max_align;
+                }
+                *alignp = align;
+            }
+        }
         if (mem_offset + bytes <= mem_data_size) {
             /* inside memory space */
             if (comp_ctx->pointer_size == sizeof(uint64))
@@ -205,6 +226,9 @@ aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
             return maddr;
         }
     }
+    else if (alignp != NULL) {
+        *alignp = 1;
+    }
 
     if (is_target_64bit) {
         if (!(offset_const = LLVMBuildZExt(comp_ctx->builder, offset_const,
@@ -324,7 +348,7 @@ fail:
             aot_set_last_error("llvm build load failed.");           \
             goto fail;                                               \
         }                                                            \
-        LLVMSetAlignment(value, 1);                                  \
+        LLVMSetAlignment(value, known_align);                        \
     } while (0)
 
 #define BUILD_TRUNC(value, data_type)                                \
@@ -343,7 +367,7 @@ fail:
            aot_set_last_error("llvm build store failed.");           \
            goto fail;                                                \
        }                                                             \
-       LLVMSetAlignment(res, 1);                                     \
+       LLVMSetAlignment(res, known_align);                           \
    } while (0)
 
 #define BUILD_SIGN_EXT(dst_type)                                     \
@@ -445,8 +469,9 @@ aot_compile_op_i32_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     LLVMTypeRef data_type;
     bool enable_segue = comp_ctx->enable_segue_i32_load;
 
+    unsigned int known_align;
     if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
-                                            enable_segue)))
+                                            enable_segue, &known_align)))
         return false;
 
     switch (bytes) {
@@ -515,8 +540,9 @@ aot_compile_op_i64_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     LLVMTypeRef data_type;
     bool enable_segue = comp_ctx->enable_segue_i64_load;
 
+    unsigned int known_align;
     if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
-                                            enable_segue)))
+                                            enable_segue, &known_align)))
         return false;
 
     switch (bytes) {
@@ -591,8 +617,9 @@ aot_compile_op_f32_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     LLVMValueRef maddr, value;
     bool enable_segue = comp_ctx->enable_segue_f32_load;
 
+    unsigned int known_align;
     if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 4,
-                                            enable_segue)))
+                                            enable_segue, &known_align)))
         return false;
 
     if (!enable_segue)
@@ -614,8 +641,9 @@ aot_compile_op_f64_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     LLVMValueRef maddr, value;
     bool enable_segue = comp_ctx->enable_segue_f64_load;
 
+    unsigned int known_align;
     if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 8,
-                                            enable_segue)))
+                                            enable_segue, &known_align)))
         return false;
 
     if (!enable_segue)
@@ -640,8 +668,9 @@ aot_compile_op_i32_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 
     POP_I32(value);
 
+    unsigned int known_align;
     if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
-                                            enable_segue)))
+                                            enable_segue, &known_align)))
         return false;
 
     switch (bytes) {
@@ -691,8 +720,9 @@ aot_compile_op_i64_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 
     POP_I64(value);
 
+    unsigned int known_align;
     if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
-                                            enable_segue)))
+                                            enable_segue, &known_align)))
         return false;
 
     switch (bytes) {
@@ -748,8 +778,9 @@ aot_compile_op_f32_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 
     POP_F32(value);
 
+    unsigned int known_align;
     if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 4,
-                                            enable_segue)))
+                                            enable_segue, &known_align)))
         return false;
 
     if (!enable_segue)
@@ -771,8 +802,9 @@ aot_compile_op_f64_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 
     POP_F64(value);
 
+    unsigned int known_align;
     if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 8,
-                                            enable_segue)))
+                                            enable_segue, &known_align)))
         return false;
 
     if (!enable_segue)
@@ -1302,7 +1334,7 @@ aot_compile_op_atomic_rmw(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
         POP_I64(value);
 
     if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
-                                            enable_segue)))
+                                            enable_segue, NULL)))
         return false;
 
     if (!check_memory_alignment(comp_ctx, func_ctx, maddr, align))
@@ -1392,7 +1424,7 @@ aot_compile_op_atomic_cmpxchg(AOTCompContext *comp_ctx,
     }
 
     if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
-                                            enable_segue)))
+                                            enable_segue, NULL)))
         return false;
 
     if (!check_memory_alignment(comp_ctx, func_ctx, maddr, align))
@@ -1505,7 +1537,7 @@ aot_compile_op_atomic_wait(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     CHECK_LLVM_CONST(is_wait64);
 
     if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
-                                            false)))
+                                            false, NULL)))
         return false;
 
     if (!check_memory_alignment(comp_ctx, func_ctx, maddr, align))
@@ -1579,7 +1611,7 @@ aot_compiler_op_atomic_notify(AOTCompContext *comp_ctx,
     POP_I32(count);
 
     if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
-                                            false)))
+                                            false, NULL)))
         return false;
 
     if (!check_memory_alignment(comp_ctx, func_ctx, maddr, align))
@@ -53,7 +53,8 @@ aot_compile_op_f64_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 
 LLVMValueRef
 aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
-                          mem_offset_t offset, uint32 bytes, bool enable_segue);
+                          mem_offset_t offset, uint32 bytes, bool enable_segue,
+                          unsigned int *alignp);
 
 bool
 aot_compile_op_memory_size(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
@@ -19,7 +19,7 @@ simd_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align,
     LLVMValueRef maddr, data;
 
     if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset,
-                                            data_length, enable_segue))) {
+                                            data_length, enable_segue, NULL))) {
         HANDLE_FAILURE("aot_check_memory_overflow");
         return NULL;
     }
@@ -287,7 +287,7 @@ simd_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align,
     LLVMValueRef maddr, result;
 
     if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset,
-                                            data_length, enable_segue)))
+                                            data_length, enable_segue, NULL)))
         return false;
 
     if (!(maddr = LLVMBuildBitCast(comp_ctx->builder, maddr, value_ptr_type,
@@ -100,7 +100,8 @@ TEST_F(compilation_aot_emit_memory_test, aot_check_memory_overflow)
 
     for (uint32 i = 0; i < DEFAULT_CYCLE_TIMES; i++) {
         offset = (1 + (rand() % (DEFAULT_MAX_RAND_NUM - 1 + 1)));
-        aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes, false);
+        aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes, false,
+                                  NULL);
     }
 }
```