From 37d7439ef9fb227a2afa778a79b3812d591c041a Mon Sep 17 00:00:00 2001 From: Marcin Kolny Date: Wed, 14 Aug 2024 01:46:24 +0100 Subject: [PATCH] [refactoring] Extract read leb to a separate file, share the code between loader and mini loader (#3701) There's probably a number of other places where the bh_leb_read could be used (e.g. aot loader) but I'm making the change as small as possible. Further refactoring can be done later. --- core/iwasm/common/wasm_loader_common.c | 53 +++++++++++--- core/iwasm/common/wasm_loader_common.h | 8 +++ core/iwasm/interpreter/wasm_loader.c | 81 +--------------------- core/iwasm/interpreter/wasm_mini_loader.c | 69 +----------------- core/shared/utils/bh_leb128.c | 77 ++++++++++++++++++++ core/shared/utils/bh_leb128.h | 30 ++++++++ product-mini/platforms/alios-things/aos.mk | 1 + product-mini/platforms/nuttx/wamr.mk | 1 + tests/unit/shared-utils/bh_leb128_test.cc | 62 +++++++++++++++++ 9 files changed, 223 insertions(+), 159 deletions(-) create mode 100644 core/shared/utils/bh_leb128.c create mode 100644 core/shared/utils/bh_leb128.h create mode 100644 tests/unit/shared-utils/bh_leb128_test.cc diff --git a/core/iwasm/common/wasm_loader_common.c b/core/iwasm/common/wasm_loader_common.c index 179639fc..6dd31be2 100644 --- a/core/iwasm/common/wasm_loader_common.c +++ b/core/iwasm/common/wasm_loader_common.c @@ -3,14 +3,15 @@ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception */ #include "wasm_loader_common.h" +#include "bh_leb128.h" #include "bh_log.h" #if WASM_ENABLE_GC != 0 #include "../common/gc/gc_type.h" #endif -static void -set_error_buf(char *error_buf, uint32 error_buf_size, const char *string, - bool is_aot) +void +wasm_loader_set_error_buf(char *error_buf, uint32 error_buf_size, + const char *string, bool is_aot) { if (error_buf != NULL) { snprintf(error_buf, error_buf_size, "%s module load failed: %s", @@ -29,30 +30,30 @@ wasm_memory_check_flags(const uint8 mem_flag, char *error_buf, if (mem_flag & SHARED_MEMORY_FLAG) { LOG_VERBOSE("shared memory flag was found, please enable shared " "memory, lib-pthread or lib-wasi-threads"); - set_error_buf(error_buf, error_buf_size, "invalid limits flags", - is_aot); + wasm_loader_set_error_buf(error_buf, error_buf_size, + "invalid limits flags", is_aot); return false; } #endif #if WASM_ENABLE_MEMORY64 == 0 if (mem_flag & MEMORY64_FLAG) { LOG_VERBOSE("memory64 flag was found, please enable memory64"); - set_error_buf(error_buf, error_buf_size, "invalid limits flags", - is_aot); + wasm_loader_set_error_buf(error_buf, error_buf_size, + "invalid limits flags", is_aot); return false; } #endif } if (mem_flag > MAX_PAGE_COUNT_FLAG + SHARED_MEMORY_FLAG + MEMORY64_FLAG) { - set_error_buf(error_buf, error_buf_size, "invalid limits flags", - is_aot); + wasm_loader_set_error_buf(error_buf, error_buf_size, + "invalid limits flags", is_aot); return false; } else if ((mem_flag & SHARED_MEMORY_FLAG) && !(mem_flag & MAX_PAGE_COUNT_FLAG)) { - set_error_buf(error_buf, error_buf_size, - "shared memory must have maximum", is_aot); + wasm_loader_set_error_buf(error_buf, error_buf_size, + "shared memory must have maximum", is_aot); return false; } @@ -130,3 +131,33 @@ is_indices_overflow(uint32 import, uint32 other, char *error_buf, return false; } + +bool +read_leb(uint8 **p_buf, const uint8 *buf_end, uint32 maxbits, bool sign, + uint64 *p_result, char *error_buf, uint32 error_buf_size) +{ + size_t offset = 0; + bh_leb_read_status_t status = + bh_leb_read(*p_buf, buf_end, maxbits, sign, p_result, &offset); + + switch (status) { + case BH_LEB_READ_SUCCESS: + *p_buf += offset; + return true; + case BH_LEB_READ_TOO_LONG: + wasm_loader_set_error_buf(error_buf, error_buf_size, + "integer representation too long", false); + return false; + case BH_LEB_READ_OVERFLOW: + wasm_loader_set_error_buf(error_buf, error_buf_size, + "integer too large", false); + return false; + case BH_LEB_READ_UNEXPECTED_END: + wasm_loader_set_error_buf(error_buf, error_buf_size, + "unexpected end", false); + return false; + default: + bh_assert(false); + return false; + } +} diff --git a/core/iwasm/common/wasm_loader_common.h b/core/iwasm/common/wasm_loader_common.h index 6bd0cf6c..d574110b 100644 --- a/core/iwasm/common/wasm_loader_common.h +++ b/core/iwasm/common/wasm_loader_common.h @@ -30,6 +30,14 @@ bool is_indices_overflow(uint32 import, uint32 other, char *error_buf, uint32 error_buf_size); +bool +read_leb(uint8 **p_buf, const uint8 *buf_end, uint32 maxbits, bool sign, + uint64 *p_result, char *error_buf, uint32 error_buf_size); + +void +wasm_loader_set_error_buf(char *error_buf, uint32 error_buf_size, + const char *string, bool is_aot); + #ifdef __cplusplus } #endif diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index 785b2898..10324ac4 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -56,10 +56,7 @@ has_module_memory64(WASMModule *module) static void set_error_buf(char *error_buf, uint32 error_buf_size, const char *string) { - if (error_buf != NULL) { - snprintf(error_buf, error_buf_size, "WASM module load failed: %s", - string); - } + wasm_loader_set_error_buf(error_buf, error_buf_size, string, false); } #if WASM_ENABLE_MEMORY64 != 0 @@ -131,82 +128,6 @@ check_buf1(const uint8 *buf, const uint8 *buf_end, uint32 length, #define skip_leb_int32(p, p_end) skip_leb(p) #define skip_leb_mem_offset(p, p_end) skip_leb(p) -static bool -read_leb(uint8 **p_buf, const uint8 *buf_end, uint32 maxbits, bool sign, - uint64 *p_result, char *error_buf, uint32 error_buf_size) -{ - const uint8 *buf = *p_buf; - uint64 result = 0; - uint32 shift = 0; - uint32 offset = 0, bcnt = 0; - uint64 byte; - - while (true) { - /* uN or SN must not exceed ceil(N/7) bytes */ - if (bcnt + 1 > (maxbits + 6) / 7) { - set_error_buf(error_buf, error_buf_size, - "integer representation too long"); - return false; - } - - CHECK_BUF(buf, buf_end, offset + 1); - byte = buf[offset]; - offset += 1; - result |= ((byte & 0x7f) << shift); - shift += 7; - bcnt += 1; - if ((byte & 0x80) == 0) { - break; - } - } - - if (!sign && maxbits == 32 && shift >= maxbits) { - /* The top bits set represent values > 32 bits */ - if (((uint8)byte) & 0xf0) - goto fail_integer_too_large; - } - else if (sign && maxbits == 32) { - if (shift < maxbits) { - /* Sign extend, second-highest bit is the sign bit */ - if ((uint8)byte & 0x40) - result |= (~((uint64)0)) << shift; - } - else { - /* The top bits should be a sign-extension of the sign bit */ - bool sign_bit_set = ((uint8)byte) & 0x8; - int top_bits = ((uint8)byte) & 0xf0; - if ((sign_bit_set && top_bits != 0x70) - || (!sign_bit_set && top_bits != 0)) - goto fail_integer_too_large; - } - } - else if (sign && maxbits == 64) { - if (shift < maxbits) { - /* Sign extend, second-highest bit is the sign bit */ - if ((uint8)byte & 0x40) - result |= (~((uint64)0)) << shift; - } - else { - /* The top bits should be a sign-extension of the sign bit */ - bool sign_bit_set = ((uint8)byte) & 0x1; - int top_bits = ((uint8)byte) & 0xfe; - - if ((sign_bit_set && top_bits != 0x7e) - || (!sign_bit_set && top_bits != 0)) - goto fail_integer_too_large; - } - } - - *p_buf += offset; - *p_result = result; - return true; - -fail_integer_too_large: - set_error_buf(error_buf, error_buf_size, "integer too large"); -fail: - return false; -} - #define read_uint8(p) TEMPLATE_READ_VALUE(uint8, p) #define read_uint32(p) TEMPLATE_READ_VALUE(uint32, p) diff --git a/core/iwasm/interpreter/wasm_mini_loader.c b/core/iwasm/interpreter/wasm_mini_loader.c index 0012b84a..6e70203f 100644 --- a/core/iwasm/interpreter/wasm_mini_loader.c +++ b/core/iwasm/interpreter/wasm_mini_loader.c @@ -44,9 +44,7 @@ has_module_memory64(WASMModule *module) static void set_error_buf(char *error_buf, uint32 error_buf_size, const char *string) { - if (error_buf != NULL) - snprintf(error_buf, error_buf_size, "WASM module load failed: %s", - string); + wasm_loader_set_error_buf(error_buf, error_buf_size, string, false); } #define CHECK_BUF(buf, buf_end, length) \ @@ -95,71 +93,6 @@ is_byte_a_type(uint8 type) || (type == VALUE_TYPE_VOID); } -static void -read_leb(uint8 **p_buf, const uint8 *buf_end, uint32 maxbits, bool sign, - uint64 *p_result, char *error_buf, uint32 error_buf_size) -{ - const uint8 *buf = *p_buf; - uint64 result = 0; - uint32 shift = 0; - uint32 offset = 0, bcnt = 0; - uint64 byte; - - while (true) { - bh_assert(bcnt + 1 <= (maxbits + 6) / 7); - CHECK_BUF(buf, buf_end, offset + 1); - byte = buf[offset]; - offset += 1; - result |= ((byte & 0x7f) << shift); - shift += 7; - bcnt += 1; - if ((byte & 0x80) == 0) { - break; - } - } - - if (!sign && maxbits == 32 && shift >= maxbits) { - /* The top bits set represent values > 32 bits */ - bh_assert(!(((uint8)byte) & 0xf0)); - } - else if (sign && maxbits == 32) { - if (shift < maxbits) { - /* Sign extend, second-highest bit is the sign bit */ - if ((uint8)byte & 0x40) - result |= (~((uint64)0)) << shift; - } - else { - /* The top bits should be a sign-extension of the sign bit */ - bool sign_bit_set = ((uint8)byte) & 0x8; - int top_bits = ((uint8)byte) & 0xf0; - bh_assert(!((sign_bit_set && top_bits != 0x70) - || (!sign_bit_set && top_bits != 0))); - (void)top_bits; - (void)sign_bit_set; - } - } - else if (sign && maxbits == 64) { - if (shift < maxbits) { - /* Sign extend, second-highest bit is the sign bit */ - if ((uint8)byte & 0x40) - result |= (~((uint64)0)) << shift; - } - else { - /* The top bits should be a sign-extension of the sign bit */ - bool sign_bit_set = ((uint8)byte) & 0x1; - int top_bits = ((uint8)byte) & 0xfe; - - bh_assert(!((sign_bit_set && top_bits != 0x7e) - || (!sign_bit_set && top_bits != 0))); - (void)top_bits; - (void)sign_bit_set; - } - } - - *p_buf += offset; - *p_result = result; -} - #define read_uint8(p) TEMPLATE_READ_VALUE(uint8, p) #define read_uint32(p) TEMPLATE_READ_VALUE(uint32, p) #define read_bool(p) TEMPLATE_READ_VALUE(bool, p) diff --git a/core/shared/utils/bh_leb128.c b/core/shared/utils/bh_leb128.c new file mode 100644 index 00000000..8e4b13dc --- /dev/null +++ b/core/shared/utils/bh_leb128.c @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2019 Intel Corporation. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include "bh_leb128.h" + +bh_leb_read_status_t +bh_leb_read(const uint8 *buf, const uint8 *buf_end, uint32 maxbits, bool sign, + uint64 *p_result, size_t *p_offset) +{ + uint64 result = 0; + uint32 shift = 0; + uint32 offset = 0, bcnt = 0; + uint64 byte; + + while (true) { + /* uN or SN must not exceed ceil(N/7) bytes */ + if (bcnt + 1 > (maxbits + 6) / 7) { + return BH_LEB_READ_TOO_LONG; + } + + if ((uintptr_t)buf + offset + 1 < (uintptr_t)buf + || (uintptr_t)buf + offset + 1 > (uintptr_t)buf_end) { + return BH_LEB_READ_UNEXPECTED_END; + } + byte = buf[offset]; + offset += 1; + result |= ((byte & 0x7f) << shift); + shift += 7; + bcnt += 1; + if ((byte & 0x80) == 0) { + break; + } + } + + if (!sign && maxbits == 32 && shift >= maxbits) { + /* The top bits set represent values > 32 bits */ + if (((uint8)byte) & 0xf0) + return BH_LEB_READ_OVERFLOW; + } + else if (sign && maxbits == 32) { + if (shift < maxbits) { + /* Sign extend, second-highest bit is the sign bit */ + if ((uint8)byte & 0x40) + result |= (~((uint64)0)) << shift; + } + else { + /* The top bits should be a sign-extension of the sign bit */ + bool sign_bit_set = ((uint8)byte) & 0x8; + int top_bits = ((uint8)byte) & 0xf0; + if ((sign_bit_set && top_bits != 0x70) + || (!sign_bit_set && top_bits != 0)) + return BH_LEB_READ_OVERFLOW; + } + } + else if (sign && maxbits == 64) { + if (shift < maxbits) { + /* Sign extend, second-highest bit is the sign bit */ + if ((uint8)byte & 0x40) + result |= (~((uint64)0)) << shift; + } + else { + /* The top bits should be a sign-extension of the sign bit */ + bool sign_bit_set = ((uint8)byte) & 0x1; + int top_bits = ((uint8)byte) & 0xfe; + + if ((sign_bit_set && top_bits != 0x7e) + || (!sign_bit_set && top_bits != 0)) + return BH_LEB_READ_OVERFLOW; + } + } + + *p_offset = offset; + *p_result = result; + return BH_LEB_READ_SUCCESS; +} \ No newline at end of file diff --git a/core/shared/utils/bh_leb128.h b/core/shared/utils/bh_leb128.h new file mode 100644 index 00000000..ce73b4b8 --- /dev/null +++ b/core/shared/utils/bh_leb128.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#ifndef _BH_LEB128_H +#define _BH_LEB128_H + +#include "bh_platform.h" + +typedef enum { + BH_LEB_READ_SUCCESS, + BH_LEB_READ_TOO_LONG, + BH_LEB_READ_OVERFLOW, + BH_LEB_READ_UNEXPECTED_END, +} bh_leb_read_status_t; + +#ifdef __cplusplus +extern "C" { +#endif + +bh_leb_read_status_t +bh_leb_read(const uint8 *buf, const uint8 *buf_end, uint32 maxbits, bool sign, + uint64 *p_result, size_t *p_offset); + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/product-mini/platforms/alios-things/aos.mk b/product-mini/platforms/alios-things/aos.mk index 947a4a91..7d98ca0b 100644 --- a/product-mini/platforms/alios-things/aos.mk +++ b/product-mini/platforms/alios-things/aos.mk @@ -102,6 +102,7 @@ $(NAME)_SOURCES := ${SHARED_ROOT}/platform/alios/alios_platform.c \ ${SHARED_ROOT}/utils/bh_common.c \ ${SHARED_ROOT}/utils/bh_hashmap.c \ ${SHARED_ROOT}/utils/bh_list.c \ + ${SHARED_ROOT}/utils/bh_leb128.c \ ${SHARED_ROOT}/utils/bh_log.c \ ${SHARED_ROOT}/utils/bh_queue.c \ ${SHARED_ROOT}/utils/bh_vector.c \ diff --git a/product-mini/platforms/nuttx/wamr.mk b/product-mini/platforms/nuttx/wamr.mk index 75bd69be..96a3e2ab 100644 --- a/product-mini/platforms/nuttx/wamr.mk +++ b/product-mini/platforms/nuttx/wamr.mk @@ -441,6 +441,7 @@ CSRCS += nuttx_platform.c \ bh_common.c \ bh_hashmap.c \ bh_list.c \ + bh_leb128.c \ bh_log.c \ bh_queue.c \ bh_vector.c \ diff --git a/tests/unit/shared-utils/bh_leb128_test.cc b/tests/unit/shared-utils/bh_leb128_test.cc new file mode 100644 index 00000000..f5386464 --- /dev/null +++ b/tests/unit/shared-utils/bh_leb128_test.cc @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include "bh_leb128.h" +#include "gtest/gtest.h" + +#include +#include + +template +void +run_read_leb_test(std::vector data, + bh_leb_read_status_t expected_status, T expected_value) +{ + size_t offset = 0; + uint64 value; + bh_leb_read_status_t status = + bh_leb_read(data.data(), data.data() + data.size(), sizeof(T) * 8, + std::is_signed::value, &value, &offset); + ASSERT_EQ(expected_status, status); + if (status == BH_LEB_READ_SUCCESS) { + ASSERT_EQ(data.size(), offset); + ASSERT_EQ(expected_value, (T)value); + } +} + +TEST(bh_leb128_test_suite, read_leb_u32) +{ + run_read_leb_test({ 0 }, BH_LEB_READ_SUCCESS, + 0); // min value + run_read_leb_test({ 2 }, BH_LEB_READ_SUCCESS, + 2); // single-byte value + run_read_leb_test({ 127 }, BH_LEB_READ_SUCCESS, + 127); // max single-byte value + run_read_leb_test({ 128, 1 }, BH_LEB_READ_SUCCESS, + 128); // min value with continuation bit + run_read_leb_test({ 160, 138, 32 }, BH_LEB_READ_SUCCESS, + 525600); // arbitrary value + run_read_leb_test({ 255, 255, 255, 255, 15 }, BH_LEB_READ_SUCCESS, + UINT32_MAX); // max value + run_read_leb_test({ 255, 255, 255, 255, 16 }, BH_LEB_READ_OVERFLOW, + UINT32_MAX); // overflow + run_read_leb_test({ 255, 255, 255, 255, 128 }, BH_LEB_READ_TOO_LONG, + 0); + run_read_leb_test({ 128 }, BH_LEB_READ_UNEXPECTED_END, 0); +} + +TEST(bh_leb128_test_suite, read_leb_i64) +{ + run_read_leb_test({ 184, 188, 195, 159, 237, 209, 128, 2 }, + BH_LEB_READ_SUCCESS, + 1128712371232312); // arbitrary value + run_read_leb_test( + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 127 }, + BH_LEB_READ_SUCCESS, + (uint64)INT64_MIN); // min value + run_read_leb_test({ 255, 255, 255, 255, 255, 255, 255, 255, 255, 0 }, + BH_LEB_READ_SUCCESS, + INT64_MAX); // max value +} \ No newline at end of file