Add wasm tacle-bench targets

2026-06-12 20:06:22 +02:00
parent 30daa8a00c
commit 08c2e9c13d
1122 changed files with 520422 additions and 0 deletions
--- a/targets/wasm-tacle/kernel/matrix1/generated/default/matrix1.wasm
+++ b/targets/wasm-tacle/kernel/matrix1/generated/default/matrix1.wasm
--- a/targets/wasm-tacle/kernel/matrix1/generated/default/matrix1.wat
+++ b/targets/wasm-tacle/kernel/matrix1/generated/default/matrix1.wat
@ -0,0 +1,356 @@
+(module $matrix1.wasm
+  (type (;0;) (func (param i32 i32)))
+  (type (;1;) (func))
+  (type (;2;) (func (result i32)))
+  (import "__pragma" "loopbound" (func $__pragma_loopbound (type 0)))
+  (func $__wasm_apply_data_relocs (type 1))
+  (func $matrix1_main (type 1)  ;; exported as "entrypoint"; no parameters, no result
+    (local i32 i32 i32 i32 i32)  ;; 0: store address, 1: outer counter, 2: base 1424 + 40 * counter, 3: negative byte offset, 4: running sum
+    i32.const 10
+    i32.const 10
+    call $__pragma_loopbound  ;; loopbound(10, 10) ahead of loop @1
+    i32.const 1824
+    local.set 0  ;; results are written from address 1824 upward
+    i32.const 0
+    local.set 1  ;; outer counter starts at 0
+    loop  ;; label = @1
+      i32.const 10
+      i32.const 10
+      call $__pragma_loopbound  ;; loopbound(10, 10) ahead of loop @2
+      local.get 1
+      i32.const 40
+      i32.mul
+      i32.const 1424
+      i32.add
+      local.set 2  ;; local 2 = 1424 + 40 * local 1
+      i32.const -400
+      local.set 3  ;; local 3 runs from -400 up to 0 in steps of 40
+      loop  ;; label = @2
+        local.get 0
+        i32.const 0
+        i32.store  ;; clear the output word before accumulating into it
+        i32.const 10
+        i32.const 10
+        call $__pragma_loopbound  ;; loopbound(10, 10), but no loop follows here: the ten multiply-add groups below are straight-line code
+        local.get 0
+        local.get 0
+        i32.load
+        local.get 2
+        i32.load
+        local.get 3
+        i32.const 1424
+        i32.add
+        i32.load
+        i32.mul
+        i32.add
+        local.tee 4
+        i32.store  ;; group 1 of 10: word at local 2 times word at local 3 + 1424, added to the output word
+        local.get 0
+        local.get 4
+        local.get 2
+        i32.load offset=4
+        local.get 3
+        i32.const 1428
+        i32.add
+        i32.load
+        i32.mul
+        i32.add
+        local.tee 4
+        i32.store  ;; group 2 of 10: both operands 4 bytes further on; sum kept in local 4 and written back
+        local.get 0
+        local.get 4
+        local.get 2
+        i32.load offset=8
+        local.get 3
+        i32.const 1432
+        i32.add
+        i32.load
+        i32.mul
+        i32.add
+        local.tee 4
+        i32.store  ;; group 3 of 10
+        local.get 0
+        local.get 4
+        local.get 2
+        i32.load offset=12
+        local.get 3
+        i32.const 1436
+        i32.add
+        i32.load
+        i32.mul
+        i32.add
+        local.tee 4
+        i32.store  ;; group 4 of 10
+        local.get 0
+        local.get 4
+        local.get 2
+        i32.load offset=16
+        local.get 3
+        i32.const 1440
+        i32.add
+        i32.load
+        i32.mul
+        i32.add
+        local.tee 4
+        i32.store  ;; group 5 of 10
+        local.get 0
+        local.get 4
+        local.get 2
+        i32.load offset=20
+        local.get 3
+        i32.const 1444
+        i32.add
+        i32.load
+        i32.mul
+        i32.add
+        local.tee 4
+        i32.store  ;; group 6 of 10
+        local.get 0
+        local.get 4
+        local.get 2
+        i32.load offset=24
+        local.get 3
+        i32.const 1448
+        i32.add
+        i32.load
+        i32.mul
+        i32.add
+        local.tee 4
+        i32.store  ;; group 7 of 10
+        local.get 0
+        local.get 4
+        local.get 2
+        i32.load offset=28
+        local.get 3
+        i32.const 1452
+        i32.add
+        i32.load
+        i32.mul
+        i32.add
+        local.tee 4
+        i32.store  ;; group 8 of 10
+        local.get 0
+        local.get 4
+        local.get 2
+        i32.load offset=32
+        local.get 3
+        i32.const 1456
+        i32.add
+        i32.load
+        i32.mul
+        i32.add
+        local.tee 4
+        i32.store  ;; group 9 of 10
+        local.get 0
+        local.get 4
+        local.get 2
+        i32.load offset=36
+        local.get 3
+        i32.const 1460
+        i32.add
+        i32.load
+        i32.mul
+        i32.add
+        i32.store  ;; group 10 of 10: final sum written, local 4 no longer needed
+        local.get 0
+        i32.const 4
+        i32.add
+        local.set 0  ;; advance to the next output word
+        local.get 3
+        i32.const 40
+        i32.add
+        local.tee 3
+        br_if 0 (;@2;)  ;; repeat @2 until local 3 reaches 0 (ten passes)
+      end
+      local.get 1
+      i32.const 1
+      i32.add
+      local.tee 1
+      i32.const 10
+      i32.ne
+      br_if 0 (;@1;)  ;; repeat @1 until local 1 equals 10
+    end)
+  (func $__original_main (type 2) (result i32)  ;; exported as "main"; result is 0 or -1 (see the final select)
+    (local i32 i32 i32)  ;; 0: stack frame pointer, 1: negative byte offset, 2: checksum
+    global.get $__stack_pointer
+    i32.const 16
+    i32.sub
+    local.tee 0
+    global.set $__stack_pointer  ;; reserve a 16-byte stack frame
+    local.get 0
+    i32.const 1
+    i32.store offset=12  ;; frame slot at +12 holds 1; it is re-loaded before every store in the first two loops
+    i32.const 100
+    i32.const 100
+    call $__pragma_loopbound  ;; loopbound(100, 100); the loop below handles 5 words per pass, 20 passes
+    i32.const -400
+    local.set 1
+    loop  ;; label = @1
+      local.get 1
+      i32.const 1424
+      i32.add
+      local.get 0
+      i32.load offset=12
+      i32.store  ;; first fill: over all passes covers addresses 1024..1423
+      local.get 1
+      i32.const 1428
+      i32.add
+      local.get 0
+      i32.load offset=12
+      i32.store
+      local.get 1
+      i32.const 1432
+      i32.add
+      local.get 0
+      i32.load offset=12
+      i32.store
+      local.get 1
+      i32.const 1436
+      i32.add
+      local.get 0
+      i32.load offset=12
+      i32.store
+      local.get 1
+      i32.const 1440
+      i32.add
+      local.get 0
+      i32.load offset=12
+      i32.store
+      local.get 1
+      i32.const 20
+      i32.add
+      local.tee 1
+      br_if 0 (;@1;)  ;; continue until the offset reaches 0
+    end
+    i32.const 100
+    i32.const 100
+    call $__pragma_loopbound  ;; loopbound(100, 100) for the second fill
+    i32.const -400
+    local.set 1
+    loop  ;; label = @1
+      local.get 1
+      i32.const 1824
+      i32.add
+      local.get 0
+      i32.load offset=12
+      i32.store  ;; second fill: over all passes covers addresses 1424..1823
+      local.get 1
+      i32.const 1828
+      i32.add
+      local.get 0
+      i32.load offset=12
+      i32.store
+      local.get 1
+      i32.const 1832
+      i32.add
+      local.get 0
+      i32.load offset=12
+      i32.store
+      local.get 1
+      i32.const 1836
+      i32.add
+      local.get 0
+      i32.load offset=12
+      i32.store
+      local.get 1
+      i32.const 1840
+      i32.add
+      local.get 0
+      i32.load offset=12
+      i32.store
+      local.get 1
+      i32.const 20
+      i32.add
+      local.tee 1
+      br_if 0 (;@1;)
+    end
+    i32.const 100
+    i32.const 100
+    call $__pragma_loopbound  ;; loopbound(100, 100) for the zeroing loop
+    i32.const -400
+    local.set 1
+    loop  ;; label = @1
+      local.get 1
+      i32.const 2240
+      i32.add
+      i32.const 0
+      i32.store  ;; zero 20 bytes per pass (one i32 + two i64); over all passes covers addresses 1824..2223
+      local.get 1
+      i32.const 2232
+      i32.add
+      i64.const 0
+      i64.store align=4
+      local.get 1
+      i32.const 2224
+      i32.add
+      i64.const 0
+      i64.store align=4
+      local.get 1
+      i32.const 20
+      i32.add
+      local.tee 1
+      br_if 0 (;@1;)
+    end
+    call $matrix1_main  ;; run the kernel on the freshly initialised memory
+    i32.const 100
+    i32.const 100
+    call $__pragma_loopbound  ;; loopbound(100, 100) for the checksum loop
+    i32.const 0
+    local.set 2  ;; checksum starts at 0
+    i32.const -400
+    local.set 1
+    loop  ;; label = @1
+      local.get 1
+      i32.const 2240
+      i32.add
+      i32.load  ;; five words per pass from 1824..2223 are pushed, then added to the checksum
+      local.get 1
+      i32.const 2236
+      i32.add
+      i32.load
+      local.get 1
+      i32.const 2232
+      i32.add
+      i32.load
+      local.get 1
+      i32.const 2228
+      i32.add
+      i32.load
+      local.get 1
+      i32.const 2224
+      i32.add
+      i32.load
+      local.get 2
+      i32.add
+      i32.add
+      i32.add
+      i32.add
+      i32.add
+      local.set 2
+      local.get 1
+      i32.const 20
+      i32.add
+      local.tee 1
+      br_if 0 (;@1;)
+    end
+    local.get 0
+    i32.const 16
+    i32.add
+    global.set $__stack_pointer  ;; release the 16-byte frame
+    i32.const -1
+    i32.const 0
+    local.get 2
+    i32.const 1000
+    i32.ne
+    select)  ;; result: -1 when the checksum differs from 1000, otherwise 0
+  (table (;0;) 1 1 funcref)
+  (memory (;0;) 1)
+  (global $__stack_pointer (mut i32) (i32.const 6320))
+  (global (;1;) i32 (i32.const 2224))
+  (global (;2;) i32 (i32.const 6320))
+  (export "memory" (memory 0))
+  (export "__wasm_apply_data_relocs" (func $__wasm_apply_data_relocs))
+  (export "entrypoint" (func $matrix1_main))
+  (export "main" (func $__original_main))
+  (export "__data_end" (global 1))
+  (export "__heap_base" (global 2)))
--- a/targets/wasm-tacle/kernel/matrix1/generated/modified_sources/default/matrix1.c
+++ b/targets/wasm-tacle/kernel/matrix1/generated/modified_sources/default/matrix1.c
@ -0,0 +1,168 @@
+/*
+
+  This program is part of the TACLeBench benchmark suite.
+  Version V 1.x
+
+  Name: matrix1
+
+  Author: Juan Martinez Velarde
+
+  Function: Generic matrix - multiply benchmarking
+
+    This program performs a matrix multiplication of the form C=AB,
+    where A and B are two dimensional matrices of arbitrary dimension.
+    The only restriction is that the inner dimension of the arrays must
+    be greater than 1.
+
+      A[ X x Y ] * B[ Y x Z ] = C[ X x Z ]
+
+                       |a11     a12     ..      a1y|
+                       |a21     a22     ..      a2y|
+      matrix A[ X x Y ]= |..      ..      ..     ..  |
+                       |a(x-1)1 a(x-1)2 ..  a(x-1)y|
+                       |ax1     ax2     ..      axy|
+
+
+                       |b11     b12     ..     b1z|
+                       |b21     b22     ..     b2z|
+      matrix B[ Y x Z ]= |..      ..      ..     .. |
+                       |b(y-1)1 b(y-1)2 .. b(y-1)z|
+                       |by1     by2     ..     byz|
+
+                       |c11     c12     ..     c1z|
+                       |c21     c22     ..     c2z|
+      matrix C[ X x Z ]= |..      ..      ..     .. |
+                       |c(x-1)1 c(x-1)2 .. c(x-1)z|
+                       |cx1     cx2     ..     cxz|
+
+      matrix elements are stored as
+
+      A[ X x Y ] = { a11, a12, .. , a1y,
+                   a21, a22, .. , a2y,
+                   ...,
+                   ax1, ax2, .. , axy}
+
+      B[ Y x Z ] = { b11, b21, .., b(y-1)1, by1, b12, b22, .. , b(y-1)z, byz }
+
+      C[ X x Z ] = { c11, c21, .. , c(x-1)1, cx1, c12, c22, .. ,c(x-1)z, cxz }
+
+  Source: DSP-Stone
+          http://www.ice.rwth-aachen.de/research/tools-projects/entry/detail/dspstone
+
+  Changes: no major functional changes
+
+  License: may be used, modified, and re-distributed freely
+
+*/
+
+/*
+  Macro definitions
+*/
+
+/* Wasm loop bounds
+
+   __pragma_loopbound is not defined in this file: the attributes below
+   declare it as a WebAssembly import named "loopbound" from the module
+   "__pragma". Every call in this file sits directly in front of a loop and
+   passes that loop's iteration count as both min_bound and max_bound.
+   NOTE(review): what the host does with the bounds is not visible here. */
+
+__attribute__((import_module("__pragma"), import_name("loopbound"))) extern void
+__pragma_loopbound(unsigned int min_bound, unsigned int max_bound);
+
+#define X 10 /* first dimension of array A */
+#define Y 10 /* second dimension of array A, first dimension of array B */
+#define Z 10 /* second dimension of array B */
+
+/*
+  Forward declaration of functions
+
+  matrix1_main and main carry noinline and are exported from the wasm module
+  under the names "entrypoint" and "main" respectively. matrix1_return is not
+  declared here; its definition appears before its call in main.
+*/
+
+void matrix1_pin_down(int A[], int B[], int C[]);
+void matrix1_init(void);
+__attribute__((noinline)) __attribute__((export_name("entrypoint"))) void
+matrix1_main(void);
+__attribute__((noinline)) __attribute__((export_name("main"))) int main(void);
+
+/*
+  Declaration of global variables
+
+  With X = Y = Z = 10 each array holds 100 ints. matrix1_A and matrix1_B are
+  read by matrix1_main as the two operands; matrix1_C receives its results
+  and is later summed by matrix1_return.
+*/
+
+int matrix1_A[X * Y];
+int matrix1_B[Y * Z];
+int matrix1_C[X * Z];
+
+/*
+  Initialization functions
+
+  matrix1_pin_down stores the value read from the volatile local x (set to 1)
+  into the first X * Y elements of A and the first Y * Z elements of B, and
+  stores 0 into the first X * Z elements of C. Because x is volatile it is
+  read again for every single store. Each loop is preceded by a
+  __pragma_loopbound(100, 100) call, matching its 100 iterations.
+*/
+
+void
+matrix1_pin_down(int A[], int B[], int C[]) {
+    int i;
+    volatile int x = 1; /* volatile: the fill value must be loaded at run time */
+
+    __pragma_loopbound(100, 100);
+    for (i = 0; i < X * Y; i++)
+        A[i] = x;
+
+    __pragma_loopbound(100, 100);
+    for (i = 0; i < Y * Z; i++)
+        B[i] = x;
+
+    __pragma_loopbound(100, 100);
+    for (i = 0; i < X * Z; i++)
+        C[i] = 0;
+}
+
+void
+matrix1_init(void) { /* fills matrix1_A and matrix1_B, clears matrix1_C */
+    matrix1_pin_down(&matrix1_A[0], &matrix1_B[0], &matrix1_C[0]);
+}
+
+/*
+  Return function
+
+  matrix1_return sums all X * Z elements of matrix1_C and returns 0 when the
+  sum equals 1000, otherwise -1. With both operand matrices filled with 1 (as
+  matrix1_pin_down does), every result element is a sum of Y = 10 products of
+  1, so 100 elements add up to 1000.
+*/
+
+int
+matrix1_return(void) {
+    int i;
+    int checksum = 0;
+
+    __pragma_loopbound(100, 100);
+    for (i = 0; i < X * Z; i++)
+        checksum += matrix1_C[i];
+
+    return (checksum == 1000 ? 0 : -1);
+}
+
+/*
+  Main functions
+
+  matrix1_main fills matrix1_C with X * Z dot products. p_c walks matrix1_C
+  linearly and is never reset, so results are stored in the order the (k, i)
+  pairs are visited. For each k, p_a restarts at matrix1_A[0] and then
+  advances continuously through all X * Y elements across the i loop, while
+  p_b restarts at matrix1_B[k * Y] for every i. Each result is the sum of Y
+  products of consecutive elements read through p_a and p_b.
+
+  The function is noinline and exported under the name "entrypoint". The
+  values given to p_a and p_b at their declarations are overwritten before
+  they are first dereferenced.
+*/
+
+__attribute__((noinline)) __attribute__((export_name("entrypoint"))) void
+matrix1_main(void) {
+    register int *p_a = &matrix1_A[0];
+    register int *p_b = &matrix1_B[0];
+    register int *p_c = &matrix1_C[0];
+
+    register int f, i, k;
+
+    __pragma_loopbound(10, 10);
+    for (k = 0; k < Z; k++) {
+        p_a = &matrix1_A[0]; /* point to the beginning of array A */
+
+        __pragma_loopbound(10, 10);
+        for (i = 0; i < X; i++) {
+            p_b = &matrix1_B[k * Y]; /* take next column */
+
+            *p_c = 0; /* accumulate directly in the destination element */
+            __pragma_loopbound(10, 10);
+            for (f = 0; f < Y; f++) /* do multiply */
+                *p_c += *p_a++ * *p_b++;
+
+            p_c++; /* next result element */
+        }
+    }
+}
+
+__attribute__((noinline)) __attribute__((export_name("main"))) int
+main(void) { /* exported as "main": initialise, run the kernel, verify */
+    matrix1_init();
+    matrix1_main();
+
+    return matrix1_return(); /* 0 when the checksum is 1000, otherwise -1 */
+}
--- a/targets/wasm-tacle/kernel/matrix1/generated/modified_sources/inline/matrix1.c
+++ b/targets/wasm-tacle/kernel/matrix1/generated/modified_sources/inline/matrix1.c
@ -0,0 +1,177 @@
+/*
+
+  This program is part of the TACLeBench benchmark suite.
+  Version V 1.x
+
+  Name: matrix1
+
+  Author: Juan Martinez Velarde
+
+  Function: Generic matrix - multiply benchmarking
+
+    This program performs a matrix multiplication of the form C=AB,
+    where A and B are two dimensional matrices of arbitrary dimension.
+    The only restriction is that the inner dimension of the arrays must
+    be greater than 1.
+
+      A[ X x Y ] * B[ Y x Z ] = C[ X x Z ]
+
+                       |a11     a12     ..      a1y|
+                       |a21     a22     ..      a2y|
+      matrix A[ X x Y ]= |..      ..      ..     ..  |
+                       |a(x-1)1 a(x-1)2 ..  a(x-1)y|
+                       |ax1     ax2     ..      axy|
+
+
+                       |b11     b12     ..     b1z|
+                       |b21     b22     ..     b2z|
+      matrix B[ Y x Z ]= |..      ..      ..     .. |
+                       |b(y-1)1 b(y-1)2 .. b(y-1)z|
+                       |by1     by2     ..     byz|
+
+                       |c11     c12     ..     c1z|
+                       |c21     c22     ..     c2z|
+      matrix C[ X x Z ]= |..      ..      ..     .. |
+                       |c(x-1)1 c(x-1)2 .. c(x-1)z|
+                       |cx1     cx2     ..     cxz|
+
+      matrix elements are stored as
+
+      A[ X x Y ] = { a11, a12, .. , a1y,
+                   a21, a22, .. , a2y,
+                   ...,
+                   ax1, ax2, .. , axy}
+
+      B[ Y x Z ] = { b11, b21, .., b(y-1)1, by1, b12, b22, .. , b(y-1)z, byz }
+
+      C[ X x Z ] = { c11, c21, .. , c(x-1)1, cx1, c12, c22, .. ,c(x-1)z, cxz }
+
+  Source: DSP-Stone
+          http://www.ice.rwth-aachen.de/research/tools-projects/entry/detail/dspstone
+
+  Changes: no major functional changes
+
+  License: may be used, modified, and re-distributed freely
+
+*/
+
+/*
+  Macro definitions
+*/
+
+/* Wasm loop bounds
+
+   __pragma_loopbound is not defined in this file: the attributes below
+   declare it as a WebAssembly import named "loopbound" from the module
+   "__pragma". Every call in this file sits directly in front of a loop and
+   passes that loop's iteration count as both min_bound and max_bound.
+   NOTE(review): what the host does with the bounds is not visible here. */
+
+
+
+
+__attribute__((import_module("__pragma"), import_name("loopbound"))) extern void
+__pragma_loopbound(unsigned int min_bound, unsigned int max_bound);
+
+#define X 10 /* first dimension of array A */
+#define Y 10 /* second dimension of array A, first dimension of array B */
+#define Z 10 /* second dimension of array B */
+
+/*
+  Forward declaration of functions
+
+  matrix1_pin_down and matrix1_init are always_inline static helpers.
+  matrix1_main and main are kept out of line (noinline) and exported from the
+  wasm module under the names "entrypoint" and "main". Each attribute is
+  listed once; repeating the same attribute list adds nothing.
+  matrix1_return is not declared here; its definition appears before its
+  call in main.
+*/
+
+__attribute__((always_inline)) static inline void
+matrix1_pin_down(int A[], int B[], int C[]);
+__attribute__((always_inline)) static inline void matrix1_init(void);
+__attribute__((noinline)) __attribute__((export_name("entrypoint"))) void
+matrix1_main(void);
+__attribute__((noinline)) __attribute__((export_name("main"))) int
+main(void);
+
+/*
+  Declaration of global variables
+
+  With X = Y = Z = 10 each array holds 100 ints. matrix1_A and matrix1_B are
+  read by matrix1_main as the two operands; matrix1_C receives its results
+  and is later summed by matrix1_return.
+*/
+
+int matrix1_A[X * Y];
+int matrix1_B[Y * Z];
+int matrix1_C[X * Z];
+
+/*
+  Initialization functions
+
+  matrix1_pin_down stores the value read from the volatile local x (set to 1)
+  into the first X * Y elements of A and the first Y * Z elements of B, and
+  stores 0 into the first X * Z elements of C. Because x is volatile it is
+  read again for every single store. Each loop is preceded by a
+  __pragma_loopbound(100, 100) call, matching its 100 iterations.
+  In this variant the function is static and marked always_inline.
+*/
+
+__attribute__((always_inline)) static inline void
+matrix1_pin_down(int A[], int B[], int C[]) {
+    int i;
+    volatile int x = 1; /* volatile: the fill value must be loaded at run time */
+
+    __pragma_loopbound(100, 100);
+    for (i = 0; i < X * Y; i++)
+        A[i] = x;
+
+    __pragma_loopbound(100, 100);
+    for (i = 0; i < Y * Z; i++)
+        B[i] = x;
+
+    __pragma_loopbound(100, 100);
+    for (i = 0; i < X * Z; i++)
+        C[i] = 0;
+}
+
+__attribute__((always_inline)) static inline void
+matrix1_init(void) { /* fills matrix1_A and matrix1_B, clears matrix1_C */
+    matrix1_pin_down(&matrix1_A[0], &matrix1_B[0], &matrix1_C[0]);
+}
+
+/*
+  Return function
+
+  matrix1_return sums all X * Z elements of matrix1_C and returns 0 when the
+  sum equals 1000, otherwise -1. With both operand matrices filled with 1 (as
+  matrix1_pin_down does), every result element is a sum of Y = 10 products of
+  1, so 100 elements add up to 1000.
+  In this variant the function is static and marked always_inline.
+*/
+
+__attribute__((always_inline)) static inline int
+matrix1_return(void) {
+    int i;
+    int checksum = 0;
+
+    __pragma_loopbound(100, 100);
+    for (i = 0; i < X * Z; i++)
+        checksum += matrix1_C[i];
+
+    return (checksum == 1000 ? 0 : -1);
+}
+
+/*
+  Main functions
+
+  matrix1_main fills matrix1_C with X * Z dot products. p_c walks matrix1_C
+  linearly and is never reset, so results are stored in the order the (k, i)
+  pairs are visited. For each k, p_a restarts at matrix1_A[0] and then
+  advances continuously through all X * Y elements across the i loop, while
+  p_b restarts at matrix1_B[k * Y] for every i. Each result is the sum of Y
+  products of consecutive elements read through p_a and p_b.
+
+  The function is noinline and exported under the name "entrypoint"; the
+  attribute list is given once. The values given to p_a and p_b at their
+  declarations are overwritten before they are first dereferenced.
+*/
+
+__attribute__((noinline)) __attribute__((export_name("entrypoint"))) void
+matrix1_main(void) {
+    register int *p_a = &matrix1_A[0];
+    register int *p_b = &matrix1_B[0];
+    register int *p_c = &matrix1_C[0];
+
+    register int f, i, k;
+
+    __pragma_loopbound(10, 10);
+    for (k = 0; k < Z; k++) {
+        p_a = &matrix1_A[0]; /* point to the beginning of array A */
+
+        __pragma_loopbound(10, 10);
+        for (i = 0; i < X; i++) {
+            p_b = &matrix1_B[k * Y]; /* take next column */
+
+            *p_c = 0; /* accumulate directly in the destination element */
+            __pragma_loopbound(10, 10);
+            for (f = 0; f < Y; f++) /* do multiply */
+                *p_c += *p_a++ * *p_b++;
+
+            p_c++; /* next result element */
+        }
+    }
+}
+
+/*
+  Exported as "main" and kept out of line: initialises the matrices, runs the
+  kernel and returns the checksum verdict (0 when the sum of matrix1_C is
+  1000, otherwise -1).
+*/
+__attribute__((noinline)) __attribute__((export_name("main"))) int
+main(void) {
+    matrix1_init();
+    matrix1_main();
+
+    return matrix1_return();
+}