Files

742 lines
26 KiB
C

/*
This program is part of the TACLeBench benchmark suite.
Version 2.0
Name: pm
Author: Hector Chan
MIT Lincoln Laboratory
Function: This is the ANSI C Pattern Match kernel. It finds the closest
match of a pattern from a library of patterns. The code below
serves as a reference implemenation of the pattern match kernel.
Source: HPEC Challenge Benchmark Suite, Pattern Match Kernel Benchmark
Original name: pm
Changes: See ChangeLog.txt
License: BSD 3-clause
*/
/*
Include section
*/
#include "pm_math.h"
#include "pm_string.h"
/*
Definition of types
*/
// Wasm loop bounds
__attribute__((import_module("__pragma"), import_name("loopbound"))) extern void
__pragma_loopbound(unsigned int min_bound, unsigned int max_bound);
typedef struct pm_float_array_t {
float *data;
void *datav;
int size[3];
unsigned int ndims;
unsigned int rctype;
char padding[4];
} pm_float_array_t;
typedef struct pm_data_t {
float *template_profiles_db; /* the library of patterns */
float *test_profile_db; /* the test pattern */
float *template_copy; /* temporary storage for a template */
float *test_noise_db_array; /* copies of test noise in an array for
fast copy */
float *MSE_scores; /* the likelihood of the matching between a
range shift of the test pattern and the libary */
float *mag_shift_scores; /* the likelihood of the matching between a
magnitude scaling of the test pattern and the
libary */
float *minimum_MSE_score; /* the likelihood of the matching between the
test pattern and the libary */
float *all_shifted_test_db; /* contains the shiftings of the test pattern */
unsigned char *template_exceed; /* marking where a library template
exceeds twice the noise level of
the test pattern */
float *test_exceed_means; /* pixels where test pattern exceeds twice
its noise level */
float shift_ratio; /* determines the number of range shifts */
int shift_size; /* the actual number of range shifts */
int profile_size; /* the length of the pattern */
int num_templates; /* the number of library templates */
int elsize; /* the size of a single fp number */
char padding[4];
} pm_data_t;
/*
Forward declaration of functions
*/
void pm_init_lib(pm_float_array_t *lib);
void pm_init_pattern(pm_float_array_t *pattern);
void pm_init_data(pm_data_t *pmdata, pm_float_array_t *lib,
pm_float_array_t *pattern);
void pm_clean(pm_data_t *pmdata);
__attribute__((noinline)) __attribute__((export_name("entrypoint"))) void
pm_main(void);
void pm_init(void);
int pm_return(void);
int pm_kernel(pm_data_t *pmdata);
/*
Declaration of global variables
*/
/* input data */
extern float pm_lib_data[60][64];
extern float pm_pattern_data[60][64];
/* some magic number */
#define pm_MIN_NOISE 1e-10f
/* main data structures used by the benchmark */
static pm_data_t pm_data;
static pm_float_array_t pm_lib;
static float *pm_lib_ptr[60];
static pm_float_array_t pm_pattern;
static float *pm_pattern_ptr[60];
static int pm_result;
/* arrays for the pm_init_data function */
static unsigned char pm_init_array_1[64];
static float pm_init_array_2[21];
static float pm_init_array_3[64];
static float pm_init_array_4[64];
static float pm_init_array_5[21];
static float pm_init_array_6[21];
static float pm_init_array_7[72];
static float pm_init_array_8[110];
/*
Initialization- and return-value-related functions
*/
void
pm_init_lib(pm_float_array_t *lib) {
int i;
volatile int do_not_optimize_away = 0;
lib->rctype = 1;
lib->ndims = 2;
lib->size[0] = 60;
lib->size[1] = 64;
lib->size[2] = 0;
__pragma_loopbound(60, 60);
for (i = 0; i < 60; i++)
pm_lib_ptr[i] = pm_lib_data[i];
__pragma_loopbound(60, 60);
for (i = 0; i < 60; i++)
pm_lib_ptr[i] += do_not_optimize_away;
lib->data = *pm_lib_ptr;
lib->datav = (void *) pm_lib_ptr;
}
void
pm_init_pattern(pm_float_array_t *pattern) {
int i;
volatile int do_not_optimize_away = 0;
pattern->rctype = 1;
pattern->ndims = 2;
pattern->size[0] = 60;
pattern->size[1] = 64;
pattern->size[2] = 0;
__pragma_loopbound(60, 60);
for (i = 0; i < 60; i++)
pm_pattern_ptr[i] = pm_pattern_data[i];
__pragma_loopbound(60, 60);
for (i = 0; i < 60; i++)
pm_pattern_ptr[i] += do_not_optimize_away;
pattern->data = *pm_pattern_ptr;
pattern->datav = (void *) pm_pattern_ptr;
}
void
pm_init(void) {
pm_math_init();
}
int
pm_return(void) {
return pm_result - 12;
}
/*
Core benchmark functions
*/
__attribute__((noinline)) __attribute__((export_name("entrypoint"))) void
pm_main(void) {
pm_init_lib(&pm_lib);
pm_init_pattern(&pm_pattern);
pm_init_data(&pm_data, &pm_lib, &pm_pattern);
pm_result = pm_kernel(&pm_data);
pm_clean(&pm_data);
}
/***********************************************************************/
/* Allocate and initailize the test pattern, the template library, and
other necessary data structure. */
/***********************************************************************/
void
pm_init_data(pm_data_t *pmdata, pm_float_array_t *lib,
pm_float_array_t *pattern) {
int elsize = sizeof(float);
float x;
/* Getting the input parameters from the PCA C array structure */
pmdata->profile_size = lib->size[1];
pmdata->num_templates = lib->size[0];
pmdata->elsize = elsize;
pmdata->shift_ratio = 3.0f;
pmdata->template_profiles_db = lib->data;
pmdata->test_profile_db = pattern->data;
/* Equivalent to shift_size = roundf((float)profile_size / shift_ratio) */
x = (float) (pmdata->profile_size) / pmdata->shift_ratio;
pmdata->shift_size =
((x - (int) (x)) < 0.5f) ? (int) (pm_floor(x)) : (int) (pm_ceil(x));
pmdata->template_exceed = pm_init_array_1;
pmdata->test_exceed_means = pm_init_array_2;
pmdata->template_copy = pm_init_array_3;
pmdata->test_noise_db_array = pm_init_array_4;
pmdata->MSE_scores = pm_init_array_5;
pmdata->mag_shift_scores = pm_init_array_6;
pmdata->minimum_MSE_score = pm_init_array_7;
pmdata->all_shifted_test_db = pm_init_array_8;
}
/***********************************************************************/
/* Free up memory for all structures */
/***********************************************************************/
void
pm_clean(pm_data_t *pmdata) {
pmdata->test_exceed_means = 0;
pmdata->template_exceed = 0;
pmdata->template_copy = 0;
pmdata->test_noise_db_array = 0;
pmdata->MSE_scores = 0;
pmdata->mag_shift_scores = 0;
pmdata->minimum_MSE_score = 0;
pmdata->all_shifted_test_db = 0;
}
/***********************************************************************/
/* The pattern match kernel overlays two patterns to compute the likelihood
that the two vectors match. This process is performed on a library of
patterns. */
/***********************************************************************/
int
pm_kernel(pm_data_t *pmdata) {
const int elsize = pmdata->elsize; /* size of a single fp number */
const int shift_size = pmdata->shift_size; /* number of shifting to the
left and right of the test
profile */
const int profile_size =
pmdata->profile_size; /* number of pixels in a pattern */
const int num_templates =
pmdata->num_templates; /* number of library patterns */
float *test_profile_db = pmdata->test_profile_db; /* the test pattern */
float *template_profiles_db =
pmdata->template_profiles_db; /* the library of patterns */
float *test_noise_db_array =
pmdata->test_noise_db_array; /* the noise in the test pattern in an
array for fast copy */
float *all_shifted_test_db =
pmdata->all_shifted_test_db; /* the shifted test pattern */
int match_index; /* the index of the most likely template that matches the
test pattern */
int min_MSE_index; /* the index of the range shifts with
the lowest mean square error */
unsigned int num_template_exceed,
num_test_exceed; /* the number of pixels exceeded the test pattern
and a library template */
unsigned char mag_shift_scores_flag; /* flag that tells if the magnitude
scaling loop has been run
(existed just to save ops) */
float test_peak, template_peak; /* the maximum pixels of the test pattern
and a library template pattern */
float template_noise; /* the noise level of a library template */
float noise_shift, noise_shift2; /* temporary storage for calculating the
mse for range shifting */
float min_MSE,
match_score; /* temporary storage for finding the minimum mse */
float sumWeights_inv =
1.0f / profile_size; /* the inverse of the weights
used for calculating the mse */
/* Note: weights for the kernel would be application dependent.
They are set to 1 for our purposes */
float mag_db; /* the magnitude shifts in dB */
float power_shift,
ave_power_ratio; /* the diff of the avg shifted test profile power to
the avg template power */
float power_ratio; /* the mean power of the pixels of a template that
exceeded twice test noise */
float test_noise = (pm_pow10f(test_profile_db[0] *
0.1f) + /* noise level of the test pattern */
pm_pow10f(test_profile_db[profile_size - 1] * 0.1f)) *
0.5f;
/* since "shift_size/2" is used a lot, so we create a var to hold it */
int half_shift_size = (int) (pm_ceil((float) (shift_size) / 2.0f));
int template_index, current_shift; /* indices */
int patsize = profile_size * elsize; /* number of bytes of a pattern */
float *minimum_MSE_score = pmdata->minimum_MSE_score;
float *MSE_scores = pmdata->MSE_scores;
float *mag_shift_scores = pmdata->mag_shift_scores;
float test_noise_db =
(test_noise == 0.0f)
? -100.0f
: 10.0f * pm_log10f(pm_fabs(test_noise)); /* test noise in dB */
float test_noise_db_plus_3 =
test_noise_db + 3.0f; /* twice test noise in the
power domain, approximately +3dB */
float *template_copy = pmdata->template_copy;
unsigned char *template_exceed = pmdata->template_exceed;
float *test_exceed_means = pmdata->test_exceed_means;
int i, j; /* indices */
float
tmp1; /* temporary storage for calculating the mse for range shifting */
float sum_exceed; /* the sum of the test pattern pixels exceeded twice
test noise */
float template_exceed_mean = 0; /* the mean of a template pattern pixels
exceeded twice test noise */
float weighted_MSE; /* temporary storage for computing the weighted MSE */
/* These pointers are solely used for fast memory access */
float *cur_tp, *fptr, *fptr2, *fptr3, *endptr;
unsigned char *bptr;
/* Having an array of test noise for fast copying of noise returns */
__pragma_loopbound(64, 64);
for (i = 0; i < profile_size; i++)
test_noise_db_array[i] = test_noise_db;
/* Finding the maximum pixels of the test pattern */
fptr = test_profile_db;
test_peak = *fptr++;
__pragma_loopbound(63, 63);
for (i = 1; i < profile_size; i++, fptr++) {
if (test_peak < *fptr)
test_peak = *fptr;
}
/* Paddle array for all the possible range shifts. Essentially, we are
performing the following:
Adding these two portions to the beginning and end of the test pattern
| |
V V
|<------>| |<------>|
__ __
| | | |
| |___| |
| |
_________| |_________ <- test noise in dB domain
--------------------------------------- <- zero
|<--------------->|
original test pattern
The all_shifted_test_db will be accessed in a sliding window manner.
*/
pm_memcpy((void *) all_shifted_test_db, (void *) test_noise_db_array,
elsize * half_shift_size);
pm_memcpy((void *) (all_shifted_test_db + half_shift_size),
(void *) test_profile_db, elsize * profile_size);
pm_memcpy((void *) (all_shifted_test_db + half_shift_size + profile_size),
(void *) test_noise_db_array, elsize * half_shift_size);
/* Set the pixels to test noise in dB domain if pixel is less than test
noise in dB */
fptr = all_shifted_test_db + half_shift_size;
__pragma_loopbound(64, 64);
for (i = 0; i < profile_size; i++, fptr++) {
if (*fptr < test_noise_db)
*fptr = test_noise_db;
}
/* Calculating the mean of the pixels that exceeded twice test noise for
each possible shift of the test profile */
fptr2 = test_exceed_means;
__pragma_loopbound(21, 21);
for (current_shift = 0; current_shift < shift_size; current_shift++) {
/* Pointer arithmetics to find the start and end pointers */
if (current_shift < half_shift_size) {
endptr = all_shifted_test_db + current_shift + profile_size;
fptr = all_shifted_test_db + half_shift_size;
} else {
endptr = all_shifted_test_db + half_shift_size + profile_size;
fptr = all_shifted_test_db + current_shift;
}
/* Summing the pixels that exceed twice test noise for the current
* shifts */
sum_exceed = 0.0f;
num_test_exceed = 0;
__pragma_loopbound(53, 64);
while (fptr != endptr) {
if (*fptr > test_noise_db_plus_3) {
num_test_exceed++;
sum_exceed += *fptr;
}
fptr++;
}
*fptr2++ =
num_test_exceed ? sum_exceed / (float) (num_test_exceed) : 0.0f;
}
/* Loop over all the templates. Determine the best shift distance, then
the best gain adjustment. */
__pragma_loopbound(60, 60);
for (template_index = 0; template_index < num_templates; template_index++) {
cur_tp = template_profiles_db + (template_index * profile_size);
/* Scale the template profile we're currently working on so that its
peak is equal to the peak of the test profile */
fptr = cur_tp;
template_peak = *fptr++;
__pragma_loopbound(63, 63);
for (i = 1; i < profile_size; i++, fptr++) {
if (template_peak < *fptr)
template_peak = *fptr;
}
/* Additively adjust the noise level of this template profile in the
raw power domain so that its noise level matches the noise level
of the test profile */
/* --------------------------------------------------------------------
Setting up all the constants */
noise_shift = test_peak - template_peak;
pm_memset((void *) template_exceed, 0,
((int) sizeof(char)) * profile_size);
sum_exceed = 0.0f;
num_template_exceed = 0;
/* --------------------------------------------------------------------
The following blocks are optimized code that essentially
perform the operations immediately below. The calculation of the
template noise constants is done once the exponentials are complete
*/
/* template_profile = template_profile + test_peak - template_peak
template = 10 ^ (template_profile / 10)
template = template + test_noise - template_noise
if (input < fp_epsilon) then clip the input to -100 dB
template = log10( abs(template) )
template_profile = 10 * template + test_noise_db */
fptr = cur_tp;
__pragma_loopbound(64, 64);
for (i = 0; i < profile_size; i++) {
tmp1 = *fptr + noise_shift;
*fptr = pm_pow10f(tmp1 * 0.1f);
fptr++;
}
/* Calculates noise levels from first and last elements of the current
template */
template_noise = (cur_tp[0] + cur_tp[profile_size - 1]) * 0.5f;
noise_shift2 = test_noise - template_noise;
fptr = cur_tp;
__pragma_loopbound(64, 64);
for (i = 0; i < profile_size; i++) {
tmp1 = *fptr + noise_shift2;
if (tmp1 == 0.0f)
tmp1 = pm_MIN_NOISE;
*fptr = 10.0f * pm_log10f(pm_fabs(tmp1)) + test_noise_db;
/* Because many of the operations in the search for the best shift
amount depend on knowledge of which pixels in the template
have values exceeding twice test_noise (recall that 3db is
roughly equivalent to a doubling of raw power), we'll put those
indices in template_exceed */
if (*fptr > test_noise_db_plus_3) {
template_exceed[i] = 1;
num_template_exceed++;
sum_exceed += *fptr;
}
fptr++;
}
/* Note: The following block has 4 different branches:
1. Both the current template and the test pattern have values
exceeded twice test noise.
2. Only the current template has values exceeded twice test noise.
3. Only the test pattern has values exceeded twice test noise.
4. Neither the current template nor the test pattern has values
exceeded twice test noise.
*/
/* If there is at least one pixel in the template we're
currently working on whose value exceeds twice test_noise */
if (num_template_exceed) {
template_exceed_mean = sum_exceed / (float) (num_template_exceed);
fptr3 = test_exceed_means;
__pragma_loopbound(21, 21);
for (current_shift = 0; current_shift < shift_size;
current_shift++, fptr3++) {
/* Work on a copy of the template we're currently working on */
pm_memcpy((void *) template_copy, (void *) cur_tp, patsize);
/* If there is at least one pixel in the shifted test profile
whose value exceeds twice test noise. */
if (*fptr3 != 0.0f) {
/* CASE 1 */
/* Considering only those pixels whose powers exceed twice
test noise, compute the difference of the mean power in
template we're currently working on. */
power_ratio = *fptr3 - template_exceed_mean;
/* Scale template values that exceed twice test noise by
power ratio and set the values that are less than test
noise in db to test noise in db */
fptr = template_copy;
bptr = template_exceed;
__pragma_loopbound(64, 64);
for (i = 0; i < profile_size; i++, fptr++) {
if (*bptr++)
*fptr += power_ratio;
if (*fptr < test_noise_db)
*fptr = test_noise_db;
}
} /* if (*fptr3 != 0.0f) */
else {
/* CASE 2 */
/* Set those pixels in the template we're currently working
on whose values are less than test_noise to test_noise.
*/
fptr = cur_tp;
__pragma_loopbound(64, 64);
for (i = 0; i < profile_size; i++) {
if (*fptr++ < test_noise_db)
template_copy[i] = test_noise_db;
}
} /* else ... if (num_test_exceed) */
/* Compute the weighted MSE */
weighted_MSE = 0.0f;
fptr = all_shifted_test_db + current_shift;
fptr2 = template_copy;
__pragma_loopbound(64, 64);
for (i = 0; i < profile_size; i++) {
tmp1 = *fptr++ - *fptr2++;
weighted_MSE += tmp1 * tmp1;
}
MSE_scores[current_shift] = weighted_MSE * sumWeights_inv;
} /* for current_shift */
} else { /* if (num_template_exceed) */
fptr3 = test_exceed_means;
__pragma_loopbound(0, 0);
for (current_shift = 0; current_shift < shift_size;
current_shift++) {
/* CASE 3 */
/* If there is at least one pixel that exceeds twice test noise
*/
if (*fptr3++ != 0.0f)
fptr2 = cur_tp;
else {
/* CASE 4 */
/* Work on a copy of the template we're currently working
* on. */
pm_memcpy((void *) template_copy, (void *) cur_tp, patsize);
fptr = cur_tp;
__pragma_loopbound(0, 0);
for (i = 0; i < profile_size; i++) {
if (*fptr++ < test_noise_db)
template_copy[i] = test_noise_db;
}
fptr2 = template_copy;
}
/* Compute the weighted MSE */
weighted_MSE = 0.0f;
fptr = all_shifted_test_db + current_shift;
__pragma_loopbound(0, 0);
for (i = 0; i < profile_size; i++) {
tmp1 = *fptr++ - *fptr2++;
weighted_MSE += tmp1 * tmp1;
}
MSE_scores[current_shift] = weighted_MSE * sumWeights_inv;
} /* for current_shift */
} /* else .. if (num_template_exceed) */
/* Finding the minimum MSE for range shifting */
fptr = MSE_scores;
min_MSE_index = 0;
min_MSE = *fptr++;
__pragma_loopbound(20, 20);
for (i = 1; i < shift_size; i++, fptr++) {
if (min_MSE > *fptr) {
min_MSE = *fptr;
min_MSE_index = i;
}
}
/* Work on a copy of the template we're currently working on. */
pm_memcpy((void *) template_copy, (void *) cur_tp, patsize);
mag_shift_scores_flag = 1;
if (test_exceed_means[min_MSE_index] != 0.0f) {
if (num_template_exceed) {
/* Compute the difference of the average shifted test profile
power to the average template power */
ave_power_ratio =
test_exceed_means[min_MSE_index] - template_exceed_mean;
/* Loop over all possible magnitude shifts */
__pragma_loopbound(21, 21);
for (j = 0, mag_db = -5.0f; mag_db <= 5.0f; mag_db += 0.5f) {
power_shift = ave_power_ratio + mag_db;
bptr = template_exceed;
__pragma_loopbound(64, 64);
for (i = 0; i < profile_size; i++) {
if (*bptr++)
template_copy[i] = cur_tp[i] + power_shift;
}
/* Compute the weighted MSE */
weighted_MSE = 0.0f;
fptr = all_shifted_test_db + min_MSE_index;
fptr2 = template_copy;
__pragma_loopbound(64, 64);
for (i = 0; i < profile_size; i++) {
tmp1 = *fptr++ - *fptr2++;
weighted_MSE += tmp1 * tmp1;
}
mag_shift_scores[j++] = weighted_MSE * sumWeights_inv;
} /* for mag_db */
} /* if (num_template_exceed) */
} else { /* if (num_test_exceed) */
/* Set those pixels in the template we're currently working on
whose values are less than test_noise to test_noise. */
fptr = cur_tp;
__pragma_loopbound(64, 64);
for (i = 0; i < profile_size; i++) {
if (*fptr++ < test_noise_db)
template_copy[i] = test_noise_db;
}
/* Compute the weighted MSE */
weighted_MSE = 0.0f;
fptr = all_shifted_test_db + min_MSE_index;
fptr2 = template_copy;
__pragma_loopbound(64, 64);
for (i = 0; i < profile_size; i++) {
tmp1 = *fptr++ - *fptr2++;
weighted_MSE += tmp1 * tmp1;
}
minimum_MSE_score[template_index] = weighted_MSE * sumWeights_inv;
mag_shift_scores_flag = 0;
} /* if (num_test_exceed) */
/* If magnitude shifting has performed above */
if (mag_shift_scores_flag) {
/* Find the minimum MSE for magnitude scaling */
fptr = mag_shift_scores;
min_MSE = *fptr++;
__pragma_loopbound(20, 20);
for (i = 1; i < 21; i++, fptr++) {
if (min_MSE > *fptr)
min_MSE = *fptr;
}
minimum_MSE_score[template_index] = min_MSE;
}
} /* for template_index */
/* Find the minimum mean square error */
fptr = minimum_MSE_score;
match_index = 0;
match_score = *fptr++;
__pragma_loopbound(59, 59);
for (i = 1; i < num_templates; i++, fptr++) {
if (match_score > *fptr) {
match_score = *fptr;
match_index = i;
}
}
return match_index;
}
/*
Main function
*/
__attribute__((noinline)) __attribute__((export_name("main"))) int
main(void) {
pm_init();
pm_main();
return pm_return();
}