/*

  This file is part of the TACLeBench benchmark suite.
  Version 2.0

  Name: pm_libm.c

  Author: Hector Chan
          MIT Lincoln Laboratory

  Function: This file contains the C math library functions used by pm.

  Source: HPEC Challenge Benchmark Suite, Pattern Match Kernel Benchmark

  Original name: pm

  Changes: See ChangeLog.txt

  License: BSD 3-clause

*/


/*
  Include section
*/

#include "pm_math.h"


/*
  Declaration of global variables
*/

/* ln( 10 ) in single precision */
#define pm_LOG10 2.302585093f

/* Series coefficients for the pm_pow10f and pm_log10f functions below.
   Both tables are filled in by pm_math_init(). */
static float pm_pow_coeff[ 19 ];
static float pm_log_coeff[ 16 ];


/*
  Math functions
*/

/***********************************************************************/
/* The pow and log functions turned out to be the bottleneck of this
   kernel, so pm carries its own single-precision log and pow instead
   of the ones from the standard C math library.

   pm_math_init stores the series coefficients those two functions
   read:
     pm_pow_coeff[ k ] = 1 / ( k + 2 )!     for k = 0 .. 18
     pm_log_coeff[ k ] = 1 / ( 2 * k + 3 )  for k = 0 .. 15                */
/***********************************************************************/

void pm_math_init( void )
{
  /* reciprocal factorials */
  pm_pow_coeff[ 0 ]  = 0.5f;               /* 1/2!  */
  pm_pow_coeff[ 1 ]  = 0.166666667f;       /* 1/3!  */
  pm_pow_coeff[ 2 ]  = 0.041666666f;       /* 1/4!  */
  pm_pow_coeff[ 3 ]  = 8.333333333e-3f;    /* 1/5!  */
  pm_pow_coeff[ 4 ]  = 1.388888889e-3f;    /* 1/6!  */
  pm_pow_coeff[ 5 ]  = 1.984126984e-4f;    /* 1/7!  */
  pm_pow_coeff[ 6 ]  = 2.480158730e-5f;    /* 1/8!  */
  pm_pow_coeff[ 7 ]  = 2.755731922e-6f;    /* 1/9!  */
  pm_pow_coeff[ 8 ]  = 2.755731922e-7f;    /* 1/10! */
  pm_pow_coeff[ 9 ]  = 2.505210839e-8f;    /* 1/11! */
  pm_pow_coeff[ 10 ] = 2.087675699e-9f;    /* 1/12! */
  pm_pow_coeff[ 11 ] = 1.605904384e-10f;   /* 1/13! */
  pm_pow_coeff[ 12 ] = 1.147074560e-11f;   /* 1/14! */
  pm_pow_coeff[ 13 ] = 7.647163732e-13f;   /* 1/15! */
  pm_pow_coeff[ 14 ] = 4.779477332e-14f;   /* 1/16! */
  pm_pow_coeff[ 15 ] = 2.811457254e-15f;   /* 1/17! */
  pm_pow_coeff[ 16 ] = 1.561920697e-16f;   /* 1/18! */
  pm_pow_coeff[ 17 ] = 8.220635247e-18f;   /* 1/19! */
  pm_pow_coeff[ 18 ] = 4.110317623e-19f;   /* 1/20! */

  /* reciprocals of the odd numbers 3 .. 33 */
  pm_log_coeff[ 0 ]  = 0.333333333f;       /* 1/3  */
  pm_log_coeff[ 1 ]  = 0.2f;               /* 1/5  */
  pm_log_coeff[ 2 ]  = 0.142857143f;       /* 1/7  */
  pm_log_coeff[ 3 ]  = 0.111111111f;       /* 1/9  */
  pm_log_coeff[ 4 ]  = 9.090909091e-2f;    /* 1/11 */
  pm_log_coeff[ 5 ]  = 7.692307692e-2f;    /* 1/13 */
  pm_log_coeff[ 6 ]  = 6.666666667e-2f;    /* 1/15 */
  pm_log_coeff[ 7 ]  = 5.882352941e-2f;    /* 1/17 */
  pm_log_coeff[ 8 ]  = 5.263157895e-2f;    /* 1/19 */
  pm_log_coeff[ 9 ]  = 4.761904762e-2f;    /* 1/21 */
  pm_log_coeff[ 10 ] = 4.347826087e-2f;    /* 1/23 */
  pm_log_coeff[ 11 ] = 0.04f;              /* 1/25 */
  pm_log_coeff[ 12 ] = 3.703703704e-2f;    /* 1/27 */
  pm_log_coeff[ 13 ] = 3.448275862e-2f;    /* 1/29 */
  pm_log_coeff[ 14 ] = 3.225806452e-2f;    /* 1/31 */
  pm_log_coeff[ 15 ] = 3.030303030e-2f;    /* 1/33 */
}


/***********************************************************************/
/* This single fp pow base 10 function implements the corresponding
   Taylor series. The loop has been unrolled to save ops.
*/
/***********************************************************************/

/*
  pm_pow10f: single-precision approximation of 10 raised to exp.

  Rewrites 10^exp as e^x with x = exp * pm_LOG10 and sums the series
    1 + x + x^2/2! + x^3/3! + ... + x^20/20!
  The factors 1/2! .. 1/20! are read from pm_pow_coeff[ 0 .. 18 ]. That
  table is only written by pm_math_init(), so it must have been called
  beforehand. The steps are written out one by one, without a loop.
*/
float pm_pow10f ( float exp )
{
  float const x = exp * pm_LOG10;   /* exponent for base e */
  float power = x;                  /* running power x^k, starts at k = 1 */
  float sum = 1.0f;                 /* k = 0 term */

  sum += power;                     /* k = 1 term */

  power *= x;
  sum += pm_pow_coeff[ 0 ] * power;    /* x^2 / 2! */
  power *= x;
  sum += pm_pow_coeff[ 1 ] * power;    /* x^3 / 3! */
  power *= x;
  sum += pm_pow_coeff[ 2 ] * power;    /* x^4 / 4! */
  power *= x;
  sum += pm_pow_coeff[ 3 ] * power;    /* x^5 / 5! */
  power *= x;
  sum += pm_pow_coeff[ 4 ] * power;    /* x^6 / 6! */
  power *= x;
  sum += pm_pow_coeff[ 5 ] * power;    /* x^7 / 7! */
  power *= x;
  sum += pm_pow_coeff[ 6 ] * power;    /* x^8 / 8! */
  power *= x;
  sum += pm_pow_coeff[ 7 ] * power;    /* x^9 / 9! */
  power *= x;
  sum += pm_pow_coeff[ 8 ] * power;    /* x^10 / 10! */
  power *= x;
  sum += pm_pow_coeff[ 9 ] * power;    /* x^11 / 11! */
  power *= x;
  sum += pm_pow_coeff[ 10 ] * power;   /* x^12 / 12! */
  power *= x;
  sum += pm_pow_coeff[ 11 ] * power;   /* x^13 / 13! */
  power *= x;
  sum += pm_pow_coeff[ 12 ] * power;   /* x^14 / 14! */
  power *= x;
  sum += pm_pow_coeff[ 13 ] * power;   /* x^15 / 15! */
  power *= x;
  sum += pm_pow_coeff[ 14 ] * power;   /* x^16 / 16! */
  power *= x;
  sum += pm_pow_coeff[ 15 ] * power;   /* x^17 / 17! */
  power *= x;
  sum += pm_pow_coeff[ 16 ] * power;   /* x^18 / 18! */
  power *= x;
  sum += pm_pow_coeff[ 17 ] * power;   /* x^19 / 19! */
  power *= x;
  sum += pm_pow_coeff[ 18 ] * power;   /* x^20 / 20! */

  return sum;
}


/***********************************************************************/
/* This single fp log base 10 function implements the corresponding
   Taylor series. The loop has been unrolled to save ops.
*/
/***********************************************************************/

/*
  pm_log10f: single-precision approximation of log10( exp ).

  With y = ( exp - 1 ) / ( exp + 1 ) the natural logarithm is
    ln( exp ) = 2 * ( y + y^3/3 + y^5/5 + ... )
  The series is cut off after the y^33/33 term. The factors 1/3 .. 1/33
  are read from pm_log_coeff[ 0 .. 15 ]; that table is only written by
  pm_math_init(), so it must have been called beforehand. The sum is
  finally scaled by 2 / ln( 10 ) to obtain the base 10 logarithm.

  exp == -1 divides by zero.
  NOTE(review): presumably callers only pass exp > 0 -- confirm.
*/
float pm_log10f ( float exp )
{
  float mul = ( exp - 1.0f ) / ( exp + 1.0f );   /* y, later y^3, y^5, ... */
  float const term = mul * mul;                  /* y^2 */
  float const *fptr = pm_log_coeff;
  float ans = mul;                               /* first series term: y */

  mul *= term;  ans += *fptr++ * mul;   /* y^3 / 3 */
  mul *= term;  ans += *fptr++ * mul;   /* y^5 / 5 */
  mul *= term;  ans += *fptr++ * mul;   /* y^7 / 7 */
  mul *= term;  ans += *fptr++ * mul;   /* y^9 / 9 */
  mul *= term;  ans += *fptr++ * mul;   /* y^11 / 11 */
  mul *= term;  ans += *fptr++ * mul;   /* y^13 / 13 */
  mul *= term;  ans += *fptr++ * mul;   /* y^15 / 15 */
  mul *= term;  ans += *fptr++ * mul;   /* y^17 / 17 */
  mul *= term;  ans += *fptr++ * mul;   /* y^19 / 19 */
  mul *= term;  ans += *fptr++ * mul;   /* y^21 / 21 */
  mul *= term;  ans += *fptr++ * mul;   /* y^23 / 23 */
  mul *= term;  ans += *fptr++ * mul;   /* y^25 / 25 */
  mul *= term;  ans += *fptr++ * mul;   /* y^27 / 27 */
  mul *= term;  ans += *fptr++ * mul;   /* y^29 / 29 */
  mul *= term;  ans += *fptr++ * mul;   /* y^31 / 31 */
  mul *= term;  ans += *fptr++ * mul;   /* y^33 / 33 */

  ans *= 0.86858896381f;  /* ans = ans * 2 / log(10) */

  return ans;
}


/*
  pm_fabs: returns n if n >= 0 and -n otherwise.
*/
float pm_fabs( float n )
{
  if ( n >= 0 ) {
    return n;
  }
  return -n;
}


/*
  pm_truncate: drops the fractional part of arg (rounds toward zero).

  Floats with a magnitude of 2^23 or more have no fractional part, so
  they are returned unchanged; the same guard also passes infinities and
  NaN through. This keeps the integer conversion below inside the range
  where it is defined.
*/
static float pm_truncate( float arg )
{
  float const integral_limit = 8388608.0f;   /* 2^23 */

  /* written as a negated range test so that NaN takes this branch too */
  if ( !( arg > -integral_limit && arg < integral_limit ) ) {
    return arg;
  }

  /* |arg| < 2^23 fits into long on every conforming implementation */
  return ( float )( long )arg;
}


/*
  pm_floor: largest whole number that is not greater than arg.

  Truncation rounds toward zero, which is one too high for negative
  values with a fractional part; only those are corrected. Whole numbers
  such as -2.0f are returned unchanged.
*/
float pm_floor( float arg )
{
  float const whole = pm_truncate( arg );

  if ( whole > arg ) {
    return whole - 1.0f;
  }
  return whole;
}


/*
  pm_ceil: smallest whole number that is not less than arg.

  Truncation rounds toward zero, which is one too low for positive
  values with a fractional part; only those are corrected. Whole numbers
  such as 2.0f are returned unchanged.
*/
float pm_ceil( float arg )
{
  float const whole = pm_truncate( arg );

  if ( whole < arg ) {
    return whole + 1.0f;
  }
  return whole;
}