perf-test: best- and worst-case tests for evaluating fast-breakpoint performance (+ results).
git-svn-id: https://www4.informatik.uni-erlangen.de/i4svn/danceos/trunk/devel/fail@1745 8c4709b5-6ec9-48aa-a5cd-a96041d1645a
This commit is contained in:
17
src/experiments/perf-test/CMakeLists.txt
Normal file
17
src/experiments/perf-test/CMakeLists.txt
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
# Identity of this experiment: the name is reused for the generated aspect
# header and the library target below; the type matches the experiment class.
set(EXPERIMENT_NAME perf-test)
set(EXPERIMENT_TYPE PerfTestExperiment)

# Generate instantiate-<name>.ah in the build directory from the shared
# template one directory up. @ONLY limits substitution to @VAR@ references.
configure_file(../instantiate-experiment.ah.in ${CMAKE_CURRENT_BINARY_DIR}/instantiate-${EXPERIMENT_NAME}.ah @ONLY)

# Files that make up the experiment.
set(MY_EXPERIMENT_SRCS experiment.hpp experiment.cc)

# Add the build directory (where the generated header is written) to the
# include search path.
include_directories(${CMAKE_CURRENT_BINARY_DIR})

# Build the experiment as the library target fail-<name>.
add_library(fail-${EXPERIMENT_NAME} ${MY_EXPERIMENT_SRCS})
|
||||||
58
src/experiments/perf-test/experiment.cc
Normal file
58
src/experiments/perf-test/experiment.cc
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
#include "util/Logger.hpp"
|
||||||
|
#include "util/WallclockTimer.hpp"
|
||||||
|
|
||||||
|
#include "experiment.hpp"
|
||||||
|
#include "sal/SALInst.hpp"
|
||||||
|
#include "sal/Listener.hpp"
|
||||||
|
#include "config/FailConfig.hpp"
|
||||||
|
|
||||||
|
// Check if configuration dependencies are satisfied:
|
||||||
|
#if !defined(CONFIG_EVENT_BREAKPOINTS)
|
||||||
|
#error This experiment just needs breakpoints. You may want to enable Fast-Breakpoints as well.
|
||||||
|
#endif
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace fail;
|
||||||
|
|
||||||
|
bool PerfTestExperiment::run()
|
||||||
|
{
|
||||||
|
Logger log("PERF", false);
|
||||||
|
log << "Experiment started (measuring ellapsed time using wallclock timer)..." << endl;
|
||||||
|
|
||||||
|
// Performance tests:
|
||||||
|
WallclockTimer tm;
|
||||||
|
tm.startTimer();
|
||||||
|
#if 1
|
||||||
|
log << "Activated: CASE A (Best-Case)..." << endl;
|
||||||
|
// Case A): A lot of non-BP listeners a only one (or none) BPs:
|
||||||
|
const unsigned NON_BP_COUNT = 50;
|
||||||
|
log << "Adding " << NON_BP_COUNT << " non-BP listeners..." << endl;
|
||||||
|
MemReadListener mrl[NON_BP_COUNT];
|
||||||
|
for (unsigned i = 0; i < NON_BP_COUNT; ++i) {
|
||||||
|
mrl[i].setWatchAddress(static_cast<address_t>(-1));
|
||||||
|
simulator.addListener(&mrl[i]);
|
||||||
|
}
|
||||||
|
log << "Adding one breakpoint listener and returning to simulator..." << endl;
|
||||||
|
BPSingleListener bp(0x00003c34);
|
||||||
|
simulator.addListenerAndResume(&bp);
|
||||||
|
#else
|
||||||
|
log << "Activated: CASE B (Worst-Case)..." << endl;
|
||||||
|
// Case B): n (non matching) BP listeners and no other listener types
|
||||||
|
const unsigned BP_COUNT = 50;
|
||||||
|
log << "Adding " << BP_COUNT << " BPSingleListeners..." << endl;
|
||||||
|
BPSingleListener bsl[BP_COUNT];
|
||||||
|
for (unsigned i = 0; i < BP_COUNT; ++i) {
|
||||||
|
bsl[i].setWatchInstructionPointer(0xFFFFFFF); // we do not want them to trigger...
|
||||||
|
simulator.addListener(&bsl[i]);
|
||||||
|
}
|
||||||
|
log << "Adding final BPSingleListener and continuing simulation..." << endl;
|
||||||
|
// This is required to terminate the experiment:
|
||||||
|
BPSingleListener final(0x00003c34);
|
||||||
|
simulator.addListenerAndResume(&final);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
tm.stopTimer();
|
||||||
|
log << "Time elapsed: " << tm << "s. Done, Bye!" << endl;
|
||||||
|
simulator.terminate();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
13
src/experiments/perf-test/experiment.hpp
Normal file
13
src/experiments/perf-test/experiment.hpp
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
// Include guard. Identifiers containing a double underscore are reserved for
// the C++ implementation, so the guard macro deliberately avoids them.
#ifndef PERF_TEST_EXPERIMENT_HPP
#define PERF_TEST_EXPERIMENT_HPP

#include "efw/ExperimentFlow.hpp"

/**
 * Experiment class of the perf-test experiment, derived from
 * fail::ExperimentFlow.
 *
 * Only the interface is declared here; run() is defined outside this header.
 */
class PerfTestExperiment : public fail::ExperimentFlow {
public:
	/** Creates the experiment; there is no state to initialise. */
	PerfTestExperiment() { }

	/**
	 * Executes the experiment.
	 * @return a boolean result (its meaning is defined by the caller of the
	 *         experiment flow and is not visible from this header)
	 */
	bool run();
};

#endif // PERF_TEST_EXPERIMENT_HPP
|
||||||
103
src/experiments/perf-test/results.txt
Normal file
103
src/experiments/perf-test/results.txt
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
****************************************************************************************************
|
||||||
|
RESULTS:
|
||||||
|
****************************************************************************************************
|
||||||
|
(A) WITH FAST_BREAKPOINTS (Default mode):
|
||||||
|
|
||||||
|
hsc-simple (r1636) - phase 1:
|
||||||
|
real 1m8.604s
|
||||||
|
user 1m8.384s
|
||||||
|
sys 0m0.132s
|
||||||
|
|
||||||
|
hsc-simple (r1636) - phase 2:
|
||||||
|
real 0m0.591s
|
||||||
|
user 0m0.064s
|
||||||
|
sys 0m0.076s
|
||||||
|
|
||||||
|
perf-test (r1745): Best- vs. Worst-Case with Wallclock-Timer (NON_BP_COUNT = 50 and BP_COUNT = 50):
|
||||||
|
Case A: 511.46s (= ~9min, around 5,6 times faster than (B).a)
|
||||||
|
Case B: 4731.53s (= ~79min, around 1,1 times slower than (B).b)
|
||||||
|
|
||||||
|
----------------------------------------------
|
||||||
|
|
||||||
|
(B) WITHOUT FAST_BREAKPOINTS (Default mode):
|
||||||
|
|
||||||
|
hsc-simple (r1636) - phase 1:
|
||||||
|
real 0m34.712s
|
||||||
|
user 0m34.246s
|
||||||
|
sys 0m00.148s
|
||||||
|
|
||||||
|
hsc-simple (r1636) - phase 2:
|
||||||
|
real 0m0.429s
|
||||||
|
user 0m0.048s
|
||||||
|
sys 0m0.084s
|
||||||
|
|
||||||
|
perf-test (r1745): Best- vs. Worst-Case with Wallclock-Timer (NON_BP_COUNT = 50 and BP_COUNT = 50):
|
||||||
|
Case A: 2853.63s (= 47min)
|
||||||
|
Case B: 4214.03s (= 70min)
|
||||||
|
|
||||||
|
----------------------------------------------
|
||||||
|
|
||||||
|
(C) WITH FAST_BREAKPOINTS (Release mode):
|
||||||
|
|
||||||
|
hsc-simple (r1636) - phase 1:
|
||||||
|
real 0m13.341s
|
||||||
|
user 0m12.377s
|
||||||
|
sys 0m00.168s
|
||||||
|
|
||||||
|
hsc-simple (r1636) - phase 2:
|
||||||
|
real 0m0.506s
|
||||||
|
user 0m0.032s
|
||||||
|
sys 0m0.100s
|
||||||
|
|
||||||
|
perf-test (r1745): Best- vs. Worst-Case with Wallclock-Timer (NON_BP_COUNT = 50 and BP_COUNT = 50):
|
||||||
|
Case A: 43.0115s (< 1min, around 7,5 times faster than (D).a)
|
||||||
|
Case B: 385.547s (= ~6min, around 1,5 times faster than (D).b)
|
||||||
|
|
||||||
|
----------------------------------------------
|
||||||
|
|
||||||
|
(D) WITHOUT FAST_BREAKPOINTS (Release mode):
|
||||||
|
|
||||||
|
hsc-simple (r1636) - phase 1:
|
||||||
|
real 0m28.806s
|
||||||
|
user 0m28.214s
|
||||||
|
sys 0m00.160s
|
||||||
|
|
||||||
|
hsc-simple (r1636) - phase 2:
|
||||||
|
real 0m0.565s
|
||||||
|
user 0m0.052s
|
||||||
|
sys 0m0.084s
|
||||||
|
|
||||||
|
perf-test (r1745): Best- vs. Worst-Case with Wallclock-Timer (NON_BP_COUNT = 50 and BP_COUNT = 50):
|
||||||
|
Case A: 321.594s (= ~5min)
|
||||||
|
Case B: 587.698s (= ~9min)
|
||||||
|
|
||||||
|
****************************************************************************************************
|
||||||
|
EVALUATION:
|
||||||
|
****************************************************************************************************
|
||||||
|
Note: These are just exemplary results based on the observed values (see above).
|
||||||
|
|
||||||
|
- The (former) BufferCaches enabled a speedup of up to 2,5x (according to Martin Unzer).
|
||||||
|
- hsc-simple: Fast-Breakpoints are only faster if compiled in Release mode (yields a
|
||||||
|
speedup of up to 2x).
|
||||||
|
- hsc-simple: Unfortunately, they are also slower by a factor of 2, if compiled in
|
||||||
|
Default-Mode (and probably in Debug mode, too).
|
||||||
|
- perf-test: Except for case B in Default mode, Fast-Breakpoints enable a speedup that
|
||||||
|
ranges from 1,5 to 7,5! For case B (in Default mode -> no optimization), the Fast-
|
||||||
|
Breakpoint implementation slows down the overall execution speed by a factor of (only)
|
||||||
|
1,1. However, for case A (Best-Case) we assume that the overall speedup (compared to
|
||||||
|
the corresponding case where Fast-Breakpoints are switched off) will tend to rise
|
||||||
|
when the experiment parameter NON_BP_COUNT is increased.
|
||||||
|
|
||||||
|
****************************************************************************************************
|
||||||
|
POSSIBLE OPTIMIZATIONS:
|
||||||
|
****************************************************************************************************
|
||||||
|
Note: The following observations and conjectures are partly derived from the analysis of the
|
||||||
|
callgrind profile (using kcachegrind).
|
||||||
|
|
||||||
|
(i) gather() should be inlined. (At the moment it is not inlined, because this avoids an include cycle.)
|
||||||
|
(ii) Bypass the construction of a ResultSet object (the bypass would avoid an additional iteration
|
||||||
|
over the elements stored in the ResultSet itself), by calling makeActive in gather()
|
||||||
|
(iii) Complete the implementation of the PerfVecSortedSingleBP class (uses binary search in IPs)
|
||||||
|
|
||||||
|
=> (i) won't affect the speed in Default and Debug mode. (ii) should enable a speedup in all
|
||||||
|
cases. (iii) will only improve the speed when many *BPSingleListeners* are in use.
|
||||||
Reference in New Issue
Block a user