perf-test: best- and worst-case tests for evaluating fast-breakpoint performance (+ results).
git-svn-id: https://www4.informatik.uni-erlangen.de/i4svn/danceos/trunk/devel/fail@1745 8c4709b5-6ec9-48aa-a5cd-a96041d1645a
This commit is contained in:
17
src/experiments/perf-test/CMakeLists.txt
Normal file
17
src/experiments/perf-test/CMakeLists.txt
Normal file
@ -0,0 +1,17 @@
|
||||
# Identity of this experiment: its directory/library name and the name of the
# experiment class (declared in experiment.hpp) that implements it.
set(EXPERIMENT_NAME perf-test)
set(EXPERIMENT_TYPE PerfTestExperiment)

# Generate instantiate-perf-test.ah in the build directory from the template
# one directory up. @ONLY restricts substitution to @VAR@ references.
configure_file(
  ../instantiate-experiment.ah.in
  ${CMAKE_CURRENT_BINARY_DIR}/instantiate-${EXPERIMENT_NAME}.ah
  @ONLY
)

# Files that make up the experiment.
set(MY_EXPERIMENT_SRCS experiment.hpp experiment.cc)

# The build directory holds the file generated above, so put it on the
# include path.
include_directories(${CMAKE_CURRENT_BINARY_DIR})

# Build the experiment as the library target fail-perf-test.
add_library(fail-${EXPERIMENT_NAME} ${MY_EXPERIMENT_SRCS})
|
||||
58
src/experiments/perf-test/experiment.cc
Normal file
58
src/experiments/perf-test/experiment.cc
Normal file
@ -0,0 +1,58 @@
|
||||
#include "util/Logger.hpp"
|
||||
#include "util/WallclockTimer.hpp"
|
||||
|
||||
#include "experiment.hpp"
|
||||
#include "sal/SALInst.hpp"
|
||||
#include "sal/Listener.hpp"
|
||||
#include "config/FailConfig.hpp"
|
||||
|
||||
// Check if configuration dependencies are satisfied:
|
||||
#if !defined(CONFIG_EVENT_BREAKPOINTS)
|
||||
#error This experiment just needs breakpoints. You may want to enable Fast-Breakpoints as well.
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
using namespace fail;
|
||||
|
||||
bool PerfTestExperiment::run()
|
||||
{
|
||||
Logger log("PERF", false);
|
||||
log << "Experiment started (measuring ellapsed time using wallclock timer)..." << endl;
|
||||
|
||||
// Performance tests:
|
||||
WallclockTimer tm;
|
||||
tm.startTimer();
|
||||
#if 1
|
||||
log << "Activated: CASE A (Best-Case)..." << endl;
|
||||
// Case A): A lot of non-BP listeners a only one (or none) BPs:
|
||||
const unsigned NON_BP_COUNT = 50;
|
||||
log << "Adding " << NON_BP_COUNT << " non-BP listeners..." << endl;
|
||||
MemReadListener mrl[NON_BP_COUNT];
|
||||
for (unsigned i = 0; i < NON_BP_COUNT; ++i) {
|
||||
mrl[i].setWatchAddress(static_cast<address_t>(-1));
|
||||
simulator.addListener(&mrl[i]);
|
||||
}
|
||||
log << "Adding one breakpoint listener and returning to simulator..." << endl;
|
||||
BPSingleListener bp(0x00003c34);
|
||||
simulator.addListenerAndResume(&bp);
|
||||
#else
|
||||
log << "Activated: CASE B (Worst-Case)..." << endl;
|
||||
// Case B): n (non matching) BP listeners and no other listener types
|
||||
const unsigned BP_COUNT = 50;
|
||||
log << "Adding " << BP_COUNT << " BPSingleListeners..." << endl;
|
||||
BPSingleListener bsl[BP_COUNT];
|
||||
for (unsigned i = 0; i < BP_COUNT; ++i) {
|
||||
bsl[i].setWatchInstructionPointer(0xFFFFFFF); // we do not want them to trigger...
|
||||
simulator.addListener(&bsl[i]);
|
||||
}
|
||||
log << "Adding final BPSingleListener and continuing simulation..." << endl;
|
||||
// This is required to terminate the experiment:
|
||||
BPSingleListener final(0x00003c34);
|
||||
simulator.addListenerAndResume(&final);
|
||||
#endif
|
||||
|
||||
tm.stopTimer();
|
||||
log << "Time elapsed: " << tm << "s. Done, Bye!" << endl;
|
||||
simulator.terminate();
|
||||
return true;
|
||||
}
|
||||
13
src/experiments/perf-test/experiment.hpp
Normal file
13
src/experiments/perf-test/experiment.hpp
Normal file
@ -0,0 +1,13 @@
|
||||
#ifndef __PERF_TEST_EXPERIMENT_HPP__
|
||||
#define __PERF_TEST_EXPERIMENT_HPP__
|
||||
|
||||
#include "efw/ExperimentFlow.hpp"
|
||||
|
||||
/**
 * \class PerfTestExperiment
 * \brief Experiment flow of the perf-test experiment.
 *
 * The complete experiment logic lives in run(), which is defined in
 * experiment.cc.
 */
class PerfTestExperiment : public fail::ExperimentFlow {
public:
	/// The experiment has no state of its own to initialise.
	PerfTestExperiment() {}

	/// Runs the performance measurement; see experiment.cc for the details.
	bool run();
};
|
||||
|
||||
#endif // __PERF_TEST_EXPERIMENT_HPP__
|
||||
103
src/experiments/perf-test/results.txt
Normal file
103
src/experiments/perf-test/results.txt
Normal file
@ -0,0 +1,103 @@
|
||||
****************************************************************************************************
|
||||
RESULTS:
|
||||
****************************************************************************************************
|
||||
(A) WITH FAST_BREAKPOINTS (Default mode):
|
||||
|
||||
hsc-simple (r1636) - phase 1:
|
||||
real 1m8.604s
|
||||
user 1m8.384s
|
||||
sys 0m0.132s
|
||||
|
||||
hsc-simple (r1636) - phase 2:
|
||||
real 0m0.591s
|
||||
user 0m0.064s
|
||||
sys 0m0.076s
|
||||
|
||||
perf-test (r1745): Best- vs. Worst-Case with Wallclock-Timer (NON_BP_COUNT = 50 and BP_COUNT = 50):
|
||||
Case A: 511.46s (= ~9min, around 5,6 times faster than (B).a)
|
||||
Case B: 4731.53s (= ~79min, around 1,1 times slower than (B).b)
|
||||
|
||||
----------------------------------------------
|
||||
|
||||
(B) WITHOUT FAST_BREAKPOINTS (Default mode):
|
||||
|
||||
hsc-simple (r1636) - phase 1:
|
||||
real 0m34.712s
|
||||
user 0m34.246s
|
||||
sys 0m00.148s
|
||||
|
||||
hsc-simple (r1636) - phase 2:
|
||||
real 0m0.429s
|
||||
user 0m0.048s
|
||||
sys 0m0.084s
|
||||
|
||||
perf-test (r1745): Best- vs. Worst-Case with Wallclock-Timer (NON_BP_COUNT = 50 and BP_COUNT = 50):
|
||||
Case A: 2853.63s (= ~48min)
|
||||
Case B: 4214.03s (= ~70min)
|
||||
|
||||
----------------------------------------------
|
||||
|
||||
(C) WITH FAST_BREAKPOINTS (Release mode):
|
||||
|
||||
hsc-simple (r1636) - phase 1:
|
||||
real 0m13.341s
|
||||
user 0m12.377s
|
||||
sys 0m00.168s
|
||||
|
||||
hsc-simple (r1636) - phase 2:
|
||||
real 0m0.506s
|
||||
user 0m0.032s
|
||||
sys 0m0.100s
|
||||
|
||||
perf-test (r1745): Best- vs. Worst-Case with Wallclock-Timer (NON_BP_COUNT = 50 and BP_COUNT = 50):
|
||||
Case A: 43.0115s (< 1min, around 7,5 times faster than (D).a)
|
||||
Case B: 385.547s (= ~6min, around 1,5 times faster than (D).b)
|
||||
|
||||
----------------------------------------------
|
||||
|
||||
(D) WITHOUT FAST_BREAKPOINTS (Release mode):
|
||||
|
||||
hsc-simple (r1636) - phase 1:
|
||||
real 0m28.806s
|
||||
user 0m28.214s
|
||||
sys 0m00.160s
|
||||
|
||||
hsc-simple (r1636) - phase 2:
|
||||
real 0m0.565s
|
||||
user 0m0.052s
|
||||
sys 0m0.084s
|
||||
|
||||
perf-test (r1745): Best- vs. Worst-Case with Wallclock-Timer (NON_BP_COUNT = 50 and BP_COUNT = 50):
|
||||
Case A: 321.594s (= ~5min)
|
||||
Case B: 587.698s (= ~10min)
|
||||
|
||||
****************************************************************************************************
|
||||
EVALUATION:
|
||||
****************************************************************************************************
|
||||
Note: These are just exemplary results based on the observed values (see above).
|
||||
|
||||
- The (former) BufferCache enabled a speedup of up to 2,5x (according to Martin Unzer).
|
||||
- hsc-simple: Fast-Breakpoints are only faster if compiled in Release mode (yields a
|
||||
speedup up to 2x).
|
||||
- hsc-simple: Unfortunately, they are also slower by a factor of 2, if compiled in
|
||||
Default-Mode (and probably in Debug mode, too).
|
||||
- perf-test: Except for case B in Default mode, Fast-Breakpoints enable a speedup that
|
||||
ranges from 1,5 to 7,5! For case B (in Default mode -> no optimization), the Fast-
|
||||
Breakpoint implementation slows down the overall execution speed by a factor of (only)
|
||||
1,1. However, for case A (Best-Case) we assume that the overall speedup (compared to
|
||||
the corresponding case where Fast-Breakpoints are switched off) will tend to rise
|
||||
when the experiment parameter NON_BP_COUNT is increased.
|
||||
|
||||
****************************************************************************************************
|
||||
POSSIBLE OPTIMIZATIONS:
|
||||
****************************************************************************************************
|
||||
Note: The following observations and conjectures are partly derived from the analysis of the
|
||||
callgrind profile (using kcachegrind).
|
||||
|
||||
(i) gather() should be inlined. (At the moment it is not inlined, because that avoids an include cycle.)
|
||||
(ii) Bypass the construction of a ResultSet object (the bypass would avoid an additional iteration
|
||||
over the elements stored in the ResultSet itself), by calling makeActive in gather().
|
||||
(iii) Complete the implementation of the PerfVecSortedSingleBP class (uses binary search in IPs)
|
||||
|
||||
=> (i) won't affect the speed in Default and Debug mode. (ii) should enable a speedup in all
|
||||
cases. (iii) will only improve the speed when many *BPSingleListeners* are in use.
|
||||
Reference in New Issue
Block a user