new experiment: ecos_kernel_test

git-svn-id: https://www4.informatik.uni-erlangen.de/i4svn/danceos/trunk/devel/fail@1426 8c4709b5-6ec9-48aa-a5cd-a96041d1645a
2012-07-05 14:37:38 +00:00
parent 48a83137fa
commit 596f4c0644
9 changed files with 1050 additions and 0 deletions
--- a/src/experiments/ecos_kernel_test/CMakeLists.txt
+++ b/src/experiments/ecos_kernel_test/CMakeLists.txt
@ -0,0 +1,33 @@
+# Experiment identity.  configure_file() instantiates the shared aspect header
+# template for this experiment; @ONLY limits substitution to @VAR@ references
+# (presumably EXPERIMENT_NAME / EXPERIMENT_TYPE -- the template is not part of
+# this directory).
+set(EXPERIMENT_NAME ecos_kernel_test)
+set(EXPERIMENT_TYPE EcosKernelTestExperiment)
+configure_file(
+  "${CMAKE_CURRENT_SOURCE_DIR}/../instantiate-experiment.ah.in"
+  "${CMAKE_CURRENT_BINARY_DIR}/instantiate-${EXPERIMENT_NAME}.ah"
+  @ONLY
+)
+
+# Protobuf message descriptions used by this experiment.
+set(MY_PROTOS ecos_kernel_test.proto)
+
+# Hand-written sources of the experiment and its campaign.
+set(MY_CAMPAIGN_SRCS
+  experiment.hpp
+  experiment.cc
+  campaign.hpp
+  campaign.cc
+)
+
+# Protobuf: the generated sources/headers are produced in the current binary
+# directory, so it has to be on the include path next to protobuf's headers.
+find_package(Protobuf REQUIRED)
+include_directories(${PROTOBUF_INCLUDE_DIRS} ${CMAKE_CURRENT_BINARY_DIR})
+protobuf_generate_cpp(PROTO_SRCS PROTO_HDRS ${MY_PROTOS})
+
+# Experiment library.  add_dependencies() only orders the build after the
+# tracing target; it does not link against it.
+add_library(${EXPERIMENT_NAME}
+  ${PROTO_SRCS}
+  ${PROTO_HDRS}
+  ${MY_CAMPAIGN_SRCS}
+)
+add_dependencies(${EXPERIMENT_NAME} tracing)
+
+# Campaign server: distributes experiment parameters to the job clients.
+add_executable(${EXPERIMENT_NAME}-server main.cc)
+target_link_libraries(${EXPERIMENT_NAME}-server
+  ${EXPERIMENT_NAME}
+  fail
+  ${PROTOBUF_LIBRARY}
+  ${Boost_THREAD_LIBRARY}
+)
+install(TARGETS ${EXPERIMENT_NAME}-server RUNTIME DESTINATION bin)
--- a/src/experiments/ecos_kernel_test/campaign.cc
+++ b/src/experiments/ecos_kernel_test/campaign.cc
@ -0,0 +1,430 @@
+#include <iostream>
+#include <fstream>
+#include <vector>
+#include <map>
+
+#include <boost/timer.hpp>
+
+#include "campaign.hpp"
+#include "experimentInfo.hpp"
+#include "cpn/CampaignManager.hpp"
+#include "util/Logger.hpp"
+#include "util/ProtoStream.hpp"
+#include "util/MemoryMap.hpp"
+
+#include "ecc_region.hpp"
+
+#include "../plugins/tracing/TracingPlugin.hpp"
+
+//#define PRUNING_DEBUG_OUTPUT
+
+using namespace std;
+using namespace fail;
+
+char const * const trace_filename = "trace.tc";
+char const * const results_filename = "ecos_kernel_test.csv";
+
+// One equivalence class of the pruned fault space: a single byte together
+// with an interval of instruction numbers [instr1, instr2] (both inclusive,
+// counted from experiment begin).
+struct equivalence_class {
+	// address of the byte to inject a bit-flip into
+	address_t data_address;
+	// first instruction number of the interval
+	int instr1;
+	// last instruction number of the interval
+	int instr2;
+	// absolute address of instruction instr2 (0 where it is unknown)
+	// FIXME we could record them all here
+	address_t instr2_absolute;
+};
+
+// Campaign driver.  Opens the result CSV (append mode) and the golden-run
+// trace, partitions the fault space (bytes listed in memoryMap x instruction
+// numbers) into equivalence classes, enqueues one job for every class that
+// ends with a READ, writes dummy rows for the classes that need no
+// experiment, and finally collects the job results into the CSV.
+// Returns false if the results file or the trace cannot be opened, true
+// otherwise.
+bool EcosKernelTestCampaign::run()
+{
+	Logger log("EcosKernelTest Campaign");
+
+	// non-destructive: due to the CSV header we can always manually recover
+	// from an accident (append mode)
+	ofstream results(results_filename, ios::out | ios::app);
+	if (!results.is_open()) {
+		log << "failed to open " << results_filename << endl;
+		return false;
+	}
+
+	log << "startup" << endl;
+
+	boost::timer t;
+
+	// load trace
+	ifstream tracef(trace_filename);
+	if (tracef.fail()) {
+		log << "couldn't open " << trace_filename << endl;
+		return false;
+	}
+	ProtoIStream ps(&tracef);
+
+	// a map of addresses of ECC protected objects
+	// (memoryMap entries are {start address, size}, see ecc_region.hpp)
+	MemoryMap mm;
+	for (unsigned i = 0; i < sizeof(memoryMap)/sizeof(*memoryMap); ++i) {
+		mm.add(memoryMap[i][0], memoryMap[i][1]);
+	}
+
+	// set of equivalence classes that need one (rather: eight, one for
+	// each bit in that byte) experiment to determine them all
+	vector<equivalence_class> ecs_need_experiment;
+	// set of equivalence classes that need no experiment, because we know
+	// they'd be identical to the golden run
+	vector<equivalence_class> ecs_no_effect;
+
+	// Disabled variant: iterates over the injection addresses and rescans the
+	// whole trace (ps.reset()) for each of them.  The active variant is in
+	// the #else branch below.
+#if 0
+	equivalence_class current_ec;
+
+	// map for efficient access when results come in
+	map<EcosKernelTestExperimentData *, unsigned> experiment_ecs;
+	// experiment count
+	int count = 0;
+
+	// XXX do it the other way around: iterate over trace, search addresses
+	//   -> one "open" EC for every address
+	// for every injection address ...
+	for (MemoryMap::iterator it = mm.begin(); it != mm.end(); ++it) {
+		//cerr << ".";
+		address_t data_address = *it;
+		current_ec.instr1 = 0;
+		int instr = 0;
+		address_t instr_absolute = 0; // FIXME this one probably should also be recorded ...
+		Trace_Event ev;
+		ps.reset();
+
+		// for every section in the trace between subsequent memory
+		// accesses to that address ...
+		while (ps.getNext(&ev) && instr < OOSTUBS_NUMINSTR) {
+			// instruction events just get counted
+			if (!ev.has_memaddr()) {
+				// new instruction
+				instr++;
+				instr_absolute = ev.ip();
+				continue;
+
+			// skip accesses to other data
+			// FIXME again, do it the other way around, and use mm.isMatching()!
+			} else if (ev.memaddr() + ev.width() <= data_address
+			        || ev.memaddr() > data_address) {
+				continue;
+
+			// skip zero-sized intervals: these can
+			// occur when an instruction accesses a
+			// memory location more than once
+			// (e.g., INC, CMPXCHG)
+			} else if (current_ec.instr1 > instr) {
+				continue;
+			}
+
+			// we now have an interval-terminating R/W
+			// event to the memaddr we're currently looking
+			// at:
+
+			// complete the equivalence interval
+			current_ec.instr2 = instr;
+			current_ec.instr2_absolute = instr_absolute;
+			current_ec.data_address = data_address;
+
+			if (ev.accesstype() == ev.READ) {
+				// a sequence ending with READ: we need
+				// to do one experiment to cover it
+				// completely
+				ecs_need_experiment.push_back(current_ec);
+#ifdef PRUNING_DEBUG_OUTPUT
+				cerr << dec << "EX " << current_ec.instr1 << " " << current_ec.instr2 << " " << current_ec.data_address << "\n";
+#endif
+
+				// instantly enqueue job: that way the job clients can already
+				// start working in parallel
+				EcosKernelTestExperimentData *d = new EcosKernelTestExperimentData;
+				// we pick the rightmost instruction in that interval
+				d->msg.set_instr_offset(current_ec.instr2);
+				d->msg.set_instr_address(current_ec.instr2_absolute);
+				d->msg.set_mem_addr(current_ec.data_address);
+
+				// store index into ecs_need_experiment
+				experiment_ecs[d] = ecs_need_experiment.size() - 1;
+
+				campaignmanager.addParam(d);
+				++count;
+			} else if (ev.accesstype() == ev.WRITE) {
+				// a sequence ending with WRITE: an
+				// injection anywhere here would have
+				// no effect.
+				ecs_no_effect.push_back(current_ec);
+#ifdef PRUNING_DEBUG_OUTPUT
+				cerr << dec << "NE " << current_ec.instr1 << " " << current_ec.instr2 << " " << current_ec.data_address << "\n";
+#endif
+			} else {
+				log << "WAT" << endl;
+			}
+
+			// next interval must start at next
+			// instruction; the aforementioned
+			// skipping mechanism wouldn't work
+			// otherwise
+			current_ec.instr1 = instr + 1;
+		}
+
+		// close the last interval:
+		// Why -1?  In most cases it does not make sense to inject before the
+		// very last instruction, as we won't execute it anymore.  This *only*
+		// makes sense if we also inject into parts of the result vector.  This
+		// is not the case in this experiment, and with -1 we'll get a
+		// result comparable to the non-pruned campaign.
+		// XXX still true for checksum-oostubs?
+		current_ec.instr2 = instr - 1;
+		current_ec.instr2_absolute = 0; // unknown
+		current_ec.data_address = data_address;
+		// zero-sized?  skip.
+		if (current_ec.instr1 > current_ec.instr2) {
+			continue;
+		}
+		// as the experiment ends, this byte is a "don't care":
+		ecs_no_effect.push_back(current_ec);
+#ifdef PRUNING_DEBUG_OUTPUT
+		cerr << dec << "NE " << current_ec.instr1 << " " << current_ec.instr2 << " " << current_ec.data_address << "\n";
+#endif
+	}
+#else
+	// Active variant: a single pass over the trace, keeping one "open"
+	// equivalence class per byte of interest.
+
+	// map for efficient access when results come in
+	map<EcosKernelTestExperimentData *, unsigned> experiment_ecs;
+	// map for keeping one "open" EC for every address
+	map<address_t, equivalence_class> open_ecs;
+	// experiment count
+	int count = 0;
+
+	// instruction counter within trace
+	int instr = 0;
+
+	// fill open_ecs with one EC for every address
+	for (MemoryMap::iterator it = mm.begin(); it != mm.end(); ++it) {
+		open_ecs[*it].instr1 = instr;
+	}
+
+	// absolute address of current trace instruction
+	address_t instr_absolute = 0; // FIXME this one probably should also be recorded ...
+
+	Trace_Event ev;
+	// for every event in the trace ...
+	while (ps.getNext(&ev) && instr < ECOS_NUMINSTR) {
+		// instruction events just get counted
+		if (!ev.has_memaddr()) {
+			// new instruction
+			instr++;
+			instr_absolute = ev.ip();
+			continue;
+		}
+
+		// for each single byte in this memory access ...
+		for (address_t data_address = ev.memaddr(); data_address < ev.memaddr() + ev.width();
+			++data_address) {
+			// skip accesses to data outside our map of interesting addresses
+			map<address_t, equivalence_class>::iterator current_ec_it;
+			if ((current_ec_it = open_ecs.find(data_address)) == open_ecs.end()) {
+				continue;
+			}
+			equivalence_class& current_ec = current_ec_it->second;
+
+			// skip zero-sized intervals: these can occur when an instruction
+			// accesses a memory location more than once (e.g., INC, CMPXCHG)
+			if (current_ec.instr1 > instr) {
+				continue;
+			}
+
+			// we now have an interval-terminating R/W event to the memaddr
+			// we're currently looking at:
+
+			// complete the equivalence interval
+			current_ec.instr2 = instr;
+			current_ec.instr2_absolute = instr_absolute;
+			current_ec.data_address = data_address;
+
+			if (ev.accesstype() == ev.READ) {
+				// a sequence ending with READ: we need to do one experiment to
+				// cover it completely
+				ecs_need_experiment.push_back(current_ec);
+#ifdef PRUNING_DEBUG_OUTPUT
+				cerr << dec << "EX " << current_ec.instr1 << " " << current_ec.instr2 << " " << current_ec.data_address << "\n";
+#endif
+
+				// instantly enqueue job: that way the job clients can already
+				// start working in parallel
+				EcosKernelTestExperimentData *d = new EcosKernelTestExperimentData;
+				// we pick the rightmost instruction in that interval
+				d->msg.set_instr_offset(current_ec.instr2);
+				d->msg.set_instr_address(current_ec.instr2_absolute);
+				d->msg.set_mem_addr(current_ec.data_address);
+
+				// store index into ecs_need_experiment
+				experiment_ecs[d] = ecs_need_experiment.size() - 1;
+
+				campaignmanager.addParam(d);
+				++count;
+			} else if (ev.accesstype() == ev.WRITE) {
+				// a sequence ending with WRITE: an injection anywhere here
+				// would have no effect.
+				ecs_no_effect.push_back(current_ec);
+#ifdef PRUNING_DEBUG_OUTPUT
+				cerr << dec << "NE " << current_ec.instr1 << " " << current_ec.instr2 << " " << current_ec.data_address << "\n";
+#endif
+			} else {
+				log << "WAT" << endl;
+			}
+
+			// next interval must start at next instruction; the aforementioned
+			// skipping mechanism wouldn't work otherwise
+			current_ec.instr1 = instr + 1;
+		}
+	}
+
+	// close all open intervals (right end of the fault-space)
+	for (map<address_t, equivalence_class>::iterator current_ec_it = open_ecs.begin();
+	     current_ec_it != open_ecs.end(); ++current_ec_it) {
+		address_t data_address = current_ec_it->first;
+		equivalence_class& current_ec = current_ec_it->second;
+
+		// Why -1?  In most cases it does not make sense to inject before the
+		// very last instruction, as we won't execute it anymore.  This *only*
+		// makes sense if we also inject into parts of the result vector.  This
+		// is not the case in this experiment, and with -1 we'll get a result
+		// comparable to the non-pruned campaign.
+		// XXX still true for checksum-oostubs?
+
+		current_ec.instr2 = instr - 1;
+		current_ec.instr2_absolute = 0; // unknown
+		current_ec.data_address = data_address;
+
+		// zero-sized?  skip.
+		if (current_ec.instr1 > current_ec.instr2) {
+			continue;
+		}
+
+#if 0
+		// the run continues after the FI window, so do this experiment
+		// XXX this creates at least one experiment for *every* bit!
+		//     fix: full trace, limited FI window
+		ecs_need_experiment.push_back(current_ec);
+#ifdef PRUNING_DEBUG_OUTPUT
+		cerr << dec << "EX " << current_ec.instr1 << " " << current_ec.instr2 << " " << current_ec.data_address << "\n";
+#endif
+
+		// FIXME copy/paste, encapsulate this:
+		// instantly enqueue job: that way the job clients can already start
+		// working in parallel
+		EcosKernelTestExperimentData *d = new EcosKernelTestExperimentData;
+		// we pick the rightmost instruction in that interval
+		d->msg.set_instr_offset(current_ec.instr2);
+		//d->msg.set_instr_address(current_ec.instr2_absolute); // unknown!
+		d->msg.set_mem_addr(current_ec.data_address);
+
+		// store index into ecs_need_experiment
+		experiment_ecs[d] = ecs_need_experiment.size() - 1;
+
+		campaignmanager.addParam(d);
+		++count;
+#else
+		// as the experiment ends, this byte is a "don't care":
+		ecs_no_effect.push_back(current_ec);
+#ifdef PRUNING_DEBUG_OUTPUT
+		cerr << dec << "NE " << current_ec.instr1 << " " << current_ec.instr2 << " " << current_ec.data_address << "\n";
+#endif
+#endif
+	}
+	// conserve some memory
+	open_ecs.clear();
+#endif
+
+	campaignmanager.noMoreParameters();
+	log << "done enqueueing parameter sets (" << count << ")." << endl;
+
+	log << "equivalence classes generated:"
+	    << " need_experiment = " << ecs_need_experiment.size()
+	    << " no_effect = " << ecs_no_effect.size() << endl;
+
+	// statistics: size of the unpruned fault space, i.e. the summed interval
+	// lengths of all equivalence classes (per byte; multiplied by 8 bits in
+	// the log output below)
+	unsigned long num_dumb_experiments = 0;
+	for (vector<equivalence_class>::const_iterator it = ecs_need_experiment.begin();
+	     it != ecs_need_experiment.end(); ++it) {
+		num_dumb_experiments += (*it).instr2 - (*it).instr1 + 1;
+	}
+	for (vector<equivalence_class>::const_iterator it = ecs_no_effect.begin();
+	     it != ecs_no_effect.end(); ++it) {
+		num_dumb_experiments += (*it).instr2 - (*it).instr1 + 1;
+	}
+	log << "pruning: reduced " << num_dumb_experiments * 8 <<
+	       " experiments to " << ecs_need_experiment.size() * 8 << endl;
+
+	// CSV header
+	results << "ec_instr1\tec_instr2\tec_instr2_absolute\tec_data_address\tbitnr\tbit_width\tresulttype\tecos_test_result\tfinish_reached\tlatest_ip\terror_corrected\tdetails" << endl;
+
+	// store no-effect "experiment" results
+	// (one row per equivalence class, columns in the order of the header)
+	for (vector<equivalence_class>::const_iterator it = ecs_no_effect.begin();
+	     it != ecs_no_effect.end(); ++it) {
+		results
+		 << (*it).instr1 << "\t"
+		 << (*it).instr2 << "\t"
+		 << (*it).instr2_absolute << "\t" // incorrect in all but one case!
+		 << (*it).data_address << "\t"
+		 << "0\t" // this entry starts at bit 0 ...
+		 << "8\t" // ... and is 8 bits wide
+		 << "1\t" // resulttype column: 1 is FINISHED in ecos_kernel_test.proto
+		 << "1\t" // dummy value (PASS): we didn't do any real experiments
+		 << "1\t" // finish_reached column
+		 << "99\t" // dummy value: we didn't do any real experiments
+		 << "0\t\n"; // error_corrected column, followed by empty details
+	}
+
+	// collect results
+	EcosKernelTestExperimentData *res;
+	int rescount = 0;
+	while ((res = static_cast<EcosKernelTestExperimentData *>(campaignmanager.getDone()))) {
+		rescount++;
+
+		// look up the equivalence class this job was created for
+		map<EcosKernelTestExperimentData *, unsigned>::iterator it =
+			experiment_ecs.find(res);
+		if (it == experiment_ecs.end()) {
+			results << "WTF, didn't find res!" << endl;
+			log << "WTF, didn't find res!" << endl;
+			continue;
+		}
+		equivalence_class &ec = ecs_need_experiment[it->second];
+
+		// sanity check
+		// (mismatches are reported in both the CSV and the log, but the
+		// results are still written out below)
+		if (ec.instr2 != res->msg.instr_offset()) {
+			results << "ec.instr2 != instr_offset" << endl;
+			log << "ec.instr2 != instr_offset" << endl;
+		}
+		if (res->msg.result_size() != 8) {
+			results << "result_size " << res->msg.result_size()
+			        << " instr2 " << ec.instr2
+			        << " data_address " << ec.data_address << endl;
+			log << "result_size " << res->msg.result_size() << endl;
+		}
+
+		// one job contains 8 experiments
+		for (int idx = 0; idx < res->msg.result_size(); ++idx) {
+			results
+			// repeated for all single experiments:
+			 << ec.instr1 << "\t"
+			 << ec.instr2 << "\t"
+			 << ec.instr2_absolute << "\t"
+			 << ec.data_address << "\t"
+			// individual results:
+			 << res->msg.result(idx).bit_offset() << "\t"
+			 << "1\t" // 1 bit wide
+			 << res->msg.result(idx).resulttype() << "\t"
+			 << res->msg.result(idx).ecos_test_result() << "\t"
+			 << res->msg.result(idx).finish_reached() << "\t"
+			 << res->msg.result(idx).latest_ip() << "\t"
+			 << res->msg.result(idx).error_corrected() << "\t"
+			 << res->msg.result(idx).details() << "\n";
+		}
+		//delete res;	// currently racy if jobs are reassigned
+
+	}
+	results.close();
+	log << "done.  sent " << count << " received " << rescount << endl;
+	log << "elapsed: " << t.elapsed() << "s" << endl;
+
+	return true;
+}
--- a/src/experiments/ecos_kernel_test/campaign.hpp
+++ b/src/experiments/ecos_kernel_test/campaign.hpp
@ -0,0 +1,17 @@
+#pragma once
+
+#include "cpn/Campaign.hpp"
+#include "comm/ExperimentData.hpp"
+#include "ecos_kernel_test.pb.h"
+
+// Per-job data object of the eCos kernel test: wraps the protobuf message
+// that holds the job's parameters and results.
+class EcosKernelTestExperimentData : public fail::ExperimentData {
+public:
+	// protobuf message (see ecos_kernel_test.proto)
+	EcosKernelTestProtoMsg msg;
+
+	// passes the address of the embedded message on to the base class
+	EcosKernelTestExperimentData()
+		: fail::ExperimentData(&msg)
+	{
+	}
+};
+
+// Campaign of the eCos kernel test; an instance is handed to
+// fail::campaignmanager.runCampaign() in main.cc.
+class EcosKernelTestCampaign : public fail::Campaign {
+public:
+	// campaign entry point, implemented in campaign.cc
+	virtual bool run();
+};
+
--- a/src/experiments/ecos_kernel_test/ecc_region.hpp
+++ b/src/experiments/ecos_kernel_test/ecc_region.hpp
@ -0,0 +1,82 @@
+// generated from STEP 0 output with region2array.sh
+//
+// Memory regions of interest for tracing and fault injection.  Each entry is
+// {start address, size in bytes}; both campaign.cc and experiment.cc feed the
+// pairs into MemoryMap::add().
+static const unsigned memoryMap[][2] = {
+{0x9bec, 4},
+{0xade0, 4},
+{0xade4, 4},
+{0xade8, 4},
+{0xadf0, 4},
+{0xadf4, 4},
+{0xadf8, 4},
+{0xadfc, 4},
+{0xae00, 4},
+{0xae04, 4},
+{0xae08, 4},
+{0xae0c, 4},
+{0xae10, 4},
+{0xae14, 2},
+{0xae44, 4},
+{0xae48, 4},
+{0xae4c, 24},
+{0xae64, 4},
+{0xae68, 4},
+{0xae6c, 4},
+{0xae70, 4},
+{0xae74, 4},
+{0xae7c, 4},
+{0xae80, 4},
+{0xae84, 4},
+{0xae88, 4},
+{0xae8c, 4},
+{0xae90, 4},
+{0xae94, 4},
+{0xae98, 4},
+{0xae9c, 4},
+{0xaea0, 2},
+{0xaed0, 4},
+{0xaed4, 4},
+{0xaed8, 24},
+{0xaef0, 4},
+{0xaef4, 4},
+{0xca48, 4},
+{0xca60, 4},
+{0xca64, 4},
+{0xca68, 4},
+{0xca70, 4},
+{0xca74, 4},
+{0xca78, 4},
+{0xca7c, 4},
+{0xca80, 4},
+{0xca84, 4},
+{0xca88, 4},
+{0xca8c, 4},
+{0xca90, 4},
+{0xca94, 2},
+{0xcac4, 4},
+{0xcac8, 4},
+{0xcacc, 24},
+{0xcae4, 4},
+{0xcae8, 4},
+{0xd314, 4},
+{0xd318, 4},
+{0xd320, 4},
+{0xd324, 128},
+{0xd3a4, 4},
+{0xd3c0, 4},
+{0xd3c4, 4},
+{0xd3c8, 4},
+{0xd3d0, 4},
+{0xd3d4, 4},
+{0xd3d8, 4},
+{0xd3dc, 4},
+{0xd3e0, 4},
+{0xd3e4, 4},
+{0xd3e8, 4},
+{0xd3ec, 4},
+{0xd3f0, 4},
+{0xd3f4, 2},
+{0xd424, 4},
+{0xd428, 4},
+{0xd42c, 24},
+{0xd444, 4},
+{0xd448, 4},
+};
--- a/src/experiments/ecos_kernel_test/ecos_kernel_test.proto
+++ b/src/experiments/ecos_kernel_test/ecos_kernel_test.proto
@ -0,0 +1,57 @@
+// One job of the eCos kernel test campaign: the server fills in the input
+// fields, the client appends one Result per injected bit and sends the
+// message back.
+message EcosKernelTestProtoMsg {
+	// Input: experiment parameters
+	// (client executes 8 experiments, one for each bit at mem_addr)
+
+	// FI at #instructions from experiment start
+	required int32 instr_offset = 1;
+	// the exact IP value at this point in time (from golden run)
+	optional int32 instr_address = 2; // for sanity checks
+	// address of the byte to inject bit-flips
+	required int32 mem_addr = 3;
+
+	// ----------------------------------------------------
+
+	// Output: experiment results
+
+	// IP where we did the injection: for debugging purposes, must be identical
+	// to instr_address
+	optional int32 injection_ip = 4;
+
+	repeated group Result = 5 {
+		// single experiment bit offset
+		required int32 bit_offset = 1;
+
+		// result type:
+		// FINISHED = planned number of instructions were executed
+		// TRAP = premature guest "crash"
+		// OUTSIDE = IP left text segment
+		// DETECTED = not set by the experiment code in experiment.cc
+		// TIMEOUT = the experiment's timer event fired first
+		// UNKNOWN = failed sanity check or unexpected event
+		enum ResultType {
+			FINISHED = 1;
+			TRAP = 2;
+			OUTSIDE = 3;
+			DETECTED = 4;
+			TIMEOUT = 5;
+			UNKNOWN = 6;
+		}
+		required ResultType resulttype = 2;
+
+		// especially interesting for TRAP/UNKNOWN: latest IP
+		required uint32 latest_ip = 3;
+
+		// test results
+		// (required: every Result must carry a value, or the message is
+		// incomplete)
+		enum EcosTestResultType {
+			PASS = 1;
+			FAIL = 2;
+		}
+		required EcosTestResultType ecos_test_result = 4;
+
+		// was finish() ever reached?
+		optional bool finish_reached = 5;
+
+		// did ECC correct the fault?
+		// (the experiment stores the byte read from ECOS_ERROR_CORRECTED here)
+		optional int32 error_corrected = 6;
+
+		// optional textual description of what happened
+		optional string details = 7;
+	}
+}
--- a/src/experiments/ecos_kernel_test/experiment.cc
+++ b/src/experiments/ecos_kernel_test/experiment.cc
@ -0,0 +1,364 @@
+#include <cassert>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+
+// getpid
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "util/Logger.hpp"
+#include "experiment.hpp"
+#include "experimentInfo.hpp"
+#include "campaign.hpp"
+#include "sal/SALConfig.hpp"
+#include "sal/SALInst.hpp"
+#include "sal/Memory.hpp"
+#include "sal/bochs/BochsRegister.hpp"
+#include "sal/bochs/BochsEvents.hpp"
+#include "sal/Event.hpp"
+
+// You need to have the tracing plugin enabled for this
+#include "../plugins/tracing/TracingPlugin.hpp"
+
+#include "ecc_region.hpp"
+
+#define LOCAL 0
+
+using namespace std;
+using namespace fail;
+
+// Check if configuration dependencies are satisfied:
+#if !defined(CONFIG_EVENT_BREAKPOINTS) || !defined(CONFIG_SR_RESTORE) || \
+    !defined(CONFIG_SR_SAVE) || !defined(CONFIG_EVENT_TRAP)
+  #error This experiment needs: breakpoints, traps, save, and restore. Enable these in the configuration.
+#endif
+
+// Experiment flow executed inside the simulator.  Exactly one of the
+// preprocessor-selected steps below is compiled in:
+//   STEP 0: print guest events (used to record the memory map)
+//   STEP 1: run until ECOS_FUNC_ENTRY and save the simulator state
+//   STEP 2: record a memory access trace for fault-space pruning
+//   STEP 3 (enabled): fetch jobs from the job server and run the actual
+//           fault-injection experiments (8 per job, one for each bit)
+// Returns false if the STEP 2 trace could not be written, or if
+// simulator.terminate(1) returns after the job server told us to die;
+// returns true otherwise (only reached if simulator.terminate() returns).
+bool EcosKernelTestExperiment::run()
+{
+	char const *statename = "ecos_kernel_test.state";
+	Logger log("eCos Kernel Test", false);
+	BPSingleEvent bp;
+
+	log << "startup" << endl;
+
+#if 0
+	// STEP 0: record memory map with addresses of "interesting" objects
+	GuestEvent g;
+	while (true) {
+		simulator.addEventAndWait(&g);
+		cout << g.getData() << flush;
+	}
+#elif 0
+	// STEP 1: run until interesting function starts, and save state
+	bp.setWatchInstructionPointer(ECOS_FUNC_ENTRY);
+	simulator.addEventAndWait(&bp);
+	log << "test function entry reached, saving state" << endl;
+	log << "EIP = " << hex << bp.getTriggerInstructionPointer() << endl;
+	//log << "error_corrected = " << dec << ((int)simulator.getMemoryManager().getByte(ECOS_ERROR_CORRECTED)) << endl;
+	simulator.save(statename);
+	assert(bp.getTriggerInstructionPointer() == ECOS_FUNC_ENTRY);
+	assert(simulator.getRegisterManager().getInstructionPointer() == ECOS_FUNC_ENTRY);
+#elif 0
+	// STEP 2: record trace for fault-space pruning
+	log << "restoring state" << endl;
+	simulator.restore(statename);
+	log << "EIP = " << hex << simulator.getRegisterManager().getInstructionPointer() << endl;
+	assert(simulator.getRegisterManager().getInstructionPointer() == ECOS_FUNC_ENTRY);
+
+	log << "enabling tracing" << endl;
+	TracingPlugin tp;
+
+	// restrict memory access logging to injection target
+	MemoryMap mm;
+	for (unsigned i = 0; i < sizeof(memoryMap)/sizeof(*memoryMap); ++i) {
+		mm.add(memoryMap[i][0], memoryMap[i][1]);
+	}
+	tp.restrictMemoryAddresses(&mm);
+
+	// record trace
+	char const *tracefile = "trace.tc";
+	ofstream of(tracefile);
+	tp.setTraceFile(&of);
+
+	// this must be done *after* configuring the plugin:
+	simulator.addFlow(&tp);
+
+#if 1
+	// trace until the test's finish function is reached
+	// -> calibration
+	bp.setWatchInstructionPointer(ECOS_FUNC_FINISH);
+	//bp.setCounter(WEATHER_NUMITER_TRACING); // single event, only
+#else
+	// FIXME this doesn't work properly: trace is one instruction too short as
+	//       tp is removed before all events were delivered
+	// trace ECOS_NUMINSTR instructions
+	// -> campaign-ready traces with identical lengths
+	bp.setWatchInstructionPointer(ANY_ADDR);
+	bp.setCounter(ECOS_NUMINSTR);
+#endif
+	simulator.addEvent(&bp);
+	BPSingleEvent ev_count(ANY_ADDR);
+	simulator.addEvent(&ev_count);
+
+	// count instructions
+	// FIXME add SAL functionality for this?
+	int instr_counter = 0;
+	while (simulator.waitAny() == &ev_count) {
+		++instr_counter;
+		simulator.addEvent(&ev_count);
+	}
+
+	log << dec << "tracing finished after " << instr_counter  << " instructions" << endl;
+
+
+	simulator.removeFlow(&tp);
+
+	// serialize trace to file
+	if (of.fail()) {
+		log << "failed to write " << tracefile << endl;
+		simulator.clearEvents(this);
+		return false;
+	}
+	of.close();
+	log << "trace written to " << tracefile << endl;
+
+#elif 1
+	// STEP 3: The actual experiment.
+#if !LOCAL
+	for (int i = 0; i < 400; ++i) { // more than 400 will be very slow (500 is max)
+#endif
+
+	// get an experiment parameter set
+	log << "asking job server for experiment parameters" << endl;
+	EcosKernelTestExperimentData param;
+#if !LOCAL
+	if (!m_jc.getParam(param)) {
+		log << "Dying." << endl;
+		// communicate that we were told to die
+		simulator.terminate(1);
+		// only reached if terminate() returns: never continue with an
+		// unfilled parameter set
+		return false;
+	}
+#else
+	// XXX debug
+	param.msg.set_instr_offset(7462);
+	//param.msg.set_instr_address(12345);
+	param.msg.set_mem_addr(44540);
+#endif
+
+	int id = param.getWorkloadID();
+	int instr_offset = param.msg.instr_offset();
+	int mem_addr = param.msg.mem_addr();
+
+	// for each job we're actually doing *8* experiments (one for each bit)
+	for (int bit_offset = 0; bit_offset < 8; ++bit_offset) {
+		// 8 results in one job
+		EcosKernelTestProtoMsg_Result *result = param.msg.add_result();
+		result->set_bit_offset(bit_offset);
+		log << dec << "job " << id << " instr " << instr_offset
+		    << " mem " << mem_addr << "+" << bit_offset << endl;
+
+		log << "restoring state" << endl;
+		simulator.restore(statename);
+
+		// XXX debug
+/*
+		stringstream fname;
+		fname << "job." << ::getpid();
+		ofstream job(fname.str().c_str());
+		job << "job " << id << " instr " << instr_offset << " (" << param.msg.instr_address() << ") mem " << mem_addr << "+" << bit_offset << endl;
+		job.close();
+*/
+
+		// reaching finish() could happen before OR after FI
+		BPSingleEvent func_finish(ECOS_FUNC_FINISH);
+		simulator.addEvent(&func_finish);
+		bool finish_reached = false;
+
+		// no need to wait if offset is 0
+		if (instr_offset > 0) {
+			// XXX could be improved with intermediate states (reducing runtime until injection)
+			bp.setWatchInstructionPointer(ANY_ADDR);
+			bp.setCounter(instr_offset);
+			simulator.addEvent(&bp);
+
+			// finish() before FI?
+			if (simulator.waitAny() == &func_finish) {
+				finish_reached = true;
+				log << "experiment reached finish() before FI" << endl;
+
+				// wait for bp
+				simulator.waitAny();
+				//TODO: why wait here? it seems that something went completely wrong?
+			}
+		}
+
+		// --- fault injection ---
+		// flip exactly one bit of the target byte
+		MemoryManager& mm = simulator.getMemoryManager();
+		byte_t data = mm.getByte(mem_addr);
+		byte_t newdata = data ^ (1 << bit_offset);
+		mm.setByte(mem_addr, newdata);
+		// note at what IP we did it
+		int32_t injection_ip = simulator.getRegisterManager().getInstructionPointer();
+		param.msg.set_injection_ip(injection_ip);
+		log << "fault injected @ ip " << injection_ip
+			<< " 0x" << hex << ((int)data) << " -> 0x" << ((int)newdata) << endl;
+		// sanity check
+		if (param.msg.has_instr_address() &&
+			injection_ip != param.msg.instr_address()) {
+			stringstream ss;
+			ss << "SANITY CHECK FAILED: " << injection_ip
+			   << " != " << param.msg.instr_address();
+			log << ss.str() << endl;
+			result->set_resulttype(result->UNKNOWN);
+			result->set_latest_ip(injection_ip);
+			// ecos_test_result is a required field: without it the result
+			// message would be incomplete.  No PASS was observed, so report
+			// FAIL, like the regular path does when no test output was seen.
+			result->set_ecos_test_result(result->FAIL);
+			result->set_details(ss.str());
+
+			simulator.clearEvents();
+			continue;
+		}
+
+		// --- aftermath ---
+		// possible outcomes:
+		// - trap, "crash"
+		// - jump outside text segment
+		// - (XXX unaligned jump inside text segment)
+		// - (XXX weird instructions?)
+		// - (XXX results displayed?)
+		// - reaches THE END
+		// - error detected, stop
+		// additional info:
+		// - #loop iterations before/after FI
+		// - (XXX "sane" display?)
+
+		// catch traps as "extraordinary" ending
+		TrapEvent ev_trap(ANY_TRAP);
+		simulator.addEvent(&ev_trap);
+		// jump outside text segment
+		BPRangeEvent ev_below_text(ANY_ADDR, ECOS_TEXT_START - 1);
+		BPRangeEvent ev_beyond_text(ECOS_TEXT_END + 1, ANY_ADDR);
+		simulator.addEvent(&ev_below_text);
+		simulator.addEvent(&ev_beyond_text);
+		// timeout (e.g., stuck in a HLT instruction)
+		// 10000us = 500000 instructions
+		TimerEvent ev_timeout(500000);
+		simulator.addEvent(&ev_timeout);
+
+		// remaining instructions until "normal" ending
+		BPSingleEvent ev_end(ANY_ADDR);
+		ev_end.setCounter(ECOS_NUMINSTR + ECOS_RECOVERYINSTR - instr_offset);
+		simulator.addEvent(&ev_end);
+
+		// eCos' test output function, which will show if the test PASSed or FAILed
+		BPSingleEvent func_test_output(ECOS_FUNC_TEST_OUTPUT);
+		simulator.addEvent(&func_test_output);
+
+#if LOCAL && 0
+		// XXX debug
+		log << "enabling tracing" << endl;
+		TracingPlugin tp;
+		tp.setLogIPOnly(true);
+		tp.setOstream(&cout);
+		// this must be done *after* configuring the plugin:
+		simulator.addFlow(&tp);
+#endif
+
+		BaseEvent* ev = simulator.waitAny();
+
+		bool ecos_test_passed = false;
+		bool ecos_test_failed = false;
+
+		// func_test_output and func_finish only provide additional
+		// information; keep waiting until one of the terminating events
+		// (trap, outside text, timeout, end) fires
+		while ( (ev == &func_test_output) || (ev == &func_finish) ) {
+			// Do we reach finish() while waiting for ev_trap/ev_done?
+			if (ev == &func_finish) {
+				finish_reached = true;
+				log << "experiment reached finish()" << endl;
+			}
+			else if(ev == &func_test_output) {
+				// 1st argument of cyg_test_output shows what has happened (FAIL or PASS)
+				address_t stack_ptr = simulator.getRegisterManager().getStackPointer(); // esp
+				int32_t cyg_test_output_argument = simulator.getMemoryManager().getByte(stack_ptr + 4); // 1st argument is at esp+4
+
+				log << "cyg_test_output_argument (#1): " << cyg_test_output_argument << endl;
+
+				/*
+				typedef enum {
+					CYGNUM_TEST_FAIL,
+					CYGNUM_TEST_PASS,
+					CYGNUM_TEST_EXIT,
+					CYGNUM_TEST_INFO,
+					CYGNUM_TEST_GDBCMD,
+					CYGNUM_TEST_NA
+				} Cyg_test_code;
+				*/
+
+				if (cyg_test_output_argument == 0) {
+					ecos_test_failed = true;
+				} else if (cyg_test_output_argument == 1) {
+					ecos_test_passed = true;
+				}
+			}
+
+			// wait for ev_trap/ev_done
+			ev = simulator.waitAny();
+		}
+
+		// record latest IP regardless of result
+		result->set_latest_ip(simulator.getRegisterManager().getInstructionPointer());
+
+		// record finish_reached and error_corrected regardless of result
+		result->set_finish_reached(finish_reached);
+		int32_t error_corrected = simulator.getMemoryManager().getByte(ECOS_ERROR_CORRECTED);
+		result->set_error_corrected(error_corrected);
+
+		// record ecos_test_result: a single FAIL output overrides any PASS,
+		// and no test output at all also counts as FAIL
+		if (ecos_test_failed) {
+			result->set_ecos_test_result(result->FAIL);
+		} else if (ecos_test_passed) {
+			result->set_ecos_test_result(result->PASS);
+		} else {
+			result->set_ecos_test_result(result->FAIL);
+		}
+
+		if (ev == &ev_end) {
+			log << dec << "Result FINISHED" << endl;
+			result->set_resulttype(result->FINISHED);
+		} else if (ev == &ev_timeout) {
+			log << "Result TIMEOUT" << endl;
+			result->set_resulttype(result->TIMEOUT);
+		} else if (ev == &ev_below_text || ev == &ev_beyond_text) {
+			log << "Result OUTSIDE" << endl;
+			result->set_resulttype(result->OUTSIDE);
+		} else if (ev == &ev_trap) {
+			log << dec << "Result TRAP #" << ev_trap.getTriggerNumber() << endl;
+			result->set_resulttype(result->TRAP);
+
+			stringstream ss;
+			ss << ev_trap.getTriggerNumber();
+			result->set_details(ss.str());
+		} else {
+			log << "Result WTF?" << endl;
+			result->set_resulttype(result->UNKNOWN);
+
+			stringstream ss;
+			ss << "eventid " << ev->getId() << " EIP " << simulator.getRegisterManager().getInstructionPointer();
+			result->set_details(ss.str());
+		}
+		// explicitly remove all events before we leave their scope
+		// FIXME event destructors should remove them from the queues
+		simulator.clearEvents();
+	}
+	// sanity check: do we have exactly 8 results?
+	if (param.msg.result_size() != 8) {
+		log << "WTF? param.msg.result_size() != 8" << endl;
+	} else {
+#if !LOCAL
+		m_jc.sendResult(param);
+#endif
+	}
+
+#if !LOCAL
+	}
+#endif
+
+#endif
+	// Explicitly terminate, or the simulator will continue to run.
+	simulator.terminate();
+	// Not reached if terminate() does not return; a bool function must not
+	// flow off its end without a return value.
+	return true;
+}
--- a/src/experiments/ecos_kernel_test/experiment.hpp
+++ b/src/experiments/ecos_kernel_test/experiment.hpp
@ -0,0 +1,12 @@
+#pragma once
+  
+#include "efw/ExperimentFlow.hpp"
+#include "efw/JobClient.hpp"
+
+// Experiment flow of the eCos kernel test; run() is implemented in
+// experiment.cc.
+class EcosKernelTestExperiment : public fail::ExperimentFlow {
+public:
+	// hands the (hard-coded) campaign server host name to the job client
+	EcosKernelTestExperiment()
+		: m_jc("ios.cs.tu-dortmund.de")
+	{
+	}
+
+	// experiment entry point
+	bool run();
+
+private:
+	// job client used by run() to fetch parameters and send back results
+	fail::JobClient m_jc;
+};
+
--- a/src/experiments/ecos_kernel_test/experimentInfo.hpp
+++ b/src/experiments/ecos_kernel_test/experimentInfo.hpp
@ -0,0 +1,40 @@
+#pragma once
+
+// FIXME autogenerate this
+
+// Target-specific addresses and instruction counts used by experiment.cc and
+// campaign.cc.  Only the first branch is compiled in.
+#if 1 // with ECC
+
+// the task function's entry address:
+// nm -C thread1 | fgrep cyg_start
+#define ECOS_FUNC_ENTRY		0x00003cc0
+// empty function that is called explicitly when the experiment finished
+// nm -C thread1 | fgrep cyg_test_exit
+#define ECOS_FUNC_FINISH		0x000058dc
+// test output function; the experiment reads its first argument from the
+// stack to tell PASS from FAIL
+// nm -C thread1 | fgrep "cyg_test_output"
+#define ECOS_FUNC_TEST_OUTPUT	0x000058e4
+
+// first and last address of the text segment; instruction pointers outside
+// this range are reported as OUTSIDE by the experiment
+// nm -C thread1 | grep "_[se]text"
+#define ECOS_TEXT_START		0x00003000
+#define ECOS_TEXT_END		0x000092ce
+
+// number of instructions the target executes under non-error conditions from ENTRY to DONE:
+// (result of experiment's step #2)
+#define ECOS_NUMINSTR		12390
+// number of instructions that are executed additionally for error corrections
+// (this is a rough guess ... TODO)
+#define ECOS_RECOVERYINSTR	0x2000
+// the variable that's increased if ECC corrects an error:
+// nm -C thread1|fgrep errors_corrected
+#define ECOS_ERROR_CORRECTED	0x0010adec //FIXME TODO XXX
+
+#else // without ECC
+
+// NOTE(review): this branch defines COOL_ECC_* names only, while
+// experiment.cc and campaign.cc reference the ECOS_* names above -- switching
+// the #if to 0 presumably does not compile as is; confirm before use.
+#define COOL_ECC_FUNC_ENTRY		0x00200a90
+#define COOL_ECC_CALCDONE		0x00200ab7
+#define COOL_ECC_NUMINSTR		97
+#define COOL_ECC_OBJUNDERTEST		0x0021263c
+#define COOL_ECC_OBJUNDERTEST_SIZE	10
+#define COOL_ECC_ERROR_CORRECTED	0x002127b0 // dummy
+
+#endif
+
--- a/src/experiments/ecos_kernel_test/main.cc
+++ b/src/experiments/ecos_kernel_test/main.cc
@ -0,0 +1,15 @@
+#include <iostream>
+#include <cstdlib>
+
+#include "cpn/CampaignManager.hpp"
+#include "campaign.hpp"
+
+// Campaign server entry point: runs the eCos kernel test campaign and maps
+// its outcome to the process exit status (0 on success, 1 on failure).
+// The command line arguments are not used.
+int main(int argc, char **argv)
+{
+	EcosKernelTestCampaign campaign;
+	return fail::campaignmanager.runCampaign(&campaign) ? 0 : 1;
+}