Added weather-monitor-gem5 experiment, which is a clone of the weather-monitor experiment with only one run per fail* instance.

git-svn-id: https://www4.informatik.uni-erlangen.de/i4svn/danceos/trunk/devel/fail@2026 8c4709b5-6ec9-48aa-a5cd-a96041d1645a
2013-01-30 23:59:28 +00:00
parent 9c62e4a7f2
commit 7588834f41
10 changed files with 1119 additions and 0 deletions
--- a/src/experiments/weather-monitor-gem5/CMakeLists.txt
+++ b/src/experiments/weather-monitor-gem5/CMakeLists.txt
@ -0,0 +1,35 @@
+# Build description for this experiment: generates the protobuf sources, builds
+# the experiment library fail-${EXPERIMENT_NAME} and the campaign server
+# executable ${EXPERIMENT_NAME}-server.
+#
+# NOTE(review): EXPERIMENT_NAME is "weather-monitor" although this directory is
+# weather-monitor-gem5. Presumably the experiment this one was cloned from uses
+# the same name, in which case the target names would collide if both were
+# configured in one build tree -- confirm against the parent CMakeLists.txt.
+set(EXPERIMENT_NAME weather-monitor)
+set(EXPERIMENT_TYPE WeatherMonitorExperiment)
+# instantiate the experiment aspect header from the shared template
+configure_file(../instantiate-experiment.ah.in
+               ${CMAKE_CURRENT_BINARY_DIR}/instantiate-${EXPERIMENT_NAME}.ah @ONLY
+)
+
+## Setup desired protobuf descriptions HERE ##
+set(MY_PROTOS 
+	weathermonitor.proto
+)
+
+# sources compiled into the experiment library
+set(MY_CAMPAIGN_SRCS
+	experimentInfo.hpp
+	experiment.hpp
+	experiment.cc
+	campaign.hpp
+	campaign.cc
+)
+
+#### PROTOBUFS ####
+find_package(Protobuf REQUIRED)
+include_directories(${PROTOBUF_INCLUDE_DIRS})
+# the generated *.pb.h files end up in the binary directory
+include_directories(${CMAKE_CURRENT_BINARY_DIR})
+
+PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS ${MY_PROTOS})
+
+## Build library
+add_library(fail-${EXPERIMENT_NAME} ${PROTO_SRCS} ${PROTO_HDRS} ${MY_CAMPAIGN_SRCS})
+# campaign.cc and experiment.cc include the tracing plugin's header
+add_dependencies(fail-${EXPERIMENT_NAME} fail-tracing)
+target_link_libraries(fail-${EXPERIMENT_NAME} ${PROTOBUF_LIBRARY})
+
+## This is the example's campaign server distributing experiment parameters
+add_executable(${EXPERIMENT_NAME}-server main.cc)
+target_link_libraries(${EXPERIMENT_NAME}-server fail-${EXPERIMENT_NAME} fail ${PROTOBUF_LIBRARY} ${Boost_THREAD_LIBRARY})   
+install(TARGETS ${EXPERIMENT_NAME}-server RUNTIME DESTINATION bin)
--- a/src/experiments/weather-monitor-gem5/campaign.cc
+++ b/src/experiments/weather-monitor-gem5/campaign.cc
@ -0,0 +1,440 @@
+#include <iostream>
+#include <fstream>
+#include <vector>
+#include <map>
+
+#include <boost/timer.hpp>
+
+#include "campaign.hpp"
+#include "experimentInfo.hpp"
+#include "cpn/CampaignManager.hpp"
+#include "util/Logger.hpp"
+#include "util/MemoryMap.hpp"
+#include "util/ProtoStream.hpp"
+
+#include "vptr_map.hpp"
+
+#include "../plugins/tracing/TracingPlugin.hpp"
+
+//#define PRUNING_DEBUG_OUTPUT
+
+using namespace std;
+using namespace fail;
+
+// input: golden-run trace used for fault-space pruning
+const char *const trace_filename = "trace.tc" WEATHER_SUFFIX;
+// output: tab-separated result table (opened in append mode by run())
+const char *const results_filename = "weathermonitor" WEATHER_SUFFIX ".csv";
+
+// One equivalence class of the pruned fault space: a single data byte
+// together with a closed interval [instr1, instr2] of instruction numbers
+// (counted from the beginning of the experiment).
+struct equivalence_class {
+	// byte to inject a bit-flip into
+	address_t data_address;
+	// first instruction number of the interval
+	int instr1;
+	// last instruction number of the interval
+	int instr2;
+	// instruction pointer recorded for instr2 (0 if unknown)
+	address_t instr2_absolute;
+};
+
+// Campaign driver: prunes the fault space using the recorded golden-run trace,
+// enqueues one job per equivalence class that needs an experiment, and writes
+// all outcomes as tab-separated rows to the results file.
+//
+// Steps:
+//  1. open the results file (append mode) and the trace file
+//  2. walk the trace; for every byte registered in the MemoryMap
+//     (WEATHER_DATA_START, size WEATHER_DATA_END - WEATHER_DATA_START) split
+//     the instruction axis into intervals that end at an access to that byte:
+//     READ-terminated intervals get an experiment, WRITE-terminated ones are
+//     recorded as "no effect"; intervals still open at the end of the trace
+//     also get an experiment
+//  3. write the CSV header and the "no effect" rows
+//  4. collect finished jobs from the campaign manager and write one row per
+//     result entry
+//
+// Returns false if the results file or the trace file cannot be opened, true
+// otherwise.
+bool WeatherMonitorCampaign::run()
+{
+	Logger log("Weathermonitor Campaign");
+
+	// non-destructive: due to the CSV header we can always manually recover
+	// from an accident (append mode)
+	ofstream results(results_filename, ios::out | ios::app);
+	if (!results.is_open()) {
+		log << "failed to open " << results_filename << endl;
+		return false;
+	}
+
+	log << "startup" << endl;
+
+	// wall-clock measurement for the "elapsed" log line at the end
+	boost::timer t;
+
+	// load trace
+	ifstream tracef(trace_filename);
+	if (tracef.fail()) {
+		log << "couldn't open " << trace_filename << endl;
+		return false;
+	}
+	ProtoIStream ps(&tracef);
+
+	// a map of FI data addresses
+	MemoryMap mm;
+	mm.add(WEATHER_DATA_START, WEATHER_DATA_END - WEATHER_DATA_START);
+
+	// set of equivalence classes that need one (rather: eight, one for
+	// each bit in that byte) experiment to determine them all
+	vector<equivalence_class> ecs_need_experiment;
+	// set of equivalence classes that need no experiment, because we know
+	// they'd be identical to the golden run
+	vector<equivalence_class> ecs_no_effect;
+
+	// Disabled variant: restarts the trace (ps.reset()) once per injection
+	// address. Kept for reference; the #else branch below is compiled.
+#if 0
+	equivalence_class current_ec;
+
+	// map for efficient access when results come in
+	map<WeatherMonitorExperimentData *, unsigned> experiment_ecs;
+	// experiment count
+	int count = 0;
+
+	// XXX do it the other way around: iterate over trace, search addresses
+	//   -> one "open" EC for every address
+	// for every injection address ...
+	for (MemoryMap::iterator it = mm.begin(); it != mm.end(); ++it) {
+		//cerr << ".";
+		address_t data_address = *it;
+		current_ec.instr1 = 0;
+		int instr = 0;
+		address_t instr_absolute = 0; // FIXME this one probably should also be recorded ...
+		Trace_Event ev;
+		ps.reset();
+
+		// for every section in the trace between subsequent memory
+		// accesses to that address ...
+		while (ps.getNext(&ev) && instr < WEATHER_NUMINSTR_TRACING) {
+			// instruction events just get counted
+			if (!ev.has_memaddr()) {
+				// new instruction
+				instr++;
+				instr_absolute = ev.ip();
+				continue;
+
+			// skip accesses to other data
+			// FIXME again, do it the other way around, and use mm.isMatching()!
+			} else if (ev.memaddr() + ev.width() <= data_address
+			        || ev.memaddr() > data_address) {
+				continue;
+
+			// skip zero-sized intervals: these can
+			// occur when an instruction accesses a
+			// memory location more than once
+			// (e.g., INC, CMPXCHG)
+			} else if (current_ec.instr1 > instr) {
+				continue;
+			}
+
+			// we now have an interval-terminating R/W
+			// event to the memaddr we're currently looking
+			// at:
+
+			// complete the equivalence interval
+			current_ec.instr2 = instr;
+			current_ec.instr2_absolute = instr_absolute;
+			current_ec.data_address = data_address;
+
+			if (ev.accesstype() == ev.READ) {
+				// a sequence ending with READ: we need
+				// to do one experiment to cover it
+				// completely
+				ecs_need_experiment.push_back(current_ec);
+#ifdef PRUNING_DEBUG_OUTPUT
+				cerr << dec << "EX " << current_ec.instr1 << " " << current_ec.instr2 << " " << current_ec.data_address << "\n";
+#endif
+
+				// instantly enqueue job: that way the job clients can already
+				// start working in parallel
+				WeatherMonitorExperimentData *d = new WeatherMonitorExperimentData;
+				// we pick the rightmost instruction in that interval
+				d->msg.set_instr_offset(current_ec.instr2);
+				d->msg.set_instr_address(current_ec.instr2_absolute);
+				d->msg.set_mem_addr(current_ec.data_address);
+
+				// store index into ecs_need_experiment
+				experiment_ecs[d] = ecs_need_experiment.size() - 1;
+
+				campaignmanager.addParam(d);
+				++count;
+			} else if (ev.accesstype() == ev.WRITE) {
+				// a sequence ending with WRITE: an
+				// injection anywhere here would have
+				// no effect.
+				ecs_no_effect.push_back(current_ec);
+#ifdef PRUNING_DEBUG_OUTPUT
+				cerr << dec << "NE " << current_ec.instr1 << " " << current_ec.instr2 << " " << current_ec.data_address << "\n";
+#endif
+			} else {
+				log << "WAT" << endl;
+			}
+
+			// next interval must start at next
+			// instruction; the aforementioned
+			// skipping mechanism wouldn't work
+			// otherwise
+			current_ec.instr1 = instr + 1;
+		}
+
+		// close the last interval:
+		// Why -1?  In most cases it does not make sense to inject before the
+		// very last instruction, as we won't execute it anymore.  This *only*
+		// makes sense if we also inject into parts of the result vector.  This
+		// is not the case in this experiment, and with -1 we'll get a
+		// result comparable to the non-pruned campaign.
+		// XXX still true for weathermonitor?
+		current_ec.instr2 = instr - 1;
+		current_ec.instr2_absolute = 0; // unknown
+		current_ec.data_address = data_address;
+		// zero-sized?  skip.
+		if (current_ec.instr1 > current_ec.instr2) {
+			continue;
+		}
+		// the run continues after the FI window, so do this experiment
+		// XXX this creates at least one experiment for *every* bit!
+		//     fix: full trace, limited FI window
+		//ecs_no_effect.push_back(current_ec);
+		ecs_need_experiment.push_back(current_ec);
+#ifdef PRUNING_DEBUG_OUTPUT
+		cerr << dec << "EX " << current_ec.instr1 << " " << current_ec.instr2 << " " << current_ec.data_address << "\n";
+#endif
+
+		// FIXME copy/paste, encapsulate this:
+		// instantly enqueue job: that way the job clients can already
+		// start working in parallel
+		WeatherMonitorExperimentData *d = new WeatherMonitorExperimentData;
+		// we pick the rightmost instruction in that interval
+		d->msg.set_instr_offset(current_ec.instr2);
+		//d->msg.set_instr_address(current_ec.instr2_absolute); // unknown!
+		d->msg.set_mem_addr(current_ec.data_address);
+
+		// store index into ecs_need_experiment
+		experiment_ecs[d] = ecs_need_experiment.size() - 1;
+
+		campaignmanager.addParam(d);
+		++count;
+	}
+#else
+	// Active variant: a single pass over the trace, keeping one "open"
+	// equivalence class per injection address.
+
+	// map for efficient access when results come in
+	map<WeatherMonitorExperimentData *, unsigned> experiment_ecs;
+	// map for keeping one "open" EC for every address
+	map<address_t, equivalence_class> open_ecs;
+	// experiment count
+	int count = 0;
+
+	// instruction counter within trace
+	int instr = 0;
+
+	// fill open_ecs with one EC for every address
+	for (MemoryMap::iterator it = mm.begin(); it != mm.end(); ++it) {
+		open_ecs[*it].instr1 = instr;
+	}
+
+	// absolute address of current trace instruction
+	address_t instr_absolute = 0; // FIXME this one probably should also be recorded ...
+
+	Trace_Event ev;
+	// for every event in the trace ...
+	while (ps.getNext(&ev) && instr < WEATHER_NUMINSTR_TRACING) {
+		// instruction events just get counted
+		if (!ev.has_memaddr()) {
+			// new instruction
+			instr++;
+			instr_absolute = ev.ip();
+			continue;
+		}
+
+		// for each single byte in this memory access ...
+		for (address_t data_address = ev.memaddr(); data_address < ev.memaddr() + ev.width();
+			++data_address) {
+			// skip accesses to data outside our map of interesting addresses
+			map<address_t, equivalence_class>::iterator current_ec_it;
+			if ((current_ec_it = open_ecs.find(data_address)) == open_ecs.end()) {
+				continue;
+			}
+			equivalence_class& current_ec = current_ec_it->second;
+
+			// skip zero-sized intervals: these can occur when an instruction
+			// accesses a memory location more than once (e.g., INC, CMPXCHG)
+			if (current_ec.instr1 > instr) {
+				continue;
+			}
+
+			// we now have an interval-terminating R/W event to the memaddr
+			// we're currently looking at:
+
+			// complete the equivalence interval
+			current_ec.instr2 = instr;
+			current_ec.instr2_absolute = instr_absolute;
+			current_ec.data_address = data_address;
+
+			if (ev.accesstype() == ev.READ) {
+				// a sequence ending with READ: we need to do one experiment to
+				// cover it completely
+				ecs_need_experiment.push_back(current_ec);
+#ifdef PRUNING_DEBUG_OUTPUT
+				cerr << dec << "EX " << current_ec.instr1 << " " << current_ec.instr2 << " " << current_ec.data_address << "\n";
+#endif
+
+				// instantly enqueue job: that way the job clients can already
+				// start working in parallel
+				WeatherMonitorExperimentData *d = new WeatherMonitorExperimentData;
+				// we pick the rightmost instruction in that interval
+				d->msg.set_instr_offset(current_ec.instr2);
+				d->msg.set_instr_address(current_ec.instr2_absolute);
+				d->msg.set_mem_addr(current_ec.data_address);
+
+				// store index into ecs_need_experiment
+				experiment_ecs[d] = ecs_need_experiment.size() - 1;
+
+				campaignmanager.addParam(d);
+				++count;
+			} else if (ev.accesstype() == ev.WRITE) {
+				// a sequence ending with WRITE: an injection anywhere here
+				// would have no effect.
+				ecs_no_effect.push_back(current_ec);
+#ifdef PRUNING_DEBUG_OUTPUT
+				cerr << dec << "NE " << current_ec.instr1 << " " << current_ec.instr2 << " " << current_ec.data_address << "\n";
+#endif
+			} else {
+				log << "WAT" << endl;
+			}
+
+			// next interval must start at next instruction; the aforementioned
+			// skipping mechanism wouldn't work otherwise
+			current_ec.instr1 = instr + 1;
+		}
+	}
+
+	// close all open intervals (right end of the fault-space)
+	for (map<address_t, equivalence_class>::iterator current_ec_it = open_ecs.begin();
+	     current_ec_it != open_ecs.end(); ++current_ec_it) {
+		address_t data_address = current_ec_it->first;
+		equivalence_class& current_ec = current_ec_it->second;
+
+		// Why -1?  In most cases it does not make sense to inject before the
+		// very last instruction, as we won't execute it anymore.  This *only*
+		// makes sense if we also inject into parts of the result vector.  This
+		// is not the case in this experiment, and with -1 we'll get a result
+		// comparable to the non-pruned campaign.
+		// XXX still true for weathermonitor?
+		
+		current_ec.instr2 = instr - 1;
+		current_ec.instr2_absolute = 0; // unknown
+		current_ec.data_address = data_address;
+
+		// zero-sized?  skip.
+		if (current_ec.instr1 > current_ec.instr2) {
+			continue;
+		}
+
+		// the run continues after the FI window, so do this experiment
+		// XXX this creates at least one experiment for *every* bit!
+		//     fix: full trace, limited FI window
+		//ecs_no_effect.push_back(current_ec);
+		ecs_need_experiment.push_back(current_ec);
+#ifdef PRUNING_DEBUG_OUTPUT
+		cerr << dec << "EX " << current_ec.instr1 << " " << current_ec.instr2 << " " << current_ec.data_address << "\n";
+#endif
+
+		// FIXME copy/paste, encapsulate this:
+		// instantly enqueue job: that way the job clients can already start
+		// working in parallel
+		WeatherMonitorExperimentData *d = new WeatherMonitorExperimentData;
+		// we pick the rightmost instruction in that interval
+		d->msg.set_instr_offset(current_ec.instr2);
+		//d->msg.set_instr_address(current_ec.instr2_absolute); // unknown!
+		d->msg.set_mem_addr(current_ec.data_address);
+
+		// store index into ecs_need_experiment
+		experiment_ecs[d] = ecs_need_experiment.size() - 1;
+
+		campaignmanager.addParam(d);
+		++count;
+	}
+	// conserve some memory
+	open_ecs.clear();
+#endif
+
+	campaignmanager.noMoreParameters();
+	log << "done enqueueing parameter sets (" << count << ")." << endl;
+
+	log << "equivalence classes generated:"
+	    << " need_experiment = " << ecs_need_experiment.size()
+	    << " no_effect = " << ecs_no_effect.size() << endl;
+
+	// statistics: sum of all interval lengths = number of (byte, instruction)
+	// points an unpruned campaign would have to cover
+	unsigned long num_dumb_experiments = 0;
+	for (vector<equivalence_class>::const_iterator it = ecs_need_experiment.begin();
+	     it != ecs_need_experiment.end(); ++it) {
+		num_dumb_experiments += (*it).instr2 - (*it).instr1 + 1;
+	}
+	for (vector<equivalence_class>::const_iterator it = ecs_no_effect.begin();
+	     it != ecs_no_effect.end(); ++it) {
+		num_dumb_experiments += (*it).instr2 - (*it).instr1 + 1;
+	}
+	log << "pruning: reduced " << num_dumb_experiments * 8 <<
+	       " experiments to " << ecs_need_experiment.size() * 8 << endl;
+
+	// CSV header
+	results << "ec_instr1\tec_instr2\tec_instr2_absolute\tec_data_address\tbitnr\tbit_width\tresulttype\tlatest_ip\titer1\titer2\tdetails" << endl;
+
+	// store no-effect "experiment" results
+	for (vector<equivalence_class>::const_iterator it = ecs_no_effect.begin();
+	     it != ecs_no_effect.end(); ++it) {
+		results
+		 << (*it).instr1 << "\t"
+		 << (*it).instr2 << "\t"
+		 << (*it).instr2_absolute << "\t" // incorrect in all but one case!
+		 << (*it).data_address << "\t"
+		 << "0\t" // this entry starts at bit 0 ...
+		 << "8\t" // ... and is 8 bits wide
+		 << "1\t"
+		 << "99\t" // dummy value: we didn't do any real experiments
+		 << "0\t"
+		 << (WEATHER_NUMITER_TRACING + WEATHER_NUMITER_AFTER) << "\t\n";
+	}
+
+	// collect results
+	WeatherMonitorExperimentData *res;
+	int rescount = 0;
+	while ((res = static_cast<WeatherMonitorExperimentData *>(campaignmanager.getDone()))) {
+		rescount++;
+
+		// map the finished job back to its equivalence class
+		map<WeatherMonitorExperimentData *, unsigned>::iterator it =
+			experiment_ecs.find(res);
+		if (it == experiment_ecs.end()) {
+			results << "WTF, didn't find res!" << endl;
+			log << "WTF, didn't find res!" << endl;
+			continue;
+		}
+		equivalence_class &ec = ecs_need_experiment[it->second];
+
+		// sanity check
+		if (ec.instr2 != res->msg.instr_offset()) {
+			results << "ec.instr2 != instr_offset" << endl;
+			log << "ec.instr2 != instr_offset" << endl;
+		}
+		// We are only sending one result while using gem5
+		#if 0
+		if (res->msg.result_size() != 8) {
+			results << "result_size " << res->msg.result_size()
+			        << " instr2 " << ec.instr2
+			        << " data_address " << ec.data_address << endl;
+			log << "result_size " << res->msg.result_size() << endl;
+		}
+		#endif
+
+		// one row per result entry in the job (a single one with gem5, see
+		// above; the check for 8 entries is disabled)
+		for (int idx = 0; idx < res->msg.result_size(); ++idx) {
+			//results << "ec_instr1\tec_instr2\tec_instr2_absolute\tec_data_address\tbitnr\tresulttype\tlatest_ip\titer1\titer2\tdetails" << endl;
+			results
+			// repeated for all single experiments:
+			 << ec.instr1 << "\t"
+			 << ec.instr2 << "\t"
+			 << ec.instr2_absolute << "\t"
+			 << ec.data_address << "\t"
+			// individual results:
+			 << res->msg.result(idx).bit_offset() << "\t"
+			 << "1\t" // 1 bit wide
+			 << res->msg.result(idx).resulttype() << "\t"
+			 << res->msg.result(idx).latest_ip() << "\t"
+			 << res->msg.result(idx).iter_before_fi() << "\t"
+			 << res->msg.result(idx).iter_after_fi() << "\t"
+			 << res->msg.result(idx).details() << "\n";
+		}
+		//delete res;	// currently racy if jobs are reassigned
+	}
+	results.close();
+	log << "done.  sent " << count << " received " << rescount << endl;
+	log << "elapsed: " << t.elapsed() << "s" << endl;
+
+	return true;
+}
--- a/src/experiments/weather-monitor-gem5/campaign.hpp
+++ b/src/experiments/weather-monitor-gem5/campaign.hpp
@ -0,0 +1,19 @@
+#ifndef __WEATHERMONITOR_CAMPAIGN_HPP__
+  #define __WEATHERMONITOR_CAMPAIGN_HPP__
+
+#include "cpn/Campaign.hpp"
+#include "comm/ExperimentData.hpp"
+#include "weathermonitor.pb.h"
+
+// Job payload used by both the campaign and the experiment: wraps the
+// protobuf message and hands its address to the fail::ExperimentData base.
+class WeatherMonitorExperimentData : public fail::ExperimentData {
+public:
+	WeatherMonitorExperimentData()
+		: fail::ExperimentData(&msg)
+	{
+	}
+
+	// experiment parameters (instruction offset, memory address) and results
+	WeathermonitorProtoMsg msg;
+};
+
+// Campaign of the weather-monitor experiment; run() is implemented in
+// campaign.cc.
+class WeatherMonitorCampaign : public fail::Campaign {
+public:
+	// Generates and enqueues the experiment jobs and writes the result table.
+	// Returns false if the results or trace file cannot be opened.
+	virtual bool run();
+};
+
+#endif // __WEATHERMONITOR_CAMPAIGN_HPP__
--- a/src/experiments/weather-monitor-gem5/experiment.cc
+++ b/src/experiments/weather-monitor-gem5/experiment.cc
@ -0,0 +1,341 @@
+#include <iostream>
+
+// getpid
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "util/Logger.hpp"
+
+#include "experiment.hpp"
+#include "experimentInfo.hpp"
+#include "campaign.hpp"
+
+#include "sal/SALConfig.hpp"
+#include "sal/SALInst.hpp"
+#include "sal/Memory.hpp"
+#include "sal/Listener.hpp"
+
+// you need to have the tracing plugin enabled for this
+#include "../plugins/tracing/TracingPlugin.hpp"
+
+#include "vptr_map.hpp"
+
+#define LOCAL 0
+
+using namespace std;
+using namespace fail;
+
+// Check if configuration dependencies are satisfied:
+#if !defined(CONFIG_EVENT_BREAKPOINTS) || !defined(CONFIG_SR_RESTORE) || \
+    !defined(CONFIG_SR_SAVE) || !defined(CONFIG_EVENT_TRAP)
+  #error This experiment needs: breakpoints, traps, save, and restore. Enable these in the configuration.
+#endif
+
+// Runs one experiment step. The step is selected at compile time by editing
+// the #if/#elif conditions below (as committed, STEP 0 is active):
+//   STEP 0:   forward the data delivered by a GuestListener to stdout, forever
+//   STEP 1+2: save the simulator state at main() and record the golden-run
+//             trace used for fault-space pruning
+//   STEP 3:   fetch one parameter set from the job server, inject a single
+//             bit-flip, classify the outcome and send the result back
+//
+// Returns false if the trace file could not be written (STEP 2), true
+// otherwise.
+bool WeatherMonitorExperiment::run()
+{
+	char const *statename = "bochs.state" WEATHER_SUFFIX;
+	Logger log("Weathermonitor", false);
+	BPSingleListener bp;
+
+	log << "startup" << endl;
+
+#if 1
+	// STEP 0: record memory map with vptr addresses
+	GuestListener g;
+	while (true) {
+		simulator.addListenerAndResume(&g);
+		cout << g.getData() << flush;
+	}
+#elif 0
+	// STEP 1: run until interesting function starts, and save state
+	bp.setWatchInstructionPointer(WEATHER_FUNC_MAIN);
+	simulator.addListenerAndResume(&bp);
+	log << "test function entry reached, saving state" << endl;
+	log << "EIP = " << hex << bp.getTriggerInstructionPointer() << endl;
+	simulator.save(statename);
+	assert(bp.getTriggerInstructionPointer() == WEATHER_FUNC_MAIN);
+	assert(simulator.getCPU(0).getInstructionPointer() == WEATHER_FUNC_MAIN);
+
+	// STEP 2: record trace for fault-space pruning
+	log << "restoring state" << endl;
+	simulator.restore(statename);
+	log << "EIP = " << hex << simulator.getCPU(0).getInstructionPointer() << endl;
+	assert(simulator.getCPU(0).getInstructionPointer() == WEATHER_FUNC_MAIN);
+
+	log << "enabling tracing" << endl;
+	TracingPlugin tp;
+
+	// TODO: record max(ESP)
+
+	// restrict memory access logging to injection target
+	MemoryMap mm;
+	mm.add(WEATHER_DATA_START, WEATHER_DATA_END - WEATHER_DATA_START);
+	tp.restrictMemoryAddresses(&mm);
+	//tp.setLogIPOnly(true);
+
+	// record trace
+	char const *tracefile = "trace.tc" WEATHER_SUFFIX;
+	ofstream of(tracefile);
+	tp.setTraceFile(&of);
+
+	// this must be done *after* configuring the plugin:
+	simulator.addFlow(&tp);
+
+#if 1
+	// trace WEATHER_NUMITER_TRACING measurement loop iterations
+	// -> calibration
+	bp.setWatchInstructionPointer(WEATHER_FUNC_WAIT_END);
+	bp.setCounter(WEATHER_NUMITER_TRACING);
+#else
+	// FIXME this doesn't work properly: trace is one instruction too short as
+	//       tp is removed before all events were delivered
+	// trace WEATHER_NUMINSTR_TRACING instructions
+	// -> campaign-ready traces with identical lengths
+	bp.setWatchInstructionPointer(ANY_ADDR);
+	bp.setCounter(WEATHER_NUMINSTR_TRACING);
+#endif
+	simulator.addListener(&bp);
+	BPSingleListener ev_count(ANY_ADDR);
+	simulator.addListener(&ev_count);
+
+	// count instructions
+	// FIXME add SAL functionality for this?
+	int instr_counter = 0;
+	while (simulator.resume() == &ev_count) {
+		++instr_counter;
+		simulator.addListener(&ev_count);
+	}
+
+	log << dec << "tracing finished after " << instr_counter
+	    << " instructions, seeing wait_end " << WEATHER_NUMITER_TRACING << " times" << endl;
+	simulator.removeFlow(&tp);
+
+	// serialize trace to file
+	if (of.fail()) {
+		log << "failed to write " << tracefile << endl;
+		simulator.clearListeners(this); // cleanup
+		return false;
+	}
+	of.close();
+	log << "trace written to " << tracefile << endl;
+
+	// wait another WEATHER_NUMITER_AFTER measurement loop iterations
+	bp.setWatchInstructionPointer(WEATHER_FUNC_WAIT_END);
+	bp.setCounter(WEATHER_NUMITER_AFTER);
+	simulator.addListener(&bp);
+
+	// count instructions
+	// FIXME add SAL functionality for this?
+	instr_counter = 0;
+	while (simulator.resume() == &ev_count) {
+		++instr_counter;
+		simulator.addListener(&ev_count);
+	}
+
+	log << dec << "experiment finished after " << instr_counter
+	    << " instructions, seeing wait_end " << WEATHER_NUMITER_AFTER << " times" << endl;
+
+#elif 0
+	// STEP 3: The actual experiment.
+	// Without restore() we can only do one experiment per simulator run.
+
+	// get an experiment parameter set
+	log << "asking job server for experiment parameters" << endl;
+	WeatherMonitorExperimentData param;
+#if !LOCAL
+	if (!m_jc.getParam(param)) {
+		log << "Dying." << endl;
+		// communicate that we were told to die
+		simulator.terminate(1);
+	}
+#else
+	// XXX debug
+	param.msg.set_instr_offset(1000);
+	//param.msg.set_instr_address(12345);
+	param.msg.set_mem_addr(0x00103bdc);
+#endif
+
+	int id = param.getWorkloadID();
+	int instr_offset = param.msg.instr_offset();
+	int mem_addr = param.msg.mem_addr();
+	// Choose the bit_offset for this gem5 build.
+	// To test all 8 bits, 8 campaign runs are needed.
+	int bit_offset = 0;
+
+	// exactly one result per job (one bit per run, see bit_offset above)
+	WeathermonitorProtoMsg_Result *result = param.msg.add_result();
+	result->set_bit_offset(bit_offset);
+	log << dec << "job " << id << " instr " << instr_offset
+	    << " mem " << mem_addr << "+" << bit_offset << endl;
+
+	// Instead of restoring a saved state, let the guest run until it hits
+	// the main function
+	log << "Run till main()" << endl;
+	BPSingleListener mainbp(WEATHER_FUNC_MAIN);
+	simulator.addListenerAndResume(&mainbp);
+	log << "main() reached" << endl;
+
+	// XXX debug
+/*
+	stringstream fname;
+	fname << "job." << ::getpid();
+	ofstream job(fname.str().c_str());
+	job << "job " << id << " instr " << instr_offset << " (" << param.msg.instr_address() << ") mem " << mem_addr << "+" << bit_offset << endl;
+	job.close();
+*/
+
+	// this marks THE END
+	BPSingleListener ev_end(ANY_ADDR);
+	ev_end.setCounter(WEATHER_NUMINSTR_TRACING + WEATHER_NUMINSTR_AFTER);
+	simulator.addListener(&ev_end);
+
+	// count loop iterations by counting wait_begin() calls
+	// FIXME would be nice to have a callback API for this as this needs to
+	//       be done "in parallel"
+	BPSingleListener ev_wait_begin(WEATHER_FUNC_WAIT_BEGIN);
+	simulator.addListener(&ev_wait_begin);
+	int count_loop_iter_before = 0;
+
+	// no need to wait if offset is 0
+	if (instr_offset > 0) {
+		// XXX could be improved with intermediate states (reducing runtime until injection)
+		bp.setWatchInstructionPointer(ANY_ADDR);
+		bp.setCounter(instr_offset);
+		simulator.addListener(&bp);
+
+		// count loop iterations until FI
+		while (simulator.resume() == &ev_wait_begin) {
+			++count_loop_iter_before;
+			simulator.addListener(&ev_wait_begin);
+		}
+	}
+
+	// --- fault injection ---
+	MemoryManager& mm = simulator.getMemoryManager();
+	byte_t data = mm.getByte(mem_addr);
+	byte_t newdata = data ^ (1 << bit_offset);
+	mm.setByte(mem_addr, newdata);
+	// note at what IP we did it
+	int32_t injection_ip = simulator.getCPU(0).getInstructionPointer();
+	param.msg.set_injection_ip(injection_ip);
+	result->set_iter_before_fi(count_loop_iter_before);
+	log << "fault injected @ ip " << injection_ip
+		<< " 0x" << hex << ((int)data) << " -> 0x" << ((int)newdata) << endl;
+
+	// sanity check: if the campaign told us the expected instruction address,
+	// the injection must have happened exactly there
+	bool sanity_check_failed = false;
+	if (param.msg.has_instr_address() &&
+		injection_ip != param.msg.instr_address()) {
+		sanity_check_failed = true;
+
+		stringstream ss;
+		ss << "SANITY CHECK FAILED: " << injection_ip
+		   << " != " << param.msg.instr_address();
+		log << ss.str() << endl;
+		result->set_resulttype(result->UNKNOWN);
+		result->set_latest_ip(injection_ip);
+		result->set_details(ss.str());
+		result->set_iter_after_fi(0);
+
+		simulator.clearListeners();
+	}
+
+	// Skip the aftermath when the sanity check failed: the result has already
+	// been filled in above, and running on would overwrite it (and would run
+	// without ev_end / ev_wait_begin, which were just cleared).
+	if (!sanity_check_failed) {
+		// --- aftermath ---
+		// possible outcomes:
+		// - trap, "crash"
+		// - jump outside text segment
+		// - (XXX unaligned jump inside text segment)
+		// - (XXX weird instructions?)
+		// - (XXX results displayed?)
+		// - reaches THE END
+		// - error detected, stop
+		// additional info:
+		// - #loop iterations before/after FI
+		// - (XXX "sane" display?)
+
+		// catch traps as "extraordinary" ending
+		TrapListener ev_trap(ANY_TRAP);
+		simulator.addListener(&ev_trap);
+		// jump outside text segment
+		BPRangeListener ev_below_text(ANY_ADDR, WEATHER_TEXT_START - 1);
+		BPRangeListener ev_beyond_text(WEATHER_TEXT_END + 1, ANY_ADDR);
+		simulator.addListener(&ev_below_text);
+		simulator.addListener(&ev_beyond_text);
+		// error detected
+		BPSingleListener ev_detected(WEATHER_FUNC_VPTR_PANIC);
+		simulator.addListener(&ev_detected);
+		// timeout (e.g., stuck in a HLT instruction)
+		// 10000us = 500000 instructions
+		TimerListener ev_timeout(10000);
+		simulator.addListener(&ev_timeout);
+
+#if LOCAL && 0
+		// XXX debug
+		log << "enabling tracing" << endl;
+		TracingPlugin tp;
+		tp.setLogIPOnly(true);
+		tp.setOstream(&cout);
+		// this must be done *after* configuring the plugin:
+		simulator.addFlow(&tp);
+#endif
+
+		BaseListener* ev;
+
+		// count loop iterations
+		int count_loop_iter_after = 0;
+		while ((ev = simulator.resume()) == &ev_wait_begin) {
+			++count_loop_iter_after;
+			simulator.addListener(&ev_wait_begin);
+		}
+		result->set_iter_after_fi(count_loop_iter_after);
+
+		// record latest IP regardless of result
+		result->set_latest_ip(simulator.getCPU(0).getInstructionPointer());
+
+		if (ev == &ev_end) {
+			log << "Result FINISHED (" << dec
+			    << count_loop_iter_before << "+" << count_loop_iter_after << ")" << endl;
+			result->set_resulttype(result->FINISHED);
+		} else if (ev == &ev_timeout) {
+			log << "Result TIMEOUT (" << dec
+			    << count_loop_iter_before << "+" << count_loop_iter_after << ")" << endl;
+			result->set_resulttype(result->TIMEOUT);
+		} else if (ev == &ev_below_text || ev == &ev_beyond_text) {
+			log << "Result OUTSIDE" << endl;
+			result->set_resulttype(result->OUTSIDE);
+		} else if (ev == &ev_trap) {
+			log << dec << "Result TRAP #" << ev_trap.getTriggerNumber() << endl;
+			result->set_resulttype(result->TRAP);
+
+			stringstream ss;
+			ss << ev_trap.getTriggerNumber();
+			result->set_details(ss.str());
+		} else if (ev == &ev_detected) {
+			log << dec << "Result DETECTED" << endl;
+			result->set_resulttype(result->DETECTED);
+		} else {
+			log << "Result WTF?" << endl;
+			result->set_resulttype(result->UNKNOWN);
+
+			stringstream ss;
+			//ss << "eventid " << ev->getId() << " EIP " << simulator.getCPU(0).getInstructionPointer();
+			result->set_details(ss.str());
+		}
+	}
+
+	// report the single result of this run to the job server
+#if !LOCAL
+	m_jc.sendResult(param);
+#endif
+
+#endif
+	// Explicitly terminate, or the simulator will continue to run.
+	simulator.terminate();
+	// NOTE(review): presumably terminate() does not return; the explicit
+	// return keeps the function well-formed in case it does.
+	return true;
+}
--- a/src/experiments/weather-monitor-gem5/experiment.hpp
+++ b/src/experiments/weather-monitor-gem5/experiment.hpp
@ -0,0 +1,13 @@
+#ifndef __WEATHERMONITOR_EXPERIMENT_HPP__
+  #define __WEATHERMONITOR_EXPERIMENT_HPP__
+  
+#include "efw/ExperimentFlow.hpp"
+#include "efw/JobClient.hpp"
+
+// Experiment flow of the weather-monitor experiment; run() is implemented in
+// experiment.cc.
+class WeatherMonitorExperiment : public fail::ExperimentFlow {
+public:
+	bool run();
+
+private:
+	// job client used by run() to fetch parameters and send back results
+	fail::JobClient m_jc;
+};
+
+#endif // __WEATHERMONITOR_EXPERIMENT_HPP__
--- a/src/experiments/weather-monitor-gem5/experimentInfo.hpp
+++ b/src/experiments/weather-monitor-gem5/experimentInfo.hpp
@ -0,0 +1,127 @@
+#ifndef __WEATHERMONITOR_EXPERIMENT_INFO_HPP__
+#define __WEATHERMONITOR_EXPERIMENT_INFO_HPP__
+
+// autogenerated, don't edit!
+
+// 0 = vanilla, 1 = guarded, 2 = plausibility
+#define WEATHERMONITOR_VARIANT 0
+
+#if WEATHERMONITOR_VARIANT == 0 // without vptr guards
+
+// suffix for simulator state, trace file
+#define WEATHER_SUFFIX				".weather"
+// main() address:
+// nm -C weather.elf|fgrep main
+#define WEATHER_FUNC_MAIN			0x00801084
+// wait_begin address
+#define WEATHER_FUNC_WAIT_BEGIN		0x0080105c
+// wait_end address
+#define WEATHER_FUNC_WAIT_END		0x00801070
+// vptr_panic address (only exists in guarded variant)
+#define WEATHER_FUNC_VPTR_PANIC		0x00800f90
+// number of main loop iterations to trace
+// (determines trace length and therefore fault-space width)
+#define WEATHER_NUMITER_TRACING		4
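+// number of instructions needed for these iterations in golden run (taken from
+// experiment step #2)
+// NOTE(review): this variant uses 30293, whereas experimentInfo.hpp.sh
+// hard-codes 20599 -- presumably regenerating this header would overwrite the
+// value; confirm which one is correct.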
+#define WEATHER_NUMINSTR_TRACING	30293
+// number of additional loop iterations for FI experiments (to see whether
+// everything continues working fine)
+#define WEATHER_NUMITER_AFTER		2
+// number of instructions needed for these iterations in golden run (taken from
+// experiment step #2)
+#define WEATHER_NUMINSTR_AFTER		10272
+// data/BSS begin:
+// nm -C weather.elf|fgrep ___DATA_START__
+#define WEATHER_DATA_START			0x00801fd4
+// data/BSS end:
+// nm -C weather.elf|fgrep ___BSS_END__
+#define WEATHER_DATA_END			0x00802228
+// text begin:
+// nm -C weather.elf|fgrep ___TEXT_START__
+#define WEATHER_TEXT_START			0x00800000
+// text end:
+// nm -C weather.elf|fgrep ___TEXT_END__
+#define WEATHER_TEXT_END			0x00801eac
+
+#elif WEATHERMONITOR_VARIANT == 1 // with guards
+
+// suffix for simulator state, trace file
+#define WEATHER_SUFFIX				".weather"
+// main() address:
+// nm -C weather.elf|fgrep main
+#define WEATHER_FUNC_MAIN			0x00801084
+// wait_begin address
+#define WEATHER_FUNC_WAIT_BEGIN		0x0080105c
+// wait_end address
+#define WEATHER_FUNC_WAIT_END		0x00801070
+// vptr_panic address (only exists in guarded variant)
+#define WEATHER_FUNC_VPTR_PANIC		0x00800f90
+// number of main loop iterations to trace
+// (determines trace length and therefore fault-space width)
+#define WEATHER_NUMITER_TRACING		4
+// number of instructions needed for these iterations in golden run (taken from
+// experiment step #2)
+#define WEATHER_NUMINSTR_TRACING	20599
+// number of additional loop iterations for FI experiments (to see whether
+// everything continues working fine)
+#define WEATHER_NUMITER_AFTER		2
+// number of instructions needed for these iterations in golden run (taken from
+// experiment step #2)
+#define WEATHER_NUMINSTR_AFTER		10272
+// data/BSS begin:
+// nm -C weather.elf|fgrep ___DATA_START__
+#define WEATHER_DATA_START			0x00801fd4
+// data/BSS end:
+// nm -C weather.elf|fgrep ___BSS_END__
+#define WEATHER_DATA_END			0x00802228
+// text begin:
+// nm -C weather.elf|fgrep ___TEXT_START__
+#define WEATHER_TEXT_START			0x00800000
+// text end:
+// nm -C weather.elf|fgrep ___TEXT_END__
+#define WEATHER_TEXT_END			0x00801eac
+
+#elif WEATHERMONITOR_VARIANT == 2 // with guards + plausibility check
+
+// suffix for simulator state, trace file
+#define WEATHER_SUFFIX				".weather"
+// main() address:
+// nm -C weather.elf|fgrep main
+#define WEATHER_FUNC_MAIN			0x00801084
+// wait_begin address
+#define WEATHER_FUNC_WAIT_BEGIN		0x0080105c
+// wait_end address
+#define WEATHER_FUNC_WAIT_END		0x00801070
+// vptr_panic address (only exists in guarded variant)
+#define WEATHER_FUNC_VPTR_PANIC		0x00800f90
+// number of main loop iterations to trace
+// (determines trace length and therefore fault-space width)
+#define WEATHER_NUMITER_TRACING		4
+// number of instructions needed for these iterations in golden run (taken from
+// experiment step #2)
+#define WEATHER_NUMINSTR_TRACING	20599
+// number of additional loop iterations for FI experiments (to see whether
+// everything continues working fine)
+#define WEATHER_NUMITER_AFTER		2
+// number of instructions needed for these iterations in golden run (taken from
+// experiment step #2)
+#define WEATHER_NUMINSTR_AFTER		10272
+// data/BSS begin:
+// nm -C weather.elf|fgrep ___DATA_START__
+#define WEATHER_DATA_START			0x00801fd4
+// data/BSS end:
+// nm -C weather.elf|fgrep ___BSS_END__
+#define WEATHER_DATA_END			0x00802228
+// text begin:
+// nm -C weather.elf|fgrep ___TEXT_START__
+#define WEATHER_TEXT_START			0x00800000
+// text end:
+// nm -C weather.elf|fgrep ___TEXT_END__
+#define WEATHER_TEXT_END			0x00801eac
+
+#else
+#error Unknown WEATHERMONITOR_VARIANT
+#endif
+
+#endif
--- a/src/experiments/weather-monitor-gem5/experimentInfo.hpp.sh
+++ b/src/experiments/weather-monitor-gem5/experimentInfo.hpp.sh
@ -0,0 +1,82 @@
+#!/bin/bash
+set -e
+TARGET=experimentInfo.hpp
+
+[ ! -e "$1" -o ! -e "$2" -o ! -e "$3" ] && echo "usage: $0 vanilla.elf guarded.elf plausibility.elf" && exit 1
+
+function addrof() { nm -C $1 | (fgrep "$2" || echo 99999999) | awk '{print $1}'; }
+
+cat >$TARGET <<EOF
+#ifndef __WEATHERMONITOR_EXPERIMENT_INFO_HPP__
+#define __WEATHERMONITOR_EXPERIMENT_INFO_HPP__
+
+// autogenerated, don't edit!
+
+// 0 = vanilla, 1 = guarded, 2 = plausibility
+#define WEATHERMONITOR_VARIANT 0
+
+#if WEATHERMONITOR_VARIANT == 0 // without vptr guards
+
+EOF
+
+# alldefs ELF
+# Writes one variant's block of WEATHER_* #defines to stdout.
+# - WEATHER_SUFFIX is derived from the base name of ELF via the sed expression
+#   below (it appears to strip everything from the first dot onwards).
+# - The function and section-boundary addresses are looked up in ELF with
+#   addrof and emitted with a "0x" prefix.
+# - The iteration and instruction counts are fixed literals, identical for
+#   every ELF passed in.
+# The heredoc delimiter is unquoted, so the backtick and $(...) substitutions
+# inside it are expanded when the function runs.
+function alldefs() {
+cat <<EOF
+// suffix for simulator state, trace file
+#define WEATHER_SUFFIX				".`basename $1|sed s/\\\\..*$//`"
+// main() address:
+// nm -C $(basename $1)|fgrep main
+#define WEATHER_FUNC_MAIN			0x`addrof $1 main`
+// wait_begin address
+#define WEATHER_FUNC_WAIT_BEGIN		0x`addrof $1 wait_begin`
+// wait_end address
+#define WEATHER_FUNC_WAIT_END		0x`addrof $1 wait_end`
+// vptr_panic address (only exists in guarded variant)
+#define WEATHER_FUNC_VPTR_PANIC		0x`addrof $1 vptr_panic`
+// number of main loop iterations to trace
+// (determines trace length and therefore fault-space width)
+#define WEATHER_NUMITER_TRACING		4
+// number of instructions needed for these iterations in golden run (taken from
+// experiment step #2)
+#define WEATHER_NUMINSTR_TRACING	20599
+// number of additional loop iterations for FI experiments (to see whether
+// everything continues working fine)
+#define WEATHER_NUMITER_AFTER		2
+// number of instructions needed for these iterations in golden run (taken from
+// experiment step #2)
+#define WEATHER_NUMINSTR_AFTER		10272
+// data/BSS begin:
+// nm -C $(basename $1)|fgrep ___DATA_START__
+#define WEATHER_DATA_START			0x`addrof $1 ___DATA_START__`
+// data/BSS end:
+// nm -C $(basename $1)|fgrep ___BSS_END__
+#define WEATHER_DATA_END			0x`addrof $1 ___BSS_END__`
+// text begin:
+// nm -C $(basename $1)|fgrep ___TEXT_START__
+#define WEATHER_TEXT_START			0x`addrof $1 ___TEXT_START__`
+// text end:
+// nm -C $(basename $1)|fgrep ___TEXT_END__
+#define WEATHER_TEXT_END			0x`addrof $1 ___TEXT_END__`
+EOF
+}
+
+alldefs $1 >>$TARGET
+cat >>$TARGET <<EOF
+
+#elif WEATHERMONITOR_VARIANT == 1 // with guards
+
+EOF
+alldefs $2 >>$TARGET
+cat >>$TARGET <<EOF
+
+#elif WEATHERMONITOR_VARIANT == 2 // with guards + plausibility check
+
+EOF
+alldefs $3 >>$TARGET
+cat >>$TARGET <<EOF
+
+#else
+#error Unknown WEATHERMONITOR_VARIANT
+#endif
+
+#endif
+EOF
--- a/src/experiments/weather-monitor-gem5/main.cc
+++ b/src/experiments/weather-monitor-gem5/main.cc
@ -0,0 +1,11 @@
+#include <iostream>
+#include <cstdlib>
+
+#include "cpn/CampaignManager.hpp"
+#include "campaign.hpp"
+
+/**
+ * Entry point of the campaign server executable.
+ *
+ * Creates a WeatherMonitorCampaign and passes it to
+ * fail::campaignmanager.runCampaign(). The command-line arguments are not
+ * used.
+ *
+ * @return 0 if runCampaign() yields a true value, 1 otherwise.
+ */
+int main(int argc, char **argv)
+{
+	WeatherMonitorCampaign campaign;
+	if (fail::campaignmanager.runCampaign(&campaign)) {
+		return 0;
+	}
+	return 1;
+}
--- a/src/experiments/weather-monitor-gem5/vptr_map.hpp
+++ b/src/experiments/weather-monitor-gem5/vptr_map.hpp
@ -0,0 +1,2 @@
+// will be generated from STEP 0 output with region2array.sh
+// XXX
--- a/src/experiments/weather-monitor-gem5/weathermonitor.proto
+++ b/src/experiments/weather-monitor-gem5/weathermonitor.proto
@ -0,0 +1,49 @@
+// Message carrying the parameters of one fault-injection job (input part,
+// fields 1-3) and the results recorded for it (output part, fields 4-5).
+message WeathermonitorProtoMsg {
+	// Input: experiment parameters
+	// (client executes 8 experiments, one for each bit at mem_addr)
+
+	// FI at #instructions from experiment start
+	required int32 instr_offset = 1;
+	// the exact IP value at this point in time (from golden run)
+	optional int32 instr_address = 2; // for sanity checks
+	// address of the byte to inject bit-flips
+	required int32 mem_addr = 3;
+
+	// ----------------------------------------------------
+
+	// Output: experiment results
+
+	// IP where we did the injection: for debugging purposes, must be identical
+	// to instr_address
+	optional int32 injection_ip = 4;
+
+	// One entry per executed single-bit experiment.
+	// NOTE: "group" is a deprecated proto2 construct; changing it would alter
+	// the wire format, so it is documented rather than replaced.
+	repeated group Result = 5 {
+		// single experiment bit offset
+		required int32 bit_offset = 1;
+
+		// result type:
+		// FINISHED = planned number of instructions were executed
+		// TRAP = premature guest "crash"
+		// OUTSIDE = IP left text segment
+		// NOTE(review): DETECTED, TIMEOUT and UNKNOWN are not described
+		// here; presumably DETECTED = fault caught by a guard (cf.
+		// vptr_panic), TIMEOUT = run exceeded its time budget, UNKNOWN =
+		// none of the above -- confirm against the experiment code.
+		enum ResultType {
+			FINISHED = 1;
+			TRAP = 2;
+			OUTSIDE = 3;
+			DETECTED = 4;
+			TIMEOUT = 5;
+			UNKNOWN = 6;
+		}
+		required ResultType resulttype = 2;
+
+		// especially interesting for TRAP/UNKNOWN: latest IP
+		required uint32 latest_ip = 3;
+
+		// number of wmoo measuring/displaying iterations before FI
+		required uint32 iter_before_fi = 4;
+		// number of wmoo measuring/displaying iterations after FI
+		required uint32 iter_after_fi = 5;
+
+		// optional textual description of what happened
+		optional string details = 6;
+	}
+}