DatabaseExperiment: base class for distributed fail experiments
DatabaseExperiment is a base class that concrete experiments can inherit from. It handles the communication with the campaign server, fast-forwards to the fault location, injects the fault, and hands the experiment outcome over to the child class. Change-Id: I1fb676da6c704cd570a638f0dfaadd4f1a9845e4
This commit is contained in:
@ -21,4 +21,10 @@ message DatabaseCampaignMessage {
|
||||
required string benchmark = 9 [(sql_ignore) = true];
|
||||
|
||||
required InjectionPointMessage injection_point = 10 [(sql_ignore) = true];
|
||||
}
|
||||
}
|
||||
|
||||
// Per-injection record that DatabaseExperiment::run() fills in for every
// injected bit.
message DatabaseExperimentMessage {
	// Offset of the injected bit, counted from the pilot's data address.
	// NOTE(review): (sql_primary_key) is a custom option declared elsewhere;
	// presumably it marks this column as part of the result table's key.
	required uint32 bitoffset      = 1 [(sql_primary_key) = true];

	// Byte value that was read from memory before the bit was flipped.
	required uint32 original_value = 2;
}
|
||||
|
||||
|
||||
@ -4,6 +4,8 @@ set(SRCS
|
||||
ExperimentFlow.hpp
|
||||
JobClient.hpp
|
||||
JobClient.cc
|
||||
DatabaseExperiment.hpp
|
||||
DatabaseExperiment.cc
|
||||
)
|
||||
|
||||
# Build the fail-efw library from the source files collected in SRCS.
add_library(fail-efw ${SRCS})
|
||||
|
||||
183
src/core/efw/DatabaseExperiment.cc
Normal file
183
src/core/efw/DatabaseExperiment.cc
Normal file
@ -0,0 +1,183 @@
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "sal/SALConfig.hpp"
|
||||
#include "sal/Memory.hpp"
|
||||
#include "sal/Listener.hpp"
|
||||
#include "efw/DatabaseExperiment.hpp"
|
||||
#include <google/protobuf/descriptor.h>
|
||||
#include <google/protobuf/message.h>
|
||||
#include "comm/DatabaseCampaignMessage.pb.h"
|
||||
#include "sal/bochs/BochsListener.hpp"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
|
||||
using namespace std;
|
||||
using namespace fail;
|
||||
using namespace google::protobuf;
|
||||
|
||||
// Check if configuration dependencies are satisfied:
|
||||
#if !defined(CONFIG_EVENT_BREAKPOINTS) || !defined(CONFIG_SR_RESTORE)
|
||||
#error This experiment needs: breakpoints, restore. Enable these in the configuration.
|
||||
#endif
|
||||
|
||||
/// Releases the job client that the constructor allocated with new.
DatabaseExperiment::~DatabaseExperiment()
{
	delete m_jc;
}
|
||||
|
||||
/**
 * Flips a single bit in simulated memory and logs the modification.
 *
 * @param data_address address of the first byte of the injection target
 * @param bitpos bit index relative to \a data_address; values >= 8 address
 *        a bit in one of the bytes following \a data_address
 * @return value of the affected byte as read before the flip
 */
unsigned DatabaseExperiment::injectBitFlip(address_t data_address, unsigned bitpos){
	// run() hands in bit offsets from [0, data_width * 8). Split the offset
	// into a byte address and a bit within that byte; shifting by the raw
	// offset would miss the byte for bitpos >= 8 and is undefined for
	// bitpos >= 32.
	const address_t target_address = data_address + bitpos / 8;
	const unsigned mask = 1u << (bitpos % 8);

	const unsigned value = m_mm.getByte(target_address);
	const unsigned injectedval = value ^ mask;
	m_mm.setByte(target_address, injectedval);

	// The byte is read back for the log so that the message shows what is
	// really stored in memory after the injection.
	m_log << "INJECTION at: 0x" << hex<< setw(2) << setfill('0') << target_address
	      << " value: 0x" << setw(2) << setfill('0') << value << " -> 0x"
	      << setw(2) << setfill('0') << (unsigned) m_mm.getByte(target_address) << endl;

	return value;
}
|
||||
|
||||
template<class T>
|
||||
T * protobufFindSubmessageByTypename(Message *msg, const std::string &name) {
|
||||
T * submessage = 0;
|
||||
const Descriptor *msg_type = msg->GetDescriptor();
|
||||
const Message::Reflection *ref = msg->GetReflection();
|
||||
const Descriptor *database_desc =
|
||||
DescriptorPool::generated_pool()->FindMessageTypeByName(name);
|
||||
assert(database_desc != 0);
|
||||
|
||||
size_t count = msg_type->field_count();
|
||||
|
||||
for (unsigned i = 0; i < count; i++) {
|
||||
const FieldDescriptor *field = msg_type->field(i);
|
||||
assert(field != 0);
|
||||
if (field->message_type() == database_desc) {
|
||||
submessage = dynamic_cast<T*>(ref->MutableMessage(msg, field));
|
||||
assert(submessage != 0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return submessage;
|
||||
}
|
||||
|
||||
|
||||
bool DatabaseExperiment::run()
|
||||
{
|
||||
m_log << "STARTING EXPERIMENT" << endl;
|
||||
|
||||
if (!this->cb_start_experiment()) {
|
||||
m_log << "Initialization failed. Exiting." << endl;
|
||||
simulator.terminate(1);
|
||||
}
|
||||
|
||||
unsigned executed_jobs = 0;
|
||||
|
||||
while (executed_jobs < 25 || m_jc->getNumberOfUndoneJobs() > 0) {
|
||||
m_log << "asking jobserver for parameters" << endl;
|
||||
ExperimentData * param = this->cb_allocate_experiment_data();
|
||||
if (!m_jc->getParam(*param)){
|
||||
m_log << "Dying." << endl; // We were told to die.
|
||||
simulator.terminate(1);
|
||||
}
|
||||
m_current_param = param;
|
||||
|
||||
DatabaseCampaignMessage * fsppilot =
|
||||
protobufFindSubmessageByTypename<DatabaseCampaignMessage>(¶m->getMessage(), "DatabaseCampaignMessage");
|
||||
assert (fsppilot != 0);
|
||||
|
||||
unsigned injection_instr = fsppilot->injection_instr();
|
||||
address_t data_address = fsppilot->data_address();
|
||||
unsigned width = fsppilot->data_width();
|
||||
|
||||
for (unsigned bit_offset = 0; bit_offset < width * 8; ++bit_offset) {
|
||||
// 8 results in one job
|
||||
Message *outer_result = cb_new_result(param);
|
||||
m_current_result = outer_result;
|
||||
DatabaseExperimentMessage *result =
|
||||
protobufFindSubmessageByTypename<DatabaseExperimentMessage>(outer_result, "DatabaseExperimentMessage");
|
||||
result->set_bitoffset(bit_offset);
|
||||
m_log << "restoring state" << endl;
|
||||
// Restore to the image, which starts at address(main)
|
||||
simulator.restore(cb_state_directory());
|
||||
executed_jobs ++;
|
||||
|
||||
m_log << "Trying to inject @ instr #" << dec << injection_instr << endl;
|
||||
|
||||
simulator.clearListeners();
|
||||
|
||||
// Generate an experiment listener, that matches on any IP
|
||||
// event. It is used to forward to the injection
|
||||
// point. The +1 is needed, since even for the zeroth
|
||||
// dynamic instruction we need at least one breakpoint
|
||||
// event.
|
||||
BPSingleListener bp;
|
||||
bp.setWatchInstructionPointer(ANY_ADDR);
|
||||
bp.setCounter(injection_instr + 1);
|
||||
simulator.addListener(&bp);
|
||||
|
||||
if (!this->cb_before_fast_forward()) {
|
||||
continue;
|
||||
}
|
||||
fail::BaseListener * listener;
|
||||
while (true) {
|
||||
listener = simulator.resume();
|
||||
if (listener == &bp) {
|
||||
break;
|
||||
} else {
|
||||
bool should_continue = this->cb_during_fast_forward(listener);
|
||||
if (!should_continue)
|
||||
break; // Stop fast forwarding
|
||||
}
|
||||
}
|
||||
if (!this->cb_after_fast_forward(listener)) {
|
||||
continue; // Continue to next injection experiment
|
||||
}
|
||||
|
||||
address_t injection_instr_absolute = fsppilot->injection_instr_absolute();
|
||||
bool found_eip;
|
||||
for (int i = 0; i < BX_SMP_PROCESSORS; i++) {
|
||||
address_t eip = simulator.getCPU(i).getInstructionPointer();
|
||||
if (eip == injection_instr_absolute) {
|
||||
found_eip = true;
|
||||
}
|
||||
}
|
||||
if (!found_eip) {
|
||||
m_log << "Invalid Injection address != 0x" << injection_instr_absolute << std::endl;
|
||||
simulator.terminate(1);
|
||||
}
|
||||
|
||||
simulator.clearListeners();
|
||||
|
||||
/// INJECT BITFLIP:
|
||||
result->set_original_value(injectBitFlip(data_address, bit_offset));
|
||||
|
||||
if (!this->cb_before_resume()) {
|
||||
continue; // Continue to next experiment
|
||||
}
|
||||
|
||||
m_log << "Resuming till the crash" << std::endl;
|
||||
// resume and wait for results
|
||||
while (true) {
|
||||
listener = simulator.resume();
|
||||
bool should_continue = this->cb_during_resume(listener);
|
||||
if (!should_continue)
|
||||
break;
|
||||
}
|
||||
m_log << "Resume done" << std::endl;
|
||||
this->cb_after_resume(listener);
|
||||
|
||||
simulator.clearListeners();
|
||||
}
|
||||
m_jc->sendResult(*param);
|
||||
this->cb_free_experiment_data(param);
|
||||
}
|
||||
// Explicitly terminate, or the simulator will continue to run.
|
||||
simulator.terminate();
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
150
src/core/efw/DatabaseExperiment.hpp
Normal file
150
src/core/efw/DatabaseExperiment.hpp
Normal file
@ -0,0 +1,150 @@
|
||||
#ifndef __DATABASE_EXPERIMENT_HPP__
|
||||
#define __DATABASE_EXPERIMENT_HPP__
|
||||
|
||||
#include <google/protobuf/message.h>
|
||||
#include "efw/ExperimentFlow.hpp"
|
||||
#include "efw/JobClient.hpp"
|
||||
#include "util/Logger.hpp"
|
||||
#include <string>
|
||||
#include <stdlib.h>
|
||||
|
||||
namespace fail {
|
||||
class ExperimentData;
|
||||
|
||||
class DatabaseExperiment : public fail::ExperimentFlow {
|
||||
fail::JobClient *m_jc;
|
||||
|
||||
unsigned injectBitFlip(fail::address_t data_address, unsigned bitpos);
|
||||
|
||||
/**
|
||||
The current experiment data as returned by the job client. This
|
||||
allocated by cb_allocate_experiment_data()
|
||||
*/
|
||||
ExperimentData *m_current_param;
|
||||
google::protobuf::Message *m_current_result;
|
||||
|
||||
public:
|
||||
DatabaseExperiment(const std::string &name)
|
||||
: m_log(name, false), m_mm(fail::simulator.getMemoryManager()) {
|
||||
|
||||
/* The fail server can be set with an environent variable,
|
||||
otherwise the JOBSERVER configured by cmake ist used */
|
||||
char *server_host = getenv("FAIL_SERVER_HOST");
|
||||
if (server_host != NULL){
|
||||
this->m_jc = new fail::JobClient(std::string(server_host));
|
||||
} else {
|
||||
this->m_jc = new fail::JobClient();
|
||||
}
|
||||
}
|
||||
|
||||
virtual ~DatabaseExperiment();
|
||||
|
||||
bool run();
|
||||
|
||||
|
||||
protected:
|
||||
fail::Logger m_log;
|
||||
fail::MemoryManager& m_mm;
|
||||
|
||||
/** Returns the currently running experiment message as returned
|
||||
* by the job client
|
||||
*/
|
||||
ExperimentData * get_current_experiment_data() { return m_current_param; }
|
||||
|
||||
/** Returns the currently result message, that was allocated by
|
||||
* cb_allocate_new_result.
|
||||
*/
|
||||
google::protobuf::Message * get_current_result() { return m_current_result; }
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// Can be overwritten by experiment
|
||||
//////////////////////////////////////////////////////////////////
|
||||
|
||||
/**
|
||||
* Get path to the state directory
|
||||
*/
|
||||
virtual std::string cb_state_directory() { return "state"; }
|
||||
|
||||
/**
|
||||
* Callback that is called, before the actual experiment
|
||||
* starts. Simulation is terminated on false.
|
||||
* @param The current result message
|
||||
* @return \c true on success, \c false otherwise
|
||||
*/
|
||||
virtual bool cb_start_experiment() { return true; };
|
||||
|
||||
/**
|
||||
* Allocate enough space to hold the incoming ExperimentData
|
||||
* message. The can be accessed during the experiment through
|
||||
* get_current_experiment_data()
|
||||
*/
|
||||
virtual ExperimentData* cb_allocate_experiment_data() = 0;
|
||||
virtual void cb_free_experiment_data(ExperimentData *) {};
|
||||
|
||||
|
||||
/**
|
||||
* Allocate a new result slot in the given experiment data. The
|
||||
* returned pointer can be obtained by calling
|
||||
* get_current_result()
|
||||
*/
|
||||
virtual google::protobuf::Message* cb_new_result(ExperimentData*) = 0;
|
||||
|
||||
/**
|
||||
* Callback that is called before the fast forward is done. This
|
||||
* can be used to add additional event listeners during the fast
|
||||
* forward phase. If returning false, the experiment is canceled.
|
||||
* @return \c true on success, \c false otherwise
|
||||
*/
|
||||
virtual bool cb_before_fast_forward() { return true; };
|
||||
|
||||
/**
|
||||
* Callback that is called during the fast forward, when an event
|
||||
* has triggered, but it was not the fast forward listener. This
|
||||
* can be used to collect additional information during the fast
|
||||
* forward If returning false, the fast forwarding is stopped.
|
||||
*
|
||||
* @return \c true on should continue, \c false stop ff
|
||||
*/
|
||||
virtual bool cb_during_fast_forward(fail::BaseListener *) { return false; };
|
||||
|
||||
/**
|
||||
* Callback that is called after the fast forward, with the last
|
||||
* triggered event forward If returning false, the experiment is
|
||||
* canceled.
|
||||
*
|
||||
* @return \c true on success, \c false otherwise
|
||||
*/
|
||||
virtual bool cb_after_fast_forward(fail::BaseListener *) { return true; };
|
||||
|
||||
/**
|
||||
* Callback that is called before the resuming till crash has
|
||||
* started. This is called after the fault was injected. Here the
|
||||
* end listeners should be installed. Returns true on
|
||||
* success. Otherwise the experiment is canceled.
|
||||
|
||||
* @return \c true on success, \c false otherwise
|
||||
*/
|
||||
virtual bool cb_before_resume() = 0;
|
||||
|
||||
/**
|
||||
* Callback that is called during the resume-till-crash phase,
|
||||
* when an event has triggered, This can be used to collect
|
||||
* additional information during the resuming phse. If returning
|
||||
* false, the resuming has finished and the experiment has stopped.
|
||||
*
|
||||
* @return \c true on should continue ff, \c false stop ff
|
||||
*/
|
||||
virtual bool cb_during_resume(fail::BaseListener *) { return false; };
|
||||
|
||||
/**
|
||||
* Callback that is called after the resume-till-crash phase with
|
||||
* the last triggered listener. This callback should collect all data and
|
||||
*
|
||||
*/
|
||||
virtual void cb_after_resume(fail::BaseListener *) = 0;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // __DATABASE_EXPERIMENT_HPP__
|
||||
Reference in New Issue
Block a user