diff --git a/src/core/comm/DatabaseCampaignMessage.proto.in b/src/core/comm/DatabaseCampaignMessage.proto.in
index 0e9f3bd6..2044ea1c 100644
--- a/src/core/comm/DatabaseCampaignMessage.proto.in
+++ b/src/core/comm/DatabaseCampaignMessage.proto.in
@@ -21,4 +21,10 @@ message DatabaseCampaignMessage {
     required string benchmark = 9 [(sql_ignore) = true];
 
     required InjectionPointMessage injection_point = 10 [(sql_ignore) = true];
-}
\ No newline at end of file
+}
+
+message DatabaseExperimentMessage {
+    required uint32 bitoffset = 1 [(sql_primary_key) = true];
+    required uint32 original_value = 2;
+}
+
diff --git a/src/core/efw/CMakeLists.txt b/src/core/efw/CMakeLists.txt
index 73f440ab..b13748e4 100644
--- a/src/core/efw/CMakeLists.txt
+++ b/src/core/efw/CMakeLists.txt
@@ -4,6 +4,8 @@ set(SRCS
   ExperimentFlow.hpp
   JobClient.hpp
   JobClient.cc
+  DatabaseExperiment.hpp
+  DatabaseExperiment.cc
 )
 
 add_library(fail-efw ${SRCS})
diff --git a/src/core/efw/DatabaseExperiment.cc b/src/core/efw/DatabaseExperiment.cc
new file mode 100644
index 00000000..24c9234e
--- /dev/null
+++ b/src/core/efw/DatabaseExperiment.cc
@@ -0,0 +1,216 @@
+#include <iostream>
+#include <iomanip>
+
+#include <cassert>
+#include "sal/SALConfig.hpp"
+#include "sal/Memory.hpp"
+#include "sal/Listener.hpp"
+#include "efw/DatabaseExperiment.hpp"
+#include <cstddef>
+#include <string>
+#include "comm/DatabaseCampaignMessage.pb.h"
+#include "sal/bochs/BochsListener.hpp"
+#include <google/protobuf/descriptor.h>
+#include <google/protobuf/message.h>
+
+
+using namespace std;
+using namespace fail;
+using namespace google::protobuf;
+
+// Check if configuration dependencies are satisfied:
+#if !defined(CONFIG_EVENT_BREAKPOINTS) || !defined(CONFIG_SR_RESTORE)
+  #error This experiment needs: breakpoints, restore. Enable these in the configuration.
+#endif
+
+DatabaseExperiment::~DatabaseExperiment() {
+    delete this->m_jc;
+}
+
+/**
+ * Flips a single bit in guest memory and logs the change.
+ *
+ * @param data_address address of the first byte of the injection target
+ * @param bitpos bit offset relative to data_address; offsets >= 8 select
+ *        the following bytes (byte = bitpos / 8, bit = bitpos % 8)
+ * @return the value of the affected byte before the bit was flipped
+ */
+unsigned DatabaseExperiment::injectBitFlip(address_t data_address, unsigned bitpos){
+    // run() iterates over data_width * 8 bit offsets, so the offset has
+    // to be split into a byte address and a bit index within that byte.
+    // Shifting by the unsplit offset would fall outside the byte that is
+    // written back and leave the memory unchanged.
+    const address_t byte_address = data_address + bitpos / 8;
+    const unsigned bit_in_byte = bitpos % 8;
+
+    unsigned int value, injectedval;
+
+    value = m_mm.getByte(byte_address);
+    injectedval = value ^ (1u << bit_in_byte);
+    m_mm.setByte(byte_address, injectedval);
+
+    m_log << "INJECTION at: 0x" << hex<< setw(2) << setfill('0') << byte_address
+          << " value: 0x" << setw(2) << setfill('0') << value << " -> 0x"
+          << setw(2) << setfill('0') << (unsigned) m_mm.getByte(byte_address) << endl;
+
+    return value;
+}
+
+/**
+ * Searches the top-level fields of msg for the first one whose message
+ * type is the generated type called name and returns it as a mutable T.
+ *
+ * @param msg message whose fields are inspected
+ * @param name message type name looked up in the generated descriptor pool
+ * @return pointer to the mutable submessage, or 0 if no field matches
+ */
+template <typename T>
+T * protobufFindSubmessageByTypename(Message *msg, const std::string &name) {
+    T * submessage = 0;
+    const Descriptor *msg_type = msg->GetDescriptor();
+    const Message::Reflection *ref = msg->GetReflection();
+    const Descriptor *database_desc =
+        DescriptorPool::generated_pool()->FindMessageTypeByName(name);
+    assert(database_desc != 0);
+
+    size_t count = msg_type->field_count();
+
+    for (unsigned i = 0; i < count; i++) {
+        const FieldDescriptor *field = msg_type->field(i);
+        assert(field != 0);
+        if (field->message_type() == database_desc) {
+            submessage = dynamic_cast<T *>(ref->MutableMessage(msg, field));
+            assert(submessage != 0);
+            break;
+        }
+    }
+    return submessage;
+}
+
+
+/**
+ * Main experiment loop: fetches parameter sets from the job client and,
+ * for every bit offset of the target data, restores the saved state,
+ * fast forwards to the injection instruction, flips the bit and resumes.
+ * The results of one parameter set are sent back together.
+ *
+ * @return \c false; simulator.terminate() is called right before returning
+ */
+bool DatabaseExperiment::run()
+{
+    m_log << "STARTING EXPERIMENT" << endl;
+
+    if (!this->cb_start_experiment()) {
+        m_log << "Initialization failed. Exiting." << endl;
+        simulator.terminate(1);
+    }
+
+    unsigned executed_jobs = 0;
+
+    while (executed_jobs < 25 || m_jc->getNumberOfUndoneJobs() > 0) {
+        m_log << "asking jobserver for parameters" << endl;
+        ExperimentData * param = this->cb_allocate_experiment_data();
+        if (!m_jc->getParam(*param)){
+            m_log << "Dying." << endl; // We were told to die.
+            simulator.terminate(1);
+        }
+        m_current_param = param;
+
+        DatabaseCampaignMessage * fsppilot =
+            protobufFindSubmessageByTypename<DatabaseCampaignMessage>(&param->getMessage(), "DatabaseCampaignMessage");
+        assert (fsppilot != 0);
+
+        unsigned injection_instr = fsppilot->injection_instr();
+        address_t data_address = fsppilot->data_address();
+        unsigned width = fsppilot->data_width();
+
+        for (unsigned bit_offset = 0; bit_offset < width * 8; ++bit_offset) {
+            // One result per bit offset, i.e. 8 results per byte of data_width
+            Message *outer_result = cb_new_result(param);
+            m_current_result = outer_result;
+            DatabaseExperimentMessage *result =
+                protobufFindSubmessageByTypename<DatabaseExperimentMessage>(outer_result, "DatabaseExperimentMessage");
+            // The lookup yields 0 if the result type has no matching field
+            assert (result != 0);
+            result->set_bitoffset(bit_offset);
+            m_log << "restoring state" << endl;
+            // Restore to the image, which starts at address(main)
+            simulator.restore(cb_state_directory());
+            executed_jobs ++;
+
+            m_log << "Trying to inject @ instr #" << dec << injection_instr << endl;
+
+            simulator.clearListeners();
+
+            // Generate an experiment listener, that matches on any IP
+            // event. It is used to forward to the injection
+            // point. The +1 is needed, since even for the zeroth
+            // dynamic instruction we need at least one breakpoint
+            // event.
+            BPSingleListener bp;
+            bp.setWatchInstructionPointer(ANY_ADDR);
+            bp.setCounter(injection_instr + 1);
+            simulator.addListener(&bp);
+
+            if (!this->cb_before_fast_forward()) {
+                continue;
+            }
+            fail::BaseListener * listener;
+            while (true) {
+                listener = simulator.resume();
+                if (listener == &bp) {
+                    break;
+                } else {
+                    bool should_continue = this->cb_during_fast_forward(listener);
+                    if (!should_continue)
+                        break; // Stop fast forwarding
+                }
+            }
+            if (!this->cb_after_fast_forward(listener)) {
+                continue; // Continue to next injection experiment
+            }
+
+            address_t injection_instr_absolute = fsppilot->injection_instr_absolute();
+            // Has to start out false: the loop below only ever sets it to true
+            bool found_eip = false;
+            for (int i = 0; i < BX_SMP_PROCESSORS; i++) {
+                address_t eip = simulator.getCPU(i).getInstructionPointer();
+                if (eip == injection_instr_absolute) {
+                    found_eip = true;
+                }
+            }
+            if (!found_eip) {
+                m_log << "Invalid Injection address != 0x" << hex << injection_instr_absolute << std::endl;
+                simulator.terminate(1);
+            }
+
+            simulator.clearListeners();
+
+            /// INJECT BITFLIP:
+            result->set_original_value(injectBitFlip(data_address, bit_offset));
+
+            if (!this->cb_before_resume()) {
+                continue; // Continue to next experiment
+            }
+
+            m_log << "Resuming till the crash" << std::endl;
+            // resume and wait for results
+            while (true) {
+                listener = simulator.resume();
+                bool should_continue = this->cb_during_resume(listener);
+                if (!should_continue)
+                    break;
+            }
+            m_log << "Resume done" << std::endl;
+            this->cb_after_resume(listener);
+
+            simulator.clearListeners();
+        }
+        m_jc->sendResult(*param);
+        this->cb_free_experiment_data(param);
+    }
+    // Explicitly terminate, or the simulator will continue to run.
+    simulator.terminate();
+    return false;
+}
+
+
diff --git a/src/core/efw/DatabaseExperiment.hpp b/src/core/efw/DatabaseExperiment.hpp
new file mode 100644
index 00000000..f60ddf9a
--- /dev/null
+++ b/src/core/efw/DatabaseExperiment.hpp
@@ -0,0 +1,150 @@
+#ifndef __DATABASE_EXPERIMENT_HPP__
+#define __DATABASE_EXPERIMENT_HPP__
+
+#include <google/protobuf/message.h>
+#include "efw/ExperimentFlow.hpp"
+#include "efw/JobClient.hpp"
+#include "util/Logger.hpp"
+#include <string>
+#include <stdlib.h>
+
+namespace fail {
+class ExperimentData;
+
+class DatabaseExperiment : public fail::ExperimentFlow {
+    fail::JobClient *m_jc;
+
+    unsigned injectBitFlip(fail::address_t data_address, unsigned bitpos);
+
+    /**
+       The current experiment data as returned by the job client. This
+       is allocated by cb_allocate_experiment_data()
+    */
+    ExperimentData *m_current_param;
+    google::protobuf::Message *m_current_result;
+
+public:
+    DatabaseExperiment(const std::string &name)
+        : m_jc(NULL), m_current_param(NULL), m_current_result(NULL),
+          m_log(name, false), m_mm(fail::simulator.getMemoryManager()) {
+
+        /* The fail server can be set with an environment variable,
+           otherwise the JOBSERVER configured by cmake is used */
+        char *server_host = getenv("FAIL_SERVER_HOST");
+        if (server_host != NULL){
+            this->m_jc = new fail::JobClient(std::string(server_host));
+        } else {
+            this->m_jc = new fail::JobClient();
+        }
+    }
+
+    virtual ~DatabaseExperiment();
+
+    bool run();
+
+
+protected:
+    fail::Logger m_log;
+    fail::MemoryManager& m_mm;
+
+    /** Returns the currently running experiment message as returned
+     * by the job client (NULL until run() has fetched the first one)
+     */
+    ExperimentData * get_current_experiment_data() { return m_current_param; }
+
+    /** Returns the current result message that was allocated by
+     * cb_new_result() (NULL until run() has allocated the first one)
+     */
+    google::protobuf::Message * get_current_result() { return m_current_result; }
+
+
+    //////////////////////////////////////////////////////////////////
+    // Can be overridden by the experiment
+    //////////////////////////////////////////////////////////////////
+
+    /**
+     * Get path to the state directory
+     */
+    virtual std::string cb_state_directory() { return "state"; }
+
+    /**
+     * Callback that is called before the actual experiment
+     * starts. Simulation is terminated on false.
+     * @return \c true on success, \c false otherwise
+     */
+    virtual bool cb_start_experiment() { return true; }
+
+    /**
+     * Allocate enough space to hold the incoming ExperimentData
+     * message. It can be accessed during the experiment through
+     * get_current_experiment_data()
+     */
+    virtual ExperimentData* cb_allocate_experiment_data() = 0;
+    virtual void cb_free_experiment_data(ExperimentData *) {}
+
+
+    /**
+     * Allocate a new result slot in the given experiment data. The
+     * returned pointer can be obtained by calling
+     * get_current_result()
+     */
+    virtual google::protobuf::Message* cb_new_result(ExperimentData*) = 0;
+
+    /**
+     * Callback that is called before the fast forward is done. This
+     * can be used to add additional event listeners during the fast
+     * forward phase. If returning false, the experiment is canceled.
+     * @return \c true on success, \c false otherwise
+     */
+    virtual bool cb_before_fast_forward() { return true; }
+
+    /**
+     * Callback that is called during the fast forward, when an event
+     * has triggered, but it was not the fast forward listener. This
+     * can be used to collect additional information during the fast
+     * forward. If returning false, the fast forwarding is stopped.
+     *
+     * @return \c true to continue fast forwarding, \c false to stop
+     */
+    virtual bool cb_during_fast_forward(fail::BaseListener *) { return false; }
+
+    /**
+     * Callback that is called after the fast forward, with the last
+     * triggered event. If returning false, the experiment is
+     * canceled.
+     *
+     * @return \c true on success, \c false otherwise
+     */
+    virtual bool cb_after_fast_forward(fail::BaseListener *) { return true; }
+
+    /**
+     * Callback that is called before the resuming till crash has
+     * started. This is called after the fault was injected. Here the
+     * end listeners should be installed. Returns true on
+     * success. Otherwise the experiment is canceled.
+     *
+     * @return \c true on success, \c false otherwise
+     */
+    virtual bool cb_before_resume() = 0;
+
+    /**
+     * Callback that is called during the resume-till-crash phase,
+     * when an event has triggered. This can be used to collect
+     * additional information during the resuming phase. If returning
+     * false, the resuming has finished and the experiment has stopped.
+     *
+     * @return \c true to continue resuming, \c false to stop
+     */
+    virtual bool cb_during_resume(fail::BaseListener *) { return false; }
+
+    /**
+     * Callback that is called after the resume-till-crash phase with
+     * the last triggered listener. This callback should collect all data.
+     *
+     */
+    virtual void cb_after_resume(fail::BaseListener *) = 0;
+};
+
+}
+
+#endif // __DATABASE_EXPERIMENT_HPP__