fiascoFail: grand overhaul of this experiment
* Removed all command-line options. * Read all required information from *-traceinfo.txt file or kernel elf file. * Record error_corrected (but only in the 'OK' case). * Add support for multiple variants (similar to the ecos experiment). Change-Id: I933e52881fc6bee0750d8aaef813fe2539166b06
This commit is contained in:
@ -14,163 +14,113 @@
|
|||||||
#include "sal/SALInst.hpp"
|
#include "sal/SALInst.hpp"
|
||||||
#include "sal/Memory.hpp"
|
#include "sal/Memory.hpp"
|
||||||
#include "sal/Listener.hpp"
|
#include "sal/Listener.hpp"
|
||||||
#include <sal/bochs/BochsMemory.hpp>
|
|
||||||
#include "util/WallclockTimer.hpp"
|
#include "util/WallclockTimer.hpp"
|
||||||
#include "config/FailConfig.hpp"
|
#include "config/FailConfig.hpp"
|
||||||
#include "util/CommandLine.hpp"
|
|
||||||
|
// You need to have the serialoutput plugin enabled for this
|
||||||
|
#include "../plugins/serialoutput/SerialOutputLogger.hpp"
|
||||||
|
|
||||||
|
#define LIMIT_SERIAL 1024*1024
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace fail;
|
using namespace fail;
|
||||||
|
|
||||||
#define LOCAL 0
|
#define LOCAL 0
|
||||||
|
|
||||||
void FiascoFailExperiment::parseOptions()
|
const std::string FiascoFailExperiment::dir_images(DIR_IMAGES);
|
||||||
{
|
const std::string FiascoFailExperiment::dir_prerequisites(DIR_PREREQUISITES);
|
||||||
CommandLine &cmd = CommandLine::Inst();
|
|
||||||
cmd.addOption("", "", Arg::None, "USAGE: fail-client -Wf,[option] ... <BochsOptions...>\n\n");
|
|
||||||
CommandLine::option_handle GOLDEN = cmd.addOption("g", "golden", Arg::None, "-g,--golden \tExecute golden-run experiment?");
|
|
||||||
CommandLine::option_handle HELP = cmd.addOption("h", "help", Arg::None, "-h,--help \tPrint usage and exit");
|
|
||||||
CommandLine::option_handle END_ADDRESS = cmd.addOption("E", "end", Arg::Required, "-E,--end \tEnd-Address of experiment");
|
|
||||||
CommandLine::option_handle TOTAL_TIMER = cmd.addOption("T", "time", Arg::Required, "-T,--time \tTotal timer ticks of the golden run experiment from restoring point");
|
|
||||||
CommandLine::option_handle TOTAL_INSTR = cmd.addOption("t", "total", Arg::Required, "-t,--total \tTotal instructions of the golden run experiment from restoring point");
|
|
||||||
CommandLine::option_handle ECC_PANIC_FUNC = cmd.addOption("p", "panic", Arg::Required, "-p--panic \tAddress of the ecc_panic function");
|
|
||||||
CommandLine::option_handle ERROR_CORRECTED_ADDR = cmd.addOption("c", "corrected", Arg::Required, "-c--corected \tAddress of the errors_corrected variable");
|
|
||||||
|
|
||||||
if(!cmd.parse())
|
bool FiascoFailExperiment::readTraceInfo(unsigned &instr_counter, unsigned long long &runtime, fail::guest_address_t &addr_finish,
|
||||||
{
|
const std::string& variant, const std::string& benchmark) {
|
||||||
cerr << "Error parsing arguments." << endl;
|
ifstream file(filename_traceinfo(variant, benchmark).c_str());
|
||||||
exit(-1);
|
if (!file.is_open()) {
|
||||||
|
cout << "failed to open " << filename_traceinfo(variant, benchmark) << endl;
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(cmd[HELP])
|
string buf;
|
||||||
{
|
unsigned count = 0;
|
||||||
cmd.printUsage();
|
|
||||||
exit(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
if(cmd[GOLDEN].count() > 0)
|
while (getline(file, buf)) {
|
||||||
{
|
stringstream ss(buf, ios::in);
|
||||||
_golden_run = true;
|
switch (count) {
|
||||||
}
|
case 0:
|
||||||
else
|
ss >> instr_counter;
|
||||||
{
|
break;
|
||||||
_golden_run = false;
|
case 1:
|
||||||
}
|
ss >> runtime;
|
||||||
|
break;
|
||||||
// Check if end-address is given
|
case 2:
|
||||||
if(cmd[END_ADDRESS].count() > 0)
|
ss >> addr_finish;
|
||||||
{
|
|
||||||
endAddress = strtoul(cmd[END_ADDRESS].first()->arg, NULL, 16);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
m_log << "You have to give an end address!" << endl;
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if number of golden run timer ticks is given
|
|
||||||
if(cmd[TOTAL_TIMER].count() > 0)
|
|
||||||
{
|
|
||||||
golden_run_timer_ticks = strtoull(cmd[TOTAL_TIMER].first()->arg, NULL, 10);
|
|
||||||
}
|
|
||||||
else if(!_golden_run)
|
|
||||||
{
|
|
||||||
m_log << "You hava to give the number of total timer ticks of the golden run" << endl;
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if number of total instructions is given
|
|
||||||
if(cmd[TOTAL_INSTR].count() > 0)
|
|
||||||
{
|
|
||||||
golden_run_instructions = strtoul(cmd[TOTAL_INSTR].first()->arg, NULL, 10);
|
|
||||||
}
|
|
||||||
else if(!_golden_run)
|
|
||||||
{
|
|
||||||
m_log << "You have to give the number of total instructions of the golden run" << endl;
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if ecc_panic function address is given
|
|
||||||
if(cmd[ECC_PANIC_FUNC].count() > 0)
|
|
||||||
{
|
|
||||||
ecc_panic_address = strtoul(cmd[ECC_PANIC_FUNC].first()->arg, NULL, 16);
|
|
||||||
}
|
|
||||||
else if(!_golden_run)
|
|
||||||
{
|
|
||||||
m_log << "You have to give the address of the ecc_panic function" << endl;
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if errors_corrected variable address is given
|
|
||||||
if(cmd[ERROR_CORRECTED_ADDR].count() > 0)
|
|
||||||
{
|
|
||||||
addr_errors_corrected = strtoul(cmd[ERROR_CORRECTED_ADDR].first()->arg, NULL, 16);
|
|
||||||
}
|
|
||||||
else if(!_golden_run)
|
|
||||||
{
|
|
||||||
m_log << "You have to give the address of the errors_corrected variable" << endl;
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void FiascoFailExperiment::readGoldenRun(string& target)
|
|
||||||
{
|
|
||||||
ifstream golden_run_file("golden.out");
|
|
||||||
|
|
||||||
if(!golden_run_file.good())
|
|
||||||
{
|
|
||||||
m_log << "Could not open file golden.out" << endl;
|
|
||||||
simulator.terminate();
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
target.assign((istreambuf_iterator<char>(golden_run_file)), istreambuf_iterator<char>());
|
|
||||||
golden_run_file.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Function to record every output on the VGA-Output-Port in m_CurrentOutput
|
|
||||||
* Runs until any breakpoint different from the VGA-Port is reached
|
|
||||||
*/
|
|
||||||
BaseListener* FiascoFailExperiment::waitIOOrOther(bool clear_output)
|
|
||||||
{
|
|
||||||
IOPortListener ev_ioport(0x3F8, true); // VGA-Output-Port: 0x3F8
|
|
||||||
BaseListener* ev = NULL;
|
|
||||||
if(clear_output)
|
|
||||||
{
|
|
||||||
m_CurrentOutput.clear();
|
|
||||||
}
|
|
||||||
while(true)
|
|
||||||
{
|
|
||||||
simulator.addListener(&ev_ioport); // Add VGA-Port to the current listeners...
|
|
||||||
ev = simulator.resume(); // ...and continue
|
|
||||||
if(ev == &ev_ioport) // If current breakpoint == VGA-Port...
|
|
||||||
{
|
|
||||||
m_CurrentOutput += ev_ioport.getData(); // ... add output to m_CurrentOutput
|
|
||||||
}
|
|
||||||
else // Else: Breakpoint different from VGA-Port reached, break
|
|
||||||
{
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
count++;
|
||||||
}
|
}
|
||||||
return ev; // Return the current breakpoint
|
file.close();
|
||||||
|
assert(count == 3);
|
||||||
|
return (count == 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string FiascoFailExperiment::filename_state(unsigned instr_offset, const std::string& variant, const std::string& benchmark)
|
||||||
|
{
|
||||||
|
stringstream ss;
|
||||||
|
ss << instr_offset;
|
||||||
|
if (variant.size() && benchmark.size()) {
|
||||||
|
return dir_prerequisites + "/" + variant + "-" + benchmark + "-" + "state" + "-" + ss.str();
|
||||||
|
}
|
||||||
|
return "state-" + ss.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string FiascoFailExperiment::filename_traceinfo(const std::string& variant, const std::string& benchmark)
|
||||||
|
{
|
||||||
|
if (variant.size() && benchmark.size()) {
|
||||||
|
return dir_prerequisites + "/" + variant + "-" + benchmark + "-" + "traceinfo.txt";
|
||||||
|
}
|
||||||
|
return "traceinfo.txt";
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string FiascoFailExperiment::filename_elf(const std::string& variant, const std::string& benchmark)
|
||||||
|
{
|
||||||
|
if (variant.size() && benchmark.size()) {
|
||||||
|
return dir_images + "/" + variant + "/" + "fiasco.image";
|
||||||
|
}
|
||||||
|
return "fiasco.image";
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string FiascoFailExperiment::filename_serial(const std::string& variant, const std::string& benchmark)
|
||||||
|
{
|
||||||
|
if (variant.size() && benchmark.size()) {
|
||||||
|
return dir_prerequisites + "/" + variant + "-" + benchmark + ".serial";
|
||||||
|
}
|
||||||
|
return "serial";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
std::vector<char> FiascoFailExperiment::loadFile(std::string filename)
|
||||||
|
{
|
||||||
|
std::vector<char> data;
|
||||||
|
FILE *f = fopen(filename.c_str(), "rb");
|
||||||
|
if (!f) {
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
fseek(f, 0, SEEK_END);
|
||||||
|
long len = ftell(f);
|
||||||
|
fseek(f, 0, SEEK_SET);
|
||||||
|
if (len > 0) {
|
||||||
|
data.resize(len);
|
||||||
|
fread(&data[0], len, 1, f);
|
||||||
|
}
|
||||||
|
fclose(f);
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
bool FiascoFailExperiment::run()
|
bool FiascoFailExperiment::run()
|
||||||
{
|
{
|
||||||
m_log << "startup" << endl;
|
log << "startup" << endl;
|
||||||
parseOptions();
|
|
||||||
|
|
||||||
if(_golden_run)
|
// Do the actual fault injection
|
||||||
{
|
faultInjection();
|
||||||
// Do the golden-run experiment
|
|
||||||
goldenRun();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Do the actual fault injection
|
|
||||||
faultInjection();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Experiment finished
|
// Experiment finished
|
||||||
simulator.terminate();
|
simulator.terminate();
|
||||||
@ -180,8 +130,11 @@ bool FiascoFailExperiment::run()
|
|||||||
|
|
||||||
bool FiascoFailExperiment::faultInjection()
|
bool FiascoFailExperiment::faultInjection()
|
||||||
{
|
{
|
||||||
string golden_run;
|
// trace info
|
||||||
readGoldenRun(golden_run); // Read the output string from the golden run
|
unsigned goldenrun_instr_counter;
|
||||||
|
unsigned long long goldenrun_runtime_ticks;
|
||||||
|
unsigned goldenrun_runtime, timeout_runtime;
|
||||||
|
guest_address_t addr_finish;
|
||||||
|
|
||||||
BPSingleListener bp;
|
BPSingleListener bp;
|
||||||
int experiments = 0;
|
int experiments = 0;
|
||||||
@ -189,12 +142,12 @@ bool FiascoFailExperiment::faultInjection()
|
|||||||
for(experiments = 0; experiments < 500 || (m_jc.getNumberOfUndoneJobs() != 0); )
|
for(experiments = 0; experiments < 500 || (m_jc.getNumberOfUndoneJobs() != 0); )
|
||||||
{
|
{
|
||||||
#endif
|
#endif
|
||||||
m_log << "asking job server for experiment parameters" << endl;
|
log << "asking job server for experiment parameters" << endl;
|
||||||
FiascoFailExperimentData param;
|
FiascoFailExperimentData param;
|
||||||
#if !LOCAL
|
#if !LOCAL
|
||||||
if(!m_jc.getParam(param))
|
if(!m_jc.getParam(param))
|
||||||
{
|
{
|
||||||
m_log << "Dying." << endl;
|
log << "Dying." << endl;
|
||||||
simulator.terminate(1);
|
simulator.terminate(1);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
@ -209,7 +162,7 @@ bool FiascoFailExperiment::faultInjection()
|
|||||||
|
|
||||||
if(param.msg.fsppilot().data_width() != 1)
|
if(param.msg.fsppilot().data_width() != 1)
|
||||||
{
|
{
|
||||||
m_log << "cannot deal with data_width = " << param.msg.fsppilot().data_width() << endl;
|
log << "cannot deal with data_width = " << param.msg.fsppilot().data_width() << endl;
|
||||||
simulator.terminate(1);
|
simulator.terminate(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -217,8 +170,23 @@ bool FiascoFailExperiment::faultInjection()
|
|||||||
int id = param.getWorkloadID();
|
int id = param.getWorkloadID();
|
||||||
m_variant = param.msg.fsppilot().variant();
|
m_variant = param.msg.fsppilot().variant();
|
||||||
m_benchmark = param.msg.fsppilot().benchmark();
|
m_benchmark = param.msg.fsppilot().benchmark();
|
||||||
unsigned instr_offset = param.msg.fsppilot().injection_instr(); // Offset to the IP where the fault injection has to be done
|
// Offset to the IP where the fault injection has to be done:
|
||||||
guest_address_t mem_addr = param.msg.fsppilot().data_address(); // Memory address wehre the fault injection has to be done
|
unsigned instr_offset = param.msg.fsppilot().injection_instr();
|
||||||
|
// Memory address wehre the fault injection has to be done:
|
||||||
|
guest_address_t mem_addr = param.msg.fsppilot().data_address();
|
||||||
|
|
||||||
|
readTraceInfo(goldenrun_instr_counter, goldenrun_runtime_ticks, addr_finish, m_variant, m_benchmark);
|
||||||
|
|
||||||
|
// ELF symbol addresses
|
||||||
|
ElfReader elfreader(filename_elf(m_variant, m_benchmark).c_str());
|
||||||
|
guest_address_t addr_errors_corrected = elfreader.getSymbol("errors_corrected").getAddress();
|
||||||
|
guest_address_t ecc_panic_address = elfreader.getSymbol("_Z9ecc_panicv").getAddress();
|
||||||
|
if (ecc_panic_address == ADDR_INV && m_variant != "baseline") {
|
||||||
|
log << "WARNING: retrieving ELF symbol 'ecc_panic' failed!" << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// for compatibility with the ecos kernel test experiment (MULTIPLE_SNAPSHOTS)
|
||||||
|
string statename = filename_state(0, m_variant, m_benchmark);
|
||||||
|
|
||||||
// for each job with the SINGLEBITFLIP fault model we're actually doing *8*
|
// for each job with the SINGLEBITFLIP fault model we're actually doing *8*
|
||||||
// experiments (one for each bit)
|
// experiments (one for each bit)
|
||||||
@ -226,155 +194,101 @@ bool FiascoFailExperiment::faultInjection()
|
|||||||
{
|
{
|
||||||
++experiments;
|
++experiments;
|
||||||
|
|
||||||
WallclockTimer timer; // Timer to log the actual time of the experiment
|
// Timer to log the actual time of the experiment:
|
||||||
|
WallclockTimer timer;
|
||||||
timer.startTimer();
|
timer.startTimer();
|
||||||
|
|
||||||
FiascofailProtoMsg_Result *result = param.msg.add_result(); // Protobuf object for the result
|
// 8 results in one job
|
||||||
result->set_bit_offset(bit_offset); // Set the bit offset (1 if FAULTMODEL_BURST is active)
|
FiascofailProtoMsg_Result *result = param.msg.add_result();
|
||||||
m_log << dec << "job " << id << " @bit: " << bit_offset << " " << m_variant << "/" << m_benchmark
|
result->set_bit_offset(bit_offset);
|
||||||
|
log << dec << "job " << id << " @bit: " << bit_offset << " " << m_variant << "/" << m_benchmark
|
||||||
<< " instr-offset " << hex << instr_offset
|
<< " instr-offset " << hex << instr_offset
|
||||||
<< " mem " << hex << mem_addr << "+" << dec << bit_offset << endl;
|
<< " mem " << hex << mem_addr << "+" << dec << bit_offset << endl;
|
||||||
|
|
||||||
m_log << "restoring state" << endl;
|
log << "restoring state" << endl;
|
||||||
simulator.restore("state"); // Restore the state (Entry point is the first IP of the main-function from the application)
|
simulator.restore(statename);
|
||||||
m_log << "restore @ip " << hex << simulator.getCPU(0).getInstructionPointer() << " finished!" << endl;
|
|
||||||
|
|
||||||
// convert to microseconds (simulator.getTimerTicksPerSecond() only
|
// convert to microseconds (simulator.getTimerTicksPerSecond() only
|
||||||
// works reliably when simulation has begun)
|
// works reliably when simulation has begun)
|
||||||
unsigned goldenrun_runtime = (unsigned)(golden_run_timer_ticks * 1000000.0 / simulator.getTimerTicksPerSecond());
|
goldenrun_runtime = (unsigned)
|
||||||
unsigned timeout_runtime = goldenrun_runtime + 1000000/18.2; // + 1 timer tick
|
(goldenrun_runtime_ticks * 1000000.0 / simulator.getTimerTicksPerSecond());
|
||||||
|
timeout_runtime = goldenrun_runtime + 1000000/18.2; // + 1 timer tick
|
||||||
|
|
||||||
BPSingleListener func_finish(endAddress); // Add the last IP of the main-function to the listeners (end of experiment)
|
BPSingleListener func_finish(addr_finish);
|
||||||
simulator.addListener(&func_finish);
|
simulator.addListener(&func_finish);
|
||||||
|
bool reached_finish = false;
|
||||||
|
|
||||||
|
// record serial output
|
||||||
|
SerialOutputLogger sol(0x3f8, LIMIT_SERIAL);
|
||||||
|
simulator.addFlow(&sol);
|
||||||
|
|
||||||
simtime_t time_start = simulator.getTimerTicks(); // measure elapsed time
|
// measure elapsed time
|
||||||
|
simtime_t time_start = simulator.getTimerTicks();
|
||||||
|
|
||||||
if(instr_offset > 0)
|
// no need to wait if offset is 0
|
||||||
{
|
BaseListener* ev;
|
||||||
bp.setWatchInstructionPointer(ANY_ADDR); // Create new Breakpoint...
|
|
||||||
bp.setCounter(instr_offset); // ...to break when the IP for the fault injection is reached...
|
|
||||||
simulator.addListener(&bp); // ...and add it to the actual listeners
|
|
||||||
|
|
||||||
BaseListener *go = waitIOOrOther(true); // Resume simulation and log VGA-Output
|
if (instr_offset > 0) {
|
||||||
if(go == &func_finish) // If func_finish has triggerd the break, something went wong...
|
bp.setWatchInstructionPointer(ANY_ADDR);
|
||||||
{
|
bp.setCounter(instr_offset);
|
||||||
stringstream ss;
|
simulator.addListener(&bp);
|
||||||
ss << "experiment reached finish() before FI";
|
|
||||||
m_log << ss.str() << endl;
|
|
||||||
result->set_resulttype(result->UNKNOWN);
|
|
||||||
result->set_details(ss.str());
|
|
||||||
result->set_runtime(timer);
|
|
||||||
m_jc.sendResult(param);
|
|
||||||
|
|
||||||
continue; // ... so continue with next experiment
|
ev = simulator.resume();
|
||||||
}
|
if (ev == &func_finish) {
|
||||||
else if(go != &bp) // Else if the breakpoint for the fault injection is not reached, something went wrong...
|
log << "experiment reached finish() before FI" << endl;
|
||||||
{
|
reached_finish = true;
|
||||||
stringstream ss;
|
|
||||||
ss << "experiment didn't reach bp";
|
|
||||||
m_log << ss.str() << endl;
|
|
||||||
result->set_resulttype(result->UNKNOWN);
|
|
||||||
result->set_details(ss.str());
|
|
||||||
result->set_latest_ip(simulator.getCPU(0).getInstructionPointer());
|
|
||||||
result->set_runtime(timer);
|
|
||||||
#if !LOCAL
|
|
||||||
m_jc.sendResult(param);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if FIASCO_FAULTMODEL_BURST
|
|
||||||
bit_offset = 8;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
continue; // ... so continue with the next experiment
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// sanity check (check if actual IP equals the trced IP)
|
|
||||||
uint32_t injection_ip = simulator.getCPU(0).getInstructionPointer();
|
|
||||||
if(param.msg.fsppilot().has_injection_instr_absolute() &&
|
|
||||||
injection_ip != param.msg.fsppilot().injection_instr_absolute())
|
|
||||||
{
|
|
||||||
stringstream ss;
|
|
||||||
ss << "SANITY CHECK FAILED: " << hex << injection_ip
|
|
||||||
<< " != " << hex << param.msg.fsppilot().injection_instr_absolute();
|
|
||||||
m_log << ss.str() << endl;
|
|
||||||
result->set_resulttype(result->UNKNOWN);
|
|
||||||
result->set_latest_ip(injection_ip);
|
|
||||||
result->set_details(ss.str());
|
|
||||||
result->set_runtime(timer);
|
|
||||||
#if !LOCAL
|
|
||||||
m_jc.sendResult(param);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if FIASCO_FAULTMODEL_BURST
|
|
||||||
bit_offset = 8;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
continue; // If sanity check fails: next experiment
|
|
||||||
}
|
|
||||||
if(param.msg.fsppilot().has_injection_instr_absolute())
|
|
||||||
{
|
|
||||||
m_log << "Absolute IP sanity check OK" << endl;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
m_log << "Absolute IP sanity check skipped (job parameters insufficient)" << endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// --- fault injection ---
|
// --- fault injection ---
|
||||||
MemoryManager& mm = simulator.getMemoryManager(); // Get the memory manager from Bochs
|
MemoryManager& mm = simulator.getMemoryManager();
|
||||||
host_address_t addr = reinterpret_cast<BochsMemoryManager*>(&mm)->guestToHost(mem_addr); // check if the fault-address is mapped (guestToHost returns ADDR_INV if not)
|
byte_t data = mm.getByte(mem_addr);
|
||||||
if (addr == (host_address_t)ADDR_INV)
|
|
||||||
{
|
|
||||||
result->set_resulttype(result->UNKNOWN);
|
|
||||||
result->set_latest_ip(injection_ip);
|
|
||||||
result->set_runtime(timer);
|
|
||||||
stringstream ss;
|
|
||||||
ss << "INVALID DATA-ADDRESS " << hex << mem_addr << " @ ip " << injection_ip;
|
|
||||||
result->set_details(ss.str());
|
|
||||||
m_jc.sendResult(param);
|
|
||||||
|
|
||||||
continue; // Faul-address is not mapped so continue with the next experiment
|
|
||||||
}
|
|
||||||
byte_t data = mm.getByte(mem_addr); // Get tha actual value stored in the fault-addres
|
|
||||||
byte_t newdata;
|
byte_t newdata;
|
||||||
#if FIASCO_FAULTMODEL_BURST
|
#if FIASCO_FAULTMODEL_BURST
|
||||||
newdata = data ^ 0xff; // If Faultmode burst is active: Flip every 8 bits...
|
newdata = data ^ 0xff;
|
||||||
bit_offset = 8; // ...and continue with the next byte
|
bit_offset = 8; // enforce loop termination
|
||||||
#else
|
#else
|
||||||
newdata = data ^ (1 << bit_offset); // Else: Flip the bit according to the actual bit-offset and continue with next bit
|
newdata = data ^ (1 << bit_offset);
|
||||||
#endif
|
#endif
|
||||||
mm.setByte(mem_addr, newdata); // Store the new data in the actual faut-address
|
mm.setByte(mem_addr, newdata);
|
||||||
m_log << "fault injected @ ip " << injection_ip
|
// note at what IP we did it
|
||||||
<< " 0x" << hex << ((int)data) << " -> 0x" << ((int)newdata) << endl;
|
uint32_t injection_ip = simulator.getCPU(0).getInstructionPointer();
|
||||||
|
log << "fault injected @ ip " << injection_ip
|
||||||
|
<< " 0x" << hex << ((int)data) << " -> 0x" << ((int)newdata) << endl;
|
||||||
|
// sanity check
|
||||||
|
if (param.msg.fsppilot().has_injection_instr_absolute() &&
|
||||||
|
injection_ip != param.msg.fsppilot().injection_instr_absolute()) {
|
||||||
|
stringstream ss;
|
||||||
|
ss << "SANITY CHECK FAILED: " << injection_ip
|
||||||
|
<< " != " << param.msg.fsppilot().injection_instr_absolute();
|
||||||
|
log << ss.str() << endl;
|
||||||
|
result->set_resulttype(result->UNKNOWN);
|
||||||
|
result->set_latest_ip(injection_ip);
|
||||||
|
result->set_details(ss.str());
|
||||||
|
result->set_runtime(timer);
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (param.msg.fsppilot().has_injection_instr_absolute()) {
|
||||||
|
log << "Absolute IP sanity check OK" << endl;
|
||||||
|
} else {
|
||||||
|
log << "Absolute IP sanity check skipped (job parameters insufficient)" << endl;
|
||||||
|
}
|
||||||
|
|
||||||
// --- aftermath ---
|
// --- aftermath ---
|
||||||
// catch traps as "extraordinary" ending
|
// catch traps as "extraordinary" ending
|
||||||
TrapListener ev_trap(ANY_TRAP);
|
TrapListener ev_trap(ANY_TRAP);
|
||||||
simulator.addListener(&ev_trap);
|
simulator.addListener(&ev_trap);
|
||||||
|
|
||||||
// jump outside text segment (TODO: text segments for multiple elf-files + paging)
|
|
||||||
/*
|
|
||||||
BPRangeListener ev_below_text(ANY_ADDR, addr_text_start -1); // TODO
|
|
||||||
BPRangeListener ev_beyond_text(addr_text_end + 1, ANY_ADDR); // TODO
|
|
||||||
simulator.addListener(&ev_below_text);
|
|
||||||
simulator.addListener(&ev_beyond_text);
|
|
||||||
*/
|
|
||||||
|
|
||||||
// timeout (e.g., stuck in a HLT instruction)
|
// timeout (e.g., stuck in a HLT instruction)
|
||||||
TimerListener ev_timeout(timeout_runtime);
|
TimerListener ev_timeout(timeout_runtime);
|
||||||
simulator.addListener(&ev_timeout);
|
simulator.addListener(&ev_timeout);
|
||||||
|
|
||||||
// grant generous (10x) more instructions before aborting to avoid false positives
|
// grant generous (10x) more instructions before aborting to avoid false positives
|
||||||
BPSingleListener ev_dyninstructions(ANY_ADDR);
|
BPSingleListener ev_dyninstructions(ANY_ADDR);
|
||||||
|
//ev_dyninstructions.setCounter((goldenrun_instr_counter - param.msg.fsppilot().injection_instr()) * 10);
|
||||||
// FIXME overflow possible
|
// FIXME overflow possible
|
||||||
ev_dyninstructions.setCounter(golden_run_instructions * 10);
|
ev_dyninstructions.setCounter(goldenrun_instr_counter * 10);
|
||||||
simulator.addListener(&ev_dyninstructions);
|
simulator.addListener(&ev_dyninstructions);
|
||||||
|
|
||||||
// incomplete (e.g. cursors blinks so nothing happens any more or a longjump occurs)
|
// incomplete (e.g. cursors blinks so nothing happens any more or a longjump occurs)
|
||||||
@ -385,33 +299,19 @@ bool FiascoFailExperiment::faultInjection()
|
|||||||
|
|
||||||
// function called by ecc apsects, when an uncorrectable error is detected
|
// function called by ecc apsects, when an uncorrectable error is detected
|
||||||
BPSingleListener func_ecc_panic(ecc_panic_address);
|
BPSingleListener func_ecc_panic(ecc_panic_address);
|
||||||
if(ecc_panic_address != ADDR_INV)
|
if(ecc_panic_address != ADDR_INV) {
|
||||||
{
|
|
||||||
simulator.addListener(&func_ecc_panic);
|
simulator.addListener(&func_ecc_panic);
|
||||||
}
|
}
|
||||||
|
|
||||||
// wait until experiment-terminating event occurs
|
// wait until experiment-terminating event occurs
|
||||||
bool finished = false;
|
while (!reached_finish) {
|
||||||
BaseListener *go;
|
ev = simulator.resume();
|
||||||
while(!finished)
|
if( (ev == &ev_trap) && (ev_trap.getTriggerNumber() == 14) ) {
|
||||||
{
|
// Page fault trap is OK
|
||||||
go = waitIOOrOther(false); // resume experiment until func_finish or any other BP is reached and log the output
|
simulator.addListener(&ev_trap);
|
||||||
if(go == &ev_trap) // if a trap is triggered, check which one
|
} else {
|
||||||
{
|
// in any other case, the experiment is finished
|
||||||
// Traps that occour in golden run are considered as deliberate
|
break;
|
||||||
if(ev_trap.getTriggerNumber() == 14) // Page fault trap
|
|
||||||
{
|
|
||||||
finished = false;
|
|
||||||
simulator.addListener(&ev_trap); // Trap considered as deliberate so continue
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
finished = true; // Trap not considered as deliberate so break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
finished = true; // Experiment reached BP so break
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -420,156 +320,85 @@ bool FiascoFailExperiment::faultInjection()
|
|||||||
// would make this usable for the jump-outside case
|
// would make this usable for the jump-outside case
|
||||||
result->set_latest_ip(simulator.getCPU(0).getInstructionPointer());
|
result->set_latest_ip(simulator.getCPU(0).getInstructionPointer());
|
||||||
|
|
||||||
// record error_corrected regardless of result
|
// record test result
|
||||||
if (addr_errors_corrected != ADDR_INV)
|
bool output_correct;
|
||||||
{
|
std::vector<char> serial_correct = loadFile(filename_serial(m_variant, m_benchmark));
|
||||||
int32_t error_corrected = mm.getByte(addr_errors_corrected);
|
|
||||||
result->set_error_corrected(error_corrected ? result->TRUE : result->FALSE);
|
// sanity check
|
||||||
}
|
if (serial_correct.size() == 0) {
|
||||||
else
|
log << "sanity check failed, golden run should have had output" << endl;
|
||||||
{
|
simulator.terminate(0);
|
||||||
// not setting this yields NULL in the DB
|
|
||||||
//result->set_error_corrected(0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string serial_actual = sol.getOutput();
|
||||||
|
if (serial_actual.size() == serial_correct.size() &&
|
||||||
|
equal(serial_actual.begin(), serial_actual.end(), serial_correct.begin())) {
|
||||||
|
output_correct = true;
|
||||||
|
} else {
|
||||||
|
output_correct = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Get the runtime factor compared to the golden run
|
||||||
result->set_sim_runtime_factor(
|
result->set_sim_runtime_factor(
|
||||||
(simulator.getTimerTicks() - time_start) / (double) golden_run_timer_ticks); // Get the runtime factor compared to the golden run
|
(simulator.getTimerTicks() - time_start) / (double) goldenrun_runtime_ticks);
|
||||||
|
|
||||||
|
if (ev == &func_finish && output_correct) {
|
||||||
// Look for result
|
// do we reach finish?
|
||||||
if(go == &func_finish) // If BP == func_finished...
|
log << "experiment finished ordinarily" << endl;
|
||||||
{
|
result->set_resulttype(result->OK);
|
||||||
if(strcmp(m_CurrentOutput.c_str(), golden_run.c_str()) == 0) // ...and output is equal to the golden run: Result: OK
|
// record error_corrected only in the 'OK' case
|
||||||
{
|
// isMapped() crashes in case something (e.g., paging) went horribly wrong
|
||||||
m_log << "experiment finished ordinarily" << endl;
|
if ( (addr_errors_corrected != ADDR_INV) && mm.isMapped(addr_errors_corrected) ) {
|
||||||
result->set_resulttype(result->OK);
|
int32_t error_corrected = mm.getByte(addr_errors_corrected);
|
||||||
|
result->set_error_corrected(error_corrected ? result->TRUE : result->FALSE);
|
||||||
|
} else {
|
||||||
|
// not setting this yields NULL in the DB
|
||||||
|
//result->set_error_corrected(0);
|
||||||
}
|
}
|
||||||
else // ...or output is different from the golden run: Result: SDC
|
} else if (ev == &func_finish && !output_correct) {
|
||||||
{
|
// do we reach finish?
|
||||||
m_log << "experiment finished, but output incorrect" << endl;
|
log << "experiment finished, but output incorrect" << endl;
|
||||||
result->set_resulttype(result->SDC);
|
result->set_resulttype(result->SDC);
|
||||||
result->set_details(m_CurrentOutput.c_str());
|
} else if (ev == &func_ecc_panic) {
|
||||||
}
|
log << "ECC Panic: uncorrectable error" << endl;
|
||||||
}
|
result->set_resulttype(result->DETECTED); // DETECTED <=> ECC_PANIC <=> reboot
|
||||||
else if(go == &ev_trap) // If BP == trap: Result: Trap
|
} else if (ev == &ev_trap) {
|
||||||
{
|
log << dec << "Result TRAP #" << ev_trap.getTriggerNumber() << endl;
|
||||||
m_log << dec << "Result TRAP #" << ev_trap.getTriggerNumber() << endl;
|
|
||||||
result->set_resulttype(result->TRAP);
|
result->set_resulttype(result->TRAP);
|
||||||
|
|
||||||
stringstream ss;
|
stringstream ss;
|
||||||
ss << ev_trap.getTriggerNumber();
|
ss << ev_trap.getTriggerNumber();
|
||||||
result->set_details(ss.str());
|
result->set_details(ss.str());
|
||||||
}
|
} else if (ev == &ev_timeout || ev == &ev_dyninstructions || ev == &ev_blink || ev == &ev_longjmp) {
|
||||||
else if(go == &func_ecc_panic) // If BP == ecc_panic_function: Result: Detected (but not corrected)
|
log << "Result TIMEOUT" << endl;
|
||||||
{
|
|
||||||
m_log << "ECC Panic: uncorrectable error" << endl;
|
|
||||||
result->set_resulttype(result->DETECTED); // DETECTED <=> ECC_PANIC <=> reboot
|
|
||||||
}
|
|
||||||
// TODO (see above)
|
|
||||||
/*else if(go == &ev_below_text || go == &ev_beyond_text)
|
|
||||||
{
|
|
||||||
m_log << "Result Trap #" << ev_trap.getTriggerNumber() << endl;
|
|
||||||
result->set_jump_outside(result->TRUE);
|
|
||||||
result->set_resulttype(result->TRAP);
|
|
||||||
|
|
||||||
stringstream ss;
|
|
||||||
ss << ev_trap.getTriggerNumber();
|
|
||||||
result->set_details(ss.str());
|
|
||||||
}*/
|
|
||||||
else if(go == &ev_timeout || go == &ev_dyninstructions || go == &ev_blink || go == &ev_longjmp) // Result: Timeout if any of these BP occur
|
|
||||||
{
|
|
||||||
m_log << "Result TIMEOUT" << endl;
|
|
||||||
result->set_resulttype(result->TIMEOUT);
|
result->set_resulttype(result->TIMEOUT);
|
||||||
if(go == &ev_dyninstructions)
|
if (ev == &ev_dyninstructions) {
|
||||||
{
|
|
||||||
result->set_details("i");
|
result->set_details("i");
|
||||||
}
|
}
|
||||||
else if(go == &ev_blink)
|
else if (ev == &ev_blink) {
|
||||||
{
|
|
||||||
result->set_details("b");
|
result->set_details("b");
|
||||||
}
|
}
|
||||||
else if(go == &ev_longjmp)
|
else if (ev == &ev_longjmp) {
|
||||||
{
|
|
||||||
result->set_details("l");
|
result->set_details("l");
|
||||||
}
|
}
|
||||||
else
|
} else {
|
||||||
{
|
log << "Result WTF?" << endl;
|
||||||
result->set_details("t");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else // None of the above BPs reached so obviously something went wrong
|
|
||||||
{
|
|
||||||
m_log << "Result WTF?" << endl;
|
|
||||||
result->set_resulttype(result->UNKNOWN);
|
result->set_resulttype(result->UNKNOWN);
|
||||||
|
|
||||||
stringstream ss;
|
stringstream ss;
|
||||||
ss << "event addr: " << go << " EIP " << simulator.getCPU(0).getInstructionPointer();
|
ss << "event addr " << ev << " EIP " << simulator.getCPU(0).getInstructionPointer();
|
||||||
result->set_details(ss.str());
|
result->set_details(ss.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
result->set_runtime(timer);
|
result->set_runtime(timer);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if !LOCAL
|
#if !LOCAL
|
||||||
m_jc.sendResult(param); // Send the result back to the job server and continue with next experiment (if there is one)
|
// Send the result back to the job server and continue with next experiment (if there is one)
|
||||||
}
|
m_jc.sendResult(param);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void FiascoFailExperiment::goldenRun()
|
|
||||||
{
|
|
||||||
std::vector<int> m_lTraps;
|
|
||||||
|
|
||||||
|
|
||||||
simulator.restore("state"); // Restore state (Continues from the first IP in the main function of the Application)
|
|
||||||
|
|
||||||
BPSingleListener l_stop_address(endAddress); // Add the last IP of the main function to the actual listeners
|
|
||||||
m_log << "Golden-Run start, Stop-Address: 0x" << hex << endAddress << endl;
|
|
||||||
|
|
||||||
std::string golden_output;
|
|
||||||
ofstream golden_output_file("golden.out"); // Save the logged output in "golden.out"
|
|
||||||
|
|
||||||
simulator.addListener(&l_stop_address);
|
|
||||||
|
|
||||||
TrapListener ev_trap(ANY_TRAP); // Trap listeners, break if any trap is triggered
|
|
||||||
simulator.addListener(&ev_trap);
|
|
||||||
|
|
||||||
bool finished = false;
|
|
||||||
m_CurrentOutput = "";
|
|
||||||
while(!finished) // Continue and log output
|
|
||||||
{
|
|
||||||
BaseListener* ev = waitIOOrOther(false);
|
|
||||||
if(ev == &ev_trap) // if trap is triggered, log the number
|
|
||||||
{
|
|
||||||
m_lTraps.push_back(ev_trap.getTriggerNumber());
|
|
||||||
simulator.addListener(&ev_trap);
|
|
||||||
}
|
|
||||||
else if(ev == &l_stop_address) // if stop address is reached, save the output and finish the experiment
|
|
||||||
{
|
|
||||||
golden_output.assign(m_CurrentOutput.c_str());
|
|
||||||
golden_output_file << m_CurrentOutput.c_str();
|
|
||||||
m_log << "Output successfully logged..." << endl;
|
|
||||||
finished = true;
|
|
||||||
}
|
|
||||||
else // something went wrong
|
|
||||||
{
|
|
||||||
m_log << "Error on logging Output, terminating..." << endl;
|
|
||||||
golden_output_file.close();
|
|
||||||
simulator.clearListeners();
|
|
||||||
simulator.terminate();
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
m_log << "Saving..." << endl;
|
|
||||||
golden_output_file.close();
|
|
||||||
m_log << "Done." << endl;
|
|
||||||
|
|
||||||
stringstream ss;
|
|
||||||
ss << "triggered traps: ";
|
|
||||||
for(std::vector<int>::iterator lIterator = m_lTraps.begin(); lIterator != m_lTraps.end(); ++lIterator)
|
|
||||||
{
|
|
||||||
ss << *lIterator << " ";
|
|
||||||
}
|
|
||||||
m_log << ss.str() << endl;
|
|
||||||
}
|
|
||||||
|
|||||||
@ -6,29 +6,28 @@
|
|||||||
#include "util/Logger.hpp"
|
#include "util/Logger.hpp"
|
||||||
#include "efw/ExperimentFlow.hpp"
|
#include "efw/ExperimentFlow.hpp"
|
||||||
#include "efw/JobClient.hpp"
|
#include "efw/JobClient.hpp"
|
||||||
#include "sal/Listener.hpp"
|
|
||||||
|
|
||||||
class FiascoFailExperiment : public fail::ExperimentFlow
|
class FiascoFailExperiment : public fail::ExperimentFlow
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
fail::Logger m_log;
|
fail::Logger log;
|
||||||
fail::JobClient m_jc;
|
fail::JobClient m_jc;
|
||||||
std::string m_variant, m_benchmark;
|
std::string m_variant, m_benchmark;
|
||||||
void readGoldenRun(std::string &);
|
static const std::string dir_images;
|
||||||
fail::BaseListener* waitIOOrOther(bool);
|
static const std::string dir_prerequisites;
|
||||||
void parseOptions();
|
|
||||||
|
bool readTraceInfo(unsigned &instr_counter, unsigned long long &runtime, fail::guest_address_t &addr_finish,
|
||||||
|
const std::string& variant, const std::string& benchmark);
|
||||||
|
|
||||||
|
std::string filename_state(unsigned instr_offset, const std::string& variant, const std::string& benchmark);
|
||||||
|
std::string filename_traceinfo(const std::string& variant, const std::string& benchmark);
|
||||||
|
std::string filename_elf(const std::string& variant, const std::string& benchmark);
|
||||||
|
std::string filename_serial(const std::string& variant, const std::string& benchmark);
|
||||||
|
std::vector<char> loadFile(std::string filename);
|
||||||
|
|
||||||
bool faultInjection();
|
bool faultInjection();
|
||||||
|
|
||||||
std::string m_CurrentOutput;
|
|
||||||
fail::guest_address_t endAddress;
|
|
||||||
unsigned golden_run_instructions;
|
|
||||||
unsigned long long golden_run_timer_ticks;
|
|
||||||
fail::guest_address_t ecc_panic_address;
|
|
||||||
fail::guest_address_t addr_errors_corrected;
|
|
||||||
bool _golden_run;
|
|
||||||
void goldenRun();
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
FiascoFailExperiment() : m_log("FiascoFail", false){}
|
FiascoFailExperiment() : log("FiascoFail", false){}
|
||||||
bool run();
|
bool run();
|
||||||
};
|
};
|
||||||
|
|||||||
@ -1,9 +1,11 @@
|
|||||||
#ifndef __FIASCOFAIL_EXPERIMENT_INFO_HPP
|
#ifndef __FIASCOFAIL_EXPERIMENT_INFO_HPP
|
||||||
#define __FIASCOFAIL_EXPERIMENT_INFO_HPP
|
#define __FIASCOFAIL_EXPERIMENT_INFO_HPP
|
||||||
|
|
||||||
|
#define DIR_IMAGES "images"
|
||||||
|
#define DIR_PREREQUISITES "prerequisites"
|
||||||
|
#define FIASCO_FAULTMODEL_BURST 0
|
||||||
|
|
||||||
#define FIASCO_BREAK_BLINK 0xf004b800
|
#define FIASCO_BREAK_BLINK 0xf004b800
|
||||||
#define FIASCO_BREAK_LONGJMP 0xf004c88e
|
#define FIASCO_BREAK_LONGJMP 0xf004c88e
|
||||||
|
|
||||||
#define FIASCO_FAULTMODEL_BURST 1
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Reference in New Issue
Block a user