Merge branch 'master' of ssh://vamos.informatik.uni-erlangen.de:29418/fail

This commit is contained in:
Adrian Böckenkamp
2013-05-07 15:06:01 +02:00
28 changed files with 510 additions and 608 deletions

7
.mailmap Normal file
View File

@ -0,0 +1,7 @@
Adrian Böckenkamp <adrian.boeckenkamp@tu-dortmund.de> adrian <adrian@8c4709b5-6ec9-48aa-a5cd-a96041d1645a>
Christoph Borchert <christoph.borchert@tu-dortmund.de> chb <chb@8c4709b5-6ec9-48aa-a5cd-a96041d1645a>
Horst Schirmeier <horst.schirmeier@tu-dortmund.de> hsc <hsc@8c4709b5-6ec9-48aa-a5cd-a96041d1645a>
Martin Hoffmann <hoffmann@cs.fau.de> hoffmann <hoffmann@8c4709b5-6ec9-48aa-a5cd-a96041d1645a>
Martin Unzner <martin.unzner@mailbox.tu-dresden.de> unzner <unzner@8c4709b5-6ec9-48aa-a5cd-a96041d1645a>
Richard Hellwig <richard.hellwig@gmail.com> hellwig <hellwig@8c4709b5-6ec9-48aa-a5cd-a96041d1645a>
Tobias Friemel <tobias.friemel@tu-dortmund.de> friemel <friemel@8c4709b5-6ec9-48aa-a5cd-a96041d1645a>

View File

@ -1,10 +1,18 @@
#include <string>
#include <errno.h>
#include <signal.h>
#include "SocketComm.hpp"
namespace fail {
/**
 * Sets the disposition of SIGPIPE to "ignore" (SIG_IGN) via signal().
 * Takes no arguments and returns nothing; the return value of signal()
 * is not checked.
 */
void SocketComm::init()
{
// It's usually much easier to handle the error on write() than to do
// anything intelligent in a SIGPIPE handler, so the signal is ignored.
// NOTE(review): signal() changes the disposition for the whole process,
// so repeated calls are harmless but would override a SIGPIPE handler
// installed elsewhere -- confirm that nobody relies on one.
signal(SIGPIPE, SIG_IGN);
}
bool SocketComm::sendMsg(int sockfd, google::protobuf::Message& msg)
{
#ifdef USE_SIZE_PREFIX

View File

@ -21,6 +21,10 @@ namespace fail {
class SocketComm {
public:
/**
* This allows us to ignore SIGPIPE.
*/
static void init();
/**
* Send Protobuf-generated message
* @param sockfd open socket descriptor to write to

View File

@ -7,6 +7,7 @@
#include "util/SynchronizedMap.hpp"
#include "config/FailConfig.hpp"
#include "comm/FailControlMessage.pb.h"
#include "comm/SocketComm.hpp"
#include <list>
#include <ctime>
@ -81,6 +82,7 @@ public:
JobServer(int port = SERVER_COMM_TCP_PORT) : m_port(port), m_finish(false), m_noMoreExps(false),
m_maxThreads(128), m_threadtimeout(0), m_undoneJobs(SERVER_OUT_QUEUE_SIZE)
{
SocketComm::init();
m_runid = std::time(0);
#ifndef __puma
m_serverThread = new boost::thread(&JobServer::run, this); // run operator()() in a thread.

View File

@ -1,4 +1,5 @@
#include "JobClient.hpp"
#include "comm/SocketComm.hpp"
using namespace std;
@ -6,6 +7,7 @@ namespace fail {
JobClient::JobClient(const std::string& server, int port)
{
SocketComm::init();
m_server_port = port;
m_server = server;
m_server_ent = gethostbyname(m_server.c_str());
@ -83,7 +85,7 @@ bool JobClient::getParam(ExperimentData& exp)
return true;
// Nothing to do right now, but maybe later
case FailControlMessage::COME_AGAIN:
sleep(1);
sleep(10);
continue;
default:
return false;

View File

@ -20,13 +20,13 @@ public:
* Gets the content of the passed Register.
* @param reg the register to get the content from
*/
virtual regdata_t getRegisterContent(Register* reg) const = 0;
virtual regdata_t getRegisterContent(const Register* reg) const = 0;
/**
* Writes the passed value into the given register.
* @param reg the register that should be written to
* @param value the value that should be written into the register
*/
virtual void setRegisterContent(Register* reg, regdata_t value) = 0;
virtual void setRegisterContent(const Register* reg, regdata_t value) = 0;
/**
* Returns the current instruction pointer.
* @return the current eip

View File

@ -5,7 +5,7 @@
namespace fail {
regdata_t BochsCPU::getRegisterContent(Register* reg) const
regdata_t BochsCPU::getRegisterContent(const Register* reg) const
{
assert(reg != NULL && "FATAL ERROR: reg-ptr cannot be NULL!");
// TODO: BX_CPU(0) *always* correct?
@ -26,7 +26,7 @@ regdata_t BochsCPU::getRegisterContent(Register* reg) const
#endif // SIM_SUPPORT_64
}
void BochsCPU::setRegisterContent(Register* reg, regdata_t value)
void BochsCPU::setRegisterContent(const Register* reg, regdata_t value)
{
assert(reg != NULL && "FATAL ERROR: reg-ptr cannot be NULL!");
// TODO: BX_CPU(0) *always* correct?

View File

@ -36,13 +36,13 @@ public:
* @param reg the register pointer of interest (cannot be \c NULL)
* @return the content of the register \c reg
*/
regdata_t getRegisterContent(Register* reg) const;
regdata_t getRegisterContent(const Register* reg) const;
/**
* Sets the content of the register \c reg to \c value.
* @param reg the destination register object pointer (cannot be \c NULL)
* @param value the new value to assign
*/
void setRegisterContent(Register* reg, regdata_t value);
void setRegisterContent(const Register* reg, regdata_t value);
/**
* Returns the current instruction pointer (aka program counter).
* @return the current (e)ip register content

View File

@ -116,7 +116,7 @@ public:
*/
void fireInterruptDone();
// Returns whatever bx_pc_system.time_ticks() currently reports, unmodified.
virtual simtime_t getTimerTicks() { return bx_pc_system.time_ticks(); }
virtual simtime_t getTimerTicksPerSecond() { return bx_pc_system.time_ticks() / bx_pc_system.time_usec(); /* imprecise hack */ }
// Returns bx_pc_system.time_ticks() divided by bx_pc_system.time_usec(),
// scaled by 1000000 (presumably microseconds -> seconds; TODO confirm).
// NOTE(review): if both operands are integral, the quotient is truncated
// before the scaling, and a time_usec() of 0 would divide by zero -- confirm
// the operand types and whether this can be called that early.
virtual simtime_t getTimerTicksPerSecond() { return bx_pc_system.time_ticks() / bx_pc_system.time_usec() * 1000000; /* imprecise hack */ }
/* ********************************************************************
* BochsController-specific (not implemented in SimulatorController!):
* ********************************************************************/

View File

@ -4,12 +4,12 @@
namespace fail {
regdata_t Gem5ArmCPU::getRegisterContent(Register* reg) const
// Reads a register by forwarding its type and index, together with this
// object's m_System and m_Id, to GetRegisterContent() and returning the result.
// reg is dereferenced unconditionally, so it must not be NULL.
regdata_t Gem5ArmCPU::getRegisterContent(const Register* reg) const
{
return GetRegisterContent(m_System, m_Id, reg->getType(), reg->getIndex());
}
void Gem5ArmCPU::setRegisterContent(Register* reg, regdata_t value)
// Writes a register by forwarding its type and index, together with this
// object's m_System and m_Id and the new value, to SetRegisterContent().
// reg is dereferenced unconditionally, so it must not be NULL.
void Gem5ArmCPU::setRegisterContent(const Register* reg, regdata_t value)
{
SetRegisterContent(m_System, m_Id, reg->getType(), reg->getIndex(), value);
}

View File

@ -33,13 +33,13 @@ public:
* @param reg the destination register whose content should be retrieved
* @return the content of register \c reg
*/
regdata_t getRegisterContent(Register* reg) const;
regdata_t getRegisterContent(const Register* reg) const;
/**
* Sets the register content for the \a current gem5 CPU.
* @param reg the (initialized) register object whose content should be set
* @param value the new content of the register \c reg
*/
void setRegisterContent(Register* reg, regdata_t value);
void setRegisterContent(const Register* reg, regdata_t value);
/**
* Retrieves the current instruction pointer (IP aka program counter, PC for short)
* for the current CPU \c this.

View File

@ -6,7 +6,7 @@ namespace fail {
static const uint64_t lower = 0x00000000ffffffff;
regdata_t T32ArmCPU::getRegisterContent(Register* reg) const
regdata_t T32ArmCPU::getRegisterContent(const Register* reg) const
{
// T32_ReadRegister wants a mask of bits representing the registers to read:
// e.g., reading R1 and R4 and R63
@ -28,7 +28,7 @@ regdata_t T32ArmCPU::getRegisterContent(Register* reg) const
return 0; // we should not come here.
}
void T32ArmCPU::setRegisterContent(Register* reg, regdata_t value)
void T32ArmCPU::setRegisterContent(const Register* reg, regdata_t value)
{
uint64_t mask = (1 << reg->getIndex());

View File

@ -29,13 +29,13 @@ public:
* @param reg the destination register whose content should be retrieved
* @return the content of register \c reg
*/
regdata_t getRegisterContent(Register* reg) const;
regdata_t getRegisterContent(const Register* reg) const;
/**
* Sets the register content for the \a current CPU.
* @param reg the (initialized) register object whose content should be set
* @param value the new content of the register \c reg
*/
void setRegisterContent(Register* reg, regdata_t value);
void setRegisterContent(const Register* reg, regdata_t value);
/**
* Retrieves the current instruction pointer (IP aka program counter, PC for short)
* for the current CPU \c this.

View File

@ -80,6 +80,8 @@ namespace fail {
typedef ElfSymbol entry_t;
typedef std::vector<entry_t> container_t;
typedef container_t::const_iterator symbol_iterator;
typedef container_t::const_iterator section_iterator;
/**
* Constructor.
@ -144,6 +146,14 @@ namespace fail {
container_t::const_iterator sym_begin() { return m_symboltable.begin(); }
container_t::const_iterator sym_end() { return m_symboltable.end(); }
/**
* Get section iterators. Each one dereferences to an ElfSymbol.
* @return sec_begin(): m_sectiontable.begin(), i.e. the first section entry;
*         sec_end(): m_sectiontable.end(), the matching past-the-end iterator
*/
container_t::const_iterator sec_begin() { return m_sectiontable.begin(); }
container_t::const_iterator sec_end() { return m_sectiontable.end(); }
private:
Logger m_log;

View File

@ -27,9 +27,9 @@ PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS ${MY_PROTOS})
## Build library
add_library(fail-${EXPERIMENT_NAME} ${PROTO_SRCS} ${PROTO_HDRS} ${MY_CAMPAIGN_SRCS})
add_dependencies(fail-${EXPERIMENT_NAME} fail-tracing fail-comm)
target_link_libraries(fail-${EXPERIMENT_NAME} ${PROTOBUF_LIBRARY})
target_link_libraries(fail-${EXPERIMENT_NAME} ${PROTOBUF_LIBRARY} -lmysqlclient_r)
## This is the example's campaign server distributing experiment parameters
add_executable(${EXPERIMENT_NAME}-server main.cc)
target_link_libraries(${EXPERIMENT_NAME}-server fail-${EXPERIMENT_NAME} fail ${PROTOBUF_LIBRARY} ${Boost_THREAD_LIBRARY})
target_link_libraries(${EXPERIMENT_NAME}-server fail-${EXPERIMENT_NAME} fail ${PROTOBUF_LIBRARY} ${Boost_THREAD_LIBRARY} -lmysqlclient_r)
install(TARGETS ${EXPERIMENT_NAME}-server RUNTIME DESTINATION bin)

View File

@ -2,6 +2,7 @@
#include <fstream>
#include <vector>
#include <map>
#include <sstream>
#include "campaign.hpp"
#include "experimentInfo.hpp"
@ -9,16 +10,20 @@
#include "util/ProtoStream.hpp"
#include "util/MemoryMap.hpp"
#include "util/gzstream/gzstream.h"
#include "util/CommandLine.hpp"
#include "../plugins/tracing/TracingPlugin.hpp"
//#define PRUNING_DEBUG_OUTPUT
using namespace std;
using namespace fail;
const std::string EcosKernelTestCampaign::dir_images("images");
#if BASELINE_ASSESSMENT
const std::string EcosKernelTestCampaign::dir_prerequisites("prerequisites-baseline");
const std::string EcosKernelTestCampaign::dir_images("images-baseline");
#else
const std::string EcosKernelTestCampaign::dir_prerequisites("prerequisites");
const std::string EcosKernelTestCampaign::dir_images("images");
#endif
const std::string EcosKernelTestCampaign::dir_results("results");
bool EcosKernelTestCampaign::writeTraceInfo(unsigned instr_counter, unsigned timeout,
@ -114,14 +119,6 @@ std::string EcosKernelTestCampaign::filename_traceinfo(const std::string& varian
return "traceinfo.txt";
}
std::string EcosKernelTestCampaign::filename_results(const std::string& variant, const std::string& benchmark)
{
if (variant.size() && benchmark.size()) {
return dir_results + "/" + variant + "-" + benchmark + "-" + "results.csv";
}
return "results.csv";
}
std::string EcosKernelTestCampaign::filename_elf(const std::string& variant, const std::string& benchmark)
{
if (variant.size() && benchmark.size()) {
@ -146,350 +143,217 @@ char const *variants[] = {
0
};
// big four (three): (mutex3,) bin_sem2, clocktruth, sync2
// busy waiters, sloooow at ips=2666mhz: kill, mutex3, clocktruth
// batch 1: line 1
char const *benchmarks[] = {
#if 1
"bin_sem0", "bin_sem1", "bin_sem2", "bin_sem3", "clock1", "clockcnv",
"clocktruth", "cnt_sem1", "except1", "flag1", "kill", "mqueue1", "mutex1",
"mutex2", "mutex3", "release", "sched1", "sync2", "sync3", "thread0",
/*"clocktruth",*/ "cnt_sem1", "except1", "flag1", /*"kill",*/ "mqueue1", "mutex1",
"mutex2", /*"mutex3",*/ "release", "sched1", "sync2", "sync3", "thread0",
"thread1", "thread2",
#elif 0
"sync2",
#endif
0
};
bool EcosKernelTestCampaign::run()
{
m_log << "startup" << endl;
CommandLine &cmd = CommandLine::Inst();
if (!init_results()) {
return false;
cmd.addOption("", "", Arg::None, "USAGE: fail-server [options...]");
CommandLine::option_handle HELP =
cmd.addOption("h", "help", Arg::None, "-h/--help \tPrint usage and exit");
CommandLine::option_handle RESULTTABLE =
cmd.addOption("r", "resulttable", Arg::Required, "-r/--resulttable \tTable to store results in (default: 'result')");
Database::cmdline_setup();
if (!cmd.parse()) {
m_log << "Error parsing arguments." << std::endl;
exit(1);
}
if (cmd[HELP].count() > 0) {
cmd.printUsage();
exit(0);
}
if (cmd[RESULTTABLE].count() > 0) {
m_result_table = std::string(cmd[RESULTTABLE].first()->arg);
} else {
m_result_table = std::string("result");
}
m_log << "Storing results in table '" << m_result_table << "'\n";
db = Database::cmdline_connect();
db_recv = Database::cmdline_connect();
fspmethod_id = 1; // constant for now
std::stringstream ss;
ss << "CREATE TABLE IF NOT EXISTS " << m_result_table << " ("
"pilot_id int(11) NOT NULL,\n"
"bitnr tinyint(4) NOT NULL,\n"
"bit_width tinyint(4) NOT NULL,\n"
"resulttype tinyint(4) NOT NULL,\n"
"ecos_test_result tinyint(4) NOT NULL,\n"
"latest_ip int(10) unsigned DEFAULT NULL,\n"
"error_corrected tinyint(4) NOT NULL,\n"
"details varchar(255) DEFAULT NULL,\n"
"runtime float NOT NULL,\n"
"PRIMARY KEY (pilot_id,bitnr))\n"
"ENGINE = MyISAM";
if (!db->query(ss.str().c_str())) return false;
// collect results in parallel to avoid deadlock
#ifndef __puma
boost::thread collect_thread(&EcosKernelTestCampaign::collect_results, this);
#endif
ss.str("");
/* Gather all unfinished jobs */
m_log << "Looking for unfinished jobs in the database ..." << std::endl;
ss << "(";
for (int variant_nr = 0; variants[variant_nr]; ++variant_nr) {
char const *variant = variants[variant_nr];
for (int benchmark_nr = 0; benchmarks[benchmark_nr]; ++benchmark_nr) {
char const *benchmark = benchmarks[benchmark_nr];
// local copies of experiment/job count (to calculate differences)
int local_count_exp = count_exp, local_count_exp_jobs = count_exp_jobs,
local_count_known = count_known, local_count_known_jobs = count_known_jobs;
// load trace
igzstream tracef(filename_trace(variant, benchmark).c_str());
if (tracef.fail()) {
m_log << "couldn't open " << filename_trace(variant, benchmark) << endl;
return false;
}
ProtoIStream ps(&tracef);
// read trace info
unsigned instr_counter, estimated_timeout, lowest_addr, highest_addr;
// FIXME properly deal with 2nd memory range
EcosKernelTestCampaign::readTraceInfo(instr_counter,
estimated_timeout, lowest_addr, highest_addr, lowest_addr, highest_addr, variant, benchmark);
// a map of addresses of ECC protected objects
MemoryMap mm;
mm.readFromFile(filename_memorymap(variant, benchmark).c_str());
// map for keeping one "open" EC for every address
// (maps injection data address => equivalence class)
AddrLastaccessMap open_ecs;
// instruction counter within trace
unsigned instr = 0;
// "rightmost" instr where we did a FI experiment
unsigned instr_rightmost = 0;
// fill open_ecs with one EC for every address
for (MemoryMap::iterator it = mm.begin(); it != mm.end(); ++it) {
open_ecs[*it] = instr;
}
// absolute address of current trace instruction
address_t instr_absolute = 0;
Trace_Event ev;
// for every event in the trace ...
while (ps.getNext(&ev) && instr < instr_counter) {
// instruction events just get counted
if (!ev.has_memaddr()) {
// new instruction
instr++;
instr_absolute = ev.ip();
continue;
}
// for each single byte in this memory access ...
for (address_t data_address = ev.memaddr();
data_address < ev.memaddr() + ev.width();
++data_address) {
// skip accesses to data outside our map of interesting addresses
AddrLastaccessMap::iterator lastuse_it;
if ((lastuse_it = open_ecs.find(data_address)) == open_ecs.end()) {
continue;
}
int instr1 = lastuse_it->second;
int instr2 = instr;
// skip zero-sized intervals: these can occur when an instruction
// accesses a memory location more than once (e.g., INC, CMPXCHG)
if (instr1 > instr2) {
continue;
}
// we now have an interval-terminating R/W event to the memaddr
// we're currently looking at; the EC is defined by
// data_address [instr1, instr2] (instr_absolute)
if (ev.accesstype() == ev.READ) {
// a sequence ending with READ: we need to do one experiment to
// cover it completely
add_experiment_ec(variant, benchmark, data_address, instr1, instr2, instr_absolute);
instr_rightmost = instr2;
} else if (ev.accesstype() == ev.WRITE) {
// a sequence ending with WRITE: an injection anywhere here
// would have no effect.
add_known_ec(variant, benchmark, data_address, instr1, instr2, instr_absolute);
} else {
m_log << "WAT" << endl;
}
// next interval must start at next instruction; the aforementioned
// skipping mechanism wouldn't work otherwise
lastuse_it->second = instr2 + 1;
}
}
// close all open intervals (right end of the fault-space)
for (AddrLastaccessMap::iterator lastuse_it = open_ecs.begin();
lastuse_it != open_ecs.end(); ++lastuse_it) {
address_t data_address = lastuse_it->first;
int instr1 = lastuse_it->second;
ss << "v.variant = '" << variant << "' OR ";
}
ss << "0) AND ("; // dummy terminator
for (int benchmark_nr = 0; benchmarks[benchmark_nr]; ++benchmark_nr) {
char const *benchmark = benchmarks[benchmark_nr];
ss << "v.benchmark = '" << benchmark << "' OR ";
}
ss << "0)"; // dummy terminator
std::string sql_variants = ss.str();
ss.str("");
#if 0
// Why -1? In most cases it does not make sense to inject before the
// very last instruction, as we won't execute it anymore. This *only*
// makes sense if we also inject into parts of the result vector. This
// is not the case in this experiment, and with -1 we'll get a result
// comparable to the non-pruned campaign.
int instr2 = instr - 1;
#else
// EcosKernelTestCampaign only variant: fault space ends with the last FI experiment
int instr2 = instr_rightmost;
ss << "SELECT STRAIGHT_JOIN p.id AS pilot_id, v.id AS variant_id, v.variant, v.benchmark, p.injection_instr, p.injection_instr_absolute, p.data_address, SUM(r.bit_width) AS existing_results "
<< "FROM variant v "
<< "JOIN fsppilot p ON p.variant_id = v.id "
<< "LEFT JOIN result r ON r.pilot_id = p.id "
<< "WHERE p.known_outcome = 0 "
<< " AND p.fspmethod_id = " << fspmethod_id << " "
<< " AND (" << sql_variants << ") "
<< "GROUP BY p.id "
<< "HAVING existing_results < 8 OR existing_results IS NULL "; // 8 results per pilot
#elif 0
std::string sql_select = "SELECT p.id AS pilot_id, v.id AS variant_id, v.variant, v.benchmark, p.injection_instr, p.injection_instr_absolute, p.data_address ";
ss << "FROM variant v "
<< "JOIN fsppilot p ON p.variant_id = v.id "
<< "LEFT JOIN " << m_result_table << " r ON r.pilot_id = p.id "
<< "WHERE p.known_outcome = 0 "
<< " AND p.fspmethod_id = " << fspmethod_id << " "
<< " AND (" << sql_variants << ") "
<< " AND r.pilot_id IS NULL ";
#elif 0
std::string sql_select = "SELECT p.id AS pilot_id, v.id AS variant_id, v.variant, v.benchmark, p.injection_instr, p.injection_instr_absolute, p.data_address ";
ss << "FROM variant v "
<< "JOIN fsppilot p ON p.variant_id = v.id "
// << "WHERE p.known_outcome = 0 "
<< " AND p.fspmethod_id = " << fspmethod_id << " "
<< " AND (" << sql_variants << ") ";
#elif 1
if (!db->query("CREATE TEMPORARY TABLE done_pilots (id INT UNSIGNED NOT NULL PRIMARY KEY)")) return false;
ss << "INSERT INTO done_pilots SELECT pilot_id FROM " << m_result_table << " GROUP BY pilot_id HAVING SUM(bit_width) = 8";
if (!db->query(ss.str().c_str())) return false;
unsigned finished_jobs = db->affected_rows();
ss.str("");
ss << "DELETE r FROM " << m_result_table << " r LEFT JOIN done_pilots ON r.pilot_id = done_pilots.id WHERE done_pilots.id IS NULL";
if (!db->query(ss.str().c_str())) return false;
unsigned deleted_rows = db->affected_rows();
ss.str("");
m_log << "Deleted " << deleted_rows << " rows from incomplete jobs" << std::endl;
std::string sql_select = "SELECT STRAIGHT_JOIN p.id AS pilot_id, v.id AS variant_id, v.variant, v.benchmark, p.injection_instr, p.injection_instr_absolute, p.data_address ";
ss << "FROM variant v "
<< "JOIN fsppilot p ON p.variant_id = v.id "
<< "LEFT JOIN done_pilots d ON d.id = p.id "
<< "WHERE d.id IS NULL "
<< " AND p.fspmethod_id = " << fspmethod_id << " "
<< " AND (" << sql_variants << ") ";
#endif
int instr_absolute = 0; // unknown
std::string sql_body = ss.str();
//std::string sql_order = "ORDER BY v.benchmark, v.variant";
std::string sql_order = "ORDER BY v.id";
//std::string sql_order = "";
// zero-sized? skip.
if (instr1 > instr2) {
continue;
}
/* Get the number of unfinished experiments */
std::string sql_count = "SELECT COUNT(*) " + sql_body;
m_log << sql_count << std::endl;
MYSQL_RES *count = db->query(sql_count.c_str(), true);
if (!count) {
return false;
}
MYSQL_ROW row = mysql_fetch_row(count);
unsigned unfinished_jobs;
unfinished_jobs = strtoul(row[0], NULL, 10);
#if 0
// the run continues after the FI window, so do this experiment
// XXX this creates at least one experiment for *every* bit!
// fix: full trace, limited FI window
m_log << "Found " << unfinished_jobs << " unfinished jobs (" << finished_jobs << " already finished)." << std::endl;
ecs_need_experiment.push_back(current_ec);
add_experiment_ec(variant, benchmark, data_address, instr1, instr2, instr_absolute);
#else
// as the experiment ends, this byte is a "don't care":
add_known_ec(variant, benchmark, data_address, instr1, instr2, instr_absolute);
#endif
}
// conserve some memory
open_ecs.clear();
// progress report
m_log << variant << "/" << benchmark
<< " exp " << (count_exp - local_count_exp) << " (" << (count_exp_jobs - local_count_exp_jobs) << " jobs)"
<< " known " << (count_known - local_count_known) << " (" << (count_known_jobs - local_count_known_jobs) << " jobs)"
<< " faultspace cutoff @ " << instr_rightmost << " out of " << instr
<< endl;
}
std::string sql_pilots = sql_select + sql_body + sql_order;
m_log << sql_pilots << std::endl;
MYSQL_RES *pilots = db->query_stream(sql_pilots.c_str());
if (!pilots) {
return false;
}
available_results.clear();
m_log << "Filling queue ..." << std::endl;
unsigned prev_variant_id = 0;
while ((row = mysql_fetch_row(pilots))) {
unsigned pilot_id = atoi(row[0]);
unsigned variant_id = atoi(row[1]);
unsigned injection_instr = atoi(row[4]);
unsigned data_address = atoi(row[6]);
EcosKernelTestExperimentData *d = new EcosKernelTestExperimentData;
d->msg.set_pilot_id(pilot_id);
d->msg.set_variant(row[2]);
d->msg.set_benchmark(row[3]);
d->msg.set_instr2_offset(injection_instr);
if (row[5]) {
unsigned injection_instr_absolute = atoi(row[5]);
d->msg.set_instr2_address(injection_instr_absolute);
}
d->msg.set_mem_addr(data_address);
d->msg.set_faultmodel(ECOS_FAULTMODEL_BURST ? d->msg.BURST : d->msg.SINGLEBITFLIP);
if (prev_variant_id != variant_id) {
m_log << "Enqueueing jobs for " << row[2] << "/" << row[3] << std::endl;
}
prev_variant_id = variant_id;
campaignmanager.addParam(d);
}
if (mysql_errno(db->getHandle())) {
std::cerr << "mysql_fetch_row failed: " << mysql_error(db->getHandle()) << std::endl;
}
delete db;
m_log << "finished, waiting for the clients to complete ..." << std::endl;
campaignmanager.noMoreParameters();
m_log << "total"
<< " exp " << count_exp << " (" << count_exp_jobs << " jobs)"
<< " known " << count_known << " (" << count_known_jobs << " jobs)"
<< endl;
// collect results
#ifndef __puma
collect_thread.join();
#endif
finalize_results();
m_log << "done." << endl;
delete db_recv;
m_log << "results complete, terminating." << std::endl;
return true;
}
bool EcosKernelTestCampaign::add_experiment_ec(const std::string& variant, const std::string& benchmark,
address_t data_address, int instr1, int instr2, address_t instr_absolute)
{
if (check_available(variant, benchmark, data_address, instr2)) {
return false;
}
count_exp_jobs++;
if (ECOS_FAULTMODEL_BURST) {
count_exp++;
} else {
count_exp += 8;
}
// enqueue job
#if 1
EcosKernelTestExperimentData *d = new EcosKernelTestExperimentData;
d->msg.set_variant(variant);
d->msg.set_benchmark(benchmark);
d->msg.set_instr1_offset(instr1);
d->msg.set_instr2_offset(instr2);
d->msg.set_instr2_address(instr_absolute);
d->msg.set_mem_addr(data_address);
d->msg.set_faultmodel(ECOS_FAULTMODEL_BURST ? d->msg.BURST : d->msg.SINGLEBITFLIP);
campaignmanager.addParam(d);
#endif
return true;
}
/**
 * Records a result for an equivalence class without running an experiment.
 * The stored row uses fixed values (see the inline comments on the
 * add_result() call) and covers bit 0 with a width of 8.
 * @param variant the variant name written to the result row
 * @param benchmark the benchmark name written to the result row
 * @param data_address the memory address written to the result row
 * @param instr1 written as the first instruction offset
 * @param instr2 written as the second instruction offset
 * @param instr_absolute written as the absolute address of instr2
 * @return \c false if check_available() reports an existing result for this
 *         combination (nothing is written), \c true otherwise
 */
bool EcosKernelTestCampaign::add_known_ec(const std::string& variant, const std::string& benchmark,
	address_t data_address, int instr1, int instr2, address_t instr_absolute)
{
	const bool already_covered = check_available(variant, benchmark, data_address, instr2);
	if (already_covered) {
		return false;
	}

	// Bookkeeping: one more job, which counts as a single known result under
	// the burst fault model and as eight otherwise.
	++count_known_jobs;
	count_known += (ECOS_FAULTMODEL_BURST) ? 1 : 8;

#if 1
	add_result(variant, benchmark, instr1, instr2, instr_absolute, data_address,
		0, 8,	// bitnr, bit_width
		1,	// resulttype
		1,	// ecos_test_result
		99,	// latest_ip
		0,	// error_corrected
		"",	// details
		0	// runtime
		);
#endif

	return true;
}
/**
 * Prepares result storage: indexes the rows of an already existing results
 * file in available_results, then opens that file for appending.
 * @return \c false if the results file cannot be opened for writing,
 *         \c true otherwise
 */
bool EcosKernelTestCampaign::init_results()
{
// read already existing results
bool file_exists = false;
ifstream oldresults(filename_results().c_str(), ios::in);
if (oldresults.is_open()) {
file_exists = true;
// line buffer; the loop below stops at the first line getline() cannot
// read cleanly into it
char buf[16*1024];
std::string variant, benchmark;
// receives the columns that are not needed here
unsigned ignore;
int instr2;
address_t data_address;
int bit_width;
int rowcount = 0;
int expcount = 0;
m_log << "scanning existing results ..." << endl;
while (oldresults.getline(buf, sizeof(buf)).good()) {
stringstream ss;
ss << buf;
// Column order follows the header written at the end of this function:
// variant, benchmark, ec_instr1 (ignored), ec_instr2, ec_instr2_absolute
// (ignored), ec_data_address, bitnr (ignored), bit_width. All numeric
// fields are read as hexadecimal.
ss >> hex >> variant >> benchmark >> ignore >> instr2 >> ignore
>> data_address >> ignore >> bit_width;
// rows that do not parse (the header line among them) are skipped
if (ss.fail()) {
continue;
}
++rowcount;
// each row contributes bit_width experiments to the total
expcount += bit_width;
// TODO: sanity check (duplicates?)
// index: (variant, benchmark) -> data address -> set of instr2 values
available_results
[AvailableResultMap::key_type(variant, benchmark)]
[data_address].insert(instr2);
}
m_log << "found " << dec << expcount << " existing experiment results ("
<< rowcount << " CSV rows)" << endl;
oldresults.close();
}
// non-destructive: due to the CSV header we can always manually recover
// from an accident (append mode)
resultstream.open(filename_results().c_str(), ios::out | ios::app);
if (!resultstream.is_open()) {
m_log << "failed to open " << filename_results() << endl;
return false;
}
// only write CSV header if file didn't exist before
if (!file_exists) {
resultstream << "variant\tbenchmark\tec_instr1\tec_instr2\t"
"ec_instr2_absolute\tec_data_address\tbitnr\tbit_width\t"
"resulttype\tecos_test_result\tlatest_ip\t"
"error_corrected\tdetails\truntime" << endl;
}
return true;
}
bool EcosKernelTestCampaign::check_available(const std::string& variant, const std::string& benchmark,
address_t data_address, int instr2)
{
AvailableResultMap::const_iterator it_variant =
available_results.find(AvailableResultMap::key_type(variant, benchmark));
if (it_variant == available_results.end()) {
return false;
}
AvailableResultMap::mapped_type::const_iterator it_address =
it_variant->second.find(data_address);
if (it_address == it_variant->second.end()) {
return false;
}
AvailableResultMap::mapped_type::mapped_type::const_iterator it_instr =
it_address->second.find(instr2);
if (it_instr == it_address->second.end()) {
return false;
}
return true;
}
void EcosKernelTestCampaign::add_result(const std::string& variant, const std::string& benchmark,
int instr1, int instr2, address_t instr2_absolute, address_t ec_data_address,
void EcosKernelTestCampaign::add_result(unsigned pilot_id,
int instr2, address_t instr2_absolute, address_t ec_data_address,
int bitnr, int bit_width, int resulttype, int ecos_test_result, address_t latest_ip,
int error_corrected, const std::string& details, float runtime)
{
#ifndef __puma
boost::lock_guard<boost::mutex> guard(m_result_mutex);
#endif
resultstream << hex
<< variant << "\t"
<< benchmark << "\t"
<< instr1 << "\t"
<< instr2 << "\t"
<< instr2_absolute << "\t"
<< ec_data_address << "\t"
<< bitnr << "\t"
<< bit_width << "\t"
<< resulttype << "\t"
<< ecos_test_result << "\t"
<< latest_ip << "\t"
<< error_corrected << "\t"
<< details << "\t"
<< runtime << "\n";
//resultstream.flush(); // for debugging purposes
}
void EcosKernelTestCampaign::finalize_results()
{
resultstream.close();
std::stringstream ss;
ss << "INSERT DELAYED INTO " << m_result_table << " "
<< "(pilot_id, bitnr, bit_width, resulttype, ecos_test_result, latest_ip, error_corrected, details, runtime) VALUES "
<< "(" << pilot_id << "," << bitnr << "," << bit_width << "," << resulttype << "," << ecos_test_result << ","
<< latest_ip << "," << error_corrected << ",'" << details << "'," << runtime << ")";
// Database::query is protected by a mutex
db_recv->query(ss.str().c_str());
}
void EcosKernelTestCampaign::collect_results()
@ -526,7 +390,7 @@ void EcosKernelTestCampaign::collect_results()
bit_width++;
continue;
}
add_result(res->msg.variant(), res->msg.benchmark(), res->msg.instr1_offset(),
add_result(res->msg.pilot_id(),
res->msg.instr2_offset(), res->msg.instr2_address(), res->msg.mem_addr(),
first_bit, bit_width, prev_singleres->resulttype(), prev_singleres->ecos_test_result(),
prev_singleres->latest_ip(), prev_singleres->error_corrected(), prev_singleres->details(),
@ -541,7 +405,7 @@ void EcosKernelTestCampaign::collect_results()
bit_width = 8;
prev_singleres = &res->msg.result(0);
#endif
add_result(res->msg.variant(), res->msg.benchmark(), res->msg.instr1_offset(),
add_result(res->msg.pilot_id(),
res->msg.instr2_offset(), res->msg.instr2_address(), res->msg.mem_addr(),
first_bit, bit_width, prev_singleres->resulttype(), prev_singleres->ecos_test_result(),
prev_singleres->latest_ip(), prev_singleres->error_corrected(), prev_singleres->details(),

View File

@ -2,11 +2,13 @@
#include <string>
#include <fstream>
#include <mysql/mysql.h>
#ifndef __puma
#include <boost/thread.hpp>
#endif
#include "util/Database.hpp"
#include "cpn/Campaign.hpp"
#include "comm/ExperimentData.hpp"
#include "ecos_kernel_test.pb.h"
@ -20,33 +22,21 @@ public:
};
class EcosKernelTestCampaign : public fail::Campaign {
fail::Database *db;
fail::Database *db_recv;
int fspmethod_id;
static const std::string dir_images;
static const std::string dir_prerequisites;
static const std::string dir_results;
std::string m_result_table;
fail::Logger m_log;
int count_exp, count_exp_jobs;
int count_known, count_known_jobs;
bool add_experiment_ec(const std::string& variant, const std::string& benchmark,
fail::address_t data_address, int instr1, int instr2, fail::address_t instr_absolute);
bool add_known_ec(const std::string& variant, const std::string& benchmark,
fail::address_t data_address, int instr1, int instr2, fail::address_t instr_absolute);
bool init_results();
void add_result(const std::string& variant, const std::string& benchmark,
int instr1, int instr2, fail::address_t instr2_absolute, fail::address_t ec_data_address,
void add_result(unsigned pilot_id,
int instr2, fail::address_t instr2_absolute, fail::address_t ec_data_address,
int bitnr, int bit_width, int resulttype, int ecos_test_result, fail::address_t latest_ip,
int error_corrected, const std::string& details, float runtime);
void finalize_results();
void collect_results();
bool check_available(const std::string& variant, const std::string& benchmark, fail::address_t data_address, int instr2);
std::ofstream resultstream;
typedef std::map<std::pair<const std::string, const std::string>, std::map<fail::address_t, std::set<int> > > AvailableResultMap;
AvailableResultMap available_results;
#ifndef __puma
boost::mutex m_result_mutex;
#endif
public:
EcosKernelTestCampaign() : m_log("EcosKernelTest Campaign"),
count_exp(0), count_exp_jobs(0), count_known(0), count_known_jobs(0) {}
EcosKernelTestCampaign() : m_log("EcosKernelTest Campaign") {}
virtual bool run();
static bool readMemoryMap(fail::MemoryMap &mm, char const * const filename);
static bool writeTraceInfo(unsigned instr_counter, unsigned timeout, unsigned mem1_low, unsigned mem1_high, unsigned mem2_low, unsigned mem2_high, const std::string& variant = "", const std::string& benchmark = "");
@ -55,6 +45,5 @@ public:
static std::string filename_state(unsigned instr_offset, const std::string& variant = "", const std::string& benchmark = "");
static std::string filename_trace(const std::string& variant = "", const std::string& benchmark = "");
static std::string filename_traceinfo(const std::string& variant = "", const std::string& benchmark = "");
static std::string filename_results(const std::string& variant = "", const std::string& benchmark = "");
static std::string filename_elf(const std::string& variant = "", const std::string& benchmark = "");
};

View File

@ -7,8 +7,8 @@ message EcosKernelTestProtoMsg {
// benchmark
required string benchmark = 2;
// equivalence class start (for storage)
required int32 instr1_offset = 3;
// pilot ID (database)
required int32 pilot_id = 3;
// FI at #instructions from experiment start
required int32 instr2_offset = 4;
// the exact IP value at this point in time (from golden run)

View File

@ -27,7 +27,7 @@
#define LOCAL 0
#ifndef PREREQUISITES
#define PREREQUISITES 0 // 1: do step 0-2 ; 0: do step 3
#error Configure experimentInfo.hpp properly!
#endif
// create/use multiple snapshots to speed up long experiments
@ -35,8 +35,6 @@
#define MULTIPLE_SNAPSHOTS 0
#define MULTIPLE_SNAPSHOTS_DISTANCE 1000000
#define TIMER_GRANULARITY 10 // microseconds
#define VIDEOMEM_START 0xb8000
#define VIDEOMEM_SIZE (80*25*2 *2) // two text mode screens
#define VIDEOMEM_END (VIDEOMEM_START + VIDEOMEM_SIZE)
@ -61,7 +59,13 @@ using namespace fail;
#endif
#if PREREQUISITES
bool EcosKernelTestExperiment::retrieveGuestAddresses(guest_address_t addr_finish) {
bool EcosKernelTestExperiment::retrieveGuestAddresses(guest_address_t addr_finish, guest_address_t addr_data_start, guest_address_t addr_data_end) {
#if BASELINE_ASSESSMENT
log << "STEP 0: creating memory map spanning all of DATA and BSS" << endl;
MemoryMap mm;
mm.add(addr_data_start, addr_data_end - addr_data_start);
mm.writeToFile(EcosKernelTestCampaign::filename_memorymap(m_variant, m_benchmark).c_str());
#else
log << "STEP 0: record memory map with addresses of 'interesting' objects" << endl;
// run until func_finish is reached
@ -113,6 +117,7 @@ bool EcosKernelTestExperiment::retrieveGuestAddresses(guest_address_t addr_finis
// close serialized mm
mm.close();
#endif
return true;
}
@ -199,13 +204,8 @@ bool EcosKernelTestExperiment::performTrace(guest_address_t addr_entry, guest_ad
simulator.addListener(&ev_count);
unsigned instr_counter = 0;
// on the way, count elapsed time
TimerListener time_step(TIMER_GRANULARITY); //TODO: granularity?
//elapsed_time.setCounter(0xFFFFFFFFU); // not working for TimerListener
simulator.addListener(&time_step);
unsigned elapsed_time = 1; // always run 1 step
// just increase elapsed_time counter by 1, which serves as time for ECC recovery algorithm
++elapsed_time; // (this is a rough guess ... TODO)
// measure elapsed time
simtime_t time_start = simulator.getTimerTicks();
// on the way, record lowest and highest memory address accessed
MemAccessListener ev_mem(ANY_ADDR, MemAccessEvent::MEM_READWRITE);
@ -227,13 +227,6 @@ bool EcosKernelTestExperiment::performTrace(guest_address_t addr_entry, guest_ad
}
simulator.addListener(&ev_count);
}
else if(ev == &time_step) {
if(elapsed_time++ == 0xFFFFFFFFU) {
log << "ERROR: elapsed_time overflowed" << endl;
return false;
}
simulator.addListener(&time_step);
}
else if(ev == &ev_mem) {
unsigned lo = ev_mem.getTriggerAddress();
unsigned hi = lo + ev_mem.getTriggerWidth() - 1;
@ -252,16 +245,14 @@ bool EcosKernelTestExperiment::performTrace(guest_address_t addr_entry, guest_ad
ev = simulator.resume();
}
unsigned long long estimated_timeout_overflow_check = ((unsigned long long)elapsed_time) * time_step.getTimeout();
if(estimated_timeout_overflow_check > 0xFFFFFFFFU) {
log << "Timeout estimation overflowed" << endl;
return false;
}
unsigned estimated_timeout = (unsigned)estimated_timeout_overflow_check;
unsigned long long estimated_timeout_overflow_check =
simulator.getTimerTicks() - time_start + 10000;
unsigned estimated_timeout =
(unsigned) (estimated_timeout_overflow_check * 1000000 / simulator.getTimerTicksPerSecond());
log << dec << "tracing finished after " << instr_counter << " instructions" << endl;
log << hex << "all memory accesses within [0x" << mem1_low << ", 0x" << mem1_high << "] u [0x" << mem2_low << ", 0x" << mem2_high << "] (ignoring VGA mem)" << endl;
log << dec << "elapsed simulated time (plus safety margin): " << (estimated_timeout * TIMER_GRANULARITY / 1000000.0) << "s" << endl;
log << dec << "elapsed simulated time (plus safety margin): " << (estimated_timeout / 1000000.0) << "s" << endl;
// sanitize memory ranges
if (mem1_low > mem1_high) {
@ -296,12 +287,14 @@ bool EcosKernelTestExperiment::faultInjection() {
unsigned instr_counter, estimated_timeout, mem1_low, mem1_high, mem2_low, mem2_high;
// ELF symbol addresses
guest_address_t addr_entry, addr_finish, addr_test_output, addr_errors_corrected,
addr_panic, addr_text_start, addr_text_end;
addr_panic, addr_text_start, addr_text_end,
addr_data_start, addr_data_end;
BPSingleListener bp;
#if !LOCAL
for (int i = 0; i < 50 || (m_jc.getNumberOfUndoneJobs() != 0) ; ++i) { // only do 50 sequential experiments, to prevent swapping
for (int experiments = 0;
experiments < 500 || (m_jc.getNumberOfUndoneJobs() != 0); ) { // stop after ~500 experiments to prevent swapping
// 50 exp ~ 0.5GB RAM usage per instance (linearly increasing)
#endif
@ -335,7 +328,8 @@ bool EcosKernelTestExperiment::faultInjection() {
EcosKernelTestCampaign::readTraceInfo(instr_counter, estimated_timeout,
mem1_low, mem1_high, mem2_low, mem2_high, m_variant, m_benchmark);
readELFSymbols(addr_entry, addr_finish, addr_test_output,
addr_errors_corrected, addr_panic, addr_text_start, addr_text_end);
addr_errors_corrected, addr_panic, addr_text_start, addr_text_end,
addr_data_start, addr_data_end);
int state_instr_offset = instr_offset - (instr_offset % MULTIPLE_SNAPSHOTS_DISTANCE);
string statename;
@ -356,6 +350,8 @@ bool EcosKernelTestExperiment::faultInjection() {
// for each job with the SINGLEBITFLIP fault model we're actually doing *8*
// experiments (one for each bit)
for (int bit_offset = 0; bit_offset < 8; ++bit_offset) {
++experiments;
// 8 results in one job
EcosKernelTestProtoMsg_Result *result = param.msg.add_result();
result->set_bit_offset(bit_offset);
@ -425,6 +421,9 @@ bool EcosKernelTestExperiment::faultInjection() {
continue;
}
if (param.msg.has_instr2_address()) {
log << "Absolute IP sanity check OK" << endl;
}
// --- aftermath ---
// possible outcomes:
@ -625,7 +624,9 @@ bool EcosKernelTestExperiment::readELFSymbols(
fail::guest_address_t& errors_corrected,
fail::guest_address_t& panic,
fail::guest_address_t& text_start,
fail::guest_address_t& text_end)
fail::guest_address_t& text_end,
fail::guest_address_t& data_start,
fail::guest_address_t& data_end)
{
ElfReader elfreader(EcosKernelTestCampaign::filename_elf(m_variant, m_benchmark).c_str());
entry = elfreader.getSymbol("cyg_start").getAddress();
@ -635,10 +636,13 @@ bool EcosKernelTestExperiment::readELFSymbols(
panic = elfreader.getSymbol("_Z9ecc_panicv").getAddress();
text_start = elfreader.getSymbol("_stext").getAddress();
text_end = elfreader.getSymbol("_etext").getAddress();
data_start = elfreader.getSymbol("__ram_data_start").getAddress();
data_end = elfreader.getSymbol("__bss_end").getAddress();
// it's OK if errors_corrected or ecc_panic are missing
if (entry == ADDR_INV || finish == ADDR_INV || test_output == ADDR_INV ||
text_start == ADDR_INV || text_end == ADDR_INV) {
text_start == ADDR_INV || text_end == ADDR_INV ||
data_start == ADDR_INV || data_end == ADDR_INV) {
return false;
}
return true;
@ -680,36 +684,36 @@ bool EcosKernelTestExperiment::run()
parseOptions();
#endif
#if PREREQUISITES
#if PREREQUISITES
log << "retrieving ELF symbol addresses ..." << endl;
guest_address_t entry, finish, test_output, errors_corrected,
panic, text_start, text_end;
panic, text_start, text_end, data_start, data_end;
if (!readELFSymbols(entry, finish, test_output, errors_corrected,
panic, text_start, text_end)) {
panic, text_start, text_end, data_start, data_end)) {
log << "failed, essential symbols are missing!" << endl;
simulator.terminate(1);
}
// step 0
if(retrieveGuestAddresses(finish)) {
if (retrieveGuestAddresses(finish, data_start, data_end)) {
log << "STEP 0 finished: rebooting ..." << endl;
simulator.reboot();
} else { return false; }
// step 1
if(establishState(entry, finish, errors_corrected)) {
if (establishState(entry, finish, errors_corrected)) {
log << "STEP 1 finished: proceeding ..." << endl;
} else { return false; }
// step 2
if(performTrace(entry, finish)) {
if (performTrace(entry, finish)) {
log << "STEP 2 finished: terminating ..." << endl;
} else { return false; }
#else // !PREREQUISITES
#else // !PREREQUISITES
// step 3
faultInjection();
#endif // PREREQUISITES
#endif // PREREQUISITES
// Explicitly terminate, or the simulator will continue to run.
simulator.terminate();

View File

@ -16,7 +16,7 @@ public:
bool run();
void parseOptions();
bool retrieveGuestAddresses(fail::guest_address_t addr_finish); // step 0
bool retrieveGuestAddresses(fail::guest_address_t addr_finish, fail::guest_address_t addr_data_start, fail::guest_address_t addr_data_end); // step 0
bool establishState(fail::guest_address_t addr_entry, fail::guest_address_t addr_finish, fail::guest_address_t addr_errors_corrected); // step 1
bool performTrace(fail::guest_address_t addr_entry, fail::guest_address_t addr_finish); // step 2
bool faultInjection(); // step 3
@ -28,5 +28,7 @@ public:
fail::guest_address_t& errors_corrected,
fail::guest_address_t& panic,
fail::guest_address_t& text_start,
fail::guest_address_t& text_end);
fail::guest_address_t& text_end,
fail::guest_address_t& data_start,
fail::guest_address_t& data_end);
};

View File

@ -1,4 +1,5 @@
#pragma once
#define BASELINE_ASSESSMENT 1
#define PREREQUISITES 0
#define ECOS_FAULTMODEL_BURST 0

View File

@ -3,9 +3,14 @@
#include "cpn/CampaignManager.hpp"
#include "campaign.hpp"
#include "util/CommandLine.hpp"
int main(int argc, char **argv)
{
fail::CommandLine &cmd = fail::CommandLine::Inst();
for (int i = 1; i < argc; ++i)
cmd.add_args(argv[i]);
EcosKernelTestCampaign c;
if (fail::campaignmanager.runCampaign(&c)) {
return 0;

View File

@ -30,5 +30,5 @@ target_link_libraries(fail-${EXPERIMENT_NAME} ${PROTOBUF_LIBRARY})
## This is the example's campaign server distributing experiment parameters
add_executable(${EXPERIMENT_NAME}-server main.cc)
target_link_libraries(${EXPERIMENT_NAME}-server fail-${EXPERIMENT_NAME} fail ${PROTOBUF_LIBRARY} ${Boost_THREAD_LIBRARY})
target_link_libraries(${EXPERIMENT_NAME}-server -Wl,--start-group fail-${EXPERIMENT_NAME} fail-sal fail-util fail-cpn fail-comm ${PROTOBUF_LIBRARY} ${Boost_THREAD_LIBRARY} -lmysqlclient -Wl,--end-group)
install(TARGETS ${EXPERIMENT_NAME}-server RUNTIME DESTINATION bin)

View File

@ -5,80 +5,16 @@
#include "experimentInfo.hpp"
#include "cpn/CampaignManager.hpp"
#include "util/Logger.hpp"
#include "util/ElfReader.hpp"
#include "util/ProtoStream.hpp"
#include "sal/SALConfig.hpp"
//#if COOL_FAULTSPACE_PRUNING
//#include "../plugins/tracing/TracingPlugin.hpp"
//char const * const trace_filename = "trace.pb";
//#endif
using namespace std;
using namespace fail;
using namespace google::protobuf;
char const * const results_csv = "kesorefs.csv";
bool KesoRefCampaign::run()
{
Logger log("KesoRefCampaign");
ElfReader elf;
ifstream test(results_csv);
if (test.is_open()) {
log << results_csv << " already exists" << endl;
return false;
}
ofstream results(results_csv);
if (!results.is_open()) {
log << "failed to open " << results_csv << endl;
return false;
}
address_t injip = elf.getSymbol("c23_PersistentDetectorScopeEntry_m5_run").getAddress();
address_t rambase = elf.getSymbol("__CIAO_APPDATA_cdx_det__heap").getAddress();
// address_t ramend = rambase + 0x80000;
address_t ramend = rambase + 4;
cout << "ramend: " << hex << ramend << endl;
log << "startup, injecting ram @ " << hex << rambase << endl;
int count = 0;
for (address_t ram_address = rambase; ram_address < ramend ; ram_address += 4) {
for (int bit_offset = 23; bit_offset < 24; ++bit_offset) {
KesoRefExperimentData *d = new KesoRefExperimentData;
d->msg.set_pc_address(injip);
d->msg.set_ram_address(ram_address);
d->msg.set_bit_offset(bit_offset);
campaignmanager.addParam(d);
++count;
}
}
campaignmanager.noMoreParameters();
log << "done enqueueing parameter sets (" << dec << count << ")." << endl;
// collect results
KesoRefExperimentData *res;
int rescount = 0;
results << "injection_ip\tram_address\tbit_offset\tresulttype\toriginal_value\tdetails" << endl;
while ((res = static_cast<KesoRefExperimentData *>(campaignmanager.getDone()))) {
rescount++;
results
<< "0x" << hex << res->msg.pc_address() << "\t"
<< "0x" << hex << res->msg.ram_address() << "\t"
<< dec << res->msg.bit_offset() << "\t"
<< res->msg.resulttype() << "\t"
<< res->msg.original_value() << "\t"
<< res->msg.details() << "\n";
delete res;
}
log << "done. sent " << count << " received " << rescount << endl;
results.close();
return true;
/**
 * Callback that receives one pilot message, wraps it in a freshly
 * allocated KesoRefExperimentData object and queues that object with
 * the campaign manager.
 * @param pilot the pilot message to copy into the experiment data
 */
void KesoRefCampaign::cb_send_pilot(DatabaseCampaignMessage pilot) {
	KesoRefExperimentData *experiment = new KesoRefExperimentData;

	// Copy the pilot into the message's fsppilot field.
	DatabaseCampaignMessage *pilot_slot = experiment->msg.mutable_fsppilot();
	pilot_slot->CopyFrom(pilot);

	// The object is not freed here; presumably the campaign manager (or
	// whoever consumes the result) takes ownership -- TODO confirm.
	campaignmanager.addParam(experiment);
}

View File

@ -1,19 +1,24 @@
#ifndef __KESOREFCAMPAIGN_HPP__
#define __KESOREFCAMPAIGN_HPP__
#include "cpn/Campaign.hpp"
#include "cpn/DatabaseCampaign.hpp"
#include "comm/ExperimentData.hpp"
#include <google/protobuf/descriptor.h>
#include "kesoref.pb.h"
// Experiment-data wrapper that bundles a KesoRefProtoMsg with the
// fail::ExperimentData base class.
class KesoRefExperimentData : public fail::ExperimentData {
public:
// The protobuf message carried by this experiment-data object.
KesoRefProtoMsg msg;
// Hands the address of the embedded msg to the fail::ExperimentData base.
KesoRefExperimentData() : fail::ExperimentData(&msg) {}
};
class KesoRefCampaign : public fail::Campaign {
public:
virtual bool run();
class KesoRefCampaign : public fail::DatabaseCampaign {
virtual const google::protobuf::Descriptor * cb_result_message()
{ return google::protobuf::DescriptorPool::generated_pool()->FindMessageTypeByName("KesoRefProtoMsg"); }
virtual void cb_send_pilot(DatabaseCampaignMessage pilot);
};
#endif // __KESOREFCAMPAIGN_HPP__

View File

@ -28,9 +28,8 @@ using namespace fail;
#define SAFESTATE (1)
// Check if configuration dependencies are satisfied:
#if !defined(CONFIG_EVENT_BREAKPOINTS) || !defined(CONFIG_SR_RESTORE) || \
!defined(CONFIG_SR_SAVE)
#error This experiment needs: breakpoints, traps, save, and restore. Enable these in the configuration.
#if !defined(CONFIG_EVENT_BREAKPOINTS) || !defined(CONFIG_SR_RESTORE)
#error This experiment needs: breakpoints, traps, save, and restore. Enable these in the configuration.
#endif
//
//void KESOrefs::printEIP() {
@ -39,27 +38,28 @@ using namespace fail;
unsigned KESOrefs::injectBitFlip(address_t data_address, unsigned bitpos){
MemoryManager& mm = simulator.getMemoryManager();
unsigned value, injectedval;
MemoryManager& mm = simulator.getMemoryManager();
unsigned int value, injectedval;
mm.getBytes(data_address, 4, (void*)&value);
injectedval = value ^ (1<<bitpos);
mm.setBytes(data_address, 4, (void*)&injectedval);
value = mm.getByte(data_address);
injectedval = value ^ (1 << bitpos);
mm.setByte(data_address, injectedval);
m_log << "INJECTION at: 0x" << hex << setw(8) << setfill('0') << data_address;
cout << " value: 0x" << setw(8) << setfill('0') << value << " -> 0x" << setw(8) << setfill('0') << injectedval << endl;
m_log << "INJECTION at: 0x" << hex << setw(2) << setfill('0') << data_address
<< " value: 0x" << setw(2) << setfill('0') << value << " -> 0x" << setw(2) << setfill('0') << injectedval << endl;
return value;
return value;
}
void handleEvent(KesoRefExperimentData& param, KesoRefProtoMsg_ResultType restype, const std::string &msg){
cout << msg << endl;
param.msg.set_resulttype(restype);
param.msg.set_details(msg);
void handleEvent(KesoRefProtoMsg_Result& result, KesoRefProtoMsg_Result_ResultType restype, const std::string &msg) {
cout << msg << endl;
result.set_resulttype(restype);
result.set_details(msg);
}
void handleMemoryAccessEvent(KesoRefExperimentData& param, const fail::MemAccessListener& l_mem){
void handleMemoryAccessEvent(KesoRefProtoMsg_Result& result, const fail::MemAccessListener& l_mem){
stringstream sstr;
sstr << "mem access (";
switch (l_mem.getTriggerAccessType()) {
@ -75,128 +75,187 @@ void handleMemoryAccessEvent(KesoRefExperimentData& param, const fail::MemAccess
sstr << " ip @ 0x" << hex << l_mem.getTriggerInstructionPointer();
handleEvent(param, param.msg.MEMACCESS, sstr.str());
handleEvent(result, result.MEMACCESS, sstr.str());
}
bool KESOrefs::run()
{
m_dis.init();
//******* Boot, and store state *******//
m_log << "STARTING EXPERIMENT" << endl;
#if SAFESTATE // define SS (SafeState) when building: make -DSS
#warning "Building safe state variant"
m_log << "Booting, and saving state at main" << std::endl;
// m_elf.printSections();
// m_elf.printDemangled();
address_t minimal_ip = INT_MAX; // 1 Mbyte
address_t maximal_ip = 0;
address_t minimal_data = 0x100000; // 1 Mbyte
address_t maximal_data = 0;
simulator.terminate();
BPSingleListener bp;
// STEP 1: run until interesting function starts, and save state
bp.setWatchInstructionPointer(m_elf.getSymbol("main").getAddress());
if(simulator.addListenerAndResume(&bp) == &bp){
m_log << "main function entry reached, saving state" << endl;
}
for (ElfReader::section_iterator it = m_elf.sec_begin();
it != m_elf.sec_end(); ++it) {
const ElfSymbol &symbol = *it;
std::string prefix(".text");
if (symbol.getName().compare(0, prefix.size(), prefix) == 0) {
minimal_ip = std::min(minimal_ip, symbol.getStart());
maximal_ip = std::max(maximal_ip, symbol.getEnd());
} else {
minimal_data = std::min(minimal_data, symbol.getStart());
maximal_data = std::max(maximal_data, symbol.getEnd());
}
}
simulator.save("keso.state");
simulator.terminate();
#else
std::cout << "Code section from " << hex << minimal_ip << " to " << maximal_ip << std::endl;
std::cout << "Whole programm section from " << hex << minimal_data << " to " << maximal_data << std::endl;
//******* Fault injection *******//
#warning "Building restore state variant"
for (int experiment_count = 0; experiment_count < 200 || (m_jc.getNumberOfUndoneJobs() != 0) ; ) { // only do 200 sequential experiments, to prevent swapping
m_log << "asking jobserver for parameters" << endl;
KesoRefExperimentData param;
if(!m_jc.getParam(param)){
m_log << "Dying." << endl; // We were told to die.
simulator.terminate(1);
}
// m_dis.init();
//******* Boot, and store state *******//
m_log << "STARTING EXPERIMENT" << endl;
// Get input data from Jobserver
address_t injectionPC = param.msg.pc_address();
address_t data_address = param.msg.ram_address();
unsigned bitpos = param.msg.bit_offset();
unsigned executed_jobs = 0;
simulator.restore("keso.state");
// Goto injection point
BPSingleListener injBP;
m_log << "Trying to inject @ " << hex << m_elf.getSymbol(injectionPC).getAddress() << endl;
// Setup exit points
const ElfSymbol &s_error = m_elf.getSymbol("keso_throw_error");
BPSingleListener l_error(s_error.getAddress());
const ElfSymbol &s_nullp = m_elf.getSymbol("keso_throw_nullpointer");
BPSingleListener l_nullp(s_nullp.getAddress());
const ElfSymbol &s_parity = m_elf.getSymbol("keso_throw_parity");
BPSingleListener l_parity(s_parity.getAddress());
const ElfSymbol &s_oobounds = m_elf.getSymbol("keso_throw_index_out_of_bounds");
BPSingleListener l_oobounds(s_oobounds.getAddress());
BPSingleListener l_dump(m_elf.getSymbol("c17_Main_m4_dumpResults_console").getAddress());
injBP.setWatchInstructionPointer(injectionPC);
MemAccessListener l_mem_text(minimal_ip, MemAccessEvent::MEM_WRITE);
l_mem_text.setWatchWidth(maximal_ip - minimal_ip);
simulator.addListenerAndResume(&injBP);
/// INJECT BITFLIP:
param.msg.set_original_value(injectBitFlip(data_address, bitpos));
MemAccessListener l_mem_outerspace( maximal_data);
l_mem_outerspace.setWatchWidth(0xfffffff0);
TrapListener l_trap(ANY_TRAP);
// Setup exit points
BPSingleListener l_error(m_elf.getSymbol("keso_throw_error").getAddress());
BPSingleListener l_nullp(m_elf.getSymbol("keso_throw_nullpointer").getAddress());
BPSingleListener l_parity(m_elf.getSymbol("keso_throw_parity").getAddress());
BPSingleListener l_oobounds(m_elf.getSymbol("keso_throw_index_out_of_bounds").getAddress());
BPSingleListener l_dump(m_elf.getSymbol("c17_Main_m4_dumpResults_console").getAddress());
TimerListener l_timeout(1000 * 1000); // 1 second in microseconds
ElfSymbol sym = m_elf.getSection(".text");
MemAccessListener l_mem_text(sym.getStart(), , AccessEvent::MEM_WRITE); l_mem_text.setWatchWidth(sym.getSize());
while (executed_jobs < 25 || m_jc.getNumberOfUndoneJobs() > 0) {
m_log << "asking jobserver for parameters" << endl;
KesoRefExperimentData param;
if(!m_jc.getParam(param)){
m_log << "Dying." << endl; // We were told to die.
simulator.terminate(1);
}
sym = m_elf.getSection(".text.cdx_det");
MemAccessListener l_mem_textcdx_det(sym.getStart(), MemAccessEvent::MEM_WRITE ); l_mem_textcdx_det.setWatchWidth(sym.getSize());
// Get input data from Jobserver
unsigned injection_instr = param.msg.fsppilot().injection_instr();
address_t data_address = param.msg.fsppilot().data_address();
sym = m_elf.getSection(".copy_sec");
MemAccessListener l_mem_outerspace( sym.getStart() ); l_mem_outerspace.setWatchWidth(0xfffffff0);
TrapListener l_trap(ANY_TRAP);
for (int bit_offset = 0; bit_offset < 8; ++bit_offset) {
// 8 results in one job
KesoRefProtoMsg_Result *result = param.msg.add_result();
result->set_bitoffset(bit_offset);
cout << " outerspace : " << l_mem_outerspace.getWatchWidth() << " --- @ :" << l_mem_outerspace.getWatchAddress() << endl;
simulator.addListener(&l_trap);
simulator.addListener(&l_error);
simulator.addListener(&l_nullp);
simulator.addListener(&l_oobounds);
simulator.addListener(&l_dump);
simulator.addListener(&l_parity);
simulator.addListener(&l_mem_text);
simulator.addListener(&l_mem_outerspace);
simulator.addListener(&l_mem_textcdx_det);
// resume and wait for results
fail::BaseListener* l = simulator.resume();
m_log << "restoring state" << endl;
// Restore to the image, which starts at address(main)
simulator.restore("state");
executed_jobs ++;
// Evaluate result
if(l == &l_error) {
handleEvent(param, param.msg.EXC_ERROR, "exc error");
m_log << "Trying to inject @ instr #" << dec << injection_instr << endl;
} else if ( l == &l_nullp ) {
handleEvent(param, param.msg.EXC_NULLPOINTER, "exc nullpointer");
} else if ( l == &l_oobounds ) {
handleEvent(param, param.msg.EXC_OOBOUNDS, "exc out of bounds");
if (injection_instr > 0) {
simulator.clearListeners();
// XXX could be improved with intermediate states (reducing runtime until injection)
simulator.addListener(&l_dump);
} else if (l == &l_dump) {
handleEvent(param, param.msg.CALCDONE, "calculation done");
BPSingleListener bp;
bp.setWatchInstructionPointer(ANY_ADDR);
bp.setCounter(injection_instr + 1);
simulator.addListener(&bp);
} else if (l == &l_parity) {
handleEvent(param, param.msg.EXC_PARITY, "exc parity");
bool inject = true;
while (1) {
fail::BaseListener * listener = simulator.resume();
// finish() before FI?
if (listener == &l_dump) {
m_log << "experiment reached finish() before FI" << endl;
handleEvent(*result, result->NOINJECTION, "time_marker reached before instr2");
inject = false;
break;
} else if (listener == &bp) {
break;
} else {
inject = false;
handleEvent(*result, result->NOINJECTION, "WTF");
break;
}
}
} else if (l == &l_trap) {
stringstream sstr;
sstr << "trap #" << l_trap.getTriggerNumber();
handleEvent(param, param.msg.TRAP, sstr.str());
// Next experiment
if (!inject)
continue;
}
address_t injection_instr_absolute = param.msg.fsppilot().injection_instr_absolute();
if (simulator.getCPU(0).getInstructionPointer() != injection_instr_absolute) {
m_log << "Invalid Injection address EIP=0x"
<< std::hex << simulator.getCPU(0).getInstructionPointer()
<< " != 0x" << injection_instr_absolute << std::endl;
simulator.terminate(1);
}
} else if (l == &l_mem_text){
handleMemoryAccessEvent(param, l_mem_text);
/// INJECT BITFLIP:
result->set_original_value(injectBitFlip(data_address, bit_offset));
} else if (l == &l_mem_textcdx_det){
handleMemoryAccessEvent(param, l_mem_textcdx_det);
cout << " outerspace : " << l_mem_outerspace.getWatchWidth() << " --- @ :" << l_mem_outerspace.getWatchAddress() << endl;
simulator.clearListeners();
simulator.addListener(&l_trap);
if (s_error.isValid())
simulator.addListener(&l_error);
if (s_nullp.isValid())
simulator.addListener(&l_nullp);
if (s_oobounds.isValid())
simulator.addListener(&l_oobounds);
simulator.addListener(&l_dump);
if (s_parity.isValid())
simulator.addListener(&l_parity);
simulator.addListener(&l_mem_text);
simulator.addListener(&l_mem_outerspace);
simulator.addListener(&l_timeout);
m_log << "Resuming till the crash" << std::endl;
// resume and wait for results
fail::BaseListener* l = simulator.resume();
m_log << "CDX has ended" << std::endl;
} else if (l == &l_mem_outerspace){
handleMemoryAccessEvent(param, l_mem_outerspace);
// Evaluate result
if(l == &l_error) {
handleEvent(*result, result->EXC_ERROR, "exc error");
} else if ( l == &l_nullp ) {
handleEvent(*result, result->EXC_NULLPOINTER, "exc nullpointer");
} else {
handleEvent(param, param.msg.UNKNOWN, "UNKNOWN event");
}
simulator.clearListeners();
m_jc.sendResult(param);
} // end while (1)
// Explicitly terminate, or the simulator will continue to run.
#endif
simulator.terminate();
} else if ( l == &l_oobounds ) {
handleEvent(*result, result->EXC_OOBOUNDS, "exc out of bounds");
} else if (l == &l_dump) {
handleEvent(*result, result->CALCDONE, "calculation done");
} else if (l == &l_parity) {
handleEvent(*result, result->EXC_PARITY, "exc parity");
} else if (l == &l_timeout) {
handleEvent(*result, result->TIMEOUT, "1s");
} else if (l == &l_trap) {
stringstream sstr;
sstr << "trap #" << l_trap.getTriggerNumber();
handleEvent(*result, result->TRAP, sstr.str());
} else if (l == &l_mem_text){
handleMemoryAccessEvent(*result, l_mem_text);
} else if (l == &l_mem_outerspace){
handleMemoryAccessEvent(*result, l_mem_outerspace);
} else {
handleEvent(*result, result->UNKNOWN, "UNKNOWN event");
}
simulator.clearListeners();
}
m_jc.sendResult(param);
}
// Explicitly terminate, or the simulator will continue to run.
simulator.terminate();
}

View File

@ -1,27 +1,26 @@
message KesoRefProtoMsg {
// parameters
required int32 pc_address = 1;
required int32 ram_address = 2;
required int32 bit_offset = 3;
import "DatabaseCampaignMessage.proto";
// results
// make these optional to reduce overhead for server->client communication
enum ResultType {
CALCDONE = 1;
TIMEOUT = 2;
TRAP = 3;
EXC_ERROR = 4;
EXC_PARITY = 5;
EXC_NULLPOINTER = 6;
EXC_OOBOUNDS = 7;
MEMACCESS = 8;
UNKNOWN = 9;
message KesoRefProtoMsg {
required DatabaseCampaignMessage fsppilot = 1;
repeated group Result = 2 {
// make these optional to reduce overhead for server->client communication
enum ResultType {
CALCDONE = 1;
TIMEOUT = 2;
TRAP = 3;
EXC_ERROR = 4;
EXC_PARITY = 5;
EXC_NULLPOINTER = 6;
EXC_OOBOUNDS = 7;
MEMACCESS = 8;
NOINJECTION = 9;
UNKNOWN = 10;
}
// result type, see above
required ResultType resulttype = 4;
required uint32 original_value = 5;
required uint32 bitoffset = 6 [(sql_primary_key) = true];
optional string details = 7;
}
// result type, see above
optional ResultType resulttype = 4;
optional uint32 original_value = 5;
// did ECC correct the fault?
//optional int32 error_corrected = 6;
// optional textual description of what happened
optional string details = 7;
}

View File

@ -2,10 +2,15 @@
#include <cstdlib>
#include "cpn/CampaignManager.hpp"
#include "util/CommandLine.hpp"
#include "campaign.hpp"
int main(int argc, char **argv)
{
fail::CommandLine &cmd = fail::CommandLine::Inst();
for (int i = 1; i < argc; ++i)
cmd.add_args(argv[i]);
KesoRefCampaign c;
if (fail::campaignmanager.runCampaign(&c)) {
return 0;