diff --git a/src/experiments/ecos_kernel_test/CMakeLists.txt b/src/experiments/ecos_kernel_test/CMakeLists.txt
new file mode 100644
index 00000000..9fd21905
--- /dev/null
+++ b/src/experiments/ecos_kernel_test/CMakeLists.txt
@@ -0,0 +1,33 @@
+set(EXPERIMENT_NAME ecos_kernel_test)
+set(EXPERIMENT_TYPE EcosKernelTestExperiment)
+configure_file(../instantiate-experiment.ah.in
+               ${CMAKE_CURRENT_BINARY_DIR}/instantiate-${EXPERIMENT_NAME}.ah @ONLY
+)
+
+## Setup desired protobuf descriptions HERE ##
+set(MY_PROTOS
+  ecos_kernel_test.proto
+)
+
+set(MY_CAMPAIGN_SRCS
+  experiment.hpp
+  experiment.cc
+  campaign.hpp
+  campaign.cc
+)
+
+#### PROTOBUFS ####
+find_package(Protobuf REQUIRED)
+include_directories(${PROTOBUF_INCLUDE_DIRS})
+include_directories(${CMAKE_CURRENT_BINARY_DIR})
+
+PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS ${MY_PROTOS})
+
+## Build library
+add_library(${EXPERIMENT_NAME} ${PROTO_SRCS} ${PROTO_HDRS} ${MY_CAMPAIGN_SRCS})
+add_dependencies(${EXPERIMENT_NAME} tracing)
+
+## This is the example's campaign server distributing experiment parameters
+add_executable(${EXPERIMENT_NAME}-server main.cc)
+target_link_libraries(${EXPERIMENT_NAME}-server ${EXPERIMENT_NAME} fail ${PROTOBUF_LIBRARY} ${Boost_THREAD_LIBRARY})
+install(TARGETS ${EXPERIMENT_NAME}-server RUNTIME DESTINATION bin)
diff --git a/src/experiments/ecos_kernel_test/campaign.cc b/src/experiments/ecos_kernel_test/campaign.cc
new file mode 100644
index 00000000..0e16fd96
--- /dev/null
+++ b/src/experiments/ecos_kernel_test/campaign.cc
@@ -0,0 +1,430 @@
+// Standard containers and streams used by the campaign (result file, trace
+// file, equivalence-class bookkeeping).
+#include <iostream>
+#include <fstream>
+#include <vector>
+#include <map>
+
+// boost::timer measures the campaign's elapsed time.
+#include <boost/timer.hpp>
+
+#include "campaign.hpp"
+#include "experimentInfo.hpp"
+#include "cpn/CampaignManager.hpp"
+#include "util/Logger.hpp"
+#include "util/ProtoStream.hpp"
+#include "util/MemoryMap.hpp"
+
+#include "ecc_region.hpp"
+
+#include "../plugins/tracing/TracingPlugin.hpp"
+
+//#define PRUNING_DEBUG_OUTPUT
+
+using namespace std;
+using namespace fail;
+
+// golden-run trace read by the campaign for fault-space pruning
+char const * const trace_filename = "trace.tc";
+char
const * const results_filename = "ecos_kernel_test.csv";
+
+// equivalence class type: addr, [i1, i2]
+// addr: byte to inject a bit-flip into
+// [i1, i2]: interval of instruction numbers, counted from experiment
+//           begin
+struct equivalence_class {
+    address_t data_address;
+    int instr1, instr2;
+    address_t instr2_absolute; // FIXME we could record them all here
+};
+
+// Campaign driver: prunes the fault space using the recorded trace, enqueues
+// one job per equivalence class that ends in a READ, and writes one CSV row
+// per (equivalence class, bit) to results_filename.
+//
+// Returns false if the result file or the trace file cannot be opened,
+// true once all results handed back by the campaign manager were written.
+bool EcosKernelTestCampaign::run()
+{
+    Logger log("EcosKernelTest Campaign");
+
+    // non-destructive: due to the CSV header we can always manually recover
+    // from an accident (append mode)
+    ofstream results(results_filename, ios::out | ios::app);
+    if (!results.is_open()) {
+        log << "failed to open " << results_filename << endl;
+        return false;
+    }
+
+    log << "startup" << endl;
+
+    boost::timer t;
+
+    // load trace
+    ifstream tracef(trace_filename);
+    if (tracef.fail()) {
+        log << "couldn't open " << trace_filename << endl;
+        return false;
+    }
+    ProtoIStream ps(&tracef);
+
+    // a map of addresses of ECC protected objects
+    MemoryMap mm;
+    for (unsigned i = 0; i < sizeof(memoryMap)/sizeof(*memoryMap); ++i) {
+        mm.add(memoryMap[i][0], memoryMap[i][1]);
+    }
+
+    // set of equivalence classes that need one (rather: eight, one for
+    // each bit in that byte) experiment to determine them all
+    vector<equivalence_class> ecs_need_experiment;
+    // set of equivalence classes that need no experiment, because we know
+    // they'd be identical to the golden run
+    vector<equivalence_class> ecs_no_effect;
+
+    // NOTE: an earlier, disabled (#if 0) variant of the pruning loop that
+    // re-read the whole trace once per injection address was removed here;
+    // it referenced OOSTUBS_NUMINSTR, which this experiment does not define.
+    // The loop below walks the trace once and keeps one "open" EC per address.
+
+    // map for efficient access when results come in
+    // (job -> index into ecs_need_experiment)
+    map<EcosKernelTestExperimentData *, unsigned> experiment_ecs;
+    // map for keeping one "open" EC for every address
+    map<address_t, equivalence_class> open_ecs;
+    // experiment count
+    int count = 0;
+
+    // instruction counter within trace
+    int instr = 0;
+
+    // fill open_ecs with one EC for every address
+    for (MemoryMap::iterator it = mm.begin(); it != mm.end(); ++it) {
+        open_ecs[*it].instr1 = instr;
+    }
+
+    // absolute address of current trace instruction
+    address_t instr_absolute = 0; // FIXME this one probably should also be recorded ...
+
+    Trace_Event ev;
+    // for every event in the trace ...
+    while (ps.getNext(&ev) && instr < ECOS_NUMINSTR) {
+        // instruction events just get counted
+        if (!ev.has_memaddr()) {
+            // new instruction
+            instr++;
+            instr_absolute = ev.ip();
+            continue;
+        }
+
+        // for each single byte in this memory access ...
+        for (address_t data_address = ev.memaddr(); data_address < ev.memaddr() + ev.width();
+             ++data_address) {
+            // skip accesses to data outside our map of interesting addresses
+            map<address_t, equivalence_class>::iterator current_ec_it;
+            if ((current_ec_it = open_ecs.find(data_address)) == open_ecs.end()) {
+                continue;
+            }
+            equivalence_class& current_ec = current_ec_it->second;
+
+            // skip zero-sized intervals: these can occur when an instruction
+            // accesses a memory location more than once (e.g., INC, CMPXCHG)
+            if (current_ec.instr1 > instr) {
+                continue;
+            }
+
+            // we now have an interval-terminating R/W event to the memaddr
+            // we're currently looking at:
+
+            // complete the equivalence interval
+            current_ec.instr2 = instr;
+            current_ec.instr2_absolute = instr_absolute;
+            current_ec.data_address = data_address;
+
+            if (ev.accesstype() == ev.READ) {
+                // a sequence ending with READ: we need to do one experiment to
+                // cover it completely
+                ecs_need_experiment.push_back(current_ec);
+#ifdef PRUNING_DEBUG_OUTPUT
+                cerr << dec << "EX " << current_ec.instr1 << " " << current_ec.instr2 << " " << current_ec.data_address << "\n";
+#endif
+
+                // instantly enqueue job: that way the job clients can already
+                // start working in parallel
+                EcosKernelTestExperimentData *d = new EcosKernelTestExperimentData;
+                // we pick the rightmost instruction in that interval
+                d->msg.set_instr_offset(current_ec.instr2);
+                d->msg.set_instr_address(current_ec.instr2_absolute);
+                d->msg.set_mem_addr(current_ec.data_address);
+
+                // store index into ecs_need_experiment
+                experiment_ecs[d] = ecs_need_experiment.size() - 1;
+
+                campaignmanager.addParam(d);
+                ++count;
+            } else if (ev.accesstype() == ev.WRITE) {
+                // a sequence ending with WRITE: an injection anywhere here
+                // would have no effect.
+                ecs_no_effect.push_back(current_ec);
+#ifdef PRUNING_DEBUG_OUTPUT
+                cerr << dec << "NE " << current_ec.instr1 << " " << current_ec.instr2 << " " << current_ec.data_address << "\n";
+#endif
+            } else {
+                log << "WAT" << endl;
+            }
+
+            // next interval must start at next instruction; the aforementioned
+            // skipping mechanism wouldn't work otherwise
+            current_ec.instr1 = instr + 1;
+        }
+    }
+
+    // close all open intervals (right end of the fault-space)
+    for (map<address_t, equivalence_class>::iterator current_ec_it = open_ecs.begin();
+         current_ec_it != open_ecs.end(); ++current_ec_it) {
+        address_t data_address = current_ec_it->first;
+        equivalence_class& current_ec = current_ec_it->second;
+
+        // Why -1?  In most cases it does not make sense to inject before the
+        // very last instruction, as we won't execute it anymore.  This *only*
+        // makes sense if we also inject into parts of the result vector.  This
+        // is not the case in this experiment, and with -1 we'll get a result
+        // comparable to the non-pruned campaign.
+        // XXX still true for checksum-oostubs?
+
+        current_ec.instr2 = instr - 1;
+        current_ec.instr2_absolute = 0; // unknown
+        current_ec.data_address = data_address;
+
+        // zero-sized?  skip.
+        if (current_ec.instr1 > current_ec.instr2) {
+            continue;
+        }
+
+#if 0
+        // the run continues after the FI window, so do this experiment
+        // XXX this creates at least one experiment for *every* bit!
+        //     fix: full trace, limited FI window
+        ecs_need_experiment.push_back(current_ec);
+#ifdef PRUNING_DEBUG_OUTPUT
+        cerr << dec << "EX " << current_ec.instr1 << " " << current_ec.instr2 << " " << current_ec.data_address << "\n";
+#endif
+
+        // FIXME copy/paste, encapsulate this:
+        // instantly enqueue job: that way the job clients can already start
+        // working in parallel
+        EcosKernelTestExperimentData *d = new EcosKernelTestExperimentData;
+        // we pick the rightmost instruction in that interval
+        d->msg.set_instr_offset(current_ec.instr2);
+        //d->msg.set_instr_address(current_ec.instr2_absolute); // unknown!
+        d->msg.set_mem_addr(current_ec.data_address);
+
+        // store index into ecs_need_experiment
+        experiment_ecs[d] = ecs_need_experiment.size() - 1;
+
+        campaignmanager.addParam(d);
+        ++count;
+#else
+        // as the experiment ends, this byte is a "don't care":
+        ecs_no_effect.push_back(current_ec);
+#ifdef PRUNING_DEBUG_OUTPUT
+        cerr << dec << "NE " << current_ec.instr1 << " " << current_ec.instr2 << " " << current_ec.data_address << "\n";
+#endif
+#endif
+    }
+    // conserve some memory
+    open_ecs.clear();
+
+    campaignmanager.noMoreParameters();
+    log << "done enqueueing parameter sets (" << count << ")." << endl;
+
+    log << "equivalence classes generated:"
+        << " need_experiment = " << ecs_need_experiment.size()
+        << " no_effect = " << ecs_no_effect.size() << endl;
+
+    // statistics: number of (instruction, byte) points an unpruned campaign
+    // would have had to cover
+    unsigned long num_dumb_experiments = 0;
+    for (vector<equivalence_class>::const_iterator it = ecs_need_experiment.begin();
+         it != ecs_need_experiment.end(); ++it) {
+        num_dumb_experiments += (*it).instr2 - (*it).instr1 + 1;
+    }
+    for (vector<equivalence_class>::const_iterator it = ecs_no_effect.begin();
+         it != ecs_no_effect.end(); ++it) {
+        num_dumb_experiments += (*it).instr2 - (*it).instr1 + 1;
+    }
+    log << "pruning: reduced " << num_dumb_experiments * 8 <<
+        " experiments to " << ecs_need_experiment.size() * 8 << endl;
+
+    // CSV header
+    results << "ec_instr1\tec_instr2\tec_instr2_absolute\tec_data_address\tbitnr\tbit_width\tresulttype\tecos_test_result\tfinish_reached\tlatest_ip\terror_corrected\tdetails" << endl;
+
+    // store no-effect "experiment" results
+    for (vector<equivalence_class>::const_iterator it = ecs_no_effect.begin();
+         it != ecs_no_effect.end(); ++it) {
+        results
+            << (*it).instr1 << "\t"
+            << (*it).instr2 << "\t"
+            << (*it).instr2_absolute << "\t" // incorrect in all but one case!
+            << (*it).data_address << "\t"
+            << "0\t" // this entry starts at bit 0 ...
+            << "8\t" // ... and is 8 bits wide
+            << "1\t"
+            << "1\t" // dummy value (PASS): we didn't do any real experiments
+            << "1\t"
+            << "99\t" // dummy value: we didn't do any real experiments
+            << "0\t\n";
+    }
+
+    // collect results
+    EcosKernelTestExperimentData *res;
+    int rescount = 0;
+    while ((res = static_cast<EcosKernelTestExperimentData *>(campaignmanager.getDone()))) {
+        rescount++;
+
+        map<EcosKernelTestExperimentData *, unsigned>::iterator it =
+            experiment_ecs.find(res);
+        if (it == experiment_ecs.end()) {
+            results << "WTF, didn't find res!" << endl;
+            log << "WTF, didn't find res!" << endl;
+            continue;
+        }
+        equivalence_class &ec = ecs_need_experiment[it->second];
+
+        // sanity check
+        if (ec.instr2 != res->msg.instr_offset()) {
+            results << "ec.instr2 != instr_offset" << endl;
+            log << "ec.instr2 != instr_offset" << endl;
+        }
+        if (res->msg.result_size() != 8) {
+            results << "result_size " << res->msg.result_size()
+                    << " instr2 " << ec.instr2
+                    << " data_address " << ec.data_address << endl;
+            log << "result_size " << res->msg.result_size() << endl;
+        }
+
+        // one job contains 8 experiments
+        for (int idx = 0; idx < res->msg.result_size(); ++idx) {
+            results
+                // repeated for all single experiments:
+                << ec.instr1 << "\t"
+                << ec.instr2 << "\t"
+                << ec.instr2_absolute << "\t"
+                << ec.data_address << "\t"
+                // individual results:
+                << res->msg.result(idx).bit_offset() << "\t"
+                << "1\t" // 1 bit wide
+                << res->msg.result(idx).resulttype() << "\t"
+                << res->msg.result(idx).ecos_test_result() << "\t"
+                << res->msg.result(idx).finish_reached() << "\t"
+                << res->msg.result(idx).latest_ip() << "\t"
+                << res->msg.result(idx).error_corrected() << "\t"
+                << res->msg.result(idx).details() << "\n";
+        }
+        //delete res; // currently racy if jobs are reassigned
+
+    }
+    results.close();
+    log << "done.  sent " << count << " received " << rescount << endl;
+    log << "elapsed: " << t.elapsed() << "s" << endl;
+
+    return true;
+}
diff --git a/src/experiments/ecos_kernel_test/campaign.hpp b/src/experiments/ecos_kernel_test/campaign.hpp
new file mode 100644
index 00000000..5dee77f6
--- /dev/null
+++ b/src/experiments/ecos_kernel_test/campaign.hpp
@@ -0,0 +1,17 @@
+#pragma once
+
+#include "cpn/Campaign.hpp"
+#include "comm/ExperimentData.hpp"
+#include "ecos_kernel_test.pb.h"
+
+// One job: wraps the protobuf message carrying both the experiment
+// parameters and the results filled in by the experiment client.
+class EcosKernelTestExperimentData : public fail::ExperimentData {
+public:
+    EcosKernelTestProtoMsg msg;
+    EcosKernelTestExperimentData() : fail::ExperimentData(&msg) {}
+};
+
+// Campaign entry point; run() is implemented in campaign.cc.
+class EcosKernelTestCampaign : public fail::Campaign {
+public:
+    virtual bool run();
+};
+
diff --git a/src/experiments/ecos_kernel_test/ecc_region.hpp b/src/experiments/ecos_kernel_test/ecc_region.hpp
new file mode 100644
index 00000000..eb74ead1
--- /dev/null
+++ b/src/experiments/ecos_kernel_test/ecc_region.hpp
@@ -0,0 +1,82 @@
+// generated from STEP 0 output with region2array.sh
+static const unsigned memoryMap[][2] = {
+{0x9bec, 4},
+{0xade0, 4},
+{0xade4, 4},
+{0xade8, 4},
+{0xadf0, 4},
+{0xadf4, 4},
+{0xadf8, 4},
+{0xadfc, 4},
+{0xae00, 4},
+{0xae04, 4},
+{0xae08, 4},
+{0xae0c, 4},
+{0xae10, 4},
+{0xae14, 2},
+{0xae44, 4},
+{0xae48, 4},
+{0xae4c, 24},
+{0xae64, 4},
+{0xae68, 4},
+{0xae6c, 4},
+{0xae70, 4},
+{0xae74, 4},
+{0xae7c, 4},
+{0xae80, 4},
+{0xae84, 4},
+{0xae88, 4},
+{0xae8c, 4},
+{0xae90, 4},
+{0xae94, 4},
+{0xae98, 4},
+{0xae9c, 4},
+{0xaea0, 2},
+{0xaed0, 4},
+{0xaed4, 4},
+{0xaed8, 24},
+{0xaef0, 4},
+{0xaef4, 4},
+{0xca48, 4},
+{0xca60, 4},
+{0xca64, 4},
+{0xca68, 4},
+{0xca70, 4},
+{0xca74, 4},
+{0xca78, 4},
+{0xca7c, 4},
+{0xca80, 4},
+{0xca84, 4},
+{0xca88, 4},
+{0xca8c, 4},
+{0xca90, 4},
+{0xca94, 2},
+{0xcac4, 4},
+{0xcac8, 4},
+{0xcacc, 24},
+{0xcae4, 4},
+{0xcae8, 4},
+{0xd314, 4},
+{0xd318, 4},
+{0xd320, 4},
+{0xd324, 128},
+{0xd3a4, 4},
+{0xd3c0, 4},
+{0xd3c4, 4},
+{0xd3c8, 4},
+{0xd3d0,
4},
+{0xd3d4, 4},
+{0xd3d8, 4},
+{0xd3dc, 4},
+{0xd3e0, 4},
+{0xd3e4, 4},
+{0xd3e8, 4},
+{0xd3ec, 4},
+{0xd3f0, 4},
+{0xd3f4, 2},
+{0xd424, 4},
+{0xd428, 4},
+{0xd42c, 24},
+{0xd444, 4},
+{0xd448, 4},
+};
diff --git a/src/experiments/ecos_kernel_test/ecos_kernel_test.proto b/src/experiments/ecos_kernel_test/ecos_kernel_test.proto
new file mode 100644
index 00000000..7ae77546
--- /dev/null
+++ b/src/experiments/ecos_kernel_test/ecos_kernel_test.proto
@@ -0,0 +1,57 @@
+message EcosKernelTestProtoMsg {
+  // Input: experiment parameters
+  // (client executes 8 experiments, one for each bit at mem_addr)
+
+  // FI at #instructions from experiment start
+  required int32 instr_offset = 1;
+  // the exact IP value at this point in time (from golden run)
+  optional int32 instr_address = 2; // for sanity checks
+  // address of the byte to inject bit-flips
+  required int32 mem_addr = 3;
+
+  // ----------------------------------------------------
+
+  // Output: experiment results
+
+  // IP where we did the injection: for debugging purposes, must be identical
+  // to instr_address
+  optional int32 injection_ip = 4;
+
+  repeated group Result = 5 {
+    // single experiment bit offset
+    required int32 bit_offset = 1;
+
+    // result type:
+    // FINISHED = planned number of instructions were executed
+    // TRAP = premature guest "crash"
+    // OUTSIDE = IP left text segment
+    // DETECTED = not set by the experiment code in this directory
+    // TIMEOUT = the experiment's timer event fired first
+    // UNKNOWN = sanity check failed, or an unexpected event ended the run
+    enum ResultType {
+      FINISHED = 1;
+      TRAP = 2;
+      OUTSIDE = 3;
+      DETECTED = 4;
+      TIMEOUT = 5;
+      UNKNOWN = 6;
+    }
+    required ResultType resulttype = 2;
+
+    // especially interesting for TRAP/UNKNOWN: latest IP
+    required uint32 latest_ip = 3;
+
+    // test results
+    enum EcosTestResultType {
+      PASS = 1;
+      FAIL = 2;
+    }
+    required EcosTestResultType ecos_test_result = 4;
+
+    // was finish() ever reached?
+    optional bool finish_reached = 5;
+
+    // did ECC correct the fault?
+    optional int32 error_corrected = 6;
+
+    // optional textual description of what happened
+    optional string details = 7;
+  }
+}
diff --git a/src/experiments/ecos_kernel_test/experiment.cc b/src/experiments/ecos_kernel_test/experiment.cc
new file mode 100644
index 00000000..02ce2ded
--- /dev/null
+++ b/src/experiments/ecos_kernel_test/experiment.cc
@@ -0,0 +1,364 @@
+// assert() is used in steps 1/2, stringstream in step 3; include them
+// explicitly instead of relying on transitive includes.
+#include <cassert>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+
+// getpid
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "util/Logger.hpp"
+#include "experiment.hpp"
+#include "experimentInfo.hpp"
+#include "campaign.hpp"
+#include "sal/SALConfig.hpp"
+#include "sal/SALInst.hpp"
+#include "sal/Memory.hpp"
+#include "sal/bochs/BochsRegister.hpp"
+#include "sal/bochs/BochsEvents.hpp"
+#include "sal/Event.hpp"
+
+// You need to have the tracing plugin enabled for this
+#include "../plugins/tracing/TracingPlugin.hpp"
+
+#include "ecc_region.hpp"
+
+// LOCAL != 0: run a single hard-coded parameter set without the job server
+#define LOCAL 0
+
+using namespace std;
+using namespace fail;
+
+// Check if configuration dependencies are satisfied:
+#if !defined(CONFIG_EVENT_BREAKPOINTS) || !defined(CONFIG_SR_RESTORE) || \
+    !defined(CONFIG_SR_SAVE) || !defined(CONFIG_EVENT_TRAP)
+  #error This experiment needs: breakpoints, traps, save, and restore. Enable these in the configuration.
+#endif
+
+// Experiment flow executed inside the simulator.  Exactly one of the four
+// steps below is compiled in (toggle the #if/#elif conditions by hand):
+//   STEP 0: dump guest output to record the memory map of interesting objects
+//   STEP 1: run to ECOS_FUNC_ENTRY and save the simulator state
+//   STEP 2: restore the state and record the trace used for pruning
+//   STEP 3: fetch jobs and perform the fault-injection experiments
+//
+// Returns false if writing the trace failed (STEP 2), true otherwise.
+bool EcosKernelTestExperiment::run()
+{
+    char const *statename = "ecos_kernel_test.state";
+    Logger log("eCos Kernel Test", false);
+    BPSingleEvent bp;
+
+    log << "startup" << endl;
+
+#if 0
+    // STEP 0: record memory map with addresses of "interesting" objects
+    GuestEvent g;
+    while (true) {
+        simulator.addEventAndWait(&g);
+        cout << g.getData() << flush;
+    }
+#elif 0
+    // STEP 1: run until interesting function starts, and save state
+    bp.setWatchInstructionPointer(ECOS_FUNC_ENTRY);
+    simulator.addEventAndWait(&bp);
+    log << "test function entry reached, saving state" << endl;
+    log << "EIP = " << hex << bp.getTriggerInstructionPointer() << endl;
+    //log << "error_corrected = " << dec << ((int)simulator.getMemoryManager().getByte(OOSTUBS_ERROR_CORRECTED)) << endl;
+    simulator.save(statename);
+    assert(bp.getTriggerInstructionPointer() == ECOS_FUNC_ENTRY);
+    assert(simulator.getRegisterManager().getInstructionPointer() == ECOS_FUNC_ENTRY);
+#elif 0
+    // STEP 2: record trace for fault-space pruning
+    log << "restoring state" << endl;
+    simulator.restore(statename);
+    log << "EIP = " << hex << simulator.getRegisterManager().getInstructionPointer() << endl;
+    assert(simulator.getRegisterManager().getInstructionPointer() == ECOS_FUNC_ENTRY);
+
+    log << "enabling tracing" << endl;
+    TracingPlugin tp;
+
+    // restrict memory access logging to injection target
+    MemoryMap mm;
+    for (unsigned i = 0; i < sizeof(memoryMap)/sizeof(*memoryMap); ++i) {
+        mm.add(memoryMap[i][0], memoryMap[i][1]);
+    }
+    tp.restrictMemoryAddresses(&mm);
+
+    // record trace
+    char const *tracefile = "trace.tc";
+    ofstream of(tracefile);
+    tp.setTraceFile(&of);
+
+    // this must be done *after* configuring the plugin:
+    simulator.addFlow(&tp);
+
+#if 1
+    // trace WEATHER_NUMITER_TRACING measurement loop iterations
+    // -> calibration
+    bp.setWatchInstructionPointer(ECOS_FUNC_FINISH);
+    //bp.setCounter(WEATHER_NUMITER_TRACING); // single event, only
+#else
+    // FIXME this doesn't work properly: trace is one instruction too short as
+    // tp is removed before all events were delivered
+    // trace WEATHER_NUMINSTR_TRACING instructions
+    // -> campaign-ready traces with identical lengths
+    bp.setWatchInstructionPointer(ANY_ADDR);
+    // experimentInfo.hpp only defines ECOS_NUMINSTR for this experiment
+    bp.setCounter(ECOS_NUMINSTR);
+#endif
+    simulator.addEvent(&bp);
+    BPSingleEvent ev_count(ANY_ADDR);
+    simulator.addEvent(&ev_count);
+
+    // count instructions
+    // FIXME add SAL functionality for this?
+    int instr_counter = 0;
+    while (simulator.waitAny() == &ev_count) {
+        ++instr_counter;
+        simulator.addEvent(&ev_count);
+    }
+
+    log << dec << "tracing finished after " << instr_counter << " instructions" << endl;
+
+
+    simulator.removeFlow(&tp);
+
+    // serialize trace to file
+    if (of.fail()) {
+        log << "failed to write " << tracefile << endl;
+        simulator.clearEvents(this);
+        return false;
+    }
+    of.close();
+    log << "trace written to " << tracefile << endl;
+
+#elif 1
+    // STEP 3: The actual experiment.
+#if !LOCAL
+    for (int i = 0; i < 400; ++i) { // more than 400 will be very slow (500 is max)
+#endif
+
+    // get an experiment parameter set
+    log << "asking job server for experiment parameters" << endl;
+    EcosKernelTestExperimentData param;
+#if !LOCAL
+    if (!m_jc.getParam(param)) {
+        log << "Dying." << endl;
+        // communicate that we were told to die
+        simulator.terminate(1);
+    }
+#else
+    // XXX debug
+    param.msg.set_instr_offset(7462);
+    //param.msg.set_instr_address(12345);
+    param.msg.set_mem_addr(44540);
+#endif
+
+    int id = param.getWorkloadID();
+    int instr_offset = param.msg.instr_offset();
+    int mem_addr = param.msg.mem_addr();
+
+    // for each job we're actually doing *8* experiments (one for each bit)
+    for (int bit_offset = 0; bit_offset < 8; ++bit_offset) {
+        // 8 results in one job
+        EcosKernelTestProtoMsg_Result *result = param.msg.add_result();
+        result->set_bit_offset(bit_offset);
+        log << dec << "job " << id << " instr " << instr_offset
+            << " mem " << mem_addr << "+" << bit_offset << endl;
+
+        log << "restoring state" << endl;
+        simulator.restore(statename);
+
+        // XXX debug
+/*
+        stringstream fname;
+        fname << "job." << ::getpid();
+        ofstream job(fname.str().c_str());
+        job << "job " << id << " instr " << instr_offset << " (" << param.msg.instr_address() << ") mem " << mem_addr << "+" << bit_offset << endl;
+        job.close();
+*/
+
+        // reaching finish() could happen before OR after FI
+        BPSingleEvent func_finish(ECOS_FUNC_FINISH);
+        simulator.addEvent(&func_finish);
+        bool finish_reached = false;
+
+        // no need to wait if offset is 0
+        if (instr_offset > 0) {
+            // XXX could be improved with intermediate states (reducing runtime until injection)
+            bp.setWatchInstructionPointer(ANY_ADDR);
+            bp.setCounter(instr_offset);
+            simulator.addEvent(&bp);
+
+            // finish() before FI?
+            if (simulator.waitAny() == &func_finish) {
+                finish_reached = true;
+                log << "experiment reached finish() before FI" << endl;
+
+                // wait for bp
+                simulator.waitAny();
+                //TODO: why wait here? it seems that something went completely wrong?
+            }
+        }
+
+        // --- fault injection ---
+        MemoryManager& mm = simulator.getMemoryManager();
+        byte_t data = mm.getByte(mem_addr);
+        byte_t newdata = data ^ (1 << bit_offset);
+        mm.setByte(mem_addr, newdata);
+        // note at what IP we did it
+        int32_t injection_ip = simulator.getRegisterManager().getInstructionPointer();
+        param.msg.set_injection_ip(injection_ip);
+        log << "fault injected @ ip " << injection_ip
+            << " 0x" << hex << static_cast<int>(data)
+            << " -> 0x" << static_cast<int>(newdata) << endl;
+        // sanity check
+        if (param.msg.has_instr_address() &&
+            injection_ip != param.msg.instr_address()) {
+            stringstream ss;
+            ss << "SANITY CHECK FAILED: " << injection_ip
+               << " != " << param.msg.instr_address();
+            log << ss.str() << endl;
+            result->set_resulttype(result->UNKNOWN);
+            result->set_latest_ip(injection_ip);
+            // ecos_test_result is a *required* field: leaving it unset would
+            // make the whole job message uninitialized when it is sent back.
+            // No test verdict was observed, so report FAIL (same default as
+            // in the regular path below).
+            result->set_ecos_test_result(result->FAIL);
+            result->set_details(ss.str());
+
+            simulator.clearEvents();
+            continue;
+        }
+
+        // --- aftermath ---
+        // possible outcomes:
+        // - trap, "crash"
+        // - jump outside text segment
+        // - (XXX unaligned jump inside text segment)
+        // - (XXX weird instructions?)
+        // - (XXX results displayed?)
+        // - reaches THE END
+        // - error detected, stop
+        // additional info:
+        // - #loop iterations before/after FI
+        // - (XXX "sane" display?)
+
+        // catch traps as "extraordinary" ending
+        TrapEvent ev_trap(ANY_TRAP);
+        simulator.addEvent(&ev_trap);
+        // jump outside text segment
+        BPRangeEvent ev_below_text(ANY_ADDR, ECOS_TEXT_START - 1);
+        BPRangeEvent ev_beyond_text(ECOS_TEXT_END + 1, ANY_ADDR);
+        simulator.addEvent(&ev_below_text);
+        simulator.addEvent(&ev_beyond_text);
+        // timeout (e.g., stuck in a HLT instruction)
+        // 10000us = 500000 instructions
+        TimerEvent ev_timeout(500000);
+        simulator.addEvent(&ev_timeout);
+
+        // remaining instructions until "normal" ending
+        BPSingleEvent ev_end(ANY_ADDR);
+        ev_end.setCounter(ECOS_NUMINSTR + ECOS_RECOVERYINSTR - instr_offset);
+        simulator.addEvent(&ev_end);
+
+        // eCos' test output function, which will show if the test PASSed or FAILed
+        BPSingleEvent func_test_output(ECOS_FUNC_TEST_OUTPUT);
+        simulator.addEvent(&func_test_output);
+
+#if LOCAL && 0
+        // XXX debug
+        log << "enabling tracing" << endl;
+        TracingPlugin tp;
+        tp.setLogIPOnly(true);
+        tp.setOstream(&cout);
+        // this must be done *after* configuring the plugin:
+        simulator.addFlow(&tp);
+#endif
+
+        BaseEvent* ev = simulator.waitAny();
+
+        bool ecos_test_passed = false;
+        bool ecos_test_failed = false;
+
+        // func_test_output and func_finish only record information; keep
+        // waiting until one of the terminating events fires
+        while ( (ev == &func_test_output) || (ev == &func_finish) ) {
+            // Do we reach finish() while waiting for ev_trap/ev_done?
+            if (ev == &func_finish) {
+                finish_reached = true;
+                log << "experiment reached finish()" << endl;
+            }
+            else if(ev == &func_test_output) {
+                // 1st argument of cyg_test_output shows what has happened (FAIL or PASS)
+                address_t stack_ptr = simulator.getRegisterManager().getStackPointer(); // esp
+                int32_t cyg_test_output_argument = simulator.getMemoryManager().getByte(stack_ptr + 4); // 1st argument is at esp+4
+
+                log << "cyg_test_output_argument (#1): " << cyg_test_output_argument << endl;
+
+                /*
+                typedef enum {
+                    CYGNUM_TEST_FAIL,
+                    CYGNUM_TEST_PASS,
+                    CYGNUM_TEST_EXIT,
+                    CYGNUM_TEST_INFO,
+                    CYGNUM_TEST_GDBCMD,
+                    CYGNUM_TEST_NA
+                } Cyg_test_code;
+                */
+
+                if (cyg_test_output_argument == 0) {
+                    ecos_test_failed = true;
+                } else if (cyg_test_output_argument == 1) {
+                    ecos_test_passed = true;
+                }
+            }
+
+            // wait for ev_trap/ev_done
+            ev = simulator.waitAny();
+        }
+
+        // record latest IP regardless of result
+        result->set_latest_ip(simulator.getRegisterManager().getInstructionPointer());
+
+        // record finish_reached and error_corrected regardless of result
+        result->set_finish_reached(finish_reached);
+        int32_t error_corrected = simulator.getMemoryManager().getByte(ECOS_ERROR_CORRECTED);
+        result->set_error_corrected(error_corrected);
+
+        // record ecos_test_result
+        // (a single FAIL output wins over PASS; no output at all counts as FAIL)
+        if (ecos_test_failed) {
+            result->set_ecos_test_result(result->FAIL);
+        } else if (ecos_test_passed) {
+            result->set_ecos_test_result(result->PASS);
+        } else {
+            result->set_ecos_test_result(result->FAIL);
+        }
+
+        if (ev == &ev_end) {
+            log << dec << "Result FINISHED" << endl;
+            result->set_resulttype(result->FINISHED);
+        } else if (ev == &ev_timeout) {
+            log << "Result TIMEOUT" << endl;
+            result->set_resulttype(result->TIMEOUT);
+        } else if (ev == &ev_below_text || ev == &ev_beyond_text) {
+            log << "Result OUTSIDE" << endl;
+            result->set_resulttype(result->OUTSIDE);
+        } else if (ev == &ev_trap) {
+            log << dec << "Result TRAP #" << ev_trap.getTriggerNumber() << endl;
+            result->set_resulttype(result->TRAP);
+
+            stringstream ss;
+            ss << ev_trap.getTriggerNumber();
+            result->set_details(ss.str());
+        } else {
+            log << "Result WTF?" << endl;
+            result->set_resulttype(result->UNKNOWN);
+
+            stringstream ss;
+            ss << "eventid " << ev->getId() << " EIP " << simulator.getRegisterManager().getInstructionPointer();
+            result->set_details(ss.str());
+        }
+        // explicitly remove all events before we leave their scope
+        // FIXME event destructors should remove them from the queues
+        simulator.clearEvents();
+    }
+    // sanity check: do we have exactly 8 results?
+    if (param.msg.result_size() != 8) {
+        log << "WTF? param.msg.result_size() != 8" << endl;
+    } else {
+#if !LOCAL
+        m_jc.sendResult(param);
+#endif
+    }
+
+#if !LOCAL
+    }
+#endif
+
+#endif
+    // Explicitly terminate, or the simulator will continue to run.
+    simulator.terminate();
+    // Never flow off the end of a non-void function, even if terminate()
+    // should return.
+    return true;
+}
diff --git a/src/experiments/ecos_kernel_test/experiment.hpp b/src/experiments/ecos_kernel_test/experiment.hpp
new file mode 100644
index 00000000..77ac2296
--- /dev/null
+++ b/src/experiments/ecos_kernel_test/experiment.hpp
@@ -0,0 +1,12 @@
+#pragma once
+
+#include "efw/ExperimentFlow.hpp"
+#include "efw/JobClient.hpp"
+
+// Experiment flow for the eCos kernel test; run() is implemented in
+// experiment.cc.  m_jc is the client used to fetch jobs and send results.
+class EcosKernelTestExperiment : public fail::ExperimentFlow {
+    fail::JobClient m_jc;
+public:
+    EcosKernelTestExperiment() : m_jc("ios.cs.tu-dortmund.de") {}
+    bool run();
+};
+
diff --git a/src/experiments/ecos_kernel_test/experimentInfo.hpp b/src/experiments/ecos_kernel_test/experimentInfo.hpp
new file mode 100644
index 00000000..05cf3d0c
--- /dev/null
+++ b/src/experiments/ecos_kernel_test/experimentInfo.hpp
@@ -0,0 +1,40 @@
+#pragma once
+
+// FIXME autogenerate this
+
+#if 1 // with ECC
+
+// the task function's entry address:
+// nm -C thread1 | fgrep cyg_start
+#define ECOS_FUNC_ENTRY 0x00003cc0
+// empty function that is called explicitly when the experiment finished
+// nm -C thread1 | fgrep cyg_test_exit
+#define ECOS_FUNC_FINISH 0x000058dc
+// nm -C thread1 | fgrep "cyg_test_output"
+#define ECOS_FUNC_TEST_OUTPUT 0x000058e4
+
+// nm -C thread1 | grep "_[se]text"
+#define ECOS_TEXT_START 0x00003000
+#define ECOS_TEXT_END 0x000092ce
+
+// number of instructions the target executes under non-error conditions from ENTRY to DONE:
+// (result of experiment's step #2)
+#define ECOS_NUMINSTR 12390
+// number of instructions that are executed additionally for error corrections
+// (this is a rough guess ... TODO)
+#define ECOS_RECOVERYINSTR 0x2000
+// the variable that's increased if ECC corrects an error:
+// nm -C thread1|fgrep errors_corrected
+#define ECOS_ERROR_CORRECTED 0x0010adec //FIXME TODO XXX
+
+#else // without ECC
+
+#define COOL_ECC_FUNC_ENTRY 0x00200a90
+#define COOL_ECC_CALCDONE 0x00200ab7
+#define COOL_ECC_NUMINSTR 97
+#define COOL_ECC_OBJUNDERTEST 0x0021263c
+#define COOL_ECC_OBJUNDERTEST_SIZE 10
+#define COOL_ECC_ERROR_CORRECTED 0x002127b0 // dummy
+
+#endif
+
diff --git a/src/experiments/ecos_kernel_test/main.cc b/src/experiments/ecos_kernel_test/main.cc
new file mode 100644
index 00000000..85103323
--- /dev/null
+++ b/src/experiments/ecos_kernel_test/main.cc
@@ -0,0 +1,15 @@
+#include <iostream>
+#include <cstdlib>
+
+#include "cpn/CampaignManager.hpp"
+#include "campaign.hpp"
+
+// Campaign server entry point: runs the eCos kernel test campaign and maps
+// its boolean outcome to the process exit status (0 = success, 1 = failure).
+int main(int argc, char **argv)
+{
+    EcosKernelTestCampaign c;
+    if (fail::campaignmanager.runCampaign(&c)) {
+        return 0;
+    } else {
+        return 1;
+    }
+}