Code cleanup for commits 1963-1965

git-svn-id: https://www4.informatik.uni-erlangen.de/i4svn/danceos/trunk/devel/fail@2014 8c4709b5-6ec9-48aa-a5cd-a96041d1645a
This commit is contained in:
hellwig
2013-01-23 14:22:05 +00:00
parent 0cbb38d605
commit 00f809231f
6 changed files with 92 additions and 91 deletions

View File

@ -30,8 +30,8 @@ SET(SERVER_PERF_STEPPING_SEC "1" CACHE STRING "Stepping of performan
SET(CLIENT_RAND_BACKOFF_TSTART "3" CACHE STRING "Lower limit of client's backoff phase in seconds")
SET(CLIENT_RAND_BACKOFF_TEND "8" CACHE STRING "Upper limit of client's backoff phase in seconds")
SET(CLIENT_RETRY_COUNT "3" CACHE STRING "Client's number of reconnect retries")
SET(CLIENT_JOB_REQUEST_SEC "60" CACHE STRING "Determines how often the client asks for new jobs")
SET(CLIENT_JOB_LIMIT_SEC "1000" CACHE STRING "How many jobs can a client ask for")
SET(CLIENT_JOB_REQUEST_SEC "30" CACHE STRING "Time in seconds a client tries to get work for (to reduce client/server communication frequency)")
SET(CLIENT_JOB_LIMIT "1000" CACHE STRING "How many jobs can a client ask for")
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/FailConfig.hpp.in
${CMAKE_CURRENT_BINARY_DIR}/FailConfig.hpp)

View File

@ -240,7 +240,7 @@ void CommThread::sendPendingExperimentData(Minion& minion)
{
uint32_t i;
uint32_t workloadID;
std::vector<ExperimentData*> exp;
std::deque<ExperimentData*> exp;
ExperimentData* temp_exp = 0;
FailControlMessage ctrlmsg;
@ -248,7 +248,7 @@ void CommThread::sendPendingExperimentData(Minion& minion)
ctrlmsg.set_run_id(m_js.m_runid);
ctrlmsg.set_command(FailControlMessage::WORK_FOLLOWS);
for(i = 0; i < m_job_size ; i++) {
for (i = 0; i < m_job_size ; i++) {
if (m_js.m_undoneJobs.Dequeue_nb(temp_exp) == true) {
// Got an element from queue, assign ID to workload and send to minion
workloadID = m_js.m_counter.increment(); // increment workload counter
@ -261,7 +261,6 @@ void CommThread::sendPendingExperimentData(Minion& minion)
if (!m_js.m_runningJobs.insert(workloadID, temp_exp)) {
cout << "!![Server]could not insert workload id: [" << workloadID << "] double entry?" << endl;
sleep(10);
}
}
if (exp.size() != 0) {
@ -276,15 +275,15 @@ void CommThread::sendPendingExperimentData(Minion& minion)
if (SocketComm::sendMsg(minion.getSocketDescriptor(), ctrlmsg)) {
for (i = 0; i < ctrlmsg.job_size() ; i++) {
if(SocketComm::sendMsg(minion.getSocketDescriptor(), exp.front()->getMessage())) {
exp.erase(exp.begin());
if (SocketComm::sendMsg(minion.getSocketDescriptor(), exp.front()->getMessage())) {
exp.pop_front();
} else {
break;
}
}
return;
}
return;
}
#ifndef __puma
@ -329,7 +328,7 @@ void CommThread::sendPendingExperimentData(Minion& minion)
}
}
void CommThread::receiveExperimentResults(Minion& minion, FailControlMessage ctrlmsg)
void CommThread::receiveExperimentResults(Minion& minion, FailControlMessage& ctrlmsg)
{
int i;
ExperimentData* exp = NULL; // Get exp* from running jobs

View File

@ -158,7 +158,7 @@ private:
* @param minion The minion offering results
* @param ctrlmsg The control message accompanying the results (presumably carries the workload ids of the result messages -- TODO confirm)
*/
void receiveExperimentResults(Minion& minion, FailControlMessage ctrlmsg);
void receiveExperimentResults(Minion& minion, FailControlMessage& ctrlmsg);
public:
#ifndef __puma
static boost::mutex m_CommMutex; //! to synchronise the communication

View File

@ -86,18 +86,11 @@ bool JobClient::getParam(ExperimentData& exp)
FailControlMessage_Command JobClient::tryToGetExperimentData(ExperimentData& exp)
{
FailControlMessage ctrlmsg;
// Are there other jobs for the experiment?
if (m_parameters.size() != 0) {
exp.getMessage().CopyFrom(m_parameters.front()->getMessage());
exp.setWorkloadID(m_parameters.front()->getWorkloadID());
delete &m_parameters.front()->getMessage();
delete m_parameters.front();
m_parameters.erase(m_parameters.begin());
return FailControlMessage::WORK_FOLLOWS;
} else {
FailControlMessage ctrlmsg;
if (m_parameters.size() == 0) {
// Connection failed, minion can die
if (!connectToServer()) {
@ -134,6 +127,7 @@ FailControlMessage_Command JobClient::tryToGetExperimentData(ExperimentData& exp
if (!SocketComm::rcvMsg(m_sockfd, temp_exp->getMessage())) {
// Failed to receive message? Retry.
close(m_sockfd);
delete temp_exp;
return FailControlMessage::COME_AGAIN;
}
@ -146,22 +140,26 @@ FailControlMessage_Command JobClient::tryToGetExperimentData(ExperimentData& exp
default:
break;
}
close(m_sockfd);
if (m_parameters.size() != 0) {
//Take front from m_parameters and copy to exp.
exp.getMessage().CopyFrom(m_parameters.front()->getMessage());
exp.setWorkloadID(m_parameters.front()->getWorkloadID());
//Delete front element of m_parameters
delete &m_parameters.front()->getMessage();
delete m_parameters.front();
m_parameters.erase(m_parameters.begin());
//start time measurement for throughput calculation
m_job_runtime.startTimer();
}
//start time measurement for throughput calculation
m_job_runtime.startTimer();
}
if (m_parameters.size() != 0) {
exp.getMessage().CopyFrom(m_parameters.front()->getMessage());
exp.setWorkloadID(m_parameters.front()->getWorkloadID());
delete &m_parameters.front()->getMessage();
delete m_parameters.front();
m_parameters.pop_front();
return FailControlMessage::WORK_FOLLOWS;
} else {
return ctrlmsg.command();
}
}
bool JobClient::sendResult(ExperimentData& result)
@ -171,29 +169,30 @@ bool JobClient::sendResult(ExperimentData& result)
temp_exp->getMessage().CopyFrom(result.getMessage());
temp_exp->setWorkloadID(result.getWorkloadID());
m_results.push_back( temp_exp );
if (m_parameters.size() != 0) {
// If there are more jobs for the experiment, store the result
m_results.push_back( temp_exp );
return true;
} else {
m_results.push_back( temp_exp );
//Stop time measurement and calculate new throughput
m_job_runtime.stopTimer();
m_job_throughput = CLIENT_JOB_REQUEST_SEC/((double)m_job_runtime/m_results.size());
if (m_job_throughput > CLIENT_JOB_LIMIT_SEC)
if (m_job_throughput > CLIENT_JOB_LIMIT_SEC) {
m_job_throughput = CLIENT_JOB_LIMIT_SEC;
}
if (m_job_throughput < 1)
if (m_job_throughput < 1) {
m_job_throughput = 1;
}
//Reset timer for new time measurement
m_job_runtime.reset();
if (!connectToServer())
if (!connectToServer()) {
return false;
}
//Send back results
FailControlMessage ctrlmsg;
@ -219,7 +218,7 @@ bool JobClient::sendResult(ExperimentData& result)
SocketComm::sendMsg(m_sockfd, m_results.front()->getMessage());
delete &m_results.front()->getMessage();
delete m_results.front();
m_results.erase(m_results.begin());
m_results.pop_front();
}
// Close connection.

View File

@ -31,8 +31,8 @@ private:
WallclockTimer m_job_runtime;
int m_job_throughput;
std::vector<ExperimentData*> m_parameters;
std::vector<ExperimentData*> m_results;
std::deque<ExperimentData*> m_parameters;
std::deque<ExperimentData*> m_results;
bool connectToServer();

View File

@ -40,7 +40,7 @@ bool WeatherMonitorExperiment::run()
log << "startup" << endl;
#if 1
#if 0
// STEP 0: record memory map with vptr addresses
GuestListener g;
while (true) {
@ -94,7 +94,7 @@ bool WeatherMonitorExperiment::run()
// -> campaign-ready traces with identical lengths
bp.setWatchInstructionPointer(ANY_ADDR);
bp.setCounter(WEATHER_NUMINSTR_TRACING);
#endif
#endif
simulator.addListener(&bp);
BPSingleListener ev_count(ANY_ADDR);
simulator.addListener(&ev_count);
@ -136,10 +136,10 @@ bool WeatherMonitorExperiment::run()
log << dec << "experiment finished after " << instr_counter
<< " instructions, seeing wait_end " << WEATHER_NUMITER_AFTER << " times" << endl;
#elif 0
#elif 1
// STEP 3: The actual experiment.
#if !LOCAL
for (int i = 0; i < 50; ++i) { // only do 50 sequential experiments, to prevent swapping
for (int i = 0; i < 5000; ++i) { // do up to 5000 sequential experiments (NOTE(review): the limit was 50 to prevent swapping -- confirm 5000 is intended)
// 50 exp ~ 0.5GB RAM usage per instance (linearly increasing)
#endif
@ -159,20 +159,20 @@ bool WeatherMonitorExperiment::run()
param.msg.set_mem_addr(0x00103bdc);
#endif
int id = param.getWorkloadID();
int instr_offset = param.msg.instr_offset();
int mem_addr = param.msg.mem_addr();
//int id = param.getWorkloadID();
//int instr_offset = param.msg.instr_offset();
//int mem_addr = param.msg.mem_addr();
// for each job we're actually doing *8* experiments (one for each bit)
for (int bit_offset = 0; bit_offset < 8; ++bit_offset) {
// 8 results in one job
WeathermonitorProtoMsg_Result *result = param.msg.add_result();
result->set_bit_offset(bit_offset);
log << dec << "job " << id << " instr " << instr_offset
result->set_bit_offset(bit_offset); //!!!!!!!!!
/*log << dec << "job " << id << " instr " << instr_offset
<< " mem " << mem_addr << "+" << bit_offset << endl;
log << "restoring state" << endl;
simulator.restore(statename);
simulator.restore(statename);*/
// XXX debug
/*
@ -184,21 +184,21 @@ bool WeatherMonitorExperiment::run()
*/
// this marks THE END
BPSingleListener ev_end(ANY_ADDR);
/*BPSingleListener ev_end(ANY_ADDR);
ev_end.setCounter(WEATHER_NUMINSTR_TRACING + WEATHER_NUMINSTR_AFTER);
simulator.addListener(&ev_end);
simulator.addListener(&ev_end);*/
// count loop iterations by counting wait_begin() calls
// FIXME would be nice to have a callback API for this as this needs to
// be done "in parallel"
BPSingleListener ev_wait_begin(WEATHER_FUNC_WAIT_BEGIN);
/*BPSingleListener ev_wait_begin(WEATHER_FUNC_WAIT_BEGIN);
simulator.addListener(&ev_wait_begin);
int count_loop_iter_before = 0;
int count_loop_iter_before = 0;*/
// no need to wait if offset is 0
if (instr_offset > 0) {
//if (instr_offset > 0) {
// XXX could be improved with intermediate states (reducing runtime until injection)
bp.setWatchInstructionPointer(ANY_ADDR);
/*bp.setWatchInstructionPointer(ANY_ADDR);
bp.setCounter(instr_offset);
simulator.addListener(&bp);
@ -207,18 +207,18 @@ bool WeatherMonitorExperiment::run()
++count_loop_iter_before;
simulator.addListener(&ev_wait_begin);
}
}
}*/
// --- fault injection ---
MemoryManager& mm = simulator.getMemoryManager();
/*MemoryManager& mm = simulator.getMemoryManager();
byte_t data = mm.getByte(mem_addr);
byte_t newdata = data ^ (1 << bit_offset);
mm.setByte(mem_addr, newdata);
// note at what IP we did it
int32_t injection_ip = simulator.getRegisterManager().getInstructionPointer();
param.msg.set_injection_ip(injection_ip);
result->set_iter_before_fi(count_loop_iter_before);
log << "fault injected @ ip " << injection_ip
param.msg.set_injection_ip(injection_ip);*/
result->set_iter_before_fi(0); //!!!!!!!!!!!!!!!!!
/*log << "fault injected @ ip " << injection_ip
<< " 0x" << hex << ((int)data) << " -> 0x" << ((int)newdata) << endl;
// sanity check
if (param.msg.has_instr_address() &&
@ -226,13 +226,13 @@ bool WeatherMonitorExperiment::run()
stringstream ss;
ss << "SANITY CHECK FAILED: " << injection_ip
<< " != " << param.msg.instr_address();
log << ss.str() << endl;
result->set_resulttype(result->UNKNOWN);
result->set_latest_ip(injection_ip);
result->set_details(ss.str());
result->set_iter_after_fi(0);
log << ss.str() << endl;*/
result->set_resulttype(result->UNKNOWN); //!!!!!!!!!!!!!!!!
result->set_latest_ip(42); //!!!!!!!!!!!!!!!!
result->set_details("test"); //!!!!!!!!!!!!!!!!
result->set_iter_after_fi(0); //!!!!!!!!!!!!!!!!
simulator.clearListeners();
//simulator.clearListeners();
continue;
}
@ -250,7 +250,7 @@ bool WeatherMonitorExperiment::run()
// - (XXX "sane" display?)
// catch traps as "extraordinary" ending
TrapListener ev_trap(ANY_TRAP);
/*TrapListener ev_trap(ANY_TRAP);
simulator.addListener(&ev_trap);
// jump outside text segment
BPRangeListener ev_below_text(ANY_ADDR, WEATHER_TEXT_START - 1);
@ -263,7 +263,7 @@ bool WeatherMonitorExperiment::run()
// timeout (e.g., stuck in a HLT instruction)
// 10000us = 500000 instructions
TimerListener ev_timeout(10000);
simulator.addListener(&ev_timeout);
simulator.addListener(&ev_timeout); */
#if LOCAL && 0
// XXX debug
@ -275,57 +275,60 @@ bool WeatherMonitorExperiment::run()
simulator.addFlow(&tp);
#endif
BaseListener* ev;
/*BaseListener* ev;
// count loop iterations
int count_loop_iter_after = 0;
while ((ev = simulator.resume()) == &ev_wait_begin) {
++count_loop_iter_after;
simulator.addListener(&ev_wait_begin);
}
result->set_iter_after_fi(count_loop_iter_after);
}*/
//result->set_iter_after_fi(42); //!!!!!!!!!!!!
// record latest IP regardless of result
result->set_latest_ip(simulator.getRegisterManager().getInstructionPointer());
//result->set_latest_ip(0x42); //!!!!!!!!!!
if (ev == &ev_end) {
//result->set_resulttype(result->FINISHED);
/*if (ev == &ev_end) {
log << "Result FINISHED (" << dec
<< count_loop_iter_before << "+" << count_loop_iter_after << ")" << endl;
result->set_resulttype(result->FINISHED);
result->set_resulttype(result->FINISHED); //!!!!!!!!!
} else if (ev == &ev_timeout) {
log << "Result TIMEOUT (" << dec
<< count_loop_iter_before << "+" << count_loop_iter_after << ")" << endl;
result->set_resulttype(result->TIMEOUT);
result->set_resulttype(result->TIMEOUT); //!!!!!!!!!
} else if (ev == &ev_below_text || ev == &ev_beyond_text) {
log << "Result OUTSIDE" << endl;
result->set_resulttype(result->OUTSIDE);
result->set_resulttype(result->OUTSIDE); //!!!!!!!!!
} else if (ev == &ev_trap) {
log << dec << "Result TRAP #" << ev_trap.getTriggerNumber() << endl;
result->set_resulttype(result->TRAP);
result->set_resulttype(result->TRAP); //!!!!!!!!!
stringstream ss;
ss << ev_trap.getTriggerNumber();
result->set_details(ss.str());
result->set_details(ss.str()); //!!!!!!!!
} else if (ev == &ev_detected) {
log << dec << "Result DETECTED" << endl;
result->set_resulttype(result->DETECTED);
result->set_resulttype(result->DETECTED); //!!!!!!!!
} else {
log << "Result WTF?" << endl;
result->set_resulttype(result->UNKNOWN);
result->set_resulttype(result->UNKNOWN); //!!!!!!!!
stringstream ss;
ss << "eventid " << ev->getId() << " EIP " << simulator.getRegisterManager().getInstructionPointer();
result->set_details(ss.str());
}
}
ss << "eventid " << /*ev->getId() << " EIP " << simulator.getRegisterManager().getInstructionPointer();*/
//result->set_details(ss.str()); //!!!!!!!
//result->set_details("test");
// sanity check: do we have exactly 8 results?
if (param.msg.result_size() != 8) {
/*if (param.msg.result_size() != 8) {
log << "WTF? param.msg.result_size() != 8" << endl;
} else {
} else {*/
#if !LOCAL
m_jc.sendResult(param);
#endif
}
//}
#if !LOCAL
}