Fail* directories reorganized, Code-cleanup (-> coding-style), Typos+comments fixed.
git-svn-id: https://www4.informatik.uni-erlangen.de/i4svn/danceos/trunk/devel/fail@1321 8c4709b5-6ec9-48aa-a5cd-a96041d1645a
This commit is contained in:
10
src/core/efw/CMakeLists.txt
Normal file
10
src/core/efw/CMakeLists.txt
Normal file
@ -0,0 +1,10 @@
|
||||
# Translation units that make up the "efw" library.
set(SRCS CoroutineManager.cc JobClient.cc)

# FIXME: Add dependency check for pcl-library here.

# Build the library; the "comm" target has to be built before "efw".
add_library(efw ${SRCS})
add_dependencies(efw comm)
|
||||
95
src/core/efw/CoroutineManager.cc
Normal file
95
src/core/efw/CoroutineManager.cc
Normal file
@ -0,0 +1,95 @@
|
||||
#include <iostream>
|
||||
#include <cassert>
|
||||
|
||||
#include "CoroutineManager.hpp"
|
||||
#include "ExperimentFlow.hpp"
|
||||
|
||||
namespace fail {
|
||||
|
||||
void CoroutineManager::m_invoke(void* pData)
|
||||
{
|
||||
//std::cerr << "CORO m_invoke " << co_current() << std::endl;
|
||||
// TODO: Log-Level?
|
||||
reinterpret_cast<ExperimentFlow*>(pData)->coroutine_entry();
|
||||
//m_togglerstack.pop();
|
||||
// FIXME: need to pop our caller
|
||||
co_exit(); // deletes the associated coroutine memory as well
|
||||
|
||||
// We really shouldn't get here:
|
||||
assert(false && "FATAL ERROR: CoroutineManager::m_invoke() -- shitstorm unloading!");
|
||||
while (1); // freeze.
|
||||
}
|
||||
|
||||
// Empty destructor: coroutines still registered in m_Flows are not deleted here.
CoroutineManager::~CoroutineManager() { }
|
||||
|
||||
/**
 * Switches the control flow to \a flow (or to the simulator coroutine if
 * \a flow equals SIM_FLOW). The calling coroutine is remembered on the
 * toggler stack so that resume() can switch back to it later.
 *
 * The destination is resolved *before* the caller is pushed: if \a flow is
 * unknown, the assertion fires in debug builds; in NDEBUG builds the call is
 * ignored instead of dereferencing an end() iterator and leaving a stale
 * entry on the toggler stack.
 *
 * @param flow the destination control flow or \c SIM_FLOW (= \c NULL)
 */
void CoroutineManager::toggle(ExperimentFlow* flow)
{
    corohandle_t target = m_simCoro;

    if (flow != SIM_FLOW) {
        flowmap_t::iterator it = m_Flows.find(flow);
        assert(it != m_Flows.end() && "FATAL ERROR: Flow does not exist!");
        if (it == m_Flows.end()) {
            return; // unknown flow: nothing to switch to
        }
        target = it->second;
    }

    // Only record the caller once we know a switch will actually happen.
    m_togglerstack.push(co_current());
    co_call(target);
}
|
||||
|
||||
/**
 * Creates a new coroutine (entry point: m_invoke()) for the experiment
 * \a flow and registers it in the flow map.
 *
 * If \a flow already has a coroutine, nothing is created: std::map::insert
 * would keep the old entry anyway and the fresh coroutine would leak.
 * A failed co_create() (NULL handle) is reported via assertion and is not
 * registered, so a later toggle() cannot switch to an invalid handle.
 *
 * @param flow the flow to be executed in the newly created coroutine
 */
void CoroutineManager::create(ExperimentFlow* flow)
{
    if (m_Flows.find(flow) != m_Flows.end()) {
        return; // already registered; keep the existing coroutine
    }

    corohandle_t res = co_create(CoroutineManager::m_invoke, flow, NULL,
                                 STACK_SIZE_DEFAULT);
    assert(res != NULL && "FATAL ERROR: Coroutine could not be created!");
    if (res == NULL) {
        return;
    }

    m_Flows.insert(flowmap_t::value_type(flow, res));
}
|
||||
|
||||
/**
 * Unregisters \a flow and destroys its coroutine.
 * An unknown flow triggers an assertion (and is ignored in NDEBUG builds).
 * @param flow the flow to be removed
 */
void CoroutineManager::remove(ExperimentFlow* flow)
{
    flowmap_t::iterator entry = m_Flows.find(flow);

    if (entry != m_Flows.end()) {
        // Remember the handle, then drop the flow from the active list.
        const corohandle_t victim = entry->second;
        m_Flows.erase(entry);

        // FIXME make sure resume() keeps working

        // Destroy the coroutine; removing the currently running coroutine
        // is the special case that has to go through co_exit().
        if (victim != co_current()) {
            co_delete(victim);
        } else {
            co_exit();
        }
        return;
    }

    assert(false && "FATAL ERROR: Cannot remove flow");
}
|
||||
|
||||
void CoroutineManager::resume()
|
||||
{
|
||||
corohandle_t next = m_togglerstack.top();
|
||||
m_togglerstack.pop();
|
||||
//std::cerr << "CORO resume from " << co_current() << " to " << next << std::endl;
|
||||
co_call(next);
|
||||
}
|
||||
|
||||
/**
 * Looks up the experiment flow whose coroutine is currently running.
 * @return the flow registered for co_current(); if no registered flow
 *         matches, an assertion fires and 0 is returned (NDEBUG builds)
 */
ExperimentFlow* CoroutineManager::getCurrent()
{
    const corohandle_t active = co_current();

    // Linear search: the map is keyed by flow, not by coroutine handle.
    flowmap_t::iterator pos = m_Flows.begin();
    while (pos != m_Flows.end() && pos->second != active) {
        ++pos;
    }
    if (pos != m_Flows.end()) {
        return pos->first;
    }

    assert(false && "FATAL ERROR: The current flow could not be retrieved!");
    return 0;
}
|
||||
|
||||
// Sentinel "flow" denoting the simulator coroutine; toggle() compares against it.
const ExperimentFlow* CoroutineManager::SIM_FLOW = NULL;
|
||||
|
||||
} // end-of-namespace: fail
|
||||
72
src/core/efw/CoroutineManager.hpp
Normal file
72
src/core/efw/CoroutineManager.hpp
Normal file
@ -0,0 +1,72 @@
|
||||
#ifndef __COROUTINE_MANAGER_HPP__
|
||||
#define __COROUTINE_MANAGER_HPP__
|
||||
|
||||
#include <map>
|
||||
#include <stack>
|
||||
|
||||
#include <pcl.h> // the underlying "portable coroutine library"
|
||||
|
||||
namespace fail {
|
||||
|
||||
class ExperimentFlow;
|
||||
|
||||
/**
|
||||
* \class CoroutineManager
|
||||
* Manages the experiments flow encapsulated in coroutines. Each
|
||||
* experiment (flow) has it's associated data structure which is
|
||||
* represented by the ExperimentData-class.
|
||||
*/
|
||||
class CoroutineManager {
|
||||
private:
|
||||
//! the default stack size for coroutines (= experiment flows)
|
||||
static const unsigned STACK_SIZE_DEFAULT = 4096*4096;
|
||||
//! the abstraction for coroutine identification
|
||||
typedef coroutine_t corohandle_t;
|
||||
typedef std::map<ExperimentFlow*,corohandle_t> flowmap_t;
|
||||
//! the mapping "flows <-> coro-handle"
|
||||
flowmap_t m_Flows;
|
||||
//! the simulator/backend coroutine handle
|
||||
corohandle_t m_simCoro;
|
||||
//! stack of coroutines that explicitly activated another one with toggle()
|
||||
std::stack<corohandle_t> m_togglerstack;
|
||||
//! manages the run-calls for each ExperimentFlow-object
|
||||
static void m_invoke(void* pData);
|
||||
public:
|
||||
static const ExperimentFlow* SIM_FLOW; //!< the simulator coroutine flow
|
||||
|
||||
CoroutineManager() : m_simCoro(co_current()) { }
|
||||
~CoroutineManager();
|
||||
/**
|
||||
* Creates a new coroutine for the specified experiment flow.
|
||||
* @param flow the flow to be executed in the newly created coroutine
|
||||
*/
|
||||
void create(ExperimentFlow* flow);
|
||||
/**
|
||||
* Destroys coroutine for the specified experiment flow.
|
||||
* @param flow the flow to be removed
|
||||
*/
|
||||
void remove(ExperimentFlow* flow);
|
||||
/**
|
||||
* Switches the control flow to the experiment \a flow. If \a flow is
|
||||
* equal to \c SIM_FLOW, the control will be handed back to the
|
||||
* simulator. The current control flow is pushed onto an
|
||||
* internal stack.
|
||||
* @param flow the destination control flow or \c SIM_FLOW (= \c NULL )
|
||||
*/
|
||||
void toggle(ExperimentFlow* flow);
|
||||
/**
|
||||
* Gives the control back to the coroutine that toggle()d the
|
||||
* current one, by drawing from the internal stack built from
|
||||
* calls to toggle().
|
||||
*/
|
||||
void resume();
|
||||
/**
|
||||
* Retrieves the current (active) coroutine (= flow).
|
||||
* @return the current experiment flow.
|
||||
*/
|
||||
ExperimentFlow* getCurrent();
|
||||
};
|
||||
|
||||
} // end-of-namespace: fail
|
||||
|
||||
#endif // __COROUTINE_MANAGER_HPP__
|
||||
37
src/core/efw/ExperimentFlow.hpp
Normal file
37
src/core/efw/ExperimentFlow.hpp
Normal file
@ -0,0 +1,37 @@
|
||||
#ifndef __EXPERIMENT_FLOW_HPP__
|
||||
#define __EXPERIMENT_FLOW_HPP__
|
||||
|
||||
#include "sal/SALInst.hpp"
|
||||
|
||||
namespace fail {
|
||||
|
||||
/**
|
||||
* \class ExperimentFlow
|
||||
* Basic interface for user-defined experiments. To create a new experiment,
|
||||
* derive your own class from ExperimentFlow and define the run method.
|
||||
*/
|
||||
class ExperimentFlow {
|
||||
public:
|
||||
ExperimentFlow() { }
|
||||
/**
|
||||
* Defines the experiment flow.
|
||||
* @return \c true if the experiment was successful, \c false otherwise
|
||||
*/
|
||||
virtual bool run() = 0;
|
||||
/**
|
||||
* The entry point for this experiment's coroutine.
|
||||
* Should do some cleanup afterwards.
|
||||
*/
|
||||
void coroutine_entry()
|
||||
{
|
||||
run();
|
||||
simulator.clearEvents(this); // remove residual events
|
||||
// FIXME: Consider removing this call (see EventList.cc, void remove(ExperimentFlow* flow))
|
||||
// a) with the advantage that we will potentially prevent serious segfaults but
|
||||
// b) with the drawback that we cannot enforce any cleanups.
|
||||
}
|
||||
};
|
||||
|
||||
} // end-of-namespace: fail
|
||||
|
||||
#endif // __EXPERIMENT_FLOW_HPP__
|
||||
133
src/core/efw/JobClient.cc
Normal file
133
src/core/efw/JobClient.cc
Normal file
@ -0,0 +1,133 @@
|
||||
#include "JobClient.hpp"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace fail {
|
||||
|
||||
/**
 * Resolves the job server's host name and seeds the random number generator
 * used for the connect backoff. Terminates the process with exit status 1
 * if the host name cannot be resolved.
 *
 * @param server host name of the job server
 * @param port   TCP port of the job server
 */
JobClient::JobClient(const std::string& server, int port)
    : m_server(server), m_server_port(port), m_server_ent(NULL), m_sockfd(-1)
{
    // NOTE(review): gethostbyname() may return a pointer to static storage;
    // the result is kept in m_server_ent for later connects -- confirm that
    // nothing else in the process calls gethostbyname() in between.
    m_server_ent = gethostbyname(m_server.c_str());
    if (m_server_ent == NULL) {
        // gethostbyname() reports failures through h_errno, not errno,
        // so herror() (instead of perror()) yields the real reason.
        herror("[Client@gethostbyname()]");
        // TODO: Log-level?
        exit(1);
    }
    srand(time(NULL)); // needed for random backoff (see connectToServer)
}
|
||||
|
||||
bool JobClient::connectToServer()
|
||||
{
|
||||
// Connect to server
|
||||
struct sockaddr_in serv_addr;
|
||||
m_sockfd = socket(AF_INET, SOCK_STREAM, 0);
|
||||
if(m_sockfd < 0) {
|
||||
perror("[Client@socket()]");
|
||||
// TODO: Log-level?
|
||||
exit(0);
|
||||
}
|
||||
|
||||
/* Enable address reuse */
|
||||
int on = 1;
|
||||
setsockopt( m_sockfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on) );
|
||||
|
||||
memset(&serv_addr, 0, sizeof(serv_addr));
|
||||
serv_addr.sin_family = AF_INET;
|
||||
memcpy(&serv_addr.sin_addr.s_addr, m_server_ent->h_addr, m_server_ent->h_length);
|
||||
serv_addr.sin_port = htons(m_server_port);
|
||||
|
||||
int retries = CLIENT_RETRY_COUNT;
|
||||
while (true) {
|
||||
if (connect(m_sockfd, (sockaddr *)&serv_addr, sizeof(serv_addr)) < 0) {
|
||||
perror("[Client@connect()]");
|
||||
// TODO: Log-level?
|
||||
if (retries > 0) {
|
||||
// Wait CLIENT_RAND_BACKOFF_TSTART to RAND_BACKOFF_TEND seconds:
|
||||
int delay = rand() % (CLIENT_RAND_BACKOFF_TEND-CLIENT_RAND_BACKOFF_TSTART) + CLIENT_RAND_BACKOFF_TSTART;
|
||||
cout << "[Client] Retrying to connect to server in ~" << delay << "s..." << endl;
|
||||
// TODO: Log-level?
|
||||
sleep(delay);
|
||||
usleep(rand() % 1000000);
|
||||
--retries;
|
||||
continue;
|
||||
}
|
||||
cout << "[Client] Unable to reconnect (tried " << CLIENT_RETRY_COUNT << " times); "
|
||||
<< "I'll give it up!" << endl;
|
||||
// TODO: Log-level?
|
||||
return false; // finally: unable to connect, give it up :-(
|
||||
}
|
||||
break; // connected! :-)
|
||||
}
|
||||
cout << "[Client] Connection established!" << endl;
|
||||
// TODO: Log-level?
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Polls the job server until it either delivers a parameter set or answers
 * with anything other than "come again".
 * @param exp receives the parameter set on success
 * @return \c true if work has been received and stored in \a exp,
 *         \c false otherwise
 */
bool JobClient::getParam(ExperimentData& exp)
{
    for (;;) { // Here we try to acquire a parameter set
        const FailControlMessage_Command answer = tryToGetExperimentData(exp);

        // Jobserver will sent workload, params are set in \c exp
        if (answer == FailControlMessage_Command_WORK_FOLLOWS) {
            return true;
        }
        // Anything but "come again" ends the polling.
        if (answer != FailControlMessage_Command_COME_AGAIN) {
            return false;
        }
        // Nothing to do right now, but maybe later
        sleep(1);
    }
}
|
||||
|
||||
/**
 * Performs one request/response round trip with the job server: connects,
 * sends a NEED_WORK control message and reads the server's answer. If the
 * answer announces work, the experiment data is received into \a exp and
 * the workload id from the control message is stored in it.
 *
 * @param exp receives the experiment data if work follows
 * @return the command sent by the server, or
 *         \c FailControlMessage_Command_DIE if no connection could be made
 */
FailControlMessage_Command JobClient::tryToGetExperimentData(ExperimentData& exp)
{
    // Connection failed, minion can die
    if (!connectToServer()) {
        return FailControlMessage_Command_DIE;
    }

    // Ask the server for work; the same message object is reused for the reply.
    // NOTE(review): results of sendMsg()/rcvMsg() are not evaluated here.
    FailControlMessage ctrlmsg;
    ctrlmsg.set_command(FailControlMessage_Command_NEED_WORK);
    ctrlmsg.set_build_id(42);
    SocketComm::sendMsg(m_sockfd, ctrlmsg);

    ctrlmsg.Clear();
    SocketComm::rcvMsg(m_sockfd, ctrlmsg);

    const FailControlMessage_Command reply = ctrlmsg.command();
    if (reply == FailControlMessage_Command_WORK_FOLLOWS) {
        // Retrieve ExperimentData
        SocketComm::rcvMsg(m_sockfd, exp.getMessage());
        exp.setWorkloadID(ctrlmsg.workloadid()); // Store workload id of experiment data
    }
    // All other commands (e.g. COME_AGAIN) carry no payload.

    close(m_sockfd);
    return reply;
}
|
||||
|
||||
/**
 * Connects to the job server and sends back an experiment result: first a
 * RESULT_FOLLOWS control message carrying the workload id, then the result
 * message itself. The connection is closed afterwards.
 *
 * @param result the experiment data holding the result values
 * @return \c false if no connection could be established, \c true otherwise
 */
bool JobClient::sendResult(ExperimentData& result)
{
    if (connectToServer()) {
        // Announce the result ...
        FailControlMessage header;
        header.set_command(FailControlMessage_Command_RESULT_FOLLOWS);
        header.set_build_id(42);
        header.set_workloadid(result.getWorkloadID());
        cout << "[Client] Sending back result [" << std::dec << result.getWorkloadID() << "]..." << endl;
        // TODO: Log-level?
        SocketComm::sendMsg(m_sockfd, header);
        // ... and deliver it.
        SocketComm::sendMsg(m_sockfd, result.getMessage());

        // Close connection.
        close(m_sockfd);
        return true;
    }
    return false;
}
|
||||
|
||||
} // end-of-namespace: fail
|
||||
57
src/core/efw/JobClient.hpp
Normal file
57
src/core/efw/JobClient.hpp
Normal file
@ -0,0 +1,57 @@
|
||||
#ifndef __JOB_CLIENT_H__
|
||||
#define __JOB_CLIENT_H__
|
||||
|
||||
#include <string>
|
||||
#include <ctime>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <iostream>
|
||||
|
||||
#include "comm/SocketComm.hpp"
|
||||
#include "comm/ExperimentData.hpp"
|
||||
#include "comm/msg/FailControlMessage.pb.h"
|
||||
#include "config/FailConfig.hpp"
|
||||
|
||||
namespace fail {
|
||||
|
||||
/**
|
||||
* \class JobClient
|
||||
*
|
||||
* \brief Manages communication with JobServer
|
||||
* The Minion's JobClient requests ExperimentData and returns results.
|
||||
*/
|
||||
class JobClient {
|
||||
private:
|
||||
std::string m_server;
|
||||
int m_server_port;
|
||||
struct hostent* m_server_ent;
|
||||
int m_sockfd;
|
||||
|
||||
bool connectToServer();
|
||||
|
||||
FailControlMessage_Command tryToGetExperimentData(ExperimentData& exp);
|
||||
public:
|
||||
JobClient(const std::string& server = "localhost", int port = 1111);
|
||||
/**
|
||||
* Receive experiment data set from (remote) JobServer
|
||||
* The caller (experiment developer) is responsible for
|
||||
* allocating his ExperimentData object.
|
||||
*
|
||||
* @param exp Reference to a ExperimentData object allocated by the caller!
|
||||
* @return \c true if parameter have been received and put into \c exp, \c false else.
|
||||
*/
|
||||
bool getParam(ExperimentData& exp);
|
||||
/**
|
||||
* Send back experiment result to the (remote) JobServer
|
||||
* The caller (experiment developer) is responsible for
|
||||
* destroying his ExperimentData object afterwards.
|
||||
*
|
||||
* @param result Reference to the ExperimentData holding result values
|
||||
* @return \c true Result successfully sent, \c false else.
|
||||
*/
|
||||
bool sendResult(ExperimentData& result);
|
||||
};
|
||||
|
||||
} // end-of-namespace: fail
|
||||
|
||||
#endif // __JOB_CLIENT_H__
|
||||
Reference in New Issue
Block a user