Fail* directories reorganized, Code-cleanup (-> coding-style), Typos+comments fixed.

git-svn-id: https://www4.informatik.uni-erlangen.de/i4svn/danceos/trunk/devel/fail@1321 8c4709b5-6ec9-48aa-a5cd-a96041d1645a
2012-06-08 20:09:43 +00:00
parent d474a5b952
commit 2575604b41
866 changed files with 1848 additions and 1879 deletions
--- a/src/core/efw/CMakeLists.txt
+++ b/src/core/efw/CMakeLists.txt
@ -0,0 +1,10 @@
+# Source files that make up the "efw" library.
+set(SRCS
+	CoroutineManager.cc
+	JobClient.cc
+)
+
+# FIXME: Add dependency check for pcl-library here.
+
+# Build the "efw" library from the sources listed above.
+add_library(efw ${SRCS})
+
+# Make sure the "comm" target is built before "efw".
+add_dependencies(efw comm)
--- a/src/core/efw/CoroutineManager.cc
+++ b/src/core/efw/CoroutineManager.cc
@ -0,0 +1,95 @@
+#include <iostream>
+#include <cassert>
+
+#include "CoroutineManager.hpp"
+#include "ExperimentFlow.hpp"
+
+namespace fail {
+
+/**
+ * Coroutine entry function registered via co_create().
+ * @param pData the ExperimentFlow whose coroutine_entry() is to be executed
+ */
+void CoroutineManager::m_invoke(void* pData)
+{
+	// TODO: Log-Level?
+	ExperimentFlow* const flow = static_cast<ExperimentFlow*>(pData);
+	flow->coroutine_entry();
+
+	// FIXME: need to pop our caller
+	co_exit(); // deletes the associated coroutine memory as well
+
+	// Control is not expected to come back from co_exit():
+	assert(false && "FATAL ERROR: CoroutineManager::m_invoke() -- shitstorm unloading!");
+	for (;;) { } // freeze.
+}
+
+// Empty destructor body: no explicit cleanup is performed here.
+CoroutineManager::~CoroutineManager() { }
+
+/**
+ * Switches control to the coroutine of \a flow (or to the simulator
+ * coroutine if \a flow equals \c SIM_FLOW ). The calling coroutine is
+ * pushed onto the toggler stack so that resume() can return to it.
+ *
+ * The target is looked up *before* anything is pushed: an unknown flow
+ * trips the assertion in debug builds and is ignored in release builds
+ * (instead of dereferencing the end iterator and leaving a stale entry
+ * on the toggler stack).
+ * @param flow the destination control flow or \c SIM_FLOW
+ */
+void CoroutineManager::toggle(ExperimentFlow* flow)
+{
+	corohandle_t target = m_simCoro;
+	if (flow != SIM_FLOW) {
+		flowmap_t::iterator it = m_Flows.find(flow);
+		if (it == m_Flows.end()) {
+			assert(false && "FATAL ERROR: Flow does not exist!");
+			return;
+		}
+		target = it->second;
+	}
+
+	// Remember who toggled, then hand over control.
+	m_togglerstack.push(co_current());
+	co_call(target);
+}
+
+/**
+ * Creates a coroutine for \a flow and registers it in the flow map.
+ *
+ * A flow that is already registered keeps its existing coroutine; no
+ * second coroutine is created (it could never be stored in the map and
+ * would leak). A failed co_create() trips the assertion in debug builds
+ * and leaves the map untouched in release builds, so that no NULL handle
+ * is ever stored.
+ * @param flow the flow to be executed in the newly created coroutine
+ */
+void CoroutineManager::create(ExperimentFlow* flow)
+{
+	if (m_Flows.find(flow) != m_Flows.end()) {
+		return; // already registered, nothing to do
+	}
+
+	corohandle_t res = co_create(CoroutineManager::m_invoke, flow, NULL,
+								 STACK_SIZE_DEFAULT);
+	if (res == NULL) {
+		assert(false && "FATAL ERROR: Cannot create coroutine");
+		return;
+	}
+	m_Flows.insert(std::pair<ExperimentFlow*,corohandle_t>(flow, res));
+}
+
+/**
+ * Unregisters \a flow and destroys its coroutine.
+ * @param flow the flow to be removed; must be present in the flow map
+ */
+void CoroutineManager::remove(ExperimentFlow* flow)
+{
+	// Look up the handle that belongs to this flow.
+	flowmap_t::iterator pos = m_Flows.find(flow);
+	if (pos == m_Flows.end()) {
+		assert(false && "FATAL ERROR: Cannot remove flow");
+		return;
+	}
+
+	// Copy the handle first: erasing invalidates the iterator.
+	const corohandle_t victim = pos->second;
+	m_Flows.erase(pos);
+
+	// FIXME make sure resume() keeps working
+
+	// A foreign coroutine is deleted directly ...
+	if (victim != co_current()) {
+		co_delete(victim);
+		return;
+	}
+	// ... whereas removing ourselves means leaving the running coroutine.
+	co_exit();
+}
+
+/**
+ * Hands control back to the coroutine that most recently called toggle(),
+ * removing it from the toggler stack.
+ *
+ * Calling resume() without a matching toggle() trips the assertion in
+ * debug builds and is a no-op in release builds (top()/pop() on an empty
+ * std::stack would be undefined behaviour).
+ */
+void CoroutineManager::resume()
+{
+	if (m_togglerstack.empty()) {
+		assert(false && "FATAL ERROR: resume() called without preceding toggle()");
+		return;
+	}
+	corohandle_t next = m_togglerstack.top();
+	m_togglerstack.pop();
+	co_call(next);
+}
+
+/**
+ * Determines which registered flow owns the currently running coroutine
+ * by scanning the flow map for the handle returned by co_current().
+ * @return the matching flow; if no registered flow matches, the assertion
+ *         fires (debug builds) and a null pointer is returned
+ */
+ExperimentFlow* CoroutineManager::getCurrent()
+{
+	const corohandle_t running = co_current();
+	flowmap_t::iterator pos = m_Flows.begin();
+	while (pos != m_Flows.end()) {
+		if (pos->second == running) {
+			return pos->first;
+		}
+		++pos;
+	}
+
+	assert(false && "FATAL ERROR: The current flow could not be retrieved!");
+	return NULL;
+}
+
+// Sentinel flow value (NULL): toggle() compares its argument against it
+// and switches to the simulator coroutine on a match.
+const ExperimentFlow* CoroutineManager::SIM_FLOW = NULL;
+
+} // end-of-namespace: fail
--- a/src/core/efw/CoroutineManager.hpp
+++ b/src/core/efw/CoroutineManager.hpp
@ -0,0 +1,72 @@
+#ifndef __COROUTINE_MANAGER_HPP__
+  #define __COROUTINE_MANAGER_HPP__
+
+#include <map>
+#include <stack>
+
+#include <pcl.h> // the underlying "portable coroutine library"
+
+namespace fail {
+
+class ExperimentFlow;
+
+/**
+ * \class CoroutineManager
+ * Manages the experiments flow encapsulated in coroutines. Each
+ * experiment (flow) has its associated data structure which is
+ * represented by the ExperimentData-class.
+ *
+ * Internally, every ExperimentFlow is mapped to one coroutine handle of
+ * the "portable coroutine library" (pcl).
+ */
+class CoroutineManager {
+private:
+	//! the default stack size for coroutines (= experiment flows);
+	//! passed as the size argument to co_create()
+	static const unsigned STACK_SIZE_DEFAULT = 4096*4096;
+	//! the abstraction for coroutine identification
+	typedef coroutine_t corohandle_t;
+	//! container type used for the flow/handle mapping
+	typedef std::map<ExperimentFlow*,corohandle_t> flowmap_t;
+	//! the mapping "flows <-> coro-handle"
+	flowmap_t m_Flows;
+	//! the simulator/backend coroutine handle
+	corohandle_t m_simCoro;
+	//! stack of coroutines that explicitly activated another one with toggle()
+	std::stack<corohandle_t> m_togglerstack;
+	//! manages the run-calls for each ExperimentFlow-object
+	//! (entry function handed to co_create(); \a pData is the flow)
+	static void m_invoke(void* pData);
+public:
+	static const ExperimentFlow* SIM_FLOW; //!< the simulator coroutine flow
+
+	/**
+	 * Records the coroutine that is current at construction time as the
+	 * simulator coroutine.
+	 */
+	CoroutineManager() : m_simCoro(co_current()) { }
+	~CoroutineManager();
+	/**
+	 * Creates a new coroutine for the specified experiment flow.
+	 * @param flow the flow to be executed in the newly created coroutine
+	 */
+	void create(ExperimentFlow* flow);
+	/**
+	 * Destroys coroutine for the specified experiment flow.
+	 * @param flow the flow to be removed
+	 */
+	void remove(ExperimentFlow* flow);
+	/**
+	 * Switches the control flow to the experiment \a flow. If \a flow is
+	 * equal to \c SIM_FLOW, the control will be handed back to the
+	 * simulator.  The current control flow is pushed onto an
+	 * internal stack.
+	 * @param flow the destination control flow or \c SIM_FLOW (= \c NULL )
+	 */
+	void toggle(ExperimentFlow* flow);
+	/**
+	 * Gives the control back to the coroutine that toggle()d the
+	 * current one, by drawing from the internal stack built from
+	 * calls to toggle().
+	 */
+	void resume();
+	/**
+	 * Retrieves the current (active) coroutine (= flow).
+	 * @return the current experiment flow.
+	 */
+	ExperimentFlow* getCurrent();
+};
+
+} // end-of-namespace: fail
+
+#endif // __COROUTINE_MANAGER_HPP__
--- a/src/core/efw/ExperimentFlow.hpp
+++ b/src/core/efw/ExperimentFlow.hpp
@ -0,0 +1,37 @@
+#ifndef __EXPERIMENT_FLOW_HPP__
+  #define __EXPERIMENT_FLOW_HPP__
+
+#include "sal/SALInst.hpp"
+
+namespace fail {
+
+/**
+ * \class ExperimentFlow
+ * Basic interface for user-defined experiments. To create a new experiment,
+ * derive your own class from ExperimentFlow and define the run method.
+ */
+class ExperimentFlow {
+public:
+	ExperimentFlow() { }
+	/**
+	 * Virtual destructor: this class is a polymorphic base (see run()), so
+	 * derived experiments must be destructible through a base pointer.
+	 */
+	virtual ~ExperimentFlow() { }
+	/**
+	 * Defines the experiment flow.
+	 * @return \c true if the experiment was successful, \c false otherwise
+	 */
+	virtual bool run() = 0;
+	/**
+	 * The entry point for this experiment's coroutine.
+	 * Executes run() (its result is not evaluated here) and afterwards
+	 * removes the events this flow left behind in the simulator.
+	 */
+	void coroutine_entry()
+	{
+		run();
+		simulator.clearEvents(this); // remove residual events
+		// FIXME: Consider removing this call (see EventList.cc, void remove(ExperimentFlow* flow))
+		//        a) with the advantage that we will potentially prevent serious segfaults but
+		//        b) with the drawback that we cannot enforce any cleanups.
+	}
+};
+
+} // end-of-namespace: fail
+
+#endif // __EXPERIMENT_FLOW_HPP__
--- a/src/core/efw/JobClient.cc
+++ b/src/core/efw/JobClient.cc
@ -0,0 +1,133 @@
+#include "JobClient.hpp"
+
+using namespace std;
+
+namespace fail {
+
+/**
+ * Stores the server coordinates and resolves the host name once.
+ * Terminates the process with exit status 1 if the name cannot be
+ * resolved.
+ * @param server host name of the job server
+ * @param port TCP port of the job server
+ */
+JobClient::JobClient(const std::string& server, int port)
+	: m_server(server), m_server_port(port),
+	  m_server_ent(gethostbyname(m_server.c_str()))
+{
+	if (m_server_ent == NULL) {
+		// gethostbyname() reports failures through h_errno (not errno),
+		// hence herror() instead of perror().
+		herror("[Client@gethostbyname()]");
+		// TODO: Log-level?
+		exit(1);
+	}
+	srand(time(NULL)); // needed for random backoff (see connectToServer)
+}
+
+/**
+ * Opens a TCP connection to the job server and stores the descriptor in
+ * m_sockfd. A failed connect() is retried up to CLIENT_RETRY_COUNT times
+ * with a random backoff in between.
+ *
+ * Every attempt uses a fresh socket (the state of a socket is unspecified
+ * after a failed connect()), and the descriptor is closed whenever an
+ * attempt fails, so that giving up does not leak it.
+ * @return \c true if connected (m_sockfd is valid), \c false if all
+ *         attempts failed (m_sockfd is -1). Terminates the process with
+ *         exit status 1 if no socket can be created.
+ */
+bool JobClient::connectToServer()
+{
+	// The target address is the same for every attempt; build it once.
+	struct sockaddr_in serv_addr;
+	memset(&serv_addr, 0, sizeof(serv_addr));
+	serv_addr.sin_family = AF_INET;
+	memcpy(&serv_addr.sin_addr.s_addr, m_server_ent->h_addr, m_server_ent->h_length);
+	serv_addr.sin_port = htons(m_server_port);
+
+	int retries = CLIENT_RETRY_COUNT;
+	while (true) {
+		m_sockfd = socket(AF_INET, SOCK_STREAM, 0);
+		if (m_sockfd < 0) {
+			perror("[Client@socket()]");
+			// TODO: Log-level?
+			exit(1); // this is a failure, so do not report success
+		}
+
+		/* Enable address reuse */
+		int on = 1;
+		setsockopt(m_sockfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
+
+		if (connect(m_sockfd, reinterpret_cast<sockaddr*>(&serv_addr), sizeof(serv_addr)) == 0) {
+			break; // connected! :-)
+		}
+
+		// Report first: close() may overwrite errno.
+		perror("[Client@connect()]");
+		// TODO: Log-level?
+		close(m_sockfd);
+		m_sockfd = -1;
+
+		if (retries <= 0) {
+			cout << "[Client] Unable to reconnect (tried " << CLIENT_RETRY_COUNT << " times); "
+			     << "I'll give it up!" << endl;
+			// TODO: Log-level?
+			return false; // finally: unable to connect, give it up :-(
+		}
+
+		// Wait CLIENT_RAND_BACKOFF_TSTART to CLIENT_RAND_BACKOFF_TEND seconds:
+		int delay = rand() % (CLIENT_RAND_BACKOFF_TEND-CLIENT_RAND_BACKOFF_TSTART) + CLIENT_RAND_BACKOFF_TSTART;
+		cout << "[Client] Retrying to connect to server in ~" << delay << "s..." << endl;
+		// TODO: Log-level?
+		sleep(delay);
+		usleep(rand() % 1000000); // sub-second jitter on top of the delay
+		--retries;
+	}
+	cout << "[Client] Connection established!" << endl;
+	// TODO: Log-level?
+
+	return true;
+}
+
+/**
+ * Polls the job server until it either hands out a parameter set or
+ * answers with something other than "come again".
+ * @param exp receives the parameters when work is handed out
+ * @return \c true if \a exp was filled, \c false otherwise
+ */
+bool JobClient::getParam(ExperimentData& exp)
+{
+	for (;;) {
+		const FailControlMessage_Command answer = tryToGetExperimentData(exp);
+
+		// Jobserver sent a workload, params are set in \c exp
+		if (answer == FailControlMessage_Command_WORK_FOLLOWS) {
+			return true;
+		}
+		// Anything but "come again" ends the polling
+		if (answer != FailControlMessage_Command_COME_AGAIN) {
+			return false;
+		}
+		// Nothing to do right now, but maybe later
+		sleep(1);
+	}
+}
+
+/**
+ * Performs one request/response round trip with the job server: connects,
+ * sends a NEED_WORK control message, reads the server's control message
+ * and, if work follows, reads the experiment data into \a exp.
+ * @param exp receives message and workload id if work was handed out
+ * @return the command of the server's control message, or
+ *         \c FailControlMessage_Command_DIE if no connection could be made
+ */
+FailControlMessage_Command JobClient::tryToGetExperimentData(ExperimentData& exp)
+{
+	// Connection failed, minion can die
+	if (!connectToServer()) {
+		return FailControlMessage_Command_DIE;
+	}
+
+	// Ask for work; the same message object is reused for the reply
+	FailControlMessage ctrlmsg;
+	ctrlmsg.set_command(FailControlMessage_Command_NEED_WORK);
+	ctrlmsg.set_build_id(42);
+	// NOTE(review): results of sendMsg()/rcvMsg() are not evaluated here
+	SocketComm::sendMsg(m_sockfd, ctrlmsg);
+	ctrlmsg.Clear();
+	SocketComm::rcvMsg(m_sockfd, ctrlmsg);
+
+	const FailControlMessage_Command reply = ctrlmsg.command();
+	if (reply == FailControlMessage_Command_WORK_FOLLOWS) {
+		SocketComm::rcvMsg(m_sockfd, exp.getMessage());
+		exp.setWorkloadID(ctrlmsg.workloadid()); // Store workload id of experiment data
+	}
+	// Every other command needs no further data from the socket
+
+	close(m_sockfd);
+	return reply;
+}
+
+/**
+ * Connects to the job server and transmits a RESULT_FOLLOWS control
+ * message followed by the result message itself.
+ * @param result the experiment data holding the result values
+ * @return \c false if no connection could be established, \c true otherwise
+ */
+bool JobClient::sendResult(ExperimentData& result)
+{
+	if (connectToServer()) {
+		// Announce the result ...
+		FailControlMessage announcement;
+		announcement.set_command(FailControlMessage_Command_RESULT_FOLLOWS);
+		announcement.set_build_id(42);
+		announcement.set_workloadid(result.getWorkloadID());
+		cout << "[Client] Sending back result [" << std::dec << result.getWorkloadID() << "]..."  << endl;
+		// TODO: Log-level?
+		SocketComm::sendMsg(m_sockfd, announcement);
+		// ... then ship it
+		SocketComm::sendMsg(m_sockfd, result.getMessage());
+
+		// Close connection.
+		close(m_sockfd);
+		return true;
+	}
+	return false;
+}
+
+} // end-of-namespace: fail
--- a/src/core/efw/JobClient.hpp
+++ b/src/core/efw/JobClient.hpp
@ -0,0 +1,57 @@
+#ifndef __JOB_CLIENT_H__
+  #define __JOB_CLIENT_H__
+
+#include <string>
+#include <ctime>
+#include <stdlib.h>
+#include <unistd.h>
+#include <iostream>
+
+#include "comm/SocketComm.hpp"
+#include "comm/ExperimentData.hpp"
+#include "comm/msg/FailControlMessage.pb.h"
+#include "config/FailConfig.hpp"
+
+namespace fail {
+
+/**
+* \class JobClient
+* 
+* \brief Manages communication with JobServer
+* The Minion's JobClient requests ExperimentData and returns results.
+* A new connection is opened for each request and closed afterwards.
+*/
+class JobClient {
+private:
+	std::string m_server;         //!< host name of the job server
+	int m_server_port;            //!< TCP port of the job server
+	struct hostent* m_server_ent; //!< result of gethostbyname() for m_server
+	int m_sockfd;                 //!< socket descriptor set by connectToServer()
+
+	/**
+	* Opens a TCP connection to the job server (with retries) and stores
+	* the socket descriptor in m_sockfd.
+	* @return \c true if the connection has been established, \c false else.
+	*/
+	bool connectToServer();
+
+	/**
+	* Performs a single attempt to fetch experiment data from the server.
+	* @param exp Reference to the ExperimentData object to be filled
+	* @return the command the server answered with, or
+	*         \c FailControlMessage_Command_DIE if connecting failed.
+	*/
+	FailControlMessage_Command tryToGetExperimentData(ExperimentData& exp);
+public:
+	/**
+	* Resolves the server's host name; terminates the process if that fails.
+	* @param server host name of the job server
+	* @param port TCP port of the job server
+	*/
+	JobClient(const std::string& server = "localhost", int port = 1111);
+	/**
+	* Receive experiment data set from (remote) JobServer
+	* The caller (experiment developer) is responsible for
+	* allocating his ExperimentData object.
+	* 
+	* @param exp Reference to an ExperimentData object allocated by the caller!
+	* @return \c true if parameters have been received and put into \c exp, \c false else.
+	*/
+	bool getParam(ExperimentData& exp);
+	/**
+	* Send back experiment result to the (remote) JobServer
+	* The caller (experiment developer) is responsible for
+	* destroying his ExperimentData object afterwards.
+	* 
+	* @param result Reference to the ExperimentData holding result values
+	* @return \c true Result successfully sent, \c false else. 
+	*/
+	bool sendResult(ExperimentData& result);
+};
+
+} // end-of-namespace: fail
+
+#endif // __JOB_CLIENT_H__