Moved client- and server-settings to CMAKE-config.
git-svn-id: https://www4.informatik.uni-erlangen.de/i4svn/danceos/trunk/devel/fail@1248 8c4709b5-6ec9-48aa-a5cd-a96041d1645a
This commit is contained in:
@ -16,7 +16,13 @@ OPTION(CONFIG_STFU "Misc: Reduced verbosity" OFF)
|
|||||||
OPTION(CONFIG_SUPPRESS_INTERRUPTS "Target backend: Suppress interrupts" OFF)
|
OPTION(CONFIG_SUPPRESS_INTERRUPTS "Target backend: Suppress interrupts" OFF)
|
||||||
OPTION(CONFIG_FIRE_INTERRUPTS "Target backend: Fire interrupts" OFF)
|
OPTION(CONFIG_FIRE_INTERRUPTS "Target backend: Fire interrupts" OFF)
|
||||||
OPTION(CONFIG_DISABLE_KEYB_INTERRUPTS "Target backend: Suppress keyboard interrupts" OFF)
|
OPTION(CONFIG_DISABLE_KEYB_INTERRUPTS "Target backend: Suppress keyboard interrupts" OFF)
|
||||||
OPTION(CONFIG_FI_MEM_ACCESS_BITFLIP "deprecated something" OFF)
|
OPTION(CONFIG_FI_MEM_ACCESS_BITFLIP "Deprecated something" OFF)
|
||||||
|
OPTION(SERVER_PERFORMANCE_MEASURE "Performance measurement in job-server" OFF)
|
||||||
|
SET(SERVER_PERF_LOG_PATH "perf.log" CACHE STRING "A file name for storing the server's performance log (CSV)")
|
||||||
|
SET(SERVER_PERF_STEPPING_SEC "1" CACHE STRING "Stepping of performance measurements in seconds")
|
||||||
|
SET(CLIENT_RAND_BACKOFF_TSTART "3" CACHE STRING "Lower limit of client's backoff phase in seconds")
|
||||||
|
SET(CLIENT_RAND_BACKOFF_TEND "8" CACHE STRING "Upper limit of client's backoff phase in seconds")
|
||||||
|
SET(CLIENT_RETRY_COUNT "3" CACHE STRING "Client's number of reconnect-retries")
|
||||||
|
|
||||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/FailConfig.hpp.in
|
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/FailConfig.hpp.in
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/FailConfig.hpp)
|
${CMAKE_CURRENT_BINARY_DIR}/FailConfig.hpp)
|
||||||
|
|||||||
@ -19,11 +19,17 @@
|
|||||||
#cmakedefine CONFIG_SR_SAVE
|
#cmakedefine CONFIG_SR_SAVE
|
||||||
#cmakedefine CONFIG_SR_REBOOT
|
#cmakedefine CONFIG_SR_REBOOT
|
||||||
|
|
||||||
// Miscellaneous
|
// Fail configuration
|
||||||
#cmakedefine CONFIG_STFU
|
#cmakedefine CONFIG_STFU
|
||||||
#cmakedefine CONFIG_SUPPRESS_INTERRUPTS
|
#cmakedefine CONFIG_SUPPRESS_INTERRUPTS
|
||||||
#cmakedefine CONFIG_FIRE_INTERRUPTS
|
#cmakedefine CONFIG_FIRE_INTERRUPTS
|
||||||
#cmakedefine CONFIG_DISABLE_KEYB_INTERRUPTS
|
#cmakedefine CONFIG_DISABLE_KEYB_INTERRUPTS
|
||||||
|
#cmakedefine SERVER_PERFORMANCE_MEASURE
|
||||||
|
#cmakedefine SERVER_PERF_LOG_PATH "@SERVER_PERF_LOG_PATH@"
|
||||||
|
#cmakedefine SERVER_PERF_STEPPING_SEC @SERVER_PERF_STEPPING_SEC@
|
||||||
|
#cmakedefine CLIENT_RAND_BACKOFF_TSTART @CLIENT_RAND_BACKOFF_TSTART@
|
||||||
|
#cmakedefine CLIENT_RAND_BACKOFF_TEND @CLIENT_RAND_BACKOFF_TEND@
|
||||||
|
#cmakedefine CLIENT_RETRY_COUNT @CLIENT_RETRY_COUNT@
|
||||||
|
|
||||||
// Fault injection
|
// Fault injection
|
||||||
#cmakedefine CONFIG_FI_MEM_ACCESS_BITFLIP
|
#cmakedefine CONFIG_FI_MEM_ACCESS_BITFLIP
|
||||||
|
|||||||
@ -34,20 +34,20 @@ bool JobClient::connectToServer()
|
|||||||
memcpy(&serv_addr.sin_addr.s_addr, m_server_ent->h_addr, m_server_ent->h_length);
|
memcpy(&serv_addr.sin_addr.s_addr, m_server_ent->h_addr, m_server_ent->h_length);
|
||||||
serv_addr.sin_port = htons(m_server_port);
|
serv_addr.sin_port = htons(m_server_port);
|
||||||
|
|
||||||
int retries = RETRY_COUNT;
|
int retries = CLIENT_RETRY_COUNT;
|
||||||
while(true) {
|
while(true) {
|
||||||
if(connect(m_sockfd, (sockaddr *)&serv_addr, sizeof(serv_addr)) < 0) {
|
if(connect(m_sockfd, (sockaddr *)&serv_addr, sizeof(serv_addr)) < 0) {
|
||||||
perror("[Client@connect()]");
|
perror("[Client@connect()]");
|
||||||
if(retries > 0) {
|
if(retries > 0) {
|
||||||
// Wait RAND_BACKOFF_TSTART to RAND_BACKOFF_TEND seconds:
|
// Wait CLIENT_RAND_BACKOFF_TSTART to CLIENT_RAND_BACKOFF_TEND seconds:
|
||||||
int delay = rand() % (RAND_BACKOFF_TEND-RAND_BACKOFF_TSTART) + RAND_BACKOFF_TSTART;
|
int delay = rand() % (CLIENT_RAND_BACKOFF_TEND-CLIENT_RAND_BACKOFF_TSTART) + CLIENT_RAND_BACKOFF_TSTART;
|
||||||
cout << "[Client] Retrying to connect to server in ~" << delay << "s..." << endl;
|
cout << "[Client] Retrying to connect to server in ~" << delay << "s..." << endl;
|
||||||
sleep(delay);
|
sleep(delay);
|
||||||
usleep(rand() % 1000000);
|
usleep(rand() % 1000000);
|
||||||
--retries;
|
--retries;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
cout << "[Client] Unable to reconnect (tried " << RETRY_COUNT << " times); "
|
cout << "[Client] Unable to reconnect (tried " << CLIENT_RETRY_COUNT << " times); "
|
||||||
<< "I'll give it up!" << endl;
|
<< "I'll give it up!" << endl;
|
||||||
return false; // finally: unable to connect, give it up :-(
|
return false; // finally: unable to connect, give it up :-(
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,26 +1,16 @@
|
|||||||
/**
|
|
||||||
* \brief The Minion's JobClient requests ExperimentData and returns results.
|
|
||||||
*
|
|
||||||
* \author Martin Hoffmann
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef __JOB_CLIENT_H__
|
#ifndef __JOB_CLIENT_H__
|
||||||
#define __JOB_CLIENT_H__
|
#define __JOB_CLIENT_H__
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <ctime>
|
#include <ctime>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
#include "SocketComm.hpp"
|
#include "SocketComm.hpp"
|
||||||
#include "controller/ExperimentData.hpp"
|
#include "controller/ExperimentData.hpp"
|
||||||
#include "jobserver/messagedefs/FailControlMessage.pb.h"
|
#include "jobserver/messagedefs/FailControlMessage.pb.h"
|
||||||
|
#include "config/FailConfig.hpp"
|
||||||
// FIXME This should be part of a "client config" (?).
|
|
||||||
#define RAND_BACKOFF_TSTART 3
|
|
||||||
#define RAND_BACKOFF_TEND 8
|
|
||||||
#define RETRY_COUNT 3
|
|
||||||
|
|
||||||
namespace fi {
|
namespace fi {
|
||||||
|
|
||||||
@ -28,6 +18,7 @@ namespace fi {
|
|||||||
* \class JobClient
|
* \class JobClient
|
||||||
*
|
*
|
||||||
* \brief Manages communication with JobServer
|
* \brief Manages communication with JobServer
|
||||||
|
* The Minion's JobClient requests ExperimentData and returns results.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
class JobClient {
|
class JobClient {
|
||||||
@ -63,10 +54,6 @@ namespace fi {
|
|||||||
bool sendResult(ExperimentData& result);
|
bool sendResult(ExperimentData& result);
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif // __JOB_CLIENT_H__
|
||||||
#endif
|
|
||||||
|
|||||||
@ -1,6 +1,3 @@
|
|||||||
// Author: Martin Hoffmann, Richard Hellwig, Adrian Böckenkamp
|
|
||||||
// Date: 07.10.11
|
|
||||||
|
|
||||||
// <iostream> needs to be included before *.pb.h, otherwise ac++/Puma chokes on the latter
|
// <iostream> needs to be included before *.pb.h, otherwise ac++/Puma chokes on the latter
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
@ -17,6 +14,7 @@
|
|||||||
#include "jobserver/messagedefs/FailControlMessage.pb.h"
|
#include "jobserver/messagedefs/FailControlMessage.pb.h"
|
||||||
#include "SocketComm.hpp"
|
#include "SocketComm.hpp"
|
||||||
#include "controller/Minion.hpp"
|
#include "controller/Minion.hpp"
|
||||||
|
|
||||||
#ifndef __puma
|
#ifndef __puma
|
||||||
#include <boost/thread.hpp>
|
#include <boost/thread.hpp>
|
||||||
#include <boost/date_time.hpp>
|
#include <boost/date_time.hpp>
|
||||||
@ -32,7 +30,9 @@ void JobServer::addParam(ExperimentData* exp){
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef SERVER_PERFORMANCE_MEASURE
|
||||||
volatile unsigned JobServer::m_DoneCount = 0;
|
volatile unsigned JobServer::m_DoneCount = 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
ExperimentData *JobServer::getDone()
|
ExperimentData *JobServer::getDone()
|
||||||
{
|
{
|
||||||
@ -58,12 +58,12 @@ ExperimentData *JobServer::getDone()
|
|||||||
#ifdef SERVER_PERFORMANCE_MEASURE
|
#ifdef SERVER_PERFORMANCE_MEASURE
|
||||||
void JobServer::measure()
|
void JobServer::measure()
|
||||||
{
|
{
|
||||||
cout << "\n[Server] Logging throughput in \"" << PERFORMANCE_LOG_PATH << "\"..." << endl;
|
cout << "\n[Server] Logging throughput in \"" << SERVER_PERF_LOG_PATH << "\"..." << endl;
|
||||||
ofstream m_file(PERFORMANCE_LOG_PATH, std::ios::trunc); // overwrite existing perf-logs
|
ofstream m_file(SERVER_PERF_LOG_PATH, std::ios::trunc); // overwrite existing perf-logs
|
||||||
if(!m_file.is_open()) {
|
if(!m_file.is_open()) {
|
||||||
cerr << "[Server] Perf-logging has been enabled"
|
cerr << "[Server] Perf-logging has been enabled"
|
||||||
<< "but I was not able to write the log-file \""
|
<< "but I was not able to write the log-file \""
|
||||||
<< PERFORMANCE_LOG_PATH << "\"." << endl;
|
<< SERVER_PERF_LOG_PATH << "\"." << endl;
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
unsigned counter = 0;
|
unsigned counter = 0;
|
||||||
@ -73,9 +73,9 @@ void JobServer::measure()
|
|||||||
while(!m_finish) {
|
while(!m_finish) {
|
||||||
// Format: 1st column (seconds)[TAB]2nd column (throughput)
|
// Format: 1st column (seconds)[TAB]2nd column (throughput)
|
||||||
m_file << counter << "\t" << (m_DoneCount - diff) << endl;
|
m_file << counter << "\t" << (m_DoneCount - diff) << endl;
|
||||||
counter += PERFORMANCE_STEPPING_SEC;
|
counter += SERVER_PERF_STEPPING_SEC;
|
||||||
diff = m_DoneCount;
|
diff = m_DoneCount;
|
||||||
sleep(PERFORMANCE_STEPPING_SEC);
|
sleep(SERVER_PERF_STEPPING_SEC);
|
||||||
}
|
}
|
||||||
// NOTE: Summing up the values written in the 2nd column does not
|
// NOTE: Summing up the values written in the 2nd column does not
|
||||||
// necessarily yield the number of completed experiments/jobs
|
// necessarily yield the number of completed experiments/jobs
|
||||||
|
|||||||
@ -1,39 +1,28 @@
|
|||||||
/**
|
|
||||||
* \brief The JobServer supplies the Minions with ExperimentData's
|
|
||||||
* and receives the result data.
|
|
||||||
*
|
|
||||||
* \author Martin Hoffmann, Richard Hellwig, Adrian Böckenkamp
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef __JOB_SERVER_H__
|
#ifndef __JOB_SERVER_H__
|
||||||
#define __JOB_SERVER_H__
|
#define __JOB_SERVER_H__
|
||||||
|
|
||||||
#include "controller/Minion.hpp"
|
#include "controller/Minion.hpp"
|
||||||
#include "util/SynchronizedQueue.hpp"
|
#include "util/SynchronizedQueue.hpp"
|
||||||
#include "util/SynchronizedCounter.hpp"
|
#include "util/SynchronizedCounter.hpp"
|
||||||
#include "util/SynchronizedMap.hpp"
|
#include "util/SynchronizedMap.hpp"
|
||||||
|
#include "config/FailConfig.hpp"
|
||||||
|
|
||||||
#include <list>
|
#include <list>
|
||||||
#ifndef __puma
|
#ifndef __puma
|
||||||
#include <boost/thread.hpp>
|
#include <boost/thread.hpp>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// TODO: This should be part of a "server-config".
|
|
||||||
#define SERVER_PERFORMANCE_MEASURE
|
|
||||||
#define PERFORMANCE_LOG_PATH "perf.dat"
|
|
||||||
#define PERFORMANCE_STEPPING_SEC 1
|
|
||||||
|
|
||||||
namespace fi {
|
namespace fi {
|
||||||
|
|
||||||
class CommThread;
|
class CommThread;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \class JobServer
|
* \class JobServer
|
||||||
* Manages the campaigns parameter distributions.
|
* The server supplies the Minions with ExperimentData's and receives the result data.
|
||||||
* The Campaign Controller can add experiment parameter sets,
|
*
|
||||||
* which the Jobserver will distribute to requesting clients.
|
* Manages the campaign's parameter distributions. The Campaign Controller can add
|
||||||
* The campaign controller can wait for all results, or a timeout.
|
* experiment parameter sets, which the JobServer will distribute to requesting
|
||||||
|
* clients. The campaign controller can wait for all results, or a timeout.
|
||||||
*/
|
*/
|
||||||
class JobServer
|
class JobServer
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user