Files
fail/src/core/efw/JobClient.cc
2012-11-30 16:50:02 +00:00

230 lines
6.4 KiB
C++

#include "JobClient.hpp"
using namespace std;
namespace fail {
/**
 * Creates a job client for the job server reachable at \c server : \c port.
 *
 * The server's host name is resolved once here; the resulting host entry is
 * what connectToServer() later copies the address from. The C random number
 * generator is seeded for the random reconnect backoff. If the host name
 * cannot be resolved the process is terminated with exit status 1.
 *
 * @param server host name (or address string) of the job server
 * @param port TCP port of the job server
 */
JobClient::JobClient(const std::string& server, int port)
{
    m_server_port = port;
    m_server = server;
    m_server_ent = gethostbyname(m_server.c_str());
    if (m_server_ent == NULL) {
        // gethostbyname() reports failures through h_errno, not errno, so
        // perror() would print an unrelated message here; herror() decodes
        // the resolver error instead.
        herror("[Client@gethostbyname()]");
        // TODO: Log-level?
        exit(1);
    }
    srand(time(NULL)); // needed for random backoff (see connectToServer)
    m_server_runid = 0; // server accepts this for virgin clients
    m_job_throughput = 1; // client gets only one job at the first request
}
/**
 * Opens a TCP connection to the job server resolved in the constructor and
 * stores the connected descriptor in \c m_sockfd.
 *
 * A failed connect() is retried up to CLIENT_RETRY_COUNT times, each time
 * after a random delay between CLIENT_RAND_BACKOFF_TSTART and
 * CLIENT_RAND_BACKOFF_TEND seconds (plus a sub-second jitter). Every attempt
 * uses a fresh socket, and the socket of a failed attempt is closed, so no
 * descriptor is left open when this function returns \c false.
 *
 * Terminates the process with exit status 1 if no socket can be created.
 *
 * @return \c true if the connection was established (the caller is then
 *         responsible for closing \c m_sockfd), \c false if all attempts
 *         failed
 */
bool JobClient::connectToServer()
{
    // The target address does not change between attempts; build it once.
    struct sockaddr_in serv_addr;
    memset(&serv_addr, 0, sizeof(serv_addr));
    serv_addr.sin_family = AF_INET;
    memcpy(&serv_addr.sin_addr.s_addr, m_server_ent->h_addr, m_server_ent->h_length);
    serv_addr.sin_port = htons(m_server_port);

    int retries = CLIENT_RETRY_COUNT;
    while (true) {
        // POSIX leaves the state of a socket unspecified after a failed
        // connect(), so each attempt gets its own socket.
        m_sockfd = socket(AF_INET, SOCK_STREAM, 0);
        if (m_sockfd < 0) {
            perror("[Client@socket()]");
            // TODO: Log-level?
            exit(1); // failure: report a non-zero status like the constructor does
        }

        /* Enable address reuse */
        int on = 1;
        setsockopt(m_sockfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));

        if (connect(m_sockfd, (sockaddr *)&serv_addr, sizeof(serv_addr)) == 0) {
            break; // connected! :-)
        }

        // Report the error before close() gets a chance to overwrite errno.
        perror("[Client@connect()]");
        // TODO: Log-level?
        close(m_sockfd); // do not leak the descriptor of the failed attempt

        if (retries <= 0) {
            cout << "[Client] Unable to reconnect (tried " << CLIENT_RETRY_COUNT << " times); "
                 << "I'll give it up!" << endl;
            // TODO: Log-level?
            return false; // finally: unable to connect, give it up :-(
        }

        // Wait CLIENT_RAND_BACKOFF_TSTART to RAND_BACKOFF_TEND seconds:
        int delay = rand() % (CLIENT_RAND_BACKOFF_TEND-CLIENT_RAND_BACKOFF_TSTART) + CLIENT_RAND_BACKOFF_TSTART;
        cout << "[Client] Retrying to connect to server in ~" << delay << "s..." << endl;
        // TODO: Log-level?
        sleep(delay);
        usleep(rand() % 1000000);
        --retries;
    }

    cout << "[Client] Connection established!" << endl;
    // TODO: Log-level?
    return true;
}
/**
 * Keeps asking for a parameter set until one is delivered or the request
 * is finally declined.
 *
 * @param exp receives the parameters on success
 * @return \c true if tryToGetExperimentData() answered WORK_FOLLOWS (the
 *         parameters are then set in \c exp), \c false for every answer
 *         other than WORK_FOLLOWS and COME_AGAIN
 */
bool JobClient::getParam(ExperimentData& exp)
{
    for (;;) { // Here we try to acquire a parameter set
        const FailControlMessage_Command answer = tryToGetExperimentData(exp);

        // Jobserver will send workload, params are set in \c exp
        if (answer == FailControlMessage::WORK_FOLLOWS) {
            return true;
        }

        // Anything but "come again" ends the polling
        if (answer != FailControlMessage::COME_AGAIN) {
            return false;
        }

        // Nothing to do right now, but maybe later
        sleep(1);
    }
}
/**
 * Makes a single attempt to obtain one parameter set.
 *
 * Parameter sets still queued in \c m_parameters from an earlier batch are
 * handed out first. Only when the queue is empty is the server contacted: a
 * NEED_WORK request for \c m_job_throughput jobs is sent, and if the server
 * answers WORK_FOLLOWS the announced jobs are received and queued. The first
 * queued job is then copied into \c exp and removed from the queue.
 *
 * The queue is only touched when it actually contains a job; a COME_AGAIN or
 * any other non-work answer is returned to the caller unchanged.
 *
 * @param exp receives the message and workload ID of the next job
 * @return WORK_FOLLOWS if \c exp was filled; DIE if the server could not be
 *         reached; COME_AGAIN if sending/receiving failed or the server
 *         delivered no job; otherwise the command sent by the server
 */
FailControlMessage_Command JobClient::tryToGetExperimentData(ExperimentData& exp)
{
    bool fresh_batch = false; // true if the job handed out starts a new batch

    // Are there other jobs for the experiment? If not, ask the server.
    if (m_parameters.size() == 0) {
        // Connection failed, minion can die
        if (!connectToServer()) {
            return FailControlMessage::DIE;
        }

        // Retrieve ExperimentData
        FailControlMessage ctrlmsg;
        ctrlmsg.set_command(FailControlMessage::NEED_WORK);
        ctrlmsg.set_build_id(42);
        ctrlmsg.set_run_id(m_server_runid);
        ctrlmsg.set_job_size(m_job_throughput); // Request for a number of jobs

        if (!SocketComm::sendMsg(m_sockfd, ctrlmsg)) {
            // Failed to send message? Retry.
            close(m_sockfd);
            return FailControlMessage::COME_AGAIN;
        }
        ctrlmsg.Clear();
        if (!SocketComm::rcvMsg(m_sockfd, ctrlmsg)) {
            // Failed to receive message? Retry.
            close(m_sockfd);
            return FailControlMessage::COME_AGAIN;
        }

        // now we know the current run ID
        m_server_runid = ctrlmsg.run_id();

        // No work announced: there is nothing to receive and nothing queued,
        // so hand the server's answer straight back instead of dereferencing
        // the front of an empty queue.
        if (ctrlmsg.command() != FailControlMessage::WORK_FOLLOWS) {
            close(m_sockfd);
            return ctrlmsg.command();
        }

        for (uint32_t i = 0; i < ctrlmsg.job_size(); i++) {
            ExperimentData* temp_exp = new ExperimentData(exp.getMessage().New());
            if (!SocketComm::rcvMsg(m_sockfd, temp_exp->getMessage())) {
                // Failed to receive message? Retry. The half-received job is
                // not queued, so it has to be released here.
                delete &temp_exp->getMessage();
                delete temp_exp;
                close(m_sockfd);
                return FailControlMessage::COME_AGAIN;
            }
            temp_exp->setWorkloadID(ctrlmsg.workloadid(i)); // Store workload id of experiment data
            m_parameters.push_back(temp_exp);
        }
        close(m_sockfd);

        // WORK_FOLLOWS with zero jobs: nothing to hand out right now.
        if (m_parameters.size() == 0) {
            return FailControlMessage::COME_AGAIN;
        }
        fresh_batch = true;
    }

    // Take front from m_parameters and copy to exp.
    exp.getMessage().CopyFrom(m_parameters.front()->getMessage());
    exp.setWorkloadID(m_parameters.front()->getWorkloadID());
    // Delete front element of m_parameters (message first, then its wrapper)
    delete &m_parameters.front()->getMessage();
    delete m_parameters.front();
    m_parameters.erase(m_parameters.begin());

    if (fresh_batch) {
        // start time measurement for throughput calculation
        m_job_runtime.startTimer();
    }
    return FailControlMessage::WORK_FOLLOWS;
}
/**
 * Queues a copy of \c result and, once the current batch of jobs is used up,
 * sends all queued results back to the server.
 *
 * While \c m_parameters still holds jobs, the result is only stored. When it
 * is empty, the timer started for the batch is stopped, \c m_job_throughput
 * (the number of jobs requested next time) is recomputed and clamped to
 * [1, CLIENT_JOB_LIMIT_SEC], and a RESULT_FOLLOWS control message followed by
 * every queued result is sent over a new connection.
 *
 * Results are removed from the queue only after they were sent successfully;
 * if the connection or any send fails, the unsent results stay in
 * \c m_results.
 *
 * @param result the experiment result to report
 * @return \c true if the result was queued or all queued results were sent,
 *         \c false if the server could not be reached or a send failed
 */
bool JobClient::sendResult(ExperimentData& result)
{
    // Create new ExperimentData for result
    ExperimentData* temp_exp = new ExperimentData(result.getMessage().New());
    temp_exp->getMessage().CopyFrom(result.getMessage());
    temp_exp->setWorkloadID(result.getWorkloadID());
    m_results.push_back(temp_exp);

    // If there are more jobs for the experiment, only store the result
    if (m_parameters.size() != 0) {
        return true;
    }

    // Stop time measurement and calculate new throughput. The value is
    // clamped before it is stored: a zero runtime must not lead to a
    // division by zero, and an out-of-range quotient must never be assigned
    // to m_job_throughput.
    m_job_runtime.stopTimer();
    const double elapsed = static_cast<double>(m_job_runtime);
    double throughput = CLIENT_JOB_LIMIT_SEC;
    if (elapsed > 0) {
        throughput = CLIENT_JOB_REQUEST_SEC / (elapsed / m_results.size());
    }
    if (throughput > CLIENT_JOB_LIMIT_SEC) {
        throughput = CLIENT_JOB_LIMIT_SEC;
    }
    if (throughput < 1) {
        throughput = 1;
    }
    m_job_throughput = throughput;

    // Reset timer for new time measurement
    m_job_runtime.reset();

    if (!connectToServer()) {
        return false;
    }

    // Send back results
    FailControlMessage ctrlmsg;
    ctrlmsg.set_command(FailControlMessage::RESULT_FOLLOWS);
    ctrlmsg.set_build_id(42);
    ctrlmsg.set_run_id(m_server_runid);
    ctrlmsg.set_job_size(m_results.size()); // Store how many results will be sent

    cout << "[Client] Sending back result [";
    for (uint32_t i = 0; i < m_results.size(); i++) {
        ctrlmsg.add_workloadid(m_results[i]->getWorkloadID());
        cout << std::dec << m_results[i]->getWorkloadID();
        cout << " ";
    }
    cout << "]";
    // TODO: Log-level?

    // Stop at the first failed send; 'sent' counts the results that were
    // transmitted (and therefore released).
    const uint32_t announced = ctrlmsg.job_size();
    uint32_t sent = 0;
    bool ok = SocketComm::sendMsg(m_sockfd, ctrlmsg);
    while (ok && sent < announced) {
        ExperimentData* res = m_results[sent];
        ok = SocketComm::sendMsg(m_sockfd, res->getMessage());
        if (ok) {
            delete &res->getMessage();
            delete res;
            ++sent;
        }
    }
    // Drop the transmitted results in one go; unsent ones remain queued.
    m_results.erase(m_results.begin(), m_results.begin() + sent);

    // Close connection.
    close(m_sockfd);
    return ok;
}
} // end-of-namespace: fail