core/efw: send back results earlier

The client now sends results back early (i.e., before all of its jobs
are done) once the elapsed job runtime exceeds the client response
time (CLIENT_JOB_REQUEST_SEC). This ensures that extraordinarily
long-running experiments get reported back before, e.g., the LIDO job
timeout kills the Fail* instance.

Change-Id: I3ada0360ec54b63f80a7008570ca514449720220
This commit is contained in:
Richard Hellwig
2013-06-12 15:51:07 +02:00
parent eeeeb92ee7
commit 12f9915d1c
2 changed files with 17 additions and 2 deletions

View File

@ -182,12 +182,23 @@ bool JobClient::sendResult(ExperimentData& result)
m_results.push_back( temp_exp );
if (m_parameters.size() != 0) {
//If job request time is over send back all existing results
if (CLIENT_JOB_REQUEST_SEC < (double)m_job_runtime) {
m_job_runtime_total += (double) m_job_runtime;
m_job_runtime.reset();
m_job_runtime.startTimer();
m_job_total += m_results.size();
sendResultsToServer();
}
//If there are more jobs for the experiment store result
return true;
} else {
//Stop time measurement and calculate new throughput
m_job_runtime.stopTimer();
m_job_throughput = 0.5 * m_job_throughput + 0.5*(CLIENT_JOB_REQUEST_SEC/((double)m_job_runtime/m_results.size()));
m_job_runtime_total += (double) m_job_runtime;
m_job_total += m_results.size();
m_job_throughput = 0.5 * m_job_throughput + 0.5*(CLIENT_JOB_REQUEST_SEC/(m_job_runtime_total/m_job_total));
if (m_job_throughput > CLIENT_JOB_LIMIT) {
m_job_throughput = CLIENT_JOB_LIMIT;
@ -195,8 +206,10 @@ bool JobClient::sendResult(ExperimentData& result)
m_job_throughput = 1;
}
//Reset timer for new time measurement
//Timer/Counter cleanup
m_job_runtime.reset();
m_job_runtime_total = 0;
m_job_total = 0;
return sendResultsToServer();
}

View File

@ -31,7 +31,9 @@ private:
uint64_t m_server_runid;
WallclockTimer m_job_runtime;
double m_job_runtime_total;
int m_job_throughput;
int m_job_total;
std::deque<ExperimentData*> m_parameters;
std::deque<ExperimentData*> m_results;