core/efw: send back results earlier
The client now sends results back early (i.e., before all of its jobs are done) once the client response time (CLIENT_JOB_REQUEST_SEC) has been exceeded. This ensures that the results of extraordinarily long-running experiments are reported back before, e.g., the LIDO job timeout kills the Fail* instance. Change-Id: I3ada0360ec54b63f80a7008570ca514449720220
This commit is contained in:
@ -182,12 +182,23 @@ bool JobClient::sendResult(ExperimentData& result)
|
||||
m_results.push_back( temp_exp );
|
||||
|
||||
if (m_parameters.size() != 0) {
|
||||
//If job request time is over send back all existing results
|
||||
if (CLIENT_JOB_REQUEST_SEC < (double)m_job_runtime) {
|
||||
m_job_runtime_total += (double) m_job_runtime;
|
||||
m_job_runtime.reset();
|
||||
m_job_runtime.startTimer();
|
||||
m_job_total += m_results.size();
|
||||
sendResultsToServer();
|
||||
}
|
||||
|
||||
//If there are more jobs for the experiment store result
|
||||
return true;
|
||||
} else {
|
||||
//Stop time measurement and calculate new throughput
|
||||
m_job_runtime.stopTimer();
|
||||
m_job_throughput = 0.5 * m_job_throughput + 0.5*(CLIENT_JOB_REQUEST_SEC/((double)m_job_runtime/m_results.size()));
|
||||
m_job_runtime_total += (double) m_job_runtime;
|
||||
m_job_total += m_results.size();
|
||||
m_job_throughput = 0.5 * m_job_throughput + 0.5*(CLIENT_JOB_REQUEST_SEC/(m_job_runtime_total/m_job_total));
|
||||
|
||||
if (m_job_throughput > CLIENT_JOB_LIMIT) {
|
||||
m_job_throughput = CLIENT_JOB_LIMIT;
|
||||
@ -195,8 +206,10 @@ bool JobClient::sendResult(ExperimentData& result)
|
||||
m_job_throughput = 1;
|
||||
}
|
||||
|
||||
//Reset timer for new time measurement
|
||||
//Timer/Counter cleanup
|
||||
m_job_runtime.reset();
|
||||
m_job_runtime_total = 0;
|
||||
m_job_total = 0;
|
||||
|
||||
return sendResultsToServer();
|
||||
}
|
||||
|
||||
@ -31,7 +31,9 @@ private:
|
||||
uint64_t m_server_runid;
|
||||
|
||||
WallclockTimer m_job_runtime;
|
||||
double m_job_runtime_total;
|
||||
int m_job_throughput;
|
||||
int m_job_total;
|
||||
std::deque<ExperimentData*> m_parameters;
|
||||
std::deque<ExperimentData*> m_results;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user