JobServer: print completion percentage and ETA

If the campaign provides the JobServer with a total number of experiments,
the JobServer now prints a completion percentage and an estimated
remaining runtime alongside the usual progress reports.

Change-Id: Ibd781ba8bff9af3a85683bbd29728216e316da57
This commit is contained in:
Horst Schirmeier
2018-08-03 18:24:36 +02:00
parent f89794329c
commit a547b0d5b4
5 changed files with 46 additions and 1 deletions

View File

@ -42,5 +42,15 @@ void CampaignManager::noMoreParameters()
m_jobserver->setNoMoreExperiments();
}
// Announces the total number of jobs to expect by forwarding \c count
// unchanged to m_jobserver's setTotalCount().
void CampaignManager::setTotalCount(uint64_t count)
{
m_jobserver->setTotalCount(count);
}
// Reports that \c count previously announced jobs will not be sent after
// all by forwarding \c count unchanged to m_jobserver's skipJobs().
void CampaignManager::skipJobs(uint64_t count)
{
m_jobserver->skipJobs(count);
}
// Signals that the user campaign has finished by calling m_jobserver's done().
void CampaignManager::done() { m_jobserver->done(); }
} // end-of-namespace: fail

View File

@ -64,6 +64,15 @@ public:
* Signal, that there will not come any further parameter sets.
*/
void noMoreParameters();
/**
* Can optionally be used to tell the JobServer how many jobs to expect in
* total. This count is used for progress reporting. Make sure you also
* call skipJobs() if some of these early-on announced jobs will not be
* sent after all (e.g. because the campaign already found results for them
* in the database).
*/
void setTotalCount(uint64_t count);
/**
 * Tells the JobServer that \c count of the jobs announced via
 * setTotalCount() will not be sent after all (e.g. because the campaign
 * already found results for them in the database).
 * @param count number of announced jobs that are skipped
 */
void skipJobs(uint64_t count);
/**
* User campaign has finished.
*/

View File

@ -215,6 +215,7 @@ bool DatabaseCampaign::run_variant(Database::Variant variant) {
log_send << "Found " << experiment_count << " jobs in database. ("
<< variant.variant << "/" << variant.benchmark << ")" << std::endl;
campaignmanager.setTotalCount(experiment_count);
// abstraction of injection point:
// must not be initialized in loop, because hop chain calculator would lose
@ -229,6 +230,7 @@ bool DatabaseCampaign::run_variant(Database::Variant variant) {
unsigned pilot_id = strtoul(row[0], NULL, 10);
if (existing_results_for_pilot(pilot_id) == expected_results) {
skipped_pilots++;
campaignmanager.skipJobs(1);
continue;
}

View File

@ -276,9 +276,9 @@ void CommThread::print_progress(const enum ProgressType type,
const uint32_t w_id, const uint32_t count)
{
using namespace std::chrono;
static system_clock::time_point last;
const auto now = system_clock::now();
const auto delay = milliseconds{500};
static system_clock::time_point last = system_clock::now() - delay;
if (last + delay > now) {
return;
@ -297,6 +297,19 @@ void CommThread::print_progress(const enum ProgressType type,
<< std::setw(6) << m_js.m_runningJobs.Size() << " run/"
<< std::setw(6) << donecount_cur << " tot/ ("
<< std::setw(6) << std::setprecision(1) << std::fixed << rate << "/s) ";
if (m_js.m_TotalCount) {
float percentage = (float) donecount_cur / *m_js.m_TotalCount * 100.0;
std::cout << std::setw(4) << std::setprecision(1) << std::fixed << percentage << "%";
if (rate > 0) {
float ETA_s = std::max(.0f, (*m_js.m_TotalCount - donecount_cur) / rate);
std::cout << " (ETA " << std::dec
<< std::setw(2) << std::setfill('0') << ((int64_t)ETA_s / (60*60)) << ':'
<< std::setw(2) << std::setfill('0') << (((int64_t)ETA_s / 60) % 60) << ':'
<< std::setw(2) << std::setfill('0') << ((int64_t)ETA_s % 60)
<< std::setfill(' ') << ')';
}
std::cout << " ";
}
const char *sep;
if (type == ProgressType::Send) {
sep = " >";

View File

@ -16,6 +16,7 @@
#ifndef __puma
#include <boost/thread.hpp>
#endif
#include <boost/optional.hpp>
namespace fail {
@ -52,6 +53,7 @@ private:
uint64_t m_runid;
volatile uint64_t m_DoneCount = 0; //! the number of finished jobs
boost::optional<uint64_t> m_TotalCount; //! the total number of jobs to be expected
#ifdef SERVER_PERFORMANCE_MEASURE
#ifndef __puma
boost::thread* m_measureThread; //! the performance measurement thread
@ -114,6 +116,15 @@ public:
* distribution.
*/
void setNoMoreExperiments();
/**
 * Can optionally be used to tell the JobServer how many jobs to expect in
 * total. This count is used for progress reporting. Make sure you also
 * call skipJobs() if some of these early-on announced jobs will not be
 * sent after all (e.g. because the campaign already found results for them
 * in the database).
 *
 * The total is stored in an optional member that stays unset until this
 * method is called; calling it again overwrites the previous value.
 * @param count the total number of jobs to be expected
 */
void setTotalCount(uint64_t count) { m_TotalCount = count; }
/**
 * Accounts for jobs that were announced via setTotalCount() but will not
 * be sent after all (e.g. because the campaign already found results for
 * them in the database) by adding them to the finished-jobs counter.
 *
 * The counter is advanced by the full \c count, not by a fixed 1, so a
 * caller may skip several jobs with a single call.
 * @param count number of announced jobs that are skipped
 */
void skipJobs(uint64_t count) { m_DoneCount += count; /* FIXME assume atomic */ }
/**
* Checks whether there are no more experiment parameter sets.
* @return \c true if no more parameter sets available, \c false otherwise