jobserver: improved job retry

FIXME: the linear complexity is not really hurting here, but it is not pretty.

git-svn-id: https://www4.informatik.uni-erlangen.de/i4svn/danceos/trunk/devel/fail@1312 8c4709b5-6ec9-48aa-a5cd-a96041d1645a
2012-06-06 14:27:23 +00:00
parent 08ec3548af
commit cfa6153b56
2 changed files with 37 additions and 17 deletions
--- a/core/jobserver/JobServer.cc
+++ b/core/jobserver/JobServer.cc
@@ -244,12 +244,12 @@ void CommThread::sendPendingExperimentData(Minion& minion)
  #ifndef __puma
 	boost::unique_lock<boost::mutex> lock(m_CommMutex);
  #endif
-	if((exp = m_js.m_runningJobs.first()) != NULL) { // 2nd priority
-		// (This simply gets the first running-job.)
-		// TODO: Improve selection of parameter-set to be resend (the first is not
-		//       necessarily the best...especially when the specific parameter-set
-		//       causes the experiment-client to terminate abnormally -> endless loop!)
-		//       Further ideas: sequential, random, ...? (+ "retry-counter" for each job)
+	if((exp = m_js.m_runningJobs.pickone()) != NULL) { // 2nd priority
+		// (This picks one running job.)
+		// TODO: Improve selection of parameter set to be resent:
+		//  -  currently: Linear complexity!
+		//  -  pick entry at random?
+		//  -  retry counter for each job?

 		// Implement resend of running-parameter sets to improve campaign speed
 		// and to prevent result loss due to (unexpected) termination of experiment