jobserver: improved job retry

FIXME: the linear complexity of picking a job is not really hurting here, but it is not pretty.

git-svn-id: https://www4.informatik.uni-erlangen.de/i4svn/danceos/trunk/devel/fail@1312 8c4709b5-6ec9-48aa-a5cd-a96041d1645a
2012-06-06 14:27:23 +00:00
parent 08ec3548af
commit cfa6153b56
2 changed files with 37 additions and 17 deletions
--- a/core/jobserver/JobServer.cc
+++ b/core/jobserver/JobServer.cc
@@ -244,12 +244,12 @@ void CommThread::sendPendingExperimentData(Minion& minion)
  #ifndef __puma
 	boost::unique_lock<boost::mutex> lock(m_CommMutex);
  #endif
-	if((exp = m_js.m_runningJobs.first()) != NULL) { // 2nd priority
-		// (This simply gets the first running-job.)
-		// TODO: Improve selection of parameter-set to be resend (the first is not
-		//       necessarily the best...especially when the specific parameter-set
-		//       causes the experiment-client to terminate abnormally -> endless loop!)
-		//       Further ideas: sequential, random, ...? (+ "retry-counter" for each job)
+	if((exp = m_js.m_runningJobs.pickone()) != NULL) { // 2nd priority
+		// (This picks one running job.)
+		// TODO: Improve selection of parameter set to be resent:
+		//  -  currently: Linear complexity!
+		//  -  pick entry at random?
+		//  -  retry counter for each job?

 		// Implement resend of running-parameter sets to improve campaign speed
 		// and to prevent result loss due to (unexpected) termination of experiment
--- a/core/util/SynchronizedMap.hpp
+++ b/core/util/SynchronizedMap.hpp
@@ -20,26 +20,46 @@ private:
 #ifndef __puma
 	boost::mutex m_mutex;				// The mutex to synchronise on
 #endif
-	public:
-		int Size(){
+
+	int nextpick;
+	enum { pick_window_size = 500 };
+
+public:
+	SynchronizedMap() : nextpick(0) {}
+	int Size(){
 #ifndef __puma
-			boost::unique_lock<boost::mutex> lock(m_mutex);
+		boost::unique_lock<boost::mutex> lock(m_mutex);
 #endif
-			return m_map.size();
-		}
+		return m_map.size();
+	}
 	/**
-	 * Retrieves the first element in the map.
-	 * @return a pointer to the first element, or \c NULL if empty
+	 * Picks one element from the map, cycling through a window of at most pick_window_size entries at the beginning.
+	 * @return the mapped value of the picked entry, or \c NULL if the map is empty
 	 */
-	Tvalue first()
+	Tvalue pickone()
 	{
 	  #ifndef __puma
 		boost::unique_lock<boost::mutex> lock(m_mutex);
 	  #endif
-		if(m_map.size() > 0)
-			return m_map.begin()->second;
-		else
+		if (m_map.size() == 0) {
 			return NULL;
+		}
+
+		// XXX not really elegant: the iterator is advanced step by step, i.e. linear in nextpick (which stays below pick_window_size)
+		typename Tmap::iterator it = m_map.begin();
+		for (int i = 0; i < nextpick; ++i) {
+			++it;
+			if (it == m_map.end()) {
+				it = m_map.begin();
+				nextpick = 0;
+				break;
+			}
+		}
+		++nextpick;
+		if (nextpick >= pick_window_size) {
+			nextpick = 0;
+		}
+		return it->second;
 	} // Lock is automatically released here
 	/**
 	 * Add data to the map, return false if already present