From 6f98d64613d0c2d72b57eb82d586ec9cfa7c1132 Mon Sep 17 00:00:00 2001 From: hellwig Date: Mon, 12 Nov 2012 11:46:26 +0000 Subject: [PATCH] bugfix: racecondition removed git-svn-id: https://www4.informatik.uni-erlangen.de/i4svn/danceos/trunk/devel/fail@1921 8c4709b5-6ec9-48aa-a5cd-a96041d1645a --- src/core/cpn/JobServer.cc | 33 +++++++++++++++------------------ src/core/cpn/JobServer.hpp | 7 ++++--- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/src/core/cpn/JobServer.cc b/src/core/cpn/JobServer.cc index d04d99d8..dba6ecc3 100644 --- a/src/core/cpn/JobServer.cc +++ b/src/core/cpn/JobServer.cc @@ -27,6 +27,7 @@ void JobServer::addParam(ExperimentData* exp) { #ifndef __puma m_undoneJobs.Enqueue(exp); + m_inOutCounter.increment(); #endif } @@ -34,24 +35,28 @@ void JobServer::addParam(ExperimentData* exp) volatile unsigned JobServer::m_DoneCount = 0; #endif +#ifndef __puma +boost::mutex CommThread::m_CommMutex; +#endif + ExperimentData *JobServer::getDone() { - // FIXME race condition, need to synchronize with - // sendPendingExperimentData() and receiveExperimentResults() + #ifndef __puma + boost::unique_lock lock(CommThread::m_CommMutex); + if (m_undoneJobs.Size() == 0 && noMoreExperiments() && m_runningJobs.Size() == 0 - && m_doneJobs.Size() == 0) { - // FRICKEL workaround - sleep(1); - ExperimentData *exp = NULL; - if (m_doneJobs.Dequeue_nb(exp)) { - return exp; - } + && m_doneJobs.Size() == 0 + && m_inOutCounter.getValue() == 0) { return 0; } - return m_doneJobs.Dequeue(); + + ExperimentData *exp = NULL; + exp = m_doneJobs.Dequeue(); + m_inOutCounter.decrement(); + return exp; #endif } @@ -232,10 +237,6 @@ void CommThread::operator()() close(m_sock); } -#ifndef __puma -boost::mutex CommThread::m_CommMutex; -#endif // __puma - void CommThread::sendPendingExperimentData(Minion& minion) { FailControlMessage ctrlmsg; @@ -297,10 +298,6 @@ void CommThread::sendPendingExperimentData(Minion& minion) void CommThread::receiveExperimentResults(Minion& minion, uint32_t workloadID) { -#ifndef __puma - boost::unique_lock lock(m_CommMutex); -#endif - ExperimentData* exp = NULL; // Get exp* from running jobs //cout << "<<[Server] Received result for workload id [" << workloadID << "]" << endl; cout << "<<[" << workloadID << "] " << flush; diff --git a/src/core/cpn/JobServer.hpp b/src/core/cpn/JobServer.hpp index 8b80c5e7..65b5b3b3 100644 --- a/src/core/cpn/JobServer.hpp +++ b/src/core/cpn/JobServer.hpp @@ -55,6 +55,7 @@ private: boost::thread* m_measureThread; //! the performance measurement thread #endif #endif + SynchronizedCounter m_inOutCounter; //! Atomic counter for Workload IDs. SynchronizedCounter m_counter; //! Map of running jobs (referenced by Workload ID @@ -139,9 +140,6 @@ class CommThread { private: int m_sock; //! Socket descriptor of the connection JobServer& m_js; //! Calling jobserver -#ifndef __puma - static boost::mutex m_CommMutex; //! to synchronise the communication -#endif // __puma // FIXME: Concerns are not really separated yet ;) /** @@ -160,6 +158,9 @@ private: */ void receiveExperimentResults(Minion& minion, uint32_t workloadID); public: +#ifndef __puma + static boost::mutex m_CommMutex; //! to synchronise the communication +#endif // __puma CommThread(int sockfd, JobServer& p) : m_sock(sockfd), m_js(p) { } /**