diff --git a/core/jobserver/JobClient.cc b/core/jobserver/JobClient.cc
index 47c0b2d3..8c975873 100644
--- a/core/jobserver/JobClient.cc
+++ b/core/jobserver/JobClient.cc
@@ -12,7 +12,7 @@ JobClient::JobClient(std::string server, int port)
 		perror("[Client@gethostbyname()]");
 		exit(1);
 	}
-	srand(time(NULL));
+	srand(time(NULL)); // needed for random backoff (see connectToServer)
 }
 
 bool JobClient::connectToServer()
@@ -34,22 +34,25 @@ bool JobClient::connectToServer()
 	memcpy(&serv_addr.sin_addr.s_addr, m_server_ent->h_addr, m_server_ent->h_length);
 	serv_addr.sin_port = htons(m_server_port);
 
-	int retries = 3;
+	int retries = RETRY_COUNT;
 	while(true) {
 		if(connect(m_sockfd, (sockaddr *)&serv_addr, sizeof(serv_addr)) < 0) {
 			perror("[Client@connect()]");
 			if(retries > 0) {
-				int delay = rand() % 5 + 3;
+				// Wait RAND_BACKOFF_TSTART (inclusive) to RAND_BACKOFF_TEND (exclusive) seconds:
+				int delay = rand() % (RAND_BACKOFF_TEND-RAND_BACKOFF_TSTART) + RAND_BACKOFF_TSTART;
 				cout << "[Client] Retrying to connect to server in " << delay << "s..." << endl;
 				sleep(delay);
 				--retries;
 				continue;
 			}
+			cout << "[Client] Unable to reconnect (tried " << RETRY_COUNT << " times); "
+			     << "I'll give it up!" << endl;
 			return false; // finally: unable to connect, give it up :-(
 		}
 		break; // connected! :-)
 	}
-	cout << "[Client] Connected established!" << endl;
+	cout << "[Client] Connection established!" << endl;
 
 	return true;
 }
diff --git a/core/jobserver/JobClient.hpp b/core/jobserver/JobClient.hpp
index e9e91018..e17f0df7 100644
--- a/core/jobserver/JobClient.hpp
+++ b/core/jobserver/JobClient.hpp
@@ -17,6 +17,11 @@
 #include "controller/ExperimentData.hpp"
 #include "jobserver/messagedefs/FailControlMessage.pb.h"
 
+// FIXME This should be part of a "client config" (?).
+#define RAND_BACKOFF_TSTART 3 // shortest reconnect delay in seconds (inclusive)
+#define RAND_BACKOFF_TEND 8 // upper bound of the reconnect delay in seconds (exclusive)
+#define RETRY_COUNT 3 // number of reconnect attempts after the first failed connect()
+
 namespace fi {
 
 /**
diff --git a/core/jobserver/SocketComm.hpp b/core/jobserver/SocketComm.hpp
index 5863c39a..86b59361 100644
--- a/core/jobserver/SocketComm.hpp
+++ b/core/jobserver/SocketComm.hpp
@@ -32,7 +32,7 @@ public:
 
 	/**
 	 * Receive Protobuf-generated message
-	 * @param sockfd open socket descriptor to write to
+	 * @param sockfd open socket descriptor to read from
 	 * @param Msg Reference to Protobuf generated message type
 	 * \return false if message reception failed
 	 */