From 5567c595fb4e6a5c82a60c9c07cbaa7719600a89 Mon Sep 17 00:00:00 2001 From: Florian Lukas Date: Wed, 12 Mar 2014 13:34:32 +0100 Subject: [PATCH] DatabaseCampaign: experiment completion checks If the queue for outbound jobs is not unlimited, experiment rows are fetched from the DB server continuously as experiments finish. When this takes too long, the connection to the DB server can be lost. The code did not check for a mysql_error and assumed the result set was fetched completely, thus skipping a potentially large number of experiments (in our case only ~20000 of 400000+ experiments were run). This change adds a check to determine whether the result fetch loop ended due to an error and compares the sent pilot count against the unfinished experiment count. Additionally, the MySQL result object is correctly freed. The underlying problem of MySQL connection loss can hopefully be prevented by increasing timeouts in the MySQL config as described in doc/how-to-build.txt. To prevent the problem from occurring when this is forgotten, this change reverts the default job queue length to be unlimited (SERVER_OUT_QUEUE_SIZE=0), at the cost of increased memory usage. 
Change-Id: I09d9faddd8190c6dd5fbe733a0679a733d5837ec --- src/core/config/CMakeLists.txt | 2 +- src/core/cpn/DatabaseCampaign.cc | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/core/config/CMakeLists.txt b/src/core/config/CMakeLists.txt index dc30d259..a2b181b6 100644 --- a/src/core/config/CMakeLists.txt +++ b/src/core/config/CMakeLists.txt @@ -25,7 +25,7 @@ OPTION(CONFIG_FAST_BREAKPOINTS "Enable fast breakpoints (requires break OPTION(CONFIG_FAST_WATCHPOINTS "Enable fast watchpoints (requires memory access events to be enabled)" OFF) SET(SERVER_COMM_HOSTNAME "localhost" CACHE STRING "Job-server hostname or IP") SET(SERVER_COMM_TCP_PORT "1111" CACHE STRING "Job-server TCP port") -SET(SERVER_OUT_QUEUE_SIZE "10000" CACHE STRING "Queue size for outbound jobs (0 = unlimited)") +SET(SERVER_OUT_QUEUE_SIZE "0" CACHE STRING "Queue size for outbound jobs (0 = unlimited)") SET(SERVER_PERF_LOG_PATH "perf.log" CACHE STRING "A file name for storing the server's performance log (CSV)") SET(SERVER_PERF_STEPPING_SEC "1" CACHE STRING "Stepping of performance measurements in seconds") SET(CLIENT_RAND_BACKOFF_TSTART "3" CACHE STRING "Lower limit of client's backoff phase in seconds") diff --git a/src/core/cpn/DatabaseCampaign.cc b/src/core/cpn/DatabaseCampaign.cc index b9832275..5e32d81d 100644 --- a/src/core/cpn/DatabaseCampaign.cc +++ b/src/core/cpn/DatabaseCampaign.cc @@ -200,6 +200,16 @@ bool DatabaseCampaign::run_variant(Database::Variant variant) { log_send << "pushed " << sent_pilots << " pilots into the queue" << std::endl; } } + + if (*mysql_error(db->getHandle())) { + log_send << "MYSQL ERROR: " << mysql_error(db->getHandle()) << std::endl; + return false; + } + + assert(experiment_count == sent_pilots && "ERROR: not all unfinished experiments pushed to queue"); + + mysql_free_result(pilots); + return true; }