diff --git a/src/core/cpn/DatabaseCampaign.cc b/src/core/cpn/DatabaseCampaign.cc index b4a7a55e..e31b3a78 100644 --- a/src/core/cpn/DatabaseCampaign.cc +++ b/src/core/cpn/DatabaseCampaign.cc @@ -1,3 +1,5 @@ +#include + #include "DatabaseCampaign.hpp" #include "cpn/CampaignManager.hpp" #include "util/CommandLine.hpp" @@ -28,12 +30,21 @@ bool DatabaseCampaign::run() { line interface */ if (!cb_commandline_init()) return false; - CommandLine::option_handle VARIANT = cmd.addOption("v", "variant", Arg::Required, - "-v/--variant \tVariant label (default: \"none\"; use % and _ as wildcard characters)"); - CommandLine::option_handle BENCHMARK = cmd.addOption("b", "benchmark", Arg::Required, - "-b/--benchmark \tBenchmark label (default: \"none\"; use % and _ as wildcard characters)\n"); - CommandLine::option_handle PRUNER = cmd.addOption("p", "prune-method", Arg::Required, - "-p/--prune-method \tWhich import method to use (default: basic)"); + CommandLine::option_handle VARIANT = + cmd.addOption("v", "variant", Arg::Required, + "-v/--variant \tVariant label (default: \"%\"; use % and _ as wildcard characters; may be used more than once)"); + CommandLine::option_handle VARIANT_EXCLUDE = + cmd.addOption("", "variant-exclude", Arg::Required, + "--variant-exclude \tVariant to exclude (default: UNSET; use % and _ as wildcard characters; may be used more than once)"); + CommandLine::option_handle BENCHMARK = + cmd.addOption("b", "benchmark", Arg::Required, + "-b/--benchmark \tBenchmark label (default: \"%\"; use % and _ as wildcard characters; may be used more than once)"); + CommandLine::option_handle BENCHMARK_EXCLUDE = + cmd.addOption("", "benchmark-exclude", Arg::Required, + "--benchmark-exclude \tBenchmark to exclude (default: UNSET; use % and _ as wildcard characters; may be used more than once)"); + CommandLine::option_handle PRUNER = + cmd.addOption("p", "prune-method", Arg::Required, + "-p/--prune-method \tWhich import method to use (default: basic)"); if (!cmd.parse()) { log_send << "Error parsing arguments." << std::endl; @@ -45,25 +56,49 @@ bool DatabaseCampaign::run() { exit(0); } - std::string variant, benchmark, pruner; + std::vector variants, benchmarks, variants_exclude, benchmarks_exclude; + if (cmd[VARIANT]) { + for (option::Option *o = cmd[VARIANT]; o; o = o->next()) { + variants.push_back(std::string(o->arg)); + } + } - if (cmd[VARIANT].count() > 0) - variant = std::string(cmd[VARIANT].first()->arg); - else - variant = "none"; + if (cmd[VARIANT_EXCLUDE]) { + for (option::Option *o = cmd[VARIANT_EXCLUDE]; o; o = o->next()) { + variants_exclude.push_back(std::string(o->arg)); + } + } - if (cmd[BENCHMARK].count() > 0) - benchmark = std::string(cmd[BENCHMARK].first()->arg); - else - benchmark = "none"; + // fallback + if (variants.size() == 0) { + variants.push_back("%"); + } - if (cmd[PRUNER].count() > 0) + if (cmd[BENCHMARK]) { + for (option::Option *o = cmd[BENCHMARK]; o; o = o->next()) { + benchmarks.push_back(std::string(o->arg)); + } + } + + if (cmd[BENCHMARK_EXCLUDE]) { + for (option::Option *o = cmd[BENCHMARK_EXCLUDE]; o; o = o->next()) { + benchmarks_exclude.push_back(std::string(o->arg)); + } + } + + // fallback + if (benchmarks.size() == 0) { + benchmarks.push_back("%"); + } + + std::string pruner; + if (cmd[PRUNER]) { pruner = std::string(cmd[PRUNER].first()->arg); - else + } else { pruner = "basic"; + } db = Database::cmdline_connect(); - log_send << "Variant to use " << variant << "/" << benchmark << std::endl; fspmethod_id = db->get_fspmethod_id(pruner); log_send << "Pruner to use " << pruner << " (ID: " << fspmethod_id << ")" << std::endl; @@ -79,15 +114,14 @@ bool DatabaseCampaign::run() { boost::thread collect_thread(&DatabaseCampaign::collect_result_thread, this); #endif - std::vector variants = db->get_variants(variant, benchmark); - + std::vector variantlist = + db->get_variants(variants, variants_exclude, benchmarks, benchmarks_exclude); // Which Pilots were already processed? - load_completed_pilots(variants); - - for (std::vector::const_iterator it = variants.begin(); - it != variants.end(); ++it) { + load_completed_pilots(variantlist); + for (std::vector::const_iterator it = variantlist.begin(); + it != variantlist.end(); ++it) { // Push all other variants to the queue if (!run_variant(*it)) { log_send << "run_variant failed for " << it->variant << "/" << it->benchmark < *aliases) { + aliases->push_back("BasicPruner"); + aliases->push_back("basic"); + } +}; + +class BasicPrunerLeft : public BasicPruner { +public: + BasicPrunerLeft() : BasicPruner(true) {} + void getAliases(std::deque *aliases) { + aliases->push_back("BasicPrunerLeft"); + aliases->push_back("basic-left"); + } }; #endif diff --git a/tools/prune-trace/FESamplingPruner.hpp b/tools/prune-trace/FESamplingPruner.hpp index 6d4dc0cf..a9538622 100644 --- a/tools/prune-trace/FESamplingPruner.hpp +++ b/tools/prune-trace/FESamplingPruner.hpp @@ -24,6 +24,11 @@ public: virtual bool commandline_init(); virtual bool prune_all(); + void getAliases(std::deque *aliases) { + aliases->push_back("FESamplingPruner"); + aliases->push_back("sampling"); + } + private: bool sampling_prune(const fail::Database::Variant& variant); }; diff --git a/tools/prune-trace/Pruner.cc b/tools/prune-trace/Pruner.cc index 644a9d0b..3e8c7e10 100644 --- a/tools/prune-trace/Pruner.cc +++ b/tools/prune-trace/Pruner.cc @@ -8,38 +8,72 @@ static Logger LOG ("Pruner"); #include "Pruner.hpp" -bool Pruner::init(fail::Database *db, +bool Pruner::init( const std::vector& variants, const std::vector& variants_exclude, const std::vector& benchmarks, - const std::vector& benchmarks_exclude) + const std::vector& benchmarks_exclude, + bool overwrite) { - this->db = db; - m_variants = db->get_variants( variants, variants_exclude, benchmarks, benchmarks_exclude); + + if (!(m_method_id = db->get_fspmethod_id(method_name()))) { + return false; + } + LOG << "Pruning with method " << method_name() << " (ID: " << m_method_id << ")" + << std::endl; + + // make sure we only prune variants that haven't been pruned previously + // (unless we run with --overwrite) + if (!overwrite) { + for (std::vector::iterator it = m_variants.begin(); + it != m_variants.end(); ) { + std::stringstream ss; + MYSQL_RES *res; + ss << "(SELECT variant_id FROM fsppilot WHERE " + << " variant_id = " << it->id << " AND " + << " fspmethod_id = " << m_method_id + << " LIMIT 1)" + << " UNION ALL " + << "(SELECT variant_id FROM fspgroup WHERE " + << " variant_id = " << it->id << " AND " + << " fspmethod_id = " << m_method_id + << " LIMIT 1)"; + if (!(res = db->query(ss.str().c_str(), true))) { + return false; + } + if (mysql_num_rows(res) > 0) { + // skip this variant + LOG << "skipping " << it->variant << "/" << it->benchmark + << " due to existing pruning data (use --overwrite to skip this check)" + << std::endl; + it = m_variants.erase(it); + } else { + ++it; + } + } + } + + // any variants left? if (m_variants.size() == 0) { LOG << "no variants found, nothing to do" << std::endl; return false; } - std::stringstream ss; + // construct comma-separated list usable in SQL "IN (...)" + std::stringstream commalist; for (std::vector::const_iterator it = m_variants.begin(); it != m_variants.end(); ++it) { + if (it != m_variants.begin()) { - ss << ","; + commalist << ","; } - ss << it->id; + commalist << it->id; } - m_variants_sql = ss.str(); + m_variants_sql = commalist.str(); - if (!(m_method_id = db->get_fspmethod_id(method_name()))) { - return false; - } - - LOG << "Pruning with method " << method_name() << " (ID: " << m_method_id << ")" - << std::endl; return true; } diff --git a/tools/prune-trace/Pruner.hpp b/tools/prune-trace/Pruner.hpp index e6e1f5dc..a0ec044d 100644 --- a/tools/prune-trace/Pruner.hpp +++ b/tools/prune-trace/Pruner.hpp @@ -4,8 +4,9 @@ #include #include #include "util/Database.hpp" +#include "util/AliasedRegisterable.hpp" -class Pruner { +class Pruner : public fail::AliasedRegisterable { protected: int m_method_id; fail::Database *db; @@ -13,11 +14,14 @@ protected: std::string m_variants_sql; public: - bool init(fail::Database *db, + void set_db(fail::Database *db) { this->db = db; } + + bool init( const std::vector& variants, const std::vector& variants_exclude, const std::vector& benchmarks, - const std::vector& benchmarks_exclude); + const std::vector& benchmarks_exclude, + bool overwrite); /** * Callback function that can be used to add command line options diff --git a/tools/prune-trace/main.cc b/tools/prune-trace/main.cc index 55d4c47a..22b2b6bf 100644 --- a/tools/prune-trace/main.cc +++ b/tools/prune-trace/main.cc @@ -4,6 +4,8 @@ #include "util/CommandLine.hpp" #include "util/Logger.hpp" +#include "util/AliasedRegistry.hpp" + static fail::Logger LOG("prune-trace", true); using namespace fail; @@ -16,6 +18,17 @@ using std::endl; int main(int argc, char *argv[]) { std::string username, hostname, database; + // register possible Pruners + AliasedRegistry registry; + BasicPruner basicpruner; + registry.add(&basicpruner); + BasicPrunerLeft basicprunerleft; + registry.add(&basicprunerleft); + FESamplingPruner fesamplingpruner; + registry.add(&fesamplingpruner); + + std::string pruners = registry.getPrimeAliasesCSV(); + // Manually fill the command line option parser CommandLine &cmd = CommandLine::Inst(); for (int i = 1; i < argc; ++i) { @@ -29,22 +42,25 @@ int main(int argc, char *argv[]) { CommandLine::option_handle VARIANT = cmd.addOption("v", "variant", Arg::Required, - "-v/--variant \tVariant label (default: \"none\"; use % and _ as wildcard characters; may be used more than once)"); + "-v/--variant \tVariant label (default: \"%\"; use % and _ as wildcard characters; may be used more than once)"); CommandLine::option_handle VARIANT_EXCLUDE = cmd.addOption("", "variant-exclude", Arg::Required, "--variant-exclude \tVariant to exclude (default: UNSET; use % and _ as wildcard characters; may be used more than once)"); CommandLine::option_handle BENCHMARK = cmd.addOption("b", "benchmark", Arg::Required, - "-b/--benchmark \tBenchmark label (default: \"none\"; use % and _ as wildcard characters; may be used more than once)"); + "-b/--benchmark \tBenchmark label (default: \"%\"; use % and _ as wildcard characters; may be used more than once)"); CommandLine::option_handle BENCHMARK_EXCLUDE = cmd.addOption("", "benchmark-exclude", Arg::Required, "--benchmark-exclude \tBenchmark to exclude (default: UNSET; use % and _ as wildcard characters; may be used more than once)"); + std::string pruner_help = "-p/--prune-method \tWhich pruning method to use (default: basic); available pruning methods: " + pruners; CommandLine::option_handle PRUNER = - cmd.addOption("p", "prune-method", Arg::Required, - "-p/--prune-method \tWhich import method to use (default: basic)"); + cmd.addOption("p", "prune-method", Arg::Required, pruner_help); CommandLine::option_handle NO_DELETE = cmd.addOption("", "no-delete", Arg::None, "--no-delete \tAssume there are no DB entries for this variant/benchmark, don't issue a DELETE"); + CommandLine::option_handle OVERWRITE = + cmd.addOption("", "overwrite", Arg::None, + "--overwrite \tOverwrite already existing pruning data (the default is to skip variants with existing entries)"); if (!cmd.parse()) { std::cerr << "Error parsing arguments." << std::endl; @@ -52,6 +68,20 @@ int main(int argc, char *argv[]) { } Pruner *pruner; + std::string pruner_name = "BasicPruner"; + if (cmd[PRUNER]) { + pruner_name = cmd[PRUNER].first()->arg; + } + + // try and get the according pruner object; die on failure + if ((pruner = (Pruner *)registry.get(pruner_name)) == 0) { + if (pruner_name != "?" ) { + std::cerr << "Unknown import method: " << pruner_name << std::endl; + } + std::cerr << "Available import methods: " << pruners << std::endl; + exit(-1); + } + if (cmd[PRUNER]) { std::string imp(cmd[PRUNER].first()->arg); if (imp == "BasicPruner" || imp == "basic") { @@ -88,6 +118,7 @@ int main(int argc, char *argv[]) { } Database *db = Database::cmdline_connect(); + pruner->set_db(db); std::vector variants, benchmarks, variants_exclude, benchmarks_exclude; if (cmd[VARIANT]) { @@ -103,8 +134,8 @@ int main(int argc, char *argv[]) { } // fallback - if (variants.size() == 0 && variants_exclude.size() == 0) { - variants.push_back(std::string("none")); + if (variants.size() == 0) { + variants.push_back("%"); } if (cmd[BENCHMARK]) { @@ -120,11 +151,16 @@ int main(int argc, char *argv[]) { } // fallback - if (benchmarks.size() == 0 && benchmarks_exclude.size() == 0) { - benchmarks.push_back(std::string("none")); + if (benchmarks.size() == 0) { + benchmarks.push_back("%"); } - if (!pruner->init(db, variants, variants_exclude, benchmarks, benchmarks_exclude)) { + if (!pruner->create_database()) { + LOG << "pruner->create_database() failed" << endl; + exit(-1); + } + + if (!pruner->init(variants, variants_exclude, benchmarks, benchmarks_exclude, cmd[OVERWRITE])) { LOG << "pruner->init() failed" << endl; exit(-1); } @@ -132,12 +168,7 @@ int main(int argc, char *argv[]) { //////////////////////////////////////////////////////////////// // Do the actual import //////////////////////////////////////////////////////////////// - if (!pruner->create_database()) { - LOG << "create_database() failed" << endl; - exit(-1); - } - - if (!cmd[NO_DELETE] && !pruner->clear_database()) { + if (!cmd[NO_DELETE] && cmd[OVERWRITE] && !pruner->clear_database()) { LOG << "clear_database() failed" << endl; exit(-1); }