From 4cbcf30b7c7b4d72a7ef395b6ec67ffb7b0fdde7 Mon Sep 17 00:00:00 2001 From: Horst Schirmeier Date: Thu, 15 Jan 2015 19:11:33 +0100 Subject: [PATCH] prune-trace: incremental mode for SamplingPruner The --incremental switch allows adding more samples if the resulting confidence intervals are not yet satisfactory. Change-Id: I65dc99522f45f8a4eaf4ce68e832f7636585381d --- tools/prune-trace/Pruner.cc | 6 +- tools/prune-trace/Pruner.hpp | 10 ++- tools/prune-trace/SamplingPruner.cc | 102 ++++++++++++++++++++------- tools/prune-trace/SamplingPruner.hpp | 6 +- tools/prune-trace/main.cc | 18 ++++- 5 files changed, 110 insertions(+), 32 deletions(-) diff --git a/tools/prune-trace/Pruner.cc b/tools/prune-trace/Pruner.cc index 72c4854a..a3491b75 100644 --- a/tools/prune-trace/Pruner.cc +++ b/tools/prune-trace/Pruner.cc @@ -13,7 +13,7 @@ bool Pruner::init( const std::vector& variants_exclude, const std::vector& benchmarks, const std::vector& benchmarks_exclude, - bool overwrite) + bool overwrite, bool incremental) { m_variants = db->get_variants( variants, variants_exclude, @@ -26,8 +26,8 @@ bool Pruner::init( << std::endl; // make sure we only prune variants that haven't been pruned previously - // (unless we run with --overwrite) - if (!overwrite) { + // (unless we run with --overwrite or --incremental) + if (!overwrite && !incremental) { for (std::vector::iterator it = m_variants.begin(); it != m_variants.end(); ) { std::stringstream ss; diff --git a/tools/prune-trace/Pruner.hpp b/tools/prune-trace/Pruner.hpp index a0ec044d..30fd4e63 100644 --- a/tools/prune-trace/Pruner.hpp +++ b/tools/prune-trace/Pruner.hpp @@ -21,7 +21,7 @@ public: const std::vector& variants_exclude, const std::vector& benchmarks, const std::vector& benchmarks_exclude, - bool overwrite); + bool overwrite, bool incremental); /** * Callback function that can be used to add command line options @@ -35,6 +35,14 @@ public: virtual bool clear_database(); virtual bool prune_all() = 0; + + /** + * Tell the pruner 
to work incrementally. For example, a sampling pruner + * could add more pilots to already existing ones (which already may be + * associated with fault-injection results). Returns false if the pruner + * is incapable of working in the desired mode. + */ + virtual bool set_incremental(bool incremental) { return !incremental; } }; #endif diff --git a/tools/prune-trace/SamplingPruner.cc b/tools/prune-trace/SamplingPruner.cc index 3a382a9d..ce362fcc 100644 --- a/tools/prune-trace/SamplingPruner.cc +++ b/tools/prune-trace/SamplingPruner.cc @@ -13,11 +13,9 @@ using std::endl; struct WeightedPilot { uint64_t duration; - uint32_t instr2; - union { - uint32_t instr2_absolute; uint32_t id; - }; + uint32_t instr2; + uint32_t instr2_absolute; uint32_t data_address; uint32_t weight; @@ -94,13 +92,27 @@ bool SamplingPruner::sampling_prune(const fail::Database::Variant& variant) uint64_t pilotcount = 0; if (!m_use_known_results) { - LOG << "loading trace entries for " << variant.variant << "/" << variant.benchmark << " ..." << endl; + LOG << "loading trace entries " + << (m_incremental ? "and existing pilots " : "") + << "for " << variant.variant << "/" << variant.benchmark << " ..." 
<< endl; - // load trace entries - ss << "SELECT instr2, instr2_absolute, data_address, time2-time1+1 AS duration" - << " FROM trace" - << " WHERE variant_id = " << variant.id - << " AND accesstype = 'R'"; + if (!m_incremental) { + // load trace entries + ss << "SELECT instr2, instr2_absolute, data_address, time2-time1+1 AS duration" + " FROM trace" + " WHERE variant_id = " << variant.id << + " AND accesstype = 'R'"; + } else { + // load trace entries and existing pilots + ss << "SELECT t.instr2, t.instr2_absolute, t.data_address, t.time2-t.time1+1 AS duration," + " IFNULL(g.pilot_id, 0), IFNULL(g.weight, 0)" + " FROM trace t" + " LEFT JOIN fspgroup g" + " ON t.variant_id = g.variant_id AND t.data_address = g.data_address AND t.instr2 = g.instr2" + " AND g.fspmethod_id = " << m_method_id << + " WHERE t.variant_id = " << variant.id << + " AND t.accesstype = 'R'"; + } res = db->query_stream(ss.str().c_str()); ss.str(""); if (!res) return false; @@ -110,7 +122,8 @@ bool SamplingPruner::sampling_prune(const fail::Database::Variant& variant) p.instr2_absolute = strtoul(row[1], 0, 10); p.data_address = strtoul(row[2], 0, 10); p.duration = m_weighting ? strtoull(row[3], 0, 10) : 1; - p.weight = 0; + p.id = m_incremental ? strtoul(row[4], 0, 10) : 0; + p.weight = m_incremental ? strtoul(row[5], 0, 10) : 0; pop.add(p); ++pilotcount; } @@ -118,14 +131,28 @@ bool SamplingPruner::sampling_prune(const fail::Database::Variant& variant) } else { LOG << "loading pilots for " << variant.variant << "/" << variant.benchmark << " ..." 
<< endl; - // load fsppilot entries - ss << "SELECT p.id, p.instr2, p.data_address, t.time2 - t.time1 + 1 AS duration" - << " FROM fsppilot p" - << " JOIN trace t" - << " ON t.variant_id = p.variant_id AND t.data_address = p.data_address AND t.instr2 = p.instr2" - << " WHERE p.fspmethod_id = " << db->get_fspmethod_id("basic") - << " AND p.variant_id = " << variant.id - << " AND p.known_outcome = 0"; + if (!m_incremental) { + // load fsppilot entries + ss << "SELECT p.id, p.instr2, p.data_address, t.time2 - t.time1 + 1 AS duration" + " FROM fsppilot p" + " JOIN trace t" + " ON t.variant_id = p.variant_id AND t.data_address = p.data_address AND t.instr2 = p.instr2" + " WHERE p.fspmethod_id = " << db->get_fspmethod_id("basic") << + " AND p.variant_id = " << variant.id << + " AND p.known_outcome = 0"; + } else { + // load fsppilot entries and existing sampling pilots + ss << "SELECT p.id, p.instr2, p.data_address, t.time2 - t.time1 + 1 AS duration, IFNULL(g.weight, 0)" + " FROM fsppilot p" + " JOIN trace t" + " ON t.variant_id = p.variant_id AND t.data_address = p.data_address AND t.instr2 = p.instr2" + " LEFT JOIN fspgroup g" + " ON t.variant_id = g.variant_id AND t.data_address = g.data_address AND t.instr2 = g.instr2" + " AND g.fspmethod_id = " << m_method_id << + " WHERE p.fspmethod_id = " << db->get_fspmethod_id("basic") << + " AND p.variant_id = " << variant.id << + " AND p.known_outcome = 0"; + } res = db->query_stream(ss.str().c_str()); ss.str(""); if (!res) return false; @@ -135,7 +162,7 @@ bool SamplingPruner::sampling_prune(const fail::Database::Variant& variant) p.instr2 = strtoul(row[1], 0, 10); p.data_address = strtoul(row[2], 0, 10); p.duration = m_weighting ? strtoull(row[3], 0, 10) : 1; - p.weight = 0; + p.weight = m_incremental ? strtoull(row[4], 0, 10) : 0; pop.add(p); ++pilotcount; } @@ -158,6 +185,8 @@ bool SamplingPruner::sampling_prune(const fail::Database::Variant& variant) p.weight++; // first time we sample this pilot? 
if (!m_use_known_results && p.weight == 1) { + // no need to special-case existing pilots (incremental mode), as + // their initial weight is supposed to be at least 1 ss << "(0," << variant.id << "," << p.instr2 << "," << p.instr2 << "," << p.instr2_absolute << "," << p.data_address << ",1," << m_method_id << ")"; @@ -174,7 +203,13 @@ bool SamplingPruner::sampling_prune(const fail::Database::Variant& variant) // fspgroup entries for sampled trace entries if (!m_use_known_results) { - ss << "INSERT INTO fspgroup (variant_id, instr2, data_address, fspmethod_id, pilot_id, weight) " + if (!m_incremental) { + ss << "INSERT"; + } else { + // this spares us to delete existing pilots before + ss << "REPLACE"; + } + ss << " INTO fspgroup (variant_id, instr2, data_address, fspmethod_id, pilot_id, weight) " << "SELECT p.variant_id, p.instr2, p.data_address, " << m_method_id << ", p.id, 1 " << "FROM fsppilot p " << "WHERE known_outcome = 0 AND p.fspmethod_id = " << m_method_id << " " @@ -182,7 +217,14 @@ bool SamplingPruner::sampling_prune(const fail::Database::Variant& variant) if (!db->query(ss.str().c_str())) return false; ss.str(""); - uint64_t num_fspgroup_entries = db->affected_rows(); + uint64_t num_fspgroup_entries; + if (!m_incremental) { + num_fspgroup_entries = db->affected_rows(); + } else { + // with REPLACE INTO, affected_rows does not yield the number of + // new rows; take num_fsppilot_entries instead + num_fspgroup_entries = num_fsppilot_entries; + } LOG << "created " << num_fspgroup_entries << " fspgroup entries" << std::endl; // FIXME is this faster than manually INSERTing all fspgroup entries? 
@@ -208,13 +250,25 @@ bool SamplingPruner::sampling_prune(const fail::Database::Variant& variant) } ss.str(""); } - LOG << "updated " << num_fspgroup_entries << " fspgroup entries" << std::endl; + + if (!m_incremental) { + LOG << "updated " << num_fspgroup_entries << " fspgroup entries" << std::endl; + } else { + // we don't know how many rows we really updated + LOG << "updated fspgroup entries" << std::endl; + } } else { uint64_t num_fspgroup_entries = 0; LOG << "creating fspgroup entries ..." << std::endl; - ss << "INSERT INTO fspgroup (variant_id, instr2, data_address, fspmethod_id, pilot_id, weight) VALUES "; + if (!m_incremental) { + ss << "INSERT"; + } else { + // this spares us to delete existing pilots before + ss << "REPLACE"; + } + ss << " INTO fspgroup (variant_id, instr2, data_address, fspmethod_id, pilot_id, weight) VALUES "; insert_sql = ss.str(); ss.str(""); diff --git a/tools/prune-trace/SamplingPruner.hpp b/tools/prune-trace/SamplingPruner.hpp index db129f29..9fe6a2ee 100644 --- a/tools/prune-trace/SamplingPruner.hpp +++ b/tools/prune-trace/SamplingPruner.hpp @@ -17,10 +17,10 @@ class SamplingPruner : public Pruner { fail::CommandLine::option_handle NO_WEIGHTING; uint64_t m_samplesize; - bool m_use_known_results, m_weighting; + bool m_use_known_results, m_weighting, m_incremental; public: - SamplingPruner() : m_samplesize(0), m_use_known_results(false), m_weighting(true) { } + SamplingPruner() : m_samplesize(0), m_use_known_results(false), m_weighting(true), m_incremental(false) { } virtual std::string method_name() { return "sampling"; } virtual bool commandline_init(); virtual bool prune_all(); @@ -30,6 +30,8 @@ public: aliases->push_back("sampling"); } + virtual bool set_incremental(bool incremental) { m_incremental = incremental; return true; } + private: bool sampling_prune(const fail::Database::Variant& variant); }; diff --git a/tools/prune-trace/main.cc b/tools/prune-trace/main.cc index c0867495..0a391889 100644 --- 
a/tools/prune-trace/main.cc +++ b/tools/prune-trace/main.cc @@ -65,12 +65,20 @@ int main(int argc, char *argv[]) { CommandLine::option_handle OVERWRITE = cmd.addOption("", "overwrite", Arg::None, "--overwrite \tOverwrite already existing pruning data (the default is to skip variants with existing entries)"); + CommandLine::option_handle INCREMENTAL = + cmd.addOption("", "incremental", Arg::None, + "--incremental \tTell the pruner to work incrementally (if supported)"); if (!cmd.parse()) { std::cerr << "Error parsing arguments." << std::endl; exit(-1); } + if (cmd[OVERWRITE] && cmd[INCREMENTAL]) { + std::cerr << "--overwrite and --incremental cannot be used together." << std::endl; + exit(-1); + } + Pruner *pruner; std::string pruner_name = "BasicPruner"; if (cmd[PRUNER]) { @@ -110,6 +118,11 @@ int main(int argc, char *argv[]) { Database *db = Database::cmdline_connect(); pruner->set_db(db); + if (cmd[INCREMENTAL] && !pruner->set_incremental(true)) { + std::cerr << "Pruner is incapable of running incrementally" << std::endl; + exit(-1); + } + std::vector variants, benchmarks, variants_exclude, benchmarks_exclude; if (cmd[VARIANT]) { for (option::Option *o = cmd[VARIANT]; o; o = o->next()) { @@ -150,7 +163,8 @@ int main(int argc, char *argv[]) { exit(-1); } - if (!pruner->init(variants, variants_exclude, benchmarks, benchmarks_exclude, cmd[OVERWRITE])) { + if (!pruner->init(variants, variants_exclude, benchmarks, benchmarks_exclude, + cmd[OVERWRITE], cmd[INCREMENTAL])) { LOG << "pruner->init() failed" << endl; exit(-1); } @@ -158,7 +172,7 @@ int main(int argc, char *argv[]) { //////////////////////////////////////////////////////////////// // Do the actual pruning //////////////////////////////////////////////////////////////// - if (!cmd[NO_DELETE] && cmd[OVERWRITE] && !pruner->clear_database()) { + if (!cmd[NO_DELETE] && cmd[OVERWRITE] && !cmd[INCREMENTAL] && !pruner->clear_database()) { LOG << "clear_database() failed" << endl; exit(-1); }