diff --git a/tools/prune-trace/CMakeLists.txt b/tools/prune-trace/CMakeLists.txt index 4043ffd4..aadf6545 100644 --- a/tools/prune-trace/CMakeLists.txt +++ b/tools/prune-trace/CMakeLists.txt @@ -2,6 +2,7 @@ set(SRCS Pruner.cc BasicPruner.cc FESamplingPruner.cc + SamplingPruner.cc ) find_package(MySQL REQUIRED) diff --git a/tools/prune-trace/FESamplingPruner.hpp b/tools/prune-trace/FESamplingPruner.hpp index d33c1e94..c938be20 100644 --- a/tools/prune-trace/FESamplingPruner.hpp +++ b/tools/prune-trace/FESamplingPruner.hpp @@ -30,7 +30,6 @@ public: void getAliases(std::deque *aliases) { aliases->push_back("FESamplingPruner"); - aliases->push_back("sampling"); } private: diff --git a/tools/prune-trace/Pruner.cc b/tools/prune-trace/Pruner.cc index 3e8c7e10..72c4854a 100644 --- a/tools/prune-trace/Pruner.cc +++ b/tools/prune-trace/Pruner.cc @@ -100,6 +100,7 @@ bool Pruner::create_database() { " data_address int(10) unsigned NOT NULL," " fspmethod_id int(11) NOT NULL," " pilot_id int(11) NOT NULL," + " weight int(11) UNSIGNED," " PRIMARY KEY (variant_id, data_address, instr2, fspmethod_id)," " KEY joinresults (pilot_id,fspmethod_id)) engine=MyISAM"; diff --git a/tools/prune-trace/SamplingPruner.cc b/tools/prune-trace/SamplingPruner.cc new file mode 100644 index 00000000..3a382a9d --- /dev/null +++ b/tools/prune-trace/SamplingPruner.cc @@ -0,0 +1,236 @@ +#include +#include +#include +#include +#include "SamplingPruner.hpp" +#include "util/Logger.hpp" +#include "util/CommandLine.hpp" +#include "util/SumTree.hpp" + +static fail::Logger LOG("SamplingPruner"); +using std::endl; + +struct WeightedPilot { + uint64_t duration; + + uint32_t instr2; + union { + uint32_t instr2_absolute; + uint32_t id; + }; + uint32_t data_address; + uint32_t weight; + + typedef uint64_t size_type; + size_type size() const { return duration; } +}; + +bool SamplingPruner::commandline_init() +{ + fail::CommandLine &cmd = fail::CommandLine::Inst(); + SAMPLESIZE = cmd.addOption("", "samplesize", Arg::Required, + "--samplesize N \tNumber of samples to take (per variant)"); + USE_KNOWN_RESULTS = cmd.addOption("", "use-known-results", Arg::None, + "--use-known-results \tReuse known results from a campaign with the 'basic' pruner "); + NO_WEIGHTING = cmd.addOption("", "no-weighting", Arg::None, + "--no-weighting \tDisable weighted sampling (weight = 1 for all ECs) " + "(don't do this unless you know what you're doing)"); + return true; +} + +bool SamplingPruner::prune_all() +{ + fail::CommandLine &cmd = fail::CommandLine::Inst(); + if (!cmd[SAMPLESIZE]) { + LOG << "parameter --samplesize required, aborting" << endl; + return false; + } + m_samplesize = strtoul(cmd[SAMPLESIZE].first()->arg, 0, 10); + + if (cmd[USE_KNOWN_RESULTS]) { + m_use_known_results = true; + } + + // for each variant: + for (std::vector::const_iterator it = m_variants.begin(); + it != m_variants.end(); ++it) { + if (!sampling_prune(*it)) { + return false; + } + } + + return true; +} + +// TODO: replace with a less syscall-intensive RNG +// TODO: deduplicate (copied from FESamplingPruner), put in a central place +static std::ifstream dev_urandom("/dev/urandom", std::ifstream::binary); +static uint64_t my_rand(uint64_t limit) +{ + // find smallest bitpos that satisfies (1 << bitpos) > limit + int bitpos = 0; + while (limit >> bitpos) { + bitpos++; + } + + uint64_t retval; + + do { + dev_urandom.read((char *) &retval, sizeof(retval)); + retval &= (1ULL << bitpos) - 1; + } while (retval > limit); + + return retval; +} + +bool SamplingPruner::sampling_prune(const fail::Database::Variant& variant) +{ + typedef fail::SumTree sumtree_type; + sumtree_type pop; // sample population + std::stringstream ss; + MYSQL_RES *res; + MYSQL_ROW row; + + uint64_t pilotcount = 0; + + if (!m_use_known_results) { + LOG << "loading trace entries for " << variant.variant << "/" << variant.benchmark << " ..." << endl; + + // load trace entries + ss << "SELECT instr2, instr2_absolute, data_address, time2-time1+1 AS duration" + << " FROM trace" + << " WHERE variant_id = " << variant.id + << " AND accesstype = 'R'"; + res = db->query_stream(ss.str().c_str()); + ss.str(""); + if (!res) return false; + while ((row = mysql_fetch_row(res))) { + WeightedPilot p; + p.instr2 = strtoul(row[0], 0, 10); + p.instr2_absolute = strtoul(row[1], 0, 10); + p.data_address = strtoul(row[2], 0, 10); + p.duration = m_weighting ? strtoull(row[3], 0, 10) : 1; + p.weight = 0; + pop.add(p); + ++pilotcount; + } + mysql_free_result(res); + } else { + LOG << "loading pilots for " << variant.variant << "/" << variant.benchmark << " ..." << endl; + + // load fsppilot entries + ss << "SELECT p.id, p.instr2, p.data_address, t.time2 - t.time1 + 1 AS duration" + << " FROM fsppilot p" + << " JOIN trace t" + << " ON t.variant_id = p.variant_id AND t.data_address = p.data_address AND t.instr2 = p.instr2" + << " WHERE p.fspmethod_id = " << db->get_fspmethod_id("basic") + << " AND p.variant_id = " << variant.id + << " AND p.known_outcome = 0"; + res = db->query_stream(ss.str().c_str()); + ss.str(""); + if (!res) return false; + while ((row = mysql_fetch_row(res))) { + WeightedPilot p; + p.id = strtoul(row[0], 0, 10); + p.instr2 = strtoul(row[1], 0, 10); + p.data_address = strtoul(row[2], 0, 10); + p.duration = m_weighting ? strtoull(row[3], 0, 10) : 1; + p.weight = 0; + pop.add(p); + ++pilotcount; + } + mysql_free_result(res); + } + + LOG << "loaded " << pilotcount << " entries, sampling " + << m_samplesize << " fault-space coordinates ..." << endl; + + ss << "INSERT INTO fsppilot (known_outcome, variant_id, instr2, injection_instr, " + << "injection_instr_absolute, data_address, data_width, fspmethod_id) VALUES "; + std::string insert_sql(ss.str()); + ss.str(""); + + uint64_t popsize = pop.get_size(); // stays constant + uint64_t num_fsppilot_entries = 0; + for (uint64_t i = 0; i < m_samplesize; ++i) { + uint64_t pos = my_rand(popsize - 1); + WeightedPilot& p = pop.get(pos); + p.weight++; + // first time we sample this pilot? + if (!m_use_known_results && p.weight == 1) { + ss << "(0," << variant.id << "," << p.instr2 << "," << p.instr2 + << "," << p.instr2_absolute << "," << p.data_address + << ",1," << m_method_id << ")"; + db->insert_multiple(insert_sql.c_str(), ss.str().c_str()); + ss.str(""); + ++num_fsppilot_entries; + } + } + + if (!m_use_known_results) { + db->insert_multiple(); + LOG << "created " << num_fsppilot_entries << " fsppilot entries" << std::endl; + } + + // fspgroup entries for sampled trace entries + if (!m_use_known_results) { + ss << "INSERT INTO fspgroup (variant_id, instr2, data_address, fspmethod_id, pilot_id, weight) " + << "SELECT p.variant_id, p.instr2, p.data_address, " << m_method_id << ", p.id, 1 " + << "FROM fsppilot p " + << "WHERE known_outcome = 0 AND p.fspmethod_id = " << m_method_id << " " + << "AND p.variant_id = " << variant.id; + + if (!db->query(ss.str().c_str())) return false; + ss.str(""); + uint64_t num_fspgroup_entries = db->affected_rows(); + LOG << "created " << num_fspgroup_entries << " fspgroup entries" << std::endl; + + // FIXME is this faster than manually INSERTing all fspgroup entries? + num_fspgroup_entries = 0; + LOG << "updating fspgroup entries with weight > 1 ..." << std::endl; + for (sumtree_type::iterator it = pop.begin(); it != pop.end(); ++it) { + if (it->weight <= 1) { + continue; + } + ++num_fspgroup_entries; + ss << "UPDATE fspgroup SET weight = " << it->weight << + " WHERE variant_id = " << variant.id << + " AND instr2 = " << it->instr2 << + " AND data_address = " << it->data_address << + " AND fspmethod_id = " << m_method_id; + // pilot_id is known but should be identical + if (!db->query(ss.str().c_str())) return false; + if (db->affected_rows() != 1) { + LOG << "something is wrong, query affected unexpected (" + << db->affected_rows() + << " != 1) number of rows: " + << ss.str() << std::endl; + } + ss.str(""); + } + LOG << "updated " << num_fspgroup_entries << " fspgroup entries" << std::endl; + } else { + uint64_t num_fspgroup_entries = 0; + + LOG << "creating fspgroup entries ..." << std::endl; + + ss << "INSERT INTO fspgroup (variant_id, instr2, data_address, fspmethod_id, pilot_id, weight) VALUES "; + insert_sql = ss.str(); + ss.str(""); + + for (sumtree_type::iterator it = pop.begin(); it != pop.end(); ++it) { + if (it->weight == 0) { + continue; + } + ++num_fspgroup_entries; + ss << "(" << variant.id << "," << it->instr2 << "," << it->data_address + << "," << m_method_id << "," << it->id << "," << it->weight << ")"; + db->insert_multiple(insert_sql.c_str(), ss.str().c_str()); + ss.str(""); + } + db->insert_multiple(); + LOG << "created " << num_fspgroup_entries << " fspgroup entries" << std::endl; + } + + return true; +} diff --git a/tools/prune-trace/SamplingPruner.hpp b/tools/prune-trace/SamplingPruner.hpp new file mode 100644 index 00000000..db129f29 --- /dev/null +++ b/tools/prune-trace/SamplingPruner.hpp @@ -0,0 +1,37 @@ +#ifndef __SAMPLING_PRUNER_H__ +#define __SAMPLING_PRUNER_H__ + +#include +#include "Pruner.hpp" +#include "util/CommandLine.hpp" + +/// +/// SamplingPruner: implements sampling with equivalence-class reuse +/// +/// Unlike the FESamplingPruner, the SamplingPruner implements uniform +/// fault-space sampling that counts multiple hits of an equivalence class. +/// +class SamplingPruner : public Pruner { + fail::CommandLine::option_handle SAMPLESIZE; + fail::CommandLine::option_handle USE_KNOWN_RESULTS; + fail::CommandLine::option_handle NO_WEIGHTING; + + uint64_t m_samplesize; + bool m_use_known_results, m_weighting; + +public: + SamplingPruner() : m_samplesize(0), m_use_known_results(false), m_weighting(true) { } + virtual std::string method_name() { return "sampling"; } + virtual bool commandline_init(); + virtual bool prune_all(); + + void getAliases(std::deque *aliases) { + aliases->push_back("SamplingPruner"); + aliases->push_back("sampling"); + } + +private: + bool sampling_prune(const fail::Database::Variant& variant); +}; + +#endif diff --git a/tools/prune-trace/main.cc b/tools/prune-trace/main.cc index 295d5c9f..c0867495 100644 --- a/tools/prune-trace/main.cc +++ b/tools/prune-trace/main.cc @@ -14,6 +14,7 @@ using std::endl; #include "Pruner.hpp" #include "BasicPruner.hpp" #include "FESamplingPruner.hpp" +#include "SamplingPruner.hpp" int main(int argc, char *argv[]) { std::string username, hostname, database; @@ -26,6 +27,8 @@ int main(int argc, char *argv[]) { registry.add(&basicprunerleft); FESamplingPruner fesamplingpruner; registry.add(&fesamplingpruner); + SamplingPruner samplingpruner; + registry.add(&samplingpruner); std::string pruners = registry.getPrimeAliasesCSV();