From 79211fd31de458143ffb362701d5154fb0cf28ca Mon Sep 17 00:00:00 2001
From: Horst Schirmeier
Date: Wed, 14 Jan 2015 23:40:03 +0100
Subject: [PATCH] prune-trace: add SamplingPruner

The SamplingPruner implements "normal" sampling with equivalence-class
reuse. Unlike the FESamplingPruner, the SamplingPruner implements
uniform fault-space sampling that counts multiple hits of an equivalence
class.

This change modifies the database schema, more specifically it adds the
"weight" column to the fspgroup table. Update existing databases with
this query:
    ALTER TABLE fspgroup ADD COLUMN weight INT UNSIGNED;

Change-Id: I668fc9b25fc4d79a60aa1ef8d69cdf5fa076cc6d
---
Review note: SamplingPruner.cc differs from the originally posted
revision (--no-weighting is now evaluated, my_rand() no longer shifts by
64 bits and checks its read, an empty sample population is skipped).
The blob id in that file's "index" line is therefore stale.

 tools/prune-trace/CMakeLists.txt       |   1 +
 tools/prune-trace/FESamplingPruner.hpp |   1 -
 tools/prune-trace/Pruner.cc            |   1 +
 tools/prune-trace/SamplingPruner.cc    | 272 +++++++++++++++++++++++++
 tools/prune-trace/SamplingPruner.hpp   |  37 ++++
 tools/prune-trace/main.cc              |   3 +
 6 files changed, 314 insertions(+), 1 deletion(-)
 create mode 100644 tools/prune-trace/SamplingPruner.cc
 create mode 100644 tools/prune-trace/SamplingPruner.hpp

diff --git a/tools/prune-trace/CMakeLists.txt b/tools/prune-trace/CMakeLists.txt
index 4043ffd4..aadf6545 100644
--- a/tools/prune-trace/CMakeLists.txt
+++ b/tools/prune-trace/CMakeLists.txt
@@ -2,6 +2,7 @@ set(SRCS
 	Pruner.cc
 	BasicPruner.cc
 	FESamplingPruner.cc
+	SamplingPruner.cc
 )
 
 find_package(MySQL REQUIRED)
diff --git a/tools/prune-trace/FESamplingPruner.hpp b/tools/prune-trace/FESamplingPruner.hpp
index d33c1e94..c938be20 100644
--- a/tools/prune-trace/FESamplingPruner.hpp
+++ b/tools/prune-trace/FESamplingPruner.hpp
@@ -30,7 +30,6 @@ public:
 
 	void getAliases(std::deque<std::string> *aliases) {
 		aliases->push_back("FESamplingPruner");
-		aliases->push_back("sampling");
 	}
 
 private:
diff --git a/tools/prune-trace/Pruner.cc b/tools/prune-trace/Pruner.cc
index 3e8c7e10..72c4854a 100644
--- a/tools/prune-trace/Pruner.cc
+++ b/tools/prune-trace/Pruner.cc
@@ -100,6 +100,7 @@ bool Pruner::create_database() {
 		" data_address int(10) unsigned NOT NULL,"
 		" fspmethod_id int(11) NOT NULL,"
 		" pilot_id int(11) NOT NULL,"
+		" weight int(11) UNSIGNED,"
 		" PRIMARY KEY (variant_id, data_address, instr2, fspmethod_id),"
 		" KEY joinresults (pilot_id,fspmethod_id)) engine=MyISAM";
 
diff --git a/tools/prune-trace/SamplingPruner.cc b/tools/prune-trace/SamplingPruner.cc
new file mode 100644
index 00000000..3a382a9d
--- /dev/null
+++ b/tools/prune-trace/SamplingPruner.cc
@@ -0,0 +1,272 @@
+#include <stdlib.h>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include "SamplingPruner.hpp"
+#include "util/Logger.hpp"
+#include "util/CommandLine.hpp"
+#include "util/SumTree.hpp"
+
+static fail::Logger LOG("SamplingPruner");
+using std::endl;
+
+/// One element of the sample population (a trace entry or a known pilot).
+struct WeightedPilot {
+	uint64_t duration; ///< time2-time1+1, or 1 with --no-weighting; returned by size()
+
+	uint32_t instr2;
+	union {
+		uint32_t instr2_absolute; ///< set when loaded from the trace table
+		uint32_t id;              ///< set when loaded from the fsppilot table
+	};
+	uint32_t data_address;
+	uint32_t weight; ///< number of times this element has been sampled
+
+	typedef uint64_t size_type;
+	size_type size() const { return duration; }
+};
+
+/// Registers the pruner's command-line options; always returns true.
+bool SamplingPruner::commandline_init()
+{
+	fail::CommandLine &cmd = fail::CommandLine::Inst();
+	SAMPLESIZE = cmd.addOption("", "samplesize", Arg::Required,
+		"--samplesize N \tNumber of samples to take (per variant)");
+	USE_KNOWN_RESULTS = cmd.addOption("", "use-known-results", Arg::None,
+		"--use-known-results \tReuse known results from a campaign with the 'basic' pruner ");
+	NO_WEIGHTING = cmd.addOption("", "no-weighting", Arg::None,
+		"--no-weighting \tDisable weighted sampling (weight = 1 for all ECs) "
+		"(don't do this unless you know what you're doing)");
+	return true;
+}
+
+/// Evaluates the command-line options and samples every variant in
+/// m_variants; returns false on a missing --samplesize or on the first
+/// variant that fails.
+bool SamplingPruner::prune_all()
+{
+	fail::CommandLine &cmd = fail::CommandLine::Inst();
+	if (!cmd[SAMPLESIZE]) {
+		LOG << "parameter --samplesize required, aborting" << endl;
+		return false;
+	}
+	// m_samplesize is 64 bits wide, so parse with strtoull
+	m_samplesize = strtoull(cmd[SAMPLESIZE].first()->arg, 0, 10);
+
+	if (cmd[USE_KNOWN_RESULTS]) {
+		m_use_known_results = true;
+	}
+
+	// honor --no-weighting (the option was registered but never evaluated)
+	if (cmd[NO_WEIGHTING]) {
+		m_weighting = false;
+	}
+
+	// for each variant:
+	for (std::vector<fail::Database::Variant>::const_iterator it = m_variants.begin();
+		it != m_variants.end(); ++it) {
+		if (!sampling_prune(*it)) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+// TODO: replace with a less syscall-intensive RNG
+// TODO: deduplicate (copied from FESamplingPruner), put in a central place
+static std::ifstream dev_urandom("/dev/urandom", std::ifstream::binary);
+
+/// Returns a random number from [0, limit] (inclusive), drawn from
+/// /dev/urandom by rejecting masked values that exceed limit.
+static uint64_t my_rand(uint64_t limit)
+{
+	// find smallest bitpos that satisfies (1 << bitpos) > limit; stop at 64
+	// because shifting a 64-bit value by 64 or more is undefined behavior
+	int bitpos = 0;
+	while (bitpos < 64 && (limit >> bitpos)) {
+		bitpos++;
+	}
+	const uint64_t mask = (bitpos < 64) ? (1ULL << bitpos) - 1 : ~0ULL;
+
+	uint64_t retval;
+
+	do {
+		dev_urandom.read((char *) &retval, sizeof(retval));
+		if (!dev_urandom) {
+			// without entropy retval is garbage and the loop may never end
+			LOG << "cannot read from /dev/urandom, aborting" << endl;
+			exit(1);
+		}
+		retval &= mask;
+	} while (retval > limit);
+
+	return retval;
+}
+
+/// Samples m_samplesize fault-space coordinates for one variant and
+/// stores the result in the fsppilot / fspgroup tables.
+///
+/// Without --use-known-results the population is the variant's trace
+/// entries with accesstype 'R', and new fsppilot entries are created.
+/// With --use-known-results the population consists of the "basic"
+/// pruner's pilots, which are referenced from new fspgroup entries.
+/// Returns false if a database query fails.
+bool SamplingPruner::sampling_prune(const fail::Database::Variant& variant)
+{
+	typedef fail::SumTree<WeightedPilot> sumtree_type;
+	sumtree_type pop; // sample population
+	std::stringstream ss;
+	MYSQL_RES *res;
+	MYSQL_ROW row;
+
+	uint64_t pilotcount = 0;
+
+	if (!m_use_known_results) {
+		LOG << "loading trace entries for " << variant.variant << "/" << variant.benchmark << " ..." << endl;
+
+		// load trace entries
+		ss << "SELECT instr2, instr2_absolute, data_address, time2-time1+1 AS duration"
+		   << " FROM trace"
+		   << " WHERE variant_id = " << variant.id
+		   << " AND accesstype = 'R'";
+		res = db->query_stream(ss.str().c_str());
+		ss.str("");
+		if (!res) return false;
+		while ((row = mysql_fetch_row(res))) {
+			WeightedPilot p;
+			p.instr2 = strtoul(row[0], 0, 10);
+			p.instr2_absolute = strtoul(row[1], 0, 10);
+			p.data_address = strtoul(row[2], 0, 10);
+			p.duration = m_weighting ? strtoull(row[3], 0, 10) : 1;
+			p.weight = 0;
+			pop.add(p);
+			++pilotcount;
+		}
+		mysql_free_result(res);
+	} else {
+		LOG << "loading pilots for " << variant.variant << "/" << variant.benchmark << " ..." << endl;
+
+		// load fsppilot entries
+		ss << "SELECT p.id, p.instr2, p.data_address, t.time2 - t.time1 + 1 AS duration"
+		   << " FROM fsppilot p"
+		   << " JOIN trace t"
+		   << " ON t.variant_id = p.variant_id AND t.data_address = p.data_address AND t.instr2 = p.instr2"
+		   << " WHERE p.fspmethod_id = " << db->get_fspmethod_id("basic")
+		   << " AND p.variant_id = " << variant.id
+		   << " AND p.known_outcome = 0";
+		res = db->query_stream(ss.str().c_str());
+		ss.str("");
+		if (!res) return false;
+		while ((row = mysql_fetch_row(res))) {
+			WeightedPilot p;
+			p.id = strtoul(row[0], 0, 10);
+			p.instr2 = strtoul(row[1], 0, 10);
+			p.data_address = strtoul(row[2], 0, 10);
+			p.duration = m_weighting ? strtoull(row[3], 0, 10) : 1;
+			p.weight = 0;
+			pop.add(p);
+			++pilotcount;
+		}
+		mysql_free_result(res);
+	}
+
+	LOG << "loaded " << pilotcount << " entries, sampling "
+	    << m_samplesize << " fault-space coordinates ..." << endl;
+
+	uint64_t popsize = pop.get_size(); // stays constant
+	if (popsize == 0) {
+		// my_rand(popsize - 1) would wrap around to UINT64_MAX and
+		// pop.get() would have nothing to return, so skip this variant
+		LOG << "empty sample population, skipping this variant" << endl;
+		return true;
+	}
+
+	ss << "INSERT INTO fsppilot (known_outcome, variant_id, instr2, injection_instr, "
+	   << "injection_instr_absolute, data_address, data_width, fspmethod_id) VALUES ";
+	std::string insert_sql(ss.str());
+	ss.str("");
+
+	uint64_t num_fsppilot_entries = 0;
+	for (uint64_t i = 0; i < m_samplesize; ++i) {
+		uint64_t pos = my_rand(popsize - 1);
+		WeightedPilot& p = pop.get(pos);
+		p.weight++;
+		// first time we sample this pilot?
+		if (!m_use_known_results && p.weight == 1) {
+			ss << "(0," << variant.id << "," << p.instr2 << "," << p.instr2
+			   << "," << p.instr2_absolute << "," << p.data_address
+			   << ",1," << m_method_id << ")";
+			db->insert_multiple(insert_sql.c_str(), ss.str().c_str());
+			ss.str("");
+			++num_fsppilot_entries;
+		}
+	}
+
+	if (!m_use_known_results) {
+		db->insert_multiple();
+		LOG << "created " << num_fsppilot_entries << " fsppilot entries" << std::endl;
+	}
+
+	// fspgroup entries for sampled trace entries
+	if (!m_use_known_results) {
+		ss << "INSERT INTO fspgroup (variant_id, instr2, data_address, fspmethod_id, pilot_id, weight) "
+		   << "SELECT p.variant_id, p.instr2, p.data_address, " << m_method_id << ", p.id, 1 "
+		   << "FROM fsppilot p "
+		   << "WHERE known_outcome = 0 AND p.fspmethod_id = " << m_method_id << " "
+		   << "AND p.variant_id = " << variant.id;
+
+		if (!db->query(ss.str().c_str())) return false;
+		ss.str("");
+		uint64_t num_fspgroup_entries = db->affected_rows();
+		LOG << "created " << num_fspgroup_entries << " fspgroup entries" << std::endl;
+
+		// FIXME is this faster than manually INSERTing all fspgroup entries?
+		num_fspgroup_entries = 0;
+		LOG << "updating fspgroup entries with weight > 1 ..." << std::endl;
+		for (sumtree_type::iterator it = pop.begin(); it != pop.end(); ++it) {
+			if (it->weight <= 1) {
+				continue;
+			}
+			++num_fspgroup_entries;
+			ss << "UPDATE fspgroup SET weight = " << it->weight <<
+				" WHERE variant_id = " << variant.id <<
+				" AND instr2 = " << it->instr2 <<
+				" AND data_address = " << it->data_address <<
+				" AND fspmethod_id = " << m_method_id;
+			// pilot_id is known but should be identical
+			if (!db->query(ss.str().c_str())) return false;
+			if (db->affected_rows() != 1) {
+				LOG << "something is wrong, query affected unexpected ("
+					<< db->affected_rows()
+					<< " != 1) number of rows: "
+					<< ss.str() << std::endl;
+			}
+			ss.str("");
+		}
+		LOG << "updated " << num_fspgroup_entries << " fspgroup entries" << std::endl;
+	} else {
+		uint64_t num_fspgroup_entries = 0;
+
+		LOG << "creating fspgroup entries ..." << std::endl;
+
+		ss << "INSERT INTO fspgroup (variant_id, instr2, data_address, fspmethod_id, pilot_id, weight) VALUES ";
+		insert_sql = ss.str();
+		ss.str("");
+
+		for (sumtree_type::iterator it = pop.begin(); it != pop.end(); ++it) {
+			if (it->weight == 0) {
+				continue;
+			}
+			++num_fspgroup_entries;
+			ss << "(" << variant.id << "," << it->instr2 << "," << it->data_address
+			   << "," << m_method_id << "," << it->id << "," << it->weight << ")";
+			db->insert_multiple(insert_sql.c_str(), ss.str().c_str());
+			ss.str("");
+		}
+		db->insert_multiple();
+		LOG << "created " << num_fspgroup_entries << " fspgroup entries" << std::endl;
+	}
+
+	return true;
+}
diff --git a/tools/prune-trace/SamplingPruner.hpp b/tools/prune-trace/SamplingPruner.hpp
new file mode 100644
index 00000000..db129f29
--- /dev/null
+++ b/tools/prune-trace/SamplingPruner.hpp
@@ -0,0 +1,37 @@
+#ifndef __SAMPLING_PRUNER_H__
+#define __SAMPLING_PRUNER_H__
+
+#include <stdint.h>
+#include "Pruner.hpp"
+#include "util/CommandLine.hpp"
+
+///
+/// SamplingPruner: implements sampling with equivalence-class reuse
+///
+/// Unlike the FESamplingPruner, the SamplingPruner implements uniform
+/// fault-space sampling that counts multiple hits of an equivalence class.
+///
+class SamplingPruner : public Pruner {
+	fail::CommandLine::option_handle SAMPLESIZE;
+	fail::CommandLine::option_handle USE_KNOWN_RESULTS;
+	fail::CommandLine::option_handle NO_WEIGHTING;
+
+	uint64_t m_samplesize;
+	bool m_use_known_results, m_weighting;
+
+public:
+	SamplingPruner() : m_samplesize(0), m_use_known_results(false), m_weighting(true) { }
+	virtual std::string method_name() { return "sampling"; }
+	virtual bool commandline_init();
+	virtual bool prune_all();
+
+	void getAliases(std::deque<std::string> *aliases) {
+		aliases->push_back("SamplingPruner");
+		aliases->push_back("sampling");
+	}
+
+private:
+	bool sampling_prune(const fail::Database::Variant& variant);
+};
+
+#endif
diff --git a/tools/prune-trace/main.cc b/tools/prune-trace/main.cc
index 295d5c9f..c0867495 100644
--- a/tools/prune-trace/main.cc
+++ b/tools/prune-trace/main.cc
@@ -14,6 +14,7 @@ using std::endl;
 #include "Pruner.hpp"
 #include "BasicPruner.hpp"
 #include "FESamplingPruner.hpp"
+#include "SamplingPruner.hpp"
 
 int main(int argc, char *argv[]) {
 	std::string username, hostname, database;
@@ -26,6 +27,8 @@ int main(int argc, char *argv[]) {
 	registry.add(&basicprunerleft);
 	FESamplingPruner fesamplingpruner;
 	registry.add(&fesamplingpruner);
+	SamplingPruner samplingpruner;
+	registry.add(&samplingpruner);
 
 	std::string pruners = registry.getPrimeAliasesCSV();
 