From 0da8ba0dec111d78292455bb5f17c6045820db25 Mon Sep 17 00:00:00 2001
From: Horst Schirmeier
Date: Wed, 30 Apr 2014 14:24:49 +0200
Subject: [PATCH] prune-trace: added "sampling with fault expansion"

The FESamplingPruner implements the fault-expansion variance reduction
technique (FE-VRT) as described in: Smith, D. Todd and Johnson, Barry W.
and Andrianos, Nikos and Profeta, III, Joseph A., "A variance-reduction
technique via fault-expansion for fault-coverage estimation" (1997),
366--374.

Change-Id: I04a0c9bb2622974278bd8c73793e51451119e650
---
 tools/prune-trace/CMakeLists.txt       |   1 +
 tools/prune-trace/FESamplingPruner.cc  | 221 +++++++++++++++++++++++++
 tools/prune-trace/FESamplingPruner.hpp |  31 ++++
 tools/prune-trace/main.cc              |   4 +
 4 files changed, 257 insertions(+)
 create mode 100644 tools/prune-trace/FESamplingPruner.cc
 create mode 100644 tools/prune-trace/FESamplingPruner.hpp

diff --git a/tools/prune-trace/CMakeLists.txt b/tools/prune-trace/CMakeLists.txt
index a79e3451..4043ffd4 100644
--- a/tools/prune-trace/CMakeLists.txt
+++ b/tools/prune-trace/CMakeLists.txt
@@ -1,6 +1,7 @@
 set(SRCS
 	Pruner.cc
 	BasicPruner.cc
+	FESamplingPruner.cc
 )
 
 find_package(MySQL REQUIRED)
diff --git a/tools/prune-trace/FESamplingPruner.cc b/tools/prune-trace/FESamplingPruner.cc
new file mode 100644
index 00000000..fb4f925b
--- /dev/null
+++ b/tools/prune-trace/FESamplingPruner.cc
@@ -0,0 +1,221 @@
+#include <stdint.h>
+#include <cerrno>
+#include <climits>
+#include <cstdlib>
+#include <algorithm>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <vector>
+#include "FESamplingPruner.hpp"
+#include "util/Logger.hpp"
+#include "util/CommandLine.hpp"
+#include "util/SumTree.hpp"
+
+static fail::Logger LOG("FESamplingPruner");
+using std::endl;
+
+/// One sampling candidate, built from a read access in the trace table.
+struct Pilot {
+	uint64_t duration; ///< time2 - time1 + 1 of the trace entry
+
+	uint32_t instr2;
+	uint32_t instr2_absolute;
+	uint32_t data_address;
+
+	// element interface for fail::SumTree (presumably the element's weight
+	// within the tree -- confirm against util/SumTree.hpp)
+	typedef uint64_t size_type;
+	size_type size() const { return duration; }
+};
+
+/// Registers the --samplesize command-line option.
+bool FESamplingPruner::commandline_init()
+{
+	fail::CommandLine &cmd = fail::CommandLine::Inst();
+	SAMPLESIZE = cmd.addOption("", "samplesize", Arg::Required,
+		"--samplesize N \tNumber of samples to take (per variant)");
+	return true;
+}
+
+/// Validates --samplesize and runs sampling_prune() for every variant in
+/// m_variants; returns false on the first failure.
+bool FESamplingPruner::prune_all()
+{
+	fail::CommandLine &cmd = fail::CommandLine::Inst();
+	if (!cmd[SAMPLESIZE]) {
+		LOG << "parameter --samplesize required, aborting" << endl;
+		return false;
+	}
+
+	// strtoul() yields 0 for non-numeric input; reject that, trailing
+	// garbage and out-of-range values instead of silently sampling nothing
+	const char *arg = cmd[SAMPLESIZE].first()->arg;
+	char *end = 0;
+	errno = 0;
+	unsigned long samplesize = strtoul(arg, &end, 10);
+	if (errno != 0 || end == arg || *end != '\0'
+	    || samplesize == 0 || samplesize > UINT_MAX) {
+		LOG << "parameter --samplesize must be a positive integer, aborting" << endl;
+		return false;
+	}
+	m_samplesize = static_cast<unsigned>(samplesize);
+
+	// for each variant:
+	for (std::vector<fail::Database::Variant>::const_iterator it = m_variants.begin();
+	     it != m_variants.end(); ++it) {
+		if (!sampling_prune(*it)) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+// TODO: replace with a less syscall-intensive RNG
+static std::ifstream dev_urandom("/dev/urandom", std::ifstream::binary);
+
+/// Returns a random number in [0, limit], drawn from /dev/urandom by
+/// rejection sampling.  If reading fails, 0 is returned and dev_urandom is
+/// left in a failed state; callers must check the stream afterwards.
+static uint64_t my_rand(uint64_t limit)
+{
+	// find smallest bitpos that satisfies (1 << bitpos) > limit, capped at
+	// 64 because shifting a 64-bit value by 64 or more bits is undefined
+	int bitpos = 0;
+	while (bitpos < 64 && (limit >> bitpos)) {
+		bitpos++;
+	}
+	const uint64_t mask = (bitpos == 64) ? ~0ULL : ((1ULL << bitpos) - 1);
+
+	uint64_t retval;
+
+	do {
+		// a failed read would leave retval untouched and loop forever
+		if (!dev_urandom.read((char *) &retval, sizeof(retval))) {
+			return 0;
+		}
+		retval &= mask;
+	} while (retval > limit);
+
+	return retval;
+}
+
+/// Samples up to m_samplesize read accesses of one variant and creates the
+/// matching fsppilot and fspgroup rows, plus a single known-outcome pilot
+/// for the write accesses.  Returns false if a query or the RNG fails.
+bool FESamplingPruner::sampling_prune(const fail::Database::Variant& variant)
+{
+	fail::SumTree<Pilot> pop; // sample population
+	std::stringstream ss;
+	MYSQL_RES *res;
+	MYSQL_ROW row;
+
+	LOG << "loading trace entries for " << variant.variant << "/" << variant.benchmark << " ..." << endl;
+
+	unsigned pilotcount = 0;
+
+	// load trace entries
+	ss << "SELECT instr2, instr2_absolute, data_address, time2-time1+1 AS duration"
+	   << " FROM trace"
+	   << " WHERE variant_id = " << variant.id
+	   << " AND accesstype = 'R'"
+	   << " ORDER BY duration DESC"; // speeds up sampling, but query may be slow
+	res = db->query_stream(ss.str().c_str());
+	ss.str("");
+	if (!res) return false;
+	while ((row = mysql_fetch_row(res))) {
+		Pilot p;
+		p.instr2 = strtoul(row[0], 0, 10);
+		p.instr2_absolute = strtoul(row[1], 0, 10);
+		p.data_address = strtoul(row[2], 0, 10);
+		p.duration = strtoull(row[3], 0, 10);
+		pop.add(p);
+		++pilotcount;
+	}
+	mysql_free_result(res);
+
+	unsigned samplerows = std::min(pilotcount, m_samplesize);
+
+	LOG << "loaded " << pilotcount << " entries, sampling "
+	    << samplerows << " entries with fault expansion ..." << endl;
+
+	// FIXME: change strategy when trace entries have IDs, insert into fspgroup first
+	ss << "INSERT INTO fsppilot (known_outcome, variant_id, instr2, injection_instr, "
+	   << "injection_instr_absolute, data_address, data_width, fspmethod_id) VALUES ";
+	std::string insert_sql(ss.str());
+	ss.str("");
+
+	for (unsigned i = 0; i < samplerows; ++i) {
+		uint64_t pos = my_rand(pop.get_size() - 1);
+		if (!dev_urandom) {
+			LOG << "reading /dev/urandom failed, aborting" << endl;
+			return false;
+		}
+		Pilot p = pop.get(pos);
+		ss << "(0," << variant.id << "," << p.instr2 << "," << p.instr2
+		   << "," << p.instr2_absolute << "," << p.data_address
+		   << ",1," << m_method_id << ")";
+		db->insert_multiple(insert_sql.c_str(), ss.str().c_str());
+		ss.str("");
+	}
+	db->insert_multiple();
+	unsigned num_fsppilot_entries = samplerows;
+
+	// single entry for known outcome (write access)
+	ss << "INSERT INTO fsppilot (known_outcome, variant_id, instr2, injection_instr, injection_instr_absolute, data_address, data_width, fspmethod_id) "
+	      "SELECT 1, variant_id, instr2, instr2, instr2_absolute, "
+	      " data_address, width, " << m_method_id << " "
+	      "FROM trace "
+	      "WHERE variant_id = " << variant.id << " AND accesstype = 'W' "
+	      "ORDER BY instr2 ASC "
+	      "LIMIT 1";
+	if (!db->query(ss.str().c_str())) return false;
+	ss.str("");
+	// a trace without write accesses yields no known-outcome pilot; report
+	// that instead of assert()ing on database contents
+	unsigned num_known_pilots = db->affected_rows();
+	if (num_known_pilots != 1) {
+		LOG << "warning: expected 1 known-outcome pilot, created "
+		    << num_known_pilots << endl;
+	}
+	num_fsppilot_entries += num_known_pilots;
+
+	LOG << "created " << num_fsppilot_entries << " fsppilot entries" << std::endl;
+
+	// fspgroup entries for sampled trace entries
+	ss << "INSERT INTO fspgroup (variant_id, instr2, data_address, fspmethod_id, pilot_id) "
+	   << "SELECT p.variant_id, p.instr2, p.data_address, p.fspmethod_id, p.id "
+	   << "FROM fsppilot p "
+	   << "WHERE known_outcome = 0 AND p.fspmethod_id = " << m_method_id << " "
+	   << "AND p.variant_id = " << variant.id;
+
+	if (!db->query(ss.str().c_str())) return false;
+	ss.str("");
+	unsigned num_fspgroup_entries = db->affected_rows();
+
+#if 0 // do it like the basic pruner:
+	// fspgroup entries for known (W) trace entries
+	ss << "INSERT INTO fspgroup (variant_id, instr2, data_address, fspmethod_id, pilot_id) "
+	      "SELECT STRAIGHT_JOIN t.variant_id, t.instr2, t.data_address, p.fspmethod_id, p.id "
+	      "FROM fsppilot p "
+	      "JOIN trace t "
+	      "ON t.variant_id = p.variant_id AND p.fspmethod_id = " << m_method_id << " AND p.known_outcome = 1 "
+	      "WHERE t.variant_id = " << variant.id << " AND t.accesstype = 'W'";
+#else
+	// *one* fspgroup entry for known (W) trace entries (no need to create one
+	// for each W); this needs to be accounted for at data analysis time,
+	// though.
+	ss << "INSERT INTO fspgroup (variant_id, instr2, data_address, fspmethod_id, pilot_id) "
+	      "SELECT variant_id, instr2, data_address, fspmethod_id, id "
+	      "FROM fsppilot "
+	      "WHERE variant_id = " << variant.id << " AND known_outcome = 1 AND fspmethod_id = " << m_method_id;
+#endif
+	if (!db->query(ss.str().c_str())) return false;
+	ss.str("");
+	num_fspgroup_entries += db->affected_rows();
+
+	LOG << "created " << num_fspgroup_entries << " fspgroup entries" << std::endl;
+
+	return true;
+}
diff --git a/tools/prune-trace/FESamplingPruner.hpp b/tools/prune-trace/FESamplingPruner.hpp
new file mode 100644
index 00000000..6d4dc0cf
--- /dev/null
+++ b/tools/prune-trace/FESamplingPruner.hpp
@@ -0,0 +1,31 @@
+#ifndef __FESAMPLING_PRUNER_H__
+#define __FESAMPLING_PRUNER_H__
+
+#include "Pruner.hpp"
+#include "util/CommandLine.hpp"
+
+///
+/// FESamplingPruner: implements sampling with Fault Expansion
+///
+/// The FESamplingPruner implements the fault-expansion variance reduction
+/// technique (FE-VRT) as described in: Smith, D. Todd and Johnson, Barry W.
+/// and Andrianos, Nikos and Profeta, III, Joseph A., "A variance-reduction
+/// technique via fault-expansion for fault-coverage estimation" (1997),
+/// 366--374.
+///
+class FESamplingPruner : public Pruner {
+	fail::CommandLine::option_handle SAMPLESIZE; ///< handle of the --samplesize option
+
+	unsigned m_samplesize; ///< samples to take per variant, set by prune_all()
+
+public:
+	FESamplingPruner() : m_samplesize(0) { }
+	virtual std::string method_name() { return "FESampling"; }
+	virtual bool commandline_init(); ///< registers --samplesize
+	virtual bool prune_all(); ///< samples every variant; false on failure
+
+private:
+	bool sampling_prune(const fail::Database::Variant& variant); ///< samples one variant
+};
+
+#endif
diff --git a/tools/prune-trace/main.cc b/tools/prune-trace/main.cc
index 7288085a..55d4c47a 100644
--- a/tools/prune-trace/main.cc
+++ b/tools/prune-trace/main.cc
@@ -11,6 +11,7 @@ using std::endl;
 
 #include "Pruner.hpp"
 #include "BasicPruner.hpp"
+#include "FESamplingPruner.hpp"
 
 int main(int argc, char *argv[]) {
 	std::string username, hostname, database;
@@ -59,6 +60,9 @@ int main(int argc, char *argv[]) {
 	} else if (imp == "BasicPrunerLeft" || imp == "basic-left") {
 		LOG << "Using BasicPruner (use left border, instr1)" << endl;
 		pruner = new BasicPruner(true);
+	} else if (imp == "FESamplingPruner" || imp == "sampling") {
+		LOG << "Using FESamplingPruner" << endl;
+		pruner = new FESamplingPruner;
 	} else {
 		LOG << "Unknown pruning method: " << imp << endl;