diff --git a/tools/prune-trace/FESamplingPruner.cc b/tools/prune-trace/FESamplingPruner.cc index fb4f925b..c0740de8 100644 --- a/tools/prune-trace/FESamplingPruner.cc +++ b/tools/prune-trace/FESamplingPruner.cc @@ -14,7 +14,10 @@ struct Pilot { uint64_t duration; uint32_t instr2; + union { uint32_t instr2_absolute; + uint32_t id; + }; uint32_t data_address; typedef uint64_t size_type; @@ -26,6 +29,9 @@ bool FESamplingPruner::commandline_init() fail::CommandLine &cmd = fail::CommandLine::Inst(); SAMPLESIZE = cmd.addOption("", "samplesize", Arg::Required, "--samplesize N \tNumber of samples to take (per variant)"); + USE_KNOWN_RESULTS = cmd.addOption("", "use-known-results", Arg::None, + "--use-known-results \tReuse known results from a campaign with the 'basic' pruner " + "(abuses the DB layout to a certain degree, use with caution)"); return true; } @@ -38,6 +44,10 @@ bool FESamplingPruner::prune_all() } m_samplesize = strtoul(cmd[SAMPLESIZE].first()->arg, 0, 10); + if (cmd[USE_KNOWN_RESULTS]) { + m_use_known_results = true; + } + // for each variant: for (std::vector::const_iterator it = m_variants.begin(); it != m_variants.end(); ++it) { @@ -76,78 +86,132 @@ bool FESamplingPruner::sampling_prune(const fail::Database::Variant& variant) MYSQL_RES *res; MYSQL_ROW row; - LOG << "loading trace entries for " << variant.variant << "/" << variant.benchmark << " ..." << endl; + unsigned pilotcount = 0, samplerows; - unsigned pilotcount = 0; + if (!m_use_known_results) { + LOG << "loading trace entries for " << variant.variant << "/" << variant.benchmark << " ..." << endl; - // load trace entries - ss << "SELECT instr2, instr2_absolute, data_address, time2-time1+1 AS duration" - << " FROM trace" - << " WHERE variant_id = " << variant.id - << " AND accesstype = 'R'" - << " ORDER BY duration DESC"; // speeds up sampling, but query may be slow - res = db->query_stream(ss.str().c_str()); - ss.str(""); - if (!res) return false; - while ((row = mysql_fetch_row(res))) { - Pilot p; - p.instr2 = strtoul(row[0], 0, 10); - p.instr2_absolute = strtoul(row[1], 0, 10); - p.data_address = strtoul(row[2], 0, 10); - p.duration = strtoull(row[3], 0, 10); - pop.add(p); - ++pilotcount; + // load trace entries + ss << "SELECT instr2, instr2_absolute, data_address, time2-time1+1 AS duration" + << " FROM trace" + << " WHERE variant_id = " << variant.id + << " AND accesstype = 'R'" + << " ORDER BY duration DESC"; // speeds up sampling, but query may be slow + res = db->query_stream(ss.str().c_str()); + ss.str(""); + if (!res) return false; + while ((row = mysql_fetch_row(res))) { + Pilot p; + p.instr2 = strtoul(row[0], 0, 10); + p.instr2_absolute = strtoul(row[1], 0, 10); + p.data_address = strtoul(row[2], 0, 10); + p.duration = strtoull(row[3], 0, 10); + pop.add(p); + ++pilotcount; + } + mysql_free_result(res); + + samplerows = std::min(pilotcount, m_samplesize); + } else { + LOG << "loading pilots for " << variant.variant << "/" << variant.benchmark << " ..." << endl; + + // load fsppilot entries + ss << "SELECT p.id, p.instr2, p.data_address, t.time2 - t.time1 + 1 AS duration" + << " FROM fsppilot p" + << " JOIN trace t" + << " ON t.variant_id = p.variant_id AND t.data_address = p.data_address AND t.instr2 = p.instr2" + << " WHERE p.fspmethod_id = " << db->get_fspmethod_id("basic") + << " AND p.variant_id = " << variant.id + << " AND p.known_outcome = 0" + << " ORDER BY duration DESC"; // speeds up sampling, but query may be slow + res = db->query_stream(ss.str().c_str()); + ss.str(""); + if (!res) return false; + while ((row = mysql_fetch_row(res))) { + Pilot p; + p.id = strtoul(row[0], 0, 10); + p.instr2 = strtoul(row[1], 0, 10); + p.data_address = strtoul(row[2], 0, 10); + p.duration = strtoull(row[3], 0, 10); + pop.add(p); + ++pilotcount; + } + mysql_free_result(res); + + samplerows = std::min(pilotcount, m_samplesize); } - mysql_free_result(res); - - unsigned samplerows = std::min(pilotcount, m_samplesize); LOG << "loaded " << pilotcount << " entries, sampling " << samplerows << " entries with fault expansion ..." << endl; - // FIXME: change strategy when trace entries have IDs, insert into fspgroup first - ss << "INSERT INTO fsppilot (known_outcome, variant_id, instr2, injection_instr, " - << "injection_instr_absolute, data_address, data_width, fspmethod_id) VALUES "; - std::string insert_sql(ss.str()); - ss.str(""); + unsigned num_fspgroup_entries = 0; + uint32_t known_pilot_method_id = m_method_id; - for (unsigned i = 0; i < samplerows; ++i) { - uint64_t pos = my_rand(pop.get_size() - 1); - Pilot p = pop.get(pos); - ss << "(0," << variant.id << "," << p.instr2 << "," << p.instr2 - << "," << p.instr2_absolute << "," << p.data_address - << ",1," << m_method_id << ")"; - db->insert_multiple(insert_sql.c_str(), ss.str().c_str()); + if (!m_use_known_results) { + // FIXME: change strategy when trace entries have IDs, insert into fspgroup first + ss << "INSERT INTO fsppilot (known_outcome, variant_id, instr2, injection_instr, " + << "injection_instr_absolute, data_address, data_width, fspmethod_id) VALUES "; + std::string insert_sql(ss.str()); ss.str(""); + + for (unsigned i = 0; i < samplerows; ++i) { + uint64_t pos = my_rand(pop.get_size() - 1); + Pilot p = pop.get(pos); + ss << "(0," << variant.id << "," << p.instr2 << "," << p.instr2 + << "," << p.instr2_absolute << "," << p.data_address + << ",1," << m_method_id << ")"; + db->insert_multiple(insert_sql.c_str(), ss.str().c_str()); + ss.str(""); + } + db->insert_multiple(); + + unsigned num_fsppilot_entries = samplerows; + + // single entry for known outcome (write access) + ss << "INSERT INTO fsppilot (known_outcome, variant_id, instr2, injection_instr, injection_instr_absolute, data_address, data_width, fspmethod_id) " + "SELECT 1, variant_id, instr2, instr2, instr2_absolute, " + " data_address, width, " << m_method_id << " " + "FROM trace " + "WHERE variant_id = " << variant.id << " AND accesstype = 'W' " + "ORDER BY instr2 ASC " + "LIMIT 1"; + if (!db->query(ss.str().c_str())) return false; + ss.str(""); + num_fsppilot_entries += db->affected_rows(); + assert(num_fsppilot_entries == (samplerows + 1)); + + LOG << "created " << num_fsppilot_entries << " fsppilot entries" << std::endl; + + // fspgroup entries for sampled trace entries + ss << "INSERT INTO fspgroup (variant_id, instr2, data_address, fspmethod_id, pilot_id) " + << "SELECT p.variant_id, p.instr2, p.data_address, p.fspmethod_id, p.id " + << "FROM fsppilot p " + << "WHERE known_outcome = 0 AND p.fspmethod_id = " << m_method_id << " " + << "AND p.variant_id = " << variant.id; + + if (!db->query(ss.str().c_str())) return false; + ss.str(""); + num_fspgroup_entries = db->affected_rows(); + } else { + ss << "INSERT INTO fspgroup (variant_id, instr2, data_address, fspmethod_id, pilot_id) VALUES "; + std::string insert_sql(ss.str()); + ss.str(""); + + for (unsigned i = 0; i < samplerows; ++i) { + uint64_t pos = my_rand(pop.get_size() - 1); + Pilot p = pop.get(pos); + ss << "(" << variant.id << "," << p.instr2 + << "," << p.data_address << "," << m_method_id + << "," << p.id << ")"; + db->insert_multiple(insert_sql.c_str(), ss.str().c_str()); + ss.str(""); + } + db->insert_multiple(); + num_fspgroup_entries = samplerows; + + // the known_outcome=1 pilot has been determined with the "basic" method + known_pilot_method_id = db->get_fspmethod_id("basic"); } - db->insert_multiple(); - unsigned num_fsppilot_entries = samplerows; - - // single entry for known outcome (write access) - ss << "INSERT INTO fsppilot (known_outcome, variant_id, instr2, injection_instr, injection_instr_absolute, data_address, data_width, fspmethod_id) " - "SELECT 1, variant_id, instr2, instr2, instr2_absolute, " - " data_address, width, " << m_method_id << " " - "FROM trace " - "WHERE variant_id = " << variant.id << " AND accesstype = 'W' " - "ORDER BY instr2 ASC " - "LIMIT 1"; - if (!db->query(ss.str().c_str())) return false; - ss.str(""); - num_fsppilot_entries += db->affected_rows(); - assert(num_fsppilot_entries == (samplerows + 1)); - - LOG << "created " << num_fsppilot_entries << " fsppilot entries" << std::endl; - - // fspgroup entries for sampled trace entries - ss << "INSERT INTO fspgroup (variant_id, instr2, data_address, fspmethod_id, pilot_id) " - << "SELECT p.variant_id, p.instr2, p.data_address, p.fspmethod_id, p.id " - << "FROM fsppilot p " - << "WHERE known_outcome = 0 AND p.fspmethod_id = " << m_method_id << " " - << "AND p.variant_id = " << variant.id; - - if (!db->query(ss.str().c_str())) return false; - ss.str(""); - unsigned num_fspgroup_entries = db->affected_rows(); #if 0 // do it like the basic pruner: // fspgroup entries for known (W) trace entries @@ -162,9 +226,9 @@ bool FESamplingPruner::sampling_prune(const fail::Database::Variant& variant) // for each W); this needs to be accounted for at data analysis time, // though. ss << "INSERT INTO fspgroup (variant_id, instr2, data_address, fspmethod_id, pilot_id) " - "SELECT variant_id, instr2, data_address, fspmethod_id, id " + "SELECT variant_id, instr2, data_address, " << m_method_id << ", id " "FROM fsppilot " - "WHERE variant_id = " << variant.id << " AND known_outcome = 1 AND fspmethod_id = " << m_method_id; + "WHERE variant_id = " << variant.id << " AND known_outcome = 1 AND fspmethod_id = " << known_pilot_method_id; #endif if (!db->query(ss.str().c_str())) return false; ss.str(""); diff --git a/tools/prune-trace/FESamplingPruner.hpp b/tools/prune-trace/FESamplingPruner.hpp index a9538622..7c71c530 100644 --- a/tools/prune-trace/FESamplingPruner.hpp +++ b/tools/prune-trace/FESamplingPruner.hpp @@ -15,11 +15,13 @@ /// class FESamplingPruner : public Pruner { fail::CommandLine::option_handle SAMPLESIZE; + fail::CommandLine::option_handle USE_KNOWN_RESULTS; unsigned m_samplesize; + bool m_use_known_results; public: - FESamplingPruner() : m_samplesize(0) { } + FESamplingPruner() : m_samplesize(0), m_use_known_results(false) { } virtual std::string method_name() { return "FESampling"; } virtual bool commandline_init(); virtual bool prune_all();