prune-trace: incremental mode for SamplingPruner

The --incremental switch allows adding more samples if the resulting
confidence intervals are not yet satisfactory.

Change-Id: I65dc99522f45f8a4eaf4ce68e832f7636585381d
This commit is contained in:
Horst Schirmeier
2015-01-15 19:11:33 +01:00
parent 79211fd31d
commit 4cbcf30b7c
5 changed files with 110 additions and 32 deletions

View File

@ -13,7 +13,7 @@ bool Pruner::init(
const std::vector<std::string>& variants_exclude, const std::vector<std::string>& variants_exclude,
const std::vector<std::string>& benchmarks, const std::vector<std::string>& benchmarks,
const std::vector<std::string>& benchmarks_exclude, const std::vector<std::string>& benchmarks_exclude,
bool overwrite) bool overwrite, bool incremental)
{ {
m_variants = db->get_variants( m_variants = db->get_variants(
variants, variants_exclude, variants, variants_exclude,
@ -26,8 +26,8 @@ bool Pruner::init(
<< std::endl; << std::endl;
// make sure we only prune variants that haven't been pruned previously // make sure we only prune variants that haven't been pruned previously
// (unless we run with --overwrite) // (unless we run with --overwrite or --incremental)
if (!overwrite) { if (!overwrite && !incremental) {
for (std::vector<fail::Database::Variant>::iterator it = m_variants.begin(); for (std::vector<fail::Database::Variant>::iterator it = m_variants.begin();
it != m_variants.end(); ) { it != m_variants.end(); ) {
std::stringstream ss; std::stringstream ss;

View File

@ -21,7 +21,7 @@ public:
const std::vector<std::string>& variants_exclude, const std::vector<std::string>& variants_exclude,
const std::vector<std::string>& benchmarks, const std::vector<std::string>& benchmarks,
const std::vector<std::string>& benchmarks_exclude, const std::vector<std::string>& benchmarks_exclude,
bool overwrite); bool overwrite, bool incremental);
/** /**
* Callback function that can be used to add command line options * Callback function that can be used to add command line options
@ -35,6 +35,14 @@ public:
virtual bool clear_database(); virtual bool clear_database();
virtual bool prune_all() = 0; virtual bool prune_all() = 0;
/**
* Tell the pruner whether to work incrementally. For example, a sampling
* pruner could add more pilots to already existing ones (which may already
* be associated with fault-injection results).
*
* This default implementation offers no incremental mode: it accepts only
* a request for non-incremental operation.
*
* @param incremental true to request incremental operation, false otherwise
* @return false if the pruner is incapable of working in the desired mode
*         (here: whenever incremental is true), true otherwise
*/
virtual bool set_incremental(bool incremental) { return !incremental; }
}; };
#endif #endif

View File

@ -13,11 +13,9 @@ using std::endl;
struct WeightedPilot { struct WeightedPilot {
uint64_t duration; uint64_t duration;
uint32_t instr2;
union {
uint32_t instr2_absolute;
uint32_t id; uint32_t id;
}; uint32_t instr2;
uint32_t instr2_absolute;
uint32_t data_address; uint32_t data_address;
uint32_t weight; uint32_t weight;
@ -94,13 +92,27 @@ bool SamplingPruner::sampling_prune(const fail::Database::Variant& variant)
uint64_t pilotcount = 0; uint64_t pilotcount = 0;
if (!m_use_known_results) { if (!m_use_known_results) {
LOG << "loading trace entries for " << variant.variant << "/" << variant.benchmark << " ..." << endl; LOG << "loading trace entries "
<< (m_incremental ? "and existing pilots " : "")
<< "for " << variant.variant << "/" << variant.benchmark << " ..." << endl;
// load trace entries if (!m_incremental) {
ss << "SELECT instr2, instr2_absolute, data_address, time2-time1+1 AS duration" // load trace entries
<< " FROM trace" ss << "SELECT instr2, instr2_absolute, data_address, time2-time1+1 AS duration"
<< " WHERE variant_id = " << variant.id " FROM trace"
<< " AND accesstype = 'R'"; " WHERE variant_id = " << variant.id <<
" AND accesstype = 'R'";
} else {
// load trace entries and existing pilots
ss << "SELECT t.instr2, t.instr2_absolute, t.data_address, t.time2-t.time1+1 AS duration,"
" IFNULL(g.pilot_id, 0), IFNULL(g.weight, 0)"
" FROM trace t"
" LEFT JOIN fspgroup g"
" ON t.variant_id = g.variant_id AND t.data_address = g.data_address AND t.instr2 = g.instr2"
" AND g.fspmethod_id = " << m_method_id <<
" WHERE t.variant_id = " << variant.id <<
" AND t.accesstype = 'R'";
}
res = db->query_stream(ss.str().c_str()); res = db->query_stream(ss.str().c_str());
ss.str(""); ss.str("");
if (!res) return false; if (!res) return false;
@ -110,7 +122,8 @@ bool SamplingPruner::sampling_prune(const fail::Database::Variant& variant)
p.instr2_absolute = strtoul(row[1], 0, 10); p.instr2_absolute = strtoul(row[1], 0, 10);
p.data_address = strtoul(row[2], 0, 10); p.data_address = strtoul(row[2], 0, 10);
p.duration = m_weighting ? strtoull(row[3], 0, 10) : 1; p.duration = m_weighting ? strtoull(row[3], 0, 10) : 1;
p.weight = 0; p.id = m_incremental ? strtoul(row[4], 0, 10) : 0;
p.weight = m_incremental ? strtoul(row[5], 0, 10) : 0;
pop.add(p); pop.add(p);
++pilotcount; ++pilotcount;
} }
@ -118,14 +131,28 @@ bool SamplingPruner::sampling_prune(const fail::Database::Variant& variant)
} else { } else {
LOG << "loading pilots for " << variant.variant << "/" << variant.benchmark << " ..." << endl; LOG << "loading pilots for " << variant.variant << "/" << variant.benchmark << " ..." << endl;
// load fsppilot entries if (!m_incremental) {
ss << "SELECT p.id, p.instr2, p.data_address, t.time2 - t.time1 + 1 AS duration" // load fsppilot entries
<< " FROM fsppilot p" ss << "SELECT p.id, p.instr2, p.data_address, t.time2 - t.time1 + 1 AS duration"
<< " JOIN trace t" " FROM fsppilot p"
<< " ON t.variant_id = p.variant_id AND t.data_address = p.data_address AND t.instr2 = p.instr2" " JOIN trace t"
<< " WHERE p.fspmethod_id = " << db->get_fspmethod_id("basic") " ON t.variant_id = p.variant_id AND t.data_address = p.data_address AND t.instr2 = p.instr2"
<< " AND p.variant_id = " << variant.id " WHERE p.fspmethod_id = " << db->get_fspmethod_id("basic") <<
<< " AND p.known_outcome = 0"; " AND p.variant_id = " << variant.id <<
" AND p.known_outcome = 0";
} else {
// load fsppilot entries and existing sampling pilots
ss << "SELECT p.id, p.instr2, p.data_address, t.time2 - t.time1 + 1 AS duration, IFNULL(g.weight, 0)"
" FROM fsppilot p"
" JOIN trace t"
" ON t.variant_id = p.variant_id AND t.data_address = p.data_address AND t.instr2 = p.instr2"
" LEFT JOIN fspgroup g"
" ON t.variant_id = g.variant_id AND t.data_address = g.data_address AND t.instr2 = g.instr2"
" AND g.fspmethod_id = " << m_method_id <<
" WHERE p.fspmethod_id = " << db->get_fspmethod_id("basic") <<
" AND p.variant_id = " << variant.id <<
" AND p.known_outcome = 0";
}
res = db->query_stream(ss.str().c_str()); res = db->query_stream(ss.str().c_str());
ss.str(""); ss.str("");
if (!res) return false; if (!res) return false;
@ -135,7 +162,7 @@ bool SamplingPruner::sampling_prune(const fail::Database::Variant& variant)
p.instr2 = strtoul(row[1], 0, 10); p.instr2 = strtoul(row[1], 0, 10);
p.data_address = strtoul(row[2], 0, 10); p.data_address = strtoul(row[2], 0, 10);
p.duration = m_weighting ? strtoull(row[3], 0, 10) : 1; p.duration = m_weighting ? strtoull(row[3], 0, 10) : 1;
p.weight = 0; p.weight = m_incremental ? strtoull(row[4], 0, 10) : 0;
pop.add(p); pop.add(p);
++pilotcount; ++pilotcount;
} }
@ -158,6 +185,8 @@ bool SamplingPruner::sampling_prune(const fail::Database::Variant& variant)
p.weight++; p.weight++;
// first time we sample this pilot? // first time we sample this pilot?
if (!m_use_known_results && p.weight == 1) { if (!m_use_known_results && p.weight == 1) {
// no need to special-case existing pilots (incremental mode), as
// their initial weight is supposed to be at least 1
ss << "(0," << variant.id << "," << p.instr2 << "," << p.instr2 ss << "(0," << variant.id << "," << p.instr2 << "," << p.instr2
<< "," << p.instr2_absolute << "," << p.data_address << "," << p.instr2_absolute << "," << p.data_address
<< ",1," << m_method_id << ")"; << ",1," << m_method_id << ")";
@ -174,7 +203,13 @@ bool SamplingPruner::sampling_prune(const fail::Database::Variant& variant)
// fspgroup entries for sampled trace entries // fspgroup entries for sampled trace entries
if (!m_use_known_results) { if (!m_use_known_results) {
ss << "INSERT INTO fspgroup (variant_id, instr2, data_address, fspmethod_id, pilot_id, weight) " if (!m_incremental) {
ss << "INSERT";
} else {
// this spares us to delete existing pilots before
ss << "REPLACE";
}
ss << " INTO fspgroup (variant_id, instr2, data_address, fspmethod_id, pilot_id, weight) "
<< "SELECT p.variant_id, p.instr2, p.data_address, " << m_method_id << ", p.id, 1 " << "SELECT p.variant_id, p.instr2, p.data_address, " << m_method_id << ", p.id, 1 "
<< "FROM fsppilot p " << "FROM fsppilot p "
<< "WHERE known_outcome = 0 AND p.fspmethod_id = " << m_method_id << " " << "WHERE known_outcome = 0 AND p.fspmethod_id = " << m_method_id << " "
@ -182,7 +217,14 @@ bool SamplingPruner::sampling_prune(const fail::Database::Variant& variant)
if (!db->query(ss.str().c_str())) return false; if (!db->query(ss.str().c_str())) return false;
ss.str(""); ss.str("");
uint64_t num_fspgroup_entries = db->affected_rows(); uint64_t num_fspgroup_entries;
if (!m_incremental) {
num_fspgroup_entries = db->affected_rows();
} else {
// with REPLACE INTO, affected_rows does not yield the number of
// new rows; take num_fsppilot_entries instead
num_fspgroup_entries = num_fsppilot_entries;
}
LOG << "created " << num_fspgroup_entries << " fspgroup entries" << std::endl; LOG << "created " << num_fspgroup_entries << " fspgroup entries" << std::endl;
// FIXME is this faster than manually INSERTing all fspgroup entries? // FIXME is this faster than manually INSERTing all fspgroup entries?
@ -208,13 +250,25 @@ bool SamplingPruner::sampling_prune(const fail::Database::Variant& variant)
} }
ss.str(""); ss.str("");
} }
LOG << "updated " << num_fspgroup_entries << " fspgroup entries" << std::endl;
if (!m_incremental) {
LOG << "updated " << num_fspgroup_entries << " fspgroup entries" << std::endl;
} else {
// we don't know how many rows we really updated
LOG << "updated fspgroup entries" << std::endl;
}
} else { } else {
uint64_t num_fspgroup_entries = 0; uint64_t num_fspgroup_entries = 0;
LOG << "creating fspgroup entries ..." << std::endl; LOG << "creating fspgroup entries ..." << std::endl;
ss << "INSERT INTO fspgroup (variant_id, instr2, data_address, fspmethod_id, pilot_id, weight) VALUES "; if (!m_incremental) {
ss << "INSERT";
} else {
// this spares us to delete existing pilots before
ss << "REPLACE";
}
ss << " INTO fspgroup (variant_id, instr2, data_address, fspmethod_id, pilot_id, weight) VALUES ";
insert_sql = ss.str(); insert_sql = ss.str();
ss.str(""); ss.str("");

View File

@ -17,10 +17,10 @@ class SamplingPruner : public Pruner {
fail::CommandLine::option_handle NO_WEIGHTING; fail::CommandLine::option_handle NO_WEIGHTING;
uint64_t m_samplesize; uint64_t m_samplesize;
bool m_use_known_results, m_weighting; bool m_use_known_results, m_weighting, m_incremental;
public: public:
SamplingPruner() : m_samplesize(0), m_use_known_results(false), m_weighting(true) { } SamplingPruner() : m_samplesize(0), m_use_known_results(false), m_weighting(true), m_incremental(false) { }
virtual std::string method_name() { return "sampling"; } virtual std::string method_name() { return "sampling"; }
virtual bool commandline_init(); virtual bool commandline_init();
virtual bool prune_all(); virtual bool prune_all();
@ -30,6 +30,8 @@ public:
aliases->push_back("sampling"); aliases->push_back("sampling");
} }
/**
* Switch incremental mode on or off by recording the request in
* m_incremental. Either setting is accepted, so this always returns true.
*/
virtual bool set_incremental(bool incremental) { m_incremental = incremental; return true; }
private: private:
bool sampling_prune(const fail::Database::Variant& variant); bool sampling_prune(const fail::Database::Variant& variant);
}; };

View File

@ -65,12 +65,20 @@ int main(int argc, char *argv[]) {
CommandLine::option_handle OVERWRITE = CommandLine::option_handle OVERWRITE =
cmd.addOption("", "overwrite", Arg::None, cmd.addOption("", "overwrite", Arg::None,
"--overwrite \tOverwrite already existing pruning data (the default is to skip variants with existing entries)"); "--overwrite \tOverwrite already existing pruning data (the default is to skip variants with existing entries)");
CommandLine::option_handle INCREMENTAL =
cmd.addOption("", "incremental", Arg::None,
"--incremental \tTell the pruner to work incrementally (if supported)");
if (!cmd.parse()) { if (!cmd.parse()) {
std::cerr << "Error parsing arguments." << std::endl; std::cerr << "Error parsing arguments." << std::endl;
exit(-1); exit(-1);
} }
if (cmd[OVERWRITE] && cmd[INCREMENTAL]) {
std::cerr << "--overwrite and --incremental cannot be used together." << std::endl;
exit(-1);
}
Pruner *pruner; Pruner *pruner;
std::string pruner_name = "BasicPruner"; std::string pruner_name = "BasicPruner";
if (cmd[PRUNER]) { if (cmd[PRUNER]) {
@ -110,6 +118,11 @@ int main(int argc, char *argv[]) {
Database *db = Database::cmdline_connect(); Database *db = Database::cmdline_connect();
pruner->set_db(db); pruner->set_db(db);
if (cmd[INCREMENTAL] && !pruner->set_incremental(true)) {
std::cerr << "Pruner is incapable of running incrementally" << std::endl;
exit(-1);
}
std::vector<std::string> variants, benchmarks, variants_exclude, benchmarks_exclude; std::vector<std::string> variants, benchmarks, variants_exclude, benchmarks_exclude;
if (cmd[VARIANT]) { if (cmd[VARIANT]) {
for (option::Option *o = cmd[VARIANT]; o; o = o->next()) { for (option::Option *o = cmd[VARIANT]; o; o = o->next()) {
@ -150,7 +163,8 @@ int main(int argc, char *argv[]) {
exit(-1); exit(-1);
} }
if (!pruner->init(variants, variants_exclude, benchmarks, benchmarks_exclude, cmd[OVERWRITE])) { if (!pruner->init(variants, variants_exclude, benchmarks, benchmarks_exclude,
cmd[OVERWRITE], cmd[INCREMENTAL])) {
LOG << "pruner->init() failed" << endl; LOG << "pruner->init() failed" << endl;
exit(-1); exit(-1);
} }
@ -158,7 +172,7 @@ int main(int argc, char *argv[]) {
//////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////
// Do the actual pruning // Do the actual pruning
//////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////
if (!cmd[NO_DELETE] && cmd[OVERWRITE] && !pruner->clear_database()) { if (!cmd[NO_DELETE] && cmd[OVERWRITE] && !cmd[INCREMENTAL] && !pruner->clear_database()) {
LOG << "clear_database() failed" << endl; LOG << "clear_database() failed" << endl;
exit(-1); exit(-1);
} }